├── .github
├── CODEOWNERS
├── ISSUE_TEMPLATE
│ ├── 1-feature-request.yml
│ ├── 2-bug.yml
│ └── config.yml
├── dependabot.yml
├── pull_request_template.md
└── workflows
│ ├── dependabot-auto-merge-go.yml
│ ├── lint.yml
│ ├── project-automation.yml
│ ├── release.yml
│ ├── test.yml
│ └── validate-generated-files.yml
├── .gitignore
├── .golangci.goheader.template
├── .golangci.yml
├── .goreleaser.yml
├── LICENSE.md
├── Makefile
├── README.md
├── cmd
└── connector
│ └── main.go
├── connector.go
├── connector.yaml
├── destination.go
├── destination
└── config.go
├── destination_integration_test.go
├── go.mod
├── go.sum
├── internal
├── db_info.go
├── utils.go
└── utils_test.go
├── scripts
├── bump_version.sh
├── common.sh
└── tag.sh
├── source.go
├── source
├── config.go
├── config_test.go
├── cpool
│ ├── cpool.go
│ ├── json.go
│ └── json_test.go
├── iterator.go
├── logrepl
│ ├── cdc.go
│ ├── cdc_test.go
│ ├── cleaner.go
│ ├── cleaner_test.go
│ ├── combined.go
│ ├── combined_test.go
│ ├── handler.go
│ ├── handler_test.go
│ └── internal
│ │ ├── error.go
│ │ ├── publication.go
│ │ ├── publication_test.go
│ │ ├── relationset.go
│ │ ├── relationset_test.go
│ │ ├── replication_slot.go
│ │ ├── replication_slot_test.go
│ │ ├── subscription.go
│ │ └── subscription_test.go
├── position
│ ├── position.go
│ ├── position_test.go
│ └── type_string.go
├── schema
│ ├── avro.go
│ └── avro_test.go
├── snapshot
│ ├── convert.go
│ ├── fetch_worker.go
│ ├── fetch_worker_test.go
│ ├── iterator.go
│ └── iterator_test.go
└── types
│ ├── numeric.go
│ ├── types.go
│ ├── types_test.go
│ └── uuid.go
├── source_integration_test.go
├── test
├── conf.d
│ └── postgresql.conf
├── docker-compose.yml
└── helper.go
└── tools
├── go.mod
└── go.sum
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @ConduitIO/conduit-core
2 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/1-feature-request.yml:
--------------------------------------------------------------------------------
1 | name: Feature Request
2 | description: Request a new feature.
3 | title: "Feature: "
4 | labels: [feature, triage]
5 | body:
6 | - type: textarea
7 | attributes:
8 | label: Feature description
9 | description: A clear and concise description of what you want to happen and what problem will this solve.
10 | validations:
11 | required: true
12 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/2-bug.yml:
--------------------------------------------------------------------------------
1 | name: Bug
2 | description: Report a bug.
3 | title: "Bug: "
4 | labels: [bug, triage]
5 | body:
6 | - type: textarea
7 | attributes:
8 | label: Bug description
9 | description: A concise description of what you're experiencing and what you expected to happen instead.
10 | validations:
11 | required: true
12 | - type: textarea
13 | attributes:
14 | label: Steps to reproduce
15 | description: Steps to reproduce the behavior.
16 | placeholder: |
17 | 1. In this environment...
18 | 2. With this config...
19 | 3. Run '...'
20 | 4. See error...
21 | validations:
22 | required: true
23 | - type: input
24 | attributes:
25 | label: Version
26 | description: "Version of the Conduit connector as well as version of the Postgres connector you're using."
27 | placeholder: v0.1.0
28 | validations:
29 | required: true
30 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: true
2 | contact_links:
3 | - name: ❓ Ask a Question
4 | url: https://github.com/ConduitIO/conduit/discussions
5 | about: Please ask and answer questions here.
6 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # Docs: https://docs.github.com/en/code-security/supply-chain-security/keeping-your-dependencies-updated-automatically/configuration-options-for-dependency-updates
2 | version: 2
3 | updates:
4 |
5 | # Maintain dependencies for GitHub Actions
6 | - package-ecosystem: "github-actions"
7 | directory: "/"
8 | schedule:
9 | interval: "daily"
10 | commit-message:
11 | prefix: ".github:"
12 |
13 | # Maintain dependencies for Go
14 | - package-ecosystem: "gomod"
15 | directory: "/"
16 | schedule:
17 | interval: "daily"
18 | commit-message:
19 | prefix: "go.mod:"
20 |
21 | # Maintain dependencies for Go tools
22 | - package-ecosystem: "gomod"
23 | directory: "/tools"
24 | schedule:
25 | interval: "weekly"
26 | commit-message:
27 | prefix: "Go tools:"
28 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | ### Description
2 |
3 | Please include a summary of the change and what type of change it is (new feature, bug fix, refactoring, documentation).
4 | Please also include relevant motivation and context.
5 | List any dependencies that are required for this change.
6 |
7 | Fixes # (issue)
8 |
9 | ### Quick checks:
10 |
11 | - [ ] I have followed the [Code Guidelines](https://github.com/ConduitIO/conduit/blob/main/docs/code_guidelines.md).
12 | - [ ] There is no other [pull request](https://github.com/ConduitIO/conduit-connector-postgres/pulls) for the same update/change.
13 | - [ ] I have written unit tests.
14 | - [ ] I have made sure that the PR is of reasonable size and can be easily reviewed.
15 |
--------------------------------------------------------------------------------
/.github/workflows/dependabot-auto-merge-go.yml:
--------------------------------------------------------------------------------
1 | # This action automatically merges dependabot PRs that update go dependencies (only patch and minor updates).
2 | # Based on: https://docs.github.com/en/code-security/supply-chain-security/keeping-your-dependencies-updated-automatically/automating-dependabot-with-github-actions#enable-auto-merge-on-a-pull-request
3 |
4 | name: Dependabot auto-merge
5 | on:
6 | pull_request:
7 | # Run this action when dependabot labels the PR, we care about the 'go' label.
8 | types: [labeled]
9 |
10 | permissions:
11 | pull-requests: write
12 | contents: write
13 |
14 | jobs:
15 | dependabot-go:
16 | runs-on: ubuntu-latest
17 | if: ${{ github.actor == 'dependabot[bot]' && contains(github.event.pull_request.labels.*.name, 'go') }}
18 | steps:
19 | - name: Dependabot metadata
20 | id: metadata
21 | uses: dependabot/fetch-metadata@v2.4.0
22 | with:
23 | github-token: "${{ secrets.GITHUB_TOKEN }}"
24 |
25 | - name: Approve PR
26 | # Approve only patch and minor updates
27 | if: ${{ steps.metadata.outputs.update-type != 'version-update:semver-major' }}
28 | run: gh pr review --approve "$PR_URL"
29 | env:
30 | PR_URL: ${{ github.event.pull_request.html_url }}
31 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
32 |
33 | - name: Enable auto-merge for Dependabot PRs
34 | # Enable auto-merging only for patch and minor updates
35 | if: ${{ steps.metadata.outputs.update-type != 'version-update:semver-major' }}
36 | run: gh pr merge --auto --squash "$PR_URL"
37 | env:
38 | PR_URL: ${{ github.event.pull_request.html_url }}
39 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
40 |
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
1 | name: lint
2 |
3 | on:
4 | push:
5 | branches: [ main ]
6 | pull_request:
7 |
8 | jobs:
9 | golangci-lint:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - uses: actions/checkout@v4
13 | - uses: actions/setup-go@v5
14 | with:
15 | go-version-file: 'go.mod'
16 |
17 | # This step sets up the variable steps.golangci-lint-version.outputs.v
18 | # to contain the version of golangci-lint (e.g. v1.54.2).
19 | # The version is taken from go.mod.
20 | - name: Golangci-lint version
21 | id: golangci-lint-version
22 | run: |
23 | GOLANGCI_LINT_VERSION=$( go list -modfile=tools/go.mod -m -f '{{.Version}}' github.com/golangci/golangci-lint/v2 )
24 | echo "v=$GOLANGCI_LINT_VERSION" >> "$GITHUB_OUTPUT"
25 |
26 | - name: golangci-lint
27 | uses: golangci/golangci-lint-action@v8
28 | with:
29 | version: ${{ steps.golangci-lint-version.outputs.v }}
30 |
--------------------------------------------------------------------------------
/.github/workflows/project-automation.yml:
--------------------------------------------------------------------------------
1 | name: project-management
2 |
3 | on:
4 | issues:
5 | types: [opened]
6 |
7 | jobs:
8 | project-mgmt:
9 | uses: ConduitIO/automation/.github/workflows/project-automation.yml@main
10 | secrets:
11 | project-automation-token: ${{ secrets.PROJECT_AUTOMATION }}
12 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: release
2 |
3 | on:
4 | push:
5 | tags:
6 | - '*'
7 |
8 | permissions:
9 | contents: write
10 |
11 | jobs:
12 | release:
13 | name: Release
14 | runs-on: ubuntu-latest
15 | steps:
16 | - name: Checkout
17 | uses: actions/checkout@v4
18 | with:
19 | fetch-depth: 0
20 |
21 | - name: Check Connector Tag
22 | uses: conduitio/automation/actions/check_connector_tag@main
23 |
24 | - name: Set up Go
25 | uses: actions/setup-go@v5
26 | with:
27 | go-version-file: 'go.mod'
28 |
29 | - name: Run GoReleaser
30 | uses: goreleaser/goreleaser-action@v6
31 | with:
32 | distribution: goreleaser
33 | version: latest
34 | args: release
35 | env:
36 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
37 |
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | name: test
2 |
3 | on:
4 | push:
5 | branches: [ main ]
6 | pull_request:
7 |
8 | jobs:
9 | test:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - uses: actions/checkout@v4
13 |
14 | - name: Set up Go
15 | uses: actions/setup-go@v5
16 | with:
17 | go-version-file: 'go.mod'
18 |
19 | - name: Test
20 | run: make test GOTEST_FLAGS="-v -count=1"
21 |
--------------------------------------------------------------------------------
/.github/workflows/validate-generated-files.yml:
--------------------------------------------------------------------------------
1 | name: validate-generated-files
2 |
3 | on:
4 | push:
5 | branches: [ main ]
6 | pull_request:
7 |
8 | jobs:
9 | validate-generated-files:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - uses: actions/checkout@v4
13 |
14 | - name: Set up Go
15 | uses: actions/setup-go@v5
16 | with:
17 | go-version-file: 'go.mod'
18 |
19 | - name: Check generated files
20 | run: |
21 | export PATH=$PATH:$(go env GOPATH)/bin
22 | make install-tools generate
23 | git diff
24 | git diff --exit-code --numstat
25 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Binary built with `make build`
2 | /conduit-connector-postgres
3 | .DS_Store
4 | .idea/
5 |
--------------------------------------------------------------------------------
/.golangci.goheader.template:
--------------------------------------------------------------------------------
1 | Copyright © {{ copyright-year }} Meroxa, Inc.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
--------------------------------------------------------------------------------
/.golangci.yml:
--------------------------------------------------------------------------------
1 | version: "2"
2 | linters:
3 | default: none
4 | enable:
5 | - bodyclose
6 | - copyloopvar
7 | - dogsled
8 | - durationcheck
9 | - errcheck
10 | - errname
11 | - forbidigo
12 | - goconst
13 | - gocritic
14 | - gocyclo
15 | - goheader
16 | - gomoddirectives
17 | - gomodguard
18 | - goprintffuncname
19 | - gosec
20 | - govet
21 | - ineffassign
22 | - makezero
23 | - noctx
24 | - nolintlint
25 | - predeclared
26 | - revive
27 | - rowserrcheck
28 | - sqlclosecheck
29 | - staticcheck
30 | - unconvert
31 | - unused
32 | - wastedassign
33 | - whitespace
34 | settings:
35 | forbidigo:
36 | forbid:
37 | - pattern: ^pgxpool\.New.*$
38 | msg: Use github.com/conduitio/conduit-connector-postgres/source/cpool.New instead.
39 | gocyclo:
40 | min-complexity: 20
41 | goheader:
42 | values:
43 | regexp:
44 | copyright-year: 20[2-9]\d
45 | template-path: .golangci.goheader.template
46 | nolintlint:
47 | require-explanation: true
48 | require-specific: true
49 | allow-unused: false
50 | exclusions:
51 | generated: lax
52 | presets:
53 | - comments
54 | - common-false-positives
55 | - legacy
56 | - std-error-handling
57 | rules:
58 | - linters:
59 | - gosec
60 | path: test/helper\.go
61 | - linters:
62 | - forbidigo
63 | path: source/cpool/cpool\.go
64 | - linters:
65 | - goconst
66 | path: (.+)_test\.go
67 | paths:
68 | - third_party$
69 | - builtin$
70 | - examples$
71 | formatters:
72 | enable:
73 | - gofmt
74 | - goimports
75 | settings:
76 | gofmt:
77 | simplify: false
78 | exclusions:
79 | generated: lax
80 | paths:
81 | - third_party$
82 | - builtin$
83 | - examples$
84 |
--------------------------------------------------------------------------------
/.goreleaser.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | builds:
3 | - main: ./cmd/connector/main.go
4 | goos:
5 | - darwin
6 | - linux
7 | - windows
8 | env:
9 | - CGO_ENABLED=0
10 | ldflags:
11 | - "-s -w -X 'github.com/conduitio/conduit-connector-postgres.version={{ .Tag }}'"
12 | checksum:
13 | name_template: checksums.txt
14 | archives:
15 | - name_template: >-
16 | {{ .ProjectName }}_
17 | {{- .Version }}_
18 | {{- title .Os }}_
19 | {{- if eq .Arch "amd64" }}x86_64
20 | {{- else if eq .Arch "386" }}i386
21 | {{- else }}{{ .Arch }}{{ end }}
22 | changelog:
23 | sort: asc
24 | use: github
25 | filters:
26 | exclude:
27 | - '^docs:'
28 | - '^test:'
29 | - '^go.mod:'
30 | - '^.github:'
31 | - Merge branch
32 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright 2022 Meroxa, Inc.
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | VERSION=$(shell git describe --tags --dirty --always)
2 |
3 | .PHONY: build
4 | build:
5 | go build -ldflags "-X 'github.com/conduitio/conduit-connector-postgres.version=${VERSION}'" -o conduit-connector-postgres cmd/connector/main.go
6 |
7 | .PHONY: test
8 | test:
9 | # run required docker containers, execute integration tests, stop containers after tests
10 | docker compose -f test/docker-compose.yml up --force-recreate --quiet-pull -d --wait
11 | go test $(GOTEST_FLAGS) -race ./...; ret=$$?; \
12 | docker compose -f test/docker-compose.yml down --volumes; \
13 | exit $$ret
14 |
15 | .PHONY: lint
16 | lint:
17 | golangci-lint run
18 |
19 | .PHONY: generate
20 | generate:
21 | go generate ./...
22 | conn-sdk-cli readmegen -w
23 |
24 | .PHONY: fmt
25 | fmt:
26 | gofumpt -l -w .
27 | gci write --skip-generated .
28 |
29 | .PHONY: install-tools
30 | install-tools:
31 | @echo Installing tools from tools/go.mod
32 | @go list -modfile=tools/go.mod tool | xargs -I % go list -modfile=tools/go.mod -f "%@{{.Module.Version}}" % | xargs -tI % go install %
33 | @go mod tidy
34 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Conduit Connector PostgreSQL
2 |
3 | The PostgreSQL connector is a [Conduit](https://github.com/ConduitIO/conduit)
4 | plugin. It provides both a source and a destination PostgreSQL connector.
5 |
6 |
7 | ## Source
8 |
9 | The Postgres Source Connector connects to a database with the provided `url` and
10 | starts creating records for each change detected in the provided tables.
11 |
12 | Upon starting, the source takes a snapshot of the provided tables in the database,
13 | then switches into CDC mode. In CDC mode, the plugin reads from a buffer of CDC events.
14 |
15 | ### Snapshot
16 |
17 | When the connector first starts, snapshot mode is enabled. The connector acquires
18 | a read-only lock on the tables, and then reads all rows of the tables into Conduit.
19 | Once all rows in that initial snapshot are read the connector releases its lock and
20 | switches into CDC mode.
21 |
22 | This behavior is enabled by default, but can be turned off by adding
23 | `"snapshotMode": "never"` to the Source configuration.
24 |
25 | ### Change Data Capture
26 |
27 | This connector implements Change Data Capture (CDC) features for PostgreSQL by
28 | creating a logical replication slot and a publication that listens to changes in the
29 | configured tables. Every detected change is converted into a record. If there are no
30 | records available, the connector blocks until a record is available or the connector
31 | receives a stop signal.
32 |
33 | #### Logical Replication Configuration
34 |
35 | When the connector switches to CDC mode, it attempts to run the initial setup commands
36 | to create its logical replication slot and publication. It will connect to an existing
37 | slot if one with the configured name exists.
38 |
39 | The Postgres user specified in the connection URL must have sufficient privileges to
40 | run all of these setup commands, or it will fail.
41 |
42 | Example pipeline configuration that's using logical replication:
43 |
44 | ```yaml
45 | version: 2.2
46 | pipelines:
47 | - id: pg-to-log
48 | status: running
49 | connectors:
50 | - id: pg
51 | type: source
52 | plugin: builtin:postgres
53 | settings:
54 | url: "postgres://exampleuser:examplepass@localhost:5433/exampledb?sslmode=disable"
55 | tables: "users"
56 | cdcMode: "logrepl"
57 | logrepl.publicationName: "examplepub"
58 | logrepl.slotName: "exampleslot"
59 | - id: log
60 | type: destination
61 | plugin: builtin:log
62 | settings:
63 | level: info
64 | ```
65 |
66 | :warning: When the connector or pipeline is deleted, the connector will automatically
67 | attempt to delete the replication slot and publication. This is the default behaviour
68 | and can be disabled by setting `logrepl.autoCleanup` to `false`.
69 |
70 | ### Key Handling
71 |
72 | The connector will automatically look up the primary key column for the specified tables
73 | and use them as the key value. If that can't be determined, the connector will return
74 | an error.
75 |
76 | ## Destination
77 |
78 | The Postgres Destination takes a Conduit record and stores it using a SQL statement.
79 | The Destination is designed to handle different payloads and keys. Because of this,
80 | each record is individually parsed and upserted.
81 |
82 | ### Handling record operations
83 |
84 | Based on the `Operation` field in the record, the destination will either insert,
85 | update or delete the record in the target table. Snapshot records are always inserted.
86 |
87 | If the target table already contains a record with the same key as a record being
88 | inserted, the record will be updated (upserted). This can overwrite and thus potentially
89 | lose data, so keys should be assigned correctly from the Source.
90 |
91 | If the target table does not contain a record with the same key as a record being
92 | deleted, the record will be ignored.
93 |
94 | If there is no key, the record will be simply appended.
95 |
96 |
97 | ## Source Configuration Parameters
98 |
99 |
100 | ```yaml
101 | version: 2.2
102 | pipelines:
103 | - id: example
104 | status: running
105 | connectors:
106 | - id: example
107 | plugin: "postgres"
108 | settings:
109 | # Tables is a list of table names to read from, separated by a comma,
110 | # e.g.: "table1,table2". Use "*" if you'd like to listen to all tables.
111 | # Type: string
112 | # Required: yes
113 | tables: ""
114 | # URL is the connection string for the Postgres database.
115 | # Type: string
116 | # Required: yes
117 | url: ""
118 | # CDCMode determines how the connector should listen to changes.
119 | # Type: string
120 | # Required: no
121 | cdcMode: "auto"
122 | # LogreplAutoCleanup determines if the replication slot and
123 | # publication should be removed when the connector is deleted.
124 | # Type: bool
125 | # Required: no
126 | logrepl.autoCleanup: "true"
127 | # LogreplPublicationName determines the publication name in case the
128 | # connector uses logical replication to listen to changes (see
129 | # CDCMode).
130 | # Type: string
131 | # Required: no
132 | logrepl.publicationName: "conduitpub"
133 | # LogreplSlotName determines the replication slot name in case the
134 | # connector uses logical replication to listen to changes (see
135 | # CDCMode). Can only contain lower-case letters, numbers, and the
136 | # underscore character.
137 | # Type: string
138 | # Required: no
139 | logrepl.slotName: "conduitslot"
140 | # WithAvroSchema determines whether the connector should attach an
141 | # avro schema on each record.
142 | # Type: bool
143 | # Required: no
144 | logrepl.withAvroSchema: "true"
145 | # Snapshot fetcher size determines the number of rows to retrieve at a
146 | # time.
147 | # Type: int
148 | # Required: no
149 | snapshot.fetchSize: "50000"
150 | # SnapshotMode is whether the plugin will take a snapshot of the
151 | # entire table before starting cdc mode.
152 | # Type: string
153 | # Required: no
154 | snapshotMode: "initial"
155 | # Maximum delay before an incomplete batch is read from the source.
156 | # Type: duration
157 | # Required: no
158 | sdk.batch.delay: "0"
159 | # Maximum size of batch before it gets read from the source.
160 | # Type: int
161 | # Required: no
162 | sdk.batch.size: "0"
163 | # Specifies whether to use a schema context name. If set to false, no
164 | # schema context name will be used, and schemas will be saved with the
165 | # subject name specified in the connector (not safe because of name
166 | # conflicts).
167 | # Type: bool
168 | # Required: no
169 | sdk.schema.context.enabled: "true"
170 | # Schema context name to be used. Used as a prefix for all schema
171 | # subject names. If empty, defaults to the connector ID.
172 | # Type: string
173 | # Required: no
174 | sdk.schema.context.name: ""
175 | # Whether to extract and encode the record key with a schema.
176 | # Type: bool
177 | # Required: no
178 | sdk.schema.extract.key.enabled: "false"
179 | # The subject of the key schema. If the record metadata contains the
180 | # field "opencdc.collection" it is prepended to the subject name and
181 | # separated with a dot.
182 | # Type: string
183 | # Required: no
184 | sdk.schema.extract.key.subject: "key"
185 | # Whether to extract and encode the record payload with a schema.
186 | # Type: bool
187 | # Required: no
188 | sdk.schema.extract.payload.enabled: "false"
189 | # The subject of the payload schema. If the record metadata contains
190 | # the field "opencdc.collection" it is prepended to the subject name
191 | # and separated with a dot.
192 | # Type: string
193 | # Required: no
194 | sdk.schema.extract.payload.subject: "payload"
195 | # The type of the payload schema.
196 | # Type: string
197 | # Required: no
198 | sdk.schema.extract.type: "avro"
199 | ```
200 |
201 |
202 | ## Destination Configuration Parameters
203 |
204 |
205 | ```yaml
206 | version: 2.2
207 | pipelines:
208 | - id: example
209 | status: running
210 | connectors:
211 | - id: example
212 | plugin: "postgres"
213 | settings:
214 | # URL is the connection string for the Postgres database.
215 | # Type: string
216 | # Required: yes
217 | url: ""
218 | # Key represents the column name for the key used to identify and
219 | # update existing rows.
220 | # Type: string
221 | # Required: no
222 | key: ""
223 | # Table is used as the target table into which records are inserted.
224 | # Type: string
225 | # Required: no
226 | table: "{{ index .Metadata "opencdc.collection" }}"
227 | # Maximum delay before an incomplete batch is written to the
228 | # destination.
229 | # Type: duration
230 | # Required: no
231 | sdk.batch.delay: "0"
232 | # Maximum size of batch before it gets written to the destination.
233 | # Type: int
234 | # Required: no
235 | sdk.batch.size: "0"
236 | # Allow bursts of at most X records (0 or less means that bursts are
237 | # not limited). Only takes effect if a rate limit per second is set.
238 | # Note that if `sdk.batch.size` is bigger than `sdk.rate.burst`, the
239 | # effective batch size will be equal to `sdk.rate.burst`.
240 | # Type: int
241 | # Required: no
242 | sdk.rate.burst: "0"
243 | # Maximum number of records written per second (0 means no rate
244 | # limit).
245 | # Type: float
246 | # Required: no
247 | sdk.rate.perSecond: "0"
248 | # The format of the output record. See the Conduit documentation for a
249 | # full list of supported formats
250 | # (https://conduit.io/docs/using/connectors/configuration-parameters/output-format).
251 | # Type: string
252 | # Required: no
253 | sdk.record.format: "opencdc/json"
254 | # Options to configure the chosen output record format. Options are
255 | # normally key=value pairs separated with comma (e.g.
256 | # opt1=val2,opt2=val2), except for the `template` record format, where
257 | # options are a Go template.
258 | # Type: string
259 | # Required: no
260 | sdk.record.format.options: ""
261 | # Whether to extract and decode the record key with a schema.
262 | # Type: bool
263 | # Required: no
264 | sdk.schema.extract.key.enabled: "true"
265 | # Whether to extract and decode the record payload with a schema.
266 | # Type: bool
267 | # Required: no
268 | sdk.schema.extract.payload.enabled: "true"
269 | ```
270 |
271 |
272 | ## Testing
273 |
274 | Run `make test` to run all the unit and integration tests, which require Docker
275 | to be installed and running. The command will handle starting and stopping
276 | docker containers for you.
277 |
278 | ## References
279 |
280 | - https://github.com/bitnami/bitnami-docker-postgresql-repmgr
281 | - https://github.com/Masterminds/squirrel
282 |
283 | 
284 |
--------------------------------------------------------------------------------
/cmd/connector/main.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2022 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package main
16 |
17 | import (
18 | postgres "github.com/conduitio/conduit-connector-postgres"
19 | sdk "github.com/conduitio/conduit-connector-sdk"
20 | )
21 |
// main hands control to the Conduit connector SDK, which serves the
// postgres connector (its specification, source, and destination).
func main() {
	sdk.Serve(postgres.Connector)
}
25 |
--------------------------------------------------------------------------------
/connector.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2022 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | //go:generate conn-sdk-cli specgen
16 |
17 | package postgres
18 |
19 | import (
20 | _ "embed"
21 |
22 | sdk "github.com/conduitio/conduit-connector-sdk"
23 | )
24 |
// specs holds the raw contents of connector.yaml, embedded at build time.
//
//go:embed connector.yaml
var specs string

// version is the connector version reported in the specification.
// NOTE(review): defaults to "(devel)"; presumably overridden at release
// build time (e.g. via linker flags) — confirm against the build scripts.
var version = "(devel)"

// Connector combines the YAML specification with the source and
// destination constructors, as consumed by the SDK.
var Connector = sdk.Connector{
	NewSpecification: sdk.YAMLSpecification(specs, version),
	NewSource:        NewSource,
	NewDestination:   NewDestination,
}
35 |
--------------------------------------------------------------------------------
/connector.yaml:
--------------------------------------------------------------------------------
1 | version: "1.0"
2 | specification:
3 | name: postgres
4 | summary: Conduit connector for PostgreSQL
5 | description: |
6 | ## Source
7 |
8 | The Postgres Source Connector connects to a database with the provided `url` and
9 | starts creating records for each change detected in the provided tables.
10 |
11 | Upon starting, the source takes a snapshot of the provided tables in the database,
12 | then switches into CDC mode. In CDC mode, the plugin reads from a buffer of CDC events.
13 |
14 | ### Snapshot
15 |
16 | When the connector first starts, snapshot mode is enabled. The connector acquires
17 | a read-only lock on the tables, and then reads all rows of the tables into Conduit.
18 | Once all rows in that initial snapshot are read the connector releases its lock and
19 | switches into CDC mode.
20 |
21 | This behavior is enabled by default, but can be turned off by adding
22 | `"snapshotMode": "never"` to the Source configuration.
23 |
24 | ### Change Data Capture
25 |
26 | This connector implements Change Data Capture (CDC) features for PostgreSQL by
27 | creating a logical replication slot and a publication that listens to changes in the
28 | configured tables. Every detected change is converted into a record. If there are no
29 | records available, the connector blocks until a record is available or the connector
30 | receives a stop signal.
31 |
32 | #### Logical Replication Configuration
33 |
34 | When the connector switches to CDC mode, it attempts to run the initial setup commands
35 | to create its logical replication slot and publication. It will connect to an existing
36 | slot if one with the configured name exists.
37 |
38 | The Postgres user specified in the connection URL must have sufficient privileges to
39 | run all of these setup commands, or it will fail.
40 |
41 | Example pipeline configuration that's using logical replication:
42 |
43 | ```yaml
44 | version: 2.2
45 | pipelines:
46 | - id: pg-to-log
47 | status: running
48 | connectors:
49 | - id: pg
50 | type: source
51 | plugin: builtin:postgres
52 | settings:
53 | url: "postgres://exampleuser:examplepass@localhost:5433/exampledb?sslmode=disable"
54 | tables: "users"
55 | cdcMode: "logrepl"
56 | logrepl.publicationName: "examplepub"
57 | logrepl.slotName: "exampleslot"
58 | - id: log
59 | type: destination
60 | plugin: builtin:log
61 | settings:
62 | level: info
63 | ```
64 |
65 | :warning: When the connector or pipeline is deleted, the connector will automatically
66 | attempt to delete the replication slot and publication. This is the default behaviour
67 | and can be disabled by setting `logrepl.autoCleanup` to `false`.
68 |
69 | ### Key Handling
70 |
71 | The connector will automatically look up the primary key column for the specified tables
72 | and use them as the key value. If that can't be determined, the connector will return
73 | an error.
74 |
75 | ## Destination
76 |
77 | The Postgres Destination takes a Conduit record and stores it using a SQL statement.
78 | The Destination is designed to handle different payloads and keys. Because of this,
79 | each record is individually parsed and upserted.
80 |
81 | ### Handling record operations
82 |
83 | Based on the `Operation` field in the record, the destination will either insert,
84 | update or delete the record in the target table. Snapshot records are always inserted.
85 |
86 | If the target table already contains a record with the same key as a record being
87 | inserted, the record will be updated (upserted). This can overwrite and thus potentially
88 | lose data, so keys should be assigned correctly from the Source.
89 |
90 | If the target table does not contain a record with the same key as a record being
91 | deleted, the record will be ignored.
92 |
93 | If there is no key, the record will be simply appended.
94 | version: v0.13.0
95 | author: Meroxa, Inc.
96 | source:
97 | parameters:
98 | - name: tables
99 | description: |-
100 | Tables is a List of table names to read from, separated by a comma, e.g.:"table1,table2".
101 | Use "*" if you'd like to listen to all tables.
102 | type: string
103 | default: ""
104 | validations:
105 | - type: required
106 | value: ""
107 | - name: url
108 | description: URL is the connection string for the Postgres database.
109 | type: string
110 | default: ""
111 | validations:
112 | - type: required
113 | value: ""
114 | - name: cdcMode
115 | description: CDCMode determines how the connector should listen to changes.
116 | type: string
117 | default: auto
118 | validations:
119 | - type: inclusion
120 | value: auto,logrepl
121 | - name: logrepl.autoCleanup
122 | description: |-
123 | LogreplAutoCleanup determines if the replication slot and publication should be
124 | removed when the connector is deleted.
125 | type: bool
126 | default: "true"
127 | validations: []
128 | - name: logrepl.publicationName
129 | description: |-
130 | LogreplPublicationName determines the publication name in case the
131 | connector uses logical replication to listen to changes (see CDCMode).
132 | type: string
133 | default: conduitpub
134 | validations: []
135 | - name: logrepl.slotName
136 | description: |-
137 | LogreplSlotName determines the replication slot name in case the
138 | connector uses logical replication to listen to changes (see CDCMode).
139 | Can only contain lower-case letters, numbers, and the underscore character.
140 | type: string
141 | default: conduitslot
142 | validations:
143 | - type: regex
144 | value: ^[a-z0-9_]+$
145 | - name: logrepl.withAvroSchema
146 | description: |-
147 | WithAvroSchema determines whether the connector should attach an avro schema on each
148 | record.
149 | type: bool
150 | default: "true"
151 | validations: []
152 | - name: snapshot.fetchSize
153 | description: Snapshot fetcher size determines the number of rows to retrieve at a time.
154 | type: int
155 | default: "50000"
156 | validations: []
157 | - name: snapshotMode
158 | description: SnapshotMode is whether the plugin will take a snapshot of the entire table before starting cdc mode.
159 | type: string
160 | default: initial
161 | validations:
162 | - type: inclusion
163 | value: initial,never
164 | - name: sdk.batch.delay
165 | description: Maximum delay before an incomplete batch is read from the source.
166 | type: duration
167 | default: "0"
168 | validations: []
169 | - name: sdk.batch.size
170 | description: Maximum size of batch before it gets read from the source.
171 | type: int
172 | default: "0"
173 | validations:
174 | - type: greater-than
175 | value: "-1"
176 | - name: sdk.schema.context.enabled
177 | description: |-
178 | Specifies whether to use a schema context name. If set to false, no schema context name will
179 | be used, and schemas will be saved with the subject name specified in the connector
180 | (not safe because of name conflicts).
181 | type: bool
182 | default: "true"
183 | validations: []
184 | - name: sdk.schema.context.name
185 | description: |-
186 | Schema context name to be used. Used as a prefix for all schema subject names.
187 | If empty, defaults to the connector ID.
188 | type: string
189 | default: ""
190 | validations: []
191 | - name: sdk.schema.extract.key.enabled
192 | description: Whether to extract and encode the record key with a schema.
193 | type: bool
194 | default: "false"
195 | validations: []
196 | - name: sdk.schema.extract.key.subject
197 | description: |-
198 | The subject of the key schema. If the record metadata contains the field
199 | "opencdc.collection" it is prepended to the subject name and separated
200 | with a dot.
201 | type: string
202 | default: key
203 | validations: []
204 | - name: sdk.schema.extract.payload.enabled
205 | description: Whether to extract and encode the record payload with a schema.
206 | type: bool
207 | default: "false"
208 | validations: []
209 | - name: sdk.schema.extract.payload.subject
210 | description: |-
211 | The subject of the payload schema. If the record metadata contains the
212 | field "opencdc.collection" it is prepended to the subject name and
213 | separated with a dot.
214 | type: string
215 | default: payload
216 | validations: []
217 | - name: sdk.schema.extract.type
218 | description: The type of the payload schema.
219 | type: string
220 | default: avro
221 | validations:
222 | - type: inclusion
223 | value: avro
224 | destination:
225 | parameters:
226 | - name: url
227 | description: URL is the connection string for the Postgres database.
228 | type: string
229 | default: ""
230 | validations:
231 | - type: required
232 | value: ""
233 | - name: key
234 | description: Key represents the column name for the key used to identify and update existing rows.
235 | type: string
236 | default: ""
237 | validations: []
238 | - name: table
239 | description: Table is used as the target table into which records are inserted.
240 | type: string
241 | default: '{{ index .Metadata "opencdc.collection" }}'
242 | validations: []
243 | - name: sdk.batch.delay
244 | description: Maximum delay before an incomplete batch is written to the destination.
245 | type: duration
246 | default: "0"
247 | validations: []
248 | - name: sdk.batch.size
249 | description: Maximum size of batch before it gets written to the destination.
250 | type: int
251 | default: "0"
252 | validations:
253 | - type: greater-than
254 | value: "-1"
255 | - name: sdk.rate.burst
256 | description: |-
257 | Allow bursts of at most X records (0 or less means that bursts are not
258 | limited). Only takes effect if a rate limit per second is set. Note that
259 | if `sdk.batch.size` is bigger than `sdk.rate.burst`, the effective batch
260 | size will be equal to `sdk.rate.burst`.
261 | type: int
262 | default: "0"
263 | validations:
264 | - type: greater-than
265 | value: "-1"
266 | - name: sdk.rate.perSecond
267 | description: Maximum number of records written per second (0 means no rate limit).
268 | type: float
269 | default: "0"
270 | validations:
271 | - type: greater-than
272 | value: "-1"
273 | - name: sdk.record.format
274 | description: |-
275 | The format of the output record. See the Conduit documentation for a full
276 | list of supported formats (https://conduit.io/docs/using/connectors/configuration-parameters/output-format).
277 | type: string
278 | default: opencdc/json
279 | validations: []
280 | - name: sdk.record.format.options
281 | description: |-
282 | Options to configure the chosen output record format. Options are normally
283 | key=value pairs separated with comma (e.g. opt1=val2,opt2=val2), except
284 | for the `template` record format, where options are a Go template.
285 | type: string
286 | default: ""
287 | validations: []
288 | - name: sdk.schema.extract.key.enabled
289 | description: Whether to extract and decode the record key with a schema.
290 | type: bool
291 | default: "true"
292 | validations: []
293 | - name: sdk.schema.extract.payload.enabled
294 | description: Whether to extract and decode the record payload with a schema.
295 | type: bool
296 | default: "true"
297 | validations: []
298 |
--------------------------------------------------------------------------------
/destination/config.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2023 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package destination
16 |
17 | import (
18 | "bytes"
19 | "context"
20 | "fmt"
21 | "strings"
22 | "text/template"
23 |
24 | "github.com/Masterminds/sprig/v3"
25 | "github.com/conduitio/conduit-commons/opencdc"
26 | sdk "github.com/conduitio/conduit-connector-sdk"
27 | "github.com/jackc/pgx/v5"
28 | )
29 |
// TableFn resolves the target table name for a single record.
type TableFn func(opencdc.Record) (string, error)

// Config holds the destination configuration and embeds the SDK's default
// destination middleware. Field comments below double as the generated
// parameter descriptions, so they are kept verbatim.
type Config struct {
	sdk.DefaultDestinationMiddleware

	// URL is the connection string for the Postgres database.
	URL string `json:"url" validate:"required"`
	// Table is used as the target table into which records are inserted.
	Table string `json:"table" default:"{{ index .Metadata \"opencdc.collection\" }}"`
	// Key represents the column name for the key used to identify and update existing rows.
	Key string `json:"key"`
}
42 |
43 | func (c *Config) Validate(ctx context.Context) error {
44 | if _, err := pgx.ParseConfig(c.URL); err != nil {
45 | return fmt.Errorf("invalid url: %w", err)
46 | }
47 |
48 | if _, err := c.TableFunction(); err != nil {
49 | return fmt.Errorf("invalid table name or table function: %w", err)
50 | }
51 |
52 | err := c.DefaultDestinationMiddleware.Validate(ctx)
53 | if err != nil {
54 | return fmt.Errorf("middleware validation failed: %w", err)
55 | }
56 |
57 | return nil
58 | }
59 |
60 | // TableFunction returns a function that determines the table for each record individually.
61 | // The function might be returning a static table name.
62 | // If the table is neither static nor a template, an error is returned.
63 | func (c *Config) TableFunction() (f TableFn, err error) {
64 | // Not a template, i.e. it's a static table name
65 | if !strings.HasPrefix(c.Table, "{{") && !strings.HasSuffix(c.Table, "}}") {
66 | return func(_ opencdc.Record) (string, error) {
67 | return c.Table, nil
68 | }, nil
69 | }
70 |
71 | // Try to parse the table
72 | t, err := template.New("table").Funcs(sprig.FuncMap()).Parse(c.Table)
73 | if err != nil {
74 | // The table is not a valid Go template.
75 | return nil, fmt.Errorf("table is neither a valid static table nor a valid Go template: %w", err)
76 | }
77 |
78 | // The table is a valid template, return TableFn.
79 | var buf bytes.Buffer
80 | return func(r opencdc.Record) (string, error) {
81 | buf.Reset()
82 | if err := t.Execute(&buf, r); err != nil {
83 | return "", fmt.Errorf("failed to execute table template: %w", err)
84 | }
85 | return buf.String(), nil
86 | }, nil
87 | }
88 |
--------------------------------------------------------------------------------
/destination_integration_test.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2022 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package postgres
16 |
17 | import (
18 | "context"
19 | "fmt"
20 | "math/big"
21 | "strings"
22 | "testing"
23 |
24 | "github.com/conduitio/conduit-commons/opencdc"
25 | "github.com/conduitio/conduit-connector-postgres/test"
26 | sdk "github.com/conduitio/conduit-connector-sdk"
27 | "github.com/google/go-cmp/cmp"
28 | "github.com/jackc/pgx/v5"
29 | "github.com/matryer/is"
30 | )
31 |
// TestDestination_Write verifies that single-record writes handle every
// record operation (snapshot, create, update/upsert, delete) against a
// live Postgres table, including quoted upper-case table and column names
// and a *big.Rat payload value.
func TestDestination_Write(t *testing.T) {
	is := is.New(t)
	ctx := test.Context(t)
	conn := test.ConnectSimple(ctx, t, test.RegularConnString)

	// tables with capital letters should be quoted
	tableName := strings.ToUpper(test.RandomIdentifier(t))
	test.SetupTestTableWithName(ctx, t, conn, tableName)

	d := NewDestination()
	// The table is resolved per record from the "opencdc.collection"
	// metadata field via a Go template.
	err := sdk.Util.ParseConfig(
		ctx,
		map[string]string{
			"url":   test.RegularConnString,
			"table": "{{ index .Metadata \"opencdc.collection\" }}",
		},
		d.Config(),
		Connector.NewSpecification().DestinationParams,
	)
	is.NoErr(err)

	err = d.Open(ctx)
	is.NoErr(err)
	defer func() {
		err := d.Teardown(ctx)
		is.NoErr(err)
	}()

	tests := []struct {
		name   string
		record opencdc.Record
	}{
		{
			name: "snapshot",
			record: opencdc.Record{
				Position:  opencdc.Position("foo"),
				Operation: opencdc.OperationSnapshot,
				Metadata:  map[string]string{opencdc.MetadataCollection: tableName},
				Key:       opencdc.StructuredData{"id": 5000},
				Payload: opencdc.Change{
					After: opencdc.StructuredData{
						"column1":          "foo",
						"column2":          123,
						"column3":          true,
						"column4":          nil,
						"UppercaseColumn1": 222,
					},
				},
			},
		},
		{
			name: "create",
			record: opencdc.Record{
				Position:  opencdc.Position("foo"),
				Operation: opencdc.OperationCreate,
				Metadata:  map[string]string{opencdc.MetadataCollection: tableName},
				Key:       opencdc.StructuredData{"id": 5},
				Payload: opencdc.Change{
					After: opencdc.StructuredData{
						"column1":          "foo",
						"column2":          456,
						"column3":          false,
						"column4":          nil,
						"UppercaseColumn1": 333,
					},
				},
			},
		},
		{
			// An update for a key that does not exist yet must insert
			// the row (upsert semantics).
			name: "insert on update (upsert)",
			record: opencdc.Record{
				Position:  opencdc.Position("foo"),
				Operation: opencdc.OperationUpdate,
				Metadata:  map[string]string{opencdc.MetadataCollection: tableName},
				Key:       opencdc.StructuredData{"id": 6},
				Payload: opencdc.Change{
					After: opencdc.StructuredData{
						"column1":          "bar",
						"column2":          567,
						"column3":          true,
						"column4":          nil,
						"UppercaseColumn1": 444,
					},
				},
			},
		},
		{
			// An update for an existing key must overwrite the row.
			name: "update on conflict",
			record: opencdc.Record{
				Position:  opencdc.Position("foo"),
				Operation: opencdc.OperationUpdate,
				Metadata:  map[string]string{opencdc.MetadataCollection: tableName},
				Key:       opencdc.StructuredData{"id": 1},
				Payload: opencdc.Change{
					After: opencdc.StructuredData{
						"column1":          "foobar",
						"column2":          567,
						"column3":          true,
						"column4":          nil,
						"UppercaseColumn1": 555,
					},
				},
			},
		},
		{
			name: "delete",
			record: opencdc.Record{
				Position:  opencdc.Position("foo"),
				Metadata:  map[string]string{opencdc.MetadataCollection: tableName},
				Operation: opencdc.OperationDelete,
				Key:       opencdc.StructuredData{"id": 4},
			},
		},
		{
			name: "write a big.Rat",
			record: opencdc.Record{
				Position:  opencdc.Position("foo"),
				Operation: opencdc.OperationSnapshot,
				Metadata:  map[string]string{opencdc.MetadataCollection: tableName},
				Key:       opencdc.StructuredData{"id": 123},
				Payload: opencdc.Change{
					After: opencdc.StructuredData{
						"column1":          "abcdef",
						"column2":          567,
						"column3":          true,
						"column4":          big.NewRat(123, 100),
						"UppercaseColumn1": 555,
					},
				},
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			is = is.New(t)
			id := tt.record.Key.(opencdc.StructuredData)["id"]

			i, err := d.Write(ctx, []opencdc.Record{tt.record})
			is.NoErr(err)
			is.Equal(i, 1)

			// Read the row back and compare it against the record payload;
			// deletes are verified by the row being gone.
			got, err := queryTestTable(ctx, conn, tableName, id)
			switch tt.record.Operation {
			case opencdc.OperationCreate, opencdc.OperationSnapshot, opencdc.OperationUpdate:
				is.NoErr(err)
				is.Equal(
					"",
					cmp.Diff(
						tt.record.Payload.After,
						got,
						// big.Rat values are compared numerically, not by pointer.
						cmp.Comparer(func(x, y *big.Rat) bool {
							return x.Cmp(y) == 0
						}),
					),
				) // -want, +got
			case opencdc.OperationDelete:
				is.Equal(err, pgx.ErrNoRows)
			}
		})
	}
}
193 |
// TestDestination_Batch verifies that a multi-record batch write inserts
// every record, using a static (non-templated) table configuration.
func TestDestination_Batch(t *testing.T) {
	is := is.New(t)
	ctx := test.Context(t)
	conn := test.ConnectSimple(ctx, t, test.RegularConnString)

	// Upper-case table name exercises identifier quoting.
	tableName := strings.ToUpper(test.RandomIdentifier(t))
	test.SetupTestTableWithName(ctx, t, conn, tableName)

	d := NewDestination()

	err := sdk.Util.ParseConfig(
		ctx,
		map[string]string{"url": test.RegularConnString, "table": tableName},
		d.Config(),
		Connector.NewSpecification().DestinationParams,
	)
	is.NoErr(err)

	err = d.Open(ctx)
	is.NoErr(err)
	defer func() {
		err := d.Teardown(ctx)
		is.NoErr(err)
	}()

	records := []opencdc.Record{
		{
			Position:  opencdc.Position("foo1"),
			Operation: opencdc.OperationCreate,
			Key:       opencdc.StructuredData{"id": 5},
			Payload: opencdc.Change{
				After: opencdc.StructuredData{
					"column1":          "foo1",
					"column2":          1,
					"column3":          false,
					"column4":          nil,
					"UppercaseColumn1": 111,
				},
			},
		},
		{
			Position:  opencdc.Position("foo2"),
			Operation: opencdc.OperationCreate,
			Key:       opencdc.StructuredData{"id": 6},
			Payload: opencdc.Change{
				After: opencdc.StructuredData{
					"column1":          "foo2",
					"column2":          2,
					"column3":          true,
					"column4":          nil,
					"UppercaseColumn1": 222,
				},
			},
		},
		{
			Position:  opencdc.Position("foo3"),
			Operation: opencdc.OperationCreate,
			Key:       opencdc.StructuredData{"id": 7},
			Payload: opencdc.Change{
				After: opencdc.StructuredData{
					"column1":          "foo3",
					"column2":          3,
					"column3":          false,
					"column4":          nil,
					"UppercaseColumn1": 333,
				},
			},
		},
	}

	// The whole batch must be accepted in one Write call.
	i, err := d.Write(ctx, records)
	is.NoErr(err)
	is.Equal(i, len(records))

	// Every record must be retrievable by its key afterwards.
	for _, rec := range records {
		got, err := queryTestTable(ctx, conn, tableName, rec.Key.(opencdc.StructuredData)["id"])
		is.NoErr(err)
		is.Equal(rec.Payload.After, got)
	}
}
274 |
275 | func queryTestTable(ctx context.Context, conn test.Querier, tableName string, id any) (opencdc.StructuredData, error) {
276 | row := conn.QueryRow(
277 | ctx,
278 | fmt.Sprintf(`SELECT column1, column2, column3, column4, "UppercaseColumn1" FROM %q WHERE id = $1`, tableName),
279 | id,
280 | )
281 |
282 | var (
283 | col1 string
284 | col2 int
285 | col3 bool
286 | col4Str *string
287 | uppercaseCol1 int
288 | )
289 |
290 | err := row.Scan(&col1, &col2, &col3, &col4Str, &uppercaseCol1)
291 | if err != nil {
292 | return nil, err
293 | }
294 |
295 | // Handle the potential nil case for col4
296 | var col4 interface{}
297 | if col4Str != nil {
298 | r := new(big.Rat)
299 | r.SetString(*col4Str)
300 | col4 = r
301 | }
302 |
303 | return opencdc.StructuredData{
304 | "column1": col1,
305 | "column2": col2,
306 | "column3": col3,
307 | "column4": col4,
308 | "UppercaseColumn1": uppercaseCol1,
309 | }, nil
310 | }
311 |
--------------------------------------------------------------------------------
/internal/db_info.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2025 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package internal
16 |
17 | import (
18 | "context"
19 | "errors"
20 | "fmt"
21 |
22 | "github.com/jackc/pgx/v5"
23 | )
24 |
// DbInfo provides information about tables in a database.
//
// Column metadata fetched from the database is memoized per table in 'cache'
// for the lifetime of this DbInfo; there is currently no cache expiration.
type DbInfo struct {
	conn *pgx.Conn
	cache map[string]*tableCache
}

// tableCache stores the numeric scale of a table's columns, keyed by column
// name. Entries are added lazily on first lookup and never evicted.
type tableCache struct {
	columns map[string]int
}

// NewDbInfo returns a DbInfo backed by the given connection, with an empty
// metadata cache.
func NewDbInfo(conn *pgx.Conn) *DbInfo {
	return &DbInfo{
		conn: conn,
		cache: map[string]*tableCache{},
	}
}
43 |
// GetNumericColumnScale returns the numeric scale of the given column in the
// given table. Results are cached for the lifetime of this DbInfo, so only
// the first lookup for a column queries the database.
func (d *DbInfo) GetNumericColumnScale(ctx context.Context, table string, column string) (int, error) {
	// Return the cached scale if this column has been looked up before.
	tableInfo, ok := d.cache[table]
	if ok {
		scale, ok := tableInfo.columns[column]
		if ok {
			return scale, nil
		}
	} else {
		// First lookup for this table: initialize its cache entry.
		d.cache[table] = &tableCache{
			columns: map[string]int{},
		}
	}

	// Cache miss: fetch scale from database
	scale, err := d.numericScaleFromDb(ctx, table, column)
	if err != nil {
		return 0, err
	}

	d.cache[table].columns[column] = scale

	return scale, nil
}
69 |
// numericScaleFromDb queries information_schema for the scale of a
// numeric/decimal column. It returns an error if the column does not exist
// or is not of a numeric type, and 0 when the scale is unspecified (NULL).
//
// NOTE(review): the query filters only on table_name/column_name, not on a
// schema, so a same-named table in another schema could match — confirm.
func (d *DbInfo) numericScaleFromDb(ctx context.Context, table string, column string) (int, error) {
	// Query to get the column type and numeric scale
	query := `
		SELECT 
			data_type,
			numeric_scale
		FROM 
			information_schema.columns
		WHERE 
			table_name = $1
			AND column_name = $2
	`

	var dataType string
	var numericScale *int

	err := d.conn.QueryRow(ctx, query, table, column).Scan(&dataType, &numericScale)
	if err != nil {
		if errors.Is(err, pgx.ErrNoRows) {
			return 0, fmt.Errorf("column %s not found in table %s", column, table)
		}
		return 0, fmt.Errorf("error querying column info: %w", err)
	}

	// Check if the column is of the numeric/decimal type
	if dataType != "numeric" && dataType != "decimal" {
		return 0, fmt.Errorf("column %s in table %s is not a numeric type (actual type: %s)", column, table, dataType)
	}

	// Handle case where numeric_scale is NULL
	if numericScale == nil {
		return 0, nil // The default scale is 0 when not specified
	}

	return *numericScale, nil
}
106 |
--------------------------------------------------------------------------------
/internal/utils.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2025 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package internal
16 |
17 | import (
18 | "strconv"
19 | )
20 |
// WrapSQLIdent is used to wrap PostgreSQL identifier under quotes.
// It allows to use uppercase letters and special characters (like `-`) in the
// names of identifiers.
//
// NOTE: strconv.Quote applies Go string escaping, not SQL identifier
// escaping. Identifiers containing characters Go escapes with a backslash
// (e.g. an embedded double quote) therefore do not produce valid Postgres
// quoting — TestSQLIdentWrapping expects a syntax error (SQLSTATE 42601)
// for such input.
var WrapSQLIdent = strconv.Quote
25 |
--------------------------------------------------------------------------------
/internal/utils_test.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2025 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package internal
16 |
17 | import (
18 | "context"
19 | "fmt"
20 | "strings"
21 | "testing"
22 |
23 | "github.com/conduitio/conduit-connector-postgres/test"
24 | "github.com/matryer/is"
25 | )
26 |
27 | func TestSQLIdentWrapping(t *testing.T) {
28 | is := is.New(t)
29 | ctx := test.Context(t)
30 | conn := test.ConnectSimple(ctx, t, test.RegularConnString)
31 |
32 | cases := []struct {
33 | ident string
34 | testName string
35 | expectError bool
36 | }{
37 | {"just_a_name", "common case", false},
38 | {"слон", "unicode chars", false},
39 | {"test table", "spaces", false},
40 | {"TEST_table", "uppercase letters", false},
41 | {`'test_table'`, "single quotes", false},
42 | {"tes`t_table", "apostrophe", false},
43 | {`te"st_table`, "double quotes", true},
44 | }
45 |
46 | for _, c := range cases {
47 | t.Run(c.testName, func(t *testing.T) {
48 | w := WrapSQLIdent(c.ident)
49 |
50 | t.Cleanup(func() {
51 | if c.expectError {
52 | return
53 | }
54 |
55 | query := fmt.Sprintf("DROP TABLE %s", w)
56 | _, err := conn.Exec(context.Background(), query)
57 | is.NoErr(err)
58 | })
59 |
60 | query := fmt.Sprintf("CREATE TABLE %s (%s int)", w, w)
61 | _, err := conn.Exec(context.Background(), query)
62 |
63 | if c.expectError {
64 | is.True(err != nil)
65 | is.True(strings.Contains(err.Error(), `(SQLSTATE 42601)`)) // syntax error
66 | } else {
67 | is.NoErr(err)
68 | }
69 | })
70 | }
71 | }
72 |
--------------------------------------------------------------------------------
/scripts/bump_version.sh:
--------------------------------------------------------------------------------
#!/bin/bash
set -e

# Copyright © 2025 Meroxa, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Get the directory where the script is located
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

source "${SCRIPT_DIR}/common.sh"

TAG=$1

if ! check_semver "$TAG"; then
  echo "$TAG is NOT a valid semver string"
  exit 1
fi

# Check if yq is installed
if ! command -v yq &> /dev/null; then
  echo "Error: yq is not installed. Please install it and try again."
  exit 1
fi

V_TAG="v$TAG"

BRANCH=$(git rev-parse --abbrev-ref HEAD)
CURRENT_TAG=$(get_spec_version connector.yaml)
MSG="You are about to bump the version from ${CURRENT_TAG} to ${V_TAG} on branch '${BRANCH}'.\n"
while true; do
  # Use an explicit format string: passing MSG as the format would garble the
  # output if a version or branch name ever contained '%'. '%b' still expands
  # the trailing '\n' stored in MSG.
  printf '%b' "${MSG}"
  # -r stops read from treating backslashes in the answer as escapes.
  read -rp "Are you sure you want to continue? [y/n]" yn
  echo
  case $yn in
    [Yy]* )
      BRANCH_NAME="update-version-$V_TAG"
      git checkout -b "$BRANCH_NAME"
      yq e ".specification.version = \"${V_TAG}\"" -i connector.yaml
      git commit -am "Update version to $V_TAG"
      git push origin "$BRANCH_NAME"

      # Check if gh is installed
      if command -v gh &> /dev/null; then
        echo "Creating pull request..."
        gh pr create \
          --base main \
          --title "Update version to $V_TAG" \
          --body "Automated version update to $V_TAG" \
          --head "$BRANCH_NAME"
      else
        echo "GitHub CLI (gh) is not installed. To create a PR, please install gh or create it manually."
        echo "Branch '$BRANCH_NAME' has been pushed to origin."
      fi

      echo "Once the change has been merged, you can use scripts/tag.sh to push a new tag."
      break;;
    [Nn]* ) exit;;
    * ) echo "Please answer yes or no.";;
  esac
done
72 |
--------------------------------------------------------------------------------
/scripts/common.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright © 2025 Meroxa, Inc.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
# check_semver validates that its argument is a semver 2.0.0 version string.
# Prints a message and returns 1 when the string is invalid.
check_semver() {
    local version=$1
    local SV_REGEX="^(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)(-((0|[1-9][0-9]*|[0-9]*[a-zA-Z-][0-9a-zA-Z-]*)(\.(0|[1-9][0-9]*|[0-9]*[a-zA-Z-][0-9a-zA-Z-]*))*))?(\+([0-9a-zA-Z-]+(\.[0-9a-zA-Z-]+)*))?$"

    if [[ $version =~ $SV_REGEX ]]; then
        return 0
    fi

    echo "$version is NOT a valid semver string"
    return 1
}
27 |
# get_spec_version prints .specification.version from the given YAML file.
# Prefers yq when available; otherwise falls back to a sed scan of the
# "specification:" block and prints the last "version:" value found there.
get_spec_version() {
    local yaml_file=$1

    if command -v yq &> /dev/null; then
        yq '.specification.version' "$yaml_file"
    else
        sed -n '/specification:/,/version:/ s/.*version: //p' "$yaml_file" | tail -1
    fi
}
37 |
--------------------------------------------------------------------------------
/scripts/tag.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright © 2025 Meroxa, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Get the directory where the script is located
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

source "${SCRIPT_DIR}/common.sh"

HAS_UNCOMMITTED=$(git status --porcelain=v1 2>/dev/null | wc -l | awk '{print $1}')
if (( HAS_UNCOMMITTED != 0 )); then
  echo "You have uncommitted changes, cannot tag."
  exit 1
fi

LAST_COMMIT=$(git log -1 --oneline)
BRANCH=$(git rev-parse --abbrev-ref HEAD)
CURRENT_TAG=$(git describe --tags --abbrev=0)
V_TAG=$(get_spec_version connector.yaml)
MSG="You are about to bump the version from ${CURRENT_TAG} to ${V_TAG}.
Current commit is '${LAST_COMMIT}' on branch '${BRANCH}'.
The release process is automatic and quick, so if you make a mistake,
everyone will see it very soon."

while true; do
  # Use an explicit format string: MSG embeds a commit message which may
  # contain '%' and would be misinterpreted if MSG itself were the format.
  printf '%s' "${MSG}"
  # -r stops read from treating backslashes in the answer as escapes.
  read -rp "Are you sure you want to continue? [y/n]" yn
  echo
  case $yn in
    [Yy]* )
      # Quote the tag so unexpected whitespace in the spec version cannot
      # split it into multiple arguments.
      git tag -a "$V_TAG" -m "Release: $V_TAG"
      git push origin "$V_TAG"
      break;;
    [Nn]* ) exit;;
    * ) echo "Please answer yes or no.";;
  esac
done
50 |
--------------------------------------------------------------------------------
/source.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2022 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package postgres
16 |
17 | import (
18 | "context"
19 | "errors"
20 | "fmt"
21 | "time"
22 |
23 | "github.com/conduitio/conduit-commons/config"
24 | "github.com/conduitio/conduit-commons/csync"
25 | "github.com/conduitio/conduit-commons/lang"
26 | "github.com/conduitio/conduit-commons/opencdc"
27 | "github.com/conduitio/conduit-connector-postgres/internal"
28 | "github.com/conduitio/conduit-connector-postgres/source"
29 | "github.com/conduitio/conduit-connector-postgres/source/cpool"
30 | "github.com/conduitio/conduit-connector-postgres/source/logrepl"
31 | sdk "github.com/conduitio/conduit-connector-sdk"
32 | "github.com/jackc/pgx/v5"
33 | "github.com/jackc/pgx/v5/pgxpool"
34 | )
35 |
// Source is a Postgres source plugin.
type Source struct {
	sdk.UnimplementedSource

	// iterator produces the records returned by ReadN; created in Open.
	iterator source.Iterator
	// config is the parsed source configuration.
	config source.Config
	// pool is the connection pool created in Open and closed in Teardown.
	pool *pgxpool.Pool
	// tableKeys maps each configured table name to its primary key column,
	// populated in Open via getPrimaryKey.
	tableKeys map[string]string
}
45 |
// Config returns the source's configuration.
func (s *Source) Config() sdk.SourceConfig {
	return &s.config
}
49 |
// NewSource creates a new Postgres source wrapped with the SDK's default
// middleware. Schema extraction middleware is disabled by default because
// the connector builds its own schema for records.
func NewSource() sdk.Source {
	return sdk.SourceWithMiddleware(
		&Source{
			tableKeys: make(map[string]string),
			config: source.Config{
				DefaultSourceMiddleware: sdk.DefaultSourceMiddleware{
					// disable schema extraction by default, postgres will build its own schema
					SourceWithSchemaExtraction: sdk.SourceWithSchemaExtraction{
						PayloadEnabled: lang.Ptr(false),
						KeyEnabled:     lang.Ptr(false),
					},
				},
			},
		},
	)
}
66 |
// Open prepares the source for reading: it creates a connection pool,
// expands the "*" table wildcard when configured, resolves each table's
// primary key, and constructs the CDC iterator resuming at pos.
func (s *Source) Open(ctx context.Context, pos opencdc.Position) error {
	pool, err := cpool.New(ctx, s.config.URL)
	if err != nil {
		return fmt.Errorf("failed to create a connection pool to database: %w", err)
	}
	s.pool = pool

	logger := sdk.Logger(ctx)
	if s.readingAllTables() {
		logger.Info().Msg("Detecting all tables...")
		s.config.Tables, err = s.getAllTables(ctx)
		if err != nil {
			return fmt.Errorf("failed to connect to get all tables: %w", err)
		}
		logger.Info().
			Strs("tables", s.config.Tables).
			Int("count", len(s.config.Tables)).
			Msg("Successfully detected tables")
	}

	// ensure we have keys for all tables
	for _, tableName := range s.config.Tables {
		s.tableKeys[tableName], err = s.getPrimaryKey(ctx, tableName)
		if err != nil {
			return fmt.Errorf("failed to find primary key for table %s: %w", tableName, err)
		}
	}

	switch s.config.CDCMode {
	case source.CDCModeAuto:
		// TODO add logic that checks if the DB supports logical replication
		// (since that's the only thing we support at the moment)
		fallthrough
	case source.CDCModeLogrepl:
		i, err := logrepl.NewCombinedIterator(ctx, s.pool, logrepl.Config{
			Position:        pos,
			SlotName:        s.config.LogreplSlotName,
			PublicationName: s.config.LogreplPublicationName,
			Tables:          s.config.Tables,
			TableKeys:       s.tableKeys,
			WithSnapshot:    s.config.SnapshotMode == source.SnapshotModeInitial,
			WithAvroSchema:  s.config.WithAvroSchema,
			BatchSize:       *s.config.BatchSize,
		})
		if err != nil {
			return fmt.Errorf("failed to create logical replication iterator: %w", err)
		}
		s.iterator = i
	default:
		// shouldn't happen, config was validated
		return fmt.Errorf("unsupported CDC mode %q", s.config.CDCMode)
	}
	return nil
}
121 |
// ReadN returns up to n records from the iterator created in Open.
func (s *Source) ReadN(ctx context.Context, n int) ([]opencdc.Record, error) {
	return s.iterator.NextN(ctx, n)
}
125 |
// Ack forwards the acknowledged position to the iterator.
func (s *Source) Ack(ctx context.Context, pos opencdc.Position) error {
	return s.iterator.Ack(ctx, pos)
}
129 |
130 | func (s *Source) Teardown(ctx context.Context) error {
131 | logger := sdk.Logger(ctx)
132 |
133 | var errs []error
134 | if s.iterator != nil {
135 | logger.Debug().Msg("Tearing down iterator...")
136 | if err := s.iterator.Teardown(ctx); err != nil {
137 | logger.Warn().Err(err).Msg("Failed to tear down iterator")
138 | errs = append(errs, fmt.Errorf("failed to tear down iterator: %w", err))
139 | }
140 | }
141 | if s.pool != nil {
142 | logger.Debug().Msg("Closing connection pool...")
143 | err := csync.RunTimeout(ctx, s.pool.Close, time.Minute)
144 | if err != nil {
145 | errs = append(errs, fmt.Errorf("failed to close DB connection pool: %w", err))
146 | }
147 | }
148 | return errors.Join(errs...)
149 | }
150 |
151 | func (s *Source) LifecycleOnDeleted(ctx context.Context, cfg config.Config) error {
152 | var oldConfig source.Config
153 | err := sdk.Util.ParseConfig(ctx, cfg, &oldConfig, Connector.NewSpecification().SourceParams)
154 | if err != nil {
155 | return fmt.Errorf("lifecycle delete event: failed to parse configuration: %w", err)
156 | }
157 |
158 | switch oldConfig.CDCMode {
159 | case source.CDCModeAuto:
160 | fallthrough // TODO: Adjust as `auto` changes.
161 | case source.CDCModeLogrepl:
162 | if !oldConfig.LogreplAutoCleanup {
163 | sdk.Logger(ctx).Warn().Msg("Skipping logrepl auto cleanup")
164 | return nil
165 | }
166 |
167 | return logrepl.Cleanup(ctx, logrepl.CleanupConfig{
168 | URL: oldConfig.URL,
169 | SlotName: oldConfig.LogreplSlotName,
170 | PublicationName: oldConfig.LogreplPublicationName,
171 | })
172 | default:
173 | sdk.Logger(ctx).Warn().Msgf("cannot handle CDC mode %q", oldConfig.CDCMode)
174 | return nil
175 | }
176 | }
177 |
178 | func (s *Source) readingAllTables() bool {
179 | return len(s.config.Tables) == 1 && s.config.Tables[0] == source.AllTablesWildcard
180 | }
181 |
182 | func (s *Source) getAllTables(ctx context.Context) ([]string, error) {
183 | query := "SELECT tablename FROM pg_tables WHERE schemaname = 'public'"
184 |
185 | rows, err := s.pool.Query(ctx, query)
186 | if err != nil {
187 | return nil, err
188 | }
189 | defer rows.Close()
190 |
191 | var tables []string
192 | for rows.Next() {
193 | var tableName string
194 | if err := rows.Scan(&tableName); err != nil {
195 | return nil, fmt.Errorf("failed to scan table name: %w", err)
196 | }
197 | tables = append(tables, tableName)
198 | }
199 | if err := rows.Err(); err != nil {
200 | return nil, fmt.Errorf("rows error: %w", err)
201 | }
202 | return tables, nil
203 | }
204 |
// getPrimaryKey queries the db for the name of the primary key column for a
// table if one exists and returns it.
// It returns an error wrapping pgx.ErrNoRows when the table has no primary
// key, and a distinct error when the key spans multiple columns (composite
// keys are not supported).
func (s *Source) getPrimaryKey(ctx context.Context, tableName string) (string, error) {
	query := `SELECT a.attname FROM pg_index i
			JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
			WHERE i.indrelid = $1::regclass AND i.indisprimary`

	// The identifier is quoted so the ::regclass cast can resolve table
	// names with uppercase or special characters (see WrapSQLIdent).
	rows, err := s.pool.Query(ctx, query, internal.WrapSQLIdent(tableName))
	if err != nil {
		return "", fmt.Errorf("failed to query table keys: %w", err)
	}
	defer rows.Close()

	if !rows.Next() {
		// Distinguish a query failure from a genuinely empty result.
		if rows.Err() != nil {
			return "", fmt.Errorf("query failed: %w", rows.Err())
		}
		return "", fmt.Errorf("no table keys found: %w", pgx.ErrNoRows)
	}

	var colName string
	err = rows.Scan(&colName)
	if err != nil {
		return "", fmt.Errorf("failed to scan row: %w", err)
	}

	if rows.Next() {
		// we only support single column primary keys for now
		return "", errors.New("composite keys are not supported")
	}

	return colName, nil
}
238 |
--------------------------------------------------------------------------------
/source/config.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2022 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package source
16 |
17 | import (
18 | "context"
19 | "errors"
20 | "fmt"
21 |
22 | sdk "github.com/conduitio/conduit-connector-sdk"
23 | "github.com/jackc/pgx/v5"
24 | )
25 |
// SnapshotMode determines whether an initial snapshot of the tables is taken.
type SnapshotMode string

const (
	// SnapshotModeInitial creates a snapshot in the first run of the pipeline.
	SnapshotModeInitial SnapshotMode = "initial"
	// SnapshotModeNever skips snapshot creation altogether.
	SnapshotModeNever SnapshotMode = "never"
)

// CDCMode determines the strategy used to capture changes.
type CDCMode string

const (
	// CDCModeAuto tries to set up logical replication and falls back to long
	// polling if that is impossible.
	// NOTE(review): long polling does not appear to be implemented; "auto"
	// currently falls through to logrepl (see the TODO in Source.Open) —
	// confirm before relying on the fallback.
	CDCModeAuto CDCMode = "auto"
	// CDCModeLogrepl uses logical replication to listen to changes.
	CDCModeLogrepl CDCMode = "logrepl"

	// AllTablesWildcard can be used if you'd like to listen to all tables.
	AllTablesWildcard = "*"
)
47 |
// Config holds the configuration of the Postgres source connector.
//
// NOTE(review): the field comments below likely feed the generated connector
// parameter docs (see the validate-generated-files workflow), so they are
// deliberately left untouched.
type Config struct {
	sdk.DefaultSourceMiddleware

	// URL is the connection string for the Postgres database.
	URL string `json:"url" validate:"required"`

	// Tables is a List of table names to read from, separated by a comma, e.g.:"table1,table2".
	// Use "*" if you'd like to listen to all tables.
	Tables []string `json:"tables" validate:"required"`

	// SnapshotMode is whether the plugin will take a snapshot of the entire table before starting cdc mode.
	SnapshotMode SnapshotMode `json:"snapshotMode" validate:"inclusion=initial|never" default:"initial"`

	// Snapshot fetcher size determines the number of rows to retrieve at a time.
	SnapshotFetchSize int `json:"snapshot.fetchSize" default:"50000"`

	// CDCMode determines how the connector should listen to changes.
	CDCMode CDCMode `json:"cdcMode" validate:"inclusion=auto|logrepl" default:"auto"`

	// LogreplPublicationName determines the publication name in case the
	// connector uses logical replication to listen to changes (see CDCMode).
	LogreplPublicationName string `json:"logrepl.publicationName" default:"conduitpub"`
	// LogreplSlotName determines the replication slot name in case the
	// connector uses logical replication to listen to changes (see CDCMode).
	// Can only contain lower-case letters, numbers, and the underscore character.
	LogreplSlotName string `json:"logrepl.slotName" validate:"regex=^[a-z0-9_]+$" default:"conduitslot"`

	// LogreplAutoCleanup determines if the replication slot and publication should be
	// removed when the connector is deleted.
	LogreplAutoCleanup bool `json:"logrepl.autoCleanup" default:"true"`

	// WithAvroSchema determines whether the connector should attach an avro schema on each
	// record.
	WithAvroSchema bool `json:"logrepl.withAvroSchema" default:"true"`
}
83 |
84 | // Validate validates the provided config values.
85 | func (c *Config) Validate(ctx context.Context) error {
86 | var errs []error
87 | if _, err := pgx.ParseConfig(c.URL); err != nil {
88 | errs = append(errs, fmt.Errorf("invalid url: %w", err))
89 | }
90 |
91 | err := c.DefaultSourceMiddleware.Validate(ctx)
92 | if err != nil {
93 | errs = append(errs, err)
94 | }
95 |
96 | return errors.Join(errs...)
97 | }
98 |
--------------------------------------------------------------------------------
/source/config_test.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2023 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package source
16 |
17 | import (
18 | "context"
19 | "testing"
20 |
21 | "github.com/matryer/is"
22 | )
23 |
24 | func TestConfig_Validate(t *testing.T) {
25 | testCases := []struct {
26 | name string
27 | cfg Config
28 | wantErr bool
29 | }{
30 | {
31 | name: "valid config",
32 | cfg: Config{
33 | URL: "postgresql://meroxauser:meroxapass@127.0.0.1:5432/meroxadb",
34 | Tables: []string{"table1", "table2"},
35 | CDCMode: CDCModeLogrepl,
36 | },
37 | wantErr: false,
38 | }, {
39 | name: "invalid postgres url",
40 | cfg: Config{
41 | URL: "postgresql",
42 | Tables: []string{"table1", "table2"},
43 | CDCMode: CDCModeLogrepl,
44 | },
45 | wantErr: true,
46 | },
47 | }
48 | for _, tc := range testCases {
49 | t.Run(tc.name, func(t *testing.T) {
50 | is := is.New(t)
51 | err := tc.cfg.Validate(context.Background())
52 | if tc.wantErr {
53 | is.True(err != nil)
54 | return
55 | }
56 | is.True(err == nil)
57 | })
58 | }
59 | }
60 |
--------------------------------------------------------------------------------
/source/cpool/cpool.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2024 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package cpool
16 |
17 | import (
18 | "context"
19 | "encoding/json"
20 | "fmt"
21 |
22 | "github.com/jackc/pgx/v5"
23 | "github.com/jackc/pgx/v5/pgtype"
24 | "github.com/jackc/pgx/v5/pgxpool"
25 | )
26 |
// replicationCtxKey is the context key used to request a replication
// connection from the pool (see WithReplication and the pool hooks below).
type replicationCtxKey struct{}

// WithReplication returns a context that instructs the pool hooks to
// establish/acquire a connection with replication enabled.
func WithReplication(ctx context.Context) context.Context {
	return context.WithValue(ctx, replicationCtxKey{}, true)
}
32 |
33 | // New returns new pgxpool.Pool with added hooks.
34 | func New(ctx context.Context, conninfo string) (*pgxpool.Pool, error) {
35 | config, err := pgxpool.ParseConfig(conninfo)
36 | if err != nil {
37 | return nil, fmt.Errorf("failed to parse pool config: %w", err)
38 | }
39 |
40 | config.BeforeAcquire = beforeAcquireHook
41 | config.BeforeConnect = beforeConnectHook
42 | config.AfterConnect = afterConnectHook
43 | config.AfterRelease = afterReleaseHook
44 |
45 | pool, err := pgxpool.NewWithConfig(ctx, config)
46 | if err != nil {
47 | return nil, err
48 | }
49 |
50 | return pool, nil
51 | }
52 |
// afterConnectHook runs once for each newly established connection and
// customizes its type map.
func afterConnectHook(_ context.Context, conn *pgx.Conn) error {
	// Override the JSON and JSONB codec to return bytes rather than the
	// unmarshalled representation of map.
	conn.TypeMap().RegisterType(&pgtype.Type{
		Name:  "json",
		OID:   pgtype.JSONOID,
		Codec: &pgtype.JSONCodec{Marshal: json.Marshal, Unmarshal: jsonNoopUnmarshal},
	})
	conn.TypeMap().RegisterType(&pgtype.Type{
		Name:  "jsonb",
		OID:   pgtype.JSONBOID,
		Codec: &pgtype.JSONBCodec{Marshal: json.Marshal, Unmarshal: jsonNoopUnmarshal},
	})

	return nil
}
69 |
70 | // beforeAcquireHook ensures purpose specific connections are returned:
71 | // * If a replication connection is requested, ensure the connection has replication enabled.
72 | // * If a regular connection is requested, return non-replication connections.
73 | func beforeAcquireHook(ctx context.Context, conn *pgx.Conn) bool {
74 | replReq := ctx.Value(replicationCtxKey{}) != nil
75 | replOn := conn.Config().RuntimeParams["replication"] != ""
76 |
77 | return replReq == replOn
78 | }
79 |
80 | // beforeConnectHook customizes the configuration of the new connection.
81 | func beforeConnectHook(ctx context.Context, config *pgx.ConnConfig) error {
82 | if config.RuntimeParams["application_name"] == "" {
83 | config.RuntimeParams["application_name"] = "conduit-connector-postgres"
84 | }
85 |
86 | if v := ctx.Value(replicationCtxKey{}); v != nil {
87 | config.RuntimeParams["replication"] = "database"
88 | }
89 |
90 | return nil
91 | }
92 |
// afterReleaseHook marks all replication connections for disposal.
// Returning true keeps the connection in the pool; returning false (for
// connections with the "replication" runtime param set) destroys it.
func afterReleaseHook(conn *pgx.Conn) bool {
	return conn.Config().RuntimeParams["replication"] == ""
}
97 |
--------------------------------------------------------------------------------
/source/cpool/json.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2024 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package cpool
16 |
17 | import (
18 | "encoding/json"
19 | "reflect"
20 | )
21 |
// jsonNoopUnmarshal copies src into dst without interpreting the JSON.
// It is meant to be used with the pgtype JSON/JSONB codecs so values are
// surfaced as raw bytes instead of unmarshalled maps.
// dst must be a non-nil *any; any other destination yields a
// *json.InvalidUnmarshalError, mirroring encoding/json behavior.
func jsonNoopUnmarshal(src []byte, dst any) error {
	dstptr, ok := dst.(*any)
	// Also reject a typed-nil *any: dereferencing it below would panic
	// (the original nil check only caught an untyped nil dst).
	if !ok || dstptr == nil {
		return &json.InvalidUnmarshalError{Type: reflect.TypeOf(dst)}
	}

	// Copy so the result does not alias src, which the caller may reuse.
	v := make([]byte, len(src))
	copy(v, src)

	// set the slice to the value of the ptr.
	*dstptr = v

	return nil
}
38 |
--------------------------------------------------------------------------------
/source/cpool/json_test.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2024 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package cpool
16 |
17 | import (
18 | "testing"
19 |
20 | "github.com/matryer/is"
21 | )
22 |
23 | func Test_jsonNoopUnmarshal(t *testing.T) {
24 | is := is.New(t)
25 |
26 | var dst any
27 | data := []byte(`{"foo":"bar"}`)
28 |
29 | is.NoErr(jsonNoopUnmarshal(data, &dst))
30 | is.Equal(data, dst.([]byte))
31 |
32 | var err error
33 |
34 | err = jsonNoopUnmarshal(data, dst)
35 | is.True(err != nil)
36 | if err != nil {
37 | is.Equal(err.Error(), "json: Unmarshal(non-pointer []uint8)")
38 | }
39 |
40 | err = jsonNoopUnmarshal(data, nil)
41 | is.True(err != nil)
42 | if err != nil {
43 | is.Equal(err.Error(), "json: Unmarshal(nil)")
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/source/iterator.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2022 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package source
16 |
17 | import (
18 | "context"
19 |
20 | "github.com/conduitio/conduit-commons/opencdc"
21 | "github.com/conduitio/conduit-connector-postgres/source/logrepl"
22 | )
23 |
// Iterator is an object that can iterate over a queue of records.
type Iterator interface {
	// NextN takes and returns up to n records from the queue. NextN is allowed to
	// block until either at least one record is available or the context gets canceled.
	NextN(context.Context, int) ([]opencdc.Record, error)
	// Ack signals that a record at a specific position was successfully
	// processed.
	Ack(context.Context, opencdc.Position) error
	// Teardown attempts to gracefully teardown the iterator.
	Teardown(context.Context) error
}

// Compile-time assertion that logrepl.CDCIterator satisfies Iterator.
var _ Iterator = (*logrepl.CDCIterator)(nil)
37 |
--------------------------------------------------------------------------------
/source/logrepl/cdc.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2022 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package logrepl
16 |
17 | import (
18 | "context"
19 | "errors"
20 | "fmt"
21 | "time"
22 |
23 | "github.com/conduitio/conduit-commons/opencdc"
24 | "github.com/conduitio/conduit-connector-postgres/source/logrepl/internal"
25 | "github.com/conduitio/conduit-connector-postgres/source/position"
26 | sdk "github.com/conduitio/conduit-connector-sdk"
27 | "github.com/jackc/pglogrepl"
28 | "github.com/jackc/pgx/v5/pgxpool"
29 | )
30 |
// CDCConfig holds configuration values for CDCIterator.
type CDCConfig struct {
	// LSN is the starting log sequence number passed to the subscription.
	LSN pglogrepl.LSN
	// SlotName is the name of the logical replication slot.
	SlotName string
	// PublicationName is the name of the publication to create and consume.
	PublicationName string
	// Tables lists the tables included in the publication.
	Tables []string
	// TableKeys maps table names to their key column (used by the CDCHandler).
	TableKeys map[string]string
	// WithAvroSchema enables Avro schema handling for emitted records.
	WithAvroSchema bool
	// BatchSize is the maximum size of a batch that will be read from the DB
	// in one go and processed by the CDCHandler.
	BatchSize int
}
43 |
// CDCIterator asynchronously listens for events from the logical replication
// slot and returns them to the caller through NextN.
type CDCIterator struct {
	config CDCConfig
	sub    *internal.Subscription

	// batchesCh is a channel shared between this iterator and a CDCHandler,
	// to which the CDCHandler is sending batches of records.
	// Using a shared queue here would be the fastest option. However,
	// we also need to watch for a context that can get cancelled,
	// and for the subscription that can end, so using a channel is
	// the best option at the moment.
	batchesCh chan []opencdc.Record

	// recordsForNextRead contains records from the previous batch (returned by
	// the CDCHandler) that weren't returned by this iterator's NextN method.
	recordsForNextRead []opencdc.Record
}
62 |
63 | // NewCDCIterator initializes logical replication by creating the publication and subscription manager.
64 | func NewCDCIterator(ctx context.Context, pool *pgxpool.Pool, c CDCConfig) (*CDCIterator, error) {
65 | if err := internal.CreatePublication(
66 | ctx,
67 | pool,
68 | c.PublicationName,
69 | internal.CreatePublicationOptions{Tables: c.Tables},
70 | ); err != nil {
71 | // If creating the publication fails with code 42710, this means
72 | // the publication already exists.
73 | if !internal.IsPgDuplicateErr(err) {
74 | return nil, err
75 | }
76 |
77 | sdk.Logger(ctx).Warn().
78 | Msgf("Publication %q already exists.", c.PublicationName)
79 | }
80 |
81 | // Using a buffered channel here so that the handler can send a batch
82 | // to the channel and start building a new batch.
83 | // This is useful when the first batch in the channel didn't reach BatchSize (which is sdk.batch.size).
84 | // The handler can prepare the next batch, and the CDCIterator can use them
85 | // to return the maximum number of records.
86 | batchesCh := make(chan []opencdc.Record, 1)
87 | handler := NewCDCHandler(
88 | ctx,
89 | internal.NewRelationSet(),
90 | c.TableKeys,
91 | batchesCh,
92 | c.WithAvroSchema,
93 | c.BatchSize,
94 | // todo make configurable
95 | time.Second,
96 | )
97 |
98 | sub, err := internal.CreateSubscription(
99 | ctx,
100 | pool,
101 | c.SlotName,
102 | c.PublicationName,
103 | c.Tables,
104 | c.LSN,
105 | handler.Handle,
106 | )
107 | if err != nil {
108 | return nil, fmt.Errorf("failed to initialize subscription: %w", err)
109 | }
110 |
111 | return &CDCIterator{
112 | config: c,
113 | batchesCh: batchesCh,
114 | sub: sub,
115 | }, nil
116 | }
117 |
118 | // StartSubscriber starts the logical replication service in the background.
119 | // Blocks until the subscription becomes ready.
120 | func (i *CDCIterator) StartSubscriber(ctx context.Context) error {
121 | sdk.Logger(ctx).Info().
122 | Str("slot", i.config.SlotName).
123 | Str("publication", i.config.PublicationName).
124 | Msgf("Starting logical replication at %s", i.sub.StartLSN)
125 |
126 | go func() {
127 | if err := i.sub.Run(ctx); err != nil {
128 | sdk.Logger(ctx).Error().
129 | Err(err).
130 | Msg("replication exited with an error")
131 | }
132 | }()
133 |
134 | <-i.sub.Ready()
135 |
136 | sdk.Logger(ctx).Info().
137 | Str("slot", i.config.SlotName).
138 | Str("publication", i.config.PublicationName).
139 | Msg("Logical replication started")
140 |
141 | return nil
142 | }
143 |
144 | // NextN returns up to n records from the internal channel with records.
145 | // NextN is allowed to block until either at least one record is available
146 | // or the context gets canceled.
147 | func (i *CDCIterator) NextN(ctx context.Context, n int) ([]opencdc.Record, error) {
148 | if !i.subscriberReady() {
149 | return nil, errors.New("logical replication has not been started")
150 | }
151 |
152 | if n <= 0 {
153 | return nil, fmt.Errorf("n must be greater than 0, got %d", n)
154 | }
155 |
156 | // First, we check if there are any records from the previous batch
157 | // that we can start with.
158 | recs := make([]opencdc.Record, len(i.recordsForNextRead), n)
159 | copy(recs, i.recordsForNextRead)
160 | i.recordsForNextRead = nil
161 |
162 | // NextN needs to wait until at least 1 record is available.
163 | if len(recs) == 0 {
164 | batch, err := i.nextRecordsBatchBlocking(ctx)
165 | if err != nil {
166 | return nil, fmt.Errorf("failed to fetch next batch of records (blocking): %w", err)
167 | }
168 | recs = batch
169 | }
170 |
171 | // We add any already available batches (i.e., we're not blocking waiting for any new batches to arrive)
172 | // to return at most n records.
173 | for len(recs) < n {
174 | batch, err := i.nextRecordsBatch(ctx)
175 | if err != nil {
176 | return nil, fmt.Errorf("failed to fetch next batch of records: %w", err)
177 | }
178 | if batch == nil {
179 | break
180 | }
181 | recs = i.appendRecordsWithLimit(recs, batch, n)
182 | }
183 |
184 | sdk.Logger(ctx).Trace().
185 | Int("records", len(recs)).
186 | Int("records_for_next_read", len(i.recordsForNextRead)).
187 | Msg("CDCIterator.NextN returning records")
188 | return recs, nil
189 | }
190 |
191 | // nextRecordsBatchBlocking waits for the next batch of records to arrive,
192 | // or for the context to be done, or for the subscription to be done,
193 | // whichever comes first.
194 | func (i *CDCIterator) nextRecordsBatchBlocking(ctx context.Context) ([]opencdc.Record, error) {
195 | select {
196 | case <-ctx.Done():
197 | return nil, ctx.Err()
198 | case <-i.sub.Done():
199 | if err := i.sub.Err(); err != nil {
200 | return nil, fmt.Errorf("logical replication error: %w", err)
201 | }
202 | if err := ctx.Err(); err != nil {
203 | // subscription is done because the context is canceled, we went
204 | // into the wrong case by chance
205 | return nil, err
206 | }
207 | // subscription stopped without an error and the context is still
208 | // open, this is a strange case, shouldn't actually happen
209 | return nil, fmt.Errorf("subscription stopped, no more data to fetch (this smells like a bug)")
210 | case batch := <-i.batchesCh:
211 | sdk.Logger(ctx).Trace().
212 | Int("records", len(batch)).
213 | Msg("CDCIterator.NextN received batch of records (blocking)")
214 | return batch, nil
215 | }
216 | }
217 |
218 | func (i *CDCIterator) nextRecordsBatch(ctx context.Context) ([]opencdc.Record, error) {
219 | select {
220 | case <-ctx.Done():
221 | // Return what we have with the error
222 | return nil, ctx.Err()
223 | case <-i.sub.Done():
224 | if err := i.sub.Err(); err != nil {
225 | return nil, fmt.Errorf("logical replication error: %w", err)
226 | }
227 | if err := ctx.Err(); err != nil {
228 | // Return what we have with the context error
229 | return nil, err
230 | }
231 | // Return what we have with subscription stopped error
232 | return nil, fmt.Errorf("subscription stopped, no more data to fetch (this smells like a bug)")
233 | case batch := <-i.batchesCh:
234 | sdk.Logger(ctx).Trace().
235 | Int("records", len(batch)).
236 | Msg("CDCIterator.NextN received batch of records")
237 |
238 | return batch, nil
239 | default:
240 | // No more records currently available
241 | return nil, nil
242 | }
243 | }
244 |
245 | // appendRecordsWithLimit appends records to dst from src, until the given limit is reached,
246 | // or all records from src have been moved.
247 | // If some records from src are not moved (probably because they lack emotions),
248 | // they are saved to recordsForNextRead.
249 | func (i *CDCIterator) appendRecordsWithLimit(dst []opencdc.Record, src []opencdc.Record, limit int) []opencdc.Record {
250 | if len(src) == 0 || len(dst) > limit {
251 | return src
252 | }
253 |
254 | needed := limit - len(dst)
255 | if needed > len(src) {
256 | needed = len(src)
257 | }
258 |
259 | dst = append(dst, src[:needed]...)
260 | i.recordsForNextRead = src[needed:]
261 |
262 | return dst
263 | }
264 |
265 | // Ack forwards the acknowledgment to the subscription.
266 | func (i *CDCIterator) Ack(_ context.Context, sdkPos opencdc.Position) error {
267 | pos, err := position.ParseSDKPosition(sdkPos)
268 | if err != nil {
269 | return err
270 | }
271 |
272 | if pos.Type != position.TypeCDC {
273 | return fmt.Errorf("invalid type %q for CDC position", pos.Type.String())
274 | }
275 |
276 | lsn, err := pos.LSN()
277 | if err != nil {
278 | return err
279 | }
280 |
281 | if lsn == 0 {
282 | return fmt.Errorf("cannot ack zero position")
283 | }
284 |
285 | i.sub.Ack(lsn)
286 | return nil
287 | }
288 |
289 | // Teardown stops the CDC subscription and blocks until the subscription is done
290 | // or the context gets canceled. If the subscription stopped with an unexpected
291 | // error, the error is returned.
292 | func (i *CDCIterator) Teardown(ctx context.Context) error {
293 | if i.sub != nil {
294 | return i.sub.Teardown(ctx)
295 | }
296 |
297 | return nil
298 | }
299 |
300 | // subscriberReady returns true when the subscriber is running.
301 | func (i *CDCIterator) subscriberReady() bool {
302 | select {
303 | case <-i.sub.Ready():
304 | return true
305 | default:
306 | return false
307 | }
308 | }
309 |
// TXSnapshotID returns the transaction snapshot which is received
// when the replication slot is created. The value can be empty, when the
// iterator is resuming.
func (i *CDCIterator) TXSnapshotID() string {
	return i.sub.TXSnapshotID
}
316 |
--------------------------------------------------------------------------------
/source/logrepl/cleaner.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2024 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package logrepl
16 |
17 | import (
18 | "context"
19 | "errors"
20 | "fmt"
21 |
22 | "github.com/conduitio/conduit-connector-postgres/source/cpool"
23 | "github.com/conduitio/conduit-connector-postgres/source/logrepl/internal"
24 | sdk "github.com/conduitio/conduit-connector-sdk"
25 | )
26 |
// CleanupConfig holds the settings used by Cleanup.
type CleanupConfig struct {
	// URL is the connection string of the database to clean up.
	URL string
	// SlotName is the replication slot to drop; empty skips slot cleanup.
	SlotName string
	// PublicationName is the publication to drop; empty skips publication cleanup.
	PublicationName string
}
32 |
33 | // Cleanup drops the provided replication slot and publication.
34 | // It will terminate any backends consuming the replication slot before deletion.
35 | func Cleanup(ctx context.Context, c CleanupConfig) error {
36 | logger := sdk.Logger(ctx)
37 |
38 | pool, err := cpool.New(ctx, c.URL)
39 | if err != nil {
40 | return fmt.Errorf("failed to connect to database: %w", err)
41 | }
42 | defer pool.Close()
43 |
44 | var errs []error
45 |
46 | logger.Debug().
47 | Str("slot", c.SlotName).
48 | Str("publication", c.PublicationName).
49 | Msg("removing replication slot and publication")
50 |
51 | if c.SlotName != "" {
52 | // Terminate any outstanding backends which are consuming the slot before deleting it.
53 | if _, err := pool.Exec(
54 | ctx,
55 | "SELECT pg_terminate_backend(active_pid) FROM pg_replication_slots WHERE slot_name=$1 AND active=true", c.SlotName,
56 | ); err != nil {
57 | errs = append(errs, fmt.Errorf("failed to terminate active backends on slot: %w", err))
58 | }
59 |
60 | if _, err := pool.Exec(
61 | ctx,
62 | "SELECT pg_drop_replication_slot($1)", c.SlotName,
63 | ); err != nil {
64 | errs = append(errs, fmt.Errorf("failed to clean up replication slot %q: %w", c.SlotName, err))
65 | }
66 | } else {
67 | logger.Warn().Msg("cleanup: skipping replication slot cleanup, name is empty")
68 | }
69 |
70 | if c.PublicationName != "" {
71 | if err := internal.DropPublication(
72 | ctx,
73 | pool,
74 | c.PublicationName,
75 | internal.DropPublicationOptions{IfExists: true},
76 | ); err != nil {
77 | errs = append(errs, fmt.Errorf("failed to clean up publication %q: %w", c.PublicationName, err))
78 | }
79 | } else {
80 | logger.Warn().Msg("cleanup: skipping publication cleanup, name is empty")
81 | }
82 |
83 | return errors.Join(errs...)
84 | }
85 |
--------------------------------------------------------------------------------
/source/logrepl/cleaner_test.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2024 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package logrepl
16 |
17 | import (
18 | "context"
19 | "errors"
20 | "strings"
21 | "testing"
22 |
23 | "github.com/conduitio/conduit-connector-postgres/test"
24 | "github.com/matryer/is"
25 | )
26 |
// Test_Cleanup is an integration test verifying that Cleanup drops the
// configured replication slot and/or publication against a live Postgres
// instance (reached via test.RepmgrConnString), including the cases where
// one of the resources is unspecified or missing.
func Test_Cleanup(t *testing.T) {
	conn := test.ConnectSimple(context.Background(), t, test.RepmgrConnString)

	tests := []struct {
		desc  string
		setup func(t *testing.T)
		conf  CleanupConfig

		wantErr error
	}{
		{
			desc: "drops slot and pub",
			conf: CleanupConfig{
				URL:             test.RepmgrConnString,
				SlotName:        "conduitslot1",
				PublicationName: "conduitpub1",
			},
			setup: func(t *testing.T) {
				table := test.SetupTestTable(context.Background(), t, conn)
				test.CreatePublication(t, conn, "conduitpub1", []string{table})
				test.CreateReplicationSlot(t, conn, "conduitslot1")
			},
		},
		{
			desc: "drops pub slot unspecified",
			conf: CleanupConfig{
				URL:             test.RepmgrConnString,
				PublicationName: "conduitpub2",
			},
			setup: func(t *testing.T) {
				table := test.SetupTestTable(context.Background(), t, conn)
				test.CreatePublication(t, conn, "conduitpub2", []string{table})
			},
		},
		{
			desc: "drops slot pub unspecified",
			conf: CleanupConfig{
				URL:      test.RepmgrConnString,
				SlotName: "conduitslot3",
			},
			setup: func(t *testing.T) {
				test.CreateReplicationSlot(t, conn, "conduitslot3")
			},
		},
		{
			// The slot is never created, so Cleanup is expected to report
			// its absence while still dropping the publication.
			desc: "drops pub slot missing",
			conf: CleanupConfig{
				URL:             test.RepmgrConnString,
				SlotName:        "conduitslot4",
				PublicationName: "conduitpub4",
			},
			setup: func(t *testing.T) {
				table := test.SetupTestTable(context.Background(), t, conn)
				test.CreatePublication(t, conn, "conduitpub4", []string{table})
			},
			wantErr: errors.New(`replication slot "conduitslot4" does not exist`),
		},
		{
			desc: "drops slot, pub missing", // no op
			conf: CleanupConfig{
				URL:             test.RepmgrConnString,
				SlotName:        "conduitslot5",
				PublicationName: "conduitpub5",
			},
			setup: func(t *testing.T) {
				test.CreateReplicationSlot(t, conn, "conduitslot5")
			},
		},
	}

	for _, tc := range tests {
		t.Run(tc.desc, func(t *testing.T) {
			is := is.New(t)

			if tc.setup != nil {
				tc.setup(t)
			}

			err := Cleanup(context.Background(), tc.conf)

			// Substring match only: the driver wraps the underlying
			// Postgres error message.
			if tc.wantErr != nil {
				is.True(strings.Contains(err.Error(), tc.wantErr.Error()))
			} else {
				is.NoErr(err)
			}
		})
	}
}
115 |
--------------------------------------------------------------------------------
/source/logrepl/combined.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2022 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package logrepl
16 |
17 | import (
18 | "context"
19 | "errors"
20 | "fmt"
21 |
22 | "github.com/conduitio/conduit-commons/opencdc"
23 | "github.com/conduitio/conduit-connector-postgres/source/position"
24 | "github.com/conduitio/conduit-connector-postgres/source/snapshot"
25 | sdk "github.com/conduitio/conduit-connector-sdk"
26 | "github.com/jackc/pgx/v5/pgxpool"
27 | )
28 |
// iterator is the record-iterator contract shared by the snapshot and CDC
// iterators, letting CombinedIterator switch between them transparently.
type iterator interface {
	NextN(context.Context, int) ([]opencdc.Record, error)
	Ack(context.Context, opencdc.Position) error
	Teardown(context.Context) error
}
34 |
// CombinedIterator runs an initial snapshot phase (when enabled) followed by
// CDC, exposing both through a single iterator.
type CombinedIterator struct {
	conf Config
	pool *pgxpool.Pool

	cdcIterator      *CDCIterator
	snapshotIterator *snapshot.Iterator
	// activeIterator points at whichever iterator currently serves NextN/Ack.
	activeIterator iterator
}
43 |
// Config holds the settings for a CombinedIterator.
type Config struct {
	// Position is the SDK position to resume from.
	Position opencdc.Position
	// SlotName is the logical replication slot name.
	SlotName string
	// PublicationName is the publication name.
	PublicationName string
	// Tables lists the tables to snapshot and replicate.
	Tables []string
	// TableKeys maps each table to its key column; required for every table
	// (see Validate).
	TableKeys map[string]string
	// WithSnapshot enables the initial snapshot phase.
	WithSnapshot bool
	// WithAvroSchema enables Avro schema handling.
	WithAvroSchema bool
	// BatchSize is the batch size used for snapshot fetches and CDC reads.
	BatchSize int
}
54 |
55 | // Validate performs validation tasks on the config.
56 | func (c Config) Validate() error {
57 | var errs []error
58 | // make sure we have all table keys
59 | for _, tableName := range c.Tables {
60 | if c.TableKeys[tableName] == "" {
61 | errs = append(errs, fmt.Errorf("missing key for table %q", tableName))
62 | }
63 | }
64 |
65 | return errors.Join(errs...)
66 | }
67 |
68 | // NewCombinedIterator will initialize and start the Snapshot and CDC iterators.
69 | // Failure to parse the position or validate the config will return an error.
70 | func NewCombinedIterator(ctx context.Context, pool *pgxpool.Pool, conf Config) (*CombinedIterator, error) {
71 | pos, err := position.ParseSDKPosition(conf.Position)
72 | if err != nil {
73 | sdk.Logger(ctx).Debug().
74 | Err(err).
75 | Msgf("failed to parse position: %s", string(conf.Position))
76 |
77 | return nil, fmt.Errorf("failed to create logrepl iterator: %w", err)
78 | }
79 |
80 | if err := conf.Validate(); err != nil {
81 | return nil, fmt.Errorf("failed to validate logrepl config: %w", err)
82 | }
83 |
84 | c := &CombinedIterator{
85 | conf: conf,
86 | pool: pool,
87 | }
88 |
89 | // Initialize the CDC iterator.
90 | if err := c.initCDCIterator(ctx, pos); err != nil {
91 | return nil, err
92 | }
93 |
94 | // Initialize the snapshot iterator when snapshotting is enabled and not completed.
95 | // The CDC iterator must be initialized first when snapshotting is requested.
96 | if err := c.initSnapshotIterator(ctx, pos); err != nil {
97 | return nil, err
98 | }
99 |
100 | switch {
101 | case c.snapshotIterator != nil:
102 | c.activeIterator = c.snapshotIterator
103 | default:
104 | if err := c.cdcIterator.StartSubscriber(ctx); err != nil {
105 | return nil, fmt.Errorf("failed to start CDC iterator: %w", err)
106 | }
107 |
108 | c.activeIterator = c.cdcIterator
109 | }
110 |
111 | return c, nil
112 | }
113 |
114 | // NextN retrieves up to n records from the active iterator.
115 | // If the end of the snapshot is reached during this call, it will switch to the CDC iterator
116 | // and continue retrieving records from there.
117 | func (c *CombinedIterator) NextN(ctx context.Context, n int) ([]opencdc.Record, error) {
118 | if n <= 0 {
119 | return nil, fmt.Errorf("n must be greater than 0, got %d", n)
120 | }
121 |
122 | records, err := c.activeIterator.NextN(ctx, n)
123 | if err != nil {
124 | if !errors.Is(err, snapshot.ErrIteratorDone) {
125 | return nil, fmt.Errorf("failed to fetch records in batch: %w", err)
126 | }
127 |
128 | // Snapshot iterator is done, handover to CDC iterator
129 | if err := c.useCDCIterator(ctx); err != nil {
130 | return nil, err
131 | }
132 |
133 | sdk.Logger(ctx).Debug().Msg("Snapshot completed, switching to CDC mode")
134 | return c.NextN(ctx, n)
135 | }
136 |
137 | return records, nil
138 | }
139 |
// Ack forwards the acknowledgment to the currently active iterator.
func (c *CombinedIterator) Ack(ctx context.Context, p opencdc.Position) error {
	return c.activeIterator.Ack(ctx, p)
}
143 |
144 | // Teardown will stop and teardown the CDC and Snapshot iterators.
145 | func (c *CombinedIterator) Teardown(ctx context.Context) error {
146 | logger := sdk.Logger(ctx)
147 |
148 | var errs []error
149 |
150 | if c.cdcIterator != nil {
151 | if err := c.cdcIterator.Teardown(ctx); err != nil {
152 | logger.Warn().Err(err).Msg("Failed to tear down cdc iterator")
153 | errs = append(errs, fmt.Errorf("failed to teardown cdc iterator: %w", err))
154 | }
155 | }
156 |
157 | if c.snapshotIterator != nil {
158 | if err := c.snapshotIterator.Teardown(ctx); err != nil {
159 | logger.Warn().Err(err).Msg("Failed to tear down snapshot iterator")
160 | errs = append(errs, fmt.Errorf("failed to teardown snapshot iterator: %w", err))
161 | }
162 | }
163 |
164 | return errors.Join(errs...)
165 | }
166 |
167 | // initCDCIterator initializes the CDC iterator, which will create the replication slot.
168 | // When snapshotting is disabled or the last known position is of CDC type, the iterator
169 | // will start to consume CDC events from the created slot.
170 | // Returns error when:
171 | // * LSN position cannot be parsed,
172 | // * The CDC iterator fails to initalize or fail to start.
173 | func (c *CombinedIterator) initCDCIterator(ctx context.Context, pos position.Position) error {
174 | lsn, err := pos.LSN()
175 | if err != nil {
176 | return fmt.Errorf("failed to parse LSN in position: %w", err)
177 | }
178 |
179 | cdcIterator, err := NewCDCIterator(ctx, c.pool, CDCConfig{
180 | LSN: lsn,
181 | SlotName: c.conf.SlotName,
182 | PublicationName: c.conf.PublicationName,
183 | Tables: c.conf.Tables,
184 | TableKeys: c.conf.TableKeys,
185 | WithAvroSchema: c.conf.WithAvroSchema,
186 | BatchSize: c.conf.BatchSize,
187 | })
188 | if err != nil {
189 | return fmt.Errorf("failed to create CDC iterator: %w", err)
190 | }
191 |
192 | c.cdcIterator = cdcIterator
193 |
194 | return nil
195 | }
196 |
197 | // initSnapshotIterator initializes the Snapshot iterator. The CDC iterator must be initalized.
198 | func (c *CombinedIterator) initSnapshotIterator(ctx context.Context, pos position.Position) error {
199 | if !c.conf.WithSnapshot || pos.Type == position.TypeCDC {
200 | return nil
201 | }
202 |
203 | if c.cdcIterator == nil {
204 | return fmt.Errorf("CDC iterator needs to be initialized before snapshot")
205 | }
206 |
207 | snapshotIterator, err := snapshot.NewIterator(ctx, c.pool, snapshot.Config{
208 | Position: c.conf.Position,
209 | Tables: c.conf.Tables,
210 | TableKeys: c.conf.TableKeys,
211 | TXSnapshotID: c.cdcIterator.TXSnapshotID(),
212 | FetchSize: c.conf.BatchSize,
213 | WithAvroSchema: c.conf.WithAvroSchema,
214 | })
215 | if err != nil {
216 | return fmt.Errorf("failed to create snapshot iterator: %w", err)
217 | }
218 |
219 | sdk.Logger(ctx).Info().Msg("Initial snapshot requested, starting..")
220 |
221 | c.snapshotIterator = snapshotIterator
222 |
223 | return nil
224 | }
225 |
226 | // useCDCIterator will activate and start the CDC iterator. The snapshot iterator
227 | // will be torn down if initialized.
228 | func (c *CombinedIterator) useCDCIterator(ctx context.Context) error {
229 | if c.snapshotIterator != nil {
230 | if err := c.snapshotIterator.Teardown(ctx); err != nil {
231 | return fmt.Errorf("failed to teardown snapshot iterator during switch: %w", err)
232 | }
233 | }
234 |
235 | c.activeIterator, c.snapshotIterator = c.cdcIterator, nil
236 |
237 | if err := c.cdcIterator.StartSubscriber(ctx); err != nil {
238 | return fmt.Errorf("failed to start CDC iterator: %w", err)
239 | }
240 |
241 | return nil
242 | }
243 |
--------------------------------------------------------------------------------
/source/logrepl/handler.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2022 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package logrepl
16 |
17 | import (
18 | "context"
19 | "errors"
20 | "fmt"
21 | "sync"
22 | "time"
23 |
24 | "github.com/conduitio/conduit-commons/opencdc"
25 | cschema "github.com/conduitio/conduit-commons/schema"
26 | "github.com/conduitio/conduit-connector-postgres/source/logrepl/internal"
27 | "github.com/conduitio/conduit-connector-postgres/source/position"
28 | "github.com/conduitio/conduit-connector-postgres/source/schema"
29 | sdk "github.com/conduitio/conduit-connector-sdk"
30 | sdkschema "github.com/conduitio/conduit-connector-sdk/schema"
31 | "github.com/jackc/pglogrepl"
32 | )
33 |
// CDCHandler is responsible for handling logical replication messages,
// converting them to a record and sending them to a channel.
type CDCHandler struct {
	// tableKeys maps table names to their key column.
	tableKeys   map[string]string
	// relationSet tracks relation messages so row data can be decoded.
	relationSet *internal.RelationSet

	// batchSize is the largest number of records this handler will send at once.
	batchSize     int
	// flushInterval is how often the accumulated batch is flushed to out.
	flushInterval time.Duration

	// recordBatch holds the batch that is currently being built.
	recordBatch     []opencdc.Record
	// recordBatchLock guards recordBatch.
	recordBatchLock sync.Mutex

	// out is a sending channel with batches of records.
	out            chan<- []opencdc.Record
	// lastTXLSN is the final LSN of the in-flight transaction; set on Begin
	// and validated against the Commit LSN (see Handle).
	lastTXLSN      pglogrepl.LSN
	// withAvroSchema controls Avro schema handling for records.
	withAvroSchema bool
	// keySchemas / payloadSchemas: presumably per-table schema caches used by
	// the insert/update/delete handlers (not visible here) — verify at call sites.
	keySchemas     map[string]cschema.Schema
	payloadSchemas map[string]cschema.Schema
}
55 |
56 | func NewCDCHandler(
57 | ctx context.Context,
58 | rs *internal.RelationSet,
59 | tableKeys map[string]string,
60 | out chan<- []opencdc.Record,
61 | withAvroSchema bool,
62 | batchSize int,
63 | flushInterval time.Duration,
64 | ) *CDCHandler {
65 | h := &CDCHandler{
66 | tableKeys: tableKeys,
67 | relationSet: rs,
68 | recordBatch: make([]opencdc.Record, 0, batchSize),
69 | out: out,
70 | withAvroSchema: withAvroSchema,
71 | keySchemas: make(map[string]cschema.Schema),
72 | payloadSchemas: make(map[string]cschema.Schema),
73 | batchSize: batchSize,
74 | flushInterval: flushInterval,
75 | }
76 |
77 | go h.scheduleFlushing(ctx)
78 |
79 | return h
80 | }
81 |
82 | func (h *CDCHandler) scheduleFlushing(ctx context.Context) {
83 | ticker := time.NewTicker(h.flushInterval)
84 | defer ticker.Stop()
85 |
86 | for range time.Tick(h.flushInterval) {
87 | h.flush(ctx)
88 | }
89 | }
90 |
// flush sends the currently accumulated batch of records to the out channel
// and starts a fresh batch. It is a no-op when the batch is empty. When the
// context has been canceled, it closes the out channel instead of sending.
func (h *CDCHandler) flush(ctx context.Context) {
	h.recordBatchLock.Lock()
	defer h.recordBatchLock.Unlock()

	// Nothing accumulated since the last flush.
	if len(h.recordBatch) == 0 {
		return
	}

	if errors.Is(ctx.Err(), context.Canceled) {
		// NOTE(review): recordBatch is not reset on this path, so a second
		// flush after cancellation would call close(h.out) again and panic
		// ("close of closed channel") — confirm flush cannot run twice
		// after the context is canceled.
		close(h.out)
		sdk.Logger(ctx).Warn().
			Err(ctx.Err()).
			Int("records", len(h.recordBatch)).
			Msg("CDCHandler flushing records cancelled")
		return
	}

	// Hand the batch off and start accumulating a fresh one.
	h.out <- h.recordBatch
	sdk.Logger(ctx).Debug().
		Int("records", len(h.recordBatch)).
		Msg("CDCHandler sending batch of records")
	h.recordBatch = make([]opencdc.Record, 0, h.batchSize)
}
114 |
// Handle is the handler function that receives all logical replication messages.
// Returns non-zero LSN when a record was emitted for the message.
func (h *CDCHandler) Handle(ctx context.Context, m pglogrepl.Message, lsn pglogrepl.LSN) (pglogrepl.LSN, error) {
	sdk.Logger(ctx).Trace().
		Str("lsn", lsn.String()).
		Str("messageType", m.Type().String()).
		Msg("handler received pglogrepl.Message")

	switch m := m.(type) {
	case *pglogrepl.RelationMessage:
		// We have to add the Relations to our Set so that we can decode our own output
		h.relationSet.Add(m)
	case *pglogrepl.InsertMessage:
		if err := h.handleInsert(ctx, m, lsn); err != nil {
			return 0, fmt.Errorf("logrepl handler insert: %w", err)
		}
		return lsn, nil
	case *pglogrepl.UpdateMessage:
		if err := h.handleUpdate(ctx, m, lsn); err != nil {
			return 0, fmt.Errorf("logrepl handler update: %w", err)
		}
		return lsn, nil
	case *pglogrepl.DeleteMessage:
		if err := h.handleDelete(ctx, m, lsn); err != nil {
			return 0, fmt.Errorf("logrepl handler delete: %w", err)
		}
		return lsn, nil
	case *pglogrepl.BeginMessage:
		// Remember the final LSN of the transaction that is starting so the
		// matching commit can be validated below.
		h.lastTXLSN = m.FinalLSN
	case *pglogrepl.CommitMessage:
		// Sanity check: the commit's LSN must match the final LSN announced by
		// the preceding Begin message (when one was observed).
		if h.lastTXLSN != 0 && h.lastTXLSN != m.CommitLSN {
			return 0, fmt.Errorf("out of order commit %s, expected %s", m.CommitLSN, h.lastTXLSN)
		}
	}

	// Relation/Begin/Commit (and unknown) messages emit no record.
	return 0, nil
}
152 |
153 | // handleInsert formats a Record with INSERT event data from Postgres and sends
154 | // it to the output channel.
155 | func (h *CDCHandler) handleInsert(
156 | ctx context.Context,
157 | msg *pglogrepl.InsertMessage,
158 | lsn pglogrepl.LSN,
159 | ) error {
160 | rel, err := h.relationSet.Get(msg.RelationID)
161 | if err != nil {
162 | return fmt.Errorf("failed getting relation %v: %w", msg.RelationID, err)
163 | }
164 |
165 | newValues, err := h.relationSet.Values(msg.RelationID, msg.Tuple)
166 | if err != nil {
167 | return fmt.Errorf("failed to decode new values: %w", err)
168 | }
169 |
170 | if err := h.updateAvroSchema(ctx, rel); err != nil {
171 | return fmt.Errorf("failed to update avro schema: %w", err)
172 | }
173 |
174 | rec := sdk.Util.Source.NewRecordCreate(
175 | h.buildPosition(lsn),
176 | h.buildRecordMetadata(rel),
177 | h.buildRecordKey(newValues, rel.RelationName),
178 | h.buildRecordPayload(newValues),
179 | )
180 | h.attachSchemas(rec, rel.RelationName)
181 | h.addToBatch(ctx, rec)
182 |
183 | return nil
184 | }
185 |
// handleUpdate formats a record with UPDATE event data from Postgres and sends
// it to the output channel.
func (h *CDCHandler) handleUpdate(
	ctx context.Context,
	msg *pglogrepl.UpdateMessage,
	lsn pglogrepl.LSN,
) error {
	rel, err := h.relationSet.Get(msg.RelationID)
	if err != nil {
		return err
	}

	// The new (post-update) tuple is mandatory for an update record.
	newValues, err := h.relationSet.Values(msg.RelationID, msg.NewTuple)
	if err != nil {
		return fmt.Errorf("failed to decode new values: %w", err)
	}

	if err := h.updateAvroSchema(ctx, rel); err != nil {
		return fmt.Errorf("failed to update avro schema: %w", err)
	}

	// The old (pre-update) tuple is only replicated when the table's replica
	// identity is configured accordingly, so a failure here is tolerated.
	oldValues, err := h.relationSet.Values(msg.RelationID, msg.OldTuple)
	if err != nil {
		// this is not a critical error, old values are optional, just log it
		// we use level "trace" intentionally to not clog up the logs in production
		sdk.Logger(ctx).Trace().Err(err).Msg("could not parse old values from UpdateMessage")
	}

	rec := sdk.Util.Source.NewRecordUpdate(
		h.buildPosition(lsn),
		h.buildRecordMetadata(rel),
		h.buildRecordKey(newValues, rel.RelationName),
		h.buildRecordPayload(oldValues),
		h.buildRecordPayload(newValues),
	)
	h.attachSchemas(rec, rel.RelationName)
	h.addToBatch(ctx, rec)

	return nil
}
226 |
// handleDelete formats a record with DELETE event data from Postgres and sends
// it to the output channel. Deleted records only contain the key and no payload.
func (h *CDCHandler) handleDelete(
	ctx context.Context,
	msg *pglogrepl.DeleteMessage,
	lsn pglogrepl.LSN,
) error {
	rel, err := h.relationSet.Get(msg.RelationID)
	if err != nil {
		return err
	}

	// For deletes only the old tuple exists; it provides both key and payload.
	oldValues, err := h.relationSet.Values(msg.RelationID, msg.OldTuple)
	if err != nil {
		return fmt.Errorf("failed to decode old values: %w", err)
	}

	if err := h.updateAvroSchema(ctx, rel); err != nil {
		return fmt.Errorf("failed to update avro schema: %w", err)
	}

	rec := sdk.Util.Source.NewRecordDelete(
		h.buildPosition(lsn),
		h.buildRecordMetadata(rel),
		h.buildRecordKey(oldValues, rel.RelationName),
		h.buildRecordPayload(oldValues),
	)
	h.attachSchemas(rec, rel.RelationName)
	h.addToBatch(ctx, rec)

	return nil
}
259 |
260 | // addToBatch the record to the output channel or detect the cancellation of the
261 | // context and return the context error.
262 | func (h *CDCHandler) addToBatch(ctx context.Context, rec opencdc.Record) {
263 | h.recordBatchLock.Lock()
264 |
265 | h.recordBatch = append(h.recordBatch, rec)
266 | currentBatchSize := len(h.recordBatch)
267 |
268 | sdk.Logger(ctx).Trace().
269 | Int("current_batch_size", currentBatchSize).
270 | Msg("CDCHandler added record to batch")
271 |
272 | h.recordBatchLock.Unlock()
273 |
274 | if currentBatchSize >= h.batchSize {
275 | h.flush(ctx)
276 | }
277 | }
278 |
279 | func (h *CDCHandler) buildRecordMetadata(rel *pglogrepl.RelationMessage) map[string]string {
280 | m := map[string]string{
281 | opencdc.MetadataCollection: rel.RelationName,
282 | }
283 |
284 | return m
285 | }
286 |
287 | // buildRecordKey takes the values from the message and extracts the key that
288 | // matches the configured keyColumnName.
289 | func (h *CDCHandler) buildRecordKey(values map[string]any, table string) opencdc.Data {
290 | keyColumn := h.tableKeys[table]
291 | key := make(opencdc.StructuredData)
292 | for k, v := range values {
293 | if keyColumn == k {
294 | key[k] = v
295 | break // TODO add support for composite keys
296 | }
297 | }
298 | return key
299 | }
300 |
301 | // buildRecordPayload takes the values from the message and extracts the payload
302 | // for the record.
303 | func (h *CDCHandler) buildRecordPayload(values map[string]any) opencdc.Data {
304 | if len(values) == 0 {
305 | return nil
306 | }
307 | return opencdc.StructuredData(values)
308 | }
309 |
310 | // buildPosition stores the LSN in position and converts it to bytes.
311 | func (*CDCHandler) buildPosition(lsn pglogrepl.LSN) opencdc.Position {
312 | return position.Position{
313 | Type: position.TypeCDC,
314 | LastLSN: lsn.String(),
315 | }.ToSDKPosition()
316 | }
317 |
318 | // updateAvroSchema generates and stores avro schema based on the relation's row
319 | // when usage of avro schema is requested.
320 | func (h *CDCHandler) updateAvroSchema(ctx context.Context, rel *pglogrepl.RelationMessage) error {
321 | if !h.withAvroSchema {
322 | return nil
323 | }
324 | // Payload schema
325 | avroPayloadSch, err := schema.Avro.ExtractLogrepl(rel.RelationName+"_payload", rel)
326 | if err != nil {
327 | return fmt.Errorf("failed to extract payload schema: %w", err)
328 | }
329 | ps, err := sdkschema.Create(
330 | ctx,
331 | cschema.TypeAvro,
332 | avroPayloadSch.Name(),
333 | []byte(avroPayloadSch.String()),
334 | )
335 | if err != nil {
336 | return fmt.Errorf("failed creating payload schema for relation %v: %w", rel.RelationName, err)
337 | }
338 | h.payloadSchemas[rel.RelationName] = ps
339 |
340 | // Key schema
341 | avroKeySch, err := schema.Avro.ExtractLogrepl(rel.RelationName+"_key", rel, h.tableKeys[rel.RelationName])
342 | if err != nil {
343 | return fmt.Errorf("failed to extract key schema: %w", err)
344 | }
345 | ks, err := sdkschema.Create(
346 | ctx,
347 | cschema.TypeAvro,
348 | avroKeySch.Name(),
349 | []byte(avroKeySch.String()),
350 | )
351 | if err != nil {
352 | return fmt.Errorf("failed creating key schema for relation %v: %w", rel.RelationName, err)
353 | }
354 | h.keySchemas[rel.RelationName] = ks
355 |
356 | return nil
357 | }
358 |
// attachSchemas attaches the cached payload and key schemas for the relation
// to the record's metadata. No-op when avro schema support is disabled.
func (h *CDCHandler) attachSchemas(rec opencdc.Record, relationName string) {
	if !h.withAvroSchema {
		return
	}
	cschema.AttachPayloadSchemaToRecord(rec, h.payloadSchemas[relationName])
	cschema.AttachKeySchemaToRecord(rec, h.keySchemas[relationName])
}
366 |
--------------------------------------------------------------------------------
/source/logrepl/handler_test.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2025 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package logrepl
16 |
17 | import (
18 | "context"
19 | "testing"
20 | "time"
21 |
22 | "github.com/conduitio/conduit-commons/cchan"
23 | "github.com/conduitio/conduit-commons/opencdc"
24 | "github.com/matryer/is"
25 | )
26 |
// TestHandler_Batching_BatchSizeReached verifies that the handler flushes a
// batch as soon as the configured batch size (5) is reached, without waiting
// for the flush interval.
func TestHandler_Batching_BatchSizeReached(t *testing.T) {
	ctx := context.Background()
	is := is.New(t)

	ch := make(chan []opencdc.Record, 1)
	underTest := NewCDCHandler(ctx, nil, nil, ch, false, 5, time.Second)
	want := make([]opencdc.Record, 5)
	for i := 0; i < cap(want); i++ {
		rec := newTestRecord(i)
		underTest.addToBatch(ctx, rec)
		want[i] = rec
	}

	// The 5th addToBatch should have triggered an immediate flush.
	recs, gotRecs, err := cchan.ChanOut[[]opencdc.Record](ch).RecvTimeout(ctx, time.Second)
	is.NoErr(err)
	is.True(gotRecs)
	is.Equal(recs, want)
}
45 |
46 | // TestHandler_Batching_FlushInterval tests if the handler flushes
47 | // a batch once the flush interval passes, even if the batch size is not reached.
48 | func TestHandler_Batching_FlushInterval(t *testing.T) {
49 | ctx := context.Background()
50 | is := is.New(t)
51 |
52 | ch := make(chan []opencdc.Record, 1)
53 | flushInterval := time.Second
54 | underTest := NewCDCHandler(ctx, nil, nil, ch, false, 5, flushInterval)
55 |
56 | want := make([]opencdc.Record, 3)
57 | for i := 0; i < cap(want); i++ {
58 | rec := newTestRecord(i)
59 | underTest.addToBatch(ctx, rec)
60 | want[i] = rec
61 | }
62 |
63 | start := time.Now()
64 | recs, gotRecs, err := cchan.ChanOut[[]opencdc.Record](ch).RecvTimeout(ctx, 1200*time.Millisecond)
65 |
66 | is.NoErr(err)
67 | is.True(gotRecs)
68 | is.Equal(recs, want)
69 | is.True(time.Since(start) >= flushInterval)
70 | }
71 |
// TestHandler_Batching_ContextCancelled verifies that when the context is
// cancelled the handler does not deliver pending records and instead closes
// the output channel.
func TestHandler_Batching_ContextCancelled(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	is := is.New(t)

	ch := make(chan []opencdc.Record, 1)
	underTest := NewCDCHandler(ctx, nil, nil, ch, false, 5, time.Second)
	cancel()
	<-ctx.Done()
	underTest.addToBatch(ctx, newTestRecord(0))

	// The receive should observe a closed channel, not a record batch.
	// NOTE(review): this relies on the handler's flusher goroutine observing
	// the cancellation after the record was added — timing-sensitive.
	_, recordReceived := <-ch
	is.True(!recordReceived)
}
85 |
86 | func newTestRecord(id int) opencdc.Record {
87 | return opencdc.Record{
88 | Key: opencdc.StructuredData{
89 | "id": id,
90 | },
91 | }
92 | }
93 |
--------------------------------------------------------------------------------
/source/logrepl/internal/error.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2022 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package internal
16 |
17 | import (
18 | "errors"
19 |
20 | "github.com/jackc/pgerrcode"
21 | "github.com/jackc/pgx/v5/pgconn"
22 | )
23 |
24 | func IsPgDuplicateErr(err error) bool {
25 | var pgerr *pgconn.PgError
26 | return errors.As(err, &pgerr) && pgerr.Code == pgerrcode.DuplicateObject
27 | }
28 |
--------------------------------------------------------------------------------
/source/logrepl/internal/publication.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2022 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package internal
16 |
17 | import (
18 | "context"
19 | "fmt"
20 | "strings"
21 |
22 | "github.com/conduitio/conduit-connector-postgres/internal"
23 | "github.com/jackc/pgx/v5/pgxpool"
24 | )
25 |
// CreatePublicationOptions contains additional options for creating a publication.
// Tables must contain at least one table name, otherwise publication creation
// will fail. PublicationParams are passed verbatim into the WITH (...) clause.
// NOTE(review): an earlier version of this comment referenced an AllTables
// field that no longer exists on this struct.
type CreatePublicationOptions struct {
	Tables            []string
	PublicationParams []string
}
33 |
34 | // CreatePublication creates a publication.
35 | func CreatePublication(ctx context.Context, conn *pgxpool.Pool, name string, opts CreatePublicationOptions) error {
36 | if len(opts.Tables) == 0 {
37 | return fmt.Errorf("publication %q requires at least one table", name)
38 | }
39 |
40 | wrappedTablesNames := make([]string, 0, len(opts.Tables))
41 | for _, t := range opts.Tables {
42 | wrappedTablesNames = append(wrappedTablesNames, internal.WrapSQLIdent(t))
43 | }
44 |
45 | forTableString := fmt.Sprintf("FOR TABLE %s", strings.Join(wrappedTablesNames, ", "))
46 |
47 | var publicationParams string
48 | if len(opts.PublicationParams) > 0 {
49 | publicationParams = fmt.Sprintf("WITH (%s)", strings.Join(opts.PublicationParams, ", "))
50 | }
51 |
52 | if _, err := conn.Exec(
53 | ctx,
54 | fmt.Sprintf("CREATE PUBLICATION %q %s %s", name, forTableString, publicationParams),
55 | ); err != nil {
56 | return fmt.Errorf("failed to create publication %q: %w", name, err)
57 | }
58 |
59 | return nil
60 | }
61 |
// DropPublicationOptions contains additional options for dropping a publication.
type DropPublicationOptions struct {
	// IfExists makes the drop a no-op (instead of an error) when the
	// publication does not exist.
	IfExists bool
}
66 |
67 | // DropPublication drops a publication.
68 | func DropPublication(ctx context.Context, conn *pgxpool.Pool, name string, opts DropPublicationOptions) error {
69 | var ifExistsString string
70 | if opts.IfExists {
71 | ifExistsString = "IF EXISTS"
72 | }
73 |
74 | if _, err := conn.Exec(
75 | ctx,
76 | fmt.Sprintf("DROP PUBLICATION %s %q", ifExistsString, name),
77 | ); err != nil {
78 | return fmt.Errorf("failed to drop publication %q: %w", name, err)
79 | }
80 |
81 | return nil
82 | }
83 |
--------------------------------------------------------------------------------
/source/logrepl/internal/publication_test.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2022 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package internal
16 |
17 | import (
18 | "fmt"
19 | "strings"
20 | "testing"
21 |
22 | "github.com/conduitio/conduit-connector-postgres/test"
23 | "github.com/matryer/is"
24 | )
25 |
// TestCreatePublication verifies that publications can be created (and
// dropped again) for a mix of publication names, including names that need
// quoting, and different publication parameter sets.
func TestCreatePublication(t *testing.T) {
	ctx := test.Context(t)
	pool := test.ConnectPool(ctx, t, test.RegularConnString)

	pubNames := []string{"testpub", "123", "test-hyphen", "test:semicolon", "test.dot", "test=equal"}
	pubParams := [][]string{
		nil,
		{"publish = 'insert'"},
		{"publish = 'insert,update,delete'"},
	}

	tables := []string{
		test.SetupTestTable(ctx, t, pool),
		test.SetupTestTable(ctx, t, pool),
	}

	for _, givenPubName := range pubNames {
		for i, givenPubParams := range pubParams {
			testName := fmt.Sprintf("%s_%d", givenPubName, i)
			t.Run(testName, func(t *testing.T) {
				is := is.New(t)
				err := CreatePublication(
					ctx,
					pool,
					givenPubName,
					CreatePublicationOptions{
						Tables:            tables,
						PublicationParams: givenPubParams,
					},
				)
				is.NoErr(err)
				// cleanup
				is.NoErr(DropPublication(ctx, pool, givenPubName, DropPublicationOptions{}))
			})
		}
	}

	// Without tables
	t.Run("fails without tables", func(t *testing.T) {
		is := is.New(t)

		err := CreatePublication(ctx, nil, "testpub", CreatePublicationOptions{})
		is.Equal(err.Error(), `publication "testpub" requires at least one table`)
	})
}
71 |
// TestCreatePublicationForTables verifies that a publication can be created
// for a single table as well as for multiple tables at once.
func TestCreatePublicationForTables(t *testing.T) {
	ctx := test.Context(t)
	pub := test.RandomIdentifier(t)
	pool := test.ConnectPool(ctx, t, test.RegularConnString)

	tables := [][]string{
		{test.SetupTestTable(ctx, t, pool)},
		{test.SetupTestTable(ctx, t, pool), test.SetupTestTable(ctx, t, pool)},
	}

	for _, givenTables := range tables {
		testName := strings.Join(givenTables, ",")
		t.Run(testName, func(t *testing.T) {
			is := is.New(t)
			err := CreatePublication(
				ctx,
				pool,
				pub,
				CreatePublicationOptions{
					Tables: givenTables,
				},
			)
			is.NoErr(err)
			// cleanup
			is.NoErr(DropPublication(ctx, pool, pub, DropPublicationOptions{}))
		})
	}
}
100 |
// TestDropPublication verifies that dropping a missing publication fails with
// an "undefined object" error (42704) unless IfExists is set.
func TestDropPublication(t *testing.T) {
	ctx := test.Context(t)
	is := is.New(t)
	pub := test.RandomIdentifier(t)

	pool := test.ConnectPool(ctx, t, test.RegularConnString)
	err := DropPublication(
		ctx,
		pool,
		pub,
		DropPublicationOptions{
			IfExists: false, // fail if pub doesn't exist
		},
	)
	test.IsPgError(is, err, "42704")

	// dropping again with IfExists set must be a silent no-op
	err = DropPublication(
		ctx,
		pool,
		pub,
		DropPublicationOptions{
			IfExists: true, // don't fail if pub doesn't exist
		},
	)
	is.NoErr(err)
}
128 |
--------------------------------------------------------------------------------
/source/logrepl/internal/relationset.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2022 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package internal
16 |
17 | import (
18 | "errors"
19 | "fmt"
20 |
21 | "github.com/conduitio/conduit-connector-postgres/source/types"
22 | "github.com/jackc/pglogrepl"
23 | "github.com/jackc/pgx/v5/pgtype"
24 | )
25 |
// RelationSet can be used to build a cache of relations returned by logical
// replication.
type RelationSet struct {
	// relations caches RelationMessages by relation (table) OID.
	relations map[uint32]*pglogrepl.RelationMessage
	// connInfo maps Postgres type OIDs to codecs used for decoding values.
	connInfo *pgtype.Map
}
32 |
33 | // NewRelationSet creates a new relation set.
34 | func NewRelationSet() *RelationSet {
35 | return &RelationSet{
36 | relations: map[uint32]*pglogrepl.RelationMessage{},
37 | connInfo: pgtype.NewMap(),
38 | }
39 | }
40 |
// Add caches the relation message, replacing any previous entry for the same
// relation ID.
func (rs *RelationSet) Add(r *pglogrepl.RelationMessage) {
	rs.relations[r.RelationID] = r
}
44 |
45 | func (rs *RelationSet) Get(id uint32) (*pglogrepl.RelationMessage, error) {
46 | msg, ok := rs.relations[id]
47 | if !ok {
48 | return nil, fmt.Errorf("no relation for %d", id)
49 | }
50 | return msg, nil
51 | }
52 |
53 | func (rs *RelationSet) Values(id uint32, row *pglogrepl.TupleData) (map[string]any, error) {
54 | if row == nil {
55 | return nil, errors.New("no tuple data")
56 | }
57 |
58 | rel, err := rs.Get(id)
59 | if err != nil {
60 | return nil, fmt.Errorf("no relation for %d", id)
61 | }
62 |
63 | values := map[string]any{}
64 |
65 | // assert same number of row and rel columns
66 | for i, tuple := range row.Columns {
67 | col := rel.Columns[i]
68 | v, decodeErr := rs.decodeValue(col, tuple.Data)
69 | if decodeErr != nil {
70 | return nil, fmt.Errorf("failed to decode value for column %q: %w", col.Name, err)
71 | }
72 |
73 | values[col.Name] = v
74 | }
75 |
76 | return values, nil
77 | }
78 |
// oidToCodec returns the codec registered for the given type OID, falling
// back to the codec for pgtype.UnknownOID (which decodes to text) when the
// OID is not known to the type map.
func (rs *RelationSet) oidToCodec(id uint32) pgtype.Codec {
	dt, ok := rs.connInfo.TypeForOID(id)
	if !ok {
		// single level of recursion: UnknownOID is always present in the map
		return rs.oidToCodec(pgtype.UnknownOID)
	}
	return dt.Codec
}
86 |
// decodeValue decodes a single column's raw text-format data into a Go value
// and normalizes it via types.Format.
func (rs *RelationSet) decodeValue(col *pglogrepl.RelationMessageColumn, data []byte) (any, error) {
	decoder := rs.oidToCodec(col.DataType)
	// This workaround is due to an issue in pgx v5.7.1.
	// Namely, that version introduces an XML codec
	// (see: https://github.com/jackc/pgx/pull/2083/files#diff-8288d41e69f73d01a874b40de086684e5894da83a627e845e484b06d5e053a44).
	// The XML codec, however, always return nil when deserializing input bytes
	// (see: https://github.com/jackc/pgx/pull/2083#discussion_r1755768269).
	var val any
	var err error

	switch col.DataType {
	case pgtype.XMLOID, pgtype.XMLArrayOID, pgtype.JSONBOID, pgtype.JSONOID:
		// use the database/sql-style decoding path for XML/JSON types (see above)
		val, err = decoder.DecodeDatabaseSQLValue(rs.connInfo, col.DataType, pgtype.TextFormatCode, data)
	default:
		val, err = decoder.DecodeValue(rs.connInfo, col.DataType, pgtype.TextFormatCode, data)
	}

	if err != nil {
		return nil, fmt.Errorf("failed to decode value of pgtype %v: %w", col.DataType, err)
	}

	// normalize driver-specific values into connector-friendly Go types
	v, err := types.Format(col.DataType, val)
	if err != nil {
		return nil, fmt.Errorf("failed to format column %q type %T: %w", col.Name, val, err)
	}

	return v, nil
}
115 |
--------------------------------------------------------------------------------
/source/logrepl/internal/replication_slot.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2024 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package internal
16 |
17 | import (
18 | "context"
19 | "errors"
20 | "fmt"
21 |
22 | "github.com/jackc/pglogrepl"
23 | "github.com/jackc/pgx/v5"
24 | "github.com/jackc/pgx/v5/pgxpool"
25 | )
26 |
// ErrMissingSlot is returned when the requested replication slot does not exist.
var ErrMissingSlot = errors.New("replication slot missing")

// ReadReplicationSlotResult holds the state of a replication slot as read
// from pg_replication_slots.
type ReadReplicationSlotResult struct {
	Name              string
	ConfirmedFlushLSN pglogrepl.LSN
	RestartLSN        pglogrepl.LSN
}
34 |
35 | // ReadReplicationSlot returns state of an existing replication slot.
36 | func ReadReplicationSlot(ctx context.Context, conn *pgxpool.Pool, name string) (ReadReplicationSlotResult, error) {
37 | var r ReadReplicationSlotResult
38 |
39 | qr := conn.QueryRow(ctx, "SELECT slot_name, confirmed_flush_lsn, restart_lsn FROM pg_replication_slots WHERE slot_name=$1", name)
40 | if err := qr.Scan(&r.Name, &r.ConfirmedFlushLSN, &r.RestartLSN); err != nil {
41 | if errors.Is(err, pgx.ErrNoRows) {
42 | return ReadReplicationSlotResult{}, fmt.Errorf("%s: %w", name, ErrMissingSlot)
43 | }
44 | return ReadReplicationSlotResult{}, fmt.Errorf("failed to read replication slot %q: %w", name, err)
45 | }
46 |
47 | return r, nil
48 | }
49 |
--------------------------------------------------------------------------------
/source/logrepl/internal/replication_slot_test.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2024 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package internal
16 |
17 | import (
18 | "errors"
19 | "fmt"
20 | "testing"
21 |
22 | "github.com/conduitio/conduit-connector-postgres/test"
23 | "github.com/matryer/is"
24 | )
25 |
// Test_ReadReplicationSlot exercises ReadReplicationSlot against an existing
// slot, a missing slot, and a closed connection pool. The subtests share one
// slot name and rely on running in order (the slot exists only in the first).
func Test_ReadReplicationSlot(t *testing.T) {
	var (
		ctx      = test.Context(t)
		pool     = test.ConnectPool(ctx, t, test.RepmgrConnString)
		slotName = test.RandomIdentifier(t)
	)

	t.Run("read replication slot", func(t *testing.T) {
		is := is.New(t)

		test.CreateReplicationSlot(t, pool, slotName)
		res, err := ReadReplicationSlot(ctx, pool, slotName)
		is.NoErr(err)
		is.Equal(res.Name, slotName)
		is.True(res.ConfirmedFlushLSN > 0)
		is.True(res.RestartLSN > 0)
	})

	t.Run("fails when slot is missing", func(t *testing.T) {
		is := is.New(t)

		_, err := ReadReplicationSlot(ctx, pool, slotName)
		is.True(err != nil)
		is.True(errors.Is(err, ErrMissingSlot))
	})

	t.Run("fails when conn errors", func(t *testing.T) {
		is := is.New(t)
		// use a dedicated pool and close it to force a connection error
		pool := test.ConnectPool(ctx, t, test.RepmgrConnString)
		pool.Close()

		_, err := ReadReplicationSlot(ctx, pool, slotName)
		is.True(err != nil)
		is.Equal(err.Error(), fmt.Sprintf("failed to read replication slot %q: closed pool", slotName))
	})
}
62 |
--------------------------------------------------------------------------------
/source/logrepl/internal/subscription_test.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2022 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package internal
16 |
17 | import (
18 | "context"
19 | "errors"
20 | "fmt"
21 | "sync/atomic"
22 | "testing"
23 | "time"
24 |
25 | "github.com/conduitio/conduit-connector-postgres/test"
26 | "github.com/jackc/pglogrepl"
27 | "github.com/jackc/pgx/v5/pgxpool"
28 | "github.com/matryer/is"
29 | )
30 |
// TestSubscription_Create verifies that creating a subscription on a closed
// pool fails with a descriptive error.
func TestSubscription_Create(t *testing.T) {
	ctx := test.Context(t)
	is := is.New(t)
	pool := test.ConnectPool(ctx, t, test.RepmgrConnString)
	pool.Close()

	_, err := CreateSubscription(ctx, pool, "slotname", "pubname", nil, 0, nil)
	is.Equal(err.Error(), "could not establish replication connection: closed pool")
}
40 |
// TestSubscription_WithRepmgr runs a subscription against two test tables and
// checks that inserts/updates produce the expected sequence of replication
// messages (Relation messages only on first change per table), and that WAL
// positions advance consistently.
func TestSubscription_WithRepmgr(t *testing.T) {
	var (
		ctx    = test.Context(t)
		pool   = test.ConnectPool(ctx, t, test.RepmgrConnString)
		table1 = test.SetupTestTable(ctx, t, pool)
		table2 = test.SetupTestTable(ctx, t, pool)
	)

	sub, messages := setupSubscription(ctx, t, pool, table1, table2)

	// fetchAndAssertMessageTypes receives exactly len(msgTypes) messages and
	// asserts each one's type, failing on a 1s timeout per message.
	fetchAndAssertMessageTypes := func(is *is.I, m chan pglogrepl.Message, msgTypes ...pglogrepl.MessageType) []pglogrepl.Message {
		out := make([]pglogrepl.Message, len(msgTypes))
		for i, msgType := range msgTypes {
			select {
			case msg := <-m:
				is.Equal(msg.Type(), msgType)
				out[i] = msg
			case <-time.After(time.Second):
				is.Fail() // timeout while waiting to receive message
			}
		}
		return out
	}

	t.Run("first insert table1", func(t *testing.T) {
		is := is.New(t)
		query := `INSERT INTO %s (id, column1, column2, column3)
			VALUES (6, 'bizz', 456, false)`
		_, err := pool.Exec(ctx, fmt.Sprintf(query, table1))
		is.NoErr(err)

		_ = fetchAndAssertMessageTypes(
			is,
			messages,
			// first insert should contain the relation as well
			pglogrepl.MessageTypeBegin,
			pglogrepl.MessageTypeRelation,
			pglogrepl.MessageTypeInsert,
			pglogrepl.MessageTypeCommit,
		)
	})

	t.Run("second insert table1", func(t *testing.T) {
		is := is.New(t)
		query := `INSERT INTO %s (id, column1, column2, column3)
			VALUES (7, 'bizz', 456, false)`
		_, err := pool.Exec(ctx, fmt.Sprintf(query, table1))
		is.NoErr(err)

		_ = fetchAndAssertMessageTypes(
			is,
			messages,
			// second insert does not ship the relation
			pglogrepl.MessageTypeBegin,
			pglogrepl.MessageTypeInsert,
			pglogrepl.MessageTypeCommit,
		)
	})

	t.Run("first update table2", func(t *testing.T) {
		is := is.New(t)
		query := `UPDATE %s SET column1 = 'foo' WHERE id = 1`
		_, err := pool.Exec(ctx, fmt.Sprintf(query, table2))
		is.NoErr(err)

		_ = fetchAndAssertMessageTypes(
			is,
			messages,
			// first insert should contain the relation as well
			pglogrepl.MessageTypeBegin,
			pglogrepl.MessageTypeRelation,
			pglogrepl.MessageTypeUpdate,
			pglogrepl.MessageTypeCommit,
		)
	})

	t.Run("update all table 2", func(t *testing.T) {
		is := is.New(t)
		query := `UPDATE %s SET column1 = 'bar'` // update all rows
		_, err := pool.Exec(ctx, fmt.Sprintf(query, table2))
		is.NoErr(err)

		_ = fetchAndAssertMessageTypes(
			is,
			messages,
			// we already got the relation so second update is without relation
			pglogrepl.MessageTypeBegin,
			pglogrepl.MessageTypeUpdate,
			pglogrepl.MessageTypeUpdate,
			pglogrepl.MessageTypeUpdate,
			pglogrepl.MessageTypeUpdate,
			pglogrepl.MessageTypeCommit,
		)
	})

	t.Run("Last WAL written is behind keepalive", func(t *testing.T) {
		is := is.New(t)
		// give the keepalive mechanism time to report a newer server WAL end
		time.Sleep(2 * time.Second)

		walFlushed := pglogrepl.LSN(atomic.LoadUint64((*uint64)(&sub.walFlushed)))
		serverWALEnd := pglogrepl.LSN(atomic.LoadUint64((*uint64)(&sub.serverWALEnd)))

		// nothing was acked, so written WAL is ahead of flushed but behind server
		is.True(serverWALEnd >= sub.walWritten)
		is.True(sub.walWritten > walFlushed)
	})

	t.Run("no more messages", func(t *testing.T) {
		isNoMoreMessages(t, messages, time.Millisecond*500)
	})
}
151 |
152 | func TestSubscription_ClosedContext(t *testing.T) {
153 | ctx := test.Context(t)
154 | ctx, cancel := context.WithCancel(ctx)
155 |
156 | var (
157 | is = is.New(t)
158 | pool = test.ConnectPool(ctx, t, test.RepmgrConnString)
159 | table = test.SetupTestTable(ctx, t, pool)
160 | )
161 |
162 | sub, messages := setupSubscription(ctx, t, pool, table)
163 |
164 | // insert to get new messages into publication
165 | query := `INSERT INTO %s (id, column1, column2, column3)
166 | VALUES (6, 'bizz', 456, false)`
167 | _, err := pool.Exec(ctx, fmt.Sprintf(query, table))
168 | is.NoErr(err)
169 |
170 | cancel()
171 | // do not fetch messages, just close context instead
172 | select {
173 | case <-time.After(time.Second):
174 | is.Fail() // timed out while waiting for subscription to close
175 | case <-sub.Done():
176 | // all good
177 | }
178 |
179 | is.True(errors.Is(sub.Err(), context.Canceled))
180 | isNoMoreMessages(t, messages, time.Millisecond*500)
181 | }
182 |
183 | func TestSubscription_Ack(t *testing.T) {
184 | is := is.New(t)
185 |
186 | s := &Subscription{}
187 | s.Ack(12345)
188 |
189 | is.Equal(s.walFlushed, pglogrepl.LSN(12345))
190 | }
191 |
192 | func TestSubscription_Stop(t *testing.T) {
193 | t.Run("with stop function", func(t *testing.T) {
194 | is := is.New(t)
195 |
196 | var stopped bool
197 |
198 | s := &Subscription{
199 | stop: func() {
200 | stopped = true
201 | },
202 | }
203 |
204 | s.Stop()
205 | is.True(stopped)
206 | })
207 |
208 | t.Run("with missing stop function", func(*testing.T) {
209 | s := &Subscription{}
210 | s.Stop()
211 | })
212 | }
213 |
// setupSubscription creates a publication for the given tables, starts a
// Subscription whose handler forwards every received pglogrepl.Message to the
// returned channel, and blocks until the subscription reports ready.
// Teardown of the subscription and removal of the replication slot are
// registered via t.Cleanup.
func setupSubscription(
	ctx context.Context,
	t *testing.T,
	pool *pgxpool.Pool,
	tables ...string,
) (*Subscription, chan pglogrepl.Message) {
	is := is.New(t)

	slotName := test.RandomIdentifier(t)
	publication := test.RandomIdentifier(t)

	test.CreatePublication(t, pool, publication, tables)

	// The channel is unbuffered: the handler blocks on each message until the
	// test consumes it (or the context is canceled).
	messages := make(chan pglogrepl.Message)
	sub, err := CreateSubscription(
		ctx,
		pool,
		slotName,
		publication,
		tables,
		0,
		func(ctx context.Context, msg pglogrepl.Message, lsn pglogrepl.LSN) (pglogrepl.LSN, error) {
			select {
			case <-ctx.Done():
				return 0, ctx.Err()
			case messages <- msg:
				return lsn, nil
			}
		},
	)
	is.NoErr(err)

	// Short status timeout so keepalive/status traffic happens quickly in tests.
	sub.StatusTimeout = 1 * time.Second

	go func() {
		err := sub.Run(ctx)
		if !errors.Is(err, context.Canceled) {
			t.Logf("unexpected error: %+v", err)
			is.Fail()
		}
	}()

	// wait for subscription to be ready
	select {
	case <-sub.Ready():
		// all good
	case <-time.After(5 * time.Second):
		t.Fatalf("timed out while waiting for subscription to be ready")
	}

	t.Cleanup(func() {
		// stop subscription
		cctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
		is.NoErr(sub.Teardown(cctx))
		cancel()

		// Drop the replication slot explicitly so repeated test runs do not
		// accumulate slots on the server.
		_, err := pool.Exec(
			context.Background(),
			"SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots WHERE slot_name=$1",
			slotName,
		)
		is.NoErr(err)
	})

	return sub, messages
}
280 |
281 | // isNoMoreMessages waits for the duration of the timeout and logs any new
282 | // messages if they are received. If a message is received that is not a "begin"
283 | // or "commit" message, the test is marked as failed.
284 | func isNoMoreMessages(t *testing.T, messages <-chan pglogrepl.Message, timeout time.Duration) {
285 | is := is.New(t)
286 |
287 | // there should be no more messages, wait shortly to make sure and log any
288 | // messages that we receive in the meantime
289 | var messagesReceived bool
290 | timeoutChan := time.After(timeout)
291 | for {
292 | select {
293 | case msg := <-messages:
294 | // empty begin/commit blocks are expected, work is being done to
295 | // reduce them (https://commitfest.postgresql.org/33/3093/)
296 | if msg.Type() == pglogrepl.MessageTypeBegin ||
297 | msg.Type() == pglogrepl.MessageTypeCommit {
298 | t.Logf("got message of type %s: %+v", msg.Type(), msg)
299 | } else {
300 | t.Logf("unexpected message of type %s: %+v", msg.Type(), msg)
301 | messagesReceived = true
302 | }
303 | case <-timeoutChan:
304 | if messagesReceived {
305 | is.Fail() // expected no more messages
306 | }
307 | return
308 | }
309 | }
310 | }
311 |
--------------------------------------------------------------------------------
/source/position/position.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2024 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package position
16 |
17 | import (
18 | "encoding/json"
19 | "fmt"
20 |
21 | "github.com/conduitio/conduit-commons/opencdc"
22 | "github.com/jackc/pglogrepl"
23 | )
24 |
//go:generate stringer -type=Type -trimprefix Type

// Type identifies which phase of the source a Position belongs to.
type Type int

const (
	// TypeInitial is the zero value, used before any record has been read.
	TypeInitial Type = iota
	// TypeSnapshot marks a position taken during the initial table snapshot.
	TypeSnapshot
	// TypeCDC marks a position taken during logical replication.
	TypeCDC
)

// Position is this connector's structured representation of an
// opencdc.Position; it is serialized to/from JSON.
type Position struct {
	Type      Type              `json:"type"`
	Snapshots SnapshotPositions `json:"snapshots,omitempty"`
	// LastLSN is a Postgres LSN string, e.g. "4/137515E8".
	LastLSN string `json:"last_lsn,omitempty"`
}

// SnapshotPositions maps a table name to its snapshot progress.
type SnapshotPositions map[string]SnapshotPosition

// SnapshotPosition records snapshot progress within a single table
// (presumably in terms of the table's key values — see the snapshot fetch
// worker for the exact semantics).
type SnapshotPosition struct {
	LastRead    int64 `json:"last_read"`
	SnapshotEnd int64 `json:"snapshot_end"`
}
47 |
48 | func ParseSDKPosition(sdkPos opencdc.Position) (Position, error) {
49 | var p Position
50 |
51 | if len(sdkPos) == 0 {
52 | return p, nil
53 | }
54 |
55 | if err := json.Unmarshal(sdkPos, &p); err != nil {
56 | return p, fmt.Errorf("invalid position: %w", err)
57 | }
58 | return p, nil
59 | }
60 |
61 | func (p Position) ToSDKPosition() opencdc.Position {
62 | v, err := json.Marshal(p)
63 | if err != nil {
64 | // This should never happen, all Position structs should be valid.
65 | panic(err)
66 | }
67 | return v
68 | }
69 |
70 | // LSN returns the last LSN (Log Sequence Number) in the position.
71 | func (p Position) LSN() (pglogrepl.LSN, error) {
72 | if p.LastLSN == "" {
73 | return 0, nil
74 | }
75 |
76 | lsn, err := pglogrepl.ParseLSN(p.LastLSN)
77 | if err != nil {
78 | return 0, err
79 | }
80 |
81 | return lsn, nil
82 | }
83 |
--------------------------------------------------------------------------------
/source/position/position_test.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2024 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package position
16 |
17 | import (
18 | "testing"
19 |
20 | "github.com/conduitio/conduit-commons/opencdc"
21 | "github.com/matryer/is"
22 | )
23 |
24 | func Test_ToSDKPosition(t *testing.T) {
25 | is := is.New(t)
26 |
27 | p := Position{
28 | Type: TypeSnapshot,
29 | Snapshots: SnapshotPositions{
30 | "orders": {LastRead: 1, SnapshotEnd: 2},
31 | },
32 | LastLSN: "4/137515E8",
33 | }
34 |
35 | sdkPos := p.ToSDKPosition()
36 | is.Equal(
37 | string(sdkPos),
38 | `{"type":1,"snapshots":{"orders":{"last_read":1,"snapshot_end":2}},"last_lsn":"4/137515E8"}`,
39 | )
40 | }
41 |
42 | func Test_PositionLSN(t *testing.T) {
43 | is := is.New(t)
44 |
45 | invalid := Position{LastLSN: "invalid"}
46 | _, err := invalid.LSN()
47 | is.True(err != nil)
48 | is.Equal(err.Error(), "failed to parse LSN: expected integer")
49 |
50 | valid := Position{LastLSN: "4/137515E8"}
51 | lsn, noErr := valid.LSN()
52 | is.NoErr(noErr)
53 | is.Equal(uint64(lsn), uint64(17506309608))
54 | }
55 |
56 | func Test_ParseSDKPosition(t *testing.T) {
57 | is := is.New(t)
58 |
59 | valid := opencdc.Position(
60 | []byte(
61 | `{"type":1,"snapshots":{"orders":{"last_read":1,"snapshot_end":2}},"last_lsn":"4/137515E8"}`,
62 | ),
63 | )
64 |
65 | p, validErr := ParseSDKPosition(valid)
66 | is.NoErr(validErr)
67 |
68 | is.Equal(p, Position{
69 | Type: TypeSnapshot,
70 | Snapshots: SnapshotPositions{
71 | "orders": {LastRead: 1, SnapshotEnd: 2},
72 | },
73 | LastLSN: "4/137515E8",
74 | })
75 |
76 | _, invalidErr := ParseSDKPosition(opencdc.Position("{"))
77 | is.True(invalidErr != nil)
78 | is.Equal(invalidErr.Error(), "invalid position: unexpected end of JSON input")
79 | }
80 |
--------------------------------------------------------------------------------
/source/position/type_string.go:
--------------------------------------------------------------------------------
1 | // Code generated by "stringer -type=Type -trimprefix Type"; DO NOT EDIT.
2 |
3 | package position
4 |
5 | import "strconv"
6 |
7 | func _() {
8 | // An "invalid array index" compiler error signifies that the constant values have changed.
9 | // Re-run the stringer command to generate them again.
10 | var x [1]struct{}
11 | _ = x[TypeInitial-0]
12 | _ = x[TypeSnapshot-1]
13 | _ = x[TypeCDC-2]
14 | }
15 |
16 | const _Type_name = "InitialSnapshotCDC"
17 |
18 | var _Type_index = [...]uint8{0, 7, 15, 18}
19 |
20 | func (i Type) String() string {
21 | if i < 0 || i >= Type(len(_Type_index)-1) {
22 | return "Type(" + strconv.FormatInt(int64(i), 10) + ")"
23 | }
24 | return _Type_name[_Type_index[i]:_Type_index[i+1]]
25 | }
26 |
--------------------------------------------------------------------------------
/source/schema/avro.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2024 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package schema
16 |
17 | import (
18 | "cmp"
19 | "fmt"
20 | "slices"
21 |
22 | "github.com/hamba/avro/v2"
23 | "github.com/jackc/pglogrepl"
24 | "github.com/jackc/pgx/v5/pgconn"
25 | "github.com/jackc/pgx/v5/pgtype"
26 | )
27 |
// Avro is the shared Avro schema extractor. avroMap statically maps Postgres
// type names to Avro schemas; types not present in the map (e.g. numeric)
// are resolved dynamically by extractType.
var Avro = &avroExtractor{
	pgMap: pgtype.NewMap(),
	avroMap: map[string]*avro.PrimitiveSchema{
		"bool":    avro.NewPrimitiveSchema(avro.Boolean, nil),
		"bytea":   avro.NewPrimitiveSchema(avro.Bytes, nil),
		"float4":  avro.NewPrimitiveSchema(avro.Float, nil),
		"float8":  avro.NewPrimitiveSchema(avro.Double, nil),
		"int8":    avro.NewPrimitiveSchema(avro.Long, nil),
		"int4":    avro.NewPrimitiveSchema(avro.Int, nil),
		"int2":    avro.NewPrimitiveSchema(avro.Int, nil),
		"text":    avro.NewPrimitiveSchema(avro.String, nil),
		"varchar": avro.NewPrimitiveSchema(avro.String, nil),
		"jsonb":   avro.NewPrimitiveSchema(avro.Bytes, nil),
		"json":    avro.NewPrimitiveSchema(avro.Bytes, nil),
		// timestamptz is an absolute instant (timestamp-micros); plain
		// timestamp is wall-clock time (local-timestamp-micros).
		"timestamptz": avro.NewPrimitiveSchema(
			avro.Long,
			avro.NewPrimitiveLogicalSchema(avro.TimestampMicros),
		),
		"timestamp": avro.NewPrimitiveSchema(
			avro.Long,
			avro.NewPrimitiveLogicalSchema(avro.LocalTimestampMicros),
		),
		"date": avro.NewPrimitiveSchema(
			avro.Int,
			avro.NewPrimitiveLogicalSchema(avro.Date),
		),
		"uuid": avro.NewPrimitiveSchema(
			avro.String,
			avro.NewPrimitiveLogicalSchema(avro.UUID),
		),
	},
}
60 |
// avroExtractor builds Avro record schemas from Postgres type information.
type avroExtractor struct {
	// pgMap resolves Postgres OIDs to type info.
	pgMap *pgtype.Map
	// avroMap is the static Postgres-type-name -> Avro-schema table.
	avroMap map[string]*avro.PrimitiveSchema
}
65 |
66 | // ExtractLogrepl extracts an Avro schema from the given pglogrepl.RelationMessage.
67 | // If `fieldNames` are specified, then only the given fields will be included in the schema.
68 | func (a avroExtractor) ExtractLogrepl(schemaName string, rel *pglogrepl.RelationMessage, fieldNames ...string) (*avro.RecordSchema, error) {
69 | var fields []pgconn.FieldDescription
70 |
71 | for i := range rel.Columns {
72 | fields = append(fields, pgconn.FieldDescription{
73 | Name: rel.Columns[i].Name,
74 | DataTypeOID: rel.Columns[i].DataType,
75 | TypeModifier: rel.Columns[i].TypeModifier,
76 | })
77 | }
78 |
79 | return a.Extract(schemaName, fields, fieldNames...)
80 | }
81 |
82 | // Extract extracts an Avro schema from the given Postgres field descriptions.
83 | // If `fieldNames` are specified, then only the given fields will be included in the schema.
84 | func (a *avroExtractor) Extract(schemaName string, fields []pgconn.FieldDescription, fieldNames ...string) (*avro.RecordSchema, error) {
85 | var avroFields []*avro.Field
86 |
87 | for _, f := range fields {
88 | if len(fieldNames) > 0 && !slices.Contains(fieldNames, f.Name) {
89 | continue
90 | }
91 |
92 | t, ok := a.pgMap.TypeForOID(f.DataTypeOID)
93 | if !ok {
94 | return nil, fmt.Errorf("field %q with OID %d cannot be resolved", f.Name, f.DataTypeOID)
95 | }
96 |
97 | s, err := a.extractType(t, f.TypeModifier)
98 | if err != nil {
99 | return nil, err
100 | }
101 |
102 | af, err := avro.NewField(f.Name, s)
103 | if err != nil {
104 | return nil, fmt.Errorf("failed to create avro field %q: %w", f.Name, err)
105 | }
106 |
107 | avroFields = append(avroFields, af)
108 | }
109 |
110 | slices.SortFunc(avroFields, func(a, b *avro.Field) int {
111 | return cmp.Compare(a.Name(), b.Name())
112 | })
113 |
114 | sch, err := avro.NewRecordSchema(schemaName, "", avroFields)
115 | if err != nil {
116 | return nil, fmt.Errorf("failed to create avro schema: %w", err)
117 | }
118 |
119 | return sch, nil
120 | }
121 |
122 | func (a *avroExtractor) extractType(t *pgtype.Type, typeMod int32) (avro.Schema, error) {
123 | if ps, ok := a.avroMap[t.Name]; ok {
124 | return ps, nil
125 | }
126 |
127 | switch t.OID {
128 | case pgtype.NumericOID:
129 | scale := int((typeMod - 4) & 65535)
130 | precision := int(((typeMod - 4) >> 16) & 65535)
131 | return avro.NewPrimitiveSchema(
132 | avro.Bytes,
133 | avro.NewDecimalLogicalSchema(precision, scale),
134 | ), nil
135 | default:
136 | return nil, fmt.Errorf("cannot resolve field type %q ", t.Name)
137 | }
138 | }
139 |
--------------------------------------------------------------------------------
/source/schema/avro_test.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2024 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package schema
16 |
17 | import (
18 | "cmp"
19 | "context"
20 | "fmt"
21 | "math/big"
22 | "slices"
23 | "testing"
24 | "time"
25 |
26 | "github.com/conduitio/conduit-connector-postgres/source/types"
27 | "github.com/conduitio/conduit-connector-postgres/test"
28 | "github.com/hamba/avro/v2"
29 | "github.com/jackc/pgx/v5/pgconn"
30 | "github.com/jackc/pgx/v5/pgtype"
31 | "github.com/matryer/is"
32 | )
33 |
// Test_AvroExtract creates a table covering all statically supported Postgres
// types, inserts one row, extracts an Avro schema from the result set and
// verifies that the row round-trips through Avro marshal/unmarshal.
func Test_AvroExtract(t *testing.T) {
	ctx := test.Context(t)
	is := is.New(t)

	c := test.ConnectSimple(ctx, t, test.RegularConnString)
	table := setupAvroTestTable(ctx, t, c)
	insertAvroTestRow(ctx, t, c, table)

	rows, err := c.Query(ctx, "SELECT * FROM "+table)
	is.NoErr(err)
	defer rows.Close()

	// advance to the single inserted row
	rows.Next()

	values, err := rows.Values()
	is.NoErr(err)

	fields := rows.FieldDescriptions()

	sch, err := Avro.Extract(table, fields)
	is.NoErr(err)

	t.Run("schema is parsable", func(t *testing.T) {
		is := is.New(t)
		is.NoErr(err)
		is.Equal(sch, avroTestSchema(t, table))

		_, err = avro.Parse(sch.String())
		is.NoErr(err)
	})

	t.Run("serde row", func(t *testing.T) {
		is := is.New(t)

		row := avrolizeMap(fields, values)

		sch, err := avro.Parse(sch.String())
		is.NoErr(err)

		data, err := avro.Marshal(sch, row)
		is.NoErr(err)
		is.True(len(data) > 0)

		decoded := make(map[string]any)
		is.NoErr(avro.Unmarshal(sch, data, &decoded))

		is.Equal(len(decoded), len(row))
		is.Equal(row["col_boolean"], decoded["col_boolean"])
		is.Equal(row["col_bytea"], decoded["col_bytea"])
		is.Equal(row["col_varchar"], decoded["col_varchar"])
		is.Equal(row["col_date"], decoded["col_date"])
		is.Equal(row["col_float4"], decoded["col_float4"])
		is.Equal(row["col_float8"], decoded["col_float8"])

		// int2/int4 decode to Go int, so widen the originals before comparing.
		colInt2 := int(row["col_int2"].(int16))
		is.Equal(colInt2, decoded["col_int2"])

		colInt4 := int(row["col_int4"].(int32))
		is.Equal(colInt4, decoded["col_int4"])

		is.Equal(row["col_int8"], decoded["col_int8"])

		// Compare numerics via their rational string representation.
		numRow := row["col_numeric"].(*big.Rat)
		numDecoded := decoded["col_numeric"].(*big.Rat)
		is.Equal(numRow.RatString(), numDecoded.RatString())

		is.Equal(row["col_text"], decoded["col_text"])

		// Compare timestamps in UTC to avoid representation differences.
		rowTS, colTS := row["col_timestamp"].(time.Time), decoded["col_timestamp"].(time.Time)
		is.Equal(rowTS.UTC().String(), colTS.UTC().String())

		rowTSTZ, colTSTZ := row["col_timestamptz"].(time.Time), decoded["col_timestamptz"].(time.Time)
		is.Equal(rowTSTZ.UTC().String(), colTSTZ.UTC().String())

		is.Equal(row["col_uuid"], decoded["col_uuid"])
	})
}
111 |
112 | func setupAvroTestTable(ctx context.Context, t *testing.T, conn test.Querier) string {
113 | is := is.New(t)
114 | table := test.RandomIdentifier(t)
115 |
116 | query := `
117 | CREATE TABLE %s (
118 | col_boolean boolean,
119 | col_bytea bytea,
120 | col_varchar varchar(10),
121 | col_date date,
122 | col_float4 float4,
123 | col_float8 float8,
124 | col_int2 int2,
125 | col_int4 int4,
126 | col_int8 int8,
127 | col_numeric numeric(8,2),
128 | col_text text,
129 | col_timestamp timestamp,
130 | col_timestamptz timestamptz,
131 | col_uuid uuid
132 | )`
133 | query = fmt.Sprintf(query, table)
134 | _, err := conn.Exec(ctx, query)
135 | is.NoErr(err)
136 |
137 | return table
138 | }
139 |
// insertAvroTestRow inserts a single row with a known value for every column
// of the table created by setupAvroTestTable.
func insertAvroTestRow(ctx context.Context, t *testing.T, conn test.Querier, table string) {
	is := is.New(t)
	query := `
	INSERT INTO %s (
		col_boolean,
		col_bytea,
		col_varchar,
		col_date,
		col_float4,
		col_float8,
		col_int2,
		col_int4,
		col_int8,
		col_numeric,
		col_text,
		col_timestamp,
		col_timestamptz,
		col_uuid
	) VALUES (
		true,                                  -- col_boolean
		'\x07',                                -- col_bytea
		'9',                                   -- col_varchar
		'2022-03-14',                          -- col_date
		15,                                    -- col_float4
		16.16,                                 -- col_float8
		32767,                                 -- col_int2
		2147483647,                            -- col_int4
		9223372036854775807,                   -- col_int8
		'292929.29',                           -- col_numeric
		'foo bar baz',                         -- col_text
		'2022-03-14 15:16:17',                 -- col_timestamp
		'2022-03-14 15:16:17-08',              -- col_timestamptz
		'bd94ee0b-564f-4088-bf4e-8d5e626caf66' -- col_uuid
	)`
	query = fmt.Sprintf(query, table)
	_, err := conn.Exec(ctx, query)
	is.NoErr(err)
}
178 |
// avroTestSchema builds the Avro schema expected from Avro.Extract for the
// table created by setupAvroTestTable. Fields are sorted by name to mirror
// the deterministic ordering that Extract applies.
func avroTestSchema(t *testing.T, table string) avro.Schema {
	is := is.New(t)

	fields := []*avro.Field{
		assert(avro.NewField("col_boolean", avro.NewPrimitiveSchema(avro.Boolean, nil))),
		assert(avro.NewField("col_bytea", avro.NewPrimitiveSchema(avro.Bytes, nil))),
		assert(avro.NewField("col_varchar", avro.NewPrimitiveSchema(avro.String, nil))),
		assert(avro.NewField("col_float4", avro.NewPrimitiveSchema(avro.Float, nil))),
		assert(avro.NewField("col_float8", avro.NewPrimitiveSchema(avro.Double, nil))),
		assert(avro.NewField("col_int2", avro.NewPrimitiveSchema(avro.Int, nil))),
		assert(avro.NewField("col_int4", avro.NewPrimitiveSchema(avro.Int, nil))),
		assert(avro.NewField("col_int8", avro.NewPrimitiveSchema(avro.Long, nil))),
		assert(avro.NewField("col_text", avro.NewPrimitiveSchema(avro.String, nil))),
		// numeric(8,2) maps to bytes with a decimal logical type.
		assert(avro.NewField("col_numeric", avro.NewPrimitiveSchema(
			avro.Bytes,
			avro.NewDecimalLogicalSchema(8, 2),
		))),
		assert(avro.NewField("col_date", avro.NewPrimitiveSchema(
			avro.Int,
			avro.NewPrimitiveLogicalSchema(avro.Date),
		))),
		assert(avro.NewField("col_timestamp", avro.NewPrimitiveSchema(
			avro.Long,
			avro.NewPrimitiveLogicalSchema(avro.LocalTimestampMicros),
		))),
		assert(avro.NewField("col_timestamptz", avro.NewPrimitiveSchema(
			avro.Long,
			avro.NewPrimitiveLogicalSchema(avro.TimestampMicros),
		))),
		assert(avro.NewField("col_uuid", avro.NewPrimitiveSchema(
			avro.String,
			avro.NewPrimitiveLogicalSchema(avro.UUID),
		))),
	}

	slices.SortFunc(fields, func(a, b *avro.Field) int {
		return cmp.Compare(a.Name(), b.Name())
	})

	s, err := avro.NewRecordSchema(table, "", fields)
	is.NoErr(err)

	return s
}
223 |
224 | func avrolizeMap(fields []pgconn.FieldDescription, values []any) map[string]any {
225 | row := make(map[string]any)
226 |
227 | for i, f := range fields {
228 | switch f.DataTypeOID {
229 | case pgtype.NumericOID:
230 | n := new(big.Rat)
231 | n.SetString(fmt.Sprint(types.Format(0, values[i])))
232 | row[f.Name] = n
233 | case pgtype.UUIDOID:
234 | row[f.Name] = fmt.Sprint(values[i])
235 | default:
236 | row[f.Name] = values[i]
237 | }
238 | }
239 |
240 | return row
241 | }
242 |
// assert panics on a non-nil error and otherwise passes the value through;
// it keeps the table-driven schema construction above terse.
func assert[T any](v T, err error) T {
	if err != nil {
		panic(err)
	}
	return v
}
250 |
--------------------------------------------------------------------------------
/source/snapshot/convert.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2024 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package snapshot
16 |
import (
	"fmt"
	"math"
)
20 |
// keyInt64 converts a table key value of any Go integer type to int64.
// Signed integers convert directly; unsigned integers are accepted as long
// as they fit into an int64 (generalization — previously only signed types
// were supported). Any other type is rejected with an error.
func keyInt64(id any) (int64, error) {
	switch t := id.(type) {
	case int:
		return int64(t), nil
	case int8:
		return int64(t), nil
	case int16:
		return int64(t), nil
	case int32:
		return int64(t), nil
	case int64:
		return t, nil
	case uint:
		if uint64(t) > math.MaxInt64 {
			return 0, fmt.Errorf("key value %d overflows int64", t)
		}
		return int64(t), nil
	case uint8:
		return int64(t), nil
	case uint16:
		return int64(t), nil
	case uint32:
		return int64(t), nil
	case uint64:
		if t > math.MaxInt64 {
			return 0, fmt.Errorf("key value %d overflows int64", t)
		}
		return int64(t), nil
	default:
		return 0, fmt.Errorf("invalid type for key %T", id)
	}
}
37 |
--------------------------------------------------------------------------------
/source/snapshot/iterator.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2024 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package snapshot
16 |
17 | import (
18 | "context"
19 | "errors"
20 | "fmt"
21 |
22 | "github.com/conduitio/conduit-commons/csync"
23 | "github.com/conduitio/conduit-commons/opencdc"
24 | cschema "github.com/conduitio/conduit-commons/schema"
25 | "github.com/conduitio/conduit-connector-postgres/source/position"
26 | sdk "github.com/conduitio/conduit-connector-sdk"
27 | "github.com/jackc/pgx/v5/pgxpool"
28 | "gopkg.in/tomb.v2"
29 | )
30 |
// ErrIteratorDone is returned by NextN once all snapshot data has been
// consumed and all outstanding records have been acked.
var ErrIteratorDone = errors.New("snapshot complete")

// Config holds the settings for a snapshot Iterator.
type Config struct {
	// Position is the SDK position to resume from; empty starts fresh.
	Position opencdc.Position
	// Tables lists the tables to snapshot.
	Tables []string
	// TableKeys maps each table name to the key column the fetch worker uses.
	TableKeys map[string]string
	// TXSnapshotID is passed to each fetch worker (presumably the exported
	// Postgres transaction snapshot ID — see FetchWorker).
	TXSnapshotID string
	// FetchSize is forwarded to each fetch worker's config.
	FetchSize int
	// WithAvroSchema attaches key/payload Avro schemas to emitted records.
	WithAvroSchema bool
}
41 |
// Iterator reads snapshot records produced by per-table fetch workers and
// exposes them through NextN/Ack/Teardown.
type Iterator struct {
	db *pgxpool.Pool

	// workersTomb supervises the fetch worker goroutines.
	workersTomb *tomb.Tomb
	workers     []*FetchWorker
	// acks counts records handed out but not yet acknowledged.
	acks csync.WaitGroup

	conf Config

	// lastPosition accumulates per-table snapshot progress.
	lastPosition position.Position

	// data receives batches from the workers; closed when all workers exit.
	data chan []FetchData
}
55 |
56 | func NewIterator(ctx context.Context, db *pgxpool.Pool, c Config) (*Iterator, error) {
57 | p, err := position.ParseSDKPosition(c.Position)
58 | if err != nil {
59 | return nil, fmt.Errorf("failed to parse position: %w", err)
60 | }
61 |
62 | if p.Snapshots == nil {
63 | p.Snapshots = make(position.SnapshotPositions)
64 | }
65 |
66 | t, _ := tomb.WithContext(ctx)
67 | i := &Iterator{
68 | db: db,
69 | workersTomb: t,
70 | conf: c,
71 | data: make(chan []FetchData),
72 | lastPosition: p,
73 | }
74 |
75 | if err := i.initFetchers(ctx); err != nil {
76 | return nil, fmt.Errorf("failed to initialize table fetchers: %w", err)
77 | }
78 |
79 | i.startWorkers()
80 |
81 | return i, nil
82 | }
83 |
// NextN takes and returns up to n records from the queue. NextN is allowed to
// block until either at least one record is available or the context gets canceled.
// Note that whole batches are appended, so slightly more than n records may
// be returned when a batch straddles the limit.
func (i *Iterator) NextN(ctx context.Context, n int) ([]opencdc.Record, error) {
	if n <= 0 {
		return nil, fmt.Errorf("n must be greater than 0, got %d", n)
	}

	var records []opencdc.Record

	// Get first record (blocking)
	select {
	case <-ctx.Done():
		return nil, fmt.Errorf("iterator stopped: %w", ctx.Err())
	case batch, ok := <-i.data:
		if !ok { // closed
			// The data channel closes once all workers have exited; a non-nil
			// tomb error means they failed rather than finished.
			if err := i.workersTomb.Err(); err != nil {
				return nil, fmt.Errorf("fetchers exited unexpectedly: %w", err)
			}
			// Wait for all handed-out records to be acked before declaring
			// the snapshot complete.
			if err := i.acks.Wait(ctx); err != nil {
				return nil, fmt.Errorf("failed to wait for acks: %w", err)
			}
			return nil, ErrIteratorDone
		}

		// Each emitted record increments the ack counter; Ack decrements it.
		for _, d := range batch {
			i.acks.Add(1)
			records = append(records, i.buildRecord(d))
		}
	}

	// Try to get remaining records non-blocking
	for len(records) < n {
		select {
		case <-ctx.Done():
			return records, ctx.Err()
		case batch, ok := <-i.data:
			if !ok { // closed
				// Return what we have; the next NextN call observes the
				// closed channel and runs the completion path above.
				return records, nil
			}
			for _, d := range batch {
				i.acks.Add(1)
				records = append(records, i.buildRecord(d))
			}
		default:
			// No more records currently available
			return records, nil
		}
	}

	return records, nil
}
135 |
// Ack marks one previously returned record as processed. The position
// argument is ignored; acks are counted, not matched to positions.
func (i *Iterator) Ack(_ context.Context, _ opencdc.Position) error {
	i.acks.Done()
	return nil
}
140 |
// Teardown kills the worker tomb, signalling all fetch workers to stop. It
// does not wait for the workers to exit.
func (i *Iterator) Teardown(_ context.Context) error {
	if i.workersTomb != nil {
		i.workersTomb.Kill(errors.New("tearing down snapshot iterator"))
	}

	return nil
}
148 |
149 | func (i *Iterator) buildRecord(d FetchData) opencdc.Record {
150 | // merge this position with latest position
151 | i.lastPosition.Type = position.TypeSnapshot
152 | i.lastPosition.Snapshots[d.Table] = d.Position
153 |
154 | pos := i.lastPosition.ToSDKPosition()
155 | metadata := make(opencdc.Metadata)
156 | metadata[opencdc.MetadataCollection] = d.Table
157 |
158 | rec := sdk.Util.Source.NewRecordSnapshot(pos, metadata, d.Key, d.Payload)
159 | if i.conf.WithAvroSchema {
160 | cschema.AttachKeySchemaToRecord(rec, d.KeySchema)
161 | cschema.AttachPayloadSchemaToRecord(rec, d.PayloadSchema)
162 | }
163 |
164 | return rec
165 | }
166 |
167 | func (i *Iterator) initFetchers(ctx context.Context) error {
168 | var errs []error
169 |
170 | i.workers = make([]*FetchWorker, len(i.conf.Tables))
171 |
172 | for j, t := range i.conf.Tables {
173 | w := NewFetchWorker(i.db, i.data, FetchConfig{
174 | Table: t,
175 | Key: i.conf.TableKeys[t],
176 | TXSnapshotID: i.conf.TXSnapshotID,
177 | Position: i.lastPosition,
178 | FetchSize: i.conf.FetchSize,
179 | WithAvroSchema: i.conf.WithAvroSchema,
180 | })
181 |
182 | if err := w.Validate(ctx); err != nil {
183 | errs = append(errs, fmt.Errorf("failed to validate table fetcher %q config: %w", t, err))
184 | }
185 |
186 | i.workers[j] = w
187 | }
188 |
189 | return errors.Join(errs...)
190 | }
191 |
// startWorkers launches one goroutine per fetch worker under the tomb, plus a
// supervisor goroutine that closes the data channel once all workers have
// exited (successfully or not), which in turn unblocks NextN.
func (i *Iterator) startWorkers() {
	for _, worker := range i.workers {
		// NOTE(review): relies on per-iteration loop variables (Go 1.22+);
		// with an older toolchain `worker` would need to be shadowed here.
		i.workersTomb.Go(func() error {
			ctx := i.workersTomb.Context(nil) //nolint:staticcheck // This is the correct usage of tomb.Context
			if err := worker.Run(ctx); err != nil {
				return fmt.Errorf("fetcher for table %q exited: %w", worker.conf.Table, err)
			}
			return nil
		})
	}
	go func() {
		<-i.workersTomb.Dead()
		close(i.data)
	}()
}
207 |
--------------------------------------------------------------------------------
/source/snapshot/iterator_test.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2024 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package snapshot
16 |
17 | import (
18 | "context"
19 | "errors"
20 | "testing"
21 | "time"
22 |
23 | "github.com/conduitio/conduit-commons/opencdc"
24 | "github.com/conduitio/conduit-connector-postgres/source/position"
25 | "github.com/conduitio/conduit-connector-postgres/test"
26 | "github.com/matryer/is"
27 | )
28 |
// Test_Iterator_NextN exercises the snapshot iterator's batch-read behavior:
// the happy path, ack-gated completion, context cancellation, worker-tomb
// exit, and argument validation.
func Test_Iterator_NextN(t *testing.T) {
	var (
		ctx   = test.Context(t)
		pool  = test.ConnectPool(ctx, t, test.RegularConnString)
		table = test.SetupTestTable(ctx, t, pool) // seeded with 4 rows
	)

	t.Run("success", func(t *testing.T) {
		is := is.New(t)

		i, err := NewIterator(ctx, pool, Config{
			Position: position.Position{}.ToSDKPosition(),
			Tables:   []string{table},
			TableKeys: map[string]string{
				table: "id",
			},
			FetchSize: 2,
		})
		is.NoErr(err)
		defer func() {
			is.NoErr(i.Teardown(ctx))
		}()

		// Get first 2 records
		records, err := i.NextN(ctx, 2)
		is.NoErr(err)
		is.Equal(len(records), 2)
		for _, r := range records {
			is.Equal(r.Operation, opencdc.OperationSnapshot)
			is.Equal(r.Metadata[opencdc.MetadataCollection], table)
		}

		// Get the remaining 2 records
		records, err = i.NextN(ctx, 2)
		is.NoErr(err)
		is.Equal(len(records), 2)
		for _, r := range records {
			is.Equal(r.Operation, opencdc.OperationSnapshot)
			is.Equal(r.Metadata[opencdc.MetadataCollection], table)
		}

		// Ack all records
		for j := 1; j <= 4; j++ {
			err = i.Ack(ctx, nil)
			is.NoErr(err)
		}

		// Should return ErrIteratorDone
		_, err = i.NextN(ctx, 1)
		is.Equal(err, ErrIteratorDone)
	})

	t.Run("next waits for acks", func(t *testing.T) {
		is := is.New(t)

		i, err := NewIterator(ctx, pool, Config{
			Position: position.Position{}.ToSDKPosition(),
			Tables:   []string{table},
			TableKeys: map[string]string{
				table: "id",
			},
		})
		is.NoErr(err)
		defer func() {
			is.NoErr(i.Teardown(ctx))
		}()

		// Get all 4 records in multiple calls since NextN is non-blocking
		var allRecords []opencdc.Record
		for len(allRecords) < 4 {
			records, err := i.NextN(ctx, 4)
			is.NoErr(err)
			allRecords = append(allRecords, records...)
		}
		is.Equal(len(allRecords), 4)

		// Only ack 3 records
		for j := 1; j <= 3; j++ {
			err = i.Ack(ctx, nil)
			is.NoErr(err)
		}

		ctxTimeout, cancel := context.WithTimeout(ctx, time.Millisecond*10)
		defer cancel()

		// No more records, but NextN blocks because we haven't acked all records
		_, err = i.NextN(ctxTimeout, 1)
		is.True(errors.Is(err, context.DeadlineExceeded))

		// Ack the last record
		err = i.Ack(ctx, nil)
		is.NoErr(err)

		// Now NextN won't block
		_, err = i.NextN(ctx, 1)
		is.Equal(err, ErrIteratorDone)
	})

	t.Run("context cancelled", func(t *testing.T) {
		is := is.New(t)

		i, err := NewIterator(ctx, pool, Config{
			Position: position.Position{}.ToSDKPosition(),
			Tables:   []string{table},
			TableKeys: map[string]string{
				table: "id",
			},
		})
		is.NoErr(err)
		defer func() {
			is.NoErr(i.Teardown(ctx))
		}()

		// A pre-cancelled context must surface as a wrapped cancellation error.
		cancelCtx, cancel := context.WithCancel(ctx)
		cancel()

		_, err = i.NextN(cancelCtx, 1)
		is.Equal(err.Error(), "iterator stopped: context canceled")
	})

	t.Run("tomb exited", func(t *testing.T) {
		is := is.New(t)
		cancelCtx, cancel := context.WithCancel(ctx)

		i, err := NewIterator(cancelCtx, pool, Config{
			Position: position.Position{}.ToSDKPosition(),
			Tables:   []string{table},
			TableKeys: map[string]string{
				table: "id",
			},
		})
		is.NoErr(err)
		defer func() {
			is.NoErr(i.Teardown(ctx))
		}()

		// Cancelling the iterator's parent context kills its worker tomb;
		// a subsequent NextN (with a live context) must report the cancellation.
		cancel()

		_, err = i.NextN(ctx, 1)
		is.True(errors.Is(err, context.Canceled))
	})

	t.Run("invalid n", func(t *testing.T) {
		is := is.New(t)

		i, err := NewIterator(ctx, pool, Config{
			Position: position.Position{}.ToSDKPosition(),
			Tables:   []string{table},
			TableKeys: map[string]string{
				table: "id",
			},
		})
		is.NoErr(err)
		defer func() {
			is.NoErr(i.Teardown(ctx))
		}()

		// n must be a positive record count.
		_, err = i.NextN(ctx, 0)
		is.True(err != nil)
		is.Equal(err.Error(), "n must be greater than 0, got 0")

		_, err = i.NextN(ctx, -1)
		is.True(err != nil)
		is.Equal(err.Error(), "n must be greater than 0, got -1")
	})
}
195 |
--------------------------------------------------------------------------------
/source/types/numeric.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2024 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package types
16 |
import (
	"fmt"
	"math/big"

	"github.com/jackc/pgx/v5/pgtype"
)
22 |
23 | type NumericFormatter struct{}
24 |
25 | // BigRatFromNumeric converts a pgtype.Numeric to a big.Rat.
26 | func (NumericFormatter) BigRatFromNumeric(num pgtype.Numeric) (*big.Rat, error) {
27 | if num.Int == nil {
28 | return nil, nil
29 | }
30 | v := new(big.Rat)
31 | driverVal, err := num.Value()
32 | if err != nil {
33 | return nil, err
34 | }
35 | v.SetString(driverVal.(string))
36 | return v, nil
37 | }
38 |
--------------------------------------------------------------------------------
/source/types/types.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2024 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package types
16 |
17 | import (
18 | "github.com/jackc/pgx/v5/pgtype"
19 | )
20 |
var (
	// Numeric converts pgtype.Numeric values to *big.Rat.
	Numeric = NumericFormatter{}
	// UUID formats 16-byte UUID values as canonical strings.
	UUID = UUIDFormatter{}
)
25 |
26 | func Format(oid uint32, v any) (any, error) {
27 | if oid == pgtype.UUIDOID {
28 | return UUID.Format(v)
29 | }
30 |
31 | switch t := v.(type) {
32 | case pgtype.Numeric:
33 | return Numeric.BigRatFromNumeric(t)
34 | case *pgtype.Numeric:
35 | return Numeric.BigRatFromNumeric(*t)
36 | case []uint8:
37 | if oid == pgtype.XMLOID {
38 | return string(t), nil
39 | }
40 | return t, nil
41 | default:
42 | return t, nil
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/source/types/types_test.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2024 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package types
16 |
17 | import (
18 | "math/big"
19 | "testing"
20 | "time"
21 |
22 | "github.com/jackc/pgx/v5/pgtype"
23 | "github.com/matryer/is"
24 | )
25 |
26 | func Test_Format(t *testing.T) {
27 | now := time.Now().UTC()
28 |
29 | tests := []struct {
30 | name string
31 | input []any
32 | inputOID []uint32
33 | expect []any
34 | withBuiltin bool
35 | }{
36 | {
37 | name: "int float string bool",
38 | input: []any{
39 | 1021, 199.2, "foo", true,
40 | },
41 | inputOID: []uint32{
42 | 0, 0, 0, 0,
43 | },
44 | expect: []any{
45 | 1021, 199.2, "foo", true,
46 | },
47 | },
48 | {
49 | name: "pgtype.Numeric",
50 | input: []any{
51 | pgxNumeric(t, "12.2121"), pgxNumeric(t, "101"), pgxNumeric(t, "0"), &pgtype.Numeric{}, nil,
52 | },
53 | inputOID: []uint32{
54 | 0, 0, 0, 0, 0,
55 | },
56 | expect: []any{
57 | big.NewRat(122121, 10000), big.NewRat(101, 1), big.NewRat(0, 1), nil, nil,
58 | },
59 | },
60 | {
61 | name: "builtin time.Time",
62 | input: []any{
63 | now,
64 | },
65 | inputOID: []uint32{
66 | 0,
67 | },
68 | expect: []any{
69 | now,
70 | },
71 | withBuiltin: true,
72 | },
73 | {
74 | name: "uuid",
75 | input: []any{
76 | [16]uint8{0xbd, 0x94, 0xee, 0x0b, 0x56, 0x4f, 0x40, 0x88, 0xbf, 0x4e, 0x8d, 0x5e, 0x62, 0x6c, 0xaf, 0x66}, nil,
77 | },
78 | inputOID: []uint32{
79 | pgtype.UUIDOID, pgtype.UUIDOID,
80 | },
81 | expect: []any{
82 | "bd94ee0b-564f-4088-bf4e-8d5e626caf66", "",
83 | },
84 | },
85 | }
86 | _ = time.Now()
87 |
88 | for _, tc := range tests {
89 | t.Run(tc.name, func(t *testing.T) {
90 | is := is.New(t)
91 |
92 | for i, in := range tc.input {
93 | v, err := Format(tc.inputOID[i], in)
94 | is.NoErr(err)
95 | is.Equal(v, tc.expect[i])
96 | }
97 | })
98 | }
99 | }
100 |
101 | // as per https://github.com/jackc/pgx/blob/master/pgtype/numeric_test.go#L66
102 | func pgxNumeric(t *testing.T, num string) pgtype.Numeric {
103 | is := is.New(t)
104 | is.Helper()
105 |
106 | var n pgtype.Numeric
107 | plan := pgtype.NumericCodec{}.PlanScan(nil, pgtype.NumericOID, pgtype.TextFormatCode, &n)
108 | is.True(plan != nil)
109 | is.NoErr(plan.Scan([]byte(num), &n))
110 |
111 | return n
112 | }
113 |
--------------------------------------------------------------------------------
/source/types/uuid.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2022 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package types
16 |
17 | import (
18 | "fmt"
19 |
20 | "github.com/jackc/pgx/v5/pgtype"
21 | )
22 |
23 | type UUIDFormatter struct{}
24 |
25 | // Format takes a slice of bytes and returns a UUID in string format
26 | // Returns error when byte array cannot be parsed.
27 | func (UUIDFormatter) Format(v any) (string, error) {
28 | if v == nil {
29 | return "", nil
30 | }
31 |
32 | b, ok := v.([16]byte)
33 | if !ok {
34 | return "", fmt.Errorf("failed to parse uuid byte array %v", v)
35 | }
36 |
37 | uuid := pgtype.UUID{Bytes: b, Valid: true}
38 |
39 | uv, err := uuid.Value()
40 | if err != nil {
41 | return "", fmt.Errorf("failed to format uuid: %w", err)
42 | }
43 |
44 | return uv.(string), nil
45 | }
46 |
--------------------------------------------------------------------------------
/source_integration_test.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2022 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package postgres
16 |
17 | import (
18 | "context"
19 | "fmt"
20 | "strings"
21 | "testing"
22 |
23 | "github.com/conduitio/conduit-commons/config"
24 | "github.com/conduitio/conduit-connector-postgres/source"
25 | "github.com/conduitio/conduit-connector-postgres/source/logrepl"
26 | "github.com/conduitio/conduit-connector-postgres/test"
27 | sdk "github.com/conduitio/conduit-connector-sdk"
28 | "github.com/matryer/is"
29 | )
30 |
// TestSource_Open verifies that a source can be configured and opened against
// a table whose name contains uppercase letters, exercising primary-key
// discovery with quoted identifiers.
func TestSource_Open(t *testing.T) {
	is := is.New(t)
	ctx := test.Context(t)
	conn := test.ConnectSimple(ctx, t, test.RepmgrConnString)

	// Be sure primary key discovering works correctly on
	// table names with capital letters
	tableName := strings.ToUpper(test.RandomIdentifier(t))
	test.SetupTestTableWithName(ctx, t, conn, tableName)

	slotName := "conduitslot1"
	publicationName := "conduitpub1"

	s := NewSource()
	err := sdk.Util.ParseConfig(
		ctx,
		map[string]string{
			"url":                     test.RepmgrConnString,
			"tables":                  tableName,
			"snapshotMode":            "initial",
			"cdcMode":                 "logrepl",
			"logrepl.slotName":        slotName,
			"logrepl.publicationName": publicationName,
		},
		s.Config(),
		Connector.NewSpecification().SourceParams,
	)
	is.NoErr(err)

	err = s.Open(ctx, nil)
	is.NoErr(err)

	defer func() {
		// Clean up the slot/publication with a fresh context, since the test
		// context may be done by the time the deferred func runs.
		is.NoErr(logrepl.Cleanup(context.Background(), logrepl.CleanupConfig{
			URL:             test.RepmgrConnString,
			SlotName:        slotName,
			PublicationName: publicationName,
		}))
		is.NoErr(s.Teardown(ctx))
	}()
}
72 |
73 | func TestSource_ParseConfig(t *testing.T) {
74 | testCases := []struct {
75 | name string
76 | cfg config.Config
77 | wantErr bool
78 | }{
79 | {
80 | name: "valid postgres replication slot name",
81 | cfg: config.Config{
82 | "url": "postgresql://meroxauser:meroxapass@127.0.0.1:5432/meroxadb",
83 | "tables": "table1,table2",
84 | "cdcMode": "logrepl",
85 | "logrepl.slotName": "valid_slot_name",
86 | },
87 | wantErr: false,
88 | }, {
89 | name: "invalid postgres replication slot name",
90 | cfg: config.Config{
91 | "url": "postgresql://meroxauser:meroxapass@127.0.0.1:5432/meroxadb",
92 | "tables": "table1,table2",
93 | "cdcMode": "logrepl",
94 | "logrepl.slotName": "invalid:slot.name",
95 | },
96 | wantErr: true,
97 | },
98 | }
99 |
100 | for _, tc := range testCases {
101 | t.Run(tc.name, func(t *testing.T) {
102 | is := is.New(t)
103 |
104 | var cfg source.Config
105 | err := sdk.Util.ParseConfig(context.Background(), tc.cfg, cfg, Connector.NewSpecification().SourceParams)
106 |
107 | if tc.wantErr {
108 | is.True(err != nil)
109 | return
110 | }
111 | is.NoErr(err)
112 | })
113 | }
114 | }
115 |
116 | func TestSource_Read(t *testing.T) {
117 | ctx := test.Context(t)
118 | is := is.New(t)
119 |
120 | conn := test.ConnectSimple(ctx, t, test.RegularConnString)
121 | table := setupSourceTable(ctx, t, conn)
122 | insertSourceRow(ctx, t, conn, table)
123 |
124 | s := NewSource()
125 | err := sdk.Util.ParseConfig(
126 | ctx,
127 | map[string]string{
128 | "url": test.RepmgrConnString,
129 | "tables": table,
130 | "snapshotMode": "initial",
131 | "cdcMode": "logrepl",
132 | },
133 | s.Config(),
134 | Connector.NewSpecification().SourceParams,
135 | )
136 | is.NoErr(err)
137 |
138 | err = s.Open(ctx, nil)
139 | is.NoErr(err)
140 |
141 | recs, err := s.ReadN(ctx, 1)
142 | is.NoErr(err)
143 |
144 | fmt.Println(recs)
145 | }
146 |
// setupSourceTable creates a new table with all types and returns its name.
// A cleanup dropping the table is registered on t.
func setupSourceTable(ctx context.Context, t *testing.T, conn test.Querier) string {
	is := is.New(t)
	table := test.RandomIdentifier(t)
	// todo still need to support:
	// bit, varbit, box, char(n), cidr, circle, inet, interval, line, lseg,
	// macaddr, macaddr8, money, path, pg_lsn, pg_snapshot, point, polygon,
	// time, timetz, tsquery, tsvector, xml
	query := `
	CREATE TABLE %s (
	  id bigserial PRIMARY KEY,
	  col_boolean boolean,
	  col_bytea bytea,
	  col_varchar varchar(10),
	  col_date date,
	  col_float4 float4,
	  col_float8 float8,
	  col_int2 int2,
	  col_int4 int4,
	  col_int8 int8,
	  col_json json,
	  col_jsonb jsonb,
	  col_numeric numeric(8,2),
	  col_serial2 serial2,
	  col_serial4 serial4,
	  col_serial8 serial8,
	  col_text text,
	  col_timestamp timestamp,
	  col_timestamptz timestamptz,
	  col_uuid uuid
	)`
	query = fmt.Sprintf(query, table)
	_, err := conn.Exec(ctx, query)
	is.NoErr(err)

	t.Cleanup(func() {
		// Drop with a fresh context; the test context may already be done.
		query := `DROP TABLE %s`
		query = fmt.Sprintf(query, table)
		_, err := conn.Exec(context.Background(), query)
		is.NoErr(err)
	})
	return table
}
190 |
// insertSourceRow inserts one row into the given table with a representative
// value for every non-serial column created by setupSourceTable.
func insertSourceRow(ctx context.Context, t *testing.T, conn test.Querier, table string) {
	is := is.New(t)
	query := `
	INSERT INTO %s (
		col_boolean,
		col_bytea,
		col_varchar,
		col_date,
		col_float4,
		col_float8,
		col_int2,
		col_int4,
		col_int8,
		col_json,
		col_jsonb,
		col_numeric,
		col_serial2,
		col_serial4,
		col_serial8,
		col_text,
		col_timestamp,
		col_timestamptz,
		col_uuid
	) VALUES (
		true,                                   -- col_boolean
		'\x07',                                 -- col_bytea
		'9',                                    -- col_varchar
		'2022-03-14',                           -- col_date
		15,                                     -- col_float4
		16.16,                                  -- col_float8
		32767,                                  -- col_int2
		2147483647,                             -- col_int4
		9223372036854775807,                    -- col_int8
		'{"foo": "bar"}',                       -- col_json
		'{"foo": "baz"}',                       -- col_jsonb
		'292929.29',                            -- col_numeric
		32767,                                  -- col_serial2
		2147483647,                             -- col_serial4
		9223372036854775807,                    -- col_serial8
		'foo bar baz',                          -- col_text
		'2022-03-14 15:16:17',                  -- col_timestamp
		'2022-03-14 15:16:17-08',               -- col_timestamptz
		'bd94ee0b-564f-4088-bf4e-8d5e626caf66'  -- col_uuid
	)`
	query = fmt.Sprintf(query, table)
	_, err := conn.Exec(ctx, query)
	is.NoErr(err)
}
239 |
--------------------------------------------------------------------------------
/test/conf.d/postgresql.conf:
--------------------------------------------------------------------------------
1 | wal_level=logical
2 | max_wal_senders=5
3 | max_replication_slots=5
4 | log_statement='all'
5 | log_connections=true
6 | log_disconnections=true
7 | log_duration=true
8 | log_replication_commands=true
9 |
--------------------------------------------------------------------------------
/test/docker-compose.yml:
--------------------------------------------------------------------------------
1 | services:
2 | pg-0:
3 | image: docker.io/bitnami/postgresql-repmgr:17.5.0
4 | ports:
5 | - "5433:5432"
6 | volumes:
7 | - "pg_0_data:/bitnami/postgresql"
8 | - "./conf.d/:/bitnami/postgresql/conf/conf.d/"
9 | healthcheck:
10 | test: [ "CMD", "pg_isready", "-q", "-d", "meroxadb", "-U", "meroxauser" ]
11 | timeout: 30s
12 | interval: 10s
13 | retries: 5
14 | environment:
15 | - POSTGRESQL_POSTGRES_PASSWORD=meroxaadmin
16 | - POSTGRESQL_USERNAME=meroxauser
17 | - POSTGRESQL_PASSWORD=meroxapass
18 | - POSTGRESQL_DATABASE=meroxadb
19 | - REPMGR_USERNAME=repmgr
20 | - REPMGR_PASSWORD=repmgrmeroxa
21 | - REPMGR_PRIMARY_HOST=pg-0
22 | - REPMGR_PRIMARY_PORT=5432
23 | - REPMGR_PARTNER_NODES=pg-0
24 | - REPMGR_NODE_NAME=pg-0
25 | - REPMGR_NODE_NETWORK_NAME=pg-0
26 | - REPMGR_PORT_NUMBER=5432
27 | volumes:
28 | pg_0_data:
29 | driver: local
30 |
--------------------------------------------------------------------------------
/test/helper.go:
--------------------------------------------------------------------------------
1 | // Copyright © 2022 Meroxa, Inc.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | package test
16 |
17 | import (
18 | "context"
19 | "errors"
20 | "fmt"
21 | "strconv"
22 | "strings"
23 | "testing"
24 | "time"
25 |
26 | "github.com/conduitio/conduit-commons/csync"
27 | "github.com/conduitio/conduit-connector-postgres/source/cpool"
28 | "github.com/jackc/pgx/v5"
29 | "github.com/jackc/pgx/v5/pgconn"
30 | "github.com/jackc/pgx/v5/pgxpool"
31 | "github.com/matryer/is"
32 | "github.com/rs/zerolog"
33 | )
34 |
35 | // RepmgrConnString is a replication user connection string for the test postgres.
36 | const RepmgrConnString = "postgres://repmgr:repmgrmeroxa@127.0.0.1:5433/meroxadb?sslmode=disable"
37 |
38 | // RegularConnString is a non-replication user connection string for the test postgres.
39 | const RegularConnString = "postgres://meroxauser:meroxapass@127.0.0.1:5433/meroxadb?sslmode=disable"
40 |
41 | // TestTableAvroSchemaV1 is the Avro schema representation of the test table
42 | // defined through testTableCreateQuery.
43 | // The fields are sorted by name.
44 | const TestTableAvroSchemaV1 = `{
45 | "type": "record",
46 | "name": "%s",
47 | "fields":
48 | [
49 | {"name":"UppercaseColumn1","type":"int"},
50 | {"name":"column1","type":"string"},
51 | {"name":"column2","type":"int"},
52 | {"name":"column3","type":"boolean"},
53 | {
54 | "name": "column4",
55 | "type":
56 | {
57 | "type": "bytes",
58 | "logicalType": "decimal",
59 | "precision": 16,
60 | "scale": 3
61 | }
62 | },
63 | {
64 | "name": "column5",
65 | "type":
66 | {
67 | "type": "bytes",
68 | "logicalType": "decimal",
69 | "precision": 5
70 | }
71 | },
72 | {"name":"column6","type":"bytes"},
73 | {"name":"column7","type":"bytes"},
74 | {"name":"id","type":"long"},
75 | {"name":"key","type":"bytes"}
76 | ]
77 | }`
78 |
// TestTableAvroSchemaV2 is TestTableAvroSchemaV1 with `column101` (local-timestamp-micros) added.
80 | const TestTableAvroSchemaV2 = `{
81 | "type": "record",
82 | "name": "%s",
83 | "fields":
84 | [
85 | {"name":"UppercaseColumn1","type":"int"},
86 | {"name":"column1","type":"string"},
87 | {"name":"column101","type":{"type":"long","logicalType":"local-timestamp-micros"}},
88 | {"name":"column2","type":"int"},
89 | {"name":"column3","type":"boolean"},
90 | {
91 | "name": "column4",
92 | "type":
93 | {
94 | "type": "bytes",
95 | "logicalType": "decimal",
96 | "precision": 16,
97 | "scale": 3
98 | }
99 | },
100 | {
101 | "name": "column5",
102 | "type":
103 | {
104 | "type": "bytes",
105 | "logicalType": "decimal",
106 | "precision": 5
107 | }
108 | },
109 | {"name":"column6","type":"bytes"},
110 | {"name":"column7","type":"bytes"},
111 | {"name":"id","type":"long"},
112 | {"name":"key","type":"bytes"}
113 | ]
114 | }`
115 |
116 | // TestTableAvroSchemaV3 is TestTableAvroSchemaV1 with `column4` and `column5` dropped.
117 | const TestTableAvroSchemaV3 = `{
118 | "type": "record",
119 | "name": "%s",
120 | "fields":
121 | [
122 | {"name":"UppercaseColumn1","type":"int"},
123 | {"name":"column1","type":"string"},
124 | {"name":"column101","type":{"type":"long","logicalType":"local-timestamp-micros"}},
125 | {"name":"column2","type":"int"},
126 | {"name":"column3","type":"boolean"},
127 | {"name":"column6","type":"bytes"},
128 | {"name":"column7","type":"bytes"},
129 | {"name":"id","type":"long"},
130 | {"name":"key","type":"bytes"}
131 | ]
132 | }`
133 |
134 | // TestTableKeyAvroSchema is the Avro schema for the test table's key column.
135 | const TestTableKeyAvroSchema = `{
136 | "type": "record",
137 | "name": "%s",
138 | "fields":
139 | [
140 | {"name":"id","type":"long"}
141 | ]
142 | }`
143 |
144 | // When updating this table, TestTableAvroSchemaV1 needs to be updated too.
145 | const testTableCreateQuery = `
146 | CREATE TABLE %q (
147 | id bigserial PRIMARY KEY,
148 | key bytea,
149 | column1 varchar(256),
150 | column2 integer,
151 | column3 boolean,
152 | column4 numeric(16,3),
153 | column5 numeric(5),
154 | column6 jsonb,
155 | column7 json,
156 | "UppercaseColumn1" integer
157 | )`
158 |
// Querier is the subset of pgx query operations needed by the test helpers;
// it is satisfied by both *pgx.Conn and *pgxpool.Pool.
type Querier interface {
	Exec(ctx context.Context, sql string, arguments ...any) (pgconn.CommandTag, error)
	Query(ctx context.Context, sql string, args ...any) (pgx.Rows, error)
	QueryRow(ctx context.Context, sql string, args ...any) pgx.Row
}
164 |
165 | func ConnectPool(ctx context.Context, t *testing.T, connString string) *pgxpool.Pool {
166 | is := is.New(t)
167 | pool, err := cpool.New(ctx, connString)
168 | is.NoErr(err)
169 | t.Cleanup(func() {
170 | // close connection with fresh context
171 | is := is.New(t)
172 | is.NoErr(csync.RunTimeout(context.Background(), pool.Close, time.Second*10))
173 | })
174 | return pool
175 | }
176 |
177 | func ConnectSimple(ctx context.Context, t *testing.T, connString string) *pgx.Conn {
178 | is := is.New(t)
179 | pool := ConnectPool(ctx, t, connString)
180 | conn, err := pool.Acquire(ctx)
181 | is.NoErr(err)
182 | t.Cleanup(func() {
183 | conn.Release()
184 | })
185 | return conn.Conn()
186 | }
187 |
// SetupEmptyTestTable creates a new empty test table with a random name and
// returns that name.
func SetupEmptyTestTable(ctx context.Context, t *testing.T, conn Querier) string {
	table := RandomIdentifier(t)
	SetupEmptyTestTableWithName(ctx, t, conn, table)
	return table
}
194 |
195 | func SetupEmptyTestTableWithName(ctx context.Context, t *testing.T, conn Querier, table string) {
196 | is := is.New(t)
197 |
198 | query := fmt.Sprintf(testTableCreateQuery, table)
199 | _, err := conn.Exec(ctx, query)
200 | is.NoErr(err)
201 |
202 | t.Cleanup(func() {
203 | query := `DROP TABLE %q`
204 | query = fmt.Sprintf(query, table)
205 | _, err := conn.Exec(context.Background(), query)
206 | is.NoErr(err)
207 | })
208 | }
209 |
// SetupTestTableWithName creates the test table with the given name and seeds
// it with 4 rows, including NULLs, so snapshot/CDC tests have data to read.
func SetupTestTableWithName(ctx context.Context, t *testing.T, conn Querier, table string) {
	is := is.New(t)
	SetupEmptyTestTableWithName(ctx, t, conn, table)

	query := `
	INSERT INTO %q (key, column1, column2, column3, column4, column5, column6, column7, "UppercaseColumn1")
	VALUES ('1', 'foo', 123, false, 12.2, 4, '{"foo": "bar"}', '{"foo": "baz"}', 1),
		   ('2', 'bar', 456, true, 13.42, 8, '{"foo": "bar"}', '{"foo": "baz"}', 2),
		   ('3', 'baz', 789, false, null, 9, '{"foo": "bar"}', '{"foo": "baz"}', 3),
		   ('4', null, null, null, 91.1, null, null, null, null)`
	query = fmt.Sprintf(query, table)
	_, err := conn.Exec(ctx, query)
	is.NoErr(err)
}
224 |
225 | // SetupTestTable creates a new table and returns its name.
226 | func SetupTestTable(ctx context.Context, t *testing.T, conn Querier) string {
227 | table := RandomIdentifier(t)
228 | SetupTestTableWithName(ctx, t, conn, table)
229 | return table
230 | }
231 |
232 | func CreateReplicationSlot(t *testing.T, conn Querier, slotName string) {
233 | is := is.New(t)
234 |
235 | _, err := conn.Exec(
236 | context.Background(),
237 | "SELECT pg_create_logical_replication_slot($1, $2)",
238 | slotName,
239 | "pgoutput",
240 | )
241 | is.NoErr(err)
242 |
243 | t.Cleanup(func() {
244 | _, err := conn.Exec(
245 | context.Background(),
246 | "SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots WHERE slot_name=$1",
247 | slotName,
248 | )
249 | is.NoErr(err)
250 | })
251 | }
252 |
253 | func CreatePublication(t *testing.T, conn Querier, pubName string, tables []string) {
254 | is := is.New(t)
255 |
256 | quotedTables := make([]string, 0, len(tables))
257 | for _, t := range tables {
258 | // don't use internal.WrapSQLIdent to prevent import cycle
259 | quotedTables = append(quotedTables, strconv.Quote(t))
260 | }
261 |
262 | _, err := conn.Exec(
263 | context.Background(),
264 | fmt.Sprintf("CREATE PUBLICATION %s FOR TABLE %s", pubName, strings.Join(quotedTables, ",")),
265 | )
266 | is.NoErr(err)
267 |
268 | t.Cleanup(func() {
269 | _, err := conn.Exec(context.Background(), fmt.Sprintf("DROP PUBLICATION IF EXISTS %q", pubName))
270 | is.NoErr(err)
271 | })
272 | }
273 |
// RandomIdentifier returns a Postgres-safe identifier derived from the
// lowercased test name (with subtest slashes replaced) and the current time
// in microseconds, modulo 1000.
func RandomIdentifier(t *testing.T) string {
	name := strings.ReplaceAll(strings.ToLower(t.Name()), "/", "_")
	return fmt.Sprintf("conduit_%v_%d", name, time.Now().UnixMicro()%1000)
}
279 |
280 | func IsPgError(is *is.I, err error, wantCode string) {
281 | is.True(err != nil)
282 | var pgerr *pgconn.PgError
283 | ok := errors.As(err, &pgerr)
284 | is.True(ok) // expected err to be a *pgconn.PgError
285 | is.Equal(pgerr.Code, wantCode)
286 | }
287 |
288 | func Context(t *testing.T) context.Context {
289 | ctx := context.Background()
290 | if testing.Short() || !testing.Verbose() {
291 | return ctx
292 | }
293 |
294 | writer := zerolog.NewTestWriter(t)
295 | logger := zerolog.New(writer).Level(zerolog.InfoLevel)
296 | return logger.WithContext(ctx)
297 | }
298 |
--------------------------------------------------------------------------------