├── .github
│   ├── dependabot.yml
│   └── workflows
│       ├── ci.yml
│       ├── integration.yml
│       └── release-and-docker.yml
├── .gitignore
├── .golangci.yml
├── .goreleaser.yaml
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── cmd
│   └── root.go
├── go.mod
├── go.sum
├── internal
│   ├── docker-compose.yml
│   ├── examples
│   │   └── README.md
│   ├── how-it-works.md
│   ├── nats-server.conf
│   ├── pg-flo.yaml
│   ├── pg_flo_logo.png
│   └── scripts
│       ├── e2e_common.sh
│       ├── e2e_copy_and_stream.sh
│       ├── e2e_copy_only.sh
│       ├── e2e_ddl.sh
│       ├── e2e_multi_tenant.sh
│       ├── e2e_order_test.rb
│       ├── e2e_postgres.sh
│       ├── e2e_postgres_data_type.sh
│       ├── e2e_postgres_uniqueness_test.rb
│       ├── e2e_resume_test.rb
│       ├── e2e_routing.sh
│       ├── e2e_stream_only.sh
│       ├── e2e_test_local.sh
│       ├── e2e_transform_filter.sh
│       ├── multi_tenant_rules.yml
│       ├── rules.yml
│       └── webhook_test.sh
├── main.go
└── pkg
    ├── pgflonats
    │   └── pgflonats.go
    ├── replicator
    │   ├── base_replicator.go
    │   ├── buffer.go
    │   ├── config.go
    │   ├── copy_and_stream_replicator.go
    │   ├── ddl_replicator.go
    │   ├── errors.go
    │   ├── factory.go
    │   ├── interfaces.go
    │   ├── json_encoder.go
    │   ├── replication_connection.go
    │   ├── standard_connection.go
    │   ├── stream_replicator.go
    │   ├── table_handling.go
    │   └── tests
    │       ├── base_replicator_test.go
    │       ├── buffer_test.go
    │       ├── copy_and_stream_replicator_test.go
    │       ├── ddl_replicator_test.go
    │       ├── json_encoder_test.go
    │       └── mocks_test.go
    ├── routing
    │   ├── README.md
    │   ├── router.go
    │   └── tests
    │       └── routing_test.go
    ├── rules
    │   ├── README.md
    │   ├── engine.go
    │   ├── rules.go
    │   ├── tests
    │   │   ├── engine_test.go
    │   │   ├── mocks_test.go
    │   │   └── rules_test.go
    │   └── types.go
    ├── sinks
    │   ├── README.md
    │   ├── file.go
    │   ├── postgres.go
    │   ├── shared.go
    │   ├── sink.go
    │   ├── stdout.go
    │   ├── types.go
    │   └── webhooks.go
    ├── utils
    │   ├── cdc_encoding.go
    │   ├── cdc_message.go
    │   ├── retry.go
    │   ├── shared.go
    │   ├── shared_types.go
    │   └── zerolog_logger.go
    └── worker
        └── worker.go
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # To get started with Dependabot version updates, you'll need to specify which
2 | # package ecosystems to update and where the package manifests are located.
3 | # Please see the documentation for all configuration options:
4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
5 |
6 | version: 2
7 | updates:
8 | - package-ecosystem: "gomod" # See documentation for possible values
9 | directory: "/" # Location of package manifests
10 | schedule:
11 | interval: "weekly"
12 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on:
4 | push:
5 | branches: [main]
6 | pull_request:
7 |
8 | jobs:
9 | lint:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - uses: actions/checkout@v4
13 |
14 | - name: Set up Go
15 | uses: actions/setup-go@v4
16 | with:
17 | go-version: "1.21"
18 |
19 | - name: Install golangci-lint
20 | run: curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.60.1
21 |
22 | - name: Lint
23 | run: make lint
24 |
25 | test:
26 | runs-on: ubuntu-latest
27 | steps:
28 | - uses: actions/checkout@v4
29 |
30 | - name: Set up Go
31 | uses: actions/setup-go@v4
32 | with:
33 | go-version: "1.21"
34 |
35 | - name: Test
36 | run: make test
37 | build:
38 | needs: [lint, test]
39 | runs-on: ubuntu-latest
40 | steps:
41 | - uses: actions/checkout@v4
42 |
43 | - name: Set up Go
44 | uses: actions/setup-go@v4
45 | with:
46 | go-version: "1.21"
47 |
48 | - name: Build
49 | run: make build
50 |
51 | - name: Set up QEMU
52 | uses: docker/setup-qemu-action@v3
53 |
54 | - name: Set up Docker Buildx
55 | uses: docker/setup-buildx-action@v3
56 | with:
57 | buildkitd-flags: --debug
58 |
59 | - name: Set build timestamp
60 | id: timestamp
61 | run: echo "timestamp=$(date -u +'%Y-%m-%dT%H:%M:%SZ')" >> $GITHUB_OUTPUT
62 |
63 | - name: Build Docker image
64 | uses: docker/build-push-action@v5
65 | with:
66 | context: .
67 | platforms: linux/amd64
68 | push: false
69 | load: true
70 | tags: pg_flo:test
71 | build-args: |
72 | VERSION=${{ github.sha }}
73 | COMMIT=${{ github.sha }}
74 | DATE=${{ steps.timestamp.outputs.timestamp }}
75 |
76 | - name: Verify Docker image version
77 | run: |
78 | docker run --rm pg_flo:test version | grep ${{ github.sha }}
79 |
--------------------------------------------------------------------------------
/.github/workflows/integration.yml:
--------------------------------------------------------------------------------
1 | name: Integration Tests
2 |
3 | on:
4 | push:
5 | branches: [main]
6 | pull_request:
7 |
8 | permissions:
9 | contents: read
10 | actions: write
11 |
12 | jobs:
13 | build:
14 | runs-on: ubuntu-latest
15 | steps:
16 | - uses: actions/checkout@v4
17 |
18 | - name: Set up Go
19 | uses: actions/setup-go@v4
20 | with:
21 | go-version: "1.21"
22 |
23 | - name: Build
24 | run: make build
25 |
26 | - name: Upload binary
27 | uses: actions/upload-artifact@v4
28 | with:
29 | name: pg_flo-binary
30 | path: bin/pg_flo
31 |
32 | tests:
33 | needs: build
34 | runs-on: ubuntu-latest
35 | strategy:
36 | fail-fast: false
37 | matrix:
38 | test:
39 | [
40 | stream_only,
41 | copy_only,
42 | transform_filter,
43 | ddl,
44 | postgres,
45 | postgres_data_type,
46 | multi_tenant,
47 | routing,
48 | copy_and_stream,
49 | order,
50 | resume,
51 | postgres_uniqueness,
52 | ]
53 | steps:
54 | - uses: actions/checkout@v4
55 | - name: Download binary
56 | uses: actions/download-artifact@v4
57 | with:
58 | name: pg_flo-binary
59 | path: bin
60 | - name: Make binary executable
61 | run: chmod +x bin/pg_flo
62 | - name: Install dependencies
63 | run: |
64 | sudo apt-get update
65 | sudo apt-get install -y postgresql-client jq ruby ruby-dev libpq-dev build-essential
66 | sudo gem install pg
67 | - name: Set up Docker Compose
68 | run: |
69 | sudo curl -L "https://github.com/docker/compose/releases/download/v2.17.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
70 | sudo chmod +x /usr/local/bin/docker-compose
71 | - name: Run test
72 | env:
73 | PG_HOST: localhost
74 | PG_PORT: 5433
75 | PG_USER: myuser
76 | PG_PASSWORD: mypassword!@#%1234
77 | PG_DB: mydb
78 | TARGET_PG_HOST: localhost
79 | TARGET_PG_PORT: 5434
80 | TARGET_PG_USER: targetuser
81 | TARGET_PG_PASSWORD: targetpassword!@#1234
82 | TARGET_PG_DB: targetdb
83 | run: |
84 | docker-compose -f internal/docker-compose.yml up -d
85 | sleep 10
86 | if [[ "${{ matrix.test }}" == "order" || "${{ matrix.test }}" == "resume" || "${{ matrix.test }}" == "postgres_uniqueness" ]]; then
87 | ruby ./internal/scripts/e2e_${{ matrix.test }}_test.rb
88 | else
89 | ./internal/scripts/e2e_${{ matrix.test }}.sh
90 | fi
91 | docker-compose -f internal/docker-compose.yml down -v
92 |
--------------------------------------------------------------------------------
/.github/workflows/release-and-docker.yml:
--------------------------------------------------------------------------------
1 | name: Release and Docker
2 |
3 | on:
4 | push:
5 | tags:
6 | - "v*"
7 |
8 | permissions:
9 | contents: write
10 | packages: write
11 |
12 | jobs:
13 | goreleaser:
14 | runs-on: ubuntu-latest
15 | steps:
16 | - name: Checkout
17 | uses: actions/checkout@v4
18 | with:
19 | fetch-depth: 0
20 |
21 | - name: Set up Go
22 | uses: actions/setup-go@v4
23 | with:
24 | go-version: "1.21"
25 |
26 | - name: Run GoReleaser
27 | uses: goreleaser/goreleaser-action@v5
28 | with:
29 | distribution: goreleaser
30 | version: latest
31 | args: release --clean
32 | env:
33 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
34 |
35 | docker:
36 | needs: goreleaser
37 | runs-on: ubuntu-latest
38 | steps:
39 | - name: Checkout
40 | uses: actions/checkout@v4
41 |
42 | - name: Set up QEMU
43 | uses: docker/setup-qemu-action@v3
44 |
45 | - name: Set up Docker Buildx
46 | uses: docker/setup-buildx-action@v3
47 | with:
48 | buildkitd-flags: --debug
49 |
50 | - name: Extract metadata (shayonj)
51 | id: meta_shayonj
52 | uses: docker/metadata-action@v5
53 | with:
54 | images: docker.io/shayonj/pg_flo
55 | tags: |
56 | type=semver,pattern={{version}}
57 | type=semver,pattern={{major}}.{{minor}}
58 | type=semver,pattern={{major}}
59 |
60 | - name: Login to DockerHub (shayonj)
61 | uses: docker/login-action@v3
62 | with:
63 | username: ${{ secrets.DOCKERHUB_USERNAME }}
64 | password: ${{ secrets.DOCKERHUB_TOKEN }}
65 |
66 | - name: Set build timestamp
67 | id: timestamp
68 | run: echo "timestamp=$(date -u +'%Y-%m-%dT%H:%M:%SZ')" >> $GITHUB_OUTPUT
69 |
70 | - name: Build and push (shayonj)
71 | uses: docker/build-push-action@v5
72 | with:
73 | context: .
74 | platforms: linux/amd64,linux/arm64
75 | push: true
76 | tags: ${{ steps.meta_shayonj.outputs.tags }}
77 | labels: ${{ steps.meta_shayonj.outputs.labels }}
78 | build-args: |
79 | VERSION=${{ github.ref_name }}
80 | COMMIT=${{ github.sha }}
81 | DATE=${{ steps.timestamp.outputs.timestamp }}
82 |
83 | - name: Extract metadata (pgflo)
84 | id: meta_pgflo
85 | uses: docker/metadata-action@v5
86 | with:
87 | images: docker.io/pgflo/pg_flo
88 | tags: |
89 | type=semver,pattern={{version}}
90 | type=semver,pattern={{major}}.{{minor}}
91 | type=semver,pattern={{major}}
92 |
93 | - name: Login to DockerHub (pgflo)
94 | uses: docker/login-action@v3
95 | with:
96 | username: ${{ secrets.PG_FLO_DOCKER_HUB_USERNAME }}
97 | password: ${{ secrets.PG_FLO_DOCKER_HUB_TOKEN }}
98 |
99 | - name: Build and push (pgflo)
100 | uses: docker/build-push-action@v5
101 | with:
102 | context: .
103 | platforms: linux/amd64,linux/arm64
104 | push: true
105 | tags: ${{ steps.meta_pgflo.outputs.tags }}
106 | labels: ${{ steps.meta_pgflo.outputs.labels }}
107 | build-args: |
108 | VERSION=${{ github.ref_name }}
109 | COMMIT=${{ github.sha }}
110 | DATE=${{ steps.timestamp.outputs.timestamp }}
111 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Binaries for programs and plugins
2 | *.exe
3 | *.exe~
4 | *.dll
5 | *.so
6 | *.dylib
7 |
8 | # Test binary, built with `go test -c`
9 | *.test
10 |
11 | # Output of the go coverage tool, specifically when used with LiteIDE
12 | *.out
13 |
14 | # Dependency directories (remove the comment below to include it)
15 | # vendor/
16 |
17 | # Go workspace file
18 | go.work
19 |
20 | # IDE-specific files
21 | .idea/
22 | .vscode/
23 |
24 | # OS-specific files
25 | .DS_Store
26 | Thumbs.db
27 |
28 | # Log files
29 | *.log
30 |
31 | # Binary output directory
32 | /bin/
33 |
34 | # Environment variables file
35 | .env
36 |
37 | pg_flo
38 |
39 | bin/
40 | coverage.txt
41 |
--------------------------------------------------------------------------------
/.golangci.yml:
--------------------------------------------------------------------------------
1 | linters:
2 | enable:
3 | - gofmt
4 | - goimports
5 | - govet
6 | - errcheck
7 | - staticcheck
8 | - ineffassign
9 | - unconvert
10 | - misspell
11 | - gosec
12 | - revive
13 |
14 | linters-settings:
15 | govet:
16 | # Check-shadowing option removed
17 | revive:
18 | min-confidence: 0.8
19 | gocyclo:
20 | min-complexity: 15
21 | maligned:
22 | suggest-new: true
23 | dupl:
24 | threshold: 100
25 | goconst:
26 | min-len: 2
27 | min-occurrences: 2
28 |
29 | issues:
30 | exclude-rules:
31 | - path: _test\.go
32 | linters:
33 | - gocyclo
34 | - errcheck
35 | - dupl
36 | - gosec
37 | exclude-dirs:
38 | - vendor/
39 | exclude-files:
40 | - ".*_test.go"
41 |
42 | output:
43 | formats: colored-line-number
44 | print-issued-lines: true
45 | print-linter-name: true
46 |
--------------------------------------------------------------------------------
/.goreleaser.yaml:
--------------------------------------------------------------------------------
1 | before:
2 | hooks:
3 | - go mod tidy
4 |
5 | builds:
6 | - main: .
7 | env:
8 | - CGO_ENABLED=0
9 | goos:
10 | - linux
11 | - darwin
12 | goarch:
13 | - amd64
14 | - arm64
15 | ldflags:
16 | - -s -w
17 | - -X github.com/pgflo/pg_flo/cmd.version={{.Version}}
18 | - -X github.com/pgflo/pg_flo/cmd.commit={{.Commit}}
19 | - -X github.com/pgflo/pg_flo/cmd.date={{.Date}}
20 | binary: pg_flo
21 |
22 | archives:
23 | - format: tar.gz
24 | name_template: >-
25 | {{ .ProjectName }}_
26 | {{- title .Os }}_
27 | {{- if eq .Arch "amd64" }}x86_64
28 | {{- else }}{{ .Arch }}{{ end }}
29 | format_overrides:
30 | - goos: windows
31 | format: zip
32 |
33 | changelog:
34 | sort: asc
35 | filters:
36 | exclude:
37 | - "^docs:"
38 | - "^test:"
39 | - "^ci:"
40 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.21-alpine AS builder
2 | RUN apk update && apk upgrade --no-cache
3 | WORKDIR /app
4 | COPY . .
5 | ARG VERSION=dev
6 | ARG COMMIT=none
7 | ARG DATE=unknown
8 | RUN CGO_ENABLED=0 GOOS=linux go build -v \
9 | -ldflags "-s -w \
10 | -X 'github.com/pgflo/pg_flo/cmd.version=${VERSION}' \
11 | -X 'github.com/pgflo/pg_flo/cmd.commit=${COMMIT}' \
12 | -X 'github.com/pgflo/pg_flo/cmd.date=${DATE}'" \
13 | -o pg_flo .
14 |
15 | FROM alpine:latest
16 | RUN apk update && apk upgrade --no-cache && \
17 | apk add --no-cache postgresql15-client
18 | COPY --from=builder /app/pg_flo /usr/local/bin/
19 | ENTRYPOINT ["pg_flo"]
20 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: test lint build clean
2 |
3 | # Define the default goal
4 | .DEFAULT_GOAL := build
5 |
6 | # Build the application
7 | build:
8 | go build -o bin/pg_flo
9 |
10 | # Run tests with race detection and coverage
11 | test:
12 | go test -v -race -coverprofile=coverage.txt -covermode=atomic ./...
13 |
14 | # Run linter
15 | lint:
16 | golangci-lint run --timeout=5m
17 |
18 | # Clean build artifacts
19 | clean:
20 | rm -rf bin/ coverage.txt
21 |
22 | # Run all checks (lint and test)
23 | check: lint test
24 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pg_flo
2 |
3 | [](https://github.com/pgflo/pg_flo/actions/workflows/ci.yml)
4 | [](https://github.com/pgflo/pg_flo/actions/workflows/integration.yml)
5 | [](https://github.com/pgflo/pg_flo/releases/latest)
6 | [](https://hub.docker.com/r/pgflo/pg_flo/tags)
7 |
8 | > The easiest way to move and transform data between PostgreSQL databases using Logical Replication.
9 |
10 | ℹ️ `pg_flo` is in active development. The design and architecture are continuously improving. PRs and issues are very welcome 🙏
11 |
12 | ## Key Features
13 |
14 | - **Real-time Data Streaming** - Capture inserts, updates, deletes, and DDL changes in near real-time
15 | - **Fast Initial Loads** - Parallel copy of existing data with automatic follow-up continuous replication
16 | - **Powerful Transformations** - Filter and transform data on-the-fly ([see rules](pkg/rules/README.md))
17 | - **Flexible Routing** - Route to different tables and remap columns ([see routing](pkg/routing/README.md))
18 | - **Production Ready** - Supports resumable streaming, DDL tracking, and more
19 |
20 | ## Common Use Cases
21 |
22 | - Real-time data replication between PostgreSQL databases
23 | - ETL pipelines with data transformation
24 | - Data re-routing, masking and filtering
25 | - Database migration with zero downtime
26 | - Event streaming from PostgreSQL
27 |
28 | [View detailed examples →](internal/examples/README.md)
29 |
30 | ## Quick Start
31 |
32 | ### Prerequisites
33 |
34 | - Docker
35 | - PostgreSQL database with `wal_level=logical`
36 |
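If you are unsure whether the source database meets the `wal_level=logical` prerequisite, you can check it directly in PostgreSQL (these are generic PostgreSQL commands, not pg_flo-specific):

```sql
-- Verify logical replication is enabled
SHOW wal_level;                          -- should return 'logical'
-- If not, enable it and restart PostgreSQL
ALTER SYSTEM SET wal_level = 'logical';
```
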
37 | ### 1. Install
38 |
39 | ```shell
40 | docker pull pgflo/pg_flo:latest
41 | ```
42 |
43 | ### 2. Configure
44 |
45 | Choose one:
46 |
47 | - Environment variables
48 | - YAML configuration file ([example](internal/pg-flo.yaml))
49 | - CLI flags
50 |
51 | ### 3. Run
52 |
53 | ```shell
54 | # Start NATS server
55 | docker run -d --name pg_flo_nats \
56 | --network host \
57 | -v /path/to/nats-server.conf:/etc/nats/nats-server.conf \
58 | nats:latest \
59 | -c /etc/nats/nats-server.conf
60 |
61 | # Start replicator (using config file)
62 | docker run -d --name pg_flo_replicator \
63 | --network host \
64 | -v /path/to/config.yaml:/etc/pg_flo/config.yaml \
65 | pgflo/pg_flo:latest \
66 | replicator --config /etc/pg_flo/config.yaml
67 |
68 | # Start worker
69 | docker run -d --name pg_flo_worker \
70 | --network host \
71 | -v /path/to/config.yaml:/etc/pg_flo/config.yaml \
72 | pgflo/pg_flo:latest \
73 | worker postgres --config /etc/pg_flo/config.yaml
74 | ```
75 |
76 | #### Example Configuration (config.yaml)
77 |
78 | ```yaml
79 | # Replicator settings
80 | host: "localhost"
81 | port: 5432
82 | dbname: "myapp"
83 | user: "replicator"
84 | password: "secret"
85 | group: "users"
86 | tables:
87 | - "users"
88 |
89 | # Worker settings (postgres sink)
90 | target-host: "dest-db"
91 | target-dbname: "myapp"
92 | target-user: "writer"
93 | target-password: "secret"
94 |
95 | # Common settings
96 | nats-url: "nats://localhost:4222"
97 | ```
98 |
99 | [View full configuration options →](internal/pg-flo.yaml)
100 |
101 | ## Core Concepts
102 |
103 | ### Architecture
104 |
105 | pg_flo uses two main components:
106 |
107 | - **Replicator**: Captures PostgreSQL changes via logical replication
108 | - **Worker**: Processes and routes changes through NATS
109 |
110 | [Learn how it works →](internal/how-it-works.md)
111 |
112 | ### Groups
113 |
114 | Groups are used to:
115 |
116 | - Identify replication processes
117 | - Isolate replication slots and publications
118 | - Run multiple instances on the same database
119 | - Maintain state for resumability
120 | - Enable parallel processing
121 |
122 | ```shell
123 | # Example: Separate groups for different tables
124 | pg_flo replicator --group users_orders --tables users,orders
125 |
126 | pg_flo replicator --group products --tables products
127 | ```
128 |
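Because each group owns its own replication slot and publication, you can inspect them on the source database. The exact object names are created and managed by pg_flo, but the standard catalogs show what exists:

```sql
-- Replication slots and how far each has been confirmed
SELECT slot_name, plugin, active, confirmed_flush_lsn FROM pg_replication_slots;

-- Publications and the tables they cover
SELECT pubname FROM pg_publication;
SELECT * FROM pg_publication_tables;
```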
129 | ### Streaming Modes
130 |
131 | 1. **Stream Only** (default)
132 | - Real-time streaming of changes
133 |
134 | ```shell
135 | pg_flo replicator --stream
136 | ```
137 |
138 | 2. **Copy Only**
139 | - One-time parallel copy of existing data
140 |
141 | ```shell
142 | pg_flo replicator --copy --max-copy-workers-per-table 4
143 | ```
144 |
145 | 3. **Copy and Stream**
146 | - Initial parallel copy followed by continuous streaming
147 |
148 | ```shell
149 | pg_flo replicator --copy-and-stream --max-copy-workers-per-table 4
150 | ```
151 |
152 | ### Destinations
153 |
154 | - **stdout**: Console output
155 | - **file**: File writing
156 | - **postgres**: Database replication
157 | - **webhook**: HTTP endpoints
158 |
159 | [View destination details →](pkg/sinks/README.md)
160 |
161 | ## Advanced Features
162 |
163 | ### Message Routing
164 |
165 | Routing configuration is defined in a separate YAML file:
166 |
167 | ```yaml
168 | # routing.yaml
169 | users:
170 | source_table: users
171 | destination_table: customers
172 | column_mappings:
173 | - source: id
174 | destination: customer_id
175 | ```
176 |
177 | ```shell
178 | # Apply routing configuration
179 | pg_flo worker postgres --routing-config /path/to/routing.yaml
180 | ```
181 |
182 | [Learn about routing →](pkg/routing/README.md)
183 |
184 | ### Transformation Rules
185 |
186 | Rules are defined in a separate YAML file:
187 |
188 | ```yaml
189 | # rules.yaml
190 | users:
191 | - type: exclude_columns
192 | columns: [password, ssn]
193 | - type: mask_columns
194 | columns: [email]
195 | ```
196 |
197 | ```shell
198 | # Apply transformation rules
199 | pg_flo worker file --rules-config /path/to/rules.yaml
200 | ```
201 |
202 | [View transformation options →](pkg/rules/README.md)
203 |
204 | ### Combined Example
205 |
206 | ```shell
207 | pg_flo worker postgres --config /etc/pg_flo/config.yaml --routing-config routing.yaml --rules-config rules.yaml
208 | ```
209 |
210 | ## Scaling Guide
211 |
212 | Best practices:
213 |
214 | - Run one worker per group
215 | - Use groups to replicate different tables independently
216 | - Scale horizontally using multiple groups
217 |
218 | Example scaling setup:
219 |
220 | ```shell
221 | # Group: sales
222 | pg_flo replicator --group sales --tables sales
223 | pg_flo worker postgres --group sales
224 |
225 | # Group: inventory
226 | pg_flo replicator --group inventory --tables inventory
227 | pg_flo worker postgres --group inventory
228 | ```
229 |
230 | ## Limits and Considerations
231 |
232 | - NATS message size: 8MB (configurable)
233 | - One worker per group recommended
234 | - PostgreSQL logical replication prerequisites required
235 | - Tables must have one of the following for replication:
236 | - Primary key
237 | - Unique constraint with `NOT NULL` columns
238 | - `REPLICA IDENTITY FULL` set
239 |
240 | Example table configurations:
241 |
242 | ```sql
243 | -- Using primary key (recommended)
244 | CREATE TABLE users (
245 | id SERIAL PRIMARY KEY,
246 | email TEXT,
247 | name TEXT
248 | );
249 |
250 | -- Using unique constraint
251 | CREATE TABLE orders (
252 | order_id TEXT NOT NULL,
253 | customer_id TEXT NOT NULL,
254 | data JSONB,
255 | CONSTRAINT orders_unique UNIQUE (order_id, customer_id)
256 | );
257 | ALTER TABLE orders REPLICA IDENTITY USING INDEX orders_unique;
258 |
259 | -- Using all columns (higher performance overhead)
260 | CREATE TABLE audit_logs (
261 | id SERIAL,
262 | action TEXT,
263 | data JSONB
264 | );
265 | ALTER TABLE audit_logs REPLICA IDENTITY FULL;
266 | ```
267 |
268 | ## Development
269 |
270 | ```shell
271 | make build
272 | make test
273 | make lint
274 |
275 | # E2E tests
276 | ./internal/scripts/e2e_test_local.sh
277 | ```
278 |
279 | ## Contributing
280 |
281 | Contributions welcome! Please open an issue or submit a pull request.
282 |
283 | ## License
284 |
285 | Apache License 2.0. [View license →](LICENSE)
286 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/pgflo/pg_flo
2 |
3 | go 1.21.5
4 |
5 | require (
6 | github.com/goccy/go-json v0.10.5
7 | github.com/jackc/pglogrepl v0.0.0-20240307033717-828fbfe908e9
8 | github.com/jackc/pgtype v1.14.4
9 | github.com/jackc/pgx/v5 v5.7.2
10 | github.com/nats-io/nats.go v1.38.0
11 | github.com/rs/zerolog v1.33.0
12 | github.com/shopspring/decimal v1.4.0
13 | github.com/spf13/cobra v1.9.1
14 | github.com/spf13/pflag v1.0.6
15 | github.com/spf13/viper v1.19.0
16 | github.com/stretchr/testify v1.10.0
17 | gopkg.in/yaml.v2 v2.4.0
18 | )
19 |
20 | require (
21 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
22 | github.com/fsnotify/fsnotify v1.7.0 // indirect
23 | github.com/hashicorp/hcl v1.0.0 // indirect
24 | github.com/inconshreveable/mousetrap v1.1.0 // indirect
25 | github.com/jackc/pgio v1.0.0 // indirect
26 | github.com/jackc/pgpassfile v1.0.0 // indirect
27 | github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
28 | github.com/jackc/puddle/v2 v2.2.2 // indirect
29 | github.com/klauspost/compress v1.17.9 // indirect
30 | github.com/magiconair/properties v1.8.7 // indirect
31 | github.com/mattn/go-colorable v0.1.13 // indirect
32 | github.com/mattn/go-isatty v0.0.20 // indirect
33 | github.com/mitchellh/mapstructure v1.5.0 // indirect
34 | github.com/nats-io/nkeys v0.4.9 // indirect
35 | github.com/nats-io/nuid v1.0.1 // indirect
36 | github.com/pelletier/go-toml/v2 v2.2.2 // indirect
37 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
38 | github.com/sagikazarmark/locafero v0.6.0 // indirect
39 | github.com/sagikazarmark/slog-shim v0.1.0 // indirect
40 | github.com/sourcegraph/conc v0.3.0 // indirect
41 | github.com/spf13/afero v1.11.0 // indirect
42 | github.com/spf13/cast v1.7.0 // indirect
43 | github.com/stretchr/objx v0.5.2 // indirect
44 | github.com/subosito/gotenv v1.6.0 // indirect
45 | go.uber.org/multierr v1.11.0 // indirect
46 | golang.org/x/crypto v0.31.0 // indirect
47 | golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa // indirect
48 | golang.org/x/sync v0.10.0 // indirect
49 | golang.org/x/sys v0.28.0 // indirect
50 | golang.org/x/text v0.21.0 // indirect
51 | gopkg.in/ini.v1 v1.67.0 // indirect
52 | gopkg.in/yaml.v3 v3.0.1 // indirect
53 | )
54 |
--------------------------------------------------------------------------------
/internal/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3.8"
2 |
3 | services:
4 | postgres:
5 | image: postgres:14
6 | container_name: pg_logical_replication
7 | environment:
8 | POSTGRES_USER: myuser
9 | POSTGRES_PASSWORD: mypassword!@#%1234
10 | POSTGRES_DB: mydb
11 | volumes:
12 | - postgres_data:/var/lib/postgresql/data
13 | ports:
14 | - "5433:5432"
15 | command:
16 | - "postgres"
17 | - "-c"
18 | - "wal_level=logical"
19 | - "-c"
20 | - "max_replication_slots=5"
21 | - "-c"
22 | - "max_wal_senders=5"
23 | restart: unless-stopped
24 |
25 | target_postgres:
26 | image: postgres:14
27 | container_name: pg_target
28 | environment:
29 | POSTGRES_USER: targetuser
30 | POSTGRES_PASSWORD: targetpassword!@#1234
31 | POSTGRES_DB: targetdb
32 | volumes:
33 | - target_postgres_data:/var/lib/postgresql/data
34 | ports:
35 | - "5434:5432"
36 | restart: unless-stopped
37 |
38 | nats:
39 | image: nats:latest
40 | container_name: pg_flo_nats
41 | command: ["-c", "/etc/nats/nats-server.conf"]
42 | volumes:
43 | - ./nats-server.conf:/etc/nats/nats-server.conf
44 | - nats_data:/data
45 | ports:
46 | - "4222:4222"
47 | - "8222:8222"
48 | restart: unless-stopped
49 |
50 | volumes:
51 | postgres_data:
52 | target_postgres_data:
53 | nats_data:
54 |
--------------------------------------------------------------------------------
/internal/examples/README.md:
--------------------------------------------------------------------------------
1 | # pg_flo Examples
2 |
3 | This guide demonstrates common use cases for pg_flo with practical examples. For full configuration options, see the [example config file](../pg-flo.yaml).
4 |
5 | ## Basic Replication
6 |
7 | Simple database-to-database replication:
8 |
9 | ```bash
10 | # Start NATS server
11 | docker run -d --name pg_flo_nats \
12 | --network host \
13 | -v /path/to/nats-server.conf:/etc/nats/nats-server.conf \
14 | nats:latest \
15 | -c /etc/nats/nats-server.conf
16 |
17 | # Start replicator
18 | docker run -d --name pg_flo_replicator \
19 | --network host \
20 | -v /path/to/config.yaml:/etc/pg_flo/config.yaml \
21 | pgflo/pg_flo:latest \
22 | replicator --config /etc/pg_flo/config.yaml
23 |
24 | # Start worker
25 | docker run -d --name pg_flo_worker \
26 | --network host \
27 | -v /path/to/config.yaml:/etc/pg_flo/config.yaml \
28 | pgflo/pg_flo:latest \
29 | worker postgres --config /etc/pg_flo/config.yaml
30 | ```
31 |
32 | ## Data Masking and Transformation
33 |
34 | Mask sensitive data during replication:
35 |
36 | ```yaml
37 | # rules.yaml
38 | rules:
39 | - table: users
40 | type: transform
41 | column: email
42 | parameters:
43 | type: mask
44 | mask_char: "*"
45 | operations: [INSERT, UPDATE]
46 | - table: payments
47 | type: transform
48 | column: card_number
49 | parameters:
50 | type: regex_replace
51 |       pattern: '(\d{12})(\d{4})'
52 | replacement: "************$2"
53 | ```
54 |
55 | ```bash
56 | pg_flo worker postgres \
57 | --group sensitive_data \
58 | --rules-config /path/to/rules.yaml \
59 | # ... other postgres connection flags
60 | ```
61 |
62 | ## Custom Table Routing
63 |
64 | Route and rename tables/columns:
65 |
66 | ```yaml
67 | # routing.yaml
68 | users:
69 | source_table: users
70 | destination_table: customers
71 | column_mappings:
72 | - source: user_id
73 | destination: customer_id
74 | - source: created_at
75 | destination: signup_date
76 | operations:
77 | - INSERT
78 | - UPDATE
79 | ```
80 |
81 | ```bash
82 | pg_flo worker postgres \
83 | --group user_migration \
84 | --routing-config /path/to/routing.yaml \
85 | # ... other config flags
86 | ```
87 |
88 | ## Initial Load Options
89 |
90 | ### Copy Only (One-time Data Copy)
91 |
92 | Copy existing data without streaming changes:
93 |
94 | ```bash
95 | pg_flo replicator \
96 | --copy \
97 | --max-copy-workers-per-table 4 \
98 | --group initial_load \
99 | # ... other config flags
100 | ```
101 |
102 | ### Copy and Stream
103 |
104 | Perform parallel initial data load followed by continuous streaming:
105 |
106 | ```bash
107 | pg_flo replicator \
108 | --copy-and-stream \
109 | --max-copy-workers-per-table 4 \
110 | --group full_sync \
111 | # ... other config flags
112 | ```
113 |
114 | ## Multi-Destination Pipeline
115 |
116 | Stream changes to multiple destinations simultaneously:
117 |
118 | ```bash
119 | # Terminal 1: Stream to PostgreSQL
120 | pg_flo worker postgres \
121 | --group audit \
122 | # ... other config flags
123 |
124 | # Terminal 2: Stream to files for archival
125 | pg_flo worker file \
126 | --group audit \
127 | --file-output-dir /archive/changes
128 |
129 | # Terminal 3: Stream to webhook for external processing
130 | pg_flo worker webhook \
131 | --group audit \
132 | --webhook-url https://api.example.com/changes \
133 | --webhook-batch-size 100
134 | ```
135 |
136 | ## Schema Tracking
137 |
138 | Enable DDL tracking to capture schema changes. DDL changes are applied to the destination as they arrive:
139 |
140 | ```bash
141 | pg_flo replicator \
142 | --track-ddl \
143 | --group schema_sync \
144 | # ... other config flags
145 |
146 | pg_flo worker postgres \
147 | --group schema_sync \
148 | --target-sync-schema true \
149 | # ... other postgres connection flags
150 | ```
151 |
152 | ## Configuration File
153 |
154 | Instead of CLI flags, you can use a configuration file:
155 |
156 | ```yaml
157 | # ~/.pg_flo.yaml
158 | host: "source-db.example.com"
159 | port: 5432
160 | dbname: "myapp"
161 | user: "replicator"
162 | password: "secret"
163 | group: "production"
164 | tables:
165 | - users
166 | - orders
167 | - payments
168 | nats-url: "nats://localhost:4222"
169 | target-host: "dest-db.example.com"
170 | target-dbname: "myapp"
171 | target-user: "writer"
172 | target-password: "secret"
173 | ```
174 |
175 | ```bash
176 | pg_flo replicator --config /path/to/config.yaml
177 | pg_flo worker postgres --config /path/to/config.yaml
178 | ```
179 |
180 | See the [example config file](../pg-flo.yaml) for more details.
181 |
182 | ## Environment Variables
183 |
184 | All configuration options can also be set via environment variables:
185 |
186 | ```bash
187 | export PG_FLO_HOST=source-db.example.com
188 | export PG_FLO_PORT=5432
189 | export PG_FLO_DBNAME=myapp
190 | export PG_FLO_USER=replicator
191 | export PG_FLO_PASSWORD=secret
192 | export PG_FLO_GROUP=production
193 | export PG_FLO_NATS_URL=nats://localhost:4222
194 |
195 | pg_flo replicator
196 | ```
197 |
--------------------------------------------------------------------------------
/internal/how-it-works.md:
--------------------------------------------------------------------------------
1 | # How it Works
2 |
3 | `pg_flo` leverages PostgreSQL's logical replication system to capture and stream data, applying transformations and filters before the data reaches the destination. It uses **NATS** as a message broker to decouple the replicator and worker processes, providing flexibility and scalability.
4 |
5 | 1. **Publication Creation**: Creates a PostgreSQL publication for the specified tables or all tables (per `group`).
6 |
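   For illustration only (pg_flo creates and manages these objects itself, and the actual names and DDL may differ), a publication scoped to two tables looks like:

   ```sql
   -- Hypothetical publication for a group replicating two tables
   CREATE PUBLICATION pg_flo_example_publication FOR TABLE public.users, public.orders;
   -- When no tables are specified for the group:
   -- CREATE PUBLICATION pg_flo_example_publication FOR ALL TABLES;
   ```
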
7 | 2. **Replication Slot**: A replication slot is created to ensure no data is lost between streaming sessions.
8 |
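   As a sketch (the slot name and its creation are handled internally by pg_flo):

   ```sql
   -- A logical replication slot retains WAL until its changes are consumed,
   -- so nothing is lost while the replicator is offline.
   SELECT * FROM pg_create_logical_replication_slot('pg_flo_example_slot', 'pgoutput');
   ```
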
9 | 3. **Operation Modes**:
10 |
11 | - **Copy-and-Stream**: Performs an initial bulk copy followed by streaming changes.
12 | - **Stream-Only**: Starts streaming changes immediately from the last known position.
13 |
14 | 4. **Initial Bulk Copy** (for Copy-and-Stream mode):
15 |
16 | - If no valid LSN is found in NATS, `pg_flo` performs an initial bulk copy of existing data.
17 | - This process is parallelized for fast data sync:
18 | - A snapshot is taken to ensure consistency.
19 | - Each table is divided into page ranges.
20 | - Multiple workers copy different ranges concurrently.
21 |
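   The parallel copy can be pictured as below; this is an illustrative sketch of the snapshot-plus-page-range approach, not pg_flo's exact SQL:

   ```sql
   -- Coordinator: export a snapshot so every copy worker sees the same data
   BEGIN ISOLATION LEVEL REPEATABLE READ;
   SELECT pg_export_snapshot();  -- e.g. returns '00000004-00000002-1'
   -- (this transaction stays open while the workers run)

   -- Each worker: attach to the exported snapshot and copy a distinct page range
   -- (efficient TID range scans require PostgreSQL 14+)
   BEGIN ISOLATION LEVEL REPEATABLE READ;
   SET TRANSACTION SNAPSHOT '00000004-00000002-1';
   SELECT * FROM public.users
   WHERE ctid >= '(0,0)'::tid AND ctid < '(4096,0)'::tid;  -- pages 0 to 4095
   COMMIT;
   ```
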
22 | 5. **Streaming Changes**:
23 |
24 | - After the initial copy (or immediately in Stream-Only mode), the replicator streams changes from PostgreSQL and publishes them to NATS.
25 | - The last processed LSN is stored in NATS, allowing `pg_flo` to resume operations from where it left off in case of interruptions.
26 |
27 | 6. **Message Processing**: The worker processes various types of messages from NATS:
28 |
29 | - Relation messages to understand table structures
30 | - Insert, Update, and Delete messages containing actual data changes
31 | - Begin and Commit messages for transaction boundaries
32 | - DDL changes like ALTER TABLE, CREATE INDEX, etc.
33 |
34 | 7. **Data Transformation**: Received data is converted into a structured format, with type-aware conversions for different PostgreSQL data types.
35 |
36 | 8. **Rule Application**: If configured, transformation and filtering rules are applied to the data:
37 |
38 | - **Transform Rules**:
39 | - Regex: Apply regular expression transformations to string values.
40 | - Mask: Mask sensitive data, keeping the first and last characters visible.
41 | - **Filter Rules**:
42 | - Comparison: Filter based on equality, inequality, greater than, less than, etc.
43 | - Contains: Filter string values based on whether they contain a specific substring.
44 | - Rules can be applied selectively to insert, update, or delete operations.
45 |
46 | 9. **Buffering**: Processed data is buffered and written in batches to optimize write operations to the destination.
47 |
48 | 10. **Writing to Sink**: Data is periodically flushed from the buffer to the configured sink (e.g., stdout, file, or other destinations).
49 |
50 | 11. **State Management**:
51 | - The replicator keeps track of its progress by updating the Last LSN in NATS.
52 | - The worker maintains its progress to ensure data consistency.
53 | - This allows for resumable operations across multiple runs.
54 | - Periodic status updates are sent to PostgreSQL to maintain the replication connection.
55 |
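You can observe this from the PostgreSQL side with the standard replication views (these are generic PostgreSQL views, not pg_flo-specific):

```sql
-- The replicator's walsender connection and how much WAL has been sent/flushed
SELECT application_name, state, sent_lsn, flush_lsn
FROM pg_stat_replication;

-- The slot's confirmed position, which advances as pg_flo acknowledges progress
SELECT slot_name, active, confirmed_flush_lsn
FROM pg_replication_slots;
```
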
--------------------------------------------------------------------------------
/internal/nats-server.conf:
--------------------------------------------------------------------------------
1 | jetstream: enabled
2 | store_dir: /data
3 | http_port: 8222
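# 8 MiB maximum message size (the "NATS message size" limit referenced in the README)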
4 | max_payload: 8388608
5 |
--------------------------------------------------------------------------------
/internal/pg-flo.yaml:
--------------------------------------------------------------------------------
1 | # [Replicator] PostgreSQL connection settings
2 | host: "localhost" # PostgreSQL host (env: PG_FLO_HOST)
3 | port: 5432 # PostgreSQL port (env: PG_FLO_PORT)
4 | dbname: "your_database" # PostgreSQL database name (env: PG_FLO_DBNAME)
5 | user: "your_user" # PostgreSQL user (env: PG_FLO_USER)
6 | password: "your_password" # PostgreSQL password (env: PG_FLO_PASSWORD)
7 | schema: "public" # PostgreSQL schema to replicate from (env: PG_FLO_SCHEMA)
8 |
9 | # Replication settings
10 | group: "your_group" # Group name to identify each replication (env: PG_FLO_GROUP)
11 | tables: # Tables to replicate (empty for all tables) (env: PG_FLO_TABLES)
12 | - "table1"
13 | - "table2"
14 | copy-and-stream: false # Enable copy and stream mode (env: PG_FLO_COPY_AND_STREAM)
15 | max-copy-workers-per-table: 4 # Maximum number of parallel workers for copy operation (env: PG_FLO_MAX_COPY_WORKERS_PER_TABLE)
16 | track-ddl: false # Enable tracking of DDL changes (env: PG_FLO_TRACK_DDL)
17 |
18 | # NATS settings
19 | nats-url: "nats://localhost:4222" # NATS server URL (env: PG_FLO_NATS_URL)
20 |
21 | # Worker settings
22 | batch-size: 1000 # Number of messages to process in a batch (env: PG_FLO_BATCH_SIZE)
23 | rules-config: "/path/to/rules.yaml" # Path to rules configuration file (env: PG_FLO_RULES_CONFIG)
24 | routing-config: "/path/to/routing.yaml" # Path to routing configuration file (env: PG_FLO_ROUTING_CONFIG)
25 |
26 | # File sink settings
27 | file-output-dir: "/tmp/pg_flo-output" # Output directory for file sink (env: PG_FLO_FILE_OUTPUT_DIR)
28 |
29 | # [Worker] Postgres sink settings
30 | target-host: "" # Target PostgreSQL host (env: PG_FLO_TARGET_HOST)
31 | target-port: 5432 # Target PostgreSQL port (env: PG_FLO_TARGET_PORT)
32 | target-dbname: "" # Target PostgreSQL database name (env: PG_FLO_TARGET_DBNAME)
33 | target-user: "" # Target PostgreSQL user (env: PG_FLO_TARGET_USER)
34 | target-password: "" # Target PostgreSQL password (env: PG_FLO_TARGET_PASSWORD)
35 | target-sync-schema: false # Sync schema from source to target (env: PG_FLO_TARGET_SYNC_SCHEMA)
36 | target-disable-foreign-keys: false # Disable foreign key constraints on target (env: PG_FLO_TARGET_DISABLE_FOREIGN_KEYS)
37 |
38 | # Source connection for schema sync (only needed with target-sync-schema: true)
39 | source-host: "" # Source PostgreSQL host (env: PG_FLO_SOURCE_HOST)
40 | source-port: 5432 # Source PostgreSQL port (env: PG_FLO_SOURCE_PORT)
41 | source-dbname: "" # Source PostgreSQL database name (env: PG_FLO_SOURCE_DBNAME)
42 | source-user: "" # Source PostgreSQL user (env: PG_FLO_SOURCE_USER)
43 | source-password: "" # Source PostgreSQL password (env: PG_FLO_SOURCE_PASSWORD)
44 |
45 | # Webhook sink settings
46 | webhook-url: "" # Webhook URL to send data (env: PG_FLO_WEBHOOK_URL)
47 |
--------------------------------------------------------------------------------
/internal/pg_flo_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pgflo/pg_flo/e9be74c2ffaa91b13f9a4326d4b5d83c81e4b450/internal/pg_flo_logo.png
--------------------------------------------------------------------------------
/internal/scripts/e2e_common.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | PG_HOST="${PG_HOST:-localhost}"
4 | PG_PORT="${PG_PORT:-5433}"
5 | PG_USER="${PG_USER:-myuser}"
6 | PG_PASSWORD="${PG_PASSWORD:-mypassword!@#%1234}"
7 | PG_DB="${PG_DB:-mydb}"
8 |
9 | TARGET_PG_HOST="${TARGET_PG_HOST:-localhost}"
10 | TARGET_PG_PORT="${TARGET_PG_PORT:-5434}"
11 | TARGET_PG_USER="${TARGET_PG_USER:-targetuser}"
12 | TARGET_PG_PASSWORD="${TARGET_PG_PASSWORD:-targetpassword!@#1234}"
13 | TARGET_PG_DB="${TARGET_PG_DB:-targetdb}"
14 |
15 | NATS_URL="${NATS_URL:-nats://localhost:4222}"
16 |
17 | pg_flo_BIN="./bin/pg_flo"
18 | OUTPUT_DIR="/tmp/pg_flo-output"
19 | pg_flo_LOG="/tmp/pg_flo.log"
20 | pg_flo_WORKER_LOG="/tmp/pg_flo_worker.log"
21 |
22 | # Helper functions
23 | log() { echo "🔹 $1"; }
24 | success() { echo "✅ $1"; }
25 | error() { echo "❌ $1"; }
26 |
27 | run_sql() {
28 | if [ ${#1} -gt 1000 ]; then
29 | local temp_file=$(mktemp)
30 | echo "$1" >"$temp_file"
31 | PGPASSWORD=$PG_PASSWORD psql -h "$PG_HOST" -U "$PG_USER" -d "$PG_DB" -p "$PG_PORT" -q -t -f "$temp_file"
32 | rm "$temp_file"
33 | else
34 | PGPASSWORD=$PG_PASSWORD psql -h "$PG_HOST" -U "$PG_USER" -d "$PG_DB" -p "$PG_PORT" -q -t -c "$1"
35 | fi
36 | }
37 |
38 | setup_postgres() {
39 | log "Ensuring PostgreSQL is ready..."
40 | for i in {1..30}; do
41 | if PGPASSWORD=$PG_PASSWORD psql -h "$PG_HOST" -U "$PG_USER" -d "$PG_DB" -p "$PG_PORT" -c '\q' >/dev/null 2>&1; then
42 | success "PostgreSQL is ready"
43 | return 0
44 | fi
45 | sleep 1
46 | done
47 | error "PostgreSQL is not ready after 30 seconds"
48 | exit 1
49 | }
50 |
51 | stop_pg_flo_gracefully() {
52 | log "Stopping pg_flo replicator..."
53 | if kill -0 "$pg_flo_PID" 2>/dev/null; then
54 | kill -TERM "$pg_flo_PID"
55 | wait "$pg_flo_PID" 2>/dev/null || true
56 | success "pg_flo replicator stopped"
57 | else
58 | log "pg_flo replicator process not found, it may have already completed"
59 | fi
60 |
61 | log "Stopping pg_flo worker..."
62 | if kill -0 "$pg_flo_WORKER_PID" 2>/dev/null; then
63 | kill -TERM "$pg_flo_WORKER_PID"
64 | wait "$pg_flo_WORKER_PID" 2>/dev/null || true
65 | success "pg_flo worker stopped"
66 | else
67 | log "pg_flo worker process not found, it may have already completed"
68 | fi
69 | }
70 |
71 | show_pg_flo_logs() {
72 | log "pg_flo replicator logs:"
73 | echo "----------------------------------------"
74 | cat $pg_flo_LOG*
75 | echo "----------------------------------------"
76 |
77 | log "pg_flo worker logs:"
78 | echo "----------------------------------------"
79 | cat $pg_flo_WORKER_LOG*
80 | echo "----------------------------------------"
81 | }
82 |
83 | run_sql_target() {
84 | if [ ${#1} -gt 1000 ]; then
85 | local temp_file=$(mktemp)
86 | echo "$1" >"$temp_file"
87 | PGPASSWORD=$TARGET_PG_PASSWORD psql -h "$TARGET_PG_HOST" -U "$TARGET_PG_USER" -d "$TARGET_PG_DB" -p "$TARGET_PG_PORT" -q -t -f "$temp_file"
88 | rm "$temp_file"
89 | else
90 | PGPASSWORD=$TARGET_PG_PASSWORD psql -h "$TARGET_PG_HOST" -U "$TARGET_PG_USER" -d "$TARGET_PG_DB" -p "$TARGET_PG_PORT" -q -t -c "$1"
91 | fi
92 | }
93 |
94 | setup_docker() {
95 | rm -Rf /tmp/pg*
96 | log "Setting up Docker environment..."
97 | docker compose -f internal/docker-compose.yml down -v
98 | docker compose -f internal/docker-compose.yml up -d
99 | success "Docker environment is set up"
100 | }
101 |
--------------------------------------------------------------------------------
/internal/scripts/e2e_copy_and_stream.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -euo pipefail
3 |
4 | source "$(dirname "$0")/e2e_common.sh"
5 |
6 | create_users() {
7 | log "Creating test table..."
8 | run_sql "DROP TABLE IF EXISTS public.users;"
9 | run_sql "CREATE TABLE public.users (
10 | id serial PRIMARY KEY,
11 | int_col integer,
12 | float_col float,
13 | text_col text,
14 | bool_col boolean,
15 | date_col date,
16 | timestamp_col timestamp with time zone,
17 | json_col jsonb,
18 | array_col integer[],
19 | bytea_col bytea
20 | );"
21 | success "Test table created"
22 | }
23 |
24 | populate_initial_data() {
25 | log "Populating initial data..."
26 | run_sql "INSERT INTO public.users (
27 | int_col, float_col, text_col, bool_col, date_col, timestamp_col, json_col, array_col, bytea_col
28 | ) SELECT
29 | generate_series(1, 500000),
30 | random() * 100,
31 | 'Initial data ' || generate_series(1, 500000),
32 | (random() > 0.5),
33 | current_date + (random() * 365)::integer * interval '1 day',
34 | current_timestamp + (random() * 365 * 24 * 60 * 60)::integer * interval '1 second',
35 | json_build_object('key', 'value' || generate_series(1, 500000), 'number', generate_series(1, 500000)),
36 | ARRAY[generate_series(1, 3)],
37 | decode(lpad(to_hex(generate_series(1, 4)), 8, '0'), 'hex')
38 | ;"
39 | run_sql "UPDATE public.users SET text_col = text_col || ' - Updated';"
40 |
41 | log "Inserting large JSON data..."
42 | local large_json='{"data":['
43 | for i in {1..10000}; do
44 | if [ "$i" -ne 1 ]; then
45 | large_json+=','
46 | fi
47 | large_json+='{"id":'$i',"name":"Item '$i'","description":"This is a long description for item '$i'. It contains a lot of text to make the JSON larger.","attributes":{"color":"red","size":"large","weight":10.5,"tags":["tag1","tag2","tag3"]}}'
48 | done
49 | large_json+=']}'
50 |
51 | run_sql "INSERT INTO public.users (int_col, json_col) VALUES (1000001, '$large_json'::jsonb);"
52 |
53 | run_sql "ANALYZE public.users;"
54 | success "Initial data populated"
55 | }
56 |
57 | simulate_concurrent_changes() {
58 | log "Simulating concurrent changes..."
59 | for i in {1..3000}; do
60 | run_sql "INSERT INTO public.users (
61 | int_col, float_col, text_col, bool_col, date_col, timestamp_col, json_col, array_col, bytea_col
62 | ) VALUES (
63 | $i,
64 | $i * 1.5,
65 | 'Concurrent data $i',
66 | ($i % 2 = 0),
67 | current_date + ($i % 365) * interval '1 day',
68 | current_timestamp + ($i % (365 * 24)) * interval '1 hour',
69 | '{\"key\": \"concurrent_$i\", \"value\": $i}',
70 | ARRAY[$i, $i+1, $i+2],
71 | decode(lpad(to_hex($i), 8, '0'), 'hex')
72 | );"
73 | done
74 | success "Concurrent changes simulated"
75 | }
76 |
77 | start_pg_flo_replication() {
78 | log "Starting pg_flo replication..."
79 | $pg_flo_BIN replicator \
80 | --host "$PG_HOST" \
81 | --port "$PG_PORT" \
82 | --dbname "$PG_DB" \
83 | --user "$PG_USER" \
84 | --password "$PG_PASSWORD" \
85 | --group "test_group" \
86 | --tables "users" \
87 | --schema "public" \
88 | --nats-url "$NATS_URL" \
89 | --copy-and-stream \
90 | --max-copy-workers-per-table 4 \
91 | >"$pg_flo_LOG" 2>&1 &
92 | pg_flo_PID=$!
93 | log "pg_flo started with PID: $pg_flo_PID"
94 | success "pg_flo replication started"
95 | }
96 |
97 | start_pg_flo_worker() {
98 | log "Starting pg_flo worker with PostgreSQL sink..."
99 | $pg_flo_BIN worker postgres \
100 | --group "test_group" \
101 | --nats-url "$NATS_URL" \
102 | --source-host "$PG_HOST" \
103 | --source-port "$PG_PORT" \
104 | --source-dbname "$PG_DB" \
105 | --source-user "$PG_USER" \
106 | --source-password "$PG_PASSWORD" \
107 | --target-host "$TARGET_PG_HOST" \
108 | --target-port "$TARGET_PG_PORT" \
109 | --target-dbname "$TARGET_PG_DB" \
110 | --target-user "$TARGET_PG_USER" \
111 | --target-password "$TARGET_PG_PASSWORD" \
112 | --batch-size 5000 \
113 | --target-sync-schema \
114 | >"$pg_flo_WORKER_LOG" 2>&1 &
115 | pg_flo_WORKER_PID=$!
116 | log "pg_flo worker started with PID: $pg_flo_WORKER_PID"
117 | success "pg_flo worker started"
118 | }
119 |
120 | compare_row_counts() {
121 | log "Comparing row counts..."
122 | SOURCE_COUNT=$(run_sql "SELECT COUNT(*) FROM public.users")
123 | TARGET_COUNT=$(run_sql_target "SELECT COUNT(*) FROM public.users")
124 |
125 | log "Source database row count: $SOURCE_COUNT"
126 | log "Target database row count: $TARGET_COUNT"
127 |
128 | EXPECTED_COUNT=503001
129 |
130 | if [ "$SOURCE_COUNT" -eq "$TARGET_COUNT" ] && [ "$SOURCE_COUNT" -eq "$EXPECTED_COUNT" ]; then
131 | success "Row counts match and total is correct ($EXPECTED_COUNT)"
132 | return 0
133 | else
134 | error "Row counts do not match or total is incorrect. Expected $EXPECTED_COUNT, Source: $SOURCE_COUNT, Target: $TARGET_COUNT"
135 | return 1
136 | fi
137 | }
138 |
139 | verify_large_json() {
140 | log "Verifying large JSON data..."
141 | local source_json_length=$(run_sql "
142 | SELECT jsonb_array_length(json_col->'data')
143 | FROM public.users
144 | WHERE int_col = 1000001
145 | ")
146 | local target_json_length=$(run_sql_target "
147 | SELECT jsonb_array_length(json_col->'data')
148 | FROM public.users
149 | WHERE int_col = 1000001
150 | ")
151 |
152 | log "Source JSON length: $source_json_length"
153 | log "Target JSON length: $target_json_length"
154 |
155 | if [ -n "$source_json_length" ] && [ -n "$target_json_length" ] &&
156 | [ "$source_json_length" -eq "$target_json_length" ] &&
157 | [ "$source_json_length" -eq 10000 ]; then
158 | success "Large JSON data verified successfully"
159 | return 0
160 | else
161 | error "Large JSON data verification failed. Expected length 10000, got Source: $source_json_length, Target: $target_json_length"
162 | return 1
163 | fi
164 | }
165 |
166 | verify_data_integrity() {
167 | log "Verifying data integrity..."
168 |
169 | generate_table_hash() {
170 | local db=$1
171 | local csv_file="/tmp/pg_flo_${db}_dump.csv"
172 |
173 | if [ "$db" = "source" ]; then
174 | PGPASSWORD=$PG_PASSWORD psql -h "$PG_HOST" -p "$PG_PORT" -U "$PG_USER" -d "$PG_DB" \
175 | -c "\COPY (SELECT * FROM public.users ORDER BY id) TO '$csv_file' WITH CSV"
176 | else
177 | PGPASSWORD=$TARGET_PG_PASSWORD psql -h "$TARGET_PG_HOST" -p "$TARGET_PG_PORT" -U "$TARGET_PG_USER" -d "$TARGET_PG_DB" \
178 | -c "\COPY (SELECT * FROM public.users ORDER BY id) TO '$csv_file' WITH CSV"
179 | fi
180 |
181 | if command -v md5 >/dev/null; then
182 | md5 -q "$csv_file"
183 | elif command -v md5sum >/dev/null; then
184 | md5sum "$csv_file" | awk '{ print $1 }'
185 | else
186 | echo "Neither md5 nor md5sum command found" >&2
187 | return 1
188 | fi
189 | }
190 |
191 | local source_hash=$(generate_table_hash "source")
192 | local target_hash=$(generate_table_hash "target")
193 |
194 | log "Source data hash: $source_hash"
195 | log "Target data hash: $target_hash"
196 | log "Source CSV file: /tmp/pg_flo_source_dump.csv"
197 | log "Target CSV file: /tmp/pg_flo_target_dump.csv"
198 |
199 | if [ "$source_hash" = "$target_hash" ]; then
200 | success "Data integrity verified: source and target databases match 100%"
201 | return 0
202 | else
203 | error "Data integrity check failed: source and target databases do not match"
204 | log "You can compare the dumps using: diff /tmp/pg_flo_source_dump.csv /tmp/pg_flo_target_dump.csv"
205 | return 1
206 | fi
207 | }
208 |
209 | test_pg_flo_cdc() {
210 | setup_postgres
211 | create_users
212 | populate_initial_data
213 |
214 | start_pg_flo_replication
215 | start_pg_flo_worker
216 | simulate_concurrent_changes
217 |
218 | log "Waiting for changes to replicate..."
219 | sleep 90
220 | stop_pg_flo_gracefully
221 | compare_row_counts || return 1
222 | verify_large_json || return 1
223 | verify_data_integrity || return 1
224 | }
225 |
226 | log "Starting pg_flo CDC test..."
227 | if test_pg_flo_cdc; then
228 | success "All tests passed! 🎉"
229 | exit 0
230 | else
231 | error "Some tests failed. Please check the logs."
232 | show_pg_flo_logs
233 | exit 1
234 | fi
235 |
--------------------------------------------------------------------------------
/internal/scripts/e2e_copy_only.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -euo pipefail
3 |
4 | source "$(dirname "$0")/e2e_common.sh"
5 |
6 | create_users() {
7 | log "Creating test table..."
8 | run_sql "DROP TABLE IF EXISTS public.users;"
9 | run_sql "CREATE TABLE public.users (
10 | id serial PRIMARY KEY,
11 | int_col integer,
12 | float_col float,
13 | text_col text,
14 | bool_col boolean,
15 | date_col date,
16 | timestamp_col timestamp with time zone,
17 | json_col jsonb,
18 | array_col integer[],
19 | bytea_col bytea
20 | );"
21 | success "Test table created"
22 | }
23 |
24 | populate_initial_data() {
25 | log "Populating initial data..."
26 | run_sql "INSERT INTO public.users (
27 | int_col, float_col, text_col, bool_col, date_col, timestamp_col, json_col, array_col, bytea_col
28 | ) SELECT
29 | generate_series(1, 500000),
30 | random() * 100,
31 | 'Initial data ' || generate_series(1, 500000),
32 | (random() > 0.5),
33 | current_date + (random() * 365)::integer * interval '1 day',
34 | current_timestamp + (random() * 365 * 24 * 60 * 60)::integer * interval '1 second',
35 | json_build_object('key', 'value' || generate_series(1, 500000), 'number', generate_series(1, 500000)),
36 | ARRAY[generate_series(1, 3)],
37 | decode(lpad(to_hex(generate_series(1, 4)), 8, '0'), 'hex')
38 | ;"
39 |
40 | log "Inserting large JSON data..."
41 | local large_json='{"data":['
42 | for i in {1..10000}; do
43 | if [ "$i" -ne 1 ]; then
44 | large_json+=','
45 | fi
46 | large_json+='{"id":'$i',"name":"Item '$i'","description":"This is a long description for item '$i'. It contains a lot of text to make the JSON larger.","attributes":{"color":"red","size":"large","weight":10.5,"tags":["tag1","tag2","tag3"]}}'
47 | done
48 | large_json+=']}'
49 |
50 | run_sql "INSERT INTO public.users (int_col, json_col) VALUES (1000001, '$large_json'::jsonb);"
51 |
52 | run_sql "ANALYZE public.users;"
53 | success "Initial data populated"
54 | }
55 |
56 | start_pg_flo_copy_only() {
57 | log "Starting pg_flo in copy-only mode..."
58 | $pg_flo_BIN replicator \
59 | --host "$PG_HOST" \
60 | --port "$PG_PORT" \
61 | --dbname "$PG_DB" \
62 | --user "$PG_USER" \
63 | --password "$PG_PASSWORD" \
64 | --group "test_group" \
65 | --tables "users" \
66 | --schema "public" \
67 | --nats-url "$NATS_URL" \
68 | --copy \
69 | --max-copy-workers-per-table 10 \
70 | >"$pg_flo_LOG" 2>&1 &
71 | pg_flo_PID=$!
72 | log "pg_flo started with PID: $pg_flo_PID"
73 | success "pg_flo copy-only started"
74 | }
75 |
76 | start_pg_flo_worker() {
77 | log "Starting pg_flo worker with PostgreSQL sink..."
78 | $pg_flo_BIN worker postgres \
79 | --group "test_group" \
80 | --nats-url "$NATS_URL" \
81 | --source-host "$PG_HOST" \
82 | --source-port "$PG_PORT" \
83 | --source-dbname "$PG_DB" \
84 | --source-user "$PG_USER" \
85 | --source-password "$PG_PASSWORD" \
86 | --target-host "$TARGET_PG_HOST" \
87 | --target-port "$TARGET_PG_PORT" \
88 | --target-dbname "$TARGET_PG_DB" \
89 | --target-user "$TARGET_PG_USER" \
90 | --target-password "$TARGET_PG_PASSWORD" \
91 | --target-sync-schema \
92 | >"$pg_flo_WORKER_LOG" 2>&1 &
93 | pg_flo_WORKER_PID=$!
94 | log "pg_flo worker started with PID: $pg_flo_WORKER_PID"
95 | success "pg_flo worker started"
96 | }
97 |
98 | compare_row_counts() {
99 | log "Comparing row counts..."
100 | SOURCE_COUNT=$(run_sql "SELECT COUNT(*) FROM public.users")
101 | TARGET_COUNT=$(run_sql_target "SELECT COUNT(*) FROM public.users")
102 |
103 | log "Source database row count: $SOURCE_COUNT"
104 | log "Target database row count: $TARGET_COUNT"
105 |
106 | EXPECTED_COUNT=500001 # 500,000 regular rows + 1 large JSON row
107 |
108 | if [ "$SOURCE_COUNT" -eq "$TARGET_COUNT" ] && [ "$SOURCE_COUNT" -eq "$EXPECTED_COUNT" ]; then
109 | success "Row counts match and total is correct ($EXPECTED_COUNT)"
110 | return 0
111 | else
112 | error "Row counts do not match or total is incorrect. Expected $EXPECTED_COUNT, Source: $SOURCE_COUNT, Target: $TARGET_COUNT"
113 | return 1
114 | fi
115 | }
116 |
117 | verify_large_json() {
118 | log "Verifying large JSON data..."
119 | local source_json_length=$(run_sql "
120 | SELECT jsonb_array_length(json_col->'data')
121 | FROM public.users
122 | WHERE int_col = 1000001
123 | ")
124 | local target_json_length=$(run_sql_target "
125 | SELECT jsonb_array_length(json_col->'data')
126 | FROM public.users
127 | WHERE int_col = 1000001
128 | ")
129 |
130 | log "Source JSON length: $source_json_length"
131 | log "Target JSON length: $target_json_length"
132 |
133 | if [ -n "$source_json_length" ] && [ -n "$target_json_length" ] &&
134 | [ "$source_json_length" -eq "$target_json_length" ] &&
135 | [ "$source_json_length" -eq 10000 ]; then
136 | success "Large JSON data verified successfully"
137 | return 0
138 | else
139 | error "Large JSON data verification failed. Expected length 10000, got Source: $source_json_length, Target: $target_json_length"
140 | return 1
141 | fi
142 | }
143 |
144 | verify_data_integrity() {
145 | log "Verifying data integrity..."
146 |
147 | generate_table_hash() {
148 | local db=$1
149 | local csv_file="/tmp/pg_flo_${db}_dump.csv"
150 |
151 | if [ "$db" = "source" ]; then
152 | PGPASSWORD=$PG_PASSWORD psql -h "$PG_HOST" -p "$PG_PORT" -U "$PG_USER" -d "$PG_DB" \
153 | -c "\COPY (SELECT * FROM public.users ORDER BY id) TO '$csv_file' WITH CSV"
154 | else
155 | PGPASSWORD=$TARGET_PG_PASSWORD psql -h "$TARGET_PG_HOST" -p "$TARGET_PG_PORT" -U "$TARGET_PG_USER" -d "$TARGET_PG_DB" \
156 | -c "\COPY (SELECT * FROM public.users ORDER BY id) TO '$csv_file' WITH CSV"
157 | fi
158 |
159 | if command -v md5 >/dev/null; then
160 | md5 -q "$csv_file"
161 | elif command -v md5sum >/dev/null; then
162 | md5sum "$csv_file" | awk '{ print $1 }'
163 | else
164 | echo "Neither md5 nor md5sum command found" >&2
165 | return 1
166 | fi
167 | }
168 |
169 | local source_hash=$(generate_table_hash "source")
170 | local target_hash=$(generate_table_hash "target")
171 |
172 | log "Source data hash: $source_hash"
173 | log "Target data hash: $target_hash"
174 |
175 | if [ "$source_hash" = "$target_hash" ]; then
176 | success "Data integrity verified: source and target databases match 100%"
177 | return 0
178 | else
179 | error "Data integrity check failed: source and target databases do not match"
180 | log "You can compare the dumps using: diff /tmp/pg_flo_source_dump.csv /tmp/pg_flo_target_dump.csv"
181 | return 1
182 | fi
183 | }
184 |
185 | test_pg_flo_copy_only() {
186 | setup_postgres
187 | create_users
188 | populate_initial_data
189 |
190 | start_pg_flo_copy_only
191 | start_pg_flo_worker
192 |
193 | log "Waiting for changes to replicate..."
194 | sleep 180
195 | stop_pg_flo_gracefully
196 |
197 | compare_row_counts || return 1
198 | verify_large_json || return 1
199 | verify_data_integrity || return 1
200 | }
201 |
202 | log "Starting pg_flo copy-only test..."
203 | if test_pg_flo_copy_only; then
204 | success "All tests passed! 🎉"
205 | exit 0
206 | else
207 | error "Some tests failed. Please check the logs."
208 | show_pg_flo_logs
209 | exit 1
210 | fi
211 |
--------------------------------------------------------------------------------
/internal/scripts/e2e_ddl.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -euo pipefail
3 |
4 | source "$(dirname "$0")/e2e_common.sh"
5 |
6 | create_test_tables() {
7 | log "Creating test schemas and tables..."
8 | run_sql "DROP SCHEMA IF EXISTS app CASCADE; CREATE SCHEMA app;"
9 | run_sql "DROP SCHEMA IF EXISTS public CASCADE; CREATE SCHEMA public;"
10 |
11 | run_sql "CREATE TABLE app.users (id serial PRIMARY KEY, data text);"
12 | run_sql "CREATE TABLE app.posts (id serial PRIMARY KEY, content text);"
13 |
14 | run_sql "CREATE TABLE app.comments (id serial PRIMARY KEY, text text);"
15 | run_sql "CREATE TABLE public.metrics (id serial PRIMARY KEY, value numeric);"
16 | success "Test tables created"
17 | }
18 |
19 | start_pg_flo_replication() {
20 | log "Starting pg_flo replication..."
21 | if [ -f "$pg_flo_LOG" ]; then
22 | mv "$pg_flo_LOG" "${pg_flo_LOG}.bak"
23 | log "Backed up previous replicator log to ${pg_flo_LOG}.bak"
24 | fi
25 | $pg_flo_BIN replicator \
26 | --host "$PG_HOST" \
27 | --port "$PG_PORT" \
28 | --dbname "$PG_DB" \
29 | --user "$PG_USER" \
30 | --password "$PG_PASSWORD" \
31 | --group "group_ddl" \
32 | --schema "app" \
33 | --tables "users,posts" \
34 | --nats-url "$NATS_URL" \
35 | --track-ddl \
36 | >"$pg_flo_LOG" 2>&1 &
37 | pg_flo_PID=$!
38 | log "pg_flo replicator started with PID: $pg_flo_PID"
39 | success "pg_flo replication started"
40 | }
41 |
42 | start_pg_flo_worker() {
43 | log "Starting pg_flo worker with PostgreSQL sink..."
44 | if [ -f "$pg_flo_WORKER_LOG" ]; then
45 | mv "$pg_flo_WORKER_LOG" "${pg_flo_WORKER_LOG}.bak"
46 | log "Backed up previous worker log to ${pg_flo_WORKER_LOG}.bak"
47 | fi
48 | $pg_flo_BIN worker postgres \
49 | --group "group_ddl" \
50 | --nats-url "$NATS_URL" \
51 | --source-host "$PG_HOST" \
52 | --source-port "$PG_PORT" \
53 | --source-dbname "$PG_DB" \
54 | --source-user "$PG_USER" \
55 | --source-password "$PG_PASSWORD" \
56 | --target-host "$TARGET_PG_HOST" \
57 | --target-port "$TARGET_PG_PORT" \
58 | --target-dbname "$TARGET_PG_DB" \
59 | --target-user "$TARGET_PG_USER" \
60 | --target-password "$TARGET_PG_PASSWORD" \
61 | --target-sync-schema \
62 | >"$pg_flo_WORKER_LOG" 2>&1 &
63 | pg_flo_WORKER_PID=$!
64 | log "pg_flo worker started with PID: $pg_flo_WORKER_PID"
65 | success "pg_flo worker started"
66 | }
67 |
68 | perform_ddl_operations() {
69 | log "Performing DDL operations..."
70 |
71 | # Column operations on tracked tables
72 | run_sql "ALTER TABLE app.users ADD COLUMN email text;"
73 | run_sql "ALTER TABLE app.users ADD COLUMN status varchar(50) DEFAULT 'active';"
74 | run_sql "ALTER TABLE app.posts ADD COLUMN category text;"
75 |
76 | # Index operations on tracked tables
77 | run_sql "CREATE INDEX CONCURRENTLY idx_users_email ON app.users (email);"
78 | run_sql "CREATE UNIQUE INDEX idx_posts_unique ON app.posts (content) WHERE content IS NOT NULL;"
79 |
80 | # Column modifications on tracked tables
81 | run_sql "ALTER TABLE app.users ALTER COLUMN status SET DEFAULT 'pending';"
82 | run_sql "ALTER TABLE app.posts ALTER COLUMN category TYPE varchar(100);"
83 |
84 | # Rename operations on tracked tables
85 | run_sql "ALTER TABLE app.users RENAME COLUMN data TO profile;"
86 |
87 | # Drop operations on tracked tables
88 | run_sql "DROP INDEX CONCURRENTLY IF EXISTS idx_users_email;"
89 | run_sql "ALTER TABLE app.posts DROP COLUMN IF EXISTS category;"
90 |
91 | # Operations on non-tracked tables (should be ignored)
92 | run_sql "ALTER TABLE app.comments ADD COLUMN author text;"
93 | run_sql "CREATE INDEX idx_comments_text ON app.comments (text);"
94 | run_sql "ALTER TABLE public.metrics ADD COLUMN timestamp timestamptz;"
95 |
96 | success "DDL operations performed"
97 | }
98 |
99 | verify_ddl_changes() {
100 | log "Verifying DDL changes in target database..."
101 | local failures=0
102 |
103 | check_column() {
104 | local table=$1
105 | local column=$2
106 | local expected_exists=$3
107 | local expected_type=${4:-""}
108 | local expected_default=${5:-""}
109 | local query="
110 | SELECT COUNT(*),
111 | data_type,
112 | character_maximum_length,
113 | column_default
114 | FROM information_schema.columns
115 | WHERE table_schema='app'
116 | AND table_name='$table'
117 | AND column_name='$column'
118 | GROUP BY data_type, character_maximum_length, column_default;"
119 |
120 | local result
121 | result=$(run_sql_target "$query")
122 |
123 | if [ -z "$result" ]; then
124 | exists=0
125 | data_type=""
126 | char_length=""
127 | default_value=""
128 | else
129 | read exists data_type char_length default_value < <(echo "$result" | tr '|' ' ')
130 | fi
131 |
132 | exists=${exists:-0}
133 |
134 | if [ "$exists" -eq "$expected_exists" ]; then
135 | if [ "$expected_exists" -eq 1 ]; then
136 | local type_ok=true
137 | local default_ok=true
138 |
139 | if [ -n "$expected_type" ]; then
140 | # Handle character varying type specifically
141 | if [ "$expected_type" = "character varying" ]; then
142 | if [ "$data_type" = "character varying" ] || [ "$data_type" = "varchar" ] || [ "$data_type" = "character" ]; then
143 | type_ok=true
144 | else
145 | type_ok=false
146 | fi
147 | elif [ "$data_type" != "$expected_type" ]; then
148 | type_ok=false
149 | fi
150 | fi
151 |
152 | if [ -n "$expected_default" ]; then
153 | if [[ "$default_value" == *"$expected_default"* ]]; then
154 | default_ok=true
155 | else
156 | default_ok=false
157 | fi
158 | fi
159 |
160 | if [ "$type_ok" = true ] && [ "$default_ok" = true ]; then
161 | if [[ "$expected_type" == "character varying" && -n "$char_length" ]]; then
162 | success "Column app.$table.$column verification passed (type: $data_type($char_length), default: $default_value)"
163 | else
164 | success "Column app.$table.$column verification passed (type: $data_type, default: $default_value)"
165 | fi
166 | else
167 | if [ "$type_ok" = false ]; then
168 | error "Column app.$table.$column type mismatch (expected: $expected_type, got: $data_type)"
169 | failures=$((failures + 1))
170 | fi
171 | if [ "$default_ok" = false ]; then
172 | error "Column app.$table.$column default value mismatch (expected: $expected_default, got: $default_value)"
173 | failures=$((failures + 1))
174 | fi
175 | fi
176 | else
177 | success "Column app.$table.$column verification passed (not exists)"
178 | fi
179 | else
180 | error "Column app.$table.$column verification failed (expected: $expected_exists, got: $exists)"
181 | failures=$((failures + 1))
182 | fi
183 | }
184 |
185 | check_index() {
186 | local index=$1
187 | local expected=$2
188 | local exists=$(run_sql_target "SELECT COUNT(*) FROM pg_indexes WHERE schemaname='app' AND indexname='$index';")
189 |
190 | if [ "$exists" -eq "$expected" ]; then
191 | success "Index app.$index verification passed (expected: $expected)"
192 | else
193 | error "Index app.$index verification failed (expected: $expected, got: $exists)"
194 | failures=$((failures + 1))
195 | fi
196 | }
197 |
198 | # Verify app.users changes
199 | check_column "users" "email" 1 "text"
200 | check_column "users" "status" 1 "character varying" "'pending'"
201 | check_column "users" "data" 0
202 | check_column "users" "profile" 1 "text"
203 |
204 | # Verify app.posts changes
205 | check_column "posts" "category" 0
206 | check_column "posts" "content" 1 "text"
207 | check_index "idx_posts_unique" 1
208 |
209 | # Verify non-tracked tables
210 | check_column "comments" "author" 0
211 | check_index "idx_comments_text" 0
212 |
213 | local remaining_rows=$(run_sql "SELECT COUNT(*) FROM internal_pg_flo.ddl_log;")
214 | if [ "$remaining_rows" -eq 0 ]; then
215 | success "internal_pg_flo.ddl_log table is empty"
216 | else
217 | error "internal_pg_flo.ddl_log table is not empty. Remaining rows: $remaining_rows"
218 | failures=$((failures + 1))
219 | fi
220 |
221 | if [ "$failures" -eq 0 ]; then
222 | success "All DDL changes verified successfully"
223 | return 0
224 | else
225 | error "DDL verification failed with $failures errors"
226 | return 1
227 | fi
228 | }
229 |
230 | test_pg_flo_ddl() {
231 | setup_postgres
232 | create_test_tables
233 | start_pg_flo_worker
234 | sleep 5
235 | start_pg_flo_replication
236 | sleep 3
237 | perform_ddl_operations
238 | stop_pg_flo_gracefully
239 | verify_ddl_changes || return 1
240 | }
241 |
242 | log "Starting pg_flo CDC test with DDL tracking..."
243 | if test_pg_flo_ddl; then
244 | success "DDL tracking test passed! 🎉"
245 | exit 0
246 | else
247 | error "DDL tracking test failed. Please check the logs."
248 | show_pg_flo_logs
249 | exit 1
250 | fi
251 |
--------------------------------------------------------------------------------
/internal/scripts/e2e_multi_tenant.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -euo pipefail
3 |
4 | source "$(dirname "$0")/e2e_common.sh"
5 |
6 | create_multi_tenant_table() {
7 | log "Creating multi-tenant test table..."
8 | run_sql "DROP TABLE IF EXISTS public.events;"
9 | run_sql "CREATE TABLE public.events (
10 | id serial PRIMARY KEY,
11 | tenant_id int NOT NULL,
12 | name text,
13 | email text,
14 | created_at timestamp DEFAULT current_timestamp
15 | );"
16 | success "Multi-tenant test table created"
17 | }
18 |
19 | start_pg_flo_replication() {
20 | log "Starting pg_flo replication..."
21 | $pg_flo_BIN replicator \
22 | --host "$PG_HOST" \
23 | --port "$PG_PORT" \
24 | --dbname "$PG_DB" \
25 | --user "$PG_USER" \
26 | --password "$PG_PASSWORD" \
27 | --group "group_multi_tenant" \
28 | --tables "events" \
29 | --schema "public" \
30 | --nats-url "$NATS_URL" \
31 | >"$pg_flo_LOG" 2>&1 &
32 | pg_flo_PID=$!
33 | log "pg_flo replicator started with PID: $pg_flo_PID"
34 | success "pg_flo replication started"
35 | }
36 |
37 | start_pg_flo_worker() {
38 | log "Starting pg_flo worker with PostgreSQL sink..."
39 | $pg_flo_BIN worker postgres \
40 | --group "group_multi_tenant" \
41 | --nats-url "$NATS_URL" \
42 | --source-host "$PG_HOST" \
43 | --source-port "$PG_PORT" \
44 | --source-dbname "$PG_DB" \
45 | --source-user "$PG_USER" \
46 | --source-password "$PG_PASSWORD" \
47 | --target-host "$TARGET_PG_HOST" \
48 | --target-port "$TARGET_PG_PORT" \
49 | --target-dbname "$TARGET_PG_DB" \
50 | --target-user "$TARGET_PG_USER" \
51 | --target-password "$TARGET_PG_PASSWORD" \
52 | --target-sync-schema \
53 | --rules-config "$(dirname "$0")/multi_tenant_rules.yml" \
54 | >"$pg_flo_WORKER_LOG" 2>&1 &
55 | pg_flo_WORKER_PID=$!
56 | log "pg_flo worker started with PID: $pg_flo_WORKER_PID"
57 | success "pg_flo worker started"
58 | }
59 |
60 | simulate_multi_tenant_changes() {
61 | log "Simulating multi-tenant changes..."
62 | run_sql "INSERT INTO public.events (tenant_id, name, email) VALUES
63 | (1, 'Alice', 'alice@tenant1.com'),
64 | (2, 'Bob', 'bob@tenant2.com'),
65 | (3, 'Charlie', 'charlie@tenant3.com'),
66 | (3, 'David', 'david@tenant3.com'),
67 | (4, 'Eve', 'eve@tenant4.com'),
68 | (3, 'Frank', 'frank@tenant3.com');"
69 | success "Multi-tenant changes simulated"
70 | }
71 |
72 | verify_multi_tenant_changes() {
73 | log "Verifying multi-tenant changes in target database..."
74 | local tenant_3_count=$(run_sql_target "SELECT COUNT(*) FROM public.events WHERE tenant_id = 3;" | xargs)
75 | local total_count=$(run_sql_target "SELECT COUNT(*) FROM public.events;" | xargs)
76 |
77 | log "Tenant 3 count: $tenant_3_count (expected 3)"
78 | log "Total count: $total_count (expected 3)"
79 |
80 | if [ "$tenant_3_count" -eq 3 ] && [ "$total_count" -eq 3 ]; then
81 | success "Multi-tenant filtering verified successfully"
82 | return 0
83 | else
84 | error "Multi-tenant filtering verification failed"
85 | return 1
86 | fi
87 | }
88 |
89 | test_pg_flo_multi_tenant() {
90 | setup_postgres
91 | create_multi_tenant_table
92 | start_pg_flo_replication
93 | sleep 2
94 | start_pg_flo_worker
95 | simulate_multi_tenant_changes
96 |
97 | log "Waiting for pg_flo to process changes..."
98 | sleep 5
99 |
100 | stop_pg_flo_gracefully
101 | verify_multi_tenant_changes || return 1
102 | }
103 |
104 | # Run the test
105 | log "Starting pg_flo CDC test with multi-tenant filtering..."
106 | if test_pg_flo_multi_tenant; then
107 | success "All tests passed! 🎉"
108 | exit 0
109 | else
110 | error "Some tests failed. Please check the logs."
111 | show_pg_flo_logs
112 | exit 1
113 | fi
114 |
--------------------------------------------------------------------------------
/internal/scripts/e2e_postgres.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -euo pipefail
3 |
4 | source "$(dirname "$0")/e2e_common.sh"
5 |
6 | create_tables() {
7 | log "Creating test tables in source database..."
8 | run_sql "DROP TABLE IF EXISTS public.users;"
9 | run_sql "CREATE TABLE public.users (
10 | id serial PRIMARY KEY,
11 | data text,
12 | nullable_column text,
13 | toasted_column text,
14 | created_at timestamp DEFAULT current_timestamp
15 | );"
16 | run_sql "DROP TABLE IF EXISTS public.toast_test;"
17 | run_sql "CREATE TABLE public.toast_test (id serial PRIMARY KEY, large_jsonb jsonb, small_text text);"
18 | success "Test tables created in source database"
19 | }
20 |
21 | create_config_files() {
22 | log "Creating config files..."
23 |
24 | # Create replicator config
25 | cat >"/tmp/pg_flo_replicator.yml" <"/tmp/pg_flo_worker.yml" <"$pg_flo_LOG" 2>&1 &
74 | pg_flo_PID=$!
75 | log "pg_flo replicator started with PID: $pg_flo_PID"
76 | success "pg_flo replicator started"
77 | }
78 |
79 | start_pg_flo_worker() {
80 | log "Starting pg_flo worker with PostgreSQL sink..."
81 | if [ -f "$pg_flo_WORKER_LOG" ]; then
82 | mv "$pg_flo_WORKER_LOG" "${pg_flo_WORKER_LOG}.bak"
83 | log "Backed up previous worker log to ${pg_flo_WORKER_LOG}.bak"
84 | fi
85 |
86 | $pg_flo_BIN worker postgres --config "/tmp/pg_flo_worker.yml" >"$pg_flo_WORKER_LOG" 2>&1 &
87 | pg_flo_WORKER_PID=$!
88 | log "pg_flo worker started with PID: $pg_flo_WORKER_PID"
89 | success "pg_flo worker started"
90 | }
91 |
92 | simulate_changes() {
93 | log "Simulating changes..."
94 | local insert_count=6000
95 |
96 | for i in $(seq 1 "$insert_count"); do
97 | run_sql "INSERT INTO public.users (data, nullable_column, toasted_column) VALUES ('Data $i', 'Nullable $i', 'Toasted $i');"
98 | done
99 |
100 | # Insert specific rows for deletion
101 | run_sql "INSERT INTO public.users (id, data) VALUES (10001, 'To be deleted 1');"
102 | run_sql "INSERT INTO public.users (id, data) VALUES (10002, 'To be deleted 2');"
103 | run_sql "INSERT INTO public.users (id, data) VALUES (10003, 'To be deleted 3');"
104 | run_sql "INSERT INTO public.users (id, data) VALUES (10004, 'To be deleted 4');"
105 | run_sql "INSERT INTO public.users (id, data) VALUES (10005, 'To be deleted 5');"
106 |
107 | # Insert a row with potentially toasted data
108 | run_sql "INSERT INTO public.users (id, toasted_column) VALUES (10006, repeat('Large toasted data ', 1000));"
109 |
110 | # Update with various scenarios
111 | run_sql "UPDATE public.users SET data = 'Updated data' WHERE id = 1;"
112 | run_sql "UPDATE public.users SET nullable_column = NULL WHERE id = 2;"
113 | run_sql "UPDATE public.users SET data = 'Updated data', nullable_column = NULL WHERE id = 3;"
114 | run_sql "UPDATE public.users SET toasted_column = repeat('A', 10000) WHERE id = 4;"
115 | run_sql "UPDATE public.users SET data = 'Updated data' WHERE id = 5;"
116 |
117 | # Generate large JSONB data (approximately 1MB)
118 | log "Generating 1MB JSONB data..."
119 | local json_data='{"data":"'
120 | for i in {1..100000}; do
121 | json_data+="AAAAAAAAAA"
122 | done
123 | json_data+='"}'
124 |
125 | # Insert large JSONB data
126 | run_sql "INSERT INTO public.toast_test (large_jsonb, small_text) VALUES ('$json_data'::jsonb, 'Initial small text');"
127 | log "Inserted large JSONB data, waiting for replication..."
128 |
129 | # Update unrelated column
130 | run_sql "UPDATE public.toast_test SET small_text = 'Updated small text' WHERE id = 1;"
131 | log "Updated unrelated column, waiting for replication..."
132 |
133 | # Delete operations
134 | run_sql "DELETE FROM public.users WHERE id = 10001;"
135 | run_sql "DELETE FROM public.users WHERE id IN (10002, 10003);"
136 | run_sql "DELETE FROM public.users WHERE id >= 10004 AND id <= 10005;"
137 | run_sql "DELETE FROM public.users WHERE id = 10006;"
138 |
139 | success "Changes simulated"
140 | }
141 |
142 | verify_changes() {
143 | log "Verifying changes in target database..."
144 |
145 | local updated_data=$(run_sql_target "SELECT data FROM public.users WHERE id = 1;" | xargs)
146 | log "Updated data for id 1: '$updated_data' (expected 'Updated data')"
147 |
148 | local null_column=$(run_sql_target "SELECT coalesce(nullable_column, 'NULL') FROM public.users WHERE id = 2;" | xargs)
149 | log "Nullable column for id 2: '$null_column' (expected 'NULL')"
150 |
151 | local mixed_update=$(run_sql_target "SELECT data || ' | ' || coalesce(nullable_column, 'NULL') FROM public.users WHERE id = 3;" | xargs)
152 | log "Mixed update for id 3: '$mixed_update' (expected 'Updated data | NULL')"
153 |
154 | local toast_length=$(run_sql_target "SELECT length(toasted_column) FROM public.users WHERE id = 4;" | xargs)
155 | log "TOAST column length for id 4: '$toast_length' (expected '10000')"
156 |
157 | local unrelated_column=$(run_sql_target "SELECT nullable_column FROM public.users WHERE id = 5;" | xargs)
158 | log "Unrelated column for id 5: '$unrelated_column' (expected 'Nullable 5')"
159 |
160 | local jsonb_length=$(run_sql_target "SELECT octet_length(large_jsonb::text) FROM public.toast_test LIMIT 1;" | xargs)
161 | log "JSONB column length: '$jsonb_length' bytes (expected > 1000000)"
162 |
163 | local small_text=$(run_sql_target "SELECT small_text FROM public.toast_test LIMIT 1;" | xargs)
164 | log "small_text content: '$small_text' (expected 'Updated small text')"
165 |
166 | local deleted_single=$(run_sql_target "SELECT COUNT(*) FROM public.users WHERE id = 10001;" | xargs)
167 | log "Count of deleted user (id 10001): '$deleted_single' (expected '0')"
168 |
169 | local deleted_multiple=$(run_sql_target "SELECT COUNT(*) FROM public.users WHERE id IN (10002, 10003);" | xargs)
170 | log "Count of deleted users (ids 10002, 10003): '$deleted_multiple' (expected '0')"
171 |
172 | local deleted_range=$(run_sql_target "SELECT COUNT(*) FROM public.users WHERE id >= 10004 AND id <= 10005;" | xargs)
173 | log "Count of deleted users (ids 10004-10005): '$deleted_range' (expected '0')"
174 |
175 | local deleted_toasted=$(run_sql_target "SELECT COUNT(*) FROM public.users WHERE id = 10006;" | xargs)
176 | log "Count of deleted user with toasted data (id 10006): '$deleted_toasted' (expected '0')"
177 |
178 | log "Detailed verification:"
179 |
180 | if [ "$updated_data" != "Updated data" ]; then
181 | log "updated_data: '$updated_data' != 'Updated data'"
182 | error "Verification failed: updated_data mismatch"
183 | return 1
184 | fi
185 |
186 | if [ "$null_column" != "NULL" ]; then
187 | log "null_column: '$null_column' != 'NULL'"
188 | error "Verification failed: null_column mismatch"
189 | return 1
190 | fi
191 |
192 | if [ "$mixed_update" != "Updated data | NULL" ]; then
193 | log "mixed_update: '$mixed_update' != 'Updated data | NULL'"
194 | error "Verification failed: mixed_update mismatch"
195 | return 1
196 | fi
197 |
198 | if [ "$toast_length" != "10000" ]; then
199 | log "toast_length: '$toast_length' != '10000'"
200 | error "Verification failed: toast_length mismatch"
201 | return 1
202 | fi
203 |
204 | if [ "$unrelated_column" != "Nullable 5" ]; then
205 | log "unrelated_column: '$unrelated_column' != 'Nullable 5'"
206 | error "Verification failed: unrelated_column mismatch"
207 | return 1
208 | fi
209 |
210 | if [ -z "$jsonb_length" ] || [ "$jsonb_length" -le 1000000 ]; then
211 | log "jsonb_length: '$jsonb_length' <= 1000000"
212 | error "Verification failed: jsonb_length mismatch"
213 | return 1
214 | fi
215 |
216 | if [ "$small_text" != "Updated small text" ]; then
217 | log "small_text: '$small_text' != 'Updated small text'"
218 | error "Verification failed: small_text mismatch"
219 | return 1
220 | fi
221 |
222 | if [ "$deleted_single" != "0" ]; then
223 | log "deleted_single: '$deleted_single' != '0'"
224 | error "Verification failed: deleted_single mismatch"
225 | return 1
226 | fi
227 |
228 | if [ "$deleted_multiple" != "0" ]; then
229 | log "deleted_multiple: '$deleted_multiple' != '0'"
230 | error "Verification failed: deleted_multiple mismatch"
231 | return 1
232 | fi
233 |
234 | if [ "$deleted_range" != "0" ]; then
235 | log "deleted_range: '$deleted_range' != '0'"
236 | error "Verification failed: deleted_range mismatch"
237 | return 1
238 | fi
239 |
240 | if [ "$deleted_toasted" != "0" ]; then
241 | log "deleted_toasted: '$deleted_toasted' != '0'"
242 | error "Verification failed: deleted_toasted mismatch"
243 | return 1
244 | fi
245 |
246 | success "All changes verified successfully in target database"
247 | return 0
248 | }
249 |
250 | test_pg_flo_postgres_sink() {
251 | setup_postgres
252 | create_tables
253 | create_config_files
254 | start_pg_flo_replication
255 | sleep 2
256 | start_pg_flo_worker
257 | simulate_changes
258 |
259 | log "Waiting for pg_flo to process changes..."
260 |
261 | stop_pg_flo_gracefully
262 | verify_changes || return 1
263 | }
264 |
265 | # Run the test
266 | log "Starting pg_flo CDC test with PostgreSQL sink..."
267 | if test_pg_flo_postgres_sink; then
268 | success "All tests passed! 🎉"
269 | exit 0
270 | else
271 | error "Some tests failed. Please check the logs."
272 | show_pg_flo_logs
273 | exit 1
274 | fi
275 |
--------------------------------------------------------------------------------
/internal/scripts/e2e_stream_only.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -euo pipefail
3 |
4 | source "$(dirname "$0")/e2e_common.sh"
5 |
6 | create_users() {
7 | log "Creating test table..."
8 | run_sql "DROP TABLE IF EXISTS public.users;"
9 | run_sql "CREATE TABLE public.users (id serial PRIMARY KEY, data text, created_at timestamp DEFAULT current_timestamp);"
10 | success "Test table created"
11 | }
12 |
13 | start_pg_flo_replication() {
14 | log "Starting pg_flo replication..."
15 | $pg_flo_BIN replicator \
16 | --host "$PG_HOST" \
17 | --port "$PG_PORT" \
18 | --dbname "$PG_DB" \
19 | --user "$PG_USER" \
20 | --password "$PG_PASSWORD" \
21 | --group "group-2" \
22 | --tables "users" \
23 | --schema "public" \
24 | --nats-url "$NATS_URL" \
25 | >"$pg_flo_LOG" 2>&1 &
26 | pg_flo_PID=$!
27 | log "pg_flo started with PID: $pg_flo_PID"
28 | success "pg_flo replication started"
29 | }
30 |
31 | start_pg_flo_worker() {
32 | log "Starting pg_flo worker with file sink..."
33 | $pg_flo_BIN worker file \
34 | --group "group-2" \
35 | --nats-url "$NATS_URL" \
36 | --file-output-dir "$OUTPUT_DIR" \
37 | >"$pg_flo_WORKER_LOG" 2>&1 &
38 | pg_flo_WORKER_PID=$!
39 | log "pg_flo worker started with PID: $pg_flo_WORKER_PID"
40 | success "pg_flo worker started"
41 | }
42 |
43 | simulate_changes() {
44 | log "Simulating changes..."
45 | local insert_count=1000
46 | local update_count=500
47 | local delete_count=250
48 |
49 | log "Simulating inserts..."
50 | for i in $(seq 1 $insert_count); do
51 | run_sql "INSERT INTO public.users (data) VALUES ('Data $i');"
52 | done
53 |
54 | log "Simulating updates..."
55 | for i in $(seq 1 $update_count); do
56 | run_sql "UPDATE public.users SET data = 'Updated data $i' WHERE id = $i;"
57 | done
58 |
59 | log "Simulating deletes..."
60 | for i in $(seq 1 $delete_count); do
61 | run_sql "DELETE FROM public.users WHERE id = $i;"
62 | done
63 |
64 | success "Changes simulated"
65 | }
66 |
67 | verify_changes() {
68 | log "Verifying changes in ${OUTPUT_DIR}..."
69 | local insert_count=$(jq -s '[.[] | select(.Type == "INSERT")] | length' "$OUTPUT_DIR"/*.jsonl)
70 | local update_count=$(jq -s '[.[] | select(.Type == "UPDATE")] | length' "$OUTPUT_DIR"/*.jsonl)
71 | local delete_count=$(jq -s '[.[] | select(.Type == "DELETE")] | length' "$OUTPUT_DIR"/*.jsonl)
72 |
73 | log "INSERT count: $insert_count (expected 1000)"
74 | log "UPDATE count: $update_count (expected 500)"
75 | log "DELETE count: $delete_count (expected 250)"
76 |
77 | if [ "$insert_count" -eq 1000 ] && [ "$update_count" -eq 500 ] && [ "$delete_count" -eq 250 ]; then
78 | success "Change counts match expected values"
79 | return 0
80 | else
81 | error "Change counts do not match expected values"
82 | return 1
83 | fi
84 | }
85 |
86 | # Main test function
87 | test_pg_flo_cdc() {
88 | setup_postgres
89 | create_users
90 | start_pg_flo_replication
91 | start_pg_flo_worker
92 | log "Waiting for replicator to initialize..."
93 | sleep 2
94 | simulate_changes
95 |
96 | log "Waiting for pg_flo to process changes..."
97 | sleep 2
98 |
99 | stop_pg_flo_gracefully
100 | verify_changes || return 1
101 | }
102 |
103 | # Run the test
104 | log "Starting pg_flo CDC test with changes..."
105 | if test_pg_flo_cdc; then
106 | success "All tests passed! 🎉"
107 | exit 0
108 | else
109 | error "Some tests failed. Please check the logs."
110 | show_pg_flo_logs
111 | exit 1
112 | fi
113 |
--------------------------------------------------------------------------------
/internal/scripts/e2e_test_local.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -euo pipefail
3 |
4 | source "$(dirname "$0")/e2e_common.sh"
5 |
6 | setup_docker() {
7 | pkill -9 "pg_flo" || true
8 | rm -Rf /tmp/pg*
9 | log "Setting up Docker environment..."
10 | docker compose -f internal/docker-compose.yml down -v
11 | docker compose -f internal/docker-compose.yml up -d
12 | success "Docker environment is set up"
13 | }
14 |
15 | cleanup_data() {
16 | log "Cleaning up data..."
17 | run_sql "DROP TABLE IF EXISTS public.users;"
18 | run_sql "DROP SCHEMA IF EXISTS internal_pg_flo CASCADE;"
19 | rm -rf /tmp/pg_flo-output
20 | rm -f /tmp/pg_flo.log
21 | success "Data cleanup complete"
22 | }
23 |
24 | cleanup() {
25 | log "Cleaning up..."
26 | docker compose -f internal/docker-compose.yml down -v
27 | success "Cleanup complete"
28 | }
29 |
30 | trap cleanup EXIT
31 |
32 | make build
33 |
34 | setup_docker
35 |
36 | log "Running e2e resume tests..."
37 | if CI=false ruby ./internal/scripts/e2e_resume_test.rb; then
38 | success "e2e resume tests completed successfully"
39 | else
40 | error "e2e resume tests failed"
41 | exit 1
42 | fi
43 |
--------------------------------------------------------------------------------
/internal/scripts/e2e_transform_filter.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -euo pipefail
3 |
4 | source "$(dirname "$0")/e2e_common.sh"
5 |
6 | create_users() {
7 | log "Creating test table..."
8 | run_sql "DROP TABLE IF EXISTS public.users;"
9 | run_sql "CREATE TABLE public.users (
10 | id serial PRIMARY KEY,
11 | email text,
12 | phone text,
13 | age int,
14 | ssn text,
15 | created_at timestamp DEFAULT current_timestamp
16 | );"
17 | success "Test table created"
18 | }
19 |
20 | start_pg_flo_replication() {
21 | log "Starting pg_flo replication..."
22 | if [ -f "$pg_flo_LOG" ]; then
23 | mv "$pg_flo_LOG" "${pg_flo_LOG}.bak"
24 | log "Backed up previous replicator log to ${pg_flo_LOG}.bak"
25 | fi
26 | $pg_flo_BIN replicator \
27 | --host "$PG_HOST" \
28 | --port "$PG_PORT" \
29 | --dbname "$PG_DB" \
30 | --user "$PG_USER" \
31 | --password "$PG_PASSWORD" \
32 | --group "group_transform_filter" \
33 | --tables "users" \
34 | --schema "public" \
35 | --nats-url "$NATS_URL" \
36 | >"$pg_flo_LOG" 2>&1 &
37 | pg_flo_PID=$!
38 | log "pg_flo replicator started with PID: $pg_flo_PID"
39 | success "pg_flo replication started"
40 | }
41 |
42 | start_pg_flo_worker() {
43 | log "Starting pg_flo worker with file sink..."
44 | if [ -f "$pg_flo_WORKER_LOG" ]; then
45 | mv "$pg_flo_WORKER_LOG" "${pg_flo_WORKER_LOG}.bak"
46 | log "Backed up previous worker log to ${pg_flo_WORKER_LOG}.bak"
47 | fi
48 | $pg_flo_BIN worker file \
49 | --group "group_transform_filter" \
50 | --nats-url "$NATS_URL" \
51 | --file-output-dir "$OUTPUT_DIR" \
52 | --rules-config "$(dirname "$0")/rules.yml" \
53 | >"$pg_flo_WORKER_LOG" 2>&1 &
54 | pg_flo_WORKER_PID=$!
55 | log "pg_flo worker started with PID: $pg_flo_WORKER_PID"
56 | success "pg_flo worker started"
57 | }
58 |
59 | simulate_changes() {
60 | log "Simulating changes..."
61 | run_sql "INSERT INTO public.users (email, phone, age, ssn) VALUES
62 | ('john@example.com', '1234567890', 25, '123-45-6789'),
63 | ('jane@example.com', '9876543210', 17, '987-65-4321'),
64 | ('bob@example.com', '5551234567', 30, '555-12-3456');"
65 |
66 | run_sql "UPDATE public.users SET email = 'updated@example.com', phone = '1112223333' WHERE id = 1;"
67 | run_sql "DELETE FROM public.users WHERE age = 30;"
68 | run_sql "DELETE FROM public.users WHERE age = 17;"
69 |
70 | success "Changes simulated"
71 | }
72 |
73 | verify_changes() {
74 | log "Verifying changes..."
75 | local insert_count=$(jq -s '[.[] | select(.Type == "INSERT")] | length' "$OUTPUT_DIR"/*.jsonl)
76 | local update_count=$(jq -s '[.[] | select(.Type == "UPDATE")] | length' "$OUTPUT_DIR"/*.jsonl)
77 | local delete_count=$(jq -s '[.[] | select(.Type == "DELETE")] | length' "$OUTPUT_DIR"/*.jsonl)
78 |
79 | log "INSERT count: $insert_count (expected 2)"
80 | log "UPDATE count: $update_count (expected 1)"
81 | log "DELETE count: $delete_count (expected 2)"
82 |
83 | if [ "$insert_count" -eq 2 ] && [ "$update_count" -eq 1 ] && [ "$delete_count" -eq 2 ]; then
84 | success "Change counts match expected values"
85 | else
86 | error "Change counts do not match expected values"
87 | return 1
88 | fi
89 |
90 | # Verify transformations and filters
91 | local masked_email=$(jq -r 'select(.Type == "INSERT" and .NewTuple.id == 1) | .NewTuple.email' "$OUTPUT_DIR"/*.jsonl)
92 | local formatted_phone=$(jq -r 'select(.Type == "INSERT" and .NewTuple.id == 1) | .NewTuple.phone' "$OUTPUT_DIR"/*.jsonl)
93 | local filtered_insert=$(jq -r 'select(.Type == "INSERT" and .NewTuple.id == 2) | .NewTuple.id' "$OUTPUT_DIR"/*.jsonl)
94 | local updated_email=$(jq -r 'select(.Type == "UPDATE") | .NewTuple.email' "$OUTPUT_DIR"/*.jsonl)
95 | local masked_ssn=$(jq -r 'select(.Type == "INSERT" and .NewTuple.id == 1) | .NewTuple.ssn' "$OUTPUT_DIR"/*.jsonl)
96 | local filtered_age=$(jq -r 'select(.Type == "INSERT" and .NewTuple.id == 2) | .NewTuple.age' "$OUTPUT_DIR"/*.jsonl)
97 |
98 | if [[ "$masked_email" == "j**************m" ]] &&
99 | [[ "$formatted_phone" == "(123) 456-7890" ]] &&
100 | [[ -z "$filtered_insert" ]] &&
101 | [[ "$updated_email" == "u*****************m" ]] &&
102 | [[ "$masked_ssn" == "1XXXXXXXXX9" ]] &&
103 | [[ -z "$filtered_age" ]]; then
104 | success "Transformations and filters applied correctly"
105 | else
106 | error "Transformations or filters not applied correctly"
107 | log "Masked email: $masked_email"
108 | log "Formatted phone: $formatted_phone"
109 | log "Filtered insert: $filtered_insert"
110 | log "Updated email: $updated_email"
111 | log "Masked SSN: $masked_ssn"
112 | log "Filtered age: $filtered_age"
113 | return 1
114 | fi
115 | }
116 |
117 | test_pg_flo_transform_filter() {
118 | setup_postgres
119 | create_users
120 | start_pg_flo_replication
121 | start_pg_flo_worker
122 | sleep 2
123 | simulate_changes
124 |
125 | log "Waiting for pg_flo to process changes..."
126 |
127 | stop_pg_flo_gracefully
128 | verify_changes || return 1
129 | }
130 |
131 | log "Starting pg_flo CDC test with transformations and filters..."
132 | if test_pg_flo_transform_filter; then
133 | success "All tests passed! 🎉"
134 | exit 0
135 | else
136 | error "Some tests failed. Please check the logs."
137 | show_pg_flo_logs
138 | exit 1
139 | fi
140 |
--------------------------------------------------------------------------------
/internal/scripts/multi_tenant_rules.yml:
--------------------------------------------------------------------------------
1 | tables:
2 | events:
3 | - type: filter
4 | column: tenant_id
5 | parameters:
6 | operator: "eq"
7 | value: 3
8 | operations: [INSERT, UPDATE, DELETE]
9 |
--------------------------------------------------------------------------------
/internal/scripts/rules.yml:
--------------------------------------------------------------------------------
1 | tables:
2 | users:
3 | - type: transform
4 | column: email
5 | parameters:
6 | type: mask
7 | mask_char: "*"
8 | allow_empty_deletes: true
9 | operations: [INSERT, UPDATE, DELETE]
10 | - type: transform
11 | column: phone
12 | parameters:
13 | type: regex
14 | pattern: "^(\\d{3})(\\d{3})(\\d{4})$"
15 | replace: "($1) $2-$3"
16 | allow_empty_deletes: true
17 | operations: [INSERT, UPDATE, DELETE]
18 | - type: filter
19 | column: age
20 | parameters:
21 | operator: "gte"
22 | value: 18
23 | allow_empty_deletes: true
24 | operations: [INSERT, UPDATE, DELETE]
25 | - type: transform
26 | column: ssn
27 | parameters:
28 | type: mask
29 | mask_char: "X"
30 | allow_empty_deletes: true
31 | operations: [INSERT, UPDATE, DELETE]
32 |
--------------------------------------------------------------------------------
/internal/scripts/webhook_test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -euo pipefail
3 |
4 | source "$(dirname "$0")/e2e_common.sh"
5 |
6 | WEBHOOK_URL="https://deep-article-49.webhook.cool"
7 |
8 | create_users() {
9 | log "Creating initial test table..."
10 | run_sql "DROP TABLE IF EXISTS public.users;"
11 | run_sql "CREATE TABLE public.users (id serial PRIMARY KEY, data text);"
12 | success "Initial test table created"
13 | }
14 |
15 | start_pg_flo_replication() {
16 | log "Starting pg_flo replication..."
17 | $pg_flo_BIN stream webhook \
18 | --host "$PG_HOST" \
19 | --port "$PG_PORT" \
20 | --dbname "$PG_DB" \
21 | --user "$PG_USER" \
22 | --password "$PG_PASSWORD" \
23 | --group "group-webhook" \
24 | --tables "users" \
25 | --schema "public" \
26 | --status-dir "/tmp" \
27 | --webhook-url "$WEBHOOK_URL" \
28 | --track-ddl >"$pg_flo_LOG" 2>&1 &
29 | pg_flo_PID=$!
30 | log "pg_flo started with PID: $pg_flo_PID"
31 | success "pg_flo replication started"
32 | }
33 |
34 | simulate_changes() {
35 | log "Simulating changes..."
36 | local insert_count=10
37 | local update_count=5
38 | local delete_count=3
39 |
40 | for i in $(seq 1 $insert_count); do
41 | run_sql "INSERT INTO public.users (data) VALUES ('Data $i');"
42 | done
43 |
44 | for i in $(seq 1 $update_count); do
45 | run_sql "UPDATE public.users SET data = 'Updated data $i' WHERE id = $i;"
46 | done
47 |
48 | for i in $(seq 1 $delete_count); do
49 | run_sql "DELETE FROM public.users WHERE id = $i;"
50 | done
51 |
52 | success "Changes simulated"
53 | }
54 |
55 | perform_ddl_operations() {
56 | log "Performing DDL operations..."
57 | run_sql "ALTER TABLE users ADD COLUMN new_column int;"
58 | run_sql "CREATE INDEX CONCURRENTLY idx_users_data ON users (data);"
59 | run_sql "ALTER TABLE users RENAME COLUMN data TO old_data;"
60 | run_sql "DROP INDEX idx_users_data;"
61 | run_sql "ALTER TABLE users ADD COLUMN new_column_one int;"
62 | run_sql "ALTER TABLE users ALTER COLUMN old_data TYPE varchar(255);"
63 | success "DDL operations performed"
64 | }
65 |
66 | test_pg_flo_webhook() {
67 | setup_docker
68 | setup_postgres
69 | create_users
70 | start_pg_flo_replication
71 | sleep 2
72 | simulate_changes
73 | perform_ddl_operations
74 |
75 | log "Waiting for pg_flo to process changes..."
76 | sleep 10
77 |
78 | stop_pg_flo_gracefully
79 | log "Test completed. Please check $WEBHOOK_URL for received events."
80 | }
81 |
82 | # Run the test
83 | log "Starting pg_flo CDC test with webhook sink..."
84 | if test_pg_flo_webhook; then
85 | success "Test completed successfully. Please verify the received events at $WEBHOOK_URL"
86 | exit 0
87 | else
88 | error "Test failed. Please check the logs."
89 | show_pg_flo_logs
90 | exit 1
91 | fi
92 |
--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "os"
6 |
7 | "github.com/pgflo/pg_flo/cmd"
8 | )
9 |
10 | func main() {
11 | if err := cmd.Execute(); err != nil {
12 | fmt.Println(err)
13 | os.Exit(1)
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/pkg/pgflonats/pgflonats.go:
--------------------------------------------------------------------------------
1 | package pgflonats
2 |
3 | import (
4 | "encoding/json"
5 | "errors"
6 | "fmt"
7 | "os"
8 | "time"
9 |
10 | "github.com/jackc/pglogrepl"
11 | "github.com/nats-io/nats.go"
12 | )
13 |
14 | const (
15 | defaultNATSURL = "nats://localhost:4222"
16 | envNATSURL = "PG_FLO_NATS_URL"
17 | )
18 |
19 | // NATSClient represents a client for interacting with NATS
20 | type NATSClient struct {
21 | conn *nats.Conn
22 | js nats.JetStreamContext
23 | stream string
24 | stateBucket string
25 | }
26 |
27 | // State represents the current state of the replication process
28 | type State struct {
29 | LSN pglogrepl.LSN `json:"lsn"`
30 | LastProcessedSeq map[string]uint64
31 | }
32 |
33 | // NewNATSClient creates a new NATS client with the specified configuration, setting up the connection, main stream, and state bucket.
34 | func NewNATSClient(url, stream, group string) (*NATSClient, error) {
35 | if url == "" {
36 | url = os.Getenv(envNATSURL)
37 | if url == "" {
38 | url = defaultNATSURL
39 | }
40 | }
41 |
42 | if stream == "" {
43 | stream = fmt.Sprintf("pgflo_%s_stream", group)
44 | }
45 |
46 | nc, err := nats.Connect(url,
47 | nats.RetryOnFailedConnect(true),
48 | nats.MaxReconnects(-1),
49 | nats.ReconnectWait(time.Second),
50 | nats.DisconnectErrHandler(func(_ *nats.Conn, err error) {
51 | fmt.Printf("Disconnected due to: %s, will attempt reconnects\n", err)
52 | }),
53 | nats.ReconnectHandler(func(nc *nats.Conn) {
54 | fmt.Printf("Reconnected [%s]\n", nc.ConnectedUrl())
55 | }),
56 | nats.ClosedHandler(func(nc *nats.Conn) {
57 | fmt.Printf("Exiting: %v\n", nc.LastError())
58 | }),
59 | )
60 | if err != nil {
61 | return nil, fmt.Errorf("failed to connect to NATS: %w", err)
62 | }
63 |
64 | js, err := nc.JetStream()
65 | if err != nil {
66 | return nil, fmt.Errorf("failed to create JetStream context: %w", err)
67 | }
68 |
69 | // Create the main stream
70 | streamConfig := &nats.StreamConfig{
71 | Name: stream,
72 | Subjects: []string{fmt.Sprintf("pgflo.%s", group)},
73 | Storage: nats.FileStorage,
74 | Retention: nats.LimitsPolicy,
75 | MaxAge: 24 * time.Hour,
76 | }
77 | _, err = js.AddStream(streamConfig)
78 | if err != nil && !errors.Is(err, nats.ErrStreamNameAlreadyInUse) {
79 | return nil, fmt.Errorf("failed to create main stream: %w", err)
80 | }
81 |
82 | // Create the state bucket
83 | stateBucket := fmt.Sprintf("pg_flo_state_%s", group)
84 | _, kvErr := js.KeyValue(stateBucket)
85 | if kvErr != nil {
86 | if errors.Is(kvErr, nats.ErrBucketNotFound) {
87 | _, err = js.CreateKeyValue(&nats.KeyValueConfig{
88 | Bucket: stateBucket,
89 | })
90 | if err != nil {
91 | return nil, fmt.Errorf("failed to create state bucket: %w", err)
92 | }
93 | } else {
94 | return nil, fmt.Errorf("failed to access state bucket: %w", kvErr)
95 | }
96 | }
97 |
98 | return &NATSClient{
99 | conn: nc,
100 | js: js,
101 | stream: stream,
102 | stateBucket: stateBucket,
103 | }, nil
104 | }
105 |
106 | // PublishMessage publishes a message to the specified NATS subject.
107 | func (nc *NATSClient) PublishMessage(subject string, data []byte) error {
108 | _, err := nc.js.Publish(subject, data)
109 | if err != nil {
110 | return fmt.Errorf("failed to publish message: %w", err)
111 | }
112 | return nil
113 | }
114 |
115 | // Close closes the NATS connection.
116 | func (nc *NATSClient) Close() error {
117 | nc.conn.Close()
118 | return nil
119 | }
120 |
121 | // SaveState saves the current replication state to NATS.
122 | func (nc *NATSClient) SaveState(state State) error {
123 | kv, err := nc.js.KeyValue(nc.stateBucket)
124 | if err != nil {
125 | return fmt.Errorf("failed to get KV bucket: %v", err)
126 | }
127 |
128 | data, err := json.Marshal(state)
129 | if err != nil {
130 | return fmt.Errorf("failed to marshal state: %v", err)
131 | }
132 |
133 | _, err = kv.Put("state", data)
134 | if err != nil {
135 | return fmt.Errorf("failed to save state: %w", err)
136 | }
137 |
138 | return nil
139 | }
140 |
141 | // GetState retrieves the last saved state from NATS, initializing a new state if none is found.
142 | func (nc *NATSClient) GetState() (State, error) {
143 | kv, err := nc.js.KeyValue(nc.stateBucket)
144 | if err != nil {
145 | return State{}, fmt.Errorf("failed to get KV bucket: %v", err)
146 | }
147 |
148 | entry, err := kv.Get("state")
149 | if err != nil {
150 | if errors.Is(err, nats.ErrKeyNotFound) {
151 | initialState := State{LastProcessedSeq: make(map[string]uint64)}
152 | // Try to create initial state
153 | if err := nc.SaveState(initialState); err != nil {
154 | // If SaveState fails because the key already exists, fetch it again
155 | if errors.Is(err, nats.ErrKeyExists) || errors.Is(err, nats.ErrUpdateMetaDeleted) {
156 | entry, err = kv.Get("state")
157 | if err != nil {
158 | return State{}, fmt.Errorf("failed to get state after conflict: %v", err)
159 | }
160 | if err := json.Unmarshal(entry.Value(), &initialState); err != nil {
161 | return State{}, fmt.Errorf("failed to unmarshal state after conflict: %v", err)
162 | }
163 | return initialState, nil
164 | }
165 | return State{}, fmt.Errorf("failed to save initial state: %v", err)
166 | }
167 | return initialState, nil
168 | }
169 | return State{}, fmt.Errorf("failed to get state: %v", err)
170 | }
171 |
172 | var state State
173 | if err := json.Unmarshal(entry.Value(), &state); err != nil {
174 | return State{}, fmt.Errorf("failed to unmarshal state: %v", err)
175 | }
176 |
177 | if state.LastProcessedSeq == nil {
178 | state.LastProcessedSeq = make(map[string]uint64)
179 | }
180 | return state, nil
181 | }
182 |
183 | // JetStream returns the JetStream context.
184 | func (nc *NATSClient) JetStream() nats.JetStreamContext {
185 | return nc.js
186 | }
187 |
--------------------------------------------------------------------------------
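
A minimal sketch of how a caller might use the client above end to end: connect, publish to the group's subject, and read back the stored state. The group name, subject, and payload are illustrative only.

```go
package main

import (
	"log"

	"github.com/pgflo/pg_flo/pkg/pgflonats"
)

func main() {
	// An empty URL falls back to PG_FLO_NATS_URL or nats://localhost:4222;
	// an empty stream name becomes pgflo_<group>_stream.
	client, err := pgflonats.NewNATSClient("", "", "demo_group")
	if err != nil {
		log.Fatalf("connect: %v", err)
	}
	defer client.Close()

	// Subjects follow the pgflo.<group> pattern used by the stream config above.
	if err := client.PublishMessage("pgflo.demo_group", []byte(`{"op":"insert"}`)); err != nil {
		log.Fatalf("publish: %v", err)
	}

	// GetState initializes and persists an empty state on first read.
	state, err := client.GetState()
	if err != nil {
		log.Fatalf("get state: %v", err)
	}
	log.Printf("last LSN: %s, tracked subjects: %d", state.LSN, len(state.LastProcessedSeq))
}
```
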
/pkg/replicator/buffer.go:
--------------------------------------------------------------------------------
1 | package replicator
2 |
3 | import (
4 | "sync"
5 | "time"
6 | )
7 |
8 | // Buffer is a structure that holds data to be flushed periodically or when certain conditions are met
9 | type Buffer struct {
10 | data []interface{}
11 | maxRows int
12 | flushTimeout time.Duration
13 | lastFlush time.Time
14 | mutex sync.Mutex
15 | }
16 |
17 | // NewBuffer creates a new Buffer instance
18 | func NewBuffer(maxRows int, flushTimeout time.Duration) *Buffer {
19 | return &Buffer{
20 | data: make([]interface{}, 0, maxRows),
21 | maxRows: maxRows,
22 | flushTimeout: flushTimeout,
23 | lastFlush: time.Now(),
24 | }
25 | }
26 |
27 | // Add adds an item to the buffer and returns true if the buffer should be flushed
28 | func (b *Buffer) Add(item interface{}) bool {
29 | b.mutex.Lock()
30 | defer b.mutex.Unlock()
31 |
32 | b.data = append(b.data, item)
33 |
34 | return b.shouldFlush()
35 | }
36 |
37 | // shouldFlush checks if the buffer should be flushed based on row count or flush timeout
38 | func (b *Buffer) shouldFlush() bool {
39 | return len(b.data) >= b.maxRows || time.Since(b.lastFlush) >= b.flushTimeout
40 | }
41 |
42 | // Flush flushes the buffer and returns the data
43 | func (b *Buffer) Flush() []interface{} {
44 | b.mutex.Lock()
45 | defer b.mutex.Unlock()
46 |
47 | if len(b.data) == 0 {
48 | return nil
49 | }
50 |
51 | data := b.data
52 | b.data = make([]interface{}, 0, b.maxRows)
53 | b.lastFlush = time.Now()
54 |
55 | return data
56 | }
57 |
--------------------------------------------------------------------------------
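
A small sketch of the Buffer API above: Add reports when a flush is due (by row count or elapsed time) and Flush drains the buffered items. The item values are arbitrary placeholders.

```go
package main

import (
	"fmt"
	"time"

	"github.com/pgflo/pg_flo/pkg/replicator"
)

func main() {
	// Flush after 3 buffered items or 5 seconds since the last flush, whichever comes first.
	buf := replicator.NewBuffer(3, 5*time.Second)

	for i := 1; i <= 7; i++ {
		if buf.Add(fmt.Sprintf("change-%d", i)) {
			// Add signaled that a flush is due; Flush drains the data and resets the timer.
			flushed := buf.Flush()
			fmt.Printf("flushed %d items\n", len(flushed))
		}
	}

	// Drain whatever is left at shutdown.
	if rest := buf.Flush(); rest != nil {
		fmt.Printf("final flush: %d items\n", len(rest))
	}
}
```
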
/pkg/replicator/config.go:
--------------------------------------------------------------------------------
1 | package replicator
2 |
3 | import "fmt"
4 |
5 | // Config holds the configuration for the replicator
6 | type Config struct {
7 | Host string
8 | Port uint16
9 | Database string
10 | User string
11 | Password string
12 | Group string
13 | Schema string
14 | Tables []string
15 | TrackDDL bool
16 | }
17 |
18 | // ConnectionString generates and returns a PostgreSQL connection string
19 | func (c Config) ConnectionString() string {
20 | return fmt.Sprintf("postgres://%s:%s@%s:%d/%s", c.User, c.Password, c.Host, c.Port, c.Database)
21 | }
22 |
--------------------------------------------------------------------------------
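
For reference, a short sketch of what ConnectionString produces for a filled-in Config; the values are placeholders. Note that the password is interpolated verbatim, so credentials containing URL-reserved characters would need escaping before they reach this struct.

```go
package main

import (
	"fmt"

	"github.com/pgflo/pg_flo/pkg/replicator"
)

func main() {
	cfg := replicator.Config{
		Host:     "localhost",
		Port:     5432,
		Database: "mydb",
		User:     "myuser",
		Password: "secret",
	}
	// Prints: postgres://myuser:secret@localhost:5432/mydb
	fmt.Println(cfg.ConnectionString())
}
```
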
/pkg/replicator/ddl_replicator.go:
--------------------------------------------------------------------------------
1 | package replicator
2 |
3 | import (
4 | "context"
5 | "database/sql"
6 | "fmt"
7 | "strings"
8 | "time"
9 |
10 | "github.com/jackc/pglogrepl"
11 | "github.com/jackc/pgtype"
12 | "github.com/pgflo/pg_flo/pkg/utils"
13 | )
14 |
15 | type DDLReplicator struct {
16 | DDLConn StandardConnection
17 | BaseRepl *BaseReplicator
18 | Config Config
19 | }
20 |
21 | // NewDDLReplicator creates a new DDLReplicator instance
22 | func NewDDLReplicator(config Config, BaseRepl *BaseReplicator, ddlConn StandardConnection) (*DDLReplicator, error) {
23 | return &DDLReplicator{
24 | Config: config,
25 | BaseRepl: BaseRepl,
26 | DDLConn: ddlConn,
27 | }, nil
28 | }
29 |
30 | // SetupDDLTracking sets up the necessary schema, table, and triggers for DDL tracking
31 | func (d *DDLReplicator) SetupDDLTracking(ctx context.Context) error {
32 | tables, err := d.BaseRepl.GetConfiguredTables(ctx)
33 | if err != nil {
34 | return fmt.Errorf("failed to get configured tables: %w", err)
35 | }
36 |
37 | tableConditions := make([]string, len(tables))
38 | for i, table := range tables {
39 | parts := strings.Split(table, ".")
40 | if len(parts) != 2 {
41 | return fmt.Errorf("invalid table name format: %s", table)
42 | }
43 | tableConditions[i] = fmt.Sprintf("(nspname = '%s' AND relname = '%s')",
44 | parts[0], parts[1])
45 | }
46 | tableFilter := strings.Join(tableConditions, " OR ")
47 |
48 | _, err = d.DDLConn.Exec(ctx, fmt.Sprintf(`
49 | CREATE SCHEMA IF NOT EXISTS internal_pg_flo;
50 |
51 | CREATE TABLE IF NOT EXISTS internal_pg_flo.ddl_log (
52 | id SERIAL PRIMARY KEY,
53 | event_type TEXT NOT NULL,
54 | object_type TEXT,
55 | object_identity TEXT,
56 | table_name TEXT,
57 | ddl_command TEXT NOT NULL,
58 | created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
59 | );
60 |
61 | CREATE OR REPLACE FUNCTION internal_pg_flo.ddl_trigger() RETURNS event_trigger AS $$
62 | DECLARE
63 | obj record;
64 | ddl_command text;
65 | table_name text;
66 | should_track boolean;
67 | BEGIN
68 | SELECT current_query() INTO ddl_command;
69 |
70 | IF TG_EVENT = 'ddl_command_end' THEN
71 | FOR obj IN SELECT * FROM pg_event_trigger_ddl_commands()
72 | LOOP
73 | should_track := false;
74 | -- Extract table name if object type is table or index
75 | IF obj.object_type IN ('table', 'table column') THEN
76 | SELECT nspname || '.' || relname, (%s)
77 | INTO table_name, should_track
78 | FROM pg_class c
79 | JOIN pg_namespace n ON c.relnamespace = n.oid
80 | WHERE c.oid = obj.objid;
81 | ELSIF obj.object_type = 'index' THEN
82 | WITH target_table AS (
83 | SELECT t.oid as table_oid, n.nspname, t.relname
84 | FROM pg_index i
85 | JOIN pg_class t ON t.oid = i.indrelid
86 | JOIN pg_namespace n ON t.relnamespace = n.oid
87 | WHERE i.indexrelid = obj.objid
88 | )
89 | SELECT nspname || '.' || relname, (%s)
90 | INTO table_name, should_track
91 | FROM target_table;
92 | END IF;
93 |
94 | IF should_track THEN
95 | INSERT INTO internal_pg_flo.ddl_log (event_type, object_type, object_identity, table_name, ddl_command)
96 | VALUES (TG_EVENT, obj.object_type, obj.object_identity, table_name, ddl_command);
97 | END IF;
98 | END LOOP;
99 | END IF;
100 | END;
101 | $$ LANGUAGE plpgsql;
102 |
103 | DROP EVENT TRIGGER IF EXISTS pg_flo_ddl_trigger;
104 | CREATE EVENT TRIGGER pg_flo_ddl_trigger ON ddl_command_end
105 | EXECUTE FUNCTION internal_pg_flo.ddl_trigger();
106 | `, tableFilter, tableFilter))
107 |
108 | if err != nil {
109 | d.BaseRepl.Logger.Error().Err(err).Msg("Failed to setup DDL tracking")
110 | return err
111 | }
112 | return nil
113 | }
114 |
115 | // StartDDLReplication starts the DDL replication process
116 | func (d *DDLReplicator) StartDDLReplication(ctx context.Context) {
117 | ticker := time.NewTicker(1 * time.Second)
118 | defer ticker.Stop()
119 |
120 | for {
121 | select {
122 | case <-ctx.Done():
123 | d.BaseRepl.Logger.Info().Msg("DDL replication stopping...")
124 | return
125 | case <-ticker.C:
126 | if err := d.ProcessDDLEvents(ctx); err != nil {
127 | if ctx.Err() != nil {
128 | // Context canceled, exit gracefully
129 | return
130 | }
131 | d.BaseRepl.Logger.Error().Err(err).Msg("Failed to process DDL events")
132 | }
133 | }
134 | }
135 | }
136 |
137 | // ProcessDDLEvents processes DDL events from the log table
138 | func (d *DDLReplicator) ProcessDDLEvents(ctx context.Context) error {
139 | rows, err := d.DDLConn.Query(ctx, `
140 | SELECT id, event_type, object_type, object_identity, table_name, ddl_command, created_at
141 | FROM internal_pg_flo.ddl_log
142 | ORDER BY created_at ASC
143 | `)
144 | if err != nil {
145 | d.BaseRepl.Logger.Error().Err(err).Msg("Failed to query DDL log")
146 | return nil
147 | }
148 | defer rows.Close()
149 |
150 | var processedIDs []int
151 | seenCommands := make(map[string]bool)
152 |
153 | for rows.Next() {
154 | var id int
155 | var eventType, objectType, objectIdentity, ddlCommand string
156 | var tableName sql.NullString
157 | var createdAt time.Time
158 | if err := rows.Scan(&id, &eventType, &objectType, &objectIdentity, &tableName, &ddlCommand, &createdAt); err != nil {
159 | d.BaseRepl.Logger.Error().Err(err).Msg("Failed to scan DDL log row")
160 | return nil
161 | }
162 |
163 | if d.shouldSkipDDLEvent(ddlCommand) {
164 | processedIDs = append(processedIDs, id)
165 | continue
166 | }
167 |
168 | if seenCommands[ddlCommand] {
169 | processedIDs = append(processedIDs, id)
170 | continue
171 | }
172 | seenCommands[ddlCommand] = true
173 |
174 | var schema, table string
175 | if tableName.Valid {
176 | schema, table = splitSchemaAndTable(tableName.String)
177 | } else {
178 | schema, table = "public", ""
179 | }
180 |
181 | cdcMessage := utils.CDCMessage{
182 | Type: utils.OperationDDL,
183 | Schema: schema,
184 | Table: table,
185 | EmittedAt: time.Now(),
186 | Columns: []*pglogrepl.RelationMessageColumn{
187 | {Name: "event_type", DataType: pgtype.TextOID},
188 | {Name: "object_type", DataType: pgtype.TextOID},
189 | {Name: "object_identity", DataType: pgtype.TextOID},
190 | {Name: "ddl_command", DataType: pgtype.TextOID},
191 | {Name: "created_at", DataType: pgtype.TimestamptzOID},
192 | },
193 | NewTuple: &pglogrepl.TupleData{
194 | Columns: []*pglogrepl.TupleDataColumn{
195 | {Data: []byte(eventType)},
196 | {Data: []byte(objectType)},
197 | {Data: []byte(objectIdentity)},
198 | {Data: []byte(ddlCommand)},
199 | {Data: []byte(createdAt.Format(time.RFC3339))},
200 | },
201 | },
202 | }
203 |
204 | if err := d.BaseRepl.PublishToNATS(cdcMessage); err != nil {
205 | d.BaseRepl.Logger.Error().Err(err).Msg("Error during publishing DDL event to NATS")
206 | return nil
207 | }
208 |
209 | processedIDs = append(processedIDs, id)
210 | }
211 |
212 | if err := rows.Err(); err != nil {
213 | d.BaseRepl.Logger.Error().Err(err).Msg("Error during DDL log iteration")
214 | return nil
215 | }
216 |
217 | if len(processedIDs) > 0 {
218 | _, err = d.DDLConn.Exec(ctx, "DELETE FROM internal_pg_flo.ddl_log WHERE id = ANY($1)", processedIDs)
219 | if err != nil {
220 | d.BaseRepl.Logger.Error().Err(err).Msg("Failed to clear processed DDL events")
221 | return nil
222 | }
223 | }
224 |
225 | return nil
226 | }
227 |
228 | // splitSchemaAndTable splits a full table name into schema and table parts
229 | func splitSchemaAndTable(fullName string) (string, string) {
230 | parts := strings.SplitN(fullName, ".", 2)
231 | if len(parts) == 2 {
232 | return parts[0], parts[1]
233 | }
234 | return "public", fullName
235 | }
236 |
237 | // Close closes the DDL connection
238 | func (d *DDLReplicator) Close(ctx context.Context) error {
239 | if d.DDLConn != nil {
240 | return d.DDLConn.Close(ctx)
241 | }
242 | return nil
243 | }
244 |
245 | // Shutdown performs a graceful shutdown of the DDL replicator
246 | func (d *DDLReplicator) Shutdown(ctx context.Context) error {
247 | d.BaseRepl.Logger.Info().Msg("Shutting down DDL replicator")
248 |
249 | // Process remaining events with the provided context
250 | if err := d.ProcessDDLEvents(ctx); err != nil {
251 | d.BaseRepl.Logger.Error().Err(err).Msg("Failed to process final DDL events")
252 | // Continue with shutdown even if processing fails
253 | }
254 |
255 | // Wait for any pending events with respect to context deadline
256 | ticker := time.NewTicker(100 * time.Millisecond)
257 | defer ticker.Stop()
258 |
259 | for {
260 | select {
261 | case <-ctx.Done():
262 | d.BaseRepl.Logger.Warn().Msg("Context deadline exceeded while waiting for DDL events")
263 | return ctx.Err()
264 | case <-ticker.C:
265 | hasEvents, err := d.HasPendingDDLEvents(ctx)
266 | if err != nil {
267 | d.BaseRepl.Logger.Error().Err(err).Msg("Failed to check pending DDL events")
268 | return err
269 | }
270 | if !hasEvents {
271 | d.BaseRepl.Logger.Info().Msg("All DDL events processed")
272 | return d.Close(ctx)
273 | }
274 | }
275 | }
276 | }
277 |
278 | // HasPendingDDLEvents checks if there are pending DDL events in the log
279 | func (d *DDLReplicator) HasPendingDDLEvents(ctx context.Context) (bool, error) {
280 | var count int
281 | err := d.DDLConn.QueryRow(ctx, `
282 | SELECT COUNT(*) FROM internal_pg_flo.ddl_log
283 | `).Scan(&count)
284 | if err != nil {
285 | return false, err
286 | }
287 | return count > 0, nil
288 | }
289 |
290 | // shouldSkipDDLEvent determines if a DDL event should be skipped from processing
291 | func (d *DDLReplicator) shouldSkipDDLEvent(ddlCommand string) bool {
292 | if strings.Contains(ddlCommand, "internal_pg_flo.") {
293 | return true
294 | }
295 |
296 | publicationName := GeneratePublicationName(d.Config.Group)
297 | if strings.Contains(ddlCommand, fmt.Sprintf("CREATE PUBLICATION %q", publicationName)) ||
298 | strings.Contains(ddlCommand, fmt.Sprintf("DROP PUBLICATION %q", publicationName)) ||
299 | strings.Contains(ddlCommand, "CREATE PUBLICATION pg_flo_") ||
300 | strings.Contains(ddlCommand, "DROP PUBLICATION pg_flo_") {
301 | return true
302 | }
303 |
304 | return false
305 | }
306 |
--------------------------------------------------------------------------------
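
splitSchemaAndTable and shouldSkipDDLEvent are unexported, so they can only be exercised from inside the package. A minimal table-driven test sketch for the name-splitting helper (the test name and cases are illustrative) might look like:

```go
package replicator

import "testing"

// Illustrative cases only: qualified names split on the first dot,
// unqualified names default to the public schema.
func TestSplitSchemaAndTableSketch(t *testing.T) {
	cases := map[string][2]string{
		"app.users": {"app", "users"},
		"users":     {"public", "users"},
	}
	for in, want := range cases {
		schema, table := splitSchemaAndTable(in)
		if schema != want[0] || table != want[1] {
			t.Errorf("splitSchemaAndTable(%q) = %q.%q, want %q.%q", in, schema, table, want[0], want[1])
		}
	}
}
```
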
/pkg/replicator/errors.go:
--------------------------------------------------------------------------------
1 | package replicator
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | )
7 |
8 | var (
9 | ErrReplicatorAlreadyStarted = errors.New("replicator already started")
10 | ErrReplicatorNotStarted = errors.New("replicator not started")
11 | ErrReplicatorAlreadyStopped = errors.New("replicator already stopped")
12 | )
13 |
14 | // ReplicationError represents an error that occurred during replication.
15 | type ReplicationError struct {
16 | Op string // The operation that caused the error
17 | Err error // The underlying error
18 | }
19 |
20 | // Error returns a formatted error message.
21 | func (e *ReplicationError) Error() string {
22 | return fmt.Sprintf("replication error during %s: %v", e.Op, e.Err)
23 | }
24 |
--------------------------------------------------------------------------------
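
A short sketch of how the error types above compose with the standard errors package; the operation name and wrapped error are placeholders.

```go
package main

import (
	"errors"
	"fmt"

	"github.com/pgflo/pg_flo/pkg/replicator"
)

func main() {
	// Wrap a low-level failure with the operation that produced it.
	err := &replicator.ReplicationError{Op: "StartReplication", Err: errors.New("connection reset")}
	fmt.Println(err) // replication error during StartReplication: connection reset

	// Sentinel errors are plain values, so callers can match them through wrapping.
	startErr := fmt.Errorf("starting twice: %w", replicator.ErrReplicatorAlreadyStarted)
	if errors.Is(startErr, replicator.ErrReplicatorAlreadyStarted) {
		fmt.Println("replicator already started")
	}
}
```
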
/pkg/replicator/factory.go:
--------------------------------------------------------------------------------
1 | package replicator
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | )
7 |
8 | // ReplicatorFactory defines the interface for creating replicators
9 | type Factory interface {
10 | CreateReplicator(config Config, natsClient NATSClient) (Replicator, error)
11 | }
12 |
13 | // BaseFactory provides common functionality for factories
14 | type BaseFactory struct{}
15 |
16 | // CreateConnections creates replication and standard connections
17 | func (f *BaseFactory) CreateConnections(config Config) (ReplicationConnection, StandardConnection, error) {
18 | replicationConn := NewReplicationConnection(config)
19 | if err := replicationConn.Connect(context.Background()); err != nil {
20 | return nil, nil, fmt.Errorf("failed to connect for replication: %v", err)
21 | }
22 |
23 | standardConn, err := NewStandardConnection(config)
24 | if err != nil {
25 | return nil, nil, fmt.Errorf("failed to create standard connection: %v", err)
26 | }
27 |
28 | return replicationConn, standardConn, nil
29 | }
30 |
31 | // StreamReplicatorFactory creates `StreamReplicator` instances
32 | type StreamReplicatorFactory struct {
33 | BaseFactory
34 | }
35 |
36 | // CreateReplicator creates a new `StreamReplicator`
37 | func (f *StreamReplicatorFactory) CreateReplicator(config Config, natsClient NATSClient) (Replicator, error) {
38 | replicationConn, standardConn, err := f.CreateConnections(config)
39 | if err != nil {
40 | return nil, err
41 | }
42 |
43 | baseReplicator := NewBaseReplicator(config, replicationConn, standardConn, natsClient)
44 | return &StreamReplicator{BaseReplicator: baseReplicator}, nil
45 | }
46 |
47 | // CopyAndStreamReplicatorFactory creates `CopyAndStreamReplicator` instances
48 | type CopyAndStreamReplicatorFactory struct {
49 | BaseFactory
50 | MaxCopyWorkersPerTable int
51 | CopyOnly bool
52 | }
53 |
54 | // CreateReplicator creates a new `CopyAndStreamReplicator`
55 | func (f *CopyAndStreamReplicatorFactory) CreateReplicator(config Config, natsClient NATSClient) (Replicator, error) {
56 | replicationConn, standardConn, err := f.CreateConnections(config)
57 | if err != nil {
58 | return nil, err
59 | }
60 |
61 | baseReplicator := NewBaseReplicator(config, replicationConn, standardConn, natsClient)
62 |
63 | if f.MaxCopyWorkersPerTable <= 0 {
64 | f.MaxCopyWorkersPerTable = 4
65 | }
66 |
67 | return NewCopyAndStreamReplicator(
68 | baseReplicator,
69 | f.MaxCopyWorkersPerTable,
70 | f.CopyOnly,
71 | ), nil
72 | }
73 |
--------------------------------------------------------------------------------
/pkg/replicator/interfaces.go:
--------------------------------------------------------------------------------
1 | package replicator
2 |
3 | import (
4 | "context"
5 |
6 | "github.com/jackc/pglogrepl"
7 | "github.com/jackc/pgx/v5"
8 | "github.com/jackc/pgx/v5/pgconn"
9 | "github.com/jackc/pgx/v5/pgproto3"
10 | "github.com/nats-io/nats.go"
11 | "github.com/pgflo/pg_flo/pkg/pgflonats"
12 | )
13 |
14 | type Replicator interface {
15 | Start(ctx context.Context) error
16 | Stop(ctx context.Context) error
17 | }
18 |
19 | type ReplicationConnection interface {
20 | Connect(ctx context.Context) error
21 | Close(ctx context.Context) error
22 | CreateReplicationSlot(ctx context.Context, slotName string) (pglogrepl.CreateReplicationSlotResult, error)
23 | StartReplication(ctx context.Context, slotName string, startLSN pglogrepl.LSN, options pglogrepl.StartReplicationOptions) error
24 | ReceiveMessage(ctx context.Context) (pgproto3.BackendMessage, error)
25 | SendStandbyStatusUpdate(ctx context.Context, status pglogrepl.StandbyStatusUpdate) error
26 | }
27 |
28 | type StandardConnection interface {
29 | Connect(ctx context.Context) error
30 | Close(ctx context.Context) error
31 | Exec(ctx context.Context, sql string, arguments ...interface{}) (pgconn.CommandTag, error)
32 | Query(ctx context.Context, sql string, args ...interface{}) (pgx.Rows, error)
33 | QueryRow(ctx context.Context, sql string, args ...interface{}) pgx.Row
34 | BeginTx(ctx context.Context, txOptions pgx.TxOptions) (pgx.Tx, error)
35 | Acquire(ctx context.Context) (PgxPoolConn, error)
36 | }
37 |
38 | type PgxPoolConn interface {
39 | BeginTx(ctx context.Context, txOptions pgx.TxOptions) (pgx.Tx, error)
40 | Exec(ctx context.Context, sql string, arguments ...interface{}) (pgconn.CommandTag, error)
41 | Query(ctx context.Context, sql string, args ...interface{}) (pgx.Rows, error)
42 | QueryRow(ctx context.Context, sql string, args ...interface{}) pgx.Row
43 | Release()
44 | }
45 |
46 | type NATSClient interface {
47 | PublishMessage(subject string, data []byte) error
48 | Close() error
49 | SaveState(state pgflonats.State) error
50 | GetState() (pgflonats.State, error)
51 | JetStream() nats.JetStreamContext
52 | }
53 |
--------------------------------------------------------------------------------
/pkg/replicator/json_encoder.go:
--------------------------------------------------------------------------------
1 | package replicator
2 |
3 | import (
4 | "context"
5 | "fmt"
6 |
7 | "github.com/pgflo/pg_flo/pkg/utils"
8 | )
9 |
10 | // InitializeOIDMap initializes the OID to type name map with custom types from the database
11 | func InitializeOIDMap(ctx context.Context, conn StandardConnection) error {
12 | rows, err := conn.Query(ctx, `
13 | SELECT oid, typname
14 | FROM pg_type
15 | WHERE typtype = 'b' AND oid > 10000 -- only user-defined base types (built-in OIDs are excluded)
16 | `)
17 | if err != nil {
18 | return fmt.Errorf("failed to query pg_type: %w", err)
19 | }
20 | defer rows.Close()
21 |
22 | for rows.Next() {
23 | var oid uint32
24 | var typeName string
25 | if err := rows.Scan(&oid, &typeName); err != nil {
26 | return fmt.Errorf("failed to scan row: %w", err)
27 | }
28 | utils.OidToTypeName[oid] = typeName
29 | }
30 |
31 | if err := rows.Err(); err != nil {
32 | return fmt.Errorf("error iterating over rows: %w", err)
33 | }
34 |
35 | return nil
36 | }
37 |
--------------------------------------------------------------------------------
/pkg/replicator/replication_connection.go:
--------------------------------------------------------------------------------
1 | package replicator
2 |
3 | import (
4 | "context"
5 | "fmt"
6 |
7 | "github.com/jackc/pglogrepl"
8 | "github.com/jackc/pgx/v5"
9 | "github.com/jackc/pgx/v5/pgconn"
10 | "github.com/jackc/pgx/v5/pgproto3"
11 | )
12 |
13 | // PostgresReplicationConnection implements the ReplicationConnection interface
14 | // for PostgreSQL databases.
15 | type PostgresReplicationConnection struct {
16 | Config Config
17 | Conn *pgconn.PgConn
18 | }
19 |
20 | // NewReplicationConnection creates a new PostgresReplicationConnection instance.
21 | func NewReplicationConnection(config Config) ReplicationConnection {
22 | return &PostgresReplicationConnection{
23 | Config: config,
24 | }
25 | }
26 |
27 | // Connect establishes a connection to the PostgreSQL database for replication.
28 | func (rc *PostgresReplicationConnection) Connect(ctx context.Context) error {
29 | config, err := pgx.ParseConfig(fmt.Sprintf("host=%s port=%d dbname=%s user=%s password=%s",
30 | rc.Config.Host,
31 | rc.Config.Port,
32 | rc.Config.Database,
33 | rc.Config.User,
34 | rc.Config.Password))
35 | if err != nil {
36 | return fmt.Errorf("failed to parse connection config: %v", err)
37 | }
38 |
39 | config.RuntimeParams["replication"] = "database"
40 |
41 | conn, err := pgx.ConnectConfig(ctx, config)
42 | if err != nil {
43 | return fmt.Errorf("failed to connect to PostgreSQL: %v", err)
44 | }
45 |
46 | rc.Conn = conn.PgConn()
47 | return nil
48 | }
49 |
50 | // Close terminates the connection to the PostgreSQL database.
51 | func (rc *PostgresReplicationConnection) Close(ctx context.Context) error {
52 | return rc.Conn.Close(ctx)
53 | }
54 |
55 | // CreateReplicationSlot creates a new replication slot in the PostgreSQL database.
56 | func (rc *PostgresReplicationConnection) CreateReplicationSlot(ctx context.Context, slotName string) (pglogrepl.CreateReplicationSlotResult, error) {
57 | return pglogrepl.CreateReplicationSlot(ctx, rc.Conn, slotName, "pgoutput", pglogrepl.CreateReplicationSlotOptions{Temporary: false})
58 | }
59 |
60 | // StartReplication initiates the replication process from the specified LSN.
61 | func (rc *PostgresReplicationConnection) StartReplication(ctx context.Context, slotName string, startLSN pglogrepl.LSN, options pglogrepl.StartReplicationOptions) error {
62 | return pglogrepl.StartReplication(ctx, rc.Conn, slotName, startLSN, options)
63 | }
64 |
65 | // ReceiveMessage receives a message from the PostgreSQL replication stream.
66 | func (rc *PostgresReplicationConnection) ReceiveMessage(ctx context.Context) (pgproto3.BackendMessage, error) {
67 | return rc.Conn.ReceiveMessage(ctx)
68 | }
69 |
70 | // SendStandbyStatusUpdate sends a status update to the PostgreSQL server during replication.
71 | func (rc *PostgresReplicationConnection) SendStandbyStatusUpdate(ctx context.Context, status pglogrepl.StandbyStatusUpdate) error {
72 | return pglogrepl.SendStandbyStatusUpdate(ctx, rc.Conn, status)
73 | }
74 |
--------------------------------------------------------------------------------
/pkg/replicator/standard_connection.go:
--------------------------------------------------------------------------------
1 | package replicator
2 |
3 | import (
4 | "context"
5 | "fmt"
6 |
7 | "github.com/jackc/pgx/v5"
8 | "github.com/jackc/pgx/v5/pgconn"
9 | "github.com/jackc/pgx/v5/pgxpool"
10 | )
11 |
12 | // StandardConnectionImpl implements the StandardConnection interface for PostgreSQL databases.
13 | type StandardConnectionImpl struct {
14 | pool *pgxpool.Pool
15 | }
16 |
17 | // NewStandardConnection creates a new StandardConnectionImpl instance and establishes a connection.
18 | func NewStandardConnection(config Config) (*StandardConnectionImpl, error) {
19 | connString := fmt.Sprintf("host=%s port=%d dbname=%s user=%s password=%s",
20 | config.Host,
21 | config.Port,
22 | config.Database,
23 | config.User,
24 | config.Password)
25 |
26 | poolConfig, err := pgxpool.ParseConfig(connString)
27 | if err != nil {
28 | return nil, fmt.Errorf("unable to parse connection string: %v", err)
29 | }
30 |
31 | poolConfig.MaxConns = 20
32 |
33 | pool, err := pgxpool.NewWithConfig(context.Background(), poolConfig)
34 | if err != nil {
35 | return nil, fmt.Errorf("unable to create connection pool: %v", err)
36 | }
37 | return &StandardConnectionImpl{pool: pool}, nil
38 | }
39 |
40 | // Connect establishes a connection to the PostgreSQL database.
41 | func (s *StandardConnectionImpl) Connect(ctx context.Context) error {
42 | return s.pool.Ping(ctx)
43 | }
44 |
45 | // Close terminates the connection to the PostgreSQL database.
46 | func (s *StandardConnectionImpl) Close(_ context.Context) error {
47 | s.pool.Close()
48 | return nil
49 | }
50 |
51 | // Exec executes a SQL query without returning any rows.
52 | func (s *StandardConnectionImpl) Exec(ctx context.Context, sql string, arguments ...interface{}) (pgconn.CommandTag, error) {
53 | return s.pool.Exec(ctx, sql, arguments...)
54 | }
55 |
56 | // BeginTx starts a new transaction with the specified options.
57 | func (s *StandardConnectionImpl) BeginTx(ctx context.Context, txOptions pgx.TxOptions) (pgx.Tx, error) {
58 | return s.pool.BeginTx(ctx, txOptions)
59 | }
60 |
61 | // QueryRow executes a query that is expected to return at most one row.
62 | func (s *StandardConnectionImpl) QueryRow(ctx context.Context, sql string, args ...interface{}) pgx.Row {
63 | return s.pool.QueryRow(ctx, sql, args...)
64 | }
65 |
66 | // Query executes a query that returns rows, typically a SELECT.
67 | func (s *StandardConnectionImpl) Query(ctx context.Context, sql string, args ...interface{}) (pgx.Rows, error) {
68 | return s.pool.Query(ctx, sql, args...)
69 | }
70 |
71 | // Acquire acquires a connection from the pool.
72 | func (s *StandardConnectionImpl) Acquire(ctx context.Context) (PgxPoolConn, error) {
73 | conn, err := s.pool.Acquire(ctx)
74 | if err != nil {
75 | return nil, err
76 | }
77 | return &PgxPoolConnWrapper{Conn: conn}, nil
78 | }
79 |
80 | type PgxPoolConnWrapper struct {
81 | *pgxpool.Conn
82 | }
83 |
--------------------------------------------------------------------------------
/pkg/replicator/stream_replicator.go:
--------------------------------------------------------------------------------
1 | package replicator
2 |
3 | import (
4 | "context"
5 |
6 | "github.com/jackc/pglogrepl"
7 | )
8 |
9 | type StreamReplicator struct {
10 | *BaseReplicator
11 | }
12 |
13 | func NewStreamReplicator(base *BaseReplicator) *StreamReplicator {
14 | return &StreamReplicator{
15 | BaseReplicator: base,
16 | }
17 | }
18 |
19 | func (r *StreamReplicator) Start(ctx context.Context) error {
20 | if err := r.BaseReplicator.Start(ctx); err != nil {
21 | return err
22 | }
23 |
24 | startLSN, err := r.GetLastState()
25 | if err != nil {
26 | r.Logger.Warn().Err(err).Msg("Failed to get last LSN, starting from 0")
27 | startLSN = pglogrepl.LSN(0)
28 | }
29 |
30 | r.Logger.Info().Str("startLSN", startLSN.String()).Msg("Starting replication")
31 |
32 | errChan := make(chan error, 1)
33 | go func() {
34 | errChan <- r.StartReplicationFromLSN(ctx, startLSN, r.stopChan)
35 | }()
36 |
37 | select {
38 | case <-ctx.Done():
39 | return ctx.Err()
40 | case err := <-errChan:
41 | return err
42 | }
43 | }
44 |
45 | func (r *StreamReplicator) Stop(ctx context.Context) error {
46 | return r.BaseReplicator.Stop(ctx)
47 | }
48 |
--------------------------------------------------------------------------------
/pkg/replicator/table_handling.go:
--------------------------------------------------------------------------------
1 | package replicator
2 |
3 | import (
4 | "context"
5 | "fmt"
6 |
7 | "github.com/pgflo/pg_flo/pkg/utils"
8 | )
9 |
10 | // AddPrimaryKeyInfo adds replication key information to the CDCMessage
11 | func (r *BaseReplicator) AddPrimaryKeyInfo(message *utils.CDCMessage, table string) {
12 | if key, ok := r.TableReplicationKeys[table]; ok {
13 | message.ReplicationKey = key
14 | } else {
15 | r.Logger.Error().
16 | Str("table", table).
17 | Msg("No replication key information found for table. This should not happen as validation is done during initialization")
18 | }
19 | }
20 |
21 | // InitializePrimaryKeyInfo initializes primary key information for all tables
22 | func (r *BaseReplicator) InitializePrimaryKeyInfo() error {
23 | query := `
24 | WITH table_info AS (
25 | SELECT
26 | t.tablename,
27 | c.relreplident,
28 | (
29 | SELECT array_agg(a.attname ORDER BY array_position(i.indkey, a.attnum))
30 | FROM pg_index i
31 | JOIN pg_attribute a ON a.attrelid = c.oid AND a.attnum = ANY(i.indkey)
32 | WHERE i.indrelid = c.oid AND i.indisprimary
33 | ) as pk_columns,
34 | (
35 | SELECT array_agg(a.attname ORDER BY array_position(i.indkey, a.attnum))
36 | FROM pg_index i
37 | JOIN pg_attribute a ON a.attrelid = c.oid AND a.attnum = ANY(i.indkey)
38 | WHERE i.indrelid = c.oid AND i.indisunique AND NOT i.indisprimary
39 | LIMIT 1
40 | ) as unique_columns
41 | FROM pg_tables t
42 | JOIN pg_class c ON t.tablename = c.relname
43 | JOIN pg_namespace n ON c.relnamespace = n.oid
44 | WHERE t.schemaname = $1
45 | )
46 | SELECT
47 | tablename,
48 | relreplident::text,
49 | COALESCE(pk_columns, ARRAY[]::text[]) as pk_columns,
50 | COALESCE(unique_columns, ARRAY[]::text[]) as unique_columns
51 | FROM table_info;
52 | `
53 |
54 | rows, err := r.StandardConn.Query(context.Background(), query, r.Config.Schema)
55 | if err != nil {
56 | return fmt.Errorf("failed to query replication key info: %v", err)
57 | }
58 | defer rows.Close()
59 |
60 | r.TableReplicationKeys = make(map[string]utils.ReplicationKey)
61 |
62 | for rows.Next() {
63 | var (
64 | tableName string
65 | replicaIdentity string
66 | pkColumns []string
67 | uniqueColumns []string
68 | )
69 |
70 | if err := rows.Scan(&tableName, &replicaIdentity, &pkColumns, &uniqueColumns); err != nil {
71 | return fmt.Errorf("failed to scan row: %v", err)
72 | }
73 |
74 | key := utils.ReplicationKey{}
75 |
76 | switch {
77 | case len(pkColumns) > 0:
78 | key = utils.ReplicationKey{
79 | Type: utils.ReplicationKeyPK,
80 | Columns: pkColumns,
81 | }
82 | case len(uniqueColumns) > 0:
83 | key = utils.ReplicationKey{
84 | Type: utils.ReplicationKeyUnique,
85 | Columns: uniqueColumns,
86 | }
87 | case replicaIdentity == "f":
88 | key = utils.ReplicationKey{
89 | Type: utils.ReplicationKeyFull,
90 | Columns: nil,
91 | }
92 | }
93 |
94 | if err := r.validateTableReplicationKey(tableName, key); err != nil {
95 | r.Logger.Warn().
96 | Str("table", tableName).
97 | Str("replica_identity", replicaIdentity).
98 | Str("key_type", string(key.Type)).
99 | Strs("columns", key.Columns).
100 | Err(err).
101 | Msg("Invalid replication key configuration")
102 | continue
103 | }
104 |
105 | r.TableReplicationKeys[tableName] = key
106 |
107 | r.Logger.Debug().
108 | Str("table", tableName).
109 | Str("key_type", string(key.Type)).
110 | Strs("columns", key.Columns).
111 | Str("replica_identity", replicaIdentity).
112 | Msg("Initialized replication key configuration")
113 | }
114 |
115 | return rows.Err()
116 | }
117 |
118 | // GetConfiguredTables returns all tables based on configuration
119 | // If no specific tables are configured, returns all tables from the configured schema
120 | func (r *BaseReplicator) GetConfiguredTables(ctx context.Context) ([]string, error) {
121 | if len(r.Config.Tables) > 0 {
122 | fullyQualifiedTables := make([]string, len(r.Config.Tables))
123 | for i, table := range r.Config.Tables {
124 | fullyQualifiedTables[i] = fmt.Sprintf("%s.%s", r.Config.Schema, table)
125 | }
126 | return fullyQualifiedTables, nil
127 | }
128 |
129 | rows, err := r.StandardConn.Query(ctx, `
130 | SELECT schemaname || '.' || tablename
131 | FROM pg_tables
132 | WHERE schemaname = $1
133 | AND schemaname NOT IN ('pg_catalog', 'information_schema', 'internal_pg_flo')
134 | `, r.Config.Schema)
135 | if err != nil {
136 | return nil, fmt.Errorf("failed to query tables: %v", err)
137 | }
138 | defer rows.Close()
139 |
140 | var tables []string
141 | for rows.Next() {
142 | var tableName string
143 | if err := rows.Scan(&tableName); err != nil {
144 | return nil, fmt.Errorf("failed to scan table name: %v", err)
145 | }
146 | tables = append(tables, tableName)
147 | }
148 |
149 | return tables, nil
150 | }
151 |
152 | func (r *BaseReplicator) validateTableReplicationKey(tableName string, key utils.ReplicationKey) error {
153 | if !key.IsValid() {
154 | return fmt.Errorf(
155 | "table %q requires one of the following:\n"+
156 | "\t1. A PRIMARY KEY constraint\n"+
157 | "\t2. A UNIQUE constraint\n"+
158 | "\t3. REPLICA IDENTITY FULL (ALTER TABLE %s REPLICA IDENTITY FULL)",
159 | tableName, tableName)
160 | }
161 | return nil
162 | }
163 |
--------------------------------------------------------------------------------
/pkg/replicator/tests/buffer_test.go:
--------------------------------------------------------------------------------
1 | package replicator_test
2 |
3 | import (
4 | "testing"
5 | "time"
6 |
7 | "github.com/pgflo/pg_flo/pkg/replicator"
8 | "github.com/stretchr/testify/assert"
9 | "github.com/stretchr/testify/mock"
10 | )
11 |
12 | func TestBuffer(t *testing.T) {
13 | t.Run("NewBuffer", func(t *testing.T) {
14 | buffer := replicator.NewBuffer(10, 5*time.Second)
15 | assert.NotNil(t, buffer)
16 | })
17 |
18 | t.Run("Add and Flush", func(t *testing.T) {
19 | buffer := replicator.NewBuffer(10, 5*time.Second)
20 |
21 | // Add items
22 | for i := 0; i < 5; i++ {
23 | shouldFlush := buffer.Add([]byte("test"))
24 | assert.False(t, shouldFlush)
25 | }
26 |
27 | // Flush
28 | data := buffer.Flush()
29 | assert.Len(t, data, 5)
30 | assert.Equal(t, []byte("test"), data[0])
31 |
32 | // Buffer should be empty after flush
33 | emptyData := buffer.Flush()
34 | assert.Nil(t, emptyData)
35 | })
36 |
37 | t.Run("Flush on MaxRows", func(t *testing.T) {
38 | buffer := replicator.NewBuffer(3, 5*time.Second)
39 |
40 | buffer.Add([]byte("test1"))
41 | buffer.Add([]byte("test2"))
42 | shouldFlush := buffer.Add([]byte("test3"))
43 |
44 | assert.True(t, shouldFlush)
45 |
46 | data := buffer.Flush()
47 | assert.Len(t, data, 3)
48 | })
49 |
50 | t.Run("Flush on Timeout", func(t *testing.T) {
51 | buffer := replicator.NewBuffer(10, 100*time.Millisecond)
52 |
53 | buffer.Add([]byte("test"))
54 | time.Sleep(150 * time.Millisecond)
55 |
56 | shouldFlush := buffer.Add([]byte("test"))
57 | assert.True(t, shouldFlush)
58 |
59 | data := buffer.Flush()
60 | assert.Len(t, data, 2)
61 | })
62 |
63 | t.Run("Concurrent Access", func(t *testing.T) {
64 | buffer := replicator.NewBuffer(100, 5*time.Second)
65 |
66 | done := make(chan bool)
67 | for i := 0; i < 10; i++ {
68 | go func() {
69 | for j := 0; j < 10; j++ {
70 | buffer.Add([]byte("test"))
71 | }
72 | done <- true
73 | }()
74 | }
75 |
76 | for i := 0; i < 10; i++ {
77 | <-done
78 | }
79 |
80 | data := buffer.Flush()
81 | assert.Len(t, data, 100)
82 | })
83 |
84 | t.Run("BufferFlush", func(t *testing.T) {
85 | mockSink := new(MockSink)
86 | buffer := replicator.NewBuffer(5, 1*time.Second)
87 |
88 | mockSink.On("WriteBatch", mock.Anything).Return(nil)
89 |
90 | for i := 0; i < 5; i++ {
91 | shouldFlush := buffer.Add(i)
92 | if shouldFlush {
93 | data := buffer.Flush()
94 | err := mockSink.WriteBatch(data)
95 | assert.NoError(t, err)
96 | }
97 | }
98 |
99 | mockSink.AssertNumberOfCalls(t, "WriteBatch", 1)
100 | mockSink.AssertExpectations(t)
101 | })
102 | }
103 |
--------------------------------------------------------------------------------
/pkg/replicator/tests/ddl_replicator_test.go:
--------------------------------------------------------------------------------
1 | package replicator_test
2 |
3 | import (
4 | "context"
5 | "strings"
6 | "testing"
7 | "time"
8 |
9 | "github.com/jackc/pgx/v5/pgconn"
10 | "github.com/pgflo/pg_flo/pkg/replicator"
11 | "github.com/pgflo/pg_flo/pkg/utils"
12 | "github.com/rs/zerolog"
13 | "github.com/stretchr/testify/assert"
14 | "github.com/stretchr/testify/mock"
15 | )
16 |
17 | func TestDDLReplicator(t *testing.T) {
18 | t.Run("NewDDLReplicator", func(t *testing.T) {
19 | mockBaseReplicator := &replicator.BaseReplicator{
20 | Logger: utils.NewZerologLogger(zerolog.New(nil)),
21 | }
22 | mockStandardConn := &MockStandardConnection{}
23 | config := replicator.Config{}
24 |
25 | ddlReplicator, err := replicator.NewDDLReplicator(config, mockBaseReplicator, mockStandardConn)
26 |
27 | assert.NoError(t, err)
28 | assert.NotNil(t, ddlReplicator)
29 | assert.Equal(t, config, ddlReplicator.Config)
30 | assert.Equal(t, mockStandardConn, ddlReplicator.DDLConn)
31 | })
32 |
33 | t.Run("SetupDDLTracking", func(t *testing.T) {
34 | mockStandardConn := &MockStandardConnection{}
35 | mockBaseRepl := &replicator.BaseReplicator{
36 | Logger: utils.NewZerologLogger(zerolog.New(zerolog.NewConsoleWriter()).With().Timestamp().Logger()),
37 | StandardConn: mockStandardConn,
38 | Config: replicator.Config{
39 | Schema: "public",
40 | Tables: []string{"test_table"},
41 | },
42 | }
43 |
44 | ddlReplicator := &replicator.DDLReplicator{
45 | DDLConn: mockStandardConn,
46 | BaseRepl: mockBaseRepl,
47 | }
48 |
49 | ctx := context.Background()
50 |
51 | mockStandardConn.On("Exec", ctx, mock.AnythingOfType("string"), mock.Anything).Return(pgconn.CommandTag{}, nil).
52 | Run(func(args mock.Arguments) {
53 | sql := args.Get(1).(string)
54 | assert.Contains(t, sql, "CREATE SCHEMA IF NOT EXISTS internal_pg_flo")
55 | assert.Contains(t, sql, "CREATE TABLE IF NOT EXISTS internal_pg_flo.ddl_log")
56 | assert.Contains(t, sql, "CREATE OR REPLACE FUNCTION internal_pg_flo.ddl_trigger()")
57 | assert.Contains(t, sql, "CREATE EVENT TRIGGER pg_flo_ddl_trigger")
58 | })
59 |
60 | err := ddlReplicator.SetupDDLTracking(ctx)
61 |
62 | assert.NoError(t, err)
63 | mockStandardConn.AssertExpectations(t)
64 | })
65 |
66 | t.Run("StartDDLReplication", func(t *testing.T) {
67 | mockStandardConn := &MockStandardConnection{}
68 | mockBaseReplicator := &replicator.BaseReplicator{
69 | Logger: utils.NewZerologLogger(zerolog.New(zerolog.NewConsoleWriter()).With().Timestamp().Logger()),
70 | }
71 | ddlReplicator := &replicator.DDLReplicator{
72 | DDLConn: mockStandardConn,
73 | BaseRepl: mockBaseReplicator,
74 | }
75 |
76 | ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
77 | defer cancel()
78 |
79 | mockRows := &MockRows{}
80 | mockStandardConn.On("Query", mock.Anything, mock.MatchedBy(func(sql string) bool {
81 | expectedParts := []string{
82 | "SELECT id, event_type, object_type, object_identity, table_name, ddl_command, created_at",
83 | "FROM internal_pg_flo.ddl_log",
84 | "ORDER BY created_at ASC",
85 | }
86 | for _, part := range expectedParts {
87 | if !strings.Contains(sql, part) {
88 | return false
89 | }
90 | }
91 | return true
92 | }), mock.Anything).Return(mockRows, nil).Maybe()
93 |
94 | mockRows.On("Next").Return(false).Maybe()
95 | mockRows.On("Err").Return(nil).Maybe()
96 | mockRows.On("Close").Return().Maybe()
97 |
98 | mockStandardConn.On("QueryRow", mock.Anything, mock.MatchedBy(func(sql string) bool {
99 | return strings.Contains(sql, "SELECT COUNT(*) FROM internal_pg_flo.ddl_log")
100 | }), mock.Anything).Return(&MockRow{
101 | scanFunc: func(dest ...interface{}) error {
102 | *dest[0].(*int) = 0
103 | return nil
104 | },
105 | }).Maybe()
106 |
107 | go ddlReplicator.StartDDLReplication(ctx)
108 |
109 | time.Sleep(100 * time.Millisecond)
110 |
111 | cancel()
112 |
113 | time.Sleep(100 * time.Millisecond)
114 |
115 | mockStandardConn.AssertExpectations(t)
116 | mockRows.AssertExpectations(t)
117 | })
118 | }
119 |
--------------------------------------------------------------------------------
/pkg/replicator/tests/json_encoder_test.go:
--------------------------------------------------------------------------------
1 | package replicator_test
2 |
3 | import (
4 | "testing"
5 | "time"
6 |
7 | "github.com/jackc/pglogrepl"
8 | "github.com/jackc/pgtype"
9 | "github.com/pgflo/pg_flo/pkg/utils"
10 | "github.com/stretchr/testify/assert"
11 | )
12 |
13 | func TestOIDToString(t *testing.T) {
14 | t.Run("OIDToString function", func(t *testing.T) {
15 | assert.Equal(t, "int4", utils.OIDToString(pgtype.Int4OID))
16 | assert.Equal(t, "text", utils.OIDToString(pgtype.TextOID))
17 | assert.Equal(t, "unknown_99999", utils.OIDToString(99999))
18 | })
19 | }
20 |
21 | func TestCDCBinaryEncoding(t *testing.T) {
22 | t.Run("Encode and decode preserves CDC types", func(t *testing.T) {
23 | testData := utils.CDCMessage{
24 | Type: utils.OperationInsert,
25 | Schema: "public",
26 | Table: "users",
27 | Columns: []*pglogrepl.RelationMessageColumn{
28 | {Name: "id", DataType: pgtype.Int4OID},
29 | {Name: "name", DataType: pgtype.TextOID},
30 | },
31 | NewTuple: &pglogrepl.TupleData{
32 | Columns: []*pglogrepl.TupleDataColumn{
33 | {Data: []byte("123")},
34 | {Data: []byte("John Doe")},
35 | },
36 | },
37 | }
38 |
39 | encoded, err := testData.MarshalBinary()
40 | assert.NoError(t, err)
41 |
42 | var decoded utils.CDCMessage
43 | err = decoded.UnmarshalBinary(encoded)
44 | assert.NoError(t, err)
45 |
46 | assert.Equal(t, testData.Type, decoded.Type)
47 | assert.Equal(t, testData.Schema, decoded.Schema)
48 | assert.Equal(t, testData.Table, decoded.Table)
49 | assert.Equal(t, testData.Columns, decoded.Columns)
50 | assert.Equal(t, testData.NewTuple, decoded.NewTuple)
51 | })
52 | }
53 |
54 | func TestBinaryEncodingComplexTypes(t *testing.T) {
55 | t.Run("Encode and decode handles complex types", func(t *testing.T) {
56 | binaryData := []byte{0x01, 0x02, 0x03, 0x04}
57 | jsonbData := []byte(`{"key": "value", "nested": {"number": 42}}`)
58 | timestamp := time.Now().UTC()
59 | floatValue := []byte("3.14159")
60 | intValue := []byte("9876543210")
61 | boolValue := []byte("true")
62 | textArrayValue := []byte("{hello,world}")
63 |
64 | testData := utils.CDCMessage{
65 | Type: utils.OperationInsert,
66 | Schema: "public",
67 | Table: "complex_types",
68 | Columns: []*pglogrepl.RelationMessageColumn{
69 | {Name: "binary", DataType: pgtype.ByteaOID},
70 | {Name: "jsonb", DataType: pgtype.JSONBOID},
71 | {Name: "timestamp", DataType: pgtype.TimestamptzOID},
72 | {Name: "float", DataType: pgtype.Float8OID},
73 | {Name: "integer", DataType: pgtype.Int8OID},
74 | {Name: "boolean", DataType: pgtype.BoolOID},
75 | {Name: "text_array", DataType: pgtype.TextArrayOID},
76 | },
77 | NewTuple: &pglogrepl.TupleData{
78 | Columns: []*pglogrepl.TupleDataColumn{
79 | {Data: binaryData},
80 | {Data: jsonbData},
81 | {Data: []byte(timestamp.Format(time.RFC3339Nano))},
82 | {Data: floatValue},
83 | {Data: intValue},
84 | {Data: boolValue},
85 | {Data: textArrayValue},
86 | },
87 | },
88 | OldTuple: &pglogrepl.TupleData{
89 | Columns: []*pglogrepl.TupleDataColumn{
90 | {Data: []byte{0x05, 0x06, 0x07, 0x08}},
91 | {Data: []byte(`{"old": "data"}`)},
92 | },
93 | },
94 | }
95 |
96 | encoded, err := testData.MarshalBinary()
97 | assert.NoError(t, err)
98 |
99 | var decoded utils.CDCMessage
100 | err = decoded.UnmarshalBinary(encoded)
101 | assert.NoError(t, err)
102 |
103 | assert.Equal(t, binaryData, decoded.NewTuple.Columns[0].Data)
104 | assert.Equal(t, jsonbData, decoded.NewTuple.Columns[1].Data)
105 | assert.Equal(t, []byte(timestamp.Format(time.RFC3339Nano)), decoded.NewTuple.Columns[2].Data)
106 | assert.Equal(t, floatValue, decoded.NewTuple.Columns[3].Data)
107 | assert.Equal(t, intValue, decoded.NewTuple.Columns[4].Data)
108 | assert.Equal(t, boolValue, decoded.NewTuple.Columns[5].Data)
109 | assert.Equal(t, textArrayValue, decoded.NewTuple.Columns[6].Data)
110 |
111 | assert.Equal(t, []byte{0x05, 0x06, 0x07, 0x08}, decoded.OldTuple.Columns[0].Data)
112 | assert.Equal(t, []byte(`{"old": "data"}`), decoded.OldTuple.Columns[1].Data)
113 |
114 | assert.Equal(t, testData.Type, decoded.Type)
115 | assert.Equal(t, testData.Schema, decoded.Schema)
116 | assert.Equal(t, testData.Table, decoded.Table)
117 | assert.Equal(t, testData.Columns, decoded.Columns)
118 | })
119 | }
120 |
--------------------------------------------------------------------------------
/pkg/replicator/tests/mocks_test.go:
--------------------------------------------------------------------------------
1 | package replicator_test
2 |
3 | import (
4 | "context"
5 |
6 | "github.com/jackc/pglogrepl"
7 | "github.com/jackc/pgx/v5"
8 | "github.com/jackc/pgx/v5/pgconn"
9 | "github.com/jackc/pgx/v5/pgproto3"
10 | "github.com/nats-io/nats.go"
11 | "github.com/pgflo/pg_flo/pkg/pgflonats"
12 | "github.com/pgflo/pg_flo/pkg/replicator"
13 | "github.com/stretchr/testify/mock"
14 | )
15 |
16 | type MockReplicationConnection struct {
17 | mock.Mock
18 | }
19 |
20 | func (m *MockReplicationConnection) Connect(ctx context.Context) error {
21 | args := m.Called(ctx)
22 | return args.Error(0)
23 | }
24 |
25 | func (m *MockReplicationConnection) Close(ctx context.Context) error {
26 | args := m.Called(ctx)
27 | return args.Error(0)
28 | }
29 |
30 | func (m *MockReplicationConnection) CreateReplicationSlot(ctx context.Context, slotName string) (pglogrepl.CreateReplicationSlotResult, error) {
31 | args := m.Called(ctx, slotName)
32 | return args.Get(0).(pglogrepl.CreateReplicationSlotResult), args.Error(1)
33 | }
34 |
35 | func (m *MockReplicationConnection) StartReplication(ctx context.Context, slotName string, startLSN pglogrepl.LSN, options pglogrepl.StartReplicationOptions) error {
36 | args := m.Called(ctx, slotName, startLSN, options)
37 | return args.Error(0)
38 | }
39 |
40 | func (m *MockReplicationConnection) ReceiveMessage(ctx context.Context) (pgproto3.BackendMessage, error) {
41 | args := m.Called(ctx)
42 | msg := args.Get(0)
43 | if msg == nil {
44 | return nil, args.Error(1)
45 | }
46 | return msg.(pgproto3.BackendMessage), args.Error(1)
47 | }
48 |
49 | func (m *MockReplicationConnection) SendStandbyStatusUpdate(ctx context.Context, status pglogrepl.StandbyStatusUpdate) error {
50 | args := m.Called(ctx, status)
51 | return args.Error(0)
52 | }
53 |
54 | type MockStandardConnection struct {
55 | mock.Mock
56 | }
57 |
58 | func (m *MockStandardConnection) Connect(ctx context.Context) error {
59 | args := m.Called(ctx)
60 | return args.Error(0)
61 | }
62 |
63 | func (m *MockStandardConnection) Close(ctx context.Context) error {
64 | args := m.Called(ctx)
65 | return args.Error(0)
66 | }
67 |
68 | func (m *MockStandardConnection) Exec(ctx context.Context, sql string, arguments ...interface{}) (pgconn.CommandTag, error) {
69 | args := m.Called(ctx, sql, arguments)
70 | return args.Get(0).(pgconn.CommandTag), args.Error(1)
71 | }
72 |
73 | func (m *MockStandardConnection) Query(ctx context.Context, sql string, args ...interface{}) (pgx.Rows, error) {
74 | mockArgs := m.Called(ctx, sql, args)
75 | return mockArgs.Get(0).(pgx.Rows), mockArgs.Error(1)
76 | }
77 |
78 | func (m *MockStandardConnection) QueryRow(ctx context.Context, sql string, args ...interface{}) pgx.Row {
79 | mockArgs := m.Called(ctx, sql, args)
80 | return mockArgs.Get(0).(pgx.Row)
81 | }
82 |
83 | func (m *MockStandardConnection) BeginTx(ctx context.Context, txOptions pgx.TxOptions) (pgx.Tx, error) {
84 | args := m.Called(ctx, txOptions)
85 | return args.Get(0).(pgx.Tx), args.Error(1)
86 | }
87 |
88 | func (m *MockStandardConnection) Acquire(ctx context.Context) (replicator.PgxPoolConn, error) {
89 | args := m.Called(ctx)
90 | return args.Get(0).(replicator.PgxPoolConn), args.Error(1)
91 | }
92 |
93 | type MockSink struct {
94 | mock.Mock
95 | }
96 |
97 | func (m *MockSink) WriteBatch(data []interface{}) error {
98 | args := m.Called(data)
99 | return args.Error(0)
100 | }
101 |
102 | func (m *MockSink) Close() error {
103 | args := m.Called()
104 | return args.Error(0)
105 | }
106 |
107 | type MockPgxPoolConn struct {
108 | mock.Mock
109 | }
110 |
111 | func (m *MockPgxPoolConn) BeginTx(ctx context.Context, txOptions pgx.TxOptions) (pgx.Tx, error) {
112 | args := m.Called(ctx, txOptions)
113 | return args.Get(0).(pgx.Tx), args.Error(1)
114 | }
115 |
116 | func (m *MockPgxPoolConn) Exec(ctx context.Context, sql string, arguments ...interface{}) (pgconn.CommandTag, error) {
117 | args := m.Called(ctx, sql, arguments)
118 | return args.Get(0).(pgconn.CommandTag), args.Error(1)
119 | }
120 |
121 | func (m *MockPgxPoolConn) Query(ctx context.Context, sql string, args ...interface{}) (pgx.Rows, error) {
122 | mockArgs := m.Called(ctx, sql, args)
123 | return mockArgs.Get(0).(pgx.Rows), mockArgs.Error(1)
124 | }
125 |
126 | func (m *MockPgxPoolConn) QueryRow(ctx context.Context, sql string, args ...interface{}) pgx.Row {
127 | mockArgs := m.Called(ctx, sql, args)
128 | return mockArgs.Get(0).(pgx.Row)
129 | }
130 |
131 | func (m *MockPgxPoolConn) Release() {
132 | m.Called()
133 | }
134 |
135 | type MockTx struct {
136 | mock.Mock
137 | }
138 |
139 | func (m *MockTx) Begin(ctx context.Context) (pgx.Tx, error) {
140 | args := m.Called(ctx)
141 | return args.Get(0).(pgx.Tx), args.Error(1)
142 | }
143 |
144 | func (m *MockTx) Commit(ctx context.Context) error {
145 | args := m.Called(ctx)
146 | return args.Error(0)
147 | }
148 |
149 | func (m *MockTx) CopyFrom(ctx context.Context, tableName pgx.Identifier, columnNames []string, rowSrc pgx.CopyFromSource) (int64, error) {
150 | args := m.Called(ctx, tableName, columnNames, rowSrc)
151 | return args.Get(0).(int64), args.Error(1)
152 | }
153 |
154 | func (m *MockTx) SendBatch(ctx context.Context, b *pgx.Batch) pgx.BatchResults {
155 | args := m.Called(ctx, b)
156 | return args.Get(0).(pgx.BatchResults)
157 | }
158 |
159 | func (m *MockTx) LargeObjects() pgx.LargeObjects {
160 | args := m.Called()
161 | return args.Get(0).(pgx.LargeObjects)
162 | }
163 |
164 | func (m *MockTx) Prepare(ctx context.Context, name, sql string) (*pgconn.StatementDescription, error) {
165 | args := m.Called(ctx, name, sql)
166 | return args.Get(0).(*pgconn.StatementDescription), args.Error(1)
167 | }
168 |
169 | func (m *MockTx) Exec(ctx context.Context, sql string, arguments ...interface{}) (pgconn.CommandTag, error) {
170 | args := []interface{}{ctx, sql}
171 | args = append(args, arguments...)
172 | callArgs := m.Called(args...)
173 | return callArgs.Get(0).(pgconn.CommandTag), callArgs.Error(1)
174 | }
175 |
176 | func (m *MockTx) Query(ctx context.Context, sql string, args ...interface{}) (pgx.Rows, error) {
177 | mockArgs := m.Called(ctx, sql, args)
178 | return mockArgs.Get(0).(pgx.Rows), mockArgs.Error(1)
179 | }
180 |
181 | func (m *MockTx) QueryRow(ctx context.Context, sql string, args ...interface{}) pgx.Row {
182 | callArgs := []interface{}{ctx, sql}
183 | callArgs = append(callArgs, args...)
184 | mockArgs := m.Called(callArgs...)
185 | return mockArgs.Get(0).(pgx.Row)
186 | }
187 |
188 | func (m *MockTx) Conn() *pgx.Conn {
189 | args := m.Called()
190 | return args.Get(0).(*pgx.Conn)
191 | }
192 |
193 | func (m *MockTx) Rollback(ctx context.Context) error {
194 | args := m.Called(ctx)
195 | return args.Error(0)
196 | }
197 |
198 | type MockRow struct {
199 | scanFunc func(dest ...interface{}) error
200 | }
201 |
202 | func (m MockRow) Scan(dest ...interface{}) error {
203 | return m.scanFunc(dest...)
204 | }
205 |
206 | type MockRows struct {
207 | mock.Mock
208 | }
209 |
210 | func (m *MockRows) Next() bool {
211 | args := m.Called()
212 | return args.Bool(0)
213 | }
214 |
215 | func (m *MockRows) Scan(dest ...interface{}) error {
216 | args := m.Called(dest...)
217 | return args.Error(0)
218 | }
219 |
220 | func (m *MockRows) Err() error {
221 | args := m.Called()
222 | return args.Error(0)
223 | }
224 |
225 | func (m *MockRows) Close() {
226 | m.Called()
227 | }
228 |
229 | func (m *MockRows) CommandTag() pgconn.CommandTag {
230 | args := m.Called()
231 | return args.Get(0).(pgconn.CommandTag)
232 | }
233 |
234 | func (m *MockRows) FieldDescriptions() []pgconn.FieldDescription {
235 | args := m.Called()
236 | return args.Get(0).([]pgconn.FieldDescription)
237 | }
238 |
239 | func (m *MockRows) Values() ([]interface{}, error) {
240 | args := m.Called()
241 | return args.Get(0).([]interface{}), args.Error(1)
242 | }
243 |
244 | func (m *MockRows) RawValues() [][]byte {
245 | args := m.Called()
246 | return args.Get(0).([][]byte)
247 | }
248 |
249 | func (m *MockRows) Conn() *pgx.Conn {
250 | args := m.Called()
251 | return args.Get(0).(*pgx.Conn)
252 | }
253 |
254 | // MockNATSClient mocks the NATSClient
255 | type MockNATSClient struct {
256 | mock.Mock
257 | }
258 |
259 | // PublishMessage mocks the PublishMessage method
260 | func (m *MockNATSClient) PublishMessage(subject string, data []byte) error {
261 | args := m.Called(subject, data)
262 | return args.Error(0)
263 | }
264 |
265 | // Close mocks the Close method
266 | func (m *MockNATSClient) Close() error {
267 | args := m.Called()
268 | return args.Error(0)
269 | }
270 |
271 | // SaveState mocks the SaveState method
272 | func (m *MockNATSClient) SaveState(state pgflonats.State) error {
273 | args := m.Called(state)
274 | return args.Error(0)
275 | }
276 |
277 | // GetState mocks the GetState method
278 | func (m *MockNATSClient) GetState() (pgflonats.State, error) {
279 | args := m.Called()
280 | return args.Get(0).(pgflonats.State), args.Error(1)
281 | }
282 |
283 | // JetStream mocks the JetStream method
284 | func (m *MockNATSClient) JetStream() nats.JetStreamContext {
285 | args := m.Called()
286 | return args.Get(0).(nats.JetStreamContext)
287 | }
288 |
--------------------------------------------------------------------------------
/pkg/routing/README.md:
--------------------------------------------------------------------------------
1 | # Message Routing
2 |
3 | Table routing allows you to map source tables and columns to different destinations while preserving data types.
4 |
5 | ## Configuration
6 |
7 | Create a YAML file (e.g., `routing.yaml`) with your routing rules:
8 |
9 | ```yaml
10 | users:
11 | source_table: users
12 | destination_table: customers
13 | column_mappings:
14 | - source: id
15 | destination: customer_id
16 | - source: username
17 | destination: customer_name
18 | operations:
19 | - INSERT
20 | - UPDATE
21 |
22 | orders:
23 | source_table: orders
24 | destination_table: transactions
25 | column_mappings:
26 | - source: id
27 | destination: transaction_id
28 | - source: total_amount
29 | destination: amount
30 | operations:
31 | - INSERT
32 | - UPDATE
33 | - DELETE
34 | ```
35 |
36 | ## Usage with Routing
37 |
38 | Start the worker with the routing configuration:
39 |
40 | ```shell
41 | pg_flo worker postgres --routing-config routing.yaml ...
42 | ```
43 |
44 | ## Routing Rules
45 |
46 | Each table configuration supports:
47 |
48 | - `source_table`: Original table name (required)
49 | - `destination_table`: Target table name (optional, defaults to source_table)
50 | - `column_mappings`: List of column name mappings (optional)
51 | - `source`: Original column name
52 | - `destination`: New column name in target
53 | - `operations`: List of operations to replicate (required)
54 | - Supported: `INSERT`, `UPDATE`, `DELETE`
55 |
56 | ## Important Notes
57 |
58 | - Column data types must match between source and destination
59 | - Primary keys are automatically mapped
60 | - All specified columns must exist in both tables
61 | - Operations not listed in `operations` are ignored
62 | - Unlisted columns are preserved with their original names
63 | - Complex types (jsonb, arrays) are preserved during mapping
64 |
65 | ## Examples
66 |
67 | ### Basic Table Mapping
68 |
69 | ```yaml
70 | users:
71 | source_table: users
72 | destination_table: customers
73 | operations:
74 | - INSERT
75 | - UPDATE
76 | ```
77 |
78 | ### Column Remapping
79 |
80 | ```yaml
81 | products:
82 | source_table: products
83 | destination_table: items
84 | column_mappings:
85 | - source: id
86 | destination: item_id
87 | - source: name
88 | destination: item_name
89 | operations:
90 | - INSERT
91 | - UPDATE
92 | - DELETE
93 | ```
94 |
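95 | ### Operation Filtering
96 |
97 | As an illustrative sketch (the table name here is hypothetical), a route can replicate only selected operations; `destination_table` is omitted, so it defaults to the source table name:
98 |
99 | ```yaml
100 | audit_logs:
101 |   source_table: audit_logs
102 |   operations:
103 |     - INSERT
104 | ```
105 |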
--------------------------------------------------------------------------------
/pkg/routing/router.go:
--------------------------------------------------------------------------------
1 | package routing
2 |
3 | import (
4 | "sync"
5 |
6 | "github.com/jackc/pglogrepl"
7 | "github.com/pgflo/pg_flo/pkg/utils"
8 | "github.com/rs/zerolog"
9 | "github.com/rs/zerolog/log"
10 | )
11 |
12 | type ColumnMapping struct {
13 | Source string `yaml:"source"`
14 | Destination string `yaml:"destination"`
15 | }
16 |
17 | type TableRoute struct {
18 | SourceTable string `yaml:"source_table"`
19 | DestinationTable string `yaml:"destination_table"`
20 | ColumnMappings []ColumnMapping `yaml:"column_mappings"`
21 | Operations []utils.OperationType `yaml:"operations"`
22 | }
23 |
24 | type Router struct {
25 | Routes map[string]TableRoute
26 | mutex sync.RWMutex
27 | logger zerolog.Logger
28 | }
29 |
30 | func NewRouter() *Router {
31 | return &Router{
32 | Routes: make(map[string]TableRoute),
33 | logger: log.With().Str("component", "router").Logger(),
34 | }
35 | }
36 |
37 | func (r *Router) AddRoute(route TableRoute) {
38 | r.mutex.Lock()
39 | defer r.mutex.Unlock()
40 | r.Routes[route.SourceTable] = route
41 | }
42 |
43 | func (r *Router) ApplyRouting(message *utils.CDCMessage) (*utils.CDCMessage, error) {
44 | r.mutex.RLock()
45 | defer r.mutex.RUnlock()
46 | route, exists := r.Routes[message.Table]
47 | if !exists {
48 | return message, nil
49 | }
50 |
51 | if !ContainsOperation(route.Operations, message.Type) {
52 | return nil, nil
53 | }
54 |
55 | routedMessage := *message
56 | routedMessage.Table = route.DestinationTable
57 |
58 | if len(route.ColumnMappings) > 0 {
59 | newColumns := make([]*pglogrepl.RelationMessageColumn, len(message.Columns))
60 | for i, col := range message.Columns {
61 | newCol := *col
62 | mappedName := GetMappedColumnName(route.ColumnMappings, col.Name)
63 | if mappedName != "" {
64 | newCol.Name = mappedName
65 | }
66 | newColumns[i] = &newCol
67 | }
68 | routedMessage.Columns = newColumns
69 |
70 | if routedMessage.ReplicationKey.Type != utils.ReplicationKeyFull {
71 | mappedColumns := make([]string, len(routedMessage.ReplicationKey.Columns))
72 | for i, keyCol := range routedMessage.ReplicationKey.Columns {
73 | mappedName := GetMappedColumnName(route.ColumnMappings, keyCol)
74 | if mappedName != "" {
75 | mappedColumns[i] = mappedName
76 | } else {
77 | mappedColumns[i] = keyCol
78 | }
79 | }
80 | routedMessage.ReplicationKey.Columns = mappedColumns
81 | }
82 | }
83 |
84 | return &routedMessage, nil
85 | }
86 |
87 | // ContainsOperation checks if the given operation is in the list of operations
88 | func ContainsOperation(operations []utils.OperationType, operation utils.OperationType) bool {
89 | for _, op := range operations {
90 | if op == operation {
91 | return true
92 | }
93 | }
94 | return false
95 | }
96 |
97 | // GetMappedColumnName returns the destination column name for a given source column name
98 | func GetMappedColumnName(mappings []ColumnMapping, sourceName string) string {
99 | for _, mapping := range mappings {
100 | if mapping.Source == sourceName {
101 | return mapping.Destination
102 | }
103 | }
104 | return ""
105 | }
106 |
107 | // LoadRoutes loads routes from the provided configuration
108 | func (r *Router) LoadRoutes(config map[string]TableRoute) error {
109 | for sourceName, route := range config {
110 | r.logger.Info().
111 | Str("source_table", sourceName).
112 | Str("destination_table", route.DestinationTable).
113 | Any("operations", route.Operations).
114 | Any("column_mappings", route.ColumnMappings).
115 | Msg("Loading route")
116 |
117 | route.SourceTable = sourceName
118 | if route.DestinationTable == "" {
119 | route.DestinationTable = sourceName
120 | }
121 | r.AddRoute(route)
122 | }
123 | return nil
124 | }
125 |
--------------------------------------------------------------------------------
/pkg/routing/tests/routing_test.go:
--------------------------------------------------------------------------------
1 | package routing_test
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/jackc/pglogrepl"
7 | "github.com/pgflo/pg_flo/pkg/routing"
8 | "github.com/pgflo/pg_flo/pkg/utils"
9 | "github.com/stretchr/testify/assert"
10 | )
11 |
12 | func TestRouter_ApplyRouting(t *testing.T) {
13 | tests := []struct {
14 | name string
15 | routes map[string]routing.TableRoute
16 | inputMessage *utils.CDCMessage
17 | expectedOutput *utils.CDCMessage
18 | expectNil bool
19 | }{
20 | {
21 | name: "Simple table routing",
22 | routes: map[string]routing.TableRoute{
23 | "source_table": {
24 | SourceTable: "source_table",
25 | DestinationTable: "dest_table",
26 | Operations: []utils.OperationType{utils.OperationInsert, utils.OperationUpdate, utils.OperationDelete},
27 | },
28 | },
29 | inputMessage: &utils.CDCMessage{
30 | Type: utils.OperationInsert,
31 | Table: "source_table",
32 | Columns: []*pglogrepl.RelationMessageColumn{
33 | {Name: "id", DataType: 23},
34 | {Name: "name", DataType: 25},
35 | },
36 | },
37 | expectedOutput: &utils.CDCMessage{
38 | Type: utils.OperationInsert,
39 | Table: "dest_table",
40 | Columns: []*pglogrepl.RelationMessageColumn{
41 | {Name: "id", DataType: 23},
42 | {Name: "name", DataType: 25},
43 | },
44 | },
45 | },
46 | {
47 | name: "Column mapping",
48 | routes: map[string]routing.TableRoute{
49 | "users": {
50 | SourceTable: "users",
51 | DestinationTable: "customers",
52 | ColumnMappings: []routing.ColumnMapping{
53 | {Source: "user_id", Destination: "customer_id"},
54 | {Source: "user_name", Destination: "customer_name"},
55 | },
56 | Operations: []utils.OperationType{utils.OperationInsert, utils.OperationUpdate, utils.OperationDelete},
57 | },
58 | },
59 | inputMessage: &utils.CDCMessage{
60 | Type: utils.OperationUpdate,
61 | Table: "users",
62 | Columns: []*pglogrepl.RelationMessageColumn{
63 | {Name: "user_id", DataType: 23},
64 | {Name: "user_name", DataType: 25},
65 | {Name: "email", DataType: 25},
66 | },
67 | },
68 | expectedOutput: &utils.CDCMessage{
69 | Type: utils.OperationUpdate,
70 | Table: "customers",
71 | Columns: []*pglogrepl.RelationMessageColumn{
72 | {Name: "customer_id", DataType: 23},
73 | {Name: "customer_name", DataType: 25},
74 | {Name: "email", DataType: 25},
75 | },
76 | },
77 | },
78 | {
79 | name: "Operation filtering - allowed",
80 | routes: map[string]routing.TableRoute{
81 | "orders": {
82 | SourceTable: "orders",
83 | DestinationTable: "processed_orders",
84 | Operations: []utils.OperationType{utils.OperationInsert, utils.OperationUpdate},
85 | },
86 | },
87 | inputMessage: &utils.CDCMessage{
88 | Type: utils.OperationUpdate,
89 | Table: "orders",
90 | },
91 | expectedOutput: &utils.CDCMessage{
92 | Type: utils.OperationUpdate,
93 | Table: "processed_orders",
94 | },
95 | },
96 | {
97 | name: "Operation filtering - not allowed",
98 | routes: map[string]routing.TableRoute{
99 | "orders": {
100 | SourceTable: "orders",
101 | DestinationTable: "processed_orders",
102 | Operations: []utils.OperationType{utils.OperationInsert, utils.OperationUpdate},
103 | },
104 | },
105 | inputMessage: &utils.CDCMessage{
106 | Type: utils.OperationDelete,
107 | Table: "orders",
108 | },
109 | expectNil: true,
110 | },
111 | {
112 | name: "No route for table",
113 | routes: map[string]routing.TableRoute{},
114 | inputMessage: &utils.CDCMessage{
115 | Type: utils.OperationInsert,
116 | Table: "unknown_table",
117 | },
118 | expectedOutput: &utils.CDCMessage{
119 | Type: utils.OperationInsert,
120 | Table: "unknown_table",
121 | },
122 | },
123 | }
124 |
125 | for _, tt := range tests {
126 | t.Run(tt.name, func(t *testing.T) {
127 | router := routing.NewRouter()
128 | for _, route := range tt.routes {
129 | router.AddRoute(route)
130 | }
131 |
132 | result, err := router.ApplyRouting(tt.inputMessage)
133 |
134 | assert.NoError(t, err)
135 |
136 | if tt.expectNil {
137 | assert.Nil(t, result)
138 | } else {
139 | assert.NotNil(t, result)
140 | assert.Equal(t, tt.expectedOutput.Type, result.Type)
141 | assert.Equal(t, tt.expectedOutput.Table, result.Table)
142 | assert.Equal(t, len(tt.expectedOutput.Columns), len(result.Columns))
143 | for i, col := range tt.expectedOutput.Columns {
144 | assert.Equal(t, col.Name, result.Columns[i].Name)
145 | assert.Equal(t, col.DataType, result.Columns[i].DataType)
146 | }
147 | }
148 | })
149 | }
150 | }
151 |
152 | func TestRouter_LoadRoutes(t *testing.T) {
153 | router := routing.NewRouter()
154 | config := map[string]routing.TableRoute{
155 | "table1": {
156 | SourceTable: "table1",
157 | DestinationTable: "dest_table1",
158 | ColumnMappings: []routing.ColumnMapping{
159 | {Source: "col1", Destination: "dest_col1"},
160 | },
161 | Operations: []utils.OperationType{utils.OperationInsert, utils.OperationUpdate},
162 | },
163 | "table2": {
164 | SourceTable: "table2",
165 | DestinationTable: "dest_table2",
166 | Operations: []utils.OperationType{utils.OperationInsert, utils.OperationUpdate, utils.OperationDelete},
167 | },
168 | }
169 |
170 | err := router.LoadRoutes(config)
171 | assert.NoError(t, err)
172 |
173 | assert.Len(t, router.Routes, 2)
174 | assert.Contains(t, router.Routes, "table1")
175 | assert.Contains(t, router.Routes, "table2")
176 |
177 | assert.Equal(t, "dest_table1", router.Routes["table1"].DestinationTable)
178 | assert.Equal(t, "dest_table2", router.Routes["table2"].DestinationTable)
179 |
180 | assert.Len(t, router.Routes["table1"].ColumnMappings, 1)
181 | assert.Len(t, router.Routes["table1"].Operations, 2)
182 | assert.Len(t, router.Routes["table2"].Operations, 3)
183 | }
184 |
185 | func TestRouter_AddRoute(t *testing.T) {
186 | router := routing.NewRouter()
187 | route := routing.TableRoute{
188 | SourceTable: "source",
189 | DestinationTable: "destination",
190 | ColumnMappings: []routing.ColumnMapping{
191 | {Source: "src_col", Destination: "dest_col"},
192 | },
193 | Operations: []utils.OperationType{utils.OperationInsert},
194 | }
195 |
196 | router.AddRoute(route)
197 |
198 | assert.Len(t, router.Routes, 1)
199 | assert.Contains(t, router.Routes, "source")
200 | assert.Equal(t, route, router.Routes["source"])
201 | }
202 |
203 | func TestContainsOperation(t *testing.T) {
204 | operations := []utils.OperationType{utils.OperationInsert, utils.OperationUpdate}
205 |
206 | assert.True(t, routing.ContainsOperation(operations, utils.OperationInsert))
207 | assert.True(t, routing.ContainsOperation(operations, utils.OperationUpdate))
208 | assert.False(t, routing.ContainsOperation(operations, utils.OperationDelete))
209 | }
210 |
211 | func TestGetMappedColumnName(t *testing.T) {
212 | mappings := []routing.ColumnMapping{
213 | {Source: "col1", Destination: "mapped_col1"},
214 | {Source: "col2", Destination: "mapped_col2"},
215 | }
216 |
217 | assert.Equal(t, "mapped_col1", routing.GetMappedColumnName(mappings, "col1"))
218 | assert.Equal(t, "mapped_col2", routing.GetMappedColumnName(mappings, "col2"))
219 | assert.Equal(t, "", routing.GetMappedColumnName(mappings, "col3"))
220 | }
221 |
--------------------------------------------------------------------------------
/pkg/rules/README.md:
--------------------------------------------------------------------------------
1 | ## Transformation Rules
2 |
3 | There are two types of transformation rules available:
4 |
5 | 1. **Regex Transform**
6 |
7 | - Type: `"regex"`
8 | - Parameters:
9 | - `pattern`: The regular expression pattern to match
10 | - `replace`: The replacement string
11 | - Description: Applies a regular expression replacement on string values in the specified column.
12 |
13 | 2. **Mask Transform**
14 | - Type: `"mask"`
15 | - Parameters:
16 | - `mask_char`: The character to use for masking
17 | - Description: Masks the content of string values, keeping the first and last characters visible and replacing the rest with the specified mask character.
18 |
19 | ## Filtering Rules
20 |
21 | Filtering rules use various comparison operators to determine whether a row should be included in the output. The available operators are:
22 |
23 | 1. **Equality** (`"eq"`)
24 | 2. **Inequality** (`"ne"`)
25 | 3. **Greater Than** (`"gt"`)
26 | 4. **Less Than** (`"lt"`)
27 | 5. **Greater Than or Equal To** (`"gte"`)
28 | 6. **Less Than or Equal To** (`"lte"`)
29 | 7. **Contains** (`"contains"`)
30 |
31 | ## Rule Properties
32 |
33 | Both transformation and filtering rules share these common properties:
34 |
35 | - `type`: Specifies whether it's a "transform" or "filter" rule.
36 | - `column`: The name of the column to apply the rule to.
37 | - `operations`: An array of operations (INSERT, UPDATE, DELETE) to which the rule should be applied. If not specified, it applies to all operations.
38 | - `allow_empty_deletes`: A boolean flag that, when set to true, allows the rule to process delete operations even if the column value is empty.
39 |
40 | ## Additional Notes
41 |
42 | - The rules support various data types, including integers, floats, strings, timestamps, booleans, and numeric (decimal) values.
43 | - For filtering rules, the comparison is type-aware, ensuring that values are compared appropriately based on their data type.
44 | - The `contains` operator for filtering only works on string values.
45 | - Transformation rules currently only work on string values. If a non-string value is encountered, the transformation is skipped and a warning is logged.
46 |
47 | To use these rules, you would define them in a YAML configuration file and specify the path to this file using the `--rules-config` flag when running `pg_flo`. The exact structure of the YAML file should match the rule properties and parameters described above.
48 |
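49 | As an illustrative sketch only — the top-level `tables` key, the transform selector, and the filter parameter names below are assumptions rather than the documented schema — a rules file combining a mask transform and a filter might look like this:
50 |
51 | ```yaml
52 | # Hypothetical rules config; verify exact key names against the project's sample rules files.
53 | tables:
54 |   users:
55 |     - type: transform
56 |       column: email
57 |       parameters:
58 |         transform_type: mask # assumed key for choosing the mask transform
59 |         mask_char: "*"       # documented parameter: masking character
60 |       operations:
61 |         - INSERT
62 |         - UPDATE
63 |     - type: filter
64 |       column: age
65 |       parameters:
66 |         operator: gte # assumed key; one of the comparison operators listed above
67 |         value: 18     # assumed key for the comparison value
68 |       allow_empty_deletes: true
69 | ```
70 |
71 | The path to this file is then passed via the `--rules-config` flag when running `pg_flo`.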
--------------------------------------------------------------------------------
/pkg/rules/engine.go:
--------------------------------------------------------------------------------
1 | package rules
2 |
3 | import (
4 | "fmt"
5 |
6 | "github.com/pgflo/pg_flo/pkg/utils"
7 | )
8 |
9 | // AddRule adds a new rule for the specified table
10 | func (re *RuleEngine) AddRule(tableName string, rule Rule) {
11 | re.mutex.Lock()
12 | defer re.mutex.Unlock()
13 | re.Rules[tableName] = append(re.Rules[tableName], rule)
14 | }
15 |
16 | // ApplyRules applies all rules for the specified table to the given data
17 | func (re *RuleEngine) ApplyRules(message *utils.CDCMessage) (*utils.CDCMessage, error) {
18 | re.mutex.RLock()
19 | defer re.mutex.RUnlock()
20 |
21 | rules, exists := re.Rules[message.Table]
22 | if !exists {
23 | return message, nil // No rules for this table
24 | }
25 |
26 | logger.Info().
27 | Str("table", message.Table).
28 | Str("operation", string(message.Type)).
29 | Int("ruleCount", len(rules)).
30 | Msg("Applying rules")
31 |
32 | var err error
33 | for _, rule := range rules {
34 | message, err = rule.Apply(message)
35 | if err != nil {
36 | return nil, err
37 | }
38 | if message == nil {
39 | // Message filtered out
40 | return nil, nil
41 | }
42 | }
43 | return message, nil
44 | }
45 |
46 | // LoadRules loads rules from the provided configuration
47 | func (re *RuleEngine) LoadRules(config Config) error {
48 | for tableName, ruleConfigs := range config.Tables {
49 | logger.Info().Str("table", tableName).Msg("Loading rules for table")
50 | for i, ruleConfig := range ruleConfigs {
51 | rule, err := createRule(tableName, ruleConfig)
52 | if err != nil {
53 | return fmt.Errorf("error creating rule for table %s: %w", tableName, err)
54 | }
55 | logger.Info().
56 | Str("table", tableName).
57 | Int("ruleIndex", i+1).
58 | Str("ruleType", fmt.Sprintf("%T", rule)).
59 | Msg("Created rule")
60 | re.AddRule(tableName, rule)
61 | }
62 | }
63 | return nil
64 | }
65 |
66 | // createRule creates a new rule based on the provided configuration
67 | func createRule(tableName string, config RuleConfig) (Rule, error) {
68 | logger.Info().
69 | Str("table", tableName).
70 | Str("ruleType", config.Type).
71 | Msg("Creating rule")
72 | switch config.Type {
73 | case "transform":
74 | return NewTransformRule(tableName, config.Column, config.Parameters)
75 | case "filter":
76 | return NewFilterRule(tableName, config.Column, config.Parameters)
77 | default:
78 | return nil, fmt.Errorf("unknown rule type: %s", config.Type)
79 | }
80 | }
81 |
--------------------------------------------------------------------------------
/pkg/rules/tests/engine_test.go:
--------------------------------------------------------------------------------
1 | package rules_test
2 |
3 | import (
4 | "log"
5 | "os"
6 | "testing"
7 |
8 | "github.com/jackc/pglogrepl"
9 | "github.com/jackc/pgtype"
10 | "github.com/pgflo/pg_flo/pkg/rules"
11 | "github.com/pgflo/pg_flo/pkg/utils"
12 | "github.com/stretchr/testify/assert"
13 | )
14 |
15 | func TestMain(m *testing.M) {
16 | log.SetOutput(os.Stdout)
17 | os.Exit(m.Run())
18 | }
19 |
20 | func TestRuleEngine_AddRule(t *testing.T) {
21 | re := rules.NewRuleEngine()
22 | rule := &MockRule{
23 | TableName: "users",
24 | ColumnName: "test_column",
25 | ApplyFunc: func(message *utils.CDCMessage) (*utils.CDCMessage, error) {
26 | return message, nil
27 | },
28 | }
29 | re.AddRule("users", rule)
30 |
31 | message := &utils.CDCMessage{
32 | Type: utils.OperationInsert,
33 | Schema: "public",
34 | Table: "users",
35 | Columns: []*pglogrepl.RelationMessageColumn{
36 | {Name: "test_column", DataType: pgtype.TextOID},
37 | },
38 | NewTuple: &pglogrepl.TupleData{
39 | Columns: []*pglogrepl.TupleDataColumn{
40 | {Data: []byte("original")},
41 | },
42 | },
43 | }
44 |
45 | result, err := re.ApplyRules(message)
46 | assert.NoError(t, err)
47 | assert.NotNil(t, result)
48 | }
49 |
50 | func TestRuleEngine_ApplyRules(t *testing.T) {
51 | re := rules.NewRuleEngine()
52 | rule := &MockRule{
53 | TableName: "users",
54 | ColumnName: "test_column",
55 | ApplyFunc: func(message *utils.CDCMessage) (*utils.CDCMessage, error) {
56 | message.NewTuple.Columns[0].Data = []byte("transformed")
57 | return message, nil
58 | },
59 | }
60 | re.AddRule("users", rule)
61 |
62 | message := &utils.CDCMessage{
63 | Type: utils.OperationInsert,
64 | Schema: "public",
65 | Table: "users",
66 | Columns: []*pglogrepl.RelationMessageColumn{
67 | {Name: "test_column", DataType: pgtype.TextOID},
68 | },
69 | NewTuple: &pglogrepl.TupleData{
70 | Columns: []*pglogrepl.TupleDataColumn{
71 | {Data: []byte("original")},
72 | },
73 | },
74 | }
75 |
76 | result, err := re.ApplyRules(message)
77 |
78 | assert.NoError(t, err)
79 | value, err := result.GetColumnValue("test_column", false)
80 | assert.NoError(t, err)
81 | assert.Equal(t, "transformed", value)
82 | }
83 |
84 | func TestRuleEngine_ApplyRules_NoRules(t *testing.T) {
85 | re := rules.NewRuleEngine()
86 | message := &utils.CDCMessage{
87 | Type: utils.OperationInsert,
88 | Schema: "public",
89 | Table: "users",
90 | Columns: []*pglogrepl.RelationMessageColumn{
91 | {Name: "test_column", DataType: pgtype.TextOID},
92 | },
93 | NewTuple: &pglogrepl.TupleData{
94 | Columns: []*pglogrepl.TupleDataColumn{
95 | {Data: []byte("original")},
96 | },
97 | },
98 | }
99 |
100 | result, err := re.ApplyRules(message)
101 |
102 | assert.NoError(t, err)
103 | assert.Equal(t, message, result)
104 | }
105 |
106 | func TestRuleEngine_LoadRules_Transform(t *testing.T) {
107 | re := rules.NewRuleEngine()
108 | config := rules.Config{
109 | Tables: map[string][]rules.RuleConfig{
110 | "users": {
111 | {
112 | Type: "transform",
113 | Column: "test_column",
114 | Parameters: map[string]interface{}{
115 | "type": "mask",
116 | "mask_char": "*",
117 | },
118 | Operations: []utils.OperationType{utils.OperationInsert, utils.OperationUpdate},
119 | },
120 | },
121 | },
122 | }
123 |
124 | err := re.LoadRules(config)
125 | assert.NoError(t, err)
126 |
127 | message := &utils.CDCMessage{
128 | Type: utils.OperationInsert,
129 | Schema: "public",
130 | Table: "users",
131 | Columns: []*pglogrepl.RelationMessageColumn{
132 | {Name: "test_column", DataType: pgtype.TextOID},
133 | },
134 | NewTuple: &pglogrepl.TupleData{
135 | Columns: []*pglogrepl.TupleDataColumn{
136 | {Data: []byte("test")},
137 | },
138 | },
139 | }
140 |
141 | result, err := re.ApplyRules(message)
142 | assert.NoError(t, err)
143 | assert.NotNil(t, result)
144 | value, err := result.GetColumnValue("test_column", false)
145 | assert.NoError(t, err)
146 | assert.Equal(t, "t**t", value)
147 | }
148 |
149 | func TestRuleEngine_LoadRules_Filter(t *testing.T) {
150 | re := rules.NewRuleEngine()
151 | config := rules.Config{
152 | Tables: map[string][]rules.RuleConfig{
153 | "users": {
154 | {
155 | Type: "filter",
156 | Column: "id",
157 | Parameters: map[string]interface{}{
158 | "operator": "gt",
159 | "value": int64(100),
160 | },
161 | Operations: []utils.OperationType{utils.OperationDelete},
162 | },
163 | },
164 | },
165 | }
166 |
167 | err := re.LoadRules(config)
168 | assert.NoError(t, err)
169 |
170 | message := &utils.CDCMessage{
171 | Type: utils.OperationDelete,
172 | Schema: "public",
173 | Table: "users",
174 | Columns: []*pglogrepl.RelationMessageColumn{
175 | {Name: "id", DataType: pgtype.Int8OID},
176 | },
177 | OldTuple: &pglogrepl.TupleData{
178 | Columns: []*pglogrepl.TupleDataColumn{
179 | {Data: []byte("101")},
180 | },
181 | },
182 | }
183 |
184 | result, err := re.ApplyRules(message)
185 | assert.NoError(t, err)
186 | assert.NotNil(t, result)
187 | value, err := result.GetColumnValue("id", true)
188 | assert.NoError(t, err)
189 | assert.Equal(t, int64(101), value)
190 |
191 | message.OldTuple.Columns[0].Data = []byte("99")
192 | result, err = re.ApplyRules(message)
193 | assert.NoError(t, err)
194 | assert.Nil(t, result)
195 | }
196 |
197 | func TestRuleEngine_LoadRules_EmptyDeletes(t *testing.T) {
198 | re := rules.NewRuleEngine()
199 | config := rules.Config{
200 | Tables: map[string][]rules.RuleConfig{
201 | "users": {
202 | {
203 | Type: "filter",
204 | Column: "id",
205 | AllowEmptyDeletes: true,
206 | Parameters: map[string]interface{}{
207 | "operator": "eq",
208 | "value": int64(101),
209 | },
210 | Operations: []utils.OperationType{utils.OperationDelete},
211 | },
212 | },
213 | },
214 | }
215 |
216 | err := re.LoadRules(config)
217 | assert.NoError(t, err)
218 |
219 | message := &utils.CDCMessage{
220 | Type: utils.OperationDelete,
221 | Schema: "public",
222 | Table: "users",
223 | Columns: []*pglogrepl.RelationMessageColumn{
224 | {Name: "id", DataType: pgtype.Int8OID},
225 | },
226 | OldTuple: &pglogrepl.TupleData{
227 | Columns: []*pglogrepl.TupleDataColumn{
228 | {Data: []byte("101")},
229 | },
230 | },
231 | }
232 |
233 | result, err := re.ApplyRules(message)
234 | assert.NoError(t, err)
235 | assert.NotNil(t, result)
236 | value, err := result.GetColumnValue("id", true)
237 | assert.NoError(t, err)
238 | assert.Equal(t, int64(101), value)
239 | }
240 |
241 | func TestRuleEngine_ApplyRules_FilterRule(t *testing.T) {
242 | re := rules.NewRuleEngine()
243 | config := rules.Config{
244 | Tables: map[string][]rules.RuleConfig{
245 | "users": {
246 | {
247 | Type: "filter",
248 | Column: "id",
249 | Parameters: map[string]interface{}{
250 | "operator": "gt",
251 | "value": int64(100),
252 | },
253 | Operations: []utils.OperationType{utils.OperationUpdate},
254 | },
255 | },
256 | },
257 | }
258 |
259 | err := re.LoadRules(config)
260 | assert.NoError(t, err)
261 |
262 | message := &utils.CDCMessage{
263 | Type: utils.OperationUpdate,
264 | Schema: "public",
265 | Table: "users",
266 | Columns: []*pglogrepl.RelationMessageColumn{
267 | {Name: "id", DataType: pgtype.Int8OID},
268 | },
269 | NewTuple: &pglogrepl.TupleData{
270 | Columns: []*pglogrepl.TupleDataColumn{
271 | {Data: []byte("101")},
272 | },
273 | },
274 | }
275 | result, err := re.ApplyRules(message)
276 |
277 | assert.NoError(t, err)
278 | assert.NotNil(t, result)
279 | idValue, err := result.GetColumnValue("id", false)
280 | assert.NoError(t, err)
281 | assert.Equal(t, int64(101), idValue)
282 |
283 | message.NewTuple.Columns[0].Data = []byte("99")
284 | result, err = re.ApplyRules(message)
285 |
286 | assert.NoError(t, err)
287 | assert.Nil(t, result)
288 |
289 | message.Type = utils.OperationInsert
290 | message.NewTuple.Columns[0].Data = []byte("101")
291 | result, err = re.ApplyRules(message)
292 |
293 | assert.NoError(t, err)
294 | assert.NotNil(t, result)
295 | idValue, err = result.GetColumnValue("id", false)
296 | assert.NoError(t, err)
297 | assert.Equal(t, int64(101), idValue)
298 | }
299 |
--------------------------------------------------------------------------------
/pkg/rules/tests/mocks_test.go:
--------------------------------------------------------------------------------
1 | package rules_test
2 |
3 | import (
4 | "github.com/pgflo/pg_flo/pkg/utils"
5 | )
6 |
7 | type MockRule struct {
8 | TableName string
9 | ColumnName string
10 | ApplyFunc func(*utils.CDCMessage) (*utils.CDCMessage, error)
11 | }
12 |
13 | func (r *MockRule) Apply(message *utils.CDCMessage) (*utils.CDCMessage, error) {
14 | return r.ApplyFunc(message)
15 | }
16 |
--------------------------------------------------------------------------------
/pkg/rules/types.go:
--------------------------------------------------------------------------------
1 | package rules
2 |
3 | import (
4 | "sync"
5 |
6 | "github.com/pgflo/pg_flo/pkg/utils"
7 | )
8 |
9 | // Rule interface defines the methods that all rules must implement
10 | type Rule interface {
11 | Apply(message *utils.CDCMessage) (*utils.CDCMessage, error)
12 | }
13 |
14 | // RuleConfig represents the configuration for a single rule
15 | type RuleConfig struct {
16 | Type string `yaml:"type"`
17 | Column string `yaml:"column"`
18 | Parameters map[string]interface{} `yaml:"parameters"`
19 | Operations []utils.OperationType `yaml:"operations,omitempty"`
20 | AllowEmptyDeletes bool `yaml:"allow_empty_deletes,omitempty"`
21 | }
22 |
23 | // Config represents the overall configuration for rules
24 | type Config struct {
25 | Tables map[string][]RuleConfig `yaml:"tables"`
26 | }
27 |
28 | // TransformRule represents a rule that transforms data
29 | type TransformRule struct {
30 | TableName string
31 | ColumnName string
32 | Transform func(*utils.CDCMessage) (*utils.CDCMessage, error)
33 | Operations []utils.OperationType
34 | AllowEmptyDeletes bool
35 | }
36 |
37 | // FilterRule represents a rule that filters data
38 | type FilterRule struct {
39 | TableName string
40 | ColumnName string
41 | Condition func(*utils.CDCMessage) bool
42 | Operations []utils.OperationType
43 | AllowEmptyDeletes bool
44 | }
45 |
46 | // RuleEngine manages and applies rules to data
47 | type RuleEngine struct {
48 | Rules map[string][]Rule // map of table name to slice of rules
49 | mutex sync.RWMutex
50 | }
51 |
52 | // NewRuleEngine creates a new RuleEngine instance
53 | func NewRuleEngine() *RuleEngine {
54 | return &RuleEngine{
55 | Rules: make(map[string][]Rule),
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/pkg/sinks/README.md:
--------------------------------------------------------------------------------
1 | # Supported Sinks in pg_flo
2 |
3 | pg_flo supports various sink types (destinations) for streaming data changes. This document provides an overview of the supported sinks and how to use them via the command-line interface.
4 |
5 | - [Available Sinks](#available-sinks)
6 | - [Common Flags](#common-flags)
7 | - [STDOUT Sink](#stdout-sink)
8 | - [Usage](#usage)
9 | - [Example](#example)
10 | - [File Sink](#file-sink)
11 | - [Usage](#usage-1)
12 | - [Additional Flags](#additional-flags)
13 | - [Example](#example-1)
14 | - [PostgreSQL Sink](#postgresql-sink)
15 | - [Usage](#usage-2)
16 | - [Additional Flags](#additional-flags-1)
17 | - [Example](#example-2)
18 | - [Additional Behavior](#additional-behavior)
19 | - [Webhook Sink](#webhook-sink)
20 | - [Usage](#usage-3)
21 | - [Additional Flags](#additional-flags-2)
22 | - [Example](#example-3)
23 | - [Additional Behavior](#additional-behavior-1)
24 | - [Sink Interface](#sink-interface)
25 |
26 | ## Available Sinks
27 |
28 | 1. STDOUT
29 | 2. File
30 | 3. PostgreSQL
31 | 4. Webhook
32 |
33 | ## Common Flags
34 |
35 | These flags are common to all sink types:
36 |
37 | - `--host`: PostgreSQL source host
38 | - `--port`: PostgreSQL source port
39 | - `--dbname`: PostgreSQL source database name
40 | - `--user`: PostgreSQL source user
41 | - `--password`: PostgreSQL source password
42 | - `--group`: Group name for replication
43 | - `--tables`: Tables to replicate (comma-separated)
44 | - `--status-dir`: Directory to store status files
45 |
46 | ## STDOUT Sink
47 |
48 | The STDOUT sink writes changes directly to the console output.
49 |
50 | ### Usage
51 |
52 | ```shell
53 | pg_flo stream stdout [common flags]
54 | ```
55 |
56 | ### Example
57 |
58 | ```shell
59 | pg_flo stream stdout \
60 | --host localhost \
61 | --port 5432 \
62 | --dbname your_database \
63 | --user your_user \
64 | --password your_password \
65 | --group your_group \
66 | --tables table1,table2 \
67 | --status-dir /tmp/pg_flo-status
68 | ```
69 |
70 | ## File Sink
71 |
72 | The File sink writes changes to files in the specified output directory.
73 |
74 | ### Usage
75 |
76 | ```shell
77 | pg_flo stream file [common flags] --output-dir <output_directory>
78 | ```
79 |
80 | ### Additional Flags
81 |
82 | - `--output-dir`: Output directory for file sink
83 |
84 | ### Example
85 |
86 | ```shell
87 | pg_flo stream file \
88 | --host localhost \
89 | --port 5432 \
90 | --dbname your_database \
91 | --user your_user \
92 | --password your_password \
93 | --group your_group \
94 | --tables table1,table2 \
95 | --status-dir /tmp/pg_flo-status \
96 | --output-dir /tmp/pg_flo-output
97 | ```
98 |
99 | ## PostgreSQL Sink
100 |
101 | The PostgreSQL sink replicates changes to another PostgreSQL database. To ensure accurate replication of updates and deletes, all tables must have a primary key defined.
102 |
103 | ### Usage
104 |
105 | ```shell
106 | pg_flo stream postgres [common flags] [postgres sink flags]
107 | ```
108 |
109 | ### Additional Flags
110 |
111 | - `--target-host`: Target PostgreSQL host
112 | - `--target-port`: Target PostgreSQL port
113 | - `--target-dbname`: Target PostgreSQL database name
114 | - `--target-user`: Target PostgreSQL user
115 | - `--target-password`: Target PostgreSQL password
116 | - `--sync-schema`: Sync schema from source to target via `pg_dump` (boolean flag)
117 |
118 | ### Example
119 |
120 | ```shell
121 | pg_flo stream postgres \
122 | --host localhost \
123 | --port 5432 \
124 | --dbname source_db \
125 | --user source_user \
126 | --password source_password \
127 | --group replication_group \
128 | --tables table1,table2 \
129 | --schema public \
130 | --status-dir /tmp/pg_flo-status \
131 | --target-host target.host.com \
132 | --target-port 5433 \
133 | --target-dbname target_db \
134 | --target-user target_user \
135 | --target-password target_password \
136 | --sync-schema
137 | ```
138 |
139 | ### Additional Behavior
140 |
141 | - Supports schema synchronization between source and target databases using `pg_dump` when the `--sync-schema` flag is set.
142 | - Creates an `internal_pg_flo` schema and `lsn_status` table to keep track of the last processed LSN.
143 | - Handles `INSERT`, `UPDATE`, `DELETE`, and `DDL` operations.
144 | - Uses `UPSERT` (`INSERT ... ON CONFLICT DO UPDATE`) to handle both `INSERT` and `UPDATE` operations efficiently (see the sketch after this list).
145 | - Executes operations within a transaction for each batch of changes.
146 | - Rolls back the transaction and logs an error if any operation in the batch fails.
147 |
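The `INSERT ... ON CONFLICT DO UPDATE` pattern mentioned above can be illustrated with a short, self-contained sketch. This is not the sink's actual implementation; the table, columns, and connection string below are placeholders:

```go
package main

import (
	"context"
	"fmt"

	"github.com/jackc/pgx/v5"
)

// upsertUser shows the INSERT ... ON CONFLICT DO UPDATE pattern the
// PostgreSQL sink relies on. Table, columns, and the conflict target (the
// primary key "id") are hypothetical.
func upsertUser(ctx context.Context, conn *pgx.Conn, id int64, email string) error {
	_, err := conn.Exec(ctx, `
		INSERT INTO users (id, email)
		VALUES ($1, $2)
		ON CONFLICT (id) DO UPDATE SET email = EXCLUDED.email`,
		id, email)
	return err
}

func main() {
	ctx := context.Background()
	// Placeholder connection string for the target database.
	conn, err := pgx.Connect(ctx, "postgres://user:pass@localhost:5432/target_db")
	if err != nil {
		panic(err)
	}
	defer conn.Close(ctx)

	if err := upsertUser(ctx, conn, 101, "a@example.com"); err != nil {
		fmt.Println("upsert failed:", err)
	}
}
```
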
148 | ## Webhook Sink
149 |
150 | The Webhook sink sends changes as HTTP POST requests to a specified URL.
151 |
152 | ### Usage
153 |
154 | ```shell
155 | pg_flo stream webhook [common flags] --webhook-url <url>
156 | ```
157 |
158 | ### Additional Flags
159 |
160 | - `--webhook-url`: URL to send webhook POST requests
161 |
162 | ### Example
163 |
164 | ```shell
165 | pg_flo stream webhook \
166 | --host localhost \
167 | --port 5432 \
168 | --dbname your_database \
169 | --user your_user \
170 | --password your_password \
171 | --group your_group \
172 | --tables table1,table2 \
173 | --schema public \
174 | --status-dir /tmp/pg_flo-status \
175 | --webhook-url https://your-webhook-endpoint.com/receive
176 | ```
177 |
178 | ### Additional Behavior
179 |
180 | - Sends each change as a separate HTTP POST request to the specified webhook URL (a minimal receiver is sketched after this list).
181 | - Implements a retry mechanism with up to 3 attempts for failed requests.
182 | - Considers both network errors and non-2xx status codes as failures that trigger retries.
183 | - Maintains a status file to keep track of the last processed LSN.
184 | - The status file is stored in the specified status directory with the name `pg_flo_webhook_last_lsn.json`.
185 |
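Each POST body is the JSON encoding of the decoded message, with the same top-level fields used by the STDOUT and file sinks (`Type`, `Schema`, `Table`, `ReplicationKey`, `LSN`, `EmittedAt`, and `NewTuple`/`OldTuple` where present). The handler below is a minimal sketch of a receiving endpoint; the listen address, route, and struct shape are illustrative and not part of `pg_flo`:

```go
package main

import (
	"encoding/json"
	"log"
	"net/http"
)

// webhookPayload mirrors a subset of the fields emitted by the webhook sink;
// fields not listed here are simply ignored by the JSON decoder.
type webhookPayload struct {
	Type     string                 `json:"Type"`
	Schema   string                 `json:"Schema"`
	Table    string                 `json:"Table"`
	LSN      string                 `json:"LSN"`
	NewTuple map[string]interface{} `json:"NewTuple"`
	OldTuple map[string]interface{} `json:"OldTuple"`
}

func main() {
	http.HandleFunc("/receive", func(w http.ResponseWriter, r *http.Request) {
		var p webhookPayload
		if err := json.NewDecoder(r.Body).Decode(&p); err != nil {
			// A non-2xx response causes pg_flo to retry the request.
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		log.Printf("%s on %s.%s (LSN %s)", p.Type, p.Schema, p.Table, p.LSN)
		w.WriteHeader(http.StatusOK)
	})

	// Placeholder listen address for this sketch.
	log.Fatal(http.ListenAndServe(":8080", nil))
}
```
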
186 | ## Sink Interface
187 |
188 | `pg_flo` uses a common interface for all sink types, allowing new sinks to be implemented easily (a minimal example is sketched at the end of this section). The `Sink` interface defines the following method:
189 | 
190 | - `WriteBatch(messages []*utils.CDCMessage) error`: Writes a batch of changes to the sink.
191 | - `Close() error`: Implemented by sinks that hold resources (for example, the file and webhook sinks) to release file handles or connections; it is not part of the `Sink` interface itself.
192 |
193 | Sinks can save the last processed `LSN` at the destination (as appropriate). This ensures that if a `pg_flo` process shuts down (for example, during a deployment) and starts again, it knows where to resume from.
194 |
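As a sketch, a custom sink might look like the following. How such a sink would be wired into the CLI is out of scope here, and the per-table counting behaviour is purely illustrative:

```go
package main

import (
	"fmt"

	"github.com/pgflo/pg_flo/pkg/sinks"
	"github.com/pgflo/pg_flo/pkg/utils"
)

// countingSink is a minimal example sink: it satisfies the Sink contract by
// implementing WriteBatch and simply keeps a per-table message count.
type countingSink struct {
	counts map[string]int
}

// Compile-time check that countingSink satisfies the Sink interface.
var _ sinks.Sink = (*countingSink)(nil)

func newCountingSink() *countingSink {
	return &countingSink{counts: make(map[string]int)}
}

// WriteBatch records each message; a real sink would write to its destination
// here and return an error on failure so the worker can surface it.
func (s *countingSink) WriteBatch(messages []*utils.CDCMessage) error {
	for _, m := range messages {
		s.counts[fmt.Sprintf("%s.%s", m.Schema, m.Table)]++
	}
	return nil
}

// Close releases resources; this example holds none, so it is a no-op.
func (s *countingSink) Close() error {
	return nil
}

func main() {
	s := newCountingSink()
	_ = s.WriteBatch([]*utils.CDCMessage{
		{Schema: "public", Table: "users"},
	})
	fmt.Println(s.counts) // map[public.users:1]
	_ = s.Close()
}
```

A production sink would typically also persist the last processed `LSN`, as described above.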
--------------------------------------------------------------------------------
/pkg/sinks/file.go:
--------------------------------------------------------------------------------
1 | package sinks
2 |
3 | import (
4 | "fmt"
5 | "os"
6 | "path/filepath"
7 | "sync"
8 | "time"
9 |
10 | "github.com/goccy/go-json"
11 | "github.com/pgflo/pg_flo/pkg/utils"
12 |
13 | "github.com/rs/zerolog"
14 | "github.com/rs/zerolog/log"
15 | )
16 |
17 | func init() {
18 | log.Logger = log.Output(zerolog.ConsoleWriter{
19 | Out: os.Stderr,
20 | TimeFormat: "15:04:05.000",
21 | })
22 | }
23 |
24 | // FileSink represents a sink that writes data to files
25 | type FileSink struct {
26 | outputDir string
27 | currentFile *os.File
28 | currentSize int64
29 | maxFileSize int64
30 | rotateInterval time.Duration
31 | lastRotation time.Time
32 | mutex sync.Mutex
33 | }
34 |
35 | // NewFileSink creates a new FileSink instance
36 | func NewFileSink(outputDir string) (*FileSink, error) {
37 | sink := &FileSink{
38 | outputDir: outputDir,
39 | maxFileSize: 100 * 1024 * 1024, // 100 MB
40 | rotateInterval: time.Hour, // Rotate every hour if size limit not reached
41 | }
42 |
43 | if err := os.MkdirAll(outputDir, 0755); err != nil {
44 | return nil, fmt.Errorf("failed to create output directory: %v", err)
45 | }
46 |
47 | if err := sink.rotateFile(); err != nil {
48 | return nil, fmt.Errorf("failed to create initial log file: %v", err)
49 | }
50 |
51 | return sink, nil
52 | }
53 |
54 | // rotateFile creates a new log file and updates the current file pointer
55 | func (s *FileSink) rotateFile() error {
56 | if s.currentFile != nil {
57 | err := s.currentFile.Close()
58 | if err != nil {
59 | return err
60 | }
61 | s.currentFile = nil
62 | }
63 |
64 | timestamp := time.Now().UTC().Format("20060102T150405Z")
65 | filename := fmt.Sprintf("pg_flo_log_%s.jsonl", timestamp)
66 | 	filePath := filepath.Join(s.outputDir, filename)
67 | 
68 | 	file, err := os.OpenFile(filePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
69 | 	if err != nil {
70 | 		return fmt.Errorf("failed to create new log file: %v", err)
71 | 	}
72 | 
73 | 	s.currentFile = file
74 | 	s.currentSize = 0
75 | 	s.lastRotation = time.Now()
76 | 
77 | 	log.Info().Str("file", filePath).Msg("Rotated to new log file")
78 | return nil
79 | }
80 |
81 | // WriteBatch writes a batch of data to the current log file
82 | func (s *FileSink) WriteBatch(messages []*utils.CDCMessage) error {
83 | s.mutex.Lock()
84 | defer s.mutex.Unlock()
85 |
86 | for _, message := range messages {
87 | decodedMessage, err := buildDecodedMessage(message)
88 | if err != nil {
89 | return fmt.Errorf("failed to build decoded message: %v", err)
90 | }
91 |
92 | jsonData, err := json.Marshal(decodedMessage)
93 | if err != nil {
94 | return fmt.Errorf("failed to marshal data to JSON: %v", err)
95 | }
96 |
97 | if s.currentFile == nil || s.currentSize >= s.maxFileSize || time.Since(s.lastRotation) >= s.rotateInterval {
98 | if err := s.rotateFile(); err != nil {
99 | return err
100 | }
101 | }
102 |
103 | jsonData = append(jsonData, '\n')
104 | n, err := s.currentFile.Write(jsonData)
105 | if err != nil {
106 | return fmt.Errorf("failed to write to log file: %v", err)
107 | }
108 |
109 | s.currentSize += int64(n)
110 | }
111 | return nil
112 | }
113 |
114 | // Close closes the current log file and performs any necessary cleanup
115 | func (s *FileSink) Close() error {
116 | s.mutex.Lock()
117 | defer s.mutex.Unlock()
118 |
119 | if s.currentFile != nil {
120 | err := s.currentFile.Close()
121 | s.currentFile = nil
122 | return err
123 | }
124 | return nil
125 | }
126 |
--------------------------------------------------------------------------------
/pkg/sinks/shared.go:
--------------------------------------------------------------------------------
1 | package sinks
2 |
3 | import "github.com/pgflo/pg_flo/pkg/utils"
4 |
5 | func buildDecodedMessage(message *utils.CDCMessage) (map[string]interface{}, error) {
6 | decodedMessage := make(map[string]interface{})
7 | decodedMessage["Type"] = message.Type
8 | decodedMessage["Schema"] = message.Schema
9 | decodedMessage["Table"] = message.Table
10 | decodedMessage["ReplicationKey"] = message.ReplicationKey
11 | decodedMessage["LSN"] = message.LSN
12 | decodedMessage["EmittedAt"] = message.EmittedAt
13 |
14 | if message.NewTuple != nil {
15 | newTuple := make(map[string]interface{})
16 | for _, col := range message.Columns {
17 | value, err := message.GetColumnValue(col.Name, false)
18 | if err != nil {
19 | return nil, err
20 | }
21 | newTuple[col.Name] = value
22 | }
23 | decodedMessage["NewTuple"] = newTuple
24 | }
25 |
26 | if message.OldTuple != nil {
27 | oldTuple := make(map[string]interface{})
28 | for _, col := range message.Columns {
29 | value, err := message.GetColumnValue(col.Name, true)
30 | if err != nil {
31 | return nil, err
32 | }
33 | oldTuple[col.Name] = value
34 | }
35 | decodedMessage["OldTuple"] = oldTuple
36 | }
37 |
38 | return decodedMessage, nil
39 | }
40 |
--------------------------------------------------------------------------------
/pkg/sinks/sink.go:
--------------------------------------------------------------------------------
1 | package sinks
2 |
3 | import (
4 | "github.com/pgflo/pg_flo/pkg/utils"
5 | )
6 |
7 | type Sink interface {
8 | WriteBatch(data []*utils.CDCMessage) error
9 | }
10 |
--------------------------------------------------------------------------------
/pkg/sinks/stdout.go:
--------------------------------------------------------------------------------
1 | package sinks
2 |
3 | import (
4 | "fmt"
5 |
6 | "github.com/goccy/go-json"
7 | "github.com/pgflo/pg_flo/pkg/utils"
8 | )
9 |
10 | // StdoutSink represents a sink that writes data to standard output
11 | type StdoutSink struct{}
12 |
13 | // NewStdoutSink creates a new StdoutSink instance
14 | func NewStdoutSink() (*StdoutSink, error) {
15 | return &StdoutSink{}, nil
16 | }
17 |
18 | // WriteBatch writes a batch of data to standard output
19 | func (s *StdoutSink) WriteBatch(messages []*utils.CDCMessage) error {
20 | for _, message := range messages {
21 | decodedMessage, err := buildDecodedMessage(message)
22 | if err != nil {
23 | return fmt.Errorf("failed to build decoded message: %v", err)
24 | }
25 |
26 | jsonData, err := json.Marshal(decodedMessage)
27 | if err != nil {
28 | return fmt.Errorf("failed to marshal data to JSON: %v", err)
29 | }
30 |
31 | if _, err := fmt.Println(string(jsonData)); err != nil {
32 | return err
33 | }
34 | }
35 | return nil
36 | }
37 |
--------------------------------------------------------------------------------
/pkg/sinks/types.go:
--------------------------------------------------------------------------------
1 | package sinks
2 |
3 | import "github.com/jackc/pglogrepl"
4 |
5 | type Status struct {
6 | LastLSN pglogrepl.LSN `json:"last_lsn"`
7 | }
8 |
--------------------------------------------------------------------------------
/pkg/sinks/webhooks.go:
--------------------------------------------------------------------------------
1 | package sinks
2 |
3 | import (
4 | "bytes"
5 | "fmt"
6 | "net/http"
7 | "os"
8 |
9 | "github.com/goccy/go-json"
10 | "github.com/pgflo/pg_flo/pkg/utils"
11 | "github.com/rs/zerolog"
12 | "github.com/rs/zerolog/log"
13 | )
14 |
15 | func init() {
16 | log.Logger = log.Output(zerolog.ConsoleWriter{
17 | Out: os.Stderr,
18 | TimeFormat: "15:04:05.000",
19 | })
20 | }
21 |
22 | // WebhookSink represents a sink that sends data to a webhook endpoint
23 | type WebhookSink struct {
24 | webhookURL string
25 | client *http.Client
26 | }
27 |
28 | // NewWebhookSink creates a new WebhookSink instance
29 | func NewWebhookSink(webhookURL string) (*WebhookSink, error) {
30 | sink := &WebhookSink{
31 | webhookURL: webhookURL,
32 | client: &http.Client{},
33 | }
34 |
35 | return sink, nil
36 | }
37 |
38 | // WriteBatch sends a batch of data to the webhook endpoint
39 | func (s *WebhookSink) WriteBatch(messages []*utils.CDCMessage) error {
40 | for _, message := range messages {
41 | decodedMessage, err := buildDecodedMessage(message)
42 | if err != nil {
43 | return fmt.Errorf("failed to build decoded message: %v", err)
44 | }
45 |
46 | jsonData, err := json.Marshal(decodedMessage)
47 | if err != nil {
48 | return fmt.Errorf("failed to marshal data to JSON: %v", err)
49 | }
50 |
51 | if err = s.sendWithRetry(jsonData); err != nil {
52 | return err
53 | }
54 | }
55 | return nil
56 | }
57 |
58 | // sendWithRetry sends data to the webhook endpoint with retry logic
59 | func (s *WebhookSink) sendWithRetry(jsonData []byte) error {
60 | maxRetries := 3
61 | for attempt := 1; attempt <= maxRetries; attempt++ {
62 | req, err := http.NewRequest("POST", s.webhookURL, bytes.NewBuffer(jsonData))
63 | if err != nil {
64 | return fmt.Errorf("failed to create request: %v", err)
65 | }
66 |
67 | req.Header.Set("Content-Type", "application/json")
68 |
69 | resp, err := s.client.Do(req)
70 | if err != nil {
71 | if attempt == maxRetries {
72 | return fmt.Errorf("failed to send webhook after %d attempts: %v", maxRetries, err)
73 | }
74 | log.Warn().Err(err).Int("attempt", attempt).Msg("Webhook request failed, retrying...")
75 | continue
76 | }
77 | defer resp.Body.Close()
78 |
79 | if resp.StatusCode >= 200 && resp.StatusCode < 300 {
80 | return nil
81 | }
82 |
83 | if attempt == maxRetries {
84 | return fmt.Errorf("webhook request failed with status code: %d after %d attempts", resp.StatusCode, maxRetries)
85 | }
86 | log.Warn().Int("statusCode", resp.StatusCode).Int("attempt", attempt).Msg("Received non-2xx status code, retrying...")
87 | }
88 | return nil
89 | }
90 |
91 | // Close performs any necessary cleanup (no-op for WebhookSink)
92 | func (s *WebhookSink) Close() error {
93 | return nil
94 | }
95 |
--------------------------------------------------------------------------------
/pkg/utils/cdc_encoding.go:
--------------------------------------------------------------------------------
1 | package utils
2 |
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | "reflect"
7 | "strconv"
8 | "strings"
9 | "time"
10 |
11 | "github.com/jackc/pgx/v5/pgtype"
12 | )
13 |
14 | // ConvertToPgCompatibleOutput converts a Go value to its PostgreSQL output format.
15 | func ConvertToPgCompatibleOutput(value interface{}, oid uint32) ([]byte, error) {
16 | if value == nil {
17 | return nil, nil
18 | }
19 |
20 | switch oid {
21 | case pgtype.BoolOID:
22 | return strconv.AppendBool(nil, value.(bool)), nil
23 | case pgtype.Int2OID, pgtype.Int4OID, pgtype.Int8OID:
24 | switch v := value.(type) {
25 | case int:
26 | return []byte(strconv.FormatInt(int64(v), 10)), nil
27 | case int32:
28 | return []byte(strconv.FormatInt(int64(v), 10)), nil
29 | case int64:
30 | return []byte(strconv.FormatInt(v, 10)), nil
31 | default:
32 | return []byte(fmt.Sprintf("%d", value)), nil
33 | }
34 | case pgtype.Float4OID, pgtype.Float8OID:
35 | return []byte(strconv.FormatFloat(value.(float64), 'f', -1, 64)), nil
36 | case pgtype.NumericOID:
37 | return []byte(fmt.Sprintf("%v", value)), nil
38 | case pgtype.TextOID, pgtype.VarcharOID:
39 | return []byte(value.(string)), nil
40 | case pgtype.ByteaOID:
41 | if byteaData, ok := value.([]byte); ok {
42 | return byteaData, nil
43 | }
44 | return nil, fmt.Errorf("invalid bytea data type")
45 | case pgtype.TimestampOID, pgtype.TimestamptzOID:
46 | return []byte(value.(time.Time).Format(time.RFC3339Nano)), nil
47 | case pgtype.DateOID:
48 | return []byte(value.(time.Time).Format("2006-01-02")), nil
49 | case pgtype.JSONOID:
50 | switch v := value.(type) {
51 | case string:
52 | return []byte(v), nil
53 | case []byte:
54 | return v, nil
55 | default:
56 | return nil, fmt.Errorf("unsupported type for JSON data: %T", value)
57 | }
58 | case pgtype.JSONBOID:
59 | if jsonBytes, ok := value.([]byte); ok {
60 | return jsonBytes, nil
61 | }
62 | return json.Marshal(value)
63 | case pgtype.TextArrayOID, pgtype.VarcharArrayOID,
64 | pgtype.Int2ArrayOID, pgtype.Int4ArrayOID, pgtype.Int8ArrayOID,
65 | pgtype.Float4ArrayOID, pgtype.Float8ArrayOID, pgtype.BoolArrayOID:
66 | return EncodeArray(value)
67 | default:
68 | return []byte(fmt.Sprintf("%v", value)), nil
69 | }
70 | }
71 |
72 | // EncodeArray encodes a slice of values into a PostgreSQL array format.
73 | func EncodeArray(value interface{}) ([]byte, error) {
74 | var elements []string
75 |
76 | switch slice := value.(type) {
77 | case []interface{}:
78 | for _, v := range slice {
79 | elem, err := encodeArrayElement(v)
80 | if err != nil {
81 | return nil, err
82 | }
83 | elements = append(elements, elem)
84 | }
85 | case []string:
86 | elements = append(elements, slice...)
87 | case []int, []int32, []int64, []float32, []float64, []bool:
88 | sliceValue := reflect.ValueOf(slice)
89 | for i := 0; i < sliceValue.Len(); i++ {
90 | elem, err := encodeArrayElement(sliceValue.Index(i).Interface())
91 | if err != nil {
92 | return nil, err
93 | }
94 | elements = append(elements, elem)
95 | }
96 | default:
97 | return nil, fmt.Errorf("unsupported slice type: %T", value)
98 | }
99 |
100 | return []byte("{" + strings.Join(elements, ",") + "}"), nil
101 | }
102 |
103 | // encodeArrayElement encodes a single array element into a string representation.
104 | func encodeArrayElement(v interface{}) (string, error) {
105 | if v == nil {
106 | return "NULL", nil
107 | }
108 |
109 | switch val := v.(type) {
110 | case string:
111 | return val, nil
112 | case int, int32, int64, float32, float64:
113 | return fmt.Sprintf("%v", val), nil
114 | case bool:
115 | return strconv.FormatBool(val), nil
116 | case time.Time:
117 | return val.Format(time.RFC3339Nano), nil
118 | case []byte:
119 | return fmt.Sprintf("\\x%x", val), nil
120 | default:
121 | jsonBytes, err := json.Marshal(val)
122 | if err != nil {
123 | return "", fmt.Errorf("failed to marshal array element to JSON: %w", err)
124 | }
125 | return string(jsonBytes), nil
126 | }
127 | }
128 |
--------------------------------------------------------------------------------
/pkg/utils/cdc_message.go:
--------------------------------------------------------------------------------
1 | package utils
2 |
3 | import (
4 | "bytes"
5 | "encoding/gob"
6 | "encoding/hex"
7 | "encoding/json"
8 | "fmt"
9 | "strconv"
10 | "strings"
11 | "time"
12 |
13 | "github.com/jackc/pglogrepl"
14 | "github.com/jackc/pgx/v5/pgtype"
15 | )
16 |
17 | // init registers types with the gob package for encoding/decoding
18 | func init() {
19 | gob.Register(json.RawMessage{})
20 | gob.Register(time.Time{})
21 | gob.Register(map[string]interface{}{})
22 | gob.Register(pglogrepl.RelationMessageColumn{})
23 | gob.Register(pglogrepl.LSN(0))
24 |
25 | gob.Register(CDCMessage{})
26 | gob.Register(pglogrepl.TupleData{})
27 | gob.Register(pglogrepl.TupleDataColumn{})
28 | }
29 |
30 | // CDCMessage represents a full message for Change Data Capture
31 | type CDCMessage struct {
32 | Type OperationType
33 | Schema string
34 | Table string
35 | Columns []*pglogrepl.RelationMessageColumn
36 | NewTuple *pglogrepl.TupleData
37 | OldTuple *pglogrepl.TupleData
38 | ReplicationKey ReplicationKey
39 | LSN string
40 | EmittedAt time.Time
41 | ToastedColumns map[string]bool
42 | }
43 |
44 | // MarshalBinary implements the encoding.BinaryMarshaler interface
45 | func (m CDCMessage) MarshalBinary() ([]byte, error) {
46 | return EncodeCDCMessage(m)
47 | }
48 |
49 | // UnmarshalBinary implements the encoding.BinaryUnmarshaler interface
50 | func (m *CDCMessage) UnmarshalBinary(data []byte) error {
51 | decodedMessage, err := DecodeCDCMessage(data)
52 | if err != nil {
53 | return err
54 | }
55 | *m = *decodedMessage
56 | return nil
57 | }
58 |
59 | func (m *CDCMessage) GetColumnIndex(columnName string) int {
60 | for i, col := range m.Columns {
61 | if col.Name == columnName {
62 | return i
63 | }
64 | }
65 | return -1
66 | }
67 |
68 | // GetColumnValue gets a column value, optionally using old values for DELETE/UPDATE
69 | func (m *CDCMessage) GetColumnValue(columnName string, useOldValues bool) (interface{}, error) {
70 | colIndex := m.GetColumnIndex(columnName)
71 | if colIndex == -1 {
72 | return nil, fmt.Errorf("column %s not found", columnName)
73 | }
74 |
75 | var data []byte
76 | if useOldValues && m.OldTuple != nil {
77 | data = m.OldTuple.Columns[colIndex].Data
78 | } else if m.NewTuple != nil {
79 | data = m.NewTuple.Columns[colIndex].Data
80 | } else {
81 | return nil, fmt.Errorf("no data available for column %s", columnName)
82 | }
83 |
84 | return DecodeValue(data, m.Columns[colIndex].DataType)
85 | }
86 |
87 | // SetColumnValue sets the value of a column, respecting its type
88 | func (m *CDCMessage) SetColumnValue(columnName string, value interface{}) error {
89 | colIndex := m.GetColumnIndex(columnName)
90 | if colIndex == -1 {
91 | return fmt.Errorf("column %s not found", columnName)
92 | }
93 |
94 | column := m.Columns[colIndex]
95 | encodedValue, err := EncodeValue(value, column.DataType)
96 | if err != nil {
97 | return err
98 | }
99 |
100 | if m.Type == OperationDelete {
101 | m.OldTuple.Columns[colIndex] = &pglogrepl.TupleDataColumn{Data: encodedValue}
102 | } else {
103 | m.NewTuple.Columns[colIndex] = &pglogrepl.TupleDataColumn{Data: encodedValue}
104 | }
105 |
106 | return nil
107 | }
108 |
109 | // EncodeCDCMessage encodes a CDCMessage into a byte slice
110 | func EncodeCDCMessage(m CDCMessage) ([]byte, error) {
111 | var buf bytes.Buffer
112 | enc := gob.NewEncoder(&buf)
113 |
114 | if err := enc.Encode(m.Type); err != nil {
115 | return nil, err
116 | }
117 | if err := enc.Encode(m.Schema); err != nil {
118 | return nil, err
119 | }
120 | if err := enc.Encode(m.Table); err != nil {
121 | return nil, err
122 | }
123 | if err := enc.Encode(m.Columns); err != nil {
124 | return nil, err
125 | }
126 |
127 | if err := enc.Encode(m.NewTuple != nil); err != nil {
128 | return nil, err
129 | }
130 | if m.NewTuple != nil {
131 | if err := enc.Encode(m.NewTuple); err != nil {
132 | return nil, err
133 | }
134 | }
135 |
136 | if err := enc.Encode(m.OldTuple != nil); err != nil {
137 | return nil, err
138 | }
139 |
140 | if m.OldTuple != nil {
141 | if err := enc.Encode(m.OldTuple); err != nil {
142 | return nil, err
143 | }
144 | }
145 |
146 | if err := enc.Encode(m.ReplicationKey); err != nil {
147 | return nil, err
148 | }
149 |
150 | if err := enc.Encode(m.LSN); err != nil {
151 | return nil, err
152 | }
153 |
154 | if err := enc.Encode(m.EmittedAt); err != nil {
155 | return nil, err
156 | }
157 |
158 | if err := enc.Encode(m.ToastedColumns); err != nil {
159 | return nil, err
160 | }
161 |
162 | return buf.Bytes(), nil
163 | }
164 |
165 | // DecodeCDCMessage decodes a byte slice into a CDCMessage
166 | func DecodeCDCMessage(data []byte) (*CDCMessage, error) {
167 | buf := bytes.NewBuffer(data)
168 | dec := gob.NewDecoder(buf)
169 | m := &CDCMessage{}
170 |
171 | if err := dec.Decode(&m.Type); err != nil {
172 | return nil, err
173 | }
174 | if err := dec.Decode(&m.Schema); err != nil {
175 | return nil, err
176 | }
177 | if err := dec.Decode(&m.Table); err != nil {
178 | return nil, err
179 | }
180 | if err := dec.Decode(&m.Columns); err != nil {
181 | return nil, err
182 | }
183 |
184 | var newTupleExists bool
185 | if err := dec.Decode(&newTupleExists); err != nil {
186 | return nil, err
187 | }
188 | if newTupleExists {
189 | m.NewTuple = &pglogrepl.TupleData{}
190 | if err := dec.Decode(m.NewTuple); err != nil {
191 | return nil, err
192 | }
193 | }
194 |
195 | var oldTupleExists bool
196 | if err := dec.Decode(&oldTupleExists); err != nil {
197 | return nil, err
198 | }
199 | if oldTupleExists {
200 | m.OldTuple = &pglogrepl.TupleData{}
201 | if err := dec.Decode(m.OldTuple); err != nil {
202 | return nil, err
203 | }
204 | }
205 |
206 | if err := dec.Decode(&m.ReplicationKey); err != nil {
207 | return nil, err
208 | }
209 |
210 | if err := dec.Decode(&m.LSN); err != nil {
211 | return nil, err
212 | }
213 |
214 | if err := dec.Decode(&m.EmittedAt); err != nil {
215 | return nil, err
216 | }
217 |
218 | if err := dec.Decode(&m.ToastedColumns); err != nil {
219 | return nil, err
220 | }
221 |
222 | return m, nil
223 | }
224 |
225 | // DecodeValue decodes a byte slice into a Go value based on the PostgreSQL data type
226 | func DecodeValue(data []byte, dataType uint32) (interface{}, error) {
227 | if data == nil {
228 | return nil, nil
229 | }
230 | strData := string(data)
231 | switch dataType {
232 | case pgtype.BoolOID:
233 | return strconv.ParseBool(string(data))
234 | case pgtype.Int2OID, pgtype.Int4OID, pgtype.Int8OID:
235 | return strconv.ParseInt(string(data), 10, 64)
236 | case pgtype.Float4OID, pgtype.Float8OID:
237 | if strings.EqualFold(strData, "NULL") {
238 | return nil, nil
239 | }
240 | return strconv.ParseFloat(strData, 64)
241 | case pgtype.NumericOID:
242 | return string(data), nil
243 | case pgtype.TextOID, pgtype.VarcharOID:
244 | return string(data), nil
245 | case pgtype.ByteaOID:
246 | if strings.HasPrefix(strData, "\\x") {
247 | hexString := strData[2:]
248 | byteData, err := hex.DecodeString(hexString)
249 | if err != nil {
250 | return nil, fmt.Errorf("failed to decode bytea hex string: %v", err)
251 | }
252 | return byteData, nil
253 | }
254 | return data, nil
255 | case pgtype.TimestampOID, pgtype.TimestamptzOID:
256 | return ParseTimestamp(string(data))
257 | case pgtype.DateOID:
258 | return time.Parse("2006-01-02", string(data))
259 | case pgtype.JSONOID:
260 | return string(data), nil
261 | case pgtype.JSONBOID:
262 | var result interface{}
263 | err := json.Unmarshal(data, &result)
264 | return result, err
265 | case pgtype.TextArrayOID, pgtype.VarcharArrayOID:
266 | return DecodeTextArray(data)
267 | case pgtype.Int2ArrayOID, pgtype.Int4ArrayOID, pgtype.Int8ArrayOID, pgtype.Float4ArrayOID, pgtype.Float8ArrayOID, pgtype.BoolArrayOID:
268 | return DecodeArray(data, dataType)
269 | default:
270 | return string(data), nil
271 | }
272 | }
273 |
274 | // DecodeTextArray decodes a PostgreSQL text array into a []string
275 | func DecodeTextArray(data []byte) ([]string, error) {
276 | if len(data) < 2 || data[0] != '{' || data[len(data)-1] != '}' {
277 | return nil, fmt.Errorf("invalid array format")
278 | }
279 | elements := strings.Split(string(data[1:len(data)-1]), ",")
280 | for i, elem := range elements {
281 | elements[i] = strings.Trim(elem, "\"")
282 | }
283 | return elements, nil
284 | }
285 |
286 | // DecodeArray decodes a PostgreSQL array into a slice of the appropriate type
287 | func DecodeArray(data []byte, dataType uint32) (interface{}, error) {
288 | if len(data) < 2 || data[0] != '{' || data[len(data)-1] != '}' {
289 | return nil, fmt.Errorf("invalid array format")
290 | }
291 | elements := strings.Split(string(data[1:len(data)-1]), ",")
292 |
293 | switch dataType {
294 | case pgtype.Int2ArrayOID, pgtype.Int4ArrayOID, pgtype.Int8ArrayOID:
295 | result := make([]interface{}, len(elements))
296 | for i, elem := range elements {
297 | if elem == "NULL" {
298 | result[i] = nil
299 | continue
300 | }
301 | val, err := strconv.ParseInt(elem, 10, 64)
302 | if err != nil {
303 | return nil, err
304 | }
305 | result[i] = val
306 | }
307 | return result, nil
308 | case pgtype.Float4ArrayOID, pgtype.Float8ArrayOID:
309 | result := make([]interface{}, len(elements))
310 | for i, elem := range elements {
311 | if elem == "NULL" {
312 | result[i] = nil
313 | continue
314 | }
315 | val, err := strconv.ParseFloat(elem, 64)
316 | if err != nil {
317 | return nil, err
318 | }
319 | result[i] = val
320 | }
321 | return result, nil
322 | case pgtype.BoolArrayOID:
323 | result := make([]interface{}, len(elements))
324 | for i, elem := range elements {
325 | if elem == "NULL" {
326 | result[i] = nil
327 | continue
328 | }
329 | val, err := strconv.ParseBool(elem)
330 | if err != nil {
331 | return nil, err
332 | }
333 | result[i] = val
334 | }
335 | return result, nil
336 | default:
337 | return elements, nil
338 | }
339 | }
340 |
341 | // EncodeValue encodes a Go value into a byte slice based on the PostgreSQL data type
342 | func EncodeValue(value interface{}, dataType uint32) ([]byte, error) {
343 | return ConvertToPgCompatibleOutput(value, dataType)
344 | }
345 |
346 | // IsColumnToasted checks if a column was TOASTed
347 | func (m *CDCMessage) IsColumnToasted(columnName string) bool {
348 | return m.ToastedColumns[columnName]
349 | }
350 |
--------------------------------------------------------------------------------
/pkg/utils/retry.go:
--------------------------------------------------------------------------------
1 | package utils
2 |
3 | import (
4 | "context"
5 | "time"
6 | )
7 |
8 | type RetryConfig struct {
9 | MaxAttempts int
10 | InitialWait time.Duration
11 | MaxWait time.Duration
12 | }
13 |
14 | func WithRetry(ctx context.Context, cfg RetryConfig, operation func() error) error {
15 | wait := cfg.InitialWait
16 | for attempt := 1; attempt <= cfg.MaxAttempts; attempt++ {
17 | err := operation()
18 | if err == nil {
19 | return nil
20 | }
21 |
22 | if attempt == cfg.MaxAttempts {
23 | return err
24 | }
25 |
26 | select {
27 | case <-ctx.Done():
28 | return ctx.Err()
29 | case <-time.After(wait):
30 | // Exponential backoff with max wait
31 | wait *= 2
32 | if wait > cfg.MaxWait {
33 | wait = cfg.MaxWait
34 | }
35 | }
36 | }
37 | return nil
38 | }
39 |
--------------------------------------------------------------------------------
/pkg/utils/shared.go:
--------------------------------------------------------------------------------
1 | package utils
2 |
3 | import (
4 | "fmt"
5 | "reflect"
6 | "strconv"
7 | "time"
8 |
9 | "github.com/jackc/pgtype"
10 | )
11 |
12 | // ParseTimestamp attempts to parse a timestamp string using multiple layouts
13 | func ParseTimestamp(value string) (time.Time, error) {
14 | layouts := []string{
15 | time.RFC3339Nano,
16 | "2006-01-02 15:04:05.999999-07",
17 | "2006-01-02 15:04:05.999999Z07:00",
18 | "2006-01-02 15:04:05.999999",
19 | "2006-01-02T15:04:05.999999Z",
20 | "2006-01-02 15:04:05",
21 | "2006-01-02T15:04:05Z",
22 | }
23 |
24 | for _, layout := range layouts {
25 | if t, err := time.Parse(layout, value); err == nil {
26 | return t, nil
27 | }
28 | }
29 |
30 | return time.Time{}, fmt.Errorf("unable to parse timestamp: %s", value)
31 | }
32 |
33 | // OidToTypeName maps PostgreSQL OIDs to their corresponding type names
34 | var OidToTypeName = map[uint32]string{
35 | pgtype.BoolOID: "bool",
36 | pgtype.ByteaOID: "bytea",
37 | pgtype.Int8OID: "int8",
38 | pgtype.Int2OID: "int2",
39 | pgtype.Int4OID: "int4",
40 | pgtype.TextOID: "text",
41 | pgtype.JSONOID: "json",
42 | pgtype.Float4OID: "float4",
43 | pgtype.Float8OID: "float8",
44 | pgtype.BoolArrayOID: "bool[]",
45 | pgtype.Int2ArrayOID: "int2[]",
46 | pgtype.Int4ArrayOID: "int4[]",
47 | pgtype.TextArrayOID: "text[]",
48 | pgtype.ByteaArrayOID: "bytea[]",
49 | pgtype.Int8ArrayOID: "int8[]",
50 | pgtype.Float4ArrayOID: "float4[]",
51 | pgtype.Float8ArrayOID: "float8[]",
52 | pgtype.BPCharOID: "bpchar",
53 | pgtype.VarcharOID: "varchar",
54 | pgtype.DateOID: "date",
55 | pgtype.TimeOID: "time",
56 | pgtype.TimestampOID: "timestamp",
57 | pgtype.TimestampArrayOID: "timestamp[]",
58 | pgtype.DateArrayOID: "date[]",
59 | pgtype.TimestamptzOID: "timestamptz",
60 | pgtype.TimestamptzArrayOID: "timestamptz[]",
61 | pgtype.IntervalOID: "interval",
62 | pgtype.NumericArrayOID: "numeric[]",
63 | pgtype.BitOID: "bit",
64 | pgtype.VarbitOID: "varbit",
65 | pgtype.NumericOID: "numeric",
66 | pgtype.UUIDOID: "uuid",
67 | pgtype.UUIDArrayOID: "uuid[]",
68 | pgtype.JSONBOID: "jsonb",
69 | pgtype.JSONBArrayOID: "jsonb[]",
70 | }
71 |
72 | // OIDToString converts a PostgreSQL OID to its string representation
73 | func OIDToString(oid uint32) string {
74 | if typeName, ok := OidToTypeName[oid]; ok {
75 | return typeName
76 | }
77 | return fmt.Sprintf("unknown_%d", oid)
78 | }
79 |
80 | // StringToOID converts a type name to its PostgreSQL OID
81 | func StringToOID(typeName string) uint32 {
82 | for oid, name := range OidToTypeName {
83 | if name == typeName {
84 | return oid
85 | }
86 | }
87 | return 0
88 | }
89 |
90 | // ToInt64 converts an interface{} to int64
91 | func ToInt64(v interface{}) (int64, bool) {
92 | switch v := v.(type) {
93 | case int, int8, int16, int32, int64:
94 | return reflect.ValueOf(v).Int(), true
95 | case uint, uint8, uint16, uint32, uint64:
96 | return int64(reflect.ValueOf(v).Uint()), true
97 | case string:
98 | if i, err := strconv.ParseInt(v, 10, 64); err == nil {
99 | return i, true
100 | }
101 | }
102 | return 0, false
103 | }
104 |
105 | // ToFloat64 converts an interface{} to float64
106 | func ToFloat64(v interface{}) (float64, bool) {
107 | switch v := v.(type) {
108 | case int, int8, int16, int32, int64:
109 | return float64(reflect.ValueOf(v).Int()), true
110 | case uint, uint8, uint16, uint32, uint64:
111 | return float64(reflect.ValueOf(v).Uint()), true
112 | case float32, float64:
113 | return reflect.ValueOf(v).Float(), true
114 | case string:
115 | if f, err := strconv.ParseFloat(v, 64); err == nil {
116 | return f, true
117 | }
118 | }
119 | return 0, false
120 | }
121 |
122 | // ToBool converts various types to bool
123 | func ToBool(v interface{}) (bool, bool) {
124 | switch v := v.(type) {
125 | case bool:
126 | return v, true
127 | case string:
128 | if v == "true" || v == "1" {
129 | return true, true
130 | }
131 | if v == "false" || v == "0" {
132 | return false, true
133 | }
134 | case int, int8, int16, int32, int64:
135 | return reflect.ValueOf(v).Int() != 0, true
136 | case uint, uint8, uint16, uint32, uint64:
137 | return reflect.ValueOf(v).Uint() != 0, true
138 | case float32, float64:
139 | return reflect.ValueOf(v).Float() != 0, true
140 | }
141 | return false, false
142 | }
143 |
144 | // IsValid checks if the replication key is properly configured
145 | func (rk *ReplicationKey) IsValid() bool {
146 | if rk.Type == ReplicationKeyFull {
147 | return true // FULL doesn't require specific columns
148 | }
149 |
150 | return len(rk.Columns) > 0 &&
151 | (rk.Type == ReplicationKeyPK || rk.Type == ReplicationKeyUnique)
152 | }
153 |
--------------------------------------------------------------------------------
/pkg/utils/shared_types.go:
--------------------------------------------------------------------------------
1 | package utils
2 |
3 | // OperationType represents the type of database operation
4 | type OperationType string
5 |
6 | const (
7 | OperationInsert OperationType = "INSERT"
8 | OperationUpdate OperationType = "UPDATE"
9 | OperationDelete OperationType = "DELETE"
10 | OperationDDL OperationType = "DDL"
11 | )
12 |
13 | // ReplicationKeyType represents the type of replication key
14 | type ReplicationKeyType string
15 |
16 | const (
17 | ReplicationKeyPK ReplicationKeyType = "PRIMARY KEY"
18 | ReplicationKeyUnique ReplicationKeyType = "UNIQUE"
19 | ReplicationKeyFull ReplicationKeyType = "FULL" // Replica identity full
20 | )
21 |
22 | // ReplicationKey represents a key used for replication (either PK or unique constraint)
23 | type ReplicationKey struct {
24 | Type ReplicationKeyType
25 | Columns []string
26 | }
27 |
28 | type Logger interface {
29 | Debug() LogEvent
30 | Info() LogEvent
31 | Warn() LogEvent
32 | Error() LogEvent
33 | Err(err error) LogEvent
34 | }
35 |
36 | type LogEvent interface {
37 | Str(key, val string) LogEvent
38 | Int(key string, val int) LogEvent
39 | Int64(key string, val int64) LogEvent
40 | Uint8(key string, val uint8) LogEvent
41 | Uint32(key string, val uint32) LogEvent
42 | Interface(key string, val interface{}) LogEvent
43 | Err(err error) LogEvent
44 | Strs(key string, vals []string) LogEvent
45 | Any(key string, val interface{}) LogEvent
46 | Type(key string, val interface{}) LogEvent
47 | Msg(msg string)
48 | Msgf(format string, v ...interface{})
49 | }
50 |
--------------------------------------------------------------------------------
/pkg/utils/zerolog_logger.go:
--------------------------------------------------------------------------------
1 | package utils
2 |
3 | import (
4 | "github.com/rs/zerolog"
5 | )
6 |
7 | type ZerologLogger struct {
8 | logger zerolog.Logger
9 | }
10 |
11 | func NewZerologLogger(logger zerolog.Logger) Logger {
12 | return &ZerologLogger{logger: logger}
13 | }
14 |
15 | type ZerologLogEvent struct {
16 | event *zerolog.Event
17 | }
18 |
19 | func (z *ZerologLogger) Debug() LogEvent {
20 | return &ZerologLogEvent{event: z.logger.Debug()}
21 | }
22 |
23 | func (z *ZerologLogger) Info() LogEvent {
24 | return &ZerologLogEvent{event: z.logger.Info()}
25 | }
26 |
27 | func (z *ZerologLogger) Warn() LogEvent {
28 | return &ZerologLogEvent{event: z.logger.Warn()}
29 | }
30 |
31 | func (z *ZerologLogger) Error() LogEvent {
32 | return &ZerologLogEvent{event: z.logger.Error()}
33 | }
34 |
35 | func (z *ZerologLogger) Err(err error) LogEvent {
36 | return &ZerologLogEvent{event: z.logger.Err(err)}
37 | }
38 |
39 | func (e *ZerologLogEvent) Str(key, val string) LogEvent {
40 | e.event = e.event.Str(key, val)
41 | return e
42 | }
43 |
44 | func (e *ZerologLogEvent) Int(key string, val int) LogEvent {
45 | e.event = e.event.Int(key, val)
46 | return e
47 | }
48 |
49 | func (e *ZerologLogEvent) Int64(key string, val int64) LogEvent {
50 | e.event = e.event.Int64(key, val)
51 | return e
52 | }
53 |
54 | func (e *ZerologLogEvent) Uint32(key string, val uint32) LogEvent {
55 | e.event = e.event.Uint32(key, val)
56 | return e
57 | }
58 |
59 | func (e *ZerologLogEvent) Interface(key string, val interface{}) LogEvent {
60 | e.event = e.event.Interface(key, val)
61 | return e
62 | }
63 |
64 | func (e *ZerologLogEvent) Err(err error) LogEvent {
65 | e.event = e.event.Err(err)
66 | return e
67 | }
68 |
69 | func (e *ZerologLogEvent) Msg(msg string) {
70 | e.event.Msg(msg)
71 | }
72 |
73 | func (e *ZerologLogEvent) Msgf(format string, v ...interface{}) {
74 | e.event.Msgf(format, v...)
75 | }
76 |
77 | func (e *ZerologLogEvent) Strs(key string, vals []string) LogEvent {
78 | e.event = e.event.Strs(key, vals)
79 | return e
80 | }
81 |
82 | func (e *ZerologLogEvent) Any(key string, val interface{}) LogEvent {
83 | e.event = e.event.Interface(key, val)
84 | return e
85 | }
86 |
87 | func (e *ZerologLogEvent) Uint8(key string, val uint8) LogEvent {
88 | e.event = e.event.Uint8(key, val)
89 | return e
90 | }
91 |
92 | func (e *ZerologLogEvent) Type(key string, val interface{}) LogEvent {
93 | e.event = e.event.Type(key, val)
94 | return e
95 | }
96 |
--------------------------------------------------------------------------------
/pkg/worker/worker.go:
--------------------------------------------------------------------------------
1 | package worker
2 |
3 | import (
4 | "context"
5 | "errors"
6 | "fmt"
7 | "os"
8 | "sync"
9 | "time"
10 |
11 | "github.com/nats-io/nats.go" // Use the standard NATS package
12 | "github.com/pgflo/pg_flo/pkg/pgflonats"
13 | "github.com/pgflo/pg_flo/pkg/routing"
14 | "github.com/pgflo/pg_flo/pkg/rules"
15 | "github.com/pgflo/pg_flo/pkg/sinks"
16 | "github.com/pgflo/pg_flo/pkg/utils"
17 | "github.com/rs/zerolog"
18 | "github.com/rs/zerolog/log"
19 | )
20 |
21 | // Worker represents a worker that processes messages from NATS.
22 | type Worker struct {
23 | natsClient *pgflonats.NATSClient
24 | ruleEngine *rules.RuleEngine
25 | router *routing.Router
26 | sink sinks.Sink
27 | group string
28 | logger utils.Logger
29 | batchSize int
30 | buffer []*utils.CDCMessage
31 | lastSavedState uint64
32 | flushInterval time.Duration
33 | shutdownCh chan struct{}
34 | wg sync.WaitGroup
35 | }
36 |
37 | // Option is a function type that modifies Worker configuration
38 | type Option func(*Worker)
39 |
40 | // WithBatchSize sets the batch size for the worker
41 | func WithBatchSize(size int) Option {
42 | return func(w *Worker) {
43 | w.batchSize = size
44 | }
45 | }
46 |
47 | func init() {
48 | log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stdout, TimeFormat: "15:04:05.000"})
49 | zerolog.TimeFieldFormat = "2006-01-02T15:04:05.000Z07:00"
50 | }
51 |
52 | // NewWorker creates and returns a new Worker instance with the provided NATS client
53 | func NewWorker(natsClient *pgflonats.NATSClient, ruleEngine *rules.RuleEngine, router *routing.Router, sink sinks.Sink, group string, opts ...Option) *Worker {
54 | logger := utils.NewZerologLogger(log.With().Str("component", "worker").Logger())
55 |
56 | w := &Worker{
57 | natsClient: natsClient,
58 | ruleEngine: ruleEngine,
59 | router: router,
60 | sink: sink,
61 | group: group,
62 | logger: logger,
63 | batchSize: 1000,
64 | buffer: make([]*utils.CDCMessage, 0, 1000),
65 | lastSavedState: 0,
66 | flushInterval: 500 * time.Millisecond,
67 | shutdownCh: make(chan struct{}),
68 | }
69 |
70 | for _, opt := range opts {
71 | opt(w)
72 | }
73 | w.buffer = make([]*utils.CDCMessage, 0, w.batchSize)
74 |
75 | return w
76 | }
77 |
78 | // Start begins the worker's message processing loop, setting up the NATS consumer and processing messages.
79 | func (w *Worker) Start(ctx context.Context) error {
80 | stream := fmt.Sprintf("pgflo_%s_stream", w.group)
81 | subject := fmt.Sprintf("pgflo.%s", w.group)
82 |
83 | w.logger.Info().
84 | Str("stream", stream).
85 | Str("subject", subject).
86 | Str("group", w.group).
87 | Msg("Starting worker")
88 |
89 | js := w.natsClient.JetStream()
90 |
91 | consumerName := fmt.Sprintf("pgflo_%s_consumer", w.group)
92 |
93 | consumerConfig := &nats.ConsumerConfig{
94 | Durable: consumerName,
95 | FilterSubject: subject,
96 | AckPolicy: nats.AckExplicitPolicy,
97 | MaxDeliver: 1,
98 | AckWait: 25 * time.Minute,
99 | DeliverPolicy: nats.DeliverAllPolicy,
100 | }
101 |
102 | _, err := js.AddConsumer(stream, consumerConfig)
103 | if err != nil && !errors.Is(err, nats.ErrConsumerNameAlreadyInUse) {
104 | w.logger.Error().Err(err).Msg("Failed to add or update consumer")
105 | return fmt.Errorf("failed to add or update consumer: %w", err)
106 | }
107 |
108 | sub, err := js.PullSubscribe(subject, consumerName)
109 | if err != nil {
110 | w.logger.Error().Err(err).Msg("Failed to subscribe to subject")
111 | return fmt.Errorf("failed to subscribe to subject: %w", err)
112 | }
113 |
114 | w.wg.Add(1)
115 | go func() {
116 | defer w.wg.Done()
117 | 		if err := w.processMessages(ctx, sub); err != nil && !errors.Is(err, context.Canceled) {
118 | w.logger.Error().Err(err).Msg("Error processing messages")
119 | }
120 | }()
121 |
122 | <-ctx.Done()
123 | w.logger.Info().Msg("Received shutdown signal. Initiating graceful shutdown...")
124 |
125 | w.wg.Wait()
126 | w.logger.Debug().Msg("All goroutines finished")
127 |
128 | return w.flushBuffer()
129 | }
130 |
131 | // processMessages continuously processes messages from the NATS consumer.
132 | func (w *Worker) processMessages(ctx context.Context, sub *nats.Subscription) error {
133 | flushTicker := time.NewTicker(w.flushInterval)
134 | defer flushTicker.Stop()
135 |
136 | for {
137 | select {
138 | case <-ctx.Done():
139 | w.logger.Info().Msg("Flushing remaining messages")
140 | return w.flushBuffer()
141 | case <-flushTicker.C:
142 | if err := w.flushBuffer(); err != nil {
143 | w.logger.Error().Err(err).Msg("Failed to flush buffer on interval")
144 | }
145 | default:
146 | msgs, err := sub.Fetch(10, nats.MaxWait(500*time.Millisecond))
147 | if err != nil && !errors.Is(err, nats.ErrTimeout) {
148 | w.logger.Error().Err(err).Msg("Error fetching messages")
149 | continue
150 | }
151 |
152 | for _, msg := range msgs {
153 | if err := w.processMessage(msg); err != nil {
154 | w.logger.Error().Err(err).Msg("Failed to process message")
155 | }
156 | if err := msg.Ack(); err != nil {
157 | w.logger.Error().Err(err).Msg("Failed to acknowledge message")
158 | }
159 | }
160 | if len(w.buffer) >= w.batchSize {
161 | if err := w.flushBuffer(); err != nil {
162 | w.logger.Error().Err(err).Msg("Failed to flush buffer")
163 | }
164 | }
165 | }
166 | }
167 | }
168 |
169 | // processMessage handles a single message, applying rules, writing to the sink, and updating the last processed sequence.
170 | func (w *Worker) processMessage(msg *nats.Msg) error {
171 | metadata, err := msg.Metadata()
172 | if err != nil {
173 | w.logger.Error().Err(err).Msg("Failed to get message metadata")
174 | return err
175 | }
176 |
177 | var cdcMessage utils.CDCMessage
178 | err = cdcMessage.UnmarshalBinary(msg.Data)
179 | if err != nil {
180 | w.logger.Error().Err(err).Msg("Failed to unmarshal message")
181 | return err
182 | }
183 |
184 | if w.ruleEngine != nil {
185 | processedMessage, err := w.ruleEngine.ApplyRules(&cdcMessage)
186 | if err != nil {
187 | w.logger.Error().Err(err).Msg("Failed to apply rules")
188 | return err
189 | }
190 | if processedMessage == nil {
191 | w.logger.Debug().Msg("Message filtered out by rules")
192 | return nil
193 | }
194 | cdcMessage = *processedMessage
195 | }
196 |
197 | if w.router != nil {
198 | routedMessage, err := w.router.ApplyRouting(&cdcMessage)
199 | if err != nil {
200 | w.logger.Error().Err(err).Msg("Failed to apply routing")
201 | return err
202 | }
203 | if routedMessage == nil {
204 | w.logger.Debug().Msg("Message filtered out by routing")
205 | return nil
206 | }
207 | cdcMessage = *routedMessage
208 | }
209 |
210 | w.buffer = append(w.buffer, &cdcMessage)
211 | w.lastSavedState = metadata.Sequence.Stream
212 |
213 | return nil
214 | }
215 |
216 | // flushBuffer writes the buffered messages to the sink and updates the last processed sequence.
217 | func (w *Worker) flushBuffer() error {
218 | if len(w.buffer) == 0 {
219 | return nil
220 | }
221 |
222 | w.logger.Debug().
223 | Int("messages", len(w.buffer)).
224 | Int("batch_size", w.batchSize).
225 | Msg("Flushing buffer")
226 |
227 | err := w.sink.WriteBatch(w.buffer)
228 | if err != nil {
229 | w.logger.Error().Err(err).Msg("Failed to write batch to sink")
230 | return err
231 | }
232 |
233 | state, err := w.natsClient.GetState()
234 | if err != nil {
235 | w.logger.Error().Err(err).Msg("Failed to get current state")
236 | return err
237 | }
238 |
239 | state.LastProcessedSeq[w.group] = w.lastSavedState
240 | if err := w.natsClient.SaveState(state); err != nil {
241 | w.logger.Error().Err(err).Msg("Failed to save state")
242 | return err
243 | }
244 |
245 | w.buffer = w.buffer[:0]
246 | return nil
247 | }
248 |
--------------------------------------------------------------------------------