├── .github ├── dependabot.yml └── workflows │ ├── ci.yml │ ├── integration.yml │ └── release-and-docker.yml ├── .gitignore ├── .golangci.yml ├── .goreleaser.yaml ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── cmd └── root.go ├── go.mod ├── go.sum ├── internal ├── docker-compose.yml ├── examples │ └── README.md ├── how-it-works.md ├── nats-server.conf ├── pg-flo.yaml ├── pg_flo_logo.png └── scripts │ ├── e2e_common.sh │ ├── e2e_copy_and_stream.sh │ ├── e2e_copy_only.sh │ ├── e2e_ddl.sh │ ├── e2e_multi_tenant.sh │ ├── e2e_order_test.rb │ ├── e2e_postgres.sh │ ├── e2e_postgres_data_type.sh │ ├── e2e_postgres_uniqueness_test.rb │ ├── e2e_resume_test.rb │ ├── e2e_routing.sh │ ├── e2e_stream_only.sh │ ├── e2e_test_local.sh │ ├── e2e_transform_filter.sh │ ├── multi_tenant_rules.yml │ ├── rules.yml │ └── webhook_test.sh ├── main.go └── pkg ├── pgflonats └── pgflonats.go ├── replicator ├── base_replicator.go ├── buffer.go ├── config.go ├── copy_and_stream_replicator.go ├── ddl_replicator.go ├── errors.go ├── factory.go ├── interfaces.go ├── json_encoder.go ├── replication_connection.go ├── standard_connection.go ├── stream_replicator.go ├── table_handling.go └── tests │ ├── base_replicator_test.go │ ├── buffer_test.go │ ├── copy_and_stream_replicator_test.go │ ├── ddl_replicator_test.go │ ├── json_encoder_test.go │ └── mocks_test.go ├── routing ├── README.md ├── router.go └── tests │ └── routing_test.go ├── rules ├── README.md ├── engine.go ├── rules.go ├── tests │ ├── engine_test.go │ ├── mocks_test.go │ └── rules_test.go └── types.go ├── sinks ├── README.md ├── file.go ├── postgres.go ├── shared.go ├── sink.go ├── stdout.go ├── types.go └── webhooks.go ├── utils ├── cdc_encoding.go ├── cdc_message.go ├── retry.go ├── shared.go ├── shared_types.go └── zerolog_logger.go └── worker └── worker.go /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 
3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "gomod" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "weekly" 12 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | 8 | jobs: 9 | lint: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v4 13 | 14 | - name: Set up Go 15 | uses: actions/setup-go@v4 16 | with: 17 | go-version: "1.21" 18 | 19 | - name: Install golangci-lint 20 | run: curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.60.1 21 | 22 | - name: Lint 23 | run: make lint 24 | 25 | test: 26 | runs-on: ubuntu-latest 27 | steps: 28 | - uses: actions/checkout@v4 29 | 30 | - name: Set up Go 31 | uses: actions/setup-go@v4 32 | with: 33 | go-version: "1.21" 34 | 35 | - name: Test 36 | run: make test 37 | build: 38 | needs: [lint, test] 39 | runs-on: ubuntu-latest 40 | steps: 41 | - uses: actions/checkout@v4 42 | 43 | - name: Set up Go 44 | uses: actions/setup-go@v4 45 | with: 46 | go-version: "1.21" 47 | 48 | - name: Build 49 | run: make build 50 | 51 | - name: Set up QEMU 52 | uses: docker/setup-qemu-action@v3 53 | 54 | - name: Set up Docker Buildx 55 | uses: docker/setup-buildx-action@v3 56 | with: 57 | buildkitd-flags: --debug 58 | 59 | - name: Set build timestamp 60 | id: timestamp 61 | run: echo "timestamp=$(date -u +'%Y-%m-%dT%H:%M:%SZ')" >> $GITHUB_OUTPUT 62 | 63 | - name: Build Docker image 64 | uses: docker/build-push-action@v5 65 | with: 66 | context: . 
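          # Single-arch (amd64) image built only for verification: it is loaded into
          # the local Docker daemon and never pushed to a registry.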
67 | platforms: linux/amd64 68 | push: false 69 | load: true 70 | tags: pg_flo:test 71 | build-args: | 72 | VERSION=${{ github.sha }} 73 | COMMIT=${{ github.sha }} 74 | DATE=${{ steps.timestamp.outputs.timestamp }} 75 | 76 | - name: Verify Docker image version 77 | run: | 78 | docker run --rm pg_flo:test version | grep ${{ github.sha }} 79 | -------------------------------------------------------------------------------- /.github/workflows/integration.yml: -------------------------------------------------------------------------------- 1 | name: Integration Tests 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | 8 | permissions: 9 | contents: read 10 | actions: write 11 | 12 | jobs: 13 | build: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v4 17 | 18 | - name: Set up Go 19 | uses: actions/setup-go@v4 20 | with: 21 | go-version: "1.21" 22 | 23 | - name: Build 24 | run: make build 25 | 26 | - name: Upload binary 27 | uses: actions/upload-artifact@v4 28 | with: 29 | name: pg_flo-binary 30 | path: bin/pg_flo 31 | 32 | tests: 33 | needs: build 34 | runs-on: ubuntu-latest 35 | strategy: 36 | fail-fast: false 37 | matrix: 38 | test: 39 | [ 40 | stream_only, 41 | copy_only, 42 | transform_filter, 43 | ddl, 44 | postgres, 45 | postgres_data_type, 46 | multi_tenant, 47 | routing, 48 | copy_and_stream, 49 | order, 50 | resume, 51 | postgres_uniqueness, 52 | ] 53 | steps: 54 | - uses: actions/checkout@v4 55 | - name: Download binary 56 | uses: actions/download-artifact@v4 57 | with: 58 | name: pg_flo-binary 59 | path: bin 60 | - name: Make binary executable 61 | run: chmod +x bin/pg_flo 62 | - name: Install dependencies 63 | run: | 64 | sudo apt-get update 65 | sudo apt-get install -y postgresql-client jq ruby ruby-dev libpq-dev build-essential 66 | sudo gem install pg 67 | - name: Set up Docker Compose 68 | run: | 69 | sudo curl -L "https://github.com/docker/compose/releases/download/v2.17.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose 70 | sudo chmod +x /usr/local/bin/docker-compose 71 | - name: Run test 72 | env: 73 | PG_HOST: localhost 74 | PG_PORT: 5433 75 | PG_USER: myuser 76 | PG_PASSWORD: mypassword!@#%1234 77 | PG_DB: mydb 78 | TARGET_PG_HOST: localhost 79 | TARGET_PG_PORT: 5434 80 | TARGET_PG_USER: targetuser 81 | TARGET_PG_PASSWORD: targetpassword!@#1234 82 | TARGET_PG_DB: targetdb 83 | run: | 84 | docker-compose -f internal/docker-compose.yml up -d 85 | sleep 10 86 | if [[ "${{ matrix.test }}" == "order" || "${{ matrix.test }}" == "resume" || "${{ matrix.test }}" == "postgres_uniqueness" ]]; then 87 | ruby ./internal/scripts/e2e_${{ matrix.test }}_test.rb 88 | else 89 | ./internal/scripts/e2e_${{ matrix.test }}.sh 90 | fi 91 | docker-compose -f internal/docker-compose.yml down -v 92 | -------------------------------------------------------------------------------- /.github/workflows/release-and-docker.yml: -------------------------------------------------------------------------------- 1 | name: Release and Docker 2 | 3 | on: 4 | push: 5 | tags: 6 | - "v*" 7 | 8 | permissions: 9 | contents: write 10 | packages: write 11 | 12 | jobs: 13 | goreleaser: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@v4 18 | with: 19 | fetch-depth: 0 20 | 21 | - name: Set up Go 22 | uses: actions/setup-go@v4 23 | with: 24 | go-version: "1.21" 25 | 26 | - name: Run GoReleaser 27 | uses: goreleaser/goreleaser-action@v5 28 | with: 29 | distribution: goreleaser 30 | version: latest 31 | args: release 
--clean 32 | env: 33 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 34 | 35 | docker: 36 | needs: goreleaser 37 | runs-on: ubuntu-latest 38 | steps: 39 | - name: Checkout 40 | uses: actions/checkout@v4 41 | 42 | - name: Set up QEMU 43 | uses: docker/setup-qemu-action@v3 44 | 45 | - name: Set up Docker Buildx 46 | uses: docker/setup-buildx-action@v3 47 | with: 48 | buildkitd-flags: --debug 49 | 50 | - name: Extract metadata (shayonj) 51 | id: meta_shayonj 52 | uses: docker/metadata-action@v5 53 | with: 54 | images: docker.io/shayonj/pg_flo 55 | tags: | 56 | type=semver,pattern={{version}} 57 | type=semver,pattern={{major}}.{{minor}} 58 | type=semver,pattern={{major}} 59 | 60 | - name: Login to DockerHub (shayonj) 61 | uses: docker/login-action@v3 62 | with: 63 | username: ${{ secrets.DOCKERHUB_USERNAME }} 64 | password: ${{ secrets.DOCKERHUB_TOKEN }} 65 | 66 | - name: Set build timestamp 67 | id: timestamp 68 | run: echo "timestamp=$(date -u +'%Y-%m-%dT%H:%M:%SZ')" >> $GITHUB_OUTPUT 69 | 70 | - name: Build and push (shayonj) 71 | uses: docker/build-push-action@v5 72 | with: 73 | context: . 74 | platforms: linux/amd64,linux/arm64 75 | push: true 76 | tags: ${{ steps.meta_shayonj.outputs.tags }} 77 | labels: ${{ steps.meta_shayonj.outputs.labels }} 78 | build-args: | 79 | VERSION=${{ github.ref_name }} 80 | COMMIT=${{ github.sha }} 81 | DATE=${{ steps.timestamp.outputs.timestamp }} 82 | 83 | - name: Extract metadata (pgflo) 84 | id: meta_pgflo 85 | uses: docker/metadata-action@v5 86 | with: 87 | images: docker.io/pgflo/pg_flo 88 | tags: | 89 | type=semver,pattern={{version}} 90 | type=semver,pattern={{major}}.{{minor}} 91 | type=semver,pattern={{major}} 92 | 93 | - name: Login to DockerHub (pgflo) 94 | uses: docker/login-action@v3 95 | with: 96 | username: ${{ secrets.PG_FLO_DOCKER_HUB_USERNAME }} 97 | password: ${{ secrets.PG_FLO_DOCKER_HUB_TOKEN }} 98 | 99 | - name: Build and push (pgflo) 100 | uses: docker/build-push-action@v5 101 | with: 102 | context: . 
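          # Multi-arch (amd64/arm64) release image pushed to docker.io/pgflo/pg_flo
          # using the semver tags produced by the metadata step above.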
103 | platforms: linux/amd64,linux/arm64 104 | push: true 105 | tags: ${{ steps.meta_pgflo.outputs.tags }} 106 | labels: ${{ steps.meta_pgflo.outputs.labels }} 107 | build-args: | 108 | VERSION=${{ github.ref_name }} 109 | COMMIT=${{ github.sha }} 110 | DATE=${{ steps.timestamp.outputs.timestamp }} 111 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, built with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # Dependency directories (remove the comment below to include it) 15 | # vendor/ 16 | 17 | # Go workspace file 18 | go.work 19 | 20 | # IDE-specific files 21 | .idea/ 22 | .vscode/ 23 | 24 | # OS-specific files 25 | .DS_Store 26 | Thumbs.db 27 | 28 | # Log files 29 | *.log 30 | 31 | # Binary output directory 32 | /bin/ 33 | 34 | # Environment variables file 35 | .env 36 | 37 | pg_flo 38 | 39 | bin/ 40 | coverage.txt 41 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | linters: 2 | enable: 3 | - gofmt 4 | - goimports 5 | - govet 6 | - errcheck 7 | - staticcheck 8 | - ineffassign 9 | - unconvert 10 | - misspell 11 | - gosec 12 | - revive 13 | 14 | linters-settings: 15 | govet: 16 | # Check-shadowing option removed 17 | revive: 18 | min-confidence: 0.8 19 | gocyclo: 20 | min-complexity: 15 21 | maligned: 22 | suggest-new: true 23 | dupl: 24 | threshold: 100 25 | goconst: 26 | min-len: 2 27 | min-occurrences: 2 28 | 29 | issues: 30 | exclude-rules: 31 | - path: _test\.go 32 | linters: 33 | - gocyclo 34 | - errcheck 35 | - dupl 36 | - gosec 37 | exclude-dirs: 38 | - vendor/ 39 | exclude-files: 40 | - ".*_test.go" 41 | 42 | output: 43 | formats: colored-line-number 44 | print-issued-lines: true 45 | print-linter-name: true 46 | -------------------------------------------------------------------------------- /.goreleaser.yaml: -------------------------------------------------------------------------------- 1 | before: 2 | hooks: 3 | - go mod tidy 4 | 5 | builds: 6 | - main: . 7 | env: 8 | - CGO_ENABLED=0 9 | goos: 10 | - linux 11 | - darwin 12 | goarch: 13 | - amd64 14 | - arm64 15 | ldflags: 16 | - -s -w 17 | - -X github.com/pgflo/pg_flo/cmd.version={{.Version}} 18 | - -X github.com/pgflo/pg_flo/cmd.commit={{.Commit}} 19 | - -X github.com/pgflo/pg_flo/cmd.date={{.Date}} 20 | binary: pg_flo 21 | 22 | archives: 23 | - format: tar.gz 24 | name_template: >- 25 | {{ .ProjectName }}_ 26 | {{- title .Os }}_ 27 | {{- if eq .Arch "amd64" }}x86_64 28 | {{- else }}{{ .Arch }}{{ end }} 29 | format_overrides: 30 | - goos: windows 31 | format: zip 32 | 33 | changelog: 34 | sort: asc 35 | filters: 36 | exclude: 37 | - "^docs:" 38 | - "^test:" 39 | - "^ci:" 40 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.21-alpine AS builder 2 | RUN apk update && apk upgrade --no-cache 3 | WORKDIR /app 4 | COPY . . 
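# Build-time metadata (set by CI) that is embedded into the binary via -ldflags below.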
5 | ARG VERSION=dev 6 | ARG COMMIT=none 7 | ARG DATE=unknown 8 | RUN CGO_ENABLED=0 GOOS=linux go build -v \ 9 | -ldflags "-s -w \ 10 | -X 'github.com/pgflo/pg_flo/cmd.version=${VERSION}' \ 11 | -X 'github.com/pgflo/pg_flo/cmd.commit=${COMMIT}' \ 12 | -X 'github.com/pgflo/pg_flo/cmd.date=${DATE}'" \ 13 | -o pg_flo . 14 | 15 | FROM alpine:latest 16 | RUN apk update && apk upgrade --no-cache && \ 17 | apk add --no-cache postgresql15-client 18 | COPY --from=builder /app/pg_flo /usr/local/bin/ 19 | ENTRYPOINT ["pg_flo"] 20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test lint build clean 2 | 3 | # Define the default goal 4 | .DEFAULT_GOAL := build 5 | 6 | # Build the application 7 | build: 8 | go build -o bin/pg_flo 9 | 10 | # Run tests with race detection and coverage 11 | test: 12 | go test -v -race -coverprofile=coverage.txt -covermode=atomic ./... 13 | 14 | # Run linter 15 | lint: 16 | golangci-lint run --timeout=5m 17 | 18 | # Clean build artifacts 19 | clean: 20 | rm -rf bin/ coverage.txt 21 | 22 | # Run all checks (lint and test) 23 | check: lint test 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pg_flo logo pg_flo 2 | 3 | [![CI](https://github.com/pgflo/pg_flo/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/pgflo/pg_flo/actions/workflows/ci.yml) 4 | [![Integration](https://github.com/pgflo/pg_flo/actions/workflows/integration.yml/badge.svg?branch=main)](https://github.com/pgflo/pg_flo/actions/workflows/integration.yml) 5 | [![Release](https://img.shields.io/github/v/release/pgflo/pg_flo?style=flat&color=#959DA5&sort=semver)](https://github.com/pgflo/pg_flo/releases/latest) 6 | [![Docker Image](https://img.shields.io/docker/v/pgflo/pg_flo?style=flat&label=docker&color=#959DA5&label=docker&sort=semver)](https://hub.docker.com/r/pgflo/pg_flo/tags) 7 | 8 | > The easiest way to move and transform data between PostgreSQL databases using Logical Replication. 9 | 10 | ℹ️ `pg_flo` is in active development. The design and architecture is continuously improving. PRs/Issues are very much welcome 🙏 11 | 12 | ## Key Features 13 | 14 | - **Real-time Data Streaming** - Capture inserts, updates, deletes, and DDL changes in near real-time 15 | - **Fast Initial Loads** - Parallel copy of existing data with automatic follow-up continuous replication 16 | - **Powerful Transformations** - Filter and transform data on-the-fly ([see rules](pkg/rules/README.md)) 17 | - **Flexible Routing** - Route to different tables and remap columns ([see routing](pkg/routing/README.md)) 18 | - **Production Ready** - Supports resumable streaming, DDL tracking, and more 19 | 20 | ## Common Use Cases 21 | 22 | - Real-time data replication between PostgreSQL databases 23 | - ETL pipelines with data transformation 24 | - Data re-routing, masking and filtering 25 | - Database migration with zero downtime 26 | - Event streaming from PostgreSQL 27 | 28 | [View detailed examples →](internal/examples/README.md) 29 | 30 | ## Quick Start 31 | 32 | ### Prerequisites 33 | 34 | - Docker 35 | - PostgreSQL database with `wal_level=logical` 36 | 37 | ### 1. Install 38 | 39 | ```shell 40 | docker pull pgflo/pg_flo:latest 41 | ``` 42 | 43 | ### 2. 
Configure 44 | 45 | Choose one: 46 | 47 | - Environment variables 48 | - YAML configuration file ([example](internal/pg-flo.yaml)) 49 | - CLI flags 50 | 51 | ### 3. Run 52 | 53 | ```shell 54 | # Start NATS server 55 | docker run -d --name pg_flo_nats \ 56 | --network host \ 57 | -v /path/to/nats-server.conf:/etc/nats/nats-server.conf \ 58 | nats:latest \ 59 | -c /etc/nats/nats-server.conf 60 | 61 | # Start replicator (using config file) 62 | docker run -d --name pg_flo_replicator \ 63 | --network host \ 64 | -v /path/to/config.yaml:/etc/pg_flo/config.yaml \ 65 | pgflo/pg_flo:latest \ 66 | replicator --config /etc/pg_flo/config.yaml 67 | 68 | # Start worker 69 | docker run -d --name pg_flo_worker \ 70 | --network host \ 71 | -v /path/to/config.yaml:/etc/pg_flo/config.yaml \ 72 | pgflo/pg_flo:latest \ 73 | worker postgres --config /etc/pg_flo/config.yaml 74 | ``` 75 | 76 | #### Example Configuration (config.yaml) 77 | 78 | ```yaml 79 | # Replicator settings 80 | host: "localhost" 81 | port: 5432 82 | dbname: "myapp" 83 | user: "replicator" 84 | password: "secret" 85 | group: "users" 86 | tables: 87 | - "users" 88 | 89 | # Worker settings (postgres sink) 90 | target-host: "dest-db" 91 | target-dbname: "myapp" 92 | target-user: "writer" 93 | target-password: "secret" 94 | 95 | # Common settings 96 | nats-url: "nats://localhost:4222" 97 | ``` 98 | 99 | [View full configuration options →](internal/pg-flo.yaml) 100 | 101 | ## Core Concepts 102 | 103 | ### Architecture 104 | 105 | pg_flo uses two main components: 106 | 107 | - **Replicator**: Captures PostgreSQL changes via logical replication 108 | - **Worker**: Processes and routes changes through NATS 109 | 110 | [Learn how it works →](internal/how-it-works.md) 111 | 112 | ### Groups 113 | 114 | Groups are used to: 115 | 116 | - Identify replication processes 117 | - Isolate replication slots and publications 118 | - Run multiple instances on same database 119 | - Maintain state for resumability 120 | - Enable parallel processing 121 | 122 | ```shell 123 | # Example: Separate groups for different tables 124 | pg_flo replicator --group users_orders --tables users,orders 125 | 126 | pg_flo replicator --group products --tables products 127 | ``` 128 | 129 | ### Streaming Modes 130 | 131 | 1. **Stream Only** (default) 132 | - Real-time streaming of changes 133 | 134 | ```shell 135 | pg_flo replicator --stream 136 | ``` 137 | 138 | 2. **Copy Only** 139 | - One-time parallel copy of existing data 140 | 141 | ```shell 142 | pg_flo replicator --copy --max-copy-workers-per-table 4 143 | ``` 144 | 145 | 3. 
**Copy and Stream** 146 | - Initial parallel copy followed by continuous streaming 147 | 148 | ```shell 149 | pg_flo replicator --copy-and-stream --max-copy-workers-per-table 4 150 | ``` 151 | 152 | ### Destinations 153 | 154 | - **stdout**: Console output 155 | - **file**: File writing 156 | - **postgres**: Database replication 157 | - **webhook**: HTTP endpoints 158 | 159 | [View destination details →](pkg/sinks/README.md) 160 | 161 | ## Advanced Features 162 | 163 | ### Message Routing 164 | 165 | Routing configuration is defined in a separate YAML file: 166 | 167 | ```yaml 168 | # routing.yaml 169 | users: 170 | source_table: users 171 | destination_table: customers 172 | column_mappings: 173 | - source: id 174 | destination: customer_id 175 | ``` 176 | 177 | ```shell 178 | # Apply routing configuration 179 | pg_flo worker postgres --routing-config /path/to/routing.yaml 180 | ``` 181 | 182 | [Learn about routing →](pkg/routing/README.md) 183 | 184 | ### Transformation Rules 185 | 186 | Rules are defined in a separate YAML file: 187 | 188 | ```yaml 189 | # rules.yaml 190 | users: 191 | - type: exclude_columns 192 | columns: [password, ssn] 193 | - type: mask_columns 194 | columns: [email] 195 | ``` 196 | 197 | ```shell 198 | # Apply transformation rules 199 | pg_flo worker file --rules-config /path/to/rules.yaml 200 | ``` 201 | 202 | [View transformation options →](pkg/rules/README.md) 203 | 204 | ### Combined Example 205 | 206 | ```shell 207 | pg_flo worker postgres --config /etc/pg_flo/config.yaml --routing-config routing.yaml --rules-config rules.yaml 208 | ``` 209 | 210 | ## Scaling Guide 211 | 212 | Best practices: 213 | 214 | - Run one worker per group 215 | - Use groups to replicate different tables independently 216 | - Scale horizontally using multiple groups 217 | 218 | Example scaling setup: 219 | 220 | ```shell 221 | # Group: sales 222 | pg_flo replicator --group sales --tables sales 223 | pg_flo worker postgres --group sales 224 | 225 | # Group: inventory 226 | pg_flo replicator --group inventory --tables inventory 227 | pg_flo worker postgres --group inventory 228 | ``` 229 | 230 | ## Limits and Considerations 231 | 232 | - NATS message size: 8MB (configurable) 233 | - One worker per group recommended 234 | - PostgreSQL logical replication prerequisites required 235 | - Tables must have one of the following for replication: 236 | - Primary key 237 | - Unique constraint with `NOT NULL` columns 238 | - `REPLICA IDENTITY FULL` set 239 | 240 | Example table configurations: 241 | 242 | ```sql 243 | -- Using primary key (recommended) 244 | CREATE TABLE users ( 245 | id SERIAL PRIMARY KEY, 246 | email TEXT, 247 | name TEXT 248 | ); 249 | 250 | -- Using unique constraint 251 | CREATE TABLE orders ( 252 | order_id TEXT NOT NULL, 253 | customer_id TEXT NOT NULL, 254 | data JSONB, 255 | CONSTRAINT orders_unique UNIQUE (order_id, customer_id) 256 | ); 257 | ALTER TABLE orders REPLICA IDENTITY USING INDEX orders_unique; 258 | 259 | -- Using all columns (higher overhead in terms of performance) 260 | CREATE TABLE audit_logs ( 261 | id SERIAL, 262 | action TEXT, 263 | data JSONB 264 | ); 265 | ALTER TABLE audit_logs REPLICA IDENTITY FULL; 266 | ``` 267 | 268 | ## Development 269 | 270 | ```shell 271 | make build 272 | make test 273 | make lint 274 | 275 | # E2E tests 276 | ./internal/scripts/e2e_local.sh 277 | ``` 278 | 279 | ## Contributing 280 | 281 | Contributions welcome! Please open an issue or submit a pull request. 282 | 283 | ## License 284 | 285 | Apache License 2.0. 
[View license →](LICENSE) 286 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/pgflo/pg_flo 2 | 3 | go 1.21.5 4 | 5 | require ( 6 | github.com/goccy/go-json v0.10.5 7 | github.com/jackc/pglogrepl v0.0.0-20240307033717-828fbfe908e9 8 | github.com/jackc/pgtype v1.14.4 9 | github.com/jackc/pgx/v5 v5.7.2 10 | github.com/nats-io/nats.go v1.38.0 11 | github.com/rs/zerolog v1.33.0 12 | github.com/shopspring/decimal v1.4.0 13 | github.com/spf13/cobra v1.9.1 14 | github.com/spf13/pflag v1.0.6 15 | github.com/spf13/viper v1.19.0 16 | github.com/stretchr/testify v1.10.0 17 | gopkg.in/yaml.v2 v2.4.0 18 | ) 19 | 20 | require ( 21 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect 22 | github.com/fsnotify/fsnotify v1.7.0 // indirect 23 | github.com/hashicorp/hcl v1.0.0 // indirect 24 | github.com/inconshreveable/mousetrap v1.1.0 // indirect 25 | github.com/jackc/pgio v1.0.0 // indirect 26 | github.com/jackc/pgpassfile v1.0.0 // indirect 27 | github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect 28 | github.com/jackc/puddle/v2 v2.2.2 // indirect 29 | github.com/klauspost/compress v1.17.9 // indirect 30 | github.com/magiconair/properties v1.8.7 // indirect 31 | github.com/mattn/go-colorable v0.1.13 // indirect 32 | github.com/mattn/go-isatty v0.0.20 // indirect 33 | github.com/mitchellh/mapstructure v1.5.0 // indirect 34 | github.com/nats-io/nkeys v0.4.9 // indirect 35 | github.com/nats-io/nuid v1.0.1 // indirect 36 | github.com/pelletier/go-toml/v2 v2.2.2 // indirect 37 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect 38 | github.com/sagikazarmark/locafero v0.6.0 // indirect 39 | github.com/sagikazarmark/slog-shim v0.1.0 // indirect 40 | github.com/sourcegraph/conc v0.3.0 // indirect 41 | github.com/spf13/afero v1.11.0 // indirect 42 | github.com/spf13/cast v1.7.0 // indirect 43 | github.com/stretchr/objx v0.5.2 // indirect 44 | github.com/subosito/gotenv v1.6.0 // indirect 45 | go.uber.org/multierr v1.11.0 // indirect 46 | golang.org/x/crypto v0.31.0 // indirect 47 | golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa // indirect 48 | golang.org/x/sync v0.10.0 // indirect 49 | golang.org/x/sys v0.28.0 // indirect 50 | golang.org/x/text v0.21.0 // indirect 51 | gopkg.in/ini.v1 v1.67.0 // indirect 52 | gopkg.in/yaml.v3 v3.0.1 // indirect 53 | ) 54 | -------------------------------------------------------------------------------- /internal/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.8" 2 | 3 | services: 4 | postgres: 5 | image: postgres:14 6 | container_name: pg_logical_replication 7 | environment: 8 | POSTGRES_USER: myuser 9 | POSTGRES_PASSWORD: mypassword!@#%1234 10 | POSTGRES_DB: mydb 11 | volumes: 12 | - postgres_data:/var/lib/postgresql/data 13 | ports: 14 | - "5433:5432" 15 | command: 16 | - "postgres" 17 | - "-c" 18 | - "wal_level=logical" 19 | - "-c" 20 | - "max_replication_slots=5" 21 | - "-c" 22 | - "max_wal_senders=5" 23 | restart: unless-stopped 24 | 25 | target_postgres: 26 | image: postgres:14 27 | container_name: pg_target 28 | environment: 29 | POSTGRES_USER: targetuser 30 | POSTGRES_PASSWORD: targetpassword!@#1234 31 | POSTGRES_DB: targetdb 32 | volumes: 33 | - target_postgres_data:/var/lib/postgresql/data 34 | ports: 35 | - "5434:5432" 36 | restart: unless-stopped 37 | 38 | nats: 39 | image: 
nats:latest 40 | container_name: pg_flo_nats 41 | command: ["-c", "/etc/nats/nats-server.conf"] 42 | volumes: 43 | - ./nats-server.conf:/etc/nats/nats-server.conf 44 | - nats_data:/data 45 | ports: 46 | - "4222:4222" 47 | - "8222:8222" 48 | restart: unless-stopped 49 | 50 | volumes: 51 | postgres_data: 52 | target_postgres_data: 53 | nats_data: 54 | -------------------------------------------------------------------------------- /internal/examples/README.md: -------------------------------------------------------------------------------- 1 | # pg_flo Examples 2 | 3 | This guide demonstrates common use cases for pg_flo with practical examples. For full configuration options, see the [example config file](../pg-flo.yaml). 4 | 5 | ## Basic Replication 6 | 7 | Simple database-to-database replication: 8 | 9 | ```bash 10 | # Start NATS server 11 | docker run -d --name pg_flo_nats \ 12 | --network host \ 13 | -v /path/to/nats-server.conf:/etc/nats/nats-server.conf \ 14 | nats:latest \ 15 | -c /etc/nats/nats-server.conf 16 | 17 | # Start replicator 18 | docker run -d --name pg_flo_replicator \ 19 | --network host \ 20 | -v /path/to/config.yaml:/etc/pg_flo/config.yaml \ 21 | pgflo/pg_flo:latest \ 22 | replicator --config /etc/pg_flo/config.yaml 23 | 24 | # Start worker 25 | docker run -d --name pg_flo_worker \ 26 | --network host \ 27 | -v /path/to/config.yaml:/etc/pg_flo/config.yaml \ 28 | pgflo/pg_flo:latest \ 29 | worker postgres --config /etc/pg_flo/config.yaml 30 | ``` 31 | 32 | ## Data Masking and Transformation 33 | 34 | Mask sensitive data during replication: 35 | 36 | ```yaml 37 | # rules.yaml 38 | rules: 39 | - table: users 40 | type: transform 41 | column: email 42 | parameters: 43 | type: mask 44 | mask_char: "*" 45 | operations: [INSERT, UPDATE] 46 | - table: payments 47 | type: transform 48 | column: card_number 49 | parameters: 50 | type: regex_replace 51 | pattern: "(\d{12})(\d{4})" 52 | replacement: "************$2" 53 | ``` 54 | 55 | ```bash 56 | pg_flo worker postgres \ 57 | --group sensitive_data \ 58 | --rules-config /path/to/rules.yaml \ 59 | # ... other postgres connection flags 60 | ``` 61 | 62 | ## Custom Table Routing 63 | 64 | Route and rename tables/columns: 65 | 66 | ```yaml 67 | # routing.yaml 68 | users: 69 | source_table: users 70 | destination_table: customers 71 | column_mappings: 72 | - source: user_id 73 | destination: customer_id 74 | - source: created_at 75 | destination: signup_date 76 | operations: 77 | - INSERT 78 | - UPDATE 79 | ``` 80 | 81 | ```bash 82 | pg_flo worker postgres \ 83 | --group user_migration \ 84 | --routing-config /path/to/routing.yaml \ 85 | # ... other config flags 86 | ``` 87 | 88 | ## Initial Load Options 89 | 90 | ### Copy Only (One-time Data Copy) 91 | 92 | Copy existing data without streaming changes: 93 | 94 | ```bash 95 | pg_flo replicator \ 96 | --copy \ 97 | --max-copy-workers-per-table 4 \ 98 | --group initial_load \ 99 | # ... other config flags 100 | ``` 101 | 102 | ### Copy and Stream 103 | 104 | Perform parallel initial data load followed by continuous streaming: 105 | 106 | ```bash 107 | pg_flo replicator \ 108 | --copy-and-stream \ 109 | --max-copy-workers-per-table 4 \ 110 | --group full_sync \ 111 | # ... other config flags 112 | ``` 113 | 114 | ## Multi-Destination Pipeline 115 | 116 | Stream changes to multiple destinations simultaneously: 117 | 118 | ```bash 119 | # Terminal 1: Stream to PostgreSQL 120 | pg_flo worker postgres \ 121 | --group audit \ 122 | # ... 
other config flags 123 | 124 | # Terminal 2: Stream to files for archival 125 | pg_flo worker file \ 126 | --group audit \ 127 | --file-output-dir /archive/changes 128 | 129 | # Terminal 3: Stream to webhook for external processing 130 | pg_flo worker webhook \ 131 | --group audit \ 132 | --webhook-url https://api.example.com/changes \ 133 | --webhook-batch-size 100 134 | ``` 135 | 136 | ## Schema Tracking 137 | 138 | Enable DDL tracking to capture schema changes. DDLs are applied on the destination as they arrive: 139 | 140 | ```bash 141 | pg_flo replicator \ 142 | --track-ddl \ 143 | --group schema_sync \ 144 | # ... other config flags 145 | 146 | pg_flo worker postgres \ 147 | --group schema_sync \ 148 | --target-sync-schema true \ 149 | # ... other postgres connection flags 150 | ``` 151 | 152 | ## Configuration File 153 | 154 | Instead of CLI flags, you can use a configuration file: 155 | 156 | ```yaml 157 | # ~/.pg_flo.yaml 158 | host: "source-db.example.com" 159 | port: 5432 160 | dbname: "myapp" 161 | user: "replicator" 162 | password: "secret" 163 | group: "production" 164 | tables: 165 | - users 166 | - orders 167 | - payments 168 | nats-url: "nats://localhost:4222" 169 | target-host: "dest-db.example.com" 170 | target-dbname: "myapp" 171 | target-user: "writer" 172 | target-password: "secret" 173 | ``` 174 | 175 | ```bash 176 | pg_flo replicator --config /path/to/config.yaml 177 | pg_flo worker postgres --config /path/to/config.yaml 178 | ``` 179 | 180 | See the [example config file](../pg-flo.yaml) for more details. 181 | 182 | ## Environment Variables 183 | 184 | All configuration options can also be set via environment variables: 185 | 186 | ```bash 187 | export PG_FLO_HOST=source-db.example.com 188 | export PG_FLO_PORT=5432 189 | export PG_FLO_DBNAME=myapp 190 | export PG_FLO_USER=replicator 191 | export PG_FLO_PASSWORD=secret 192 | export PG_FLO_GROUP=production 193 | export PG_FLO_NATS_URL=nats://localhost:4222 194 | 195 | pg_flo replicator 196 | ``` 197 | -------------------------------------------------------------------------------- /internal/how-it-works.md: -------------------------------------------------------------------------------- 1 | # How it Works 2 | 3 | `pg_flo` leverages PostgreSQL's logical replication system to capture and stream data while applying transformations and filtrations to the data before it reaches the destination. It utilizes **NATS** as a message broker to decouple the replicator and worker processes, providing flexibility and scalability. 4 | 5 | 1. **Publication Creation**: Creates a PostgreSQL publication for the specified tables or all tables (per `group`). 6 | 7 | 2. **Replication Slot**: A replication slot is created to ensure no data is lost between streaming sessions. 8 | 9 | 3. **Operation Modes**: 10 | 11 | - **Copy-and-Stream**: Performs an initial bulk copy followed by streaming changes. 12 | - **Stream-Only**: Starts streaming changes immediately from the last known position. 13 | 14 | 4. **Initial Bulk Copy** (for Copy-and-Stream mode): 15 | 16 | - If no valid LSN is found in NATS, `pg_flo` performs an initial bulk copy of existing data. 17 | - This process is parallelized for fast data sync: 18 | - A snapshot is taken to ensure consistency. 19 | - Each table is divided into page ranges. 20 | - Multiple workers copy different ranges concurrently. 21 | 22 | 5. **Streaming Changes**: 23 | 24 | - After the initial copy (or immediately in Stream-Only mode), the replicator streams changes from PostgreSQL and publishes them to NATS. 
25 | - The last processed LSN is stored in NATS, allowing `pg_flo` to resume operations from where it left off in case of interruptions. 26 | 27 | 6. **Message Processing**: The worker processes various types of messages from NATS: 28 | 29 | - Relation messages to understand table structures 30 | - Insert, Update, and Delete messages containing actual data changes 31 | - Begin and Commit messages for transaction boundaries 32 | - DDL changes like ALTER TABLE, CREATE INDEX, etc. 33 | 34 | 7. **Data Transformation**: Received data is converted into a structured format, with type-aware conversions for different PostgreSQL data types. 35 | 36 | 8. **Rule Application**: If configured, transformation and filtering rules are applied to the data: 37 | 38 | - **Transform Rules**: 39 | - Regex: Apply regular expression transformations to string values. 40 | - Mask: Mask sensitive data, keeping the first and last characters visible. 41 | - **Filter Rules**: 42 | - Comparison: Filter based on equality, inequality, greater than, less than, etc. 43 | - Contains: Filter string values based on whether they contain a specific substring. 44 | - Rules can be applied selectively to insert, update, or delete operations. 45 | 46 | 9. **Buffering**: Processed data is buffered and written in batches to optimize write operations to the destination. 47 | 48 | 10. **Writing to Sink**: Data is periodically flushed from the buffer to the configured sink (e.g., stdout, file, or other destinations). 49 | 50 | 11. **State Management**: 51 | - The replicator keeps track of its progress by updating the Last LSN in NATS. 52 | - The worker maintains its progress to ensure data consistency. 53 | - This allows for resumable operations across multiple runs. 54 | - Periodic status updates are sent to PostgreSQL to maintain the replication connection. 
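
To make the resumability described above concrete, here is a minimal, illustrative Go sketch of checkpointing the last processed LSN in a NATS JetStream key-value bucket and reading it back on startup. It is not pg_flo's actual implementation: the bucket name `pg_flo_state` and the key `groups.users.last_lsn` are invented for the example, it uses the legacy JetStream KV API from `nats.go`, and error handling is kept deliberately terse.

```go
package main

import (
	"fmt"
	"log"

	"github.com/jackc/pglogrepl"
	"github.com/nats-io/nats.go"
)

// Hypothetical names used only for this sketch.
const (
	stateBucket = "pg_flo_state"
	lsnKey      = "groups.users.last_lsn"
)

func main() {
	nc, err := nats.Connect(nats.DefaultURL)
	if err != nil {
		log.Fatal(err)
	}
	defer nc.Close()

	js, err := nc.JetStream()
	if err != nil {
		log.Fatal(err)
	}

	// Open (or create) the KV bucket that holds per-group replication state.
	kv, err := js.CreateKeyValue(&nats.KeyValueConfig{Bucket: stateBucket})
	if err != nil {
		log.Fatal(err)
	}

	// On startup: resume from the last checkpointed LSN if one exists;
	// with no stored LSN, copy-and-stream mode would fall back to an
	// initial bulk copy.
	startLSN := pglogrepl.LSN(0)
	if entry, err := kv.Get(lsnKey); err == nil {
		if lsn, perr := pglogrepl.ParseLSN(string(entry.Value())); perr == nil {
			startLSN = lsn
		}
	}
	fmt.Println("resuming replication from", startLSN)

	// After a batch of changes has been durably handled, checkpoint the new
	// position so a restart picks up exactly where this run left off.
	nextLSN := startLSN + 16384 // placeholder for the LSN reported by the stream
	if _, err := kv.Put(lsnKey, []byte(nextLSN.String())); err != nil {
		log.Fatal(err)
	}
}
```

The same pattern, applied separately by the replicator and the worker, is what allows both processes to restart independently without losing or re-applying changes.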
55 | -------------------------------------------------------------------------------- /internal/nats-server.conf: -------------------------------------------------------------------------------- 1 | jetstream: enabled 2 | store_dir: /data 3 | http_port: 8222 4 | max_payload: 8388608 5 | -------------------------------------------------------------------------------- /internal/pg-flo.yaml: -------------------------------------------------------------------------------- 1 | # [Replicator] PostgreSQL connection settings 2 | host: "localhost" # PostgreSQL host (env: PG_FLO_HOST) 3 | port: 5432 # PostgreSQL port (env: PG_FLO_PORT) 4 | dbname: "your_database" # PostgreSQL database name (env: PG_FLO_DBNAME) 5 | user: "your_user" # PostgreSQL user (env: PG_FLO_USER) 6 | password: "your_password" # PostgreSQL password (env: PG_FLO_PASSWORD) 7 | schema: "public" # PostgreSQL schema to replicate from (env: PG_FLO_SCHEMA) 8 | 9 | # Replication settings 10 | group: "your_group" # Group name to identify each replication (env: PG_FLO_GROUP) 11 | tables: # Tables to replicate (empty for all tables) (env: PG_FLO_TABLES) 12 | - "table1" 13 | - "table2" 14 | copy-and-stream: false # Enable copy and stream mode (env: PG_FLO_COPY_AND_STREAM) 15 | max-copy-workers-per-table: 4 # Maximum number of parallel workers for copy operation (env: PG_FLO_MAX_COPY_WORKERS_PER_TABLE) 16 | track-ddl: false # Enable tracking of DDL changes (env: PG_FLO_TRACK_DDL) 17 | 18 | # NATS settings 19 | nats-url: "nats://localhost:4222" # NATS server URL (env: PG_FLO_NATS_URL) 20 | 21 | # Worker settings 22 | batch-size: 1000 # Number of messages to process in a batch (env: PG_FLO_BATCH_SIZE) 23 | rules-config: "/path/to/rules.yaml" # Path to rules configuration file (env: PG_FLO_RULES_CONFIG) 24 | routing-config: "/path/to/routing.yaml" # Path to routing configuration file (env: PG_FLO_ROUTING_CONFIG) 25 | 26 | # File sink settings 27 | file-output-dir: "/tmp/pg_flo-output" # Output directory for file sink (env: PG_FLO_FILE_OUTPUT_DIR) 28 | 29 | # [Worker] Postgres sink settings 30 | target-host: "" # Target PostgreSQL host (env: PG_FLO_TARGET_HOST) 31 | target-port: 5432 # Target PostgreSQL port (env: PG_FLO_TARGET_PORT) 32 | target-dbname: "" # Target PostgreSQL database name (env: PG_FLO_TARGET_DBNAME) 33 | target-user: "" # Target PostgreSQL user (env: PG_FLO_TARGET_USER) 34 | target-password: "" # Target PostgreSQL password (env: PG_FLO_TARGET_PASSWORD) 35 | target-sync-schema: false # Sync schema from source to target (env: PG_FLO_TARGET_SYNC_SCHEMA) 36 | target-disable-foreign-keys: false # Disable foreign key constraints on target (env: PG_FLO_TARGET_DISABLE_FOREIGN_KEYS) 37 | 38 | # Source connection for schema sync (only needed with target-sync-schema: true) 39 | source-host: "" # Source PostgreSQL host (env: PG_FLO_SOURCE_HOST) 40 | source-port: 5432 # Source PostgreSQL port (env: PG_FLO_SOURCE_PORT) 41 | source-dbname: "" # Source PostgreSQL database name (env: PG_FLO_SOURCE_DBNAME) 42 | source-user: "" # Source PostgreSQL user (env: PG_FLO_SOURCE_USER) 43 | source-password: "" # Source PostgreSQL password (env: PG_FLO_SOURCE_PASSWORD) 44 | 45 | # Webhook sink settings 46 | webhook-url: "" # Webhook URL to send data (env: PG_FLO_WEBHOOK_URL) 47 | -------------------------------------------------------------------------------- /internal/pg_flo_logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pgflo/pg_flo/e9be74c2ffaa91b13f9a4326d4b5d83c81e4b450/internal/pg_flo_logo.png -------------------------------------------------------------------------------- /internal/scripts/e2e_common.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PG_HOST="${PG_HOST:-localhost}" 4 | PG_PORT="${PG_PORT:-5433}" 5 | PG_USER="${PG_USER:-myuser}" 6 | PG_PASSWORD="${PG_PASSWORD:-mypassword!@#%1234}" 7 | PG_DB="${PG_DB:-mydb}" 8 | 9 | TARGET_PG_HOST="${TARGET_PG_HOST:-localhost}" 10 | TARGET_PG_PORT="${TARGET_PG_PORT:-5434}" 11 | TARGET_PG_USER="${TARGET_PG_USER:-targetuser}" 12 | TARGET_PG_PASSWORD="${TARGET_PG_PASSWORD:-targetpassword!@#1234}" 13 | TARGET_PG_DB="${TARGET_PG_DB:-targetdb}" 14 | 15 | NATS_URL="${NATS_URL:-nats://localhost:4222}" 16 | 17 | pg_flo_BIN="./bin/pg_flo" 18 | OUTPUT_DIR="/tmp/pg_flo-output" 19 | pg_flo_LOG="/tmp/pg_flo.log" 20 | pg_flo_WORKER_LOG="/tmp/pg_flo_worker.log" 21 | 22 | # Helper functions 23 | log() { echo "🔹 $1"; } 24 | success() { echo "✅ $1"; } 25 | error() { echo "❌ $1"; } 26 | 27 | run_sql() { 28 | if [ ${#1} -gt 1000 ]; then 29 | local temp_file=$(mktemp) 30 | echo "$1" >"$temp_file" 31 | PGPASSWORD=$PG_PASSWORD psql -h "$PG_HOST" -U "$PG_USER" -d "$PG_DB" -p "$PG_PORT" -q -t -f "$temp_file" 32 | rm "$temp_file" 33 | else 34 | PGPASSWORD=$PG_PASSWORD psql -h "$PG_HOST" -U "$PG_USER" -d "$PG_DB" -p "$PG_PORT" -q -t -c "$1" 35 | fi 36 | } 37 | 38 | setup_postgres() { 39 | log "Ensuring PostgreSQL is ready..." 40 | for i in {1..30}; do 41 | if PGPASSWORD=$PG_PASSWORD psql -h "$PG_HOST" -U "$PG_USER" -d "$PG_DB" -p "$PG_PORT" -c '\q' >/dev/null 2>&1; then 42 | success "PostgreSQL is ready" 43 | return 0 44 | fi 45 | sleep 1 46 | done 47 | error "PostgreSQL is not ready after 30 seconds" 48 | exit 1 49 | } 50 | 51 | stop_pg_flo_gracefully() { 52 | log "Stopping pg_flo replicator..." 53 | if kill -0 "$pg_flo_PID" 2>/dev/null; then 54 | kill -TERM "$pg_flo_PID" 55 | wait "$pg_flo_PID" 2>/dev/null || true 56 | success "pg_flo replicator stopped" 57 | else 58 | log "pg_flo replicator process not found, it may have already completed" 59 | fi 60 | 61 | log "Stopping pg_flo worker..." 62 | if kill -0 "$pg_flo_WORKER_PID" 2>/dev/null; then 63 | kill -TERM "$pg_flo_WORKER_PID" 64 | wait "$pg_flo_WORKER_PID" 2>/dev/null || true 65 | success "pg_flo worker stopped" 66 | else 67 | log "pg_flo worker process not found, it may have already completed" 68 | fi 69 | } 70 | 71 | show_pg_flo_logs() { 72 | log "pg_flo replicator logs:" 73 | echo "----------------------------------------" 74 | cat $pg_flo_LOG* 75 | echo "----------------------------------------" 76 | 77 | log "pg_flo worker logs:" 78 | echo "----------------------------------------" 79 | cat $pg_flo_WORKER_LOG* 80 | echo "----------------------------------------" 81 | } 82 | 83 | run_sql_target() { 84 | if [ ${#1} -gt 1000 ]; then 85 | local temp_file=$(mktemp) 86 | echo "$1" >"$temp_file" 87 | PGPASSWORD=$TARGET_PG_PASSWORD psql -h "$TARGET_PG_HOST" -U "$TARGET_PG_USER" -d "$TARGET_PG_DB" -p "$TARGET_PG_PORT" -q -t -f "$temp_file" 88 | rm "$temp_file" 89 | else 90 | PGPASSWORD=$TARGET_PG_PASSWORD psql -h "$TARGET_PG_HOST" -U "$TARGET_PG_USER" -d "$TARGET_PG_DB" -p "$TARGET_PG_PORT" -q -t -c "$1" 91 | fi 92 | } 93 | 94 | setup_docker() { 95 | rm -Rf /tmp/pg* 96 | log "Setting up Docker environment..." 
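  # Recreate the compose stack (removing volumes) so every run starts with
  # clean source/target databases and fresh NATS state.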
97 | docker compose -f internal/docker-compose.yml down -v 98 | docker compose -f internal/docker-compose.yml up -d 99 | success "Docker environment is set up" 100 | } 101 | -------------------------------------------------------------------------------- /internal/scripts/e2e_copy_and_stream.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | 4 | source "$(dirname "$0")/e2e_common.sh" 5 | 6 | create_users() { 7 | log "Creating test table..." 8 | run_sql "DROP TABLE IF EXISTS public.users;" 9 | run_sql "CREATE TABLE public.users ( 10 | id serial PRIMARY KEY, 11 | int_col integer, 12 | float_col float, 13 | text_col text, 14 | bool_col boolean, 15 | date_col date, 16 | timestamp_col timestamp with time zone, 17 | json_col jsonb, 18 | array_col integer[], 19 | bytea_col bytea 20 | );" 21 | success "Test table created" 22 | } 23 | 24 | populate_initial_data() { 25 | log "Populating initial data..." 26 | run_sql "INSERT INTO public.users ( 27 | int_col, float_col, text_col, bool_col, date_col, timestamp_col, json_col, array_col, bytea_col 28 | ) SELECT 29 | generate_series(1, 500000), 30 | random() * 100, 31 | 'Initial data ' || generate_series(1, 500000), 32 | (random() > 0.5), 33 | current_date + (random() * 365)::integer * interval '1 day', 34 | current_timestamp + (random() * 365 * 24 * 60 * 60)::integer * interval '1 second', 35 | json_build_object('key', 'value' || generate_series(1, 500000), 'number', generate_series(1, 500000)), 36 | ARRAY[generate_series(1, 3)], 37 | decode(lpad(to_hex(generate_series(1, 4)), 8, '0'), 'hex') 38 | ;" 39 | run_sql "UPDATE public.users SET text_col = text_col || ' - Updated';" 40 | 41 | log "Inserting large JSON data..." 42 | local large_json='{"data":[' 43 | for i in {1..10000}; do 44 | if [ "$i" -ne 1 ]; then 45 | large_json+=',' 46 | fi 47 | large_json+='{"id":'$i',"name":"Item '$i'","description":"This is a long description for item '$i'. It contains a lot of text to make the JSON larger.","attributes":{"color":"red","size":"large","weight":10.5,"tags":["tag1","tag2","tag3"]}}' 48 | done 49 | large_json+=']}' 50 | 51 | run_sql "INSERT INTO public.users (int_col, json_col) VALUES (1000001, '$large_json'::jsonb);" 52 | 53 | run_sql "ANALYZE public.users;" 54 | success "Initial data populated" 55 | } 56 | 57 | simulate_concurrent_changes() { 58 | log "Simulating concurrent changes..." 59 | for i in {1..3000}; do 60 | run_sql "INSERT INTO public.users ( 61 | int_col, float_col, text_col, bool_col, date_col, timestamp_col, json_col, array_col, bytea_col 62 | ) VALUES ( 63 | $i, 64 | $i * 1.5, 65 | 'Concurrent data $i', 66 | ($i % 2 = 0), 67 | current_date + ($i % 365) * interval '1 day', 68 | current_timestamp + ($i % (365 * 24)) * interval '1 hour', 69 | '{\"key\": \"concurrent_$i\", \"value\": $i}', 70 | ARRAY[$i, $i+1, $i+2], 71 | decode(lpad(to_hex($i), 8, '0'), 'hex') 72 | );" 73 | done 74 | success "Concurrent changes simulated" 75 | } 76 | 77 | start_pg_flo_replication() { 78 | log "Starting pg_flo replication..." 79 | $pg_flo_BIN replicator \ 80 | --host "$PG_HOST" \ 81 | --port "$PG_PORT" \ 82 | --dbname "$PG_DB" \ 83 | --user "$PG_USER" \ 84 | --password "$PG_PASSWORD" \ 85 | --group "test_group" \ 86 | --tables "users" \ 87 | --schema "public" \ 88 | --nats-url "$NATS_URL" \ 89 | --copy-and-stream \ 90 | --max-copy-workers-per-table 4 \ 91 | >"$pg_flo_LOG" 2>&1 & 92 | pg_flo_PID=$! 
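  # Record the replicator PID so stop_pg_flo_gracefully can signal it later.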
93 | log "pg_flo started with PID: $pg_flo_PID" 94 | success "pg_flo replication started" 95 | } 96 | 97 | start_pg_flo_worker() { 98 | log "Starting pg_flo worker with PostgreSQL sink..." 99 | $pg_flo_BIN worker postgres \ 100 | --group "test_group" \ 101 | --nats-url "$NATS_URL" \ 102 | --source-host "$PG_HOST" \ 103 | --source-port "$PG_PORT" \ 104 | --source-dbname "$PG_DB" \ 105 | --source-user "$PG_USER" \ 106 | --source-password "$PG_PASSWORD" \ 107 | --target-host "$TARGET_PG_HOST" \ 108 | --target-port "$TARGET_PG_PORT" \ 109 | --target-dbname "$TARGET_PG_DB" \ 110 | --target-user "$TARGET_PG_USER" \ 111 | --target-password "$TARGET_PG_PASSWORD" \ 112 | --batch-size 5000 \ 113 | --target-sync-schema \ 114 | >"$pg_flo_WORKER_LOG" 2>&1 & 115 | pg_flo_WORKER_PID=$! 116 | log "pg_flo worker started with PID: $pg_flo_WORKER_PID" 117 | success "pg_flo worker started" 118 | } 119 | 120 | compare_row_counts() { 121 | log "Comparing row counts..." 122 | SOURCE_COUNT=$(run_sql "SELECT COUNT(*) FROM public.users") 123 | TARGET_COUNT=$(run_sql_target "SELECT COUNT(*) FROM public.users") 124 | 125 | log "Source database row count: $SOURCE_COUNT" 126 | log "Target database row count: $TARGET_COUNT" 127 | 128 | EXPECTED_COUNT=503001 129 | 130 | if [ "$SOURCE_COUNT" -eq "$TARGET_COUNT" ] && [ "$SOURCE_COUNT" -eq "$EXPECTED_COUNT" ]; then 131 | success "Row counts match and total is correct ($EXPECTED_COUNT)" 132 | return 0 133 | else 134 | error "Row counts do not match or total is incorrect. Expected $EXPECTED_COUNT, Source: $SOURCE_COUNT, Target: $TARGET_COUNT" 135 | return 1 136 | fi 137 | } 138 | 139 | verify_large_json() { 140 | log "Verifying large JSON data..." 141 | local source_json_length=$(run_sql " 142 | SELECT jsonb_array_length(json_col->'data') 143 | FROM public.users 144 | WHERE int_col = 1000001 145 | ") 146 | local target_json_length=$(run_sql_target " 147 | SELECT jsonb_array_length(json_col->'data') 148 | FROM public.users 149 | WHERE int_col = 1000001 150 | ") 151 | 152 | log "Source JSON length: $source_json_length" 153 | log "Target JSON length: $target_json_length" 154 | 155 | if [ -n "$source_json_length" ] && [ -n "$target_json_length" ] && 156 | [ "$source_json_length" -eq "$target_json_length" ] && 157 | [ "$source_json_length" -eq 10000 ]; then 158 | success "Large JSON data verified successfully" 159 | return 0 160 | else 161 | error "Large JSON data verification failed. Expected length 10000, got Source: $source_json_length, Target: $target_json_length" 162 | return 1 163 | fi 164 | } 165 | 166 | verify_data_integrity() { 167 | log "Verifying data integrity..." 
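  # Dump both tables to CSV ordered by id and compare MD5 hashes, so the check
  # fails on any row-level difference between source and target.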
168 | 169 | generate_table_hash() { 170 | local db=$1 171 | local csv_file="/tmp/pg_flo_${db}_dump.csv" 172 | 173 | if [ "$db" = "source" ]; then 174 | PGPASSWORD=$PG_PASSWORD psql -h "$PG_HOST" -p "$PG_PORT" -U "$PG_USER" -d "$PG_DB" \ 175 | -c "\COPY (SELECT * FROM public.users ORDER BY id) TO '$csv_file' WITH CSV" 176 | else 177 | PGPASSWORD=$TARGET_PG_PASSWORD psql -h "$TARGET_PG_HOST" -p "$TARGET_PG_PORT" -U "$TARGET_PG_USER" -d "$TARGET_PG_DB" \ 178 | -c "\COPY (SELECT * FROM public.users ORDER BY id) TO '$csv_file' WITH CSV" 179 | fi 180 | 181 | if command -v md5 >/dev/null; then 182 | md5 -q "$csv_file" 183 | elif command -v md5sum >/dev/null; then 184 | md5sum "$csv_file" | awk '{ print $1 }' 185 | else 186 | echo "Neither md5 nor md5sum command found" >&2 187 | return 1 188 | fi 189 | } 190 | 191 | local source_hash=$(generate_table_hash "source") 192 | local target_hash=$(generate_table_hash "target") 193 | 194 | log "Source data hash: $source_hash" 195 | log "Target data hash: $target_hash" 196 | log "Source CSV file: /tmp/pg_flo_source_dump.csv" 197 | log "Target CSV file: /tmp/pg_flo_target_dump.csv" 198 | 199 | if [ "$source_hash" = "$target_hash" ]; then 200 | success "Data integrity verified: source and target databases match 100%" 201 | return 0 202 | else 203 | error "Data integrity check failed: source and target databases do not match" 204 | log "You can compare the dumps using: diff /tmp/pg_flo_source_dump.csv /tmp/pg_flo_target_dump.csv" 205 | return 1 206 | fi 207 | } 208 | 209 | test_pg_flo_cdc() { 210 | setup_postgres 211 | create_users 212 | populate_initial_data 213 | 214 | start_pg_flo_replication 215 | start_pg_flo_worker 216 | simulate_concurrent_changes 217 | 218 | log "Waiting for changes to replicate..." 219 | sleep 90 220 | stop_pg_flo_gracefully 221 | compare_row_counts || return 1 222 | verify_large_json || return 1 223 | verify_data_integrity || return 1 224 | } 225 | 226 | log "Starting pg_flo CDC test..." 227 | if test_pg_flo_cdc; then 228 | success "All tests passed! 🎉" 229 | exit 0 230 | else 231 | error "Some tests failed. Please check the logs." 232 | show_pg_flo_logs 233 | exit 1 234 | fi 235 | -------------------------------------------------------------------------------- /internal/scripts/e2e_copy_only.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | 4 | source "$(dirname "$0")/e2e_common.sh" 5 | 6 | create_users() { 7 | log "Creating test table..." 8 | run_sql "DROP TABLE IF EXISTS public.users;" 9 | run_sql "CREATE TABLE public.users ( 10 | id serial PRIMARY KEY, 11 | int_col integer, 12 | float_col float, 13 | text_col text, 14 | bool_col boolean, 15 | date_col date, 16 | timestamp_col timestamp with time zone, 17 | json_col jsonb, 18 | array_col integer[], 19 | bytea_col bytea 20 | );" 21 | success "Test table created" 22 | } 23 | 24 | populate_initial_data() { 25 | log "Populating initial data..." 
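  # Seed 500,000 rows spanning numeric, text, boolean, date/time, JSON, array
  # and bytea columns in a single generate_series insert.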
26 | run_sql "INSERT INTO public.users ( 27 | int_col, float_col, text_col, bool_col, date_col, timestamp_col, json_col, array_col, bytea_col 28 | ) SELECT 29 | generate_series(1, 500000), 30 | random() * 100, 31 | 'Initial data ' || generate_series(1, 500000), 32 | (random() > 0.5), 33 | current_date + (random() * 365)::integer * interval '1 day', 34 | current_timestamp + (random() * 365 * 24 * 60 * 60)::integer * interval '1 second', 35 | json_build_object('key', 'value' || generate_series(1, 500000), 'number', generate_series(1, 500000)), 36 | ARRAY[generate_series(1, 3)], 37 | decode(lpad(to_hex(generate_series(1, 4)), 8, '0'), 'hex') 38 | ;" 39 | 40 | log "Inserting large JSON data..." 41 | local large_json='{"data":[' 42 | for i in {1..10000}; do 43 | if [ "$i" -ne 1 ]; then 44 | large_json+=',' 45 | fi 46 | large_json+='{"id":'$i',"name":"Item '$i'","description":"This is a long description for item '$i'. It contains a lot of text to make the JSON larger.","attributes":{"color":"red","size":"large","weight":10.5,"tags":["tag1","tag2","tag3"]}}' 47 | done 48 | large_json+=']}' 49 | 50 | run_sql "INSERT INTO public.users (int_col, json_col) VALUES (1000001, '$large_json'::jsonb);" 51 | 52 | run_sql "ANALYZE public.users;" 53 | success "Initial data populated" 54 | } 55 | 56 | start_pg_flo_copy_only() { 57 | log "Starting pg_flo in copy-only mode..." 58 | $pg_flo_BIN replicator \ 59 | --host "$PG_HOST" \ 60 | --port "$PG_PORT" \ 61 | --dbname "$PG_DB" \ 62 | --user "$PG_USER" \ 63 | --password "$PG_PASSWORD" \ 64 | --group "test_group" \ 65 | --tables "users" \ 66 | --schema "public" \ 67 | --nats-url "$NATS_URL" \ 68 | --copy \ 69 | --max-copy-workers-per-table 10 \ 70 | >"$pg_flo_LOG" 2>&1 & 71 | pg_flo_PID=$! 72 | log "pg_flo started with PID: $pg_flo_PID" 73 | success "pg_flo copy-only started" 74 | } 75 | 76 | start_pg_flo_worker() { 77 | log "Starting pg_flo worker with PostgreSQL sink..." 78 | $pg_flo_BIN worker postgres \ 79 | --group "test_group" \ 80 | --nats-url "$NATS_URL" \ 81 | --source-host "$PG_HOST" \ 82 | --source-port "$PG_PORT" \ 83 | --source-dbname "$PG_DB" \ 84 | --source-user "$PG_USER" \ 85 | --source-password "$PG_PASSWORD" \ 86 | --target-host "$TARGET_PG_HOST" \ 87 | --target-port "$TARGET_PG_PORT" \ 88 | --target-dbname "$TARGET_PG_DB" \ 89 | --target-user "$TARGET_PG_USER" \ 90 | --target-password "$TARGET_PG_PASSWORD" \ 91 | --target-sync-schema \ 92 | >"$pg_flo_WORKER_LOG" 2>&1 & 93 | pg_flo_WORKER_PID=$! 94 | log "pg_flo worker started with PID: $pg_flo_WORKER_PID" 95 | success "pg_flo worker started" 96 | } 97 | 98 | compare_row_counts() { 99 | log "Comparing row counts..." 100 | SOURCE_COUNT=$(run_sql "SELECT COUNT(*) FROM public.users") 101 | TARGET_COUNT=$(run_sql_target "SELECT COUNT(*) FROM public.users") 102 | 103 | log "Source database row count: $SOURCE_COUNT" 104 | log "Target database row count: $TARGET_COUNT" 105 | 106 | EXPECTED_COUNT=500001 # 500,000 regular rows + 1 large JSON row 107 | 108 | if [ "$SOURCE_COUNT" -eq "$TARGET_COUNT" ] && [ "$SOURCE_COUNT" -eq "$EXPECTED_COUNT" ]; then 109 | success "Row counts match and total is correct ($EXPECTED_COUNT)" 110 | return 0 111 | else 112 | error "Row counts do not match or total is incorrect. Expected $EXPECTED_COUNT, Source: $SOURCE_COUNT, Target: $TARGET_COUNT" 113 | return 1 114 | fi 115 | } 116 | 117 | verify_large_json() { 118 | log "Verifying large JSON data..." 
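  # The large-JSON row is identified by int_col = 1000001; compare the length
  # of its "data" array on source and target (expected: 10000 elements).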
119 | local source_json_length=$(run_sql " 120 | SELECT jsonb_array_length(json_col->'data') 121 | FROM public.users 122 | WHERE int_col = 1000001 123 | ") 124 | local target_json_length=$(run_sql_target " 125 | SELECT jsonb_array_length(json_col->'data') 126 | FROM public.users 127 | WHERE int_col = 1000001 128 | ") 129 | 130 | log "Source JSON length: $source_json_length" 131 | log "Target JSON length: $target_json_length" 132 | 133 | if [ -n "$source_json_length" ] && [ -n "$target_json_length" ] && 134 | [ "$source_json_length" -eq "$target_json_length" ] && 135 | [ "$source_json_length" -eq 10000 ]; then 136 | success "Large JSON data verified successfully" 137 | return 0 138 | else 139 | error "Large JSON data verification failed. Expected length 10000, got Source: $source_json_length, Target: $target_json_length" 140 | return 1 141 | fi 142 | } 143 | 144 | verify_data_integrity() { 145 | log "Verifying data integrity..." 146 | 147 | generate_table_hash() { 148 | local db=$1 149 | local csv_file="/tmp/pg_flo_${db}_dump.csv" 150 | 151 | if [ "$db" = "source" ]; then 152 | PGPASSWORD=$PG_PASSWORD psql -h "$PG_HOST" -p "$PG_PORT" -U "$PG_USER" -d "$PG_DB" \ 153 | -c "\COPY (SELECT * FROM public.users ORDER BY id) TO '$csv_file' WITH CSV" 154 | else 155 | PGPASSWORD=$TARGET_PG_PASSWORD psql -h "$TARGET_PG_HOST" -p "$TARGET_PG_PORT" -U "$TARGET_PG_USER" -d "$TARGET_PG_DB" \ 156 | -c "\COPY (SELECT * FROM public.users ORDER BY id) TO '$csv_file' WITH CSV" 157 | fi 158 | 159 | if command -v md5 >/dev/null; then 160 | md5 -q "$csv_file" 161 | elif command -v md5sum >/dev/null; then 162 | md5sum "$csv_file" | awk '{ print $1 }' 163 | else 164 | echo "Neither md5 nor md5sum command found" >&2 165 | return 1 166 | fi 167 | } 168 | 169 | local source_hash=$(generate_table_hash "source") 170 | local target_hash=$(generate_table_hash "target") 171 | 172 | log "Source data hash: $source_hash" 173 | log "Target data hash: $target_hash" 174 | 175 | if [ "$source_hash" = "$target_hash" ]; then 176 | success "Data integrity verified: source and target databases match 100%" 177 | return 0 178 | else 179 | error "Data integrity check failed: source and target databases do not match" 180 | log "You can compare the dumps using: diff /tmp/pg_flo_source_dump.csv /tmp/pg_flo_target_dump.csv" 181 | return 1 182 | fi 183 | } 184 | 185 | test_pg_flo_copy_only() { 186 | setup_postgres 187 | create_users 188 | populate_initial_data 189 | 190 | start_pg_flo_copy_only 191 | start_pg_flo_worker 192 | 193 | log "Waiting for changes to replicate..." 194 | sleep 180 195 | stop_pg_flo_gracefully 196 | 197 | compare_row_counts || return 1 198 | verify_large_json || return 1 199 | verify_data_integrity || return 1 200 | } 201 | 202 | log "Starting pg_flo copy-only test..." 203 | if test_pg_flo_copy_only; then 204 | success "All tests passed! 🎉" 205 | exit 0 206 | else 207 | error "Some tests failed. Please check the logs." 208 | show_pg_flo_logs 209 | exit 1 210 | fi 211 | -------------------------------------------------------------------------------- /internal/scripts/e2e_ddl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | 4 | source "$(dirname "$0")/e2e_common.sh" 5 | 6 | create_test_tables() { 7 | log "Creating test schemas and tables..." 
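  # Rebuild the app and public schemas so the DDL test always starts from a
  # known-empty state.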
8 | run_sql "DROP SCHEMA IF EXISTS app CASCADE; CREATE SCHEMA app;" 9 | run_sql "DROP SCHEMA IF EXISTS public CASCADE; CREATE SCHEMA public;" 10 | 11 | run_sql "CREATE TABLE app.users (id serial PRIMARY KEY, data text);" 12 | run_sql "CREATE TABLE app.posts (id serial PRIMARY KEY, content text);" 13 | 14 | run_sql "CREATE TABLE app.comments (id serial PRIMARY KEY, text text);" 15 | run_sql "CREATE TABLE public.metrics (id serial PRIMARY KEY, value numeric);" 16 | success "Test tables created" 17 | } 18 | 19 | start_pg_flo_replication() { 20 | log "Starting pg_flo replication..." 21 | if [ -f "$pg_flo_LOG" ]; then 22 | mv "$pg_flo_LOG" "${pg_flo_LOG}.bak" 23 | log "Backed up previous replicator log to ${pg_flo_LOG}.bak" 24 | fi 25 | $pg_flo_BIN replicator \ 26 | --host "$PG_HOST" \ 27 | --port "$PG_PORT" \ 28 | --dbname "$PG_DB" \ 29 | --user "$PG_USER" \ 30 | --password "$PG_PASSWORD" \ 31 | --group "group_ddl" \ 32 | --schema "app" \ 33 | --tables "users,posts" \ 34 | --nats-url "$NATS_URL" \ 35 | --track-ddl \ 36 | >"$pg_flo_LOG" 2>&1 & 37 | pg_flo_PID=$! 38 | log "pg_flo replicator started with PID: $pg_flo_PID" 39 | success "pg_flo replication started" 40 | } 41 | 42 | start_pg_flo_worker() { 43 | log "Starting pg_flo worker with PostgreSQL sink..." 44 | if [ -f "$pg_flo_WORKER_LOG" ]; then 45 | mv "$pg_flo_WORKER_LOG" "${pg_flo_WORKER_LOG}.bak" 46 | log "Backed up previous worker log to ${pg_flo_WORKER_LOG}.bak" 47 | fi 48 | $pg_flo_BIN worker postgres \ 49 | --group "group_ddl" \ 50 | --nats-url "$NATS_URL" \ 51 | --source-host "$PG_HOST" \ 52 | --source-port "$PG_PORT" \ 53 | --source-dbname "$PG_DB" \ 54 | --source-user "$PG_USER" \ 55 | --source-password "$PG_PASSWORD" \ 56 | --target-host "$TARGET_PG_HOST" \ 57 | --target-port "$TARGET_PG_PORT" \ 58 | --target-dbname "$TARGET_PG_DB" \ 59 | --target-user "$TARGET_PG_USER" \ 60 | --target-password "$TARGET_PG_PASSWORD" \ 61 | --target-sync-schema \ 62 | >"$pg_flo_WORKER_LOG" 2>&1 & 63 | pg_flo_WORKER_PID=$! 64 | log "pg_flo worker started with PID: $pg_flo_WORKER_PID" 65 | success "pg_flo worker started" 66 | } 67 | 68 | perform_ddl_operations() { 69 | log "Performing DDL operations..." 
70 | 71 | # Column operations on tracked tables 72 | run_sql "ALTER TABLE app.users ADD COLUMN email text;" 73 | run_sql "ALTER TABLE app.users ADD COLUMN status varchar(50) DEFAULT 'active';" 74 | run_sql "ALTER TABLE app.posts ADD COLUMN category text;" 75 | 76 | # Index operations on tracked tables 77 | run_sql "CREATE INDEX CONCURRENTLY idx_users_email ON app.users (email);" 78 | run_sql "CREATE UNIQUE INDEX idx_posts_unique ON app.posts (content) WHERE content IS NOT NULL;" 79 | 80 | # Column modifications on tracked tables 81 | run_sql "ALTER TABLE app.users ALTER COLUMN status SET DEFAULT 'pending';" 82 | run_sql "ALTER TABLE app.posts ALTER COLUMN category TYPE varchar(100);" 83 | 84 | # Rename operations on tracked tables 85 | run_sql "ALTER TABLE app.users RENAME COLUMN data TO profile;" 86 | 87 | # Drop operations on tracked tables 88 | run_sql "DROP INDEX CONCURRENTLY IF EXISTS idx_users_email;" 89 | run_sql "ALTER TABLE app.posts DROP COLUMN IF EXISTS category;" 90 | 91 | # Operations on non-tracked tables (should be ignored) 92 | run_sql "ALTER TABLE app.comments ADD COLUMN author text;" 93 | run_sql "CREATE INDEX idx_comments_text ON app.comments (text);" 94 | run_sql "ALTER TABLE public.metrics ADD COLUMN timestamp timestamptz;" 95 | 96 | success "DDL operations performed" 97 | } 98 | 99 | verify_ddl_changes() { 100 | log "Verifying DDL changes in target database..." 101 | local failures=0 102 | 103 | check_column() { 104 | local table=$1 105 | local column=$2 106 | local expected_exists=$3 107 | local expected_type=${4:-""} 108 | local expected_default=${5:-""} 109 | local query=" 110 | SELECT COUNT(*), 111 | data_type, 112 | character_maximum_length, 113 | column_default 114 | FROM information_schema.columns 115 | WHERE table_schema='app' 116 | AND table_name='$table' 117 | AND column_name='$column' 118 | GROUP BY data_type, character_maximum_length, column_default;" 119 | 120 | local result 121 | result=$(run_sql_target "$query") 122 | 123 | if [ -z "$result" ]; then 124 | exists=0 125 | data_type="" 126 | char_length="" 127 | default_value="" 128 | else 129 | read exists data_type char_length default_value < <(echo "$result" | tr '|' ' ') 130 | fi 131 | 132 | exists=${exists:-0} 133 | 134 | if [ "$exists" -eq "$expected_exists" ]; then 135 | if [ "$expected_exists" -eq 1 ]; then 136 | local type_ok=true 137 | local default_ok=true 138 | 139 | if [ -n "$expected_type" ]; then 140 | # Handle character varying type specifically 141 | if [ "$expected_type" = "character varying" ]; then 142 | if [ "$data_type" = "character varying" ] || [ "$data_type" = "varchar" ] || [ "$data_type" = "character" ]; then 143 | type_ok=true 144 | else 145 | type_ok=false 146 | fi 147 | elif [ "$data_type" != "$expected_type" ]; then 148 | type_ok=false 149 | fi 150 | fi 151 | 152 | if [ -n "$expected_default" ]; then 153 | if [[ "$default_value" == *"$expected_default"* ]]; then 154 | default_ok=true 155 | else 156 | default_ok=false 157 | fi 158 | fi 159 | 160 | if [ "$type_ok" = true ] && [ "$default_ok" = true ]; then 161 | if [[ "$expected_type" == "character varying" && -n "$char_length" ]]; then 162 | success "Column app.$table.$column verification passed (type: $data_type($char_length), default: $default_value)" 163 | else 164 | success "Column app.$table.$column verification passed (type: $data_type, default: $default_value)" 165 | fi 166 | else 167 | if [ "$type_ok" = false ]; then 168 | error "Column app.$table.$column type mismatch (expected: $expected_type, got: $data_type)" 169 | 
failures=$((failures + 1)) 170 | fi 171 | if [ "$default_ok" = false ]; then 172 | error "Column app.$table.$column default value mismatch (expected: $expected_default, got: $default_value)" 173 | failures=$((failures + 1)) 174 | fi 175 | fi 176 | else 177 | success "Column app.$table.$column verification passed (not exists)" 178 | fi 179 | else 180 | error "Column app.$table.$column verification failed (expected: $expected_exists, got: $exists)" 181 | failures=$((failures + 1)) 182 | fi 183 | } 184 | 185 | check_index() { 186 | local index=$1 187 | local expected=$2 188 | local exists=$(run_sql_target "SELECT COUNT(*) FROM pg_indexes WHERE schemaname='app' AND indexname='$index';") 189 | 190 | if [ "$exists" -eq "$expected" ]; then 191 | success "Index app.$index verification passed (expected: $expected)" 192 | else 193 | error "Index app.$index verification failed (expected: $expected, got: $exists)" 194 | failures=$((failures + 1)) 195 | fi 196 | } 197 | 198 | # Verify app.users changes 199 | check_column "users" "email" 1 "text" 200 | check_column "users" "status" 1 "character varying" "'pending'" 201 | check_column "users" "data" 0 202 | check_column "users" "profile" 1 "text" 203 | 204 | # Verify app.posts changes 205 | check_column "posts" "category" 0 206 | check_column "posts" "content" 1 "text" 207 | check_index "idx_posts_unique" 1 "unique" 208 | 209 | # Verify non-tracked tables 210 | check_column "comments" "author" 0 211 | check_index "idx_comments_text" 0 212 | 213 | local remaining_rows=$(run_sql "SELECT COUNT(*) FROM internal_pg_flo.ddl_log;") 214 | if [ "$remaining_rows" -eq 0 ]; then 215 | success "internal_pg_flo.ddl_log table is empty" 216 | else 217 | error "internal_pg_flo.ddl_log table is not empty. Remaining rows: $remaining_rows" 218 | failures=$((failures + 1)) 219 | fi 220 | 221 | if [ "$failures" -eq 0 ]; then 222 | success "All DDL changes verified successfully" 223 | return 0 224 | else 225 | error "DDL verification failed with $failures errors" 226 | return 1 227 | fi 228 | } 229 | 230 | test_pg_flo_ddl() { 231 | setup_postgres 232 | create_test_tables 233 | start_pg_flo_worker 234 | sleep 5 235 | start_pg_flo_replication 236 | sleep 3 237 | perform_ddl_operations 238 | stop_pg_flo_gracefully 239 | verify_ddl_changes || return 1 240 | } 241 | 242 | log "Starting pg_flo CDC test with DDL tracking..." 243 | if test_pg_flo_ddl; then 244 | success "DDL tracking test passed! 🎉" 245 | exit 0 246 | else 247 | error "DDL tracking test failed. Please check the logs." 248 | show_pg_flo_logs 249 | exit 1 250 | fi 251 | -------------------------------------------------------------------------------- /internal/scripts/e2e_multi_tenant.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | 4 | source "$(dirname "$0")/e2e_common.sh" 5 | 6 | create_multi_tenant_table() { 7 | log "Creating multi-tenant test table..." 8 | run_sql "DROP TABLE IF EXISTS public.events;" 9 | run_sql "CREATE TABLE public.events ( 10 | id serial PRIMARY KEY, 11 | tenant_id int NOT NULL, 12 | name text, 13 | email text, 14 | created_at timestamp DEFAULT current_timestamp 15 | );" 16 | success "Multi-tenant test table created" 17 | } 18 | 19 | start_pg_flo_replication() { 20 | log "Starting pg_flo replication..." 
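# The replicator publishes every change on public.events; tenant filtering (tenant_id = 3)
# is applied by the worker via --rules-config multi_tenant_rules.yml.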
21 | $pg_flo_BIN replicator \ 22 | --host "$PG_HOST" \ 23 | --port "$PG_PORT" \ 24 | --dbname "$PG_DB" \ 25 | --user "$PG_USER" \ 26 | --password "$PG_PASSWORD" \ 27 | --group "group_multi_tenant" \ 28 | --tables "events" \ 29 | --schema "public" \ 30 | --nats-url "$NATS_URL" \ 31 | >"$pg_flo_LOG" 2>&1 & 32 | pg_flo_PID=$! 33 | log "pg_flo replicator started with PID: $pg_flo_PID" 34 | success "pg_flo replication started" 35 | } 36 | 37 | start_pg_flo_worker() { 38 | log "Starting pg_flo worker with PostgreSQL sink..." 39 | $pg_flo_BIN worker postgres \ 40 | --group "group_multi_tenant" \ 41 | --nats-url "$NATS_URL" \ 42 | --source-host "$PG_HOST" \ 43 | --source-port "$PG_PORT" \ 44 | --source-dbname "$PG_DB" \ 45 | --source-user "$PG_USER" \ 46 | --source-password "$PG_PASSWORD" \ 47 | --target-host "$TARGET_PG_HOST" \ 48 | --target-port "$TARGET_PG_PORT" \ 49 | --target-dbname "$TARGET_PG_DB" \ 50 | --target-user "$TARGET_PG_USER" \ 51 | --target-password "$TARGET_PG_PASSWORD" \ 52 | --target-sync-schema \ 53 | --rules-config "$(dirname "$0")/multi_tenant_rules.yml" \ 54 | >"$pg_flo_WORKER_LOG" 2>&1 & 55 | pg_flo_WORKER_PID=$! 56 | log "pg_flo worker started with PID: $pg_flo_WORKER_PID" 57 | success "pg_flo worker started" 58 | } 59 | 60 | simulate_multi_tenant_changes() { 61 | log "Simulating multi-tenant changes..." 62 | run_sql "INSERT INTO public.events (tenant_id, name, email) VALUES 63 | (1, 'Alice', 'alice@tenant1.com'), 64 | (2, 'Bob', 'bob@tenant2.com'), 65 | (3, 'Charlie', 'charlie@tenant3.com'), 66 | (3, 'David', 'david@tenant3.com'), 67 | (4, 'Eve', 'eve@tenant4.com'), 68 | (3, 'Frank', 'frank@tenant3.com');" 69 | success "Multi-tenant changes simulated" 70 | } 71 | 72 | verify_multi_tenant_changes() { 73 | log "Verifying multi-tenant changes in target database..." 74 | local tenant_3_count=$(run_sql_target "SELECT COUNT(*) FROM public.events WHERE tenant_id = 3;" | xargs) 75 | local total_count=$(run_sql_target "SELECT COUNT(*) FROM public.events;" | xargs) 76 | 77 | log "Tenant 3 count: $tenant_3_count (expected 3)" 78 | log "Total count: $total_count (expected 3)" 79 | 80 | if [ "$tenant_3_count" -eq 3 ] && [ "$total_count" -eq 3 ]; then 81 | success "Multi-tenant filtering verified successfully" 82 | return 0 83 | else 84 | error "Multi-tenant filtering verification failed" 85 | return 1 86 | fi 87 | } 88 | 89 | test_pg_flo_multi_tenant() { 90 | setup_postgres 91 | create_multi_tenant_table 92 | start_pg_flo_replication 93 | sleep 2 94 | start_pg_flo_worker 95 | simulate_multi_tenant_changes 96 | 97 | log "Waiting for pg_flo to process changes..." 98 | sleep 5 99 | 100 | stop_pg_flo_gracefully 101 | verify_multi_tenant_changes || return 1 102 | } 103 | 104 | # Run the test 105 | log "Starting pg_flo CDC test with multi-tenant filtering..." 106 | if test_pg_flo_multi_tenant; then 107 | success "All tests passed! 🎉" 108 | exit 0 109 | else 110 | error "Some tests failed. Please check the logs." 111 | show_pg_flo_logs 112 | exit 1 113 | fi 114 | -------------------------------------------------------------------------------- /internal/scripts/e2e_postgres.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | 4 | source "$(dirname "$0")/e2e_common.sh" 5 | 6 | create_tables() { 7 | log "Creating test tables in source database..." 
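# users exercises NULL handling (nullable_column) and TOAST storage (toasted_column);
# toast_test holds a ~1 MB jsonb value to verify that large, out-of-line values replicate intact.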
8 | run_sql "DROP TABLE IF EXISTS public.users;" 9 | run_sql "CREATE TABLE public.users ( 10 | id serial PRIMARY KEY, 11 | data text, 12 | nullable_column text, 13 | toasted_column text, 14 | created_at timestamp DEFAULT current_timestamp 15 | );" 16 | run_sql "DROP TABLE IF EXISTS public.toast_test;" 17 | run_sql "CREATE TABLE public.toast_test (id serial PRIMARY KEY, large_jsonb jsonb, small_text text);" 18 | success "Test tables created in source database" 19 | } 20 | 21 | create_config_files() { 22 | log "Creating config files..." 23 | 24 | # Create replicator config 25 | cat >"/tmp/pg_flo_replicator.yml" <"/tmp/pg_flo_worker.yml" <"$pg_flo_LOG" 2>&1 & 74 | pg_flo_PID=$! 75 | log "pg_flo replicator started with PID: $pg_flo_PID" 76 | success "pg_flo replicator started" 77 | } 78 | 79 | start_pg_flo_worker() { 80 | log "Starting pg_flo worker with PostgreSQL sink..." 81 | if [ -f "$pg_flo_WORKER_LOG" ]; then 82 | mv "$pg_flo_WORKER_LOG" "${pg_flo_WORKER_LOG}.bak" 83 | log "Backed up previous worker log to ${pg_flo_WORKER_LOG}.bak" 84 | fi 85 | 86 | $pg_flo_BIN worker postgres --config "/tmp/pg_flo_worker.yml" >"$pg_flo_WORKER_LOG" 2>&1 & 87 | pg_flo_WORKER_PID=$! 88 | log "pg_flo worker started with PID: $pg_flo_WORKER_PID" 89 | success "pg_flo worker started" 90 | } 91 | 92 | simulate_changes() { 93 | log "Simulating changes..." 94 | local insert_count=6000 95 | 96 | for i in $(seq 1 "$insert_count"); do 97 | run_sql "INSERT INTO public.users (data, nullable_column, toasted_column) VALUES ('Data $i', 'Nullable $i', 'Toasted $i');" 98 | done 99 | 100 | # Insert specific rows for deletion 101 | run_sql "INSERT INTO public.users (id, data) VALUES (10001, 'To be deleted 1');" 102 | run_sql "INSERT INTO public.users (id, data) VALUES (10002, 'To be deleted 2');" 103 | run_sql "INSERT INTO public.users (id, data) VALUES (10003, 'To be deleted 3');" 104 | run_sql "INSERT INTO public.users (id, data) VALUES (10004, 'To be deleted 4');" 105 | run_sql "INSERT INTO public.users (id, data) VALUES (10005, 'To be deleted 5');" 106 | 107 | # Insert a row with potentially toasted data 108 | run_sql "INSERT INTO public.users (id, toasted_column) VALUES (10006, repeat('Large toasted data ', 1000));" 109 | 110 | # Update with various scenarios 111 | run_sql "UPDATE public.users SET data = 'Updated data' WHERE id = 1;" 112 | run_sql "UPDATE public.users SET nullable_column = NULL WHERE id = 2;" 113 | run_sql "UPDATE public.users SET data = 'Updated data', nullable_column = NULL WHERE id = 3;" 114 | run_sql "UPDATE public.users SET toasted_column = repeat('A', 10000) WHERE id = 4;" 115 | run_sql "UPDATE public.users SET data = 'Updated data' WHERE id = 5;" 116 | 117 | # Generate large JSONB data (approximately 1MB) 118 | log "Generating 1MB JSONB data..." 119 | local json_data='{"data":"' 120 | for i in {1..100000}; do 121 | json_data+="AAAAAAAAAA" 122 | done 123 | json_data+='"}' 124 | 125 | # Insert large JSONB data 126 | run_sql "INSERT INTO public.toast_test (large_jsonb, small_text) VALUES ('$json_data'::jsonb, 'Initial small text');" 127 | log "Inserted large JSONB data, waiting for replication..." 128 | 129 | # Update unrelated column 130 | run_sql "UPDATE public.toast_test SET small_text = 'Updated small text' WHERE id = 1;" 131 | log "Updated unrelated column, waiting for replication..." 
132 | 133 | # Delete operations 134 | run_sql "DELETE FROM public.users WHERE id = 10001;" 135 | run_sql "DELETE FROM public.users WHERE id IN (10002, 10003);" 136 | run_sql "DELETE FROM public.users WHERE id >= 10004 AND id <= 10005;" 137 | run_sql "DELETE FROM public.users WHERE id = 10006;" 138 | 139 | success "Changes simulated" 140 | } 141 | 142 | verify_changes() { 143 | log "Verifying changes in target database..." 144 | 145 | local updated_data=$(run_sql_target "SELECT data FROM public.users WHERE id = 1;" | xargs) 146 | log "Updated data for id 1: '$updated_data' (expected 'Updated data')" 147 | 148 | local null_column=$(run_sql_target "SELECT coalesce(nullable_column, 'NULL') FROM public.users WHERE id = 2;" | xargs) 149 | log "Nullable column for id 2: '$null_column' (expected 'NULL')" 150 | 151 | local mixed_update=$(run_sql_target "SELECT data || ' | ' || coalesce(nullable_column, 'NULL') FROM public.users WHERE id = 3;" | xargs) 152 | log "Mixed update for id 3: '$mixed_update' (expected 'Updated data | NULL')" 153 | 154 | local toast_length=$(run_sql_target "SELECT length(toasted_column) FROM public.users WHERE id = 4;" | xargs) 155 | log "TOAST column length for id 4: '$toast_length' (expected '10000')" 156 | 157 | local unrelated_column=$(run_sql_target "SELECT nullable_column FROM public.users WHERE id = 5;" | xargs) 158 | log "Unrelated column for id 5: '$unrelated_column' (expected 'Nullable 5')" 159 | 160 | local jsonb_length=$(run_sql_target "SELECT octet_length(large_jsonb::text) FROM public.toast_test LIMIT 1;" | xargs) 161 | log "JSONB column length: '$jsonb_length' bytes (expected > 1000000)" 162 | 163 | local small_text=$(run_sql_target "SELECT small_text FROM public.toast_test LIMIT 1;" | xargs) 164 | log "small_text content: '$small_text' (expected 'Updated small text')" 165 | 166 | local deleted_single=$(run_sql_target "SELECT COUNT(*) FROM public.users WHERE id = 10001;" | xargs) 167 | log "Count of deleted user (id 10001): '$deleted_single' (expected '0')" 168 | 169 | local deleted_multiple=$(run_sql_target "SELECT COUNT(*) FROM public.users WHERE id IN (10002, 10003);" | xargs) 170 | log "Count of deleted users (ids 10002, 10003): '$deleted_multiple' (expected '0')" 171 | 172 | local deleted_range=$(run_sql_target "SELECT COUNT(*) FROM public.users WHERE id >= 10004 AND id <= 10005;" | xargs) 173 | log "Count of deleted users (ids 10004-10005): '$deleted_range' (expected '0')" 174 | 175 | local deleted_toasted=$(run_sql_target "SELECT COUNT(*) FROM public.users WHERE id = 10006;" | xargs) 176 | log "Count of deleted user with toasted data (id 10006): '$deleted_toasted' (expected '0')" 177 | 178 | log "Detailed verification:" 179 | 180 | if [ "$updated_data" != "Updated data" ]; then 181 | log "updated_data: '$updated_data' != 'Updated data'" 182 | error "Verification failed: updated_data mismatch" 183 | return 1 184 | fi 185 | 186 | if [ "$null_column" != "NULL" ]; then 187 | log "null_column: '$null_column' != 'NULL'" 188 | error "Verification failed: null_column mismatch" 189 | return 1 190 | fi 191 | 192 | if [ "$mixed_update" != "Updated data | NULL" ]; then 193 | log "mixed_update: '$mixed_update' != 'Updated data | NULL'" 194 | error "Verification failed: mixed_update mismatch" 195 | return 1 196 | fi 197 | 198 | if [ "$toast_length" != "10000" ]; then 199 | log "toast_length: '$toast_length' != '10000'" 200 | error "Verification failed: toast_length mismatch" 201 | return 1 202 | fi 203 | 204 | if [ "$unrelated_column" != "Nullable 5" ]; then 205 | 
log "unrelated_column: '$unrelated_column' != 'Nullable 5'" 206 | error "Verification failed: unrelated_column mismatch" 207 | return 1 208 | fi 209 | 210 | if [ -z "$jsonb_length" ] || [ "$jsonb_length" -le 1000000 ]; then 211 | log "jsonb_length: '$jsonb_length' <= 1000000" 212 | error "Verification failed: jsonb_length mismatch" 213 | return 1 214 | fi 215 | 216 | if [ "$small_text" != "Updated small text" ]; then 217 | log "small_text: '$small_text' != 'Updated small text'" 218 | error "Verification failed: small_text mismatch" 219 | return 1 220 | fi 221 | 222 | if [ "$deleted_single" != "0" ]; then 223 | log "deleted_single: '$deleted_single' != '0'" 224 | error "Verification failed: deleted_single mismatch" 225 | return 1 226 | fi 227 | 228 | if [ "$deleted_multiple" != "0" ]; then 229 | log "deleted_multiple: '$deleted_multiple' != '0'" 230 | error "Verification failed: deleted_multiple mismatch" 231 | return 1 232 | fi 233 | 234 | if [ "$deleted_range" != "0" ]; then 235 | log "deleted_range: '$deleted_range' != '0'" 236 | error "Verification failed: deleted_range mismatch" 237 | return 1 238 | fi 239 | 240 | if [ "$deleted_toasted" != "0" ]; then 241 | log "deleted_toasted: '$deleted_toasted' != '0'" 242 | error "Verification failed: deleted_toasted mismatch" 243 | return 1 244 | fi 245 | 246 | success "All changes verified successfully in target database" 247 | return 0 248 | } 249 | 250 | test_pg_flo_postgres_sink() { 251 | setup_postgres 252 | create_tables 253 | create_config_files 254 | start_pg_flo_replication 255 | sleep 2 256 | start_pg_flo_worker 257 | simulate_changes 258 | 259 | log "Waiting for pg_flo to process changes..." 260 | 261 | stop_pg_flo_gracefully 262 | verify_changes || return 1 263 | } 264 | 265 | # Run the test 266 | log "Starting pg_flo CDC test with PostgreSQL sink..." 267 | if test_pg_flo_postgres_sink; then 268 | success "All tests passed! 🎉" 269 | exit 0 270 | else 271 | error "Some tests failed. Please check the logs." 272 | show_pg_flo_logs 273 | exit 1 274 | fi 275 | -------------------------------------------------------------------------------- /internal/scripts/e2e_stream_only.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | 4 | source "$(dirname "$0")/e2e_common.sh" 5 | 6 | create_users() { 7 | log "Creating test table..." 8 | run_sql "DROP TABLE IF EXISTS public.users;" 9 | run_sql "CREATE TABLE public.users (id serial PRIMARY KEY, data text, created_at timestamp DEFAULT current_timestamp);" 10 | success "Test table created" 11 | } 12 | 13 | start_pg_flo_replication() { 14 | log "Starting pg_flo replication..." 15 | $pg_flo_BIN replicator \ 16 | --host "$PG_HOST" \ 17 | --port "$PG_PORT" \ 18 | --dbname "$PG_DB" \ 19 | --user "$PG_USER" \ 20 | --password "$PG_PASSWORD" \ 21 | --group "group-2" \ 22 | --tables "users" \ 23 | --schema "public" \ 24 | --nats-url "$NATS_URL" \ 25 | >"$pg_flo_LOG" 2>&1 & 26 | pg_flo_PID=$! 27 | log "pg_flo started with PID: $pg_flo_PID" 28 | success "pg_flo replication started" 29 | } 30 | 31 | start_pg_flo_worker() { 32 | log "Starting pg_flo worker with file sink..." 33 | $pg_flo_BIN worker file \ 34 | --group "group-2" \ 35 | --nats-url "$NATS_URL" \ 36 | --file-output-dir "$OUTPUT_DIR" \ 37 | >"$pg_flo_WORKER_LOG" 2>&1 & 38 | pg_flo_WORKER_PID=$! 39 | log "pg_flo worker started with PID: $pg_flo_WORKER_PID" 40 | success "pg_flo worker started" 41 | } 42 | 43 | simulate_changes() { 44 | log "Simulating changes..." 
45 | local insert_count=1000 46 | local update_count=500 47 | local delete_count=250 48 | 49 | log "Simulating inserts..." 50 | for i in $(seq 1 $insert_count); do 51 | run_sql "INSERT INTO public.users (data) VALUES ('Data $i');" 52 | done 53 | 54 | log "Simulating updates..." 55 | for i in $(seq 1 $update_count); do 56 | run_sql "UPDATE public.users SET data = 'Updated data $i' WHERE id = $i;" 57 | done 58 | 59 | log "Simulating deletes..." 60 | for i in $(seq 1 $delete_count); do 61 | run_sql "DELETE FROM public.users WHERE id = $i;" 62 | done 63 | 64 | success "Changes simulated" 65 | } 66 | 67 | verify_changes() { 68 | log "Verifying changes in ${OUTPUT_DIR}..." 69 | local insert_count=$(jq -s '[.[] | select(.Type == "INSERT")] | length' "$OUTPUT_DIR"/*.jsonl) 70 | local update_count=$(jq -s '[.[] | select(.Type == "UPDATE")] | length' "$OUTPUT_DIR"/*.jsonl) 71 | local delete_count=$(jq -s '[.[] | select(.Type == "DELETE")] | length' "$OUTPUT_DIR"/*.jsonl) 72 | 73 | log "INSERT count: $insert_count (expected 1000)" 74 | log "UPDATE count: $update_count (expected 500)" 75 | log "DELETE count: $delete_count (expected 250)" 76 | 77 | if [ "$insert_count" -eq 1000 ] && [ "$update_count" -eq 500 ] && [ "$delete_count" -eq 250 ]; then 78 | success "Change counts match expected values" 79 | return 0 80 | else 81 | error "Change counts do not match expected values" 82 | return 1 83 | fi 84 | } 85 | 86 | # Main test function 87 | test_pg_flo_cdc() { 88 | setup_postgres 89 | create_users 90 | start_pg_flo_replication 91 | start_pg_flo_worker 92 | log "Waiting for replicator to initialize..." 93 | sleep 2 94 | simulate_changes 95 | 96 | log "Waiting for pg_flo to process changes..." 97 | sleep 2 98 | 99 | stop_pg_flo_gracefully 100 | verify_changes || return 1 101 | } 102 | 103 | # Run the test 104 | log "Starting pg_flo CDC test with changes..." 105 | if test_pg_flo_cdc; then 106 | success "All tests passed! 🎉" 107 | exit 0 108 | else 109 | error "Some tests failed. Please check the logs." 110 | show_pg_flo_logs 111 | exit 1 112 | fi 113 | -------------------------------------------------------------------------------- /internal/scripts/e2e_test_local.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | 4 | source "$(dirname "$0")/e2e_common.sh" 5 | 6 | setup_docker() { 7 | pkill -9 "pg_flo" || true 8 | rm -Rf /tmp/pg* 9 | log "Setting up Docker environment..." 10 | docker compose -f internal/docker-compose.yml down -v 11 | docker compose -f internal/docker-compose.yml up -d 12 | success "Docker environment is set up" 13 | } 14 | 15 | cleanup_data() { 16 | log "Cleaning up data..." 17 | run_sql "DROP TABLE IF EXISTS public.users;" 18 | run_sql "DROP SCHEMA IF EXISTS internal_pg_flo CASCADE;" 19 | rm -rf /tmp/pg_flo-output 20 | rm -f /tmp/pg_flo.log 21 | success "Data cleanup complete" 22 | } 23 | 24 | cleanup() { 25 | log "Cleaning up..." 26 | docker compose down -v 27 | success "Cleanup complete" 28 | } 29 | 30 | trap cleanup EXIT 31 | 32 | make build 33 | 34 | setup_docker 35 | 36 | log "Running e2e ddl tests..." 
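# Note: the log message above says "ddl tests", but this block actually runs the resume
# test (e2e_resume_test.rb) with CI=false.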
37 | if CI=false ruby ./internal/scripts/e2e_resume_test.rb; then 38 | success "e2e ddl tests completed successfully" 39 | else 40 | error "Original e2e tests failed" 41 | exit 1 42 | fi 43 | -------------------------------------------------------------------------------- /internal/scripts/e2e_transform_filter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | 4 | source "$(dirname "$0")/e2e_common.sh" 5 | 6 | create_users() { 7 | log "Creating test table..." 8 | run_sql "DROP TABLE IF EXISTS public.users;" 9 | run_sql "CREATE TABLE public.users ( 10 | id serial PRIMARY KEY, 11 | email text, 12 | phone text, 13 | age int, 14 | ssn text, 15 | created_at timestamp DEFAULT current_timestamp 16 | );" 17 | success "Test table created" 18 | } 19 | 20 | start_pg_flo_replication() { 21 | log "Starting pg_flo replication..." 22 | if [ -f "$pg_flo_LOG" ]; then 23 | mv "$pg_flo_LOG" "${pg_flo_LOG}.bak" 24 | log "Backed up previous replicator log to ${pg_flo_LOG}.bak" 25 | fi 26 | $pg_flo_BIN replicator \ 27 | --host "$PG_HOST" \ 28 | --port "$PG_PORT" \ 29 | --dbname "$PG_DB" \ 30 | --user "$PG_USER" \ 31 | --password "$PG_PASSWORD" \ 32 | --group "group_transform_filter" \ 33 | --tables "users" \ 34 | --schema "public" \ 35 | --nats-url "$NATS_URL" \ 36 | >"$pg_flo_LOG" 2>&1 & 37 | pg_flo_PID=$! 38 | log "pg_flo replicator started with PID: $pg_flo_PID" 39 | success "pg_flo replication started" 40 | } 41 | 42 | start_pg_flo_worker() { 43 | log "Starting pg_flo worker with file sink..." 44 | if [ -f "$pg_flo_WORKER_LOG" ]; then 45 | mv "$pg_flo_WORKER_LOG" "${pg_flo_WORKER_LOG}.bak" 46 | log "Backed up previous worker log to ${pg_flo_WORKER_LOG}.bak" 47 | fi 48 | $pg_flo_BIN worker file \ 49 | --group "group_transform_filter" \ 50 | --nats-url "$NATS_URL" \ 51 | --file-output-dir "$OUTPUT_DIR" \ 52 | --rules-config "$(dirname "$0")/rules.yml" \ 53 | >"$pg_flo_WORKER_LOG" 2>&1 & 54 | pg_flo_WORKER_PID=$! 55 | log "pg_flo worker started with PID: $pg_flo_WORKER_PID" 56 | success "pg_flo worker started" 57 | } 58 | 59 | simulate_changes() { 60 | log "Simulating changes..." 61 | run_sql "INSERT INTO public.users (email, phone, age, ssn) VALUES 62 | ('john@example.com', '1234567890', 25, '123-45-6789'), 63 | ('jane@example.com', '9876543210', 17, '987-65-4321'), 64 | ('bob@example.com', '5551234567', 30, '555-12-3456');" 65 | 66 | run_sql "UPDATE public.users SET email = 'updated@example.com', phone = '1112223333' WHERE id = 1;" 67 | run_sql "DELETE FROM public.users WHERE age = 30;" 68 | run_sql "DELETE FROM public.users WHERE age = 17;" 69 | 70 | success "Changes simulated" 71 | } 72 | 73 | verify_changes() { 74 | log "Verifying changes..." 
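# Expected counts per rules.yml: 2 INSERTs (the age-17 row is dropped by the age >= 18 filter),
# 1 UPDATE, and 2 DELETEs. Both DELETEs are expected to pass because the rules set
# allow_empty_deletes, which lets DELETE events through even when the filtered column is
# absent from the old tuple.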
75 | local insert_count=$(jq -s '[.[] | select(.Type == "INSERT")] | length' "$OUTPUT_DIR"/*.jsonl) 76 | local update_count=$(jq -s '[.[] | select(.Type == "UPDATE")] | length' "$OUTPUT_DIR"/*.jsonl) 77 | local delete_count=$(jq -s '[.[] | select(.Type == "DELETE")] | length' "$OUTPUT_DIR"/*.jsonl) 78 | 79 | log "INSERT count: $insert_count (expected 2)" 80 | log "UPDATE count: $update_count (expected 1)" 81 | log "DELETE count: $delete_count (expected 2)" 82 | 83 | if [ "$insert_count" -eq 2 ] && [ "$update_count" -eq 1 ] && [ "$delete_count" -eq 2 ]; then 84 | success "Change counts match expected values" 85 | else 86 | error "Change counts do not match expected values" 87 | return 1 88 | fi 89 | 90 | # Verify transformations and filters 91 | local masked_email=$(jq -r 'select(.Type == "INSERT" and .NewTuple.id == 1) | .NewTuple.email' "$OUTPUT_DIR"/*.jsonl) 92 | local formatted_phone=$(jq -r 'select(.Type == "INSERT" and .NewTuple.id == 1) | .NewTuple.phone' "$OUTPUT_DIR"/*.jsonl) 93 | local filtered_insert=$(jq -r 'select(.Type == "INSERT" and .NewTuple.id == 2) | .NewTuple.id' "$OUTPUT_DIR"/*.jsonl) 94 | local updated_email=$(jq -r 'select(.Type == "UPDATE") | .NewTuple.email' "$OUTPUT_DIR"/*.jsonl) 95 | local masked_ssn=$(jq -r 'select(.Type == "INSERT" and .NewTuple.id == 1) | .NewTuple.ssn' "$OUTPUT_DIR"/*.jsonl) 96 | local filtered_age=$(jq -r 'select(.Type == "INSERT" and .NewTuple.id == 2) | .NewTuple.age' "$OUTPUT_DIR"/*.jsonl) 97 | 98 | if [[ "$masked_email" == "j**************m" ]] && 99 | [[ "$formatted_phone" == "(123) 456-7890" ]] && 100 | [[ -z "$filtered_insert" ]] && 101 | [[ "$updated_email" == "u*****************m" ]] && 102 | [[ "$masked_ssn" == "1XXXXXXXXX9" ]] && 103 | [[ -z "$filtered_age" ]]; then 104 | success "Transformations and filters applied correctly" 105 | else 106 | error "Transformations or filters not applied correctly" 107 | log "Masked email: $masked_email" 108 | log "Formatted phone: $formatted_phone" 109 | log "Filtered insert: $filtered_insert" 110 | log "Updated email: $updated_email" 111 | log "Masked SSN: $masked_ssn" 112 | log "Filtered age: $filtered_age" 113 | return 1 114 | fi 115 | } 116 | 117 | test_pg_flo_transform_filter() { 118 | setup_postgres 119 | create_users 120 | start_pg_flo_replication 121 | start_pg_flo_worker 122 | sleep 2 123 | simulate_changes 124 | 125 | log "Waiting for pg_flo to process changes..." 126 | 127 | stop_pg_flo_gracefully 128 | verify_changes || return 1 129 | } 130 | 131 | log "Starting pg_flo CDC test with transformations and filters..." 132 | if test_pg_flo_transform_filter; then 133 | success "All tests passed! 🎉" 134 | exit 0 135 | else 136 | error "Some tests failed. Please check the logs." 
137 | show_pg_flo_logs 138 | exit 1 139 | fi 140 | -------------------------------------------------------------------------------- /internal/scripts/multi_tenant_rules.yml: -------------------------------------------------------------------------------- 1 | tables: 2 | events: 3 | - type: filter 4 | column: tenant_id 5 | parameters: 6 | operator: "eq" 7 | value: 3 8 | operations: [INSERT, UPDATE, DELETE] 9 | -------------------------------------------------------------------------------- /internal/scripts/rules.yml: -------------------------------------------------------------------------------- 1 | tables: 2 | users: 3 | - type: transform 4 | column: email 5 | parameters: 6 | type: mask 7 | mask_char: "*" 8 | allow_empty_deletes: true 9 | operations: [INSERT, UPDATE, DELETE] 10 | - type: transform 11 | column: phone 12 | parameters: 13 | type: regex 14 | pattern: "^(\\d{3})(\\d{3})(\\d{4})$" 15 | replace: "($1) $2-$3" 16 | allow_empty_deletes: true 17 | operations: [INSERT, UPDATE, DELETE] 18 | - type: filter 19 | column: age 20 | parameters: 21 | operator: "gte" 22 | value: 18 23 | allow_empty_deletes: true 24 | operations: [INSERT, UPDATE, DELETE] 25 | - type: transform 26 | column: ssn 27 | parameters: 28 | type: mask 29 | mask_char: "X" 30 | allow_empty_deletes: true 31 | operations: [INSERT, UPDATE, DELETE] 32 | -------------------------------------------------------------------------------- /internal/scripts/webhook_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | 4 | source "$(dirname "$0")/e2e_common.sh" 5 | 6 | WEBHOOK_URL="https://deep-article-49.webhook.cool" 7 | 8 | create_users() { 9 | log "Creating initial test table..." 10 | run_sql "DROP TABLE IF EXISTS public.users;" 11 | run_sql "CREATE TABLE public.users (id serial PRIMARY KEY, data text);" 12 | success "Initial test table created" 13 | } 14 | 15 | start_pg_flo_replication() { 16 | log "Starting pg_flo replication..." 17 | $pg_flo_BIN stream webhook \ 18 | --host "$PG_HOST" \ 19 | --port "$PG_PORT" \ 20 | --dbname "$PG_DB" \ 21 | --user "$PG_USER" \ 22 | --password "$PG_PASSWORD" \ 23 | --group "group-webhook" \ 24 | --tables "users" \ 25 | --schema "public" \ 26 | --status-dir "/tmp" \ 27 | --webhook-url "$WEBHOOK_URL" \ 28 | --track-ddl >"$pg_flo_LOG" 2>&1 & 29 | pg_flo_PID=$! 30 | log "pg_flo started with PID: $pg_flo_PID" 31 | success "pg_flo replication started" 32 | } 33 | 34 | simulate_changes() { 35 | log "Simulating changes..." 36 | local insert_count=10 37 | local update_count=5 38 | local delete_count=3 39 | 40 | for i in $(seq 1 $insert_count); do 41 | run_sql "INSERT INTO public.users (data) VALUES ('Data $i');" 42 | done 43 | 44 | for i in $(seq 1 $update_count); do 45 | run_sql "UPDATE public.users SET data = 'Updated data $i' WHERE id = $i;" 46 | done 47 | 48 | for i in $(seq 1 $delete_count); do 49 | run_sql "DELETE FROM public.users WHERE id = $i;" 50 | done 51 | 52 | success "Changes simulated" 53 | } 54 | 55 | perform_ddl_operations() { 56 | log "Performing DDL operations..." 
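# These statements are emitted with --track-ddl enabled on the replicator; they reference
# the unqualified table name "users", which presumably resolves to public.users via the
# default search_path.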
57 | run_sql "ALTER TABLE users ADD COLUMN new_column int;" 58 | run_sql "CREATE INDEX CONCURRENTLY idx_users_data ON users (data);" 59 | run_sql "ALTER TABLE users RENAME COLUMN data TO old_data;" 60 | run_sql "DROP INDEX idx_users_data;" 61 | run_sql "ALTER TABLE users ADD COLUMN new_column_one int;" 62 | run_sql "ALTER TABLE users ALTER COLUMN old_data TYPE varchar(255);" 63 | success "DDL operations performed" 64 | } 65 | 66 | test_pg_flo_webhook() { 67 | setup_docker 68 | setup_postgres 69 | create_users 70 | start_pg_flo_replication 71 | sleep 2 72 | simulate_changes 73 | perform_ddl_operations 74 | 75 | log "Waiting for pg_flo to process changes..." 76 | sleep 10 77 | 78 | stop_pg_flo_gracefully 79 | log "Test completed. Please check https://webhook.site/#!/f5a9abdb-c779-44a2-98ce-0760b4a2fc5c for received events." 80 | } 81 | 82 | # Run the test 83 | log "Starting pg_flo CDC test with webhook sink..." 84 | if test_pg_flo_webhook; then 85 | success "Test completed successfully. Please verify the results on webhook.site" 86 | exit 0 87 | else 88 | error "Test failed. Please check the logs." 89 | show_pg_flo_logs 90 | exit 1 91 | fi 92 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/pgflo/pg_flo/cmd" 8 | ) 9 | 10 | func main() { 11 | if err := cmd.Execute(); err != nil { 12 | fmt.Println(err) 13 | os.Exit(1) 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /pkg/pgflonats/pgflonats.go: -------------------------------------------------------------------------------- 1 | package pgflonats 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "fmt" 7 | "os" 8 | "time" 9 | 10 | "github.com/jackc/pglogrepl" 11 | "github.com/nats-io/nats.go" 12 | ) 13 | 14 | const ( 15 | defaultNATSURL = "nats://localhost:4222" 16 | envNATSURL = "PG_FLO_NATS_URL" 17 | ) 18 | 19 | // NATSClient represents a client for interacting with NATS 20 | type NATSClient struct { 21 | conn *nats.Conn 22 | js nats.JetStreamContext 23 | stream string 24 | stateBucket string 25 | } 26 | 27 | // State represents the current state of the replication process 28 | type State struct { 29 | LSN pglogrepl.LSN `json:"lsn"` 30 | LastProcessedSeq map[string]uint64 31 | } 32 | 33 | // NewNATSClient creates a new NATS client with the specified configuration, setting up the connection, main stream, and state bucket. 
34 | func NewNATSClient(url, stream, group string) (*NATSClient, error) { 35 | if url == "" { 36 | url = os.Getenv(envNATSURL) 37 | if url == "" { 38 | url = defaultNATSURL 39 | } 40 | } 41 | 42 | if stream == "" { 43 | stream = fmt.Sprintf("pgflo_%s_stream", group) 44 | } 45 | 46 | nc, err := nats.Connect(url, 47 | nats.RetryOnFailedConnect(true), 48 | nats.MaxReconnects(-1), 49 | nats.ReconnectWait(time.Second), 50 | nats.DisconnectErrHandler(func(_ *nats.Conn, err error) { 51 | fmt.Printf("Disconnected due to: %s, will attempt reconnects\n", err) 52 | }), 53 | nats.ReconnectHandler(func(nc *nats.Conn) { 54 | fmt.Printf("Reconnected [%s]\n", nc.ConnectedUrl()) 55 | }), 56 | nats.ClosedHandler(func(nc *nats.Conn) { 57 | fmt.Printf("Exiting: %v\n", nc.LastError()) 58 | }), 59 | ) 60 | if err != nil { 61 | return nil, fmt.Errorf("failed to connect to NATS: %w", err) 62 | } 63 | 64 | js, err := nc.JetStream() 65 | if err != nil { 66 | return nil, fmt.Errorf("failed to create JetStream context: %w", err) 67 | } 68 | 69 | // Create the main stream 70 | streamConfig := &nats.StreamConfig{ 71 | Name: stream, 72 | Subjects: []string{fmt.Sprintf("pgflo.%s", group)}, 73 | Storage: nats.FileStorage, 74 | Retention: nats.LimitsPolicy, 75 | MaxAge: 24 * time.Hour, 76 | } 77 | _, err = js.AddStream(streamConfig) 78 | if err != nil && !errors.Is(err, nats.ErrStreamNameAlreadyInUse) { 79 | return nil, fmt.Errorf("failed to create main stream: %w", err) 80 | } 81 | 82 | // Create the state bucket 83 | stateBucket := fmt.Sprintf("pg_flo_state_%s", group) 84 | _, kvErr := js.KeyValue(stateBucket) 85 | if kvErr != nil { 86 | if errors.Is(kvErr, nats.ErrBucketNotFound) { 87 | _, err = js.CreateKeyValue(&nats.KeyValueConfig{ 88 | Bucket: stateBucket, 89 | }) 90 | if err != nil { 91 | return nil, fmt.Errorf("failed to create state bucket: %w", err) 92 | } 93 | } else { 94 | return nil, fmt.Errorf("failed to access state bucket: %w", kvErr) 95 | } 96 | } 97 | 98 | return &NATSClient{ 99 | conn: nc, 100 | js: js, 101 | stream: stream, 102 | stateBucket: stateBucket, 103 | }, nil 104 | } 105 | 106 | // PublishMessage publishes a message to the specified NATS subject. 107 | func (nc *NATSClient) PublishMessage(subject string, data []byte) error { 108 | _, err := nc.js.Publish(subject, data) 109 | if err != nil { 110 | return fmt.Errorf("failed to publish message: %w", err) 111 | } 112 | return nil 113 | } 114 | 115 | // Close closes the NATS connection. 116 | func (nc *NATSClient) Close() error { 117 | nc.conn.Close() 118 | return nil 119 | } 120 | 121 | // SaveState saves the current replication state to NATS. 122 | func (nc *NATSClient) SaveState(state State) error { 123 | kv, err := nc.js.KeyValue(nc.stateBucket) 124 | if err != nil { 125 | return fmt.Errorf("failed to get KV bucket: %v", err) 126 | } 127 | 128 | data, err := json.Marshal(state) 129 | if err != nil { 130 | return fmt.Errorf("failed to marshal state: %v", err) 131 | } 132 | 133 | _, err = kv.Put("state", data) 134 | if err != nil { 135 | return fmt.Errorf("failed to save state: %v", err) 136 | } 137 | 138 | return nil 139 | } 140 | 141 | // GetState retrieves the last saved state from NATS, initializing a new state if none is found. 
142 | func (nc *NATSClient) GetState() (State, error) { 143 | kv, err := nc.js.KeyValue(nc.stateBucket) 144 | if err != nil { 145 | return State{}, fmt.Errorf("failed to get KV bucket: %v", err) 146 | } 147 | 148 | entry, err := kv.Get("state") 149 | if err != nil { 150 | if errors.Is(err, nats.ErrKeyNotFound) { 151 | initialState := State{LastProcessedSeq: make(map[string]uint64)} 152 | // Try to create initial state 153 | if err := nc.SaveState(initialState); err != nil { 154 | // If SaveState fails because the key already exists, fetch it again 155 | if errors.Is(err, nats.ErrKeyExists) || errors.Is(err, nats.ErrUpdateMetaDeleted) { 156 | entry, err = kv.Get("state") 157 | if err != nil { 158 | return State{}, fmt.Errorf("failed to get state after conflict: %v", err) 159 | } 160 | if err := json.Unmarshal(entry.Value(), &initialState); err != nil { 161 | return State{}, fmt.Errorf("failed to unmarshal state after conflict: %v", err) 162 | } 163 | return initialState, nil 164 | } 165 | return State{}, fmt.Errorf("failed to save initial state: %v", err) 166 | } 167 | return initialState, nil 168 | } 169 | return State{}, fmt.Errorf("failed to get state: %v", err) 170 | } 171 | 172 | var state State 173 | if err := json.Unmarshal(entry.Value(), &state); err != nil { 174 | return State{}, fmt.Errorf("failed to unmarshal state: %v", err) 175 | } 176 | 177 | if state.LastProcessedSeq == nil { 178 | state.LastProcessedSeq = make(map[string]uint64) 179 | } 180 | return state, nil 181 | } 182 | 183 | // JetStream returns the JetStream context. 184 | func (nc *NATSClient) JetStream() nats.JetStreamContext { 185 | return nc.js 186 | } 187 | -------------------------------------------------------------------------------- /pkg/replicator/buffer.go: -------------------------------------------------------------------------------- 1 | package replicator 2 | 3 | import ( 4 | "sync" 5 | "time" 6 | ) 7 | 8 | // Buffer is a structure that holds data to be flushed periodically or when certain conditions are met 9 | type Buffer struct { 10 | data []interface{} 11 | maxRows int 12 | flushTimeout time.Duration 13 | lastFlush time.Time 14 | mutex sync.Mutex 15 | } 16 | 17 | // NewBuffer creates a new Buffer instance 18 | func NewBuffer(maxRows int, flushTimeout time.Duration) *Buffer { 19 | return &Buffer{ 20 | data: make([]interface{}, 0, maxRows), 21 | maxRows: maxRows, 22 | flushTimeout: flushTimeout, 23 | lastFlush: time.Now(), 24 | } 25 | } 26 | 27 | // Add adds an item to the buffer and returns true if the buffer should be flushed 28 | func (b *Buffer) Add(item interface{}) bool { 29 | b.mutex.Lock() 30 | defer b.mutex.Unlock() 31 | 32 | b.data = append(b.data, item) 33 | 34 | return b.shouldFlush() 35 | } 36 | 37 | // shouldFlush checks if the buffer should be flushed based on row count, or timeout 38 | func (b *Buffer) shouldFlush() bool { 39 | return len(b.data) >= b.maxRows || time.Since(b.lastFlush) >= b.flushTimeout 40 | } 41 | 42 | // Flush flushes the buffer and returns the data 43 | func (b *Buffer) Flush() []interface{} { 44 | b.mutex.Lock() 45 | defer b.mutex.Unlock() 46 | 47 | if len(b.data) == 0 { 48 | return nil 49 | } 50 | 51 | data := b.data 52 | b.data = make([]interface{}, 0, b.maxRows) 53 | b.lastFlush = time.Now() 54 | 55 | return data 56 | } 57 | -------------------------------------------------------------------------------- /pkg/replicator/config.go: -------------------------------------------------------------------------------- 1 | package replicator 2 | 3 | import "fmt" 4 | 5 | 
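// Illustrative example (hypothetical values): a Config with User "repl", Password "secret",
// Host "localhost", Port 5432, and Database "mydb" yields the connection string
// "postgres://repl:secret@localhost:5432/mydb" from ConnectionString below.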
// Config holds the configuration for the replicator 6 | type Config struct { 7 | Host string 8 | Port uint16 9 | Database string 10 | User string 11 | Password string 12 | Group string 13 | Schema string 14 | Tables []string 15 | TrackDDL bool 16 | } 17 | 18 | // ConnectionString generates and returns a PostgreSQL connection string 19 | func (c Config) ConnectionString() string { 20 | return fmt.Sprintf("postgres://%s:%s@%s:%d/%s", c.User, c.Password, c.Host, c.Port, c.Database) 21 | } 22 | -------------------------------------------------------------------------------- /pkg/replicator/ddl_replicator.go: -------------------------------------------------------------------------------- 1 | package replicator 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "fmt" 7 | "strings" 8 | "time" 9 | 10 | "github.com/jackc/pglogrepl" 11 | "github.com/jackc/pgtype" 12 | "github.com/pgflo/pg_flo/pkg/utils" 13 | ) 14 | 15 | type DDLReplicator struct { 16 | DDLConn StandardConnection 17 | BaseRepl *BaseReplicator 18 | Config Config 19 | } 20 | 21 | // NewDDLReplicator creates a new DDLReplicator instance 22 | func NewDDLReplicator(config Config, BaseRepl *BaseReplicator, ddlConn StandardConnection) (*DDLReplicator, error) { 23 | return &DDLReplicator{ 24 | Config: config, 25 | BaseRepl: BaseRepl, 26 | DDLConn: ddlConn, 27 | }, nil 28 | } 29 | 30 | // SetupDDLTracking sets up the necessary schema, table, and triggers for DDL tracking 31 | func (d *DDLReplicator) SetupDDLTracking(ctx context.Context) error { 32 | tables, err := d.BaseRepl.GetConfiguredTables(ctx) 33 | if err != nil { 34 | return fmt.Errorf("failed to get configured tables: %w", err) 35 | } 36 | 37 | tableConditions := make([]string, len(tables)) 38 | for i, table := range tables { 39 | parts := strings.Split(table, ".") 40 | if len(parts) != 2 { 41 | return fmt.Errorf("invalid table name format: %s", table) 42 | } 43 | tableConditions[i] = fmt.Sprintf("(nspname = '%s' AND relname = '%s')", 44 | parts[0], parts[1]) 45 | } 46 | tableFilter := strings.Join(tableConditions, " OR ") 47 | 48 | _, err = d.DDLConn.Exec(ctx, fmt.Sprintf(` 49 | CREATE SCHEMA IF NOT EXISTS internal_pg_flo; 50 | 51 | CREATE TABLE IF NOT EXISTS internal_pg_flo.ddl_log ( 52 | id SERIAL PRIMARY KEY, 53 | event_type TEXT NOT NULL, 54 | object_type TEXT, 55 | object_identity TEXT, 56 | table_name TEXT, 57 | ddl_command TEXT NOT NULL, 58 | created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP 59 | ); 60 | 61 | CREATE OR REPLACE FUNCTION internal_pg_flo.ddl_trigger() RETURNS event_trigger AS $$ 62 | DECLARE 63 | obj record; 64 | ddl_command text; 65 | table_name text; 66 | should_track boolean; 67 | BEGIN 68 | SELECT current_query() INTO ddl_command; 69 | 70 | IF TG_EVENT = 'ddl_command_end' THEN 71 | FOR obj IN SELECT * FROM pg_event_trigger_ddl_commands() 72 | LOOP 73 | should_track := false; 74 | -- Extract table name if object type is table or index 75 | IF obj.object_type IN ('table', 'table column') THEN 76 | SELECT nspname || '.' || relname, (%s) 77 | INTO table_name, should_track 78 | FROM pg_class c 79 | JOIN pg_namespace n ON c.relnamespace = n.oid 80 | WHERE c.oid = obj.objid; 81 | ELSIF obj.object_type = 'index' THEN 82 | WITH target_table AS ( 83 | SELECT t.oid as table_oid, n.nspname, t.relname 84 | FROM pg_index i 85 | JOIN pg_class t ON t.oid = i.indrelid 86 | JOIN pg_namespace n ON t.relnamespace = n.oid 87 | WHERE i.indexrelid = obj.objid 88 | ) 89 | SELECT nspname || '.' 
|| relname, (%s) 90 | INTO table_name, should_track 91 | FROM target_table; 92 | END IF; 93 | 94 | IF should_track THEN 95 | INSERT INTO internal_pg_flo.ddl_log (event_type, object_type, object_identity, table_name, ddl_command) 96 | VALUES (TG_EVENT, obj.object_type, obj.object_identity, table_name, ddl_command); 97 | END IF; 98 | END LOOP; 99 | END IF; 100 | END; 101 | $$ LANGUAGE plpgsql; 102 | 103 | DROP EVENT TRIGGER IF EXISTS pg_flo_ddl_trigger; 104 | CREATE EVENT TRIGGER pg_flo_ddl_trigger ON ddl_command_end 105 | EXECUTE FUNCTION internal_pg_flo.ddl_trigger(); 106 | `, tableFilter, tableFilter)) 107 | 108 | if err != nil { 109 | d.BaseRepl.Logger.Error().Err(err).Msg("Failed to setup DDL tracking") 110 | return err 111 | } 112 | return nil 113 | } 114 | 115 | // StartDDLReplication starts the DDL replication process 116 | func (d *DDLReplicator) StartDDLReplication(ctx context.Context) { 117 | ticker := time.NewTicker(1 * time.Second) 118 | defer ticker.Stop() 119 | 120 | for { 121 | select { 122 | case <-ctx.Done(): 123 | d.BaseRepl.Logger.Info().Msg("DDL replication stopping...") 124 | return 125 | case <-ticker.C: 126 | if err := d.ProcessDDLEvents(ctx); err != nil { 127 | if ctx.Err() != nil { 128 | // Context canceled, exit gracefully 129 | return 130 | } 131 | d.BaseRepl.Logger.Error().Err(err).Msg("Failed to process DDL events") 132 | } 133 | } 134 | } 135 | } 136 | 137 | // ProcessDDLEvents processes DDL events from the log table 138 | func (d *DDLReplicator) ProcessDDLEvents(ctx context.Context) error { 139 | rows, err := d.DDLConn.Query(ctx, ` 140 | SELECT id, event_type, object_type, object_identity, table_name, ddl_command, created_at 141 | FROM internal_pg_flo.ddl_log 142 | ORDER BY created_at ASC 143 | `) 144 | if err != nil { 145 | d.BaseRepl.Logger.Error().Err(err).Msg("Failed to query DDL log") 146 | return nil 147 | } 148 | defer rows.Close() 149 | 150 | var processedIDs []int 151 | seenCommands := make(map[string]bool) 152 | 153 | for rows.Next() { 154 | var id int 155 | var eventType, objectType, objectIdentity, ddlCommand string 156 | var tableName sql.NullString 157 | var createdAt time.Time 158 | if err := rows.Scan(&id, &eventType, &objectType, &objectIdentity, &tableName, &ddlCommand, &createdAt); err != nil { 159 | d.BaseRepl.Logger.Error().Err(err).Msg("Failed to scan DDL log row") 160 | return nil 161 | } 162 | 163 | if d.shouldSkipDDLEvent(ddlCommand) { 164 | processedIDs = append(processedIDs, id) 165 | continue 166 | } 167 | 168 | if seenCommands[ddlCommand] { 169 | processedIDs = append(processedIDs, id) 170 | continue 171 | } 172 | seenCommands[ddlCommand] = true 173 | 174 | var schema, table string 175 | if tableName.Valid { 176 | schema, table = splitSchemaAndTable(tableName.String) 177 | } else { 178 | schema, table = "public", "" 179 | } 180 | 181 | cdcMessage := utils.CDCMessage{ 182 | Type: utils.OperationDDL, 183 | Schema: schema, 184 | Table: table, 185 | EmittedAt: time.Now(), 186 | Columns: []*pglogrepl.RelationMessageColumn{ 187 | {Name: "event_type", DataType: pgtype.TextOID}, 188 | {Name: "object_type", DataType: pgtype.TextOID}, 189 | {Name: "object_identity", DataType: pgtype.TextOID}, 190 | {Name: "ddl_command", DataType: pgtype.TextOID}, 191 | {Name: "created_at", DataType: pgtype.TimestamptzOID}, 192 | }, 193 | NewTuple: &pglogrepl.TupleData{ 194 | Columns: []*pglogrepl.TupleDataColumn{ 195 | {Data: []byte(eventType)}, 196 | {Data: []byte(objectType)}, 197 | {Data: []byte(objectIdentity)}, 198 | {Data: []byte(ddlCommand)}, 199 | 
{Data: []byte(createdAt.Format(time.RFC3339))}, 200 | }, 201 | }, 202 | } 203 | 204 | if err := d.BaseRepl.PublishToNATS(cdcMessage); err != nil { 205 | d.BaseRepl.Logger.Error().Err(err).Msg("Error during publishing DDL event to NATS") 206 | return nil 207 | } 208 | 209 | processedIDs = append(processedIDs, id) 210 | } 211 | 212 | if err := rows.Err(); err != nil { 213 | d.BaseRepl.Logger.Error().Err(err).Msg("Error during DDL log iteration") 214 | return nil 215 | } 216 | 217 | if len(processedIDs) > 0 { 218 | _, err = d.DDLConn.Exec(ctx, "DELETE FROM internal_pg_flo.ddl_log WHERE id = ANY($1)", processedIDs) 219 | if err != nil { 220 | d.BaseRepl.Logger.Error().Err(err).Msg("Failed to clear processed DDL events") 221 | return nil 222 | } 223 | } 224 | 225 | return nil 226 | } 227 | 228 | // splitSchemaAndTable splits a full table name into schema and table parts 229 | func splitSchemaAndTable(fullName string) (string, string) { 230 | parts := strings.SplitN(fullName, ".", 2) 231 | if len(parts) == 2 { 232 | return parts[0], parts[1] 233 | } 234 | return "public", fullName 235 | } 236 | 237 | // Close closes the DDL connection 238 | func (d *DDLReplicator) Close(ctx context.Context) error { 239 | if d.DDLConn != nil { 240 | return d.DDLConn.Close(ctx) 241 | } 242 | return nil 243 | } 244 | 245 | // Shutdown performs a graceful shutdown of the DDL replicator 246 | func (d *DDLReplicator) Shutdown(ctx context.Context) error { 247 | d.BaseRepl.Logger.Info().Msg("Shutting down DDL replicator") 248 | 249 | // Process remaining events with the provided context 250 | if err := d.ProcessDDLEvents(ctx); err != nil { 251 | d.BaseRepl.Logger.Error().Err(err).Msg("Failed to process final DDL events") 252 | // Continue with shutdown even if processing fails 253 | } 254 | 255 | // Wait for any pending events with respect to context deadline 256 | ticker := time.NewTicker(100 * time.Millisecond) 257 | defer ticker.Stop() 258 | 259 | for { 260 | select { 261 | case <-ctx.Done(): 262 | d.BaseRepl.Logger.Warn().Msg("Context deadline exceeded while waiting for DDL events") 263 | return ctx.Err() 264 | case <-ticker.C: 265 | hasEvents, err := d.HasPendingDDLEvents(ctx) 266 | if err != nil { 267 | d.BaseRepl.Logger.Error().Err(err).Msg("Failed to check pending DDL events") 268 | return err 269 | } 270 | if !hasEvents { 271 | d.BaseRepl.Logger.Info().Msg("All DDL events processed") 272 | return d.Close(ctx) 273 | } 274 | } 275 | } 276 | } 277 | 278 | // HasPendingDDLEvents checks if there are pending DDL events in the log 279 | func (d *DDLReplicator) HasPendingDDLEvents(ctx context.Context) (bool, error) { 280 | var count int 281 | err := d.DDLConn.QueryRow(ctx, ` 282 | SELECT COUNT(*) FROM internal_pg_flo.ddl_log 283 | `).Scan(&count) 284 | if err != nil { 285 | return false, err 286 | } 287 | return count > 0, nil 288 | } 289 | 290 | // shouldSkipDDLEvent determines if a DDL event should be skipped from processing 291 | func (d *DDLReplicator) shouldSkipDDLEvent(ddlCommand string) bool { 292 | if strings.Contains(ddlCommand, "internal_pg_flo.") { 293 | return true 294 | } 295 | 296 | publicationName := GeneratePublicationName(d.Config.Group) 297 | if strings.Contains(ddlCommand, fmt.Sprintf("CREATE PUBLICATION %q", publicationName)) || 298 | strings.Contains(ddlCommand, fmt.Sprintf("DROP PUBLICATION %q", publicationName)) || 299 | strings.Contains(ddlCommand, "CREATE PUBLICATION pg_flo_") || 300 | strings.Contains(ddlCommand, "DROP PUBLICATION pg_flo_") { 301 | return true 302 | } 303 | 304 | return false 
305 | } 306 | -------------------------------------------------------------------------------- /pkg/replicator/errors.go: -------------------------------------------------------------------------------- 1 | package replicator 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | ) 7 | 8 | var ( 9 | ErrReplicatorAlreadyStarted = errors.New("replicator already started") 10 | ErrReplicatorNotStarted = errors.New("replicator not started") 11 | ErrReplicatorAlreadyStopped = errors.New("replicator already stopped") 12 | ) 13 | 14 | // ReplicationError represents an error that occurred during replication. 15 | type ReplicationError struct { 16 | Op string // The operation that caused the error 17 | Err error // The underlying error 18 | } 19 | 20 | // Error returns a formatted error message. 21 | func (e *ReplicationError) Error() string { 22 | return fmt.Sprintf("replication error during %s: %v", e.Op, e.Err) 23 | } 24 | -------------------------------------------------------------------------------- /pkg/replicator/factory.go: -------------------------------------------------------------------------------- 1 | package replicator 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | ) 7 | 8 | // ReplicatorFactory defines the interface for creating replicators 9 | type Factory interface { 10 | CreateReplicator(config Config, natsClient NATSClient) (Replicator, error) 11 | } 12 | 13 | // BaseFactory provides common functionality for factories 14 | type BaseFactory struct{} 15 | 16 | // CreateConnections creates replication and standard connections 17 | func (f *BaseFactory) CreateConnections(config Config) (ReplicationConnection, StandardConnection, error) { 18 | replicationConn := NewReplicationConnection(config) 19 | if err := replicationConn.Connect(context.Background()); err != nil { 20 | return nil, nil, fmt.Errorf("failed to connect for replication: %v", err) 21 | } 22 | 23 | standardConn, err := NewStandardConnection(config) 24 | if err != nil { 25 | return nil, nil, fmt.Errorf("failed to create standard connection: %v", err) 26 | } 27 | 28 | return replicationConn, standardConn, nil 29 | } 30 | 31 | // StreamReplicatorFactory creates `StreamReplicator` instances 32 | type StreamReplicatorFactory struct { 33 | BaseFactory 34 | } 35 | 36 | // CreateReplicator creates a new `StreamReplicator` 37 | func (f *StreamReplicatorFactory) CreateReplicator(config Config, natsClient NATSClient) (Replicator, error) { 38 | replicationConn, standardConn, err := f.CreateConnections(config) 39 | if err != nil { 40 | return nil, err 41 | } 42 | 43 | baseReplicator := NewBaseReplicator(config, replicationConn, standardConn, natsClient) 44 | return &StreamReplicator{BaseReplicator: baseReplicator}, nil 45 | } 46 | 47 | // CopyAndStreamReplicatorFactory creates `CopyAndStreamReplicator` instances 48 | type CopyAndStreamReplicatorFactory struct { 49 | BaseFactory 50 | MaxCopyWorkersPerTable int 51 | CopyOnly bool 52 | } 53 | 54 | // CreateReplicator creates a new `CopyAndStreamReplicator` 55 | func (f *CopyAndStreamReplicatorFactory) CreateReplicator(config Config, natsClient NATSClient) (Replicator, error) { 56 | replicationConn, standardConn, err := f.CreateConnections(config) 57 | if err != nil { 58 | return nil, err 59 | } 60 | 61 | baseReplicator := NewBaseReplicator(config, replicationConn, standardConn, natsClient) 62 | 63 | if f.MaxCopyWorkersPerTable <= 0 { 64 | f.MaxCopyWorkersPerTable = 4 65 | } 66 | 67 | return NewCopyAndStreamReplicator( 68 | baseReplicator, 69 | f.MaxCopyWorkersPerTable, 70 | f.CopyOnly, 71 | ), nil 
72 | } 73 | -------------------------------------------------------------------------------- /pkg/replicator/interfaces.go: -------------------------------------------------------------------------------- 1 | package replicator 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/jackc/pglogrepl" 7 | "github.com/jackc/pgx/v5" 8 | "github.com/jackc/pgx/v5/pgconn" 9 | "github.com/jackc/pgx/v5/pgproto3" 10 | "github.com/nats-io/nats.go" 11 | "github.com/pgflo/pg_flo/pkg/pgflonats" 12 | ) 13 | 14 | type Replicator interface { 15 | Start(ctx context.Context) error 16 | Stop(ctx context.Context) error 17 | } 18 | 19 | type ReplicationConnection interface { 20 | Connect(ctx context.Context) error 21 | Close(ctx context.Context) error 22 | CreateReplicationSlot(ctx context.Context, slotName string) (pglogrepl.CreateReplicationSlotResult, error) 23 | StartReplication(ctx context.Context, slotName string, startLSN pglogrepl.LSN, options pglogrepl.StartReplicationOptions) error 24 | ReceiveMessage(ctx context.Context) (pgproto3.BackendMessage, error) 25 | SendStandbyStatusUpdate(ctx context.Context, status pglogrepl.StandbyStatusUpdate) error 26 | } 27 | 28 | type StandardConnection interface { 29 | Connect(ctx context.Context) error 30 | Close(ctx context.Context) error 31 | Exec(ctx context.Context, sql string, arguments ...interface{}) (pgconn.CommandTag, error) 32 | Query(ctx context.Context, sql string, args ...interface{}) (pgx.Rows, error) 33 | QueryRow(ctx context.Context, sql string, args ...interface{}) pgx.Row 34 | BeginTx(ctx context.Context, txOptions pgx.TxOptions) (pgx.Tx, error) 35 | Acquire(ctx context.Context) (PgxPoolConn, error) 36 | } 37 | 38 | type PgxPoolConn interface { 39 | BeginTx(ctx context.Context, txOptions pgx.TxOptions) (pgx.Tx, error) 40 | Exec(ctx context.Context, sql string, arguments ...interface{}) (pgconn.CommandTag, error) 41 | Query(ctx context.Context, sql string, args ...interface{}) (pgx.Rows, error) 42 | QueryRow(ctx context.Context, sql string, args ...interface{}) pgx.Row 43 | Release() 44 | } 45 | 46 | type NATSClient interface { 47 | PublishMessage(subject string, data []byte) error 48 | Close() error 49 | SaveState(state pgflonats.State) error 50 | GetState() (pgflonats.State, error) 51 | JetStream() nats.JetStreamContext 52 | } 53 | -------------------------------------------------------------------------------- /pkg/replicator/json_encoder.go: -------------------------------------------------------------------------------- 1 | package replicator 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "github.com/pgflo/pg_flo/pkg/utils" 8 | ) 9 | 10 | // InitializeOIDMap initializes the OID to type name map with custom types from the database 11 | func InitializeOIDMap(ctx context.Context, conn StandardConnection) error { 12 | rows, err := conn.Query(ctx, ` 13 | SELECT oid, typname 14 | FROM pg_type 15 | WHERE typtype = 'b' AND oid > 10000 -- Only base types and custom types 16 | `) 17 | if err != nil { 18 | return fmt.Errorf("failed to query pg_type: %w", err) 19 | } 20 | defer rows.Close() 21 | 22 | for rows.Next() { 23 | var oid uint32 24 | var typeName string 25 | if err := rows.Scan(&oid, &typeName); err != nil { 26 | return fmt.Errorf("failed to scan row: %w", err) 27 | } 28 | utils.OidToTypeName[oid] = typeName 29 | } 30 | 31 | if err := rows.Err(); err != nil { 32 | return fmt.Errorf("error iterating over rows: %w", err) 33 | } 34 | 35 | return nil 36 | } 37 | -------------------------------------------------------------------------------- 
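The factory and interface definitions above compose at startup roughly as sketched below. This is not the project's actual wiring (the CLI and worker packages handle that); it is a minimal sketch that assumes a populated `replicator.Config` and an already-constructed `NATSClient` implementation, and uses only exported names that appear in this package.

```go
package example

import (
	"context"
	"fmt"

	"github.com/pgflo/pg_flo/pkg/replicator"
)

// runStream wires a StreamReplicator via its factory. The NATSClient is taken
// as a parameter because its concrete constructor (in pkg/pgflonats) is not
// shown here; this is an illustrative sketch, not the project's CLI wiring.
func runStream(ctx context.Context, cfg replicator.Config, nc replicator.NATSClient) error {
	factory := &replicator.StreamReplicatorFactory{}

	// CreateReplicator opens the replication connection and the pooled
	// standard connection via BaseFactory.CreateConnections.
	repl, err := factory.CreateReplicator(cfg, nc)
	if err != nil {
		return fmt.Errorf("create replicator: %w", err)
	}
	defer func() { _ = repl.Stop(context.Background()) }()

	// Start blocks until replication stops or ctx is cancelled.
	return repl.Start(ctx)
}
```

For an initial table copy followed by streaming, `CopyAndStreamReplicatorFactory` takes the same two arguments and additionally exposes `MaxCopyWorkersPerTable` (defaulting to 4 when unset) and `CopyOnly`.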
/pkg/replicator/replication_connection.go: -------------------------------------------------------------------------------- 1 | package replicator 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "github.com/jackc/pglogrepl" 8 | "github.com/jackc/pgx/v5" 9 | "github.com/jackc/pgx/v5/pgconn" 10 | "github.com/jackc/pgx/v5/pgproto3" 11 | ) 12 | 13 | // PostgresReplicationConnection implements the ReplicationConnection interface 14 | // for PostgreSQL databases. 15 | type PostgresReplicationConnection struct { 16 | Config Config 17 | Conn *pgconn.PgConn 18 | } 19 | 20 | // NewReplicationConnection creates a new PostgresReplicationConnection instance. 21 | func NewReplicationConnection(config Config) ReplicationConnection { 22 | return &PostgresReplicationConnection{ 23 | Config: config, 24 | } 25 | } 26 | 27 | // Connect establishes a connection to the PostgreSQL database for replication. 28 | func (rc *PostgresReplicationConnection) Connect(ctx context.Context) error { 29 | config, err := pgx.ParseConfig(fmt.Sprintf("host=%s port=%d dbname=%s user=%s password=%s", 30 | rc.Config.Host, 31 | rc.Config.Port, 32 | rc.Config.Database, 33 | rc.Config.User, 34 | rc.Config.Password)) 35 | if err != nil { 36 | return fmt.Errorf("failed to parse connection config: %v", err) 37 | } 38 | 39 | config.RuntimeParams["replication"] = "database" 40 | 41 | conn, err := pgx.ConnectConfig(ctx, config) 42 | if err != nil { 43 | return fmt.Errorf("failed to connect to PostgreSQL: %v", err) 44 | } 45 | 46 | rc.Conn = conn.PgConn() 47 | return nil 48 | } 49 | 50 | // Close terminates the connection to the PostgreSQL database. 51 | func (rc *PostgresReplicationConnection) Close(ctx context.Context) error { 52 | return rc.Conn.Close(ctx) 53 | } 54 | 55 | // CreateReplicationSlot creates a new replication slot in the PostgreSQL database. 56 | func (rc *PostgresReplicationConnection) CreateReplicationSlot(ctx context.Context, slotName string) (pglogrepl.CreateReplicationSlotResult, error) { 57 | return pglogrepl.CreateReplicationSlot(ctx, rc.Conn, slotName, "pgoutput", pglogrepl.CreateReplicationSlotOptions{Temporary: false}) 58 | } 59 | 60 | // StartReplication initiates the replication process from the specified LSN. 61 | func (rc *PostgresReplicationConnection) StartReplication(ctx context.Context, slotName string, startLSN pglogrepl.LSN, options pglogrepl.StartReplicationOptions) error { 62 | return pglogrepl.StartReplication(ctx, rc.Conn, slotName, startLSN, options) 63 | } 64 | 65 | // ReceiveMessage receives a message from the PostgreSQL replication stream. 66 | func (rc *PostgresReplicationConnection) ReceiveMessage(ctx context.Context) (pgproto3.BackendMessage, error) { 67 | return rc.Conn.ReceiveMessage(ctx) 68 | } 69 | 70 | // SendStandbyStatusUpdate sends a status update to the PostgreSQL server during replication. 71 | func (rc *PostgresReplicationConnection) SendStandbyStatusUpdate(ctx context.Context, status pglogrepl.StandbyStatusUpdate) error { 72 | return pglogrepl.SendStandbyStatusUpdate(ctx, rc.Conn, status) 73 | } 74 | -------------------------------------------------------------------------------- /pkg/replicator/standard_connection.go: -------------------------------------------------------------------------------- 1 | package replicator 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "github.com/jackc/pgx/v5" 8 | "github.com/jackc/pgx/v5/pgconn" 9 | "github.com/jackc/pgx/v5/pgxpool" 10 | ) 11 | 12 | // StandardConnectionImpl implements the StandardConnection interface for PostgreSQL databases. 
13 | type StandardConnectionImpl struct { 14 | pool *pgxpool.Pool 15 | } 16 | 17 | // NewStandardConnection creates a new StandardConnectionImpl instance and establishes a connection. 18 | func NewStandardConnection(config Config) (*StandardConnectionImpl, error) { 19 | connString := fmt.Sprintf("host=%s port=%d dbname=%s user=%s password=%s", 20 | config.Host, 21 | config.Port, 22 | config.Database, 23 | config.User, 24 | config.Password) 25 | 26 | poolConfig, err := pgxpool.ParseConfig(connString) 27 | if err != nil { 28 | return nil, fmt.Errorf("unable to parse connection string: %v", err) 29 | } 30 | 31 | poolConfig.MaxConns = 20 32 | 33 | pool, err := pgxpool.NewWithConfig(context.Background(), poolConfig) 34 | if err != nil { 35 | return nil, fmt.Errorf("unable to create connection pool: %v", err) 36 | } 37 | return &StandardConnectionImpl{pool: pool}, nil 38 | } 39 | 40 | // Connect establishes a connection to the PostgreSQL database. 41 | func (s *StandardConnectionImpl) Connect(ctx context.Context) error { 42 | return s.pool.Ping(ctx) 43 | } 44 | 45 | // Close terminates the connection to the PostgreSQL database. 46 | func (s *StandardConnectionImpl) Close(_ context.Context) error { 47 | s.pool.Close() 48 | return nil 49 | } 50 | 51 | // Exec executes a SQL query without returning any rows. 52 | func (s *StandardConnectionImpl) Exec(ctx context.Context, sql string, arguments ...interface{}) (pgconn.CommandTag, error) { 53 | return s.pool.Exec(ctx, sql, arguments...) 54 | } 55 | 56 | // BeginTx starts a new transaction with the specified options. 57 | func (s *StandardConnectionImpl) BeginTx(ctx context.Context, txOptions pgx.TxOptions) (pgx.Tx, error) { 58 | return s.pool.BeginTx(ctx, txOptions) 59 | } 60 | 61 | // QueryRow executes a query that is expected to return at most one row. 62 | func (s *StandardConnectionImpl) QueryRow(ctx context.Context, sql string, args ...interface{}) pgx.Row { 63 | return s.pool.QueryRow(ctx, sql, args...) 64 | } 65 | 66 | // Query executes a query that returns rows, typically a SELECT. 67 | func (s *StandardConnectionImpl) Query(ctx context.Context, sql string, args ...interface{}) (pgx.Rows, error) { 68 | return s.pool.Query(ctx, sql, args...) 69 | } 70 | 71 | // Acquire acquires a connection from the pool. 
72 | func (s *StandardConnectionImpl) Acquire(ctx context.Context) (PgxPoolConn, error) { 73 | conn, err := s.pool.Acquire(ctx) 74 | if err != nil { 75 | return nil, err 76 | } 77 | return &PgxPoolConnWrapper{Conn: conn}, nil 78 | } 79 | 80 | type PgxPoolConnWrapper struct { 81 | *pgxpool.Conn 82 | } 83 | -------------------------------------------------------------------------------- /pkg/replicator/stream_replicator.go: -------------------------------------------------------------------------------- 1 | package replicator 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/jackc/pglogrepl" 7 | ) 8 | 9 | type StreamReplicator struct { 10 | *BaseReplicator 11 | } 12 | 13 | func NewStreamReplicator(base *BaseReplicator) *StreamReplicator { 14 | return &StreamReplicator{ 15 | BaseReplicator: base, 16 | } 17 | } 18 | 19 | func (r *StreamReplicator) Start(ctx context.Context) error { 20 | if err := r.BaseReplicator.Start(ctx); err != nil { 21 | return err 22 | } 23 | 24 | startLSN, err := r.GetLastState() 25 | if err != nil { 26 | r.Logger.Warn().Err(err).Msg("Failed to get last LSN, starting from 0") 27 | startLSN = pglogrepl.LSN(0) 28 | } 29 | 30 | r.Logger.Info().Str("startLSN", startLSN.String()).Msg("Starting replication") 31 | 32 | errChan := make(chan error, 1) 33 | go func() { 34 | errChan <- r.StartReplicationFromLSN(ctx, startLSN, r.stopChan) 35 | }() 36 | 37 | select { 38 | case <-ctx.Done(): 39 | return ctx.Err() 40 | case err := <-errChan: 41 | return err 42 | } 43 | } 44 | 45 | func (r *StreamReplicator) Stop(ctx context.Context) error { 46 | return r.BaseReplicator.Stop(ctx) 47 | } 48 | -------------------------------------------------------------------------------- /pkg/replicator/table_handling.go: -------------------------------------------------------------------------------- 1 | package replicator 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "github.com/pgflo/pg_flo/pkg/utils" 8 | ) 9 | 10 | // AddPrimaryKeyInfo adds replication key information to the CDCMessage 11 | func (r *BaseReplicator) AddPrimaryKeyInfo(message *utils.CDCMessage, table string) { 12 | if key, ok := r.TableReplicationKeys[table]; ok { 13 | message.ReplicationKey = key 14 | } else { 15 | r.Logger.Error(). 16 | Str("table", table). 17 | Msg("No replication key information found for table. 
This should not happen as validation is done during initialization") 18 | } 19 | } 20 | 21 | // InitializePrimaryKeyInfo initializes primary key information for all tables 22 | func (r *BaseReplicator) InitializePrimaryKeyInfo() error { 23 | query := ` 24 | WITH table_info AS ( 25 | SELECT 26 | t.tablename, 27 | c.relreplident, 28 | ( 29 | SELECT array_agg(a.attname ORDER BY array_position(i.indkey, a.attnum)) 30 | FROM pg_index i 31 | JOIN pg_attribute a ON a.attrelid = c.oid AND a.attnum = ANY(i.indkey) 32 | WHERE i.indrelid = c.oid AND i.indisprimary 33 | ) as pk_columns, 34 | ( 35 | SELECT array_agg(a.attname ORDER BY array_position(i.indkey, a.attnum)) 36 | FROM pg_index i 37 | JOIN pg_attribute a ON a.attrelid = c.oid AND a.attnum = ANY(i.indkey) 38 | WHERE i.indrelid = c.oid AND i.indisunique AND NOT i.indisprimary 39 | LIMIT 1 40 | ) as unique_columns 41 | FROM pg_tables t 42 | JOIN pg_class c ON t.tablename = c.relname 43 | JOIN pg_namespace n ON c.relnamespace = n.oid 44 | WHERE t.schemaname = $1 45 | ) 46 | SELECT 47 | tablename, 48 | relreplident::text, 49 | COALESCE(pk_columns, ARRAY[]::text[]) as pk_columns, 50 | COALESCE(unique_columns, ARRAY[]::text[]) as unique_columns 51 | FROM table_info; 52 | ` 53 | 54 | rows, err := r.StandardConn.Query(context.Background(), query, r.Config.Schema) 55 | if err != nil { 56 | return fmt.Errorf("failed to query replication key info: %v", err) 57 | } 58 | defer rows.Close() 59 | 60 | r.TableReplicationKeys = make(map[string]utils.ReplicationKey) 61 | 62 | for rows.Next() { 63 | var ( 64 | tableName string 65 | replicaIdentity string 66 | pkColumns []string 67 | uniqueColumns []string 68 | ) 69 | 70 | if err := rows.Scan(&tableName, &replicaIdentity, &pkColumns, &uniqueColumns); err != nil { 71 | return fmt.Errorf("failed to scan row: %v", err) 72 | } 73 | 74 | key := utils.ReplicationKey{} 75 | 76 | switch { 77 | case len(pkColumns) > 0: 78 | key = utils.ReplicationKey{ 79 | Type: utils.ReplicationKeyPK, 80 | Columns: pkColumns, 81 | } 82 | case len(uniqueColumns) > 0: 83 | key = utils.ReplicationKey{ 84 | Type: utils.ReplicationKeyUnique, 85 | Columns: uniqueColumns, 86 | } 87 | case replicaIdentity == "f": 88 | key = utils.ReplicationKey{ 89 | Type: utils.ReplicationKeyFull, 90 | Columns: nil, 91 | } 92 | } 93 | 94 | if err := r.validateTableReplicationKey(tableName, key); err != nil { 95 | r.Logger.Warn(). 96 | Str("table", tableName). 97 | Str("replica_identity", replicaIdentity). 98 | Str("key_type", string(key.Type)). 99 | Strs("columns", key.Columns). 100 | Err(err). 101 | Msg("Invalid replication key configuration") 102 | continue 103 | } 104 | 105 | r.TableReplicationKeys[tableName] = key 106 | 107 | r.Logger.Debug(). 108 | Str("table", tableName). 109 | Str("key_type", string(key.Type)). 110 | Strs("columns", key.Columns). 111 | Str("replica_identity", replicaIdentity). 
112 | Msg("Initialized replication key configuration") 113 | } 114 | 115 | return rows.Err() 116 | } 117 | 118 | // GetConfiguredTables returns all tables based on configuration 119 | // If no specific tables are configured, returns all tables from the configured schema 120 | func (r *BaseReplicator) GetConfiguredTables(ctx context.Context) ([]string, error) { 121 | if len(r.Config.Tables) > 0 { 122 | fullyQualifiedTables := make([]string, len(r.Config.Tables)) 123 | for i, table := range r.Config.Tables { 124 | fullyQualifiedTables[i] = fmt.Sprintf("%s.%s", r.Config.Schema, table) 125 | } 126 | return fullyQualifiedTables, nil 127 | } 128 | 129 | rows, err := r.StandardConn.Query(ctx, ` 130 | SELECT schemaname || '.' || tablename 131 | FROM pg_tables 132 | WHERE schemaname = $1 133 | AND schemaname NOT IN ('pg_catalog', 'information_schema', 'internal_pg_flo') 134 | `, r.Config.Schema) 135 | if err != nil { 136 | return nil, fmt.Errorf("failed to query tables: %v", err) 137 | } 138 | defer rows.Close() 139 | 140 | var tables []string 141 | for rows.Next() { 142 | var tableName string 143 | if err := rows.Scan(&tableName); err != nil { 144 | return nil, fmt.Errorf("failed to scan table name: %v", err) 145 | } 146 | tables = append(tables, tableName) 147 | } 148 | 149 | return tables, nil 150 | } 151 | 152 | func (r *BaseReplicator) validateTableReplicationKey(tableName string, key utils.ReplicationKey) error { 153 | if !key.IsValid() { 154 | return fmt.Errorf( 155 | "table %q requires one of the following:\n"+ 156 | "\t1. A PRIMARY KEY constraint\n"+ 157 | "\t2. A UNIQUE constraint\n"+ 158 | "\t3. REPLICA IDENTITY FULL (ALTER TABLE %s REPLICA IDENTITY FULL)", 159 | tableName, tableName) 160 | } 161 | return nil 162 | } 163 | -------------------------------------------------------------------------------- /pkg/replicator/tests/buffer_test.go: -------------------------------------------------------------------------------- 1 | package replicator_test 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/pgflo/pg_flo/pkg/replicator" 8 | "github.com/stretchr/testify/assert" 9 | "github.com/stretchr/testify/mock" 10 | ) 11 | 12 | func TestBuffer(t *testing.T) { 13 | t.Run("NewBuffer", func(t *testing.T) { 14 | buffer := replicator.NewBuffer(10, 5*time.Second) 15 | assert.NotNil(t, buffer) 16 | }) 17 | 18 | t.Run("Add and Flush", func(t *testing.T) { 19 | buffer := replicator.NewBuffer(10, 5*time.Second) 20 | 21 | // Add items 22 | for i := 0; i < 5; i++ { 23 | shouldFlush := buffer.Add([]byte("test")) 24 | assert.False(t, shouldFlush) 25 | } 26 | 27 | // Flush 28 | data := buffer.Flush() 29 | assert.Len(t, data, 5) 30 | assert.Equal(t, []byte("test"), data[0]) 31 | 32 | // Buffer should be empty after flush 33 | emptyData := buffer.Flush() 34 | assert.Nil(t, emptyData) 35 | }) 36 | 37 | t.Run("Flush on MaxRows", func(t *testing.T) { 38 | buffer := replicator.NewBuffer(3, 5*time.Second) 39 | 40 | buffer.Add([]byte("test1")) 41 | buffer.Add([]byte("test2")) 42 | shouldFlush := buffer.Add([]byte("test3")) 43 | 44 | assert.True(t, shouldFlush) 45 | 46 | data := buffer.Flush() 47 | assert.Len(t, data, 3) 48 | }) 49 | 50 | t.Run("Flush on Timeout", func(t *testing.T) { 51 | buffer := replicator.NewBuffer(10, 100*time.Millisecond) 52 | 53 | buffer.Add([]byte("test")) 54 | time.Sleep(150 * time.Millisecond) 55 | 56 | shouldFlush := buffer.Add([]byte("test")) 57 | assert.True(t, shouldFlush) 58 | 59 | data := buffer.Flush() 60 | assert.Len(t, data, 2) 61 | }) 62 | 63 | t.Run("Concurrent Access", 
func(t *testing.T) { 64 | buffer := replicator.NewBuffer(100, 5*time.Second) 65 | 66 | done := make(chan bool) 67 | for i := 0; i < 10; i++ { 68 | go func() { 69 | for j := 0; j < 10; j++ { 70 | buffer.Add([]byte("test")) 71 | } 72 | done <- true 73 | }() 74 | } 75 | 76 | for i := 0; i < 10; i++ { 77 | <-done 78 | } 79 | 80 | data := buffer.Flush() 81 | assert.Len(t, data, 100) 82 | }) 83 | 84 | t.Run("BufferFlush", func(t *testing.T) { 85 | mockSink := new(MockSink) 86 | buffer := replicator.NewBuffer(5, 1*time.Second) 87 | 88 | mockSink.On("WriteBatch", mock.Anything).Return(nil) 89 | 90 | for i := 0; i < 5; i++ { 91 | shouldFlush := buffer.Add(i) 92 | if shouldFlush { 93 | data := buffer.Flush() 94 | err := mockSink.WriteBatch(data) 95 | assert.NoError(t, err) 96 | } 97 | } 98 | 99 | mockSink.AssertNumberOfCalls(t, "WriteBatch", 1) 100 | mockSink.AssertExpectations(t) 101 | }) 102 | } 103 | -------------------------------------------------------------------------------- /pkg/replicator/tests/ddl_replicator_test.go: -------------------------------------------------------------------------------- 1 | package replicator_test 2 | 3 | import ( 4 | "context" 5 | "strings" 6 | "testing" 7 | "time" 8 | 9 | "github.com/jackc/pgx/v5/pgconn" 10 | "github.com/pgflo/pg_flo/pkg/replicator" 11 | "github.com/pgflo/pg_flo/pkg/utils" 12 | "github.com/rs/zerolog" 13 | "github.com/stretchr/testify/assert" 14 | "github.com/stretchr/testify/mock" 15 | ) 16 | 17 | func TestDDLReplicator(t *testing.T) { 18 | t.Run("NewDDLReplicator", func(t *testing.T) { 19 | mockBaseReplicator := &replicator.BaseReplicator{ 20 | Logger: utils.NewZerologLogger(zerolog.New(nil)), 21 | } 22 | mockStandardConn := &MockStandardConnection{} 23 | config := replicator.Config{} 24 | 25 | ddlReplicator, err := replicator.NewDDLReplicator(config, mockBaseReplicator, mockStandardConn) 26 | 27 | assert.NoError(t, err) 28 | assert.NotNil(t, ddlReplicator) 29 | assert.Equal(t, config, ddlReplicator.Config) 30 | assert.Equal(t, mockStandardConn, ddlReplicator.DDLConn) 31 | }) 32 | 33 | t.Run("SetupDDLTracking", func(t *testing.T) { 34 | mockStandardConn := &MockStandardConnection{} 35 | mockBaseRepl := &replicator.BaseReplicator{ 36 | Logger: utils.NewZerologLogger(zerolog.New(zerolog.NewConsoleWriter()).With().Timestamp().Logger()), 37 | StandardConn: mockStandardConn, 38 | Config: replicator.Config{ 39 | Schema: "public", 40 | Tables: []string{"test_table"}, 41 | }, 42 | } 43 | 44 | ddlReplicator := &replicator.DDLReplicator{ 45 | DDLConn: mockStandardConn, 46 | BaseRepl: mockBaseRepl, 47 | } 48 | 49 | ctx := context.Background() 50 | 51 | mockStandardConn.On("Exec", ctx, mock.AnythingOfType("string"), mock.Anything).Return(pgconn.CommandTag{}, nil). 
52 | Run(func(args mock.Arguments) { 53 | sql := args.Get(1).(string) 54 | assert.Contains(t, sql, "CREATE SCHEMA IF NOT EXISTS internal_pg_flo") 55 | assert.Contains(t, sql, "CREATE TABLE IF NOT EXISTS internal_pg_flo.ddl_log") 56 | assert.Contains(t, sql, "CREATE OR REPLACE FUNCTION internal_pg_flo.ddl_trigger()") 57 | assert.Contains(t, sql, "CREATE EVENT TRIGGER pg_flo_ddl_trigger") 58 | }) 59 | 60 | err := ddlReplicator.SetupDDLTracking(ctx) 61 | 62 | assert.NoError(t, err) 63 | mockStandardConn.AssertExpectations(t) 64 | }) 65 | 66 | t.Run("StartDDLReplication", func(t *testing.T) { 67 | mockStandardConn := &MockStandardConnection{} 68 | mockBaseReplicator := &replicator.BaseReplicator{ 69 | Logger: utils.NewZerologLogger(zerolog.New(zerolog.NewConsoleWriter()).With().Timestamp().Logger()), 70 | } 71 | ddlReplicator := &replicator.DDLReplicator{ 72 | DDLConn: mockStandardConn, 73 | BaseRepl: mockBaseReplicator, 74 | } 75 | 76 | ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) 77 | defer cancel() 78 | 79 | mockRows := &MockRows{} 80 | mockStandardConn.On("Query", mock.Anything, mock.MatchedBy(func(sql string) bool { 81 | expectedParts := []string{ 82 | "SELECT id, event_type, object_type, object_identity, table_name, ddl_command, created_at", 83 | "FROM internal_pg_flo.ddl_log", 84 | "ORDER BY created_at ASC", 85 | } 86 | for _, part := range expectedParts { 87 | if !strings.Contains(sql, part) { 88 | return false 89 | } 90 | } 91 | return true 92 | }), mock.Anything).Return(mockRows, nil).Maybe() 93 | 94 | mockRows.On("Next").Return(false).Maybe() 95 | mockRows.On("Err").Return(nil).Maybe() 96 | mockRows.On("Close").Return().Maybe() 97 | 98 | mockStandardConn.On("QueryRow", mock.Anything, mock.MatchedBy(func(sql string) bool { 99 | return strings.Contains(sql, "SELECT COUNT(*) FROM internal_pg_flo.ddl_log") 100 | }), mock.Anything).Return(&MockRow{ 101 | scanFunc: func(dest ...interface{}) error { 102 | *dest[0].(*int) = 0 103 | return nil 104 | }, 105 | }).Maybe() 106 | 107 | go ddlReplicator.StartDDLReplication(ctx) 108 | 109 | time.Sleep(100 * time.Millisecond) 110 | 111 | cancel() 112 | 113 | time.Sleep(100 * time.Millisecond) 114 | 115 | mockStandardConn.AssertExpectations(t) 116 | mockRows.AssertExpectations(t) 117 | }) 118 | } 119 | -------------------------------------------------------------------------------- /pkg/replicator/tests/json_encoder_test.go: -------------------------------------------------------------------------------- 1 | package replicator_test 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/jackc/pglogrepl" 8 | "github.com/jackc/pgtype" 9 | "github.com/pgflo/pg_flo/pkg/utils" 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | func TestOIDToString(t *testing.T) { 14 | t.Run("OIDToString function", func(t *testing.T) { 15 | assert.Equal(t, "int4", utils.OIDToString(pgtype.Int4OID)) 16 | assert.Equal(t, "text", utils.OIDToString(pgtype.TextOID)) 17 | assert.Equal(t, "unknown_99999", utils.OIDToString(99999)) 18 | }) 19 | } 20 | 21 | func TestCDCBinaryEncoding(t *testing.T) { 22 | t.Run("Encode and decode preserves CDC types", func(t *testing.T) { 23 | testData := utils.CDCMessage{ 24 | Type: utils.OperationInsert, 25 | Schema: "public", 26 | Table: "users", 27 | Columns: []*pglogrepl.RelationMessageColumn{ 28 | {Name: "id", DataType: pgtype.Int4OID}, 29 | {Name: "name", DataType: pgtype.TextOID}, 30 | }, 31 | NewTuple: &pglogrepl.TupleData{ 32 | Columns: []*pglogrepl.TupleDataColumn{ 33 | {Data: []byte("123")}, 34 | 
{Data: []byte("John Doe")}, 35 | }, 36 | }, 37 | } 38 | 39 | encoded, err := testData.MarshalBinary() 40 | assert.NoError(t, err) 41 | 42 | var decoded utils.CDCMessage 43 | err = decoded.UnmarshalBinary(encoded) 44 | assert.NoError(t, err) 45 | 46 | assert.Equal(t, testData.Type, decoded.Type) 47 | assert.Equal(t, testData.Schema, decoded.Schema) 48 | assert.Equal(t, testData.Table, decoded.Table) 49 | assert.Equal(t, testData.Columns, decoded.Columns) 50 | assert.Equal(t, testData.NewTuple, decoded.NewTuple) 51 | }) 52 | } 53 | 54 | func TestBinaryEncodingComplexTypes(t *testing.T) { 55 | t.Run("Encode and decode handles complex types", func(t *testing.T) { 56 | binaryData := []byte{0x01, 0x02, 0x03, 0x04} 57 | jsonbData := []byte(`{"key": "value", "nested": {"number": 42}}`) 58 | timestamp := time.Now().UTC() 59 | floatValue := []byte("3.14159") 60 | intValue := []byte("9876543210") 61 | boolValue := []byte("true") 62 | textArrayValue := []byte("{hello,world}") 63 | 64 | testData := utils.CDCMessage{ 65 | Type: utils.OperationInsert, 66 | Schema: "public", 67 | Table: "complex_types", 68 | Columns: []*pglogrepl.RelationMessageColumn{ 69 | {Name: "binary", DataType: pgtype.ByteaOID}, 70 | {Name: "jsonb", DataType: pgtype.JSONBOID}, 71 | {Name: "timestamp", DataType: pgtype.TimestamptzOID}, 72 | {Name: "float", DataType: pgtype.Float8OID}, 73 | {Name: "integer", DataType: pgtype.Int8OID}, 74 | {Name: "boolean", DataType: pgtype.BoolOID}, 75 | {Name: "text_array", DataType: pgtype.TextArrayOID}, 76 | }, 77 | NewTuple: &pglogrepl.TupleData{ 78 | Columns: []*pglogrepl.TupleDataColumn{ 79 | {Data: binaryData}, 80 | {Data: jsonbData}, 81 | {Data: []byte(timestamp.Format(time.RFC3339Nano))}, 82 | {Data: floatValue}, 83 | {Data: intValue}, 84 | {Data: boolValue}, 85 | {Data: textArrayValue}, 86 | }, 87 | }, 88 | OldTuple: &pglogrepl.TupleData{ 89 | Columns: []*pglogrepl.TupleDataColumn{ 90 | {Data: []byte{0x05, 0x06, 0x07, 0x08}}, 91 | {Data: []byte(`{"old": "data"}`)}, 92 | }, 93 | }, 94 | } 95 | 96 | encoded, err := testData.MarshalBinary() 97 | assert.NoError(t, err) 98 | 99 | var decoded utils.CDCMessage 100 | err = decoded.UnmarshalBinary(encoded) 101 | assert.NoError(t, err) 102 | 103 | assert.Equal(t, binaryData, decoded.NewTuple.Columns[0].Data) 104 | assert.Equal(t, jsonbData, decoded.NewTuple.Columns[1].Data) 105 | assert.Equal(t, []byte(timestamp.Format(time.RFC3339Nano)), decoded.NewTuple.Columns[2].Data) 106 | assert.Equal(t, floatValue, decoded.NewTuple.Columns[3].Data) 107 | assert.Equal(t, intValue, decoded.NewTuple.Columns[4].Data) 108 | assert.Equal(t, boolValue, decoded.NewTuple.Columns[5].Data) 109 | assert.Equal(t, textArrayValue, decoded.NewTuple.Columns[6].Data) 110 | 111 | assert.Equal(t, []byte{0x05, 0x06, 0x07, 0x08}, decoded.OldTuple.Columns[0].Data) 112 | assert.Equal(t, []byte(`{"old": "data"}`), decoded.OldTuple.Columns[1].Data) 113 | 114 | assert.Equal(t, testData.Type, decoded.Type) 115 | assert.Equal(t, testData.Schema, decoded.Schema) 116 | assert.Equal(t, testData.Table, decoded.Table) 117 | assert.Equal(t, testData.Columns, decoded.Columns) 118 | }) 119 | } 120 | -------------------------------------------------------------------------------- /pkg/replicator/tests/mocks_test.go: -------------------------------------------------------------------------------- 1 | package replicator_test 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/jackc/pglogrepl" 7 | "github.com/jackc/pgx/v5" 8 | "github.com/jackc/pgx/v5/pgconn" 9 | "github.com/jackc/pgx/v5/pgproto3" 10 
| "github.com/nats-io/nats.go" 11 | "github.com/pgflo/pg_flo/pkg/pgflonats" 12 | "github.com/pgflo/pg_flo/pkg/replicator" 13 | "github.com/stretchr/testify/mock" 14 | ) 15 | 16 | type MockReplicationConnection struct { 17 | mock.Mock 18 | } 19 | 20 | func (m *MockReplicationConnection) Connect(ctx context.Context) error { 21 | args := m.Called(ctx) 22 | return args.Error(0) 23 | } 24 | 25 | func (m *MockReplicationConnection) Close(ctx context.Context) error { 26 | args := m.Called(ctx) 27 | return args.Error(0) 28 | } 29 | 30 | func (m *MockReplicationConnection) CreateReplicationSlot(ctx context.Context, slotName string) (pglogrepl.CreateReplicationSlotResult, error) { 31 | args := m.Called(ctx, slotName) 32 | return args.Get(0).(pglogrepl.CreateReplicationSlotResult), args.Error(1) 33 | } 34 | 35 | func (m *MockReplicationConnection) StartReplication(ctx context.Context, slotName string, startLSN pglogrepl.LSN, options pglogrepl.StartReplicationOptions) error { 36 | args := m.Called(ctx, slotName, startLSN, options) 37 | return args.Error(0) 38 | } 39 | 40 | func (m *MockReplicationConnection) ReceiveMessage(ctx context.Context) (pgproto3.BackendMessage, error) { 41 | args := m.Called(ctx) 42 | msg := args.Get(0) 43 | if msg == nil { 44 | return nil, args.Error(1) 45 | } 46 | return msg.(pgproto3.BackendMessage), args.Error(1) 47 | } 48 | 49 | func (m *MockReplicationConnection) SendStandbyStatusUpdate(ctx context.Context, status pglogrepl.StandbyStatusUpdate) error { 50 | args := m.Called(ctx, status) 51 | return args.Error(0) 52 | } 53 | 54 | type MockStandardConnection struct { 55 | mock.Mock 56 | } 57 | 58 | func (m *MockStandardConnection) Connect(ctx context.Context) error { 59 | args := m.Called(ctx) 60 | return args.Error(0) 61 | } 62 | 63 | func (m *MockStandardConnection) Close(ctx context.Context) error { 64 | args := m.Called(ctx) 65 | return args.Error(0) 66 | } 67 | 68 | func (m *MockStandardConnection) Exec(ctx context.Context, sql string, arguments ...interface{}) (pgconn.CommandTag, error) { 69 | args := m.Called(ctx, sql, arguments) 70 | return args.Get(0).(pgconn.CommandTag), args.Error(1) 71 | } 72 | 73 | func (m *MockStandardConnection) Query(ctx context.Context, sql string, args ...interface{}) (pgx.Rows, error) { 74 | mockArgs := m.Called(ctx, sql, args) 75 | return mockArgs.Get(0).(pgx.Rows), mockArgs.Error(1) 76 | } 77 | 78 | func (m *MockStandardConnection) QueryRow(ctx context.Context, sql string, args ...interface{}) pgx.Row { 79 | mockArgs := m.Called(ctx, sql, args) 80 | return mockArgs.Get(0).(pgx.Row) 81 | } 82 | 83 | func (m *MockStandardConnection) BeginTx(ctx context.Context, txOptions pgx.TxOptions) (pgx.Tx, error) { 84 | args := m.Called(ctx, txOptions) 85 | return args.Get(0).(pgx.Tx), args.Error(1) 86 | } 87 | 88 | func (m *MockStandardConnection) Acquire(ctx context.Context) (replicator.PgxPoolConn, error) { 89 | args := m.Called(ctx) 90 | return args.Get(0).(replicator.PgxPoolConn), args.Error(1) 91 | } 92 | 93 | type MockSink struct { 94 | mock.Mock 95 | } 96 | 97 | func (m *MockSink) WriteBatch(data []interface{}) error { 98 | args := m.Called(data) 99 | return args.Error(0) 100 | } 101 | 102 | func (m *MockSink) Close() error { 103 | args := m.Called() 104 | return args.Error(0) 105 | } 106 | 107 | type MockPgxPoolConn struct { 108 | mock.Mock 109 | } 110 | 111 | func (m *MockPgxPoolConn) BeginTx(ctx context.Context, txOptions pgx.TxOptions) (pgx.Tx, error) { 112 | args := m.Called(ctx, txOptions) 113 | return args.Get(0).(pgx.Tx), 
args.Error(1) 114 | } 115 | 116 | func (m *MockPgxPoolConn) Exec(ctx context.Context, sql string, arguments ...interface{}) (pgconn.CommandTag, error) { 117 | args := m.Called(ctx, sql, arguments) 118 | return args.Get(0).(pgconn.CommandTag), args.Error(1) 119 | } 120 | 121 | func (m *MockPgxPoolConn) Query(ctx context.Context, sql string, args ...interface{}) (pgx.Rows, error) { 122 | mockArgs := m.Called(ctx, sql, args) 123 | return mockArgs.Get(0).(pgx.Rows), mockArgs.Error(1) 124 | } 125 | 126 | func (m *MockPgxPoolConn) QueryRow(ctx context.Context, sql string, args ...interface{}) pgx.Row { 127 | mockArgs := m.Called(ctx, sql, args) 128 | return mockArgs.Get(0).(pgx.Row) 129 | } 130 | 131 | func (m *MockPgxPoolConn) Release() { 132 | m.Called() 133 | } 134 | 135 | type MockTx struct { 136 | mock.Mock 137 | } 138 | 139 | func (m *MockTx) Begin(ctx context.Context) (pgx.Tx, error) { 140 | args := m.Called(ctx) 141 | return args.Get(0).(pgx.Tx), args.Error(1) 142 | } 143 | 144 | func (m *MockTx) Commit(ctx context.Context) error { 145 | args := m.Called(ctx) 146 | return args.Error(0) 147 | } 148 | 149 | func (m *MockTx) CopyFrom(ctx context.Context, tableName pgx.Identifier, columnNames []string, rowSrc pgx.CopyFromSource) (int64, error) { 150 | args := m.Called(ctx, tableName, columnNames, rowSrc) 151 | return args.Get(0).(int64), args.Error(1) 152 | } 153 | 154 | func (m *MockTx) SendBatch(ctx context.Context, b *pgx.Batch) pgx.BatchResults { 155 | args := m.Called(ctx, b) 156 | return args.Get(0).(pgx.BatchResults) 157 | } 158 | 159 | func (m *MockTx) LargeObjects() pgx.LargeObjects { 160 | args := m.Called() 161 | return args.Get(0).(pgx.LargeObjects) 162 | } 163 | 164 | func (m *MockTx) Prepare(ctx context.Context, name, sql string) (*pgconn.StatementDescription, error) { 165 | args := m.Called(ctx, name, sql) 166 | return args.Get(0).(*pgconn.StatementDescription), args.Error(1) 167 | } 168 | 169 | func (m *MockTx) Exec(ctx context.Context, sql string, arguments ...interface{}) (pgconn.CommandTag, error) { 170 | args := []interface{}{ctx, sql} 171 | args = append(args, arguments...) 172 | callArgs := m.Called(args...) 173 | return callArgs.Get(0).(pgconn.CommandTag), callArgs.Error(1) 174 | } 175 | 176 | func (m *MockTx) Query(ctx context.Context, sql string, args ...interface{}) (pgx.Rows, error) { 177 | mockArgs := m.Called(ctx, sql, args) 178 | return mockArgs.Get(0).(pgx.Rows), mockArgs.Error(1) 179 | } 180 | 181 | func (m *MockTx) QueryRow(ctx context.Context, sql string, args ...interface{}) pgx.Row { 182 | callArgs := []interface{}{ctx, sql} 183 | callArgs = append(callArgs, args...) 184 | mockArgs := m.Called(callArgs...) 185 | return mockArgs.Get(0).(pgx.Row) 186 | } 187 | 188 | func (m *MockTx) Conn() *pgx.Conn { 189 | args := m.Called() 190 | return args.Get(0).(*pgx.Conn) 191 | } 192 | 193 | func (m *MockTx) Rollback(ctx context.Context) error { 194 | args := m.Called(ctx) 195 | return args.Error(0) 196 | } 197 | 198 | type MockRow struct { 199 | scanFunc func(dest ...interface{}) error 200 | } 201 | 202 | func (m MockRow) Scan(dest ...interface{}) error { 203 | return m.scanFunc(dest...) 204 | } 205 | 206 | type MockRows struct { 207 | mock.Mock 208 | } 209 | 210 | func (m *MockRows) Next() bool { 211 | args := m.Called() 212 | return args.Bool(0) 213 | } 214 | 215 | func (m *MockRows) Scan(dest ...interface{}) error { 216 | args := m.Called(dest...) 
217 | return args.Error(0) 218 | } 219 | 220 | func (m *MockRows) Err() error { 221 | args := m.Called() 222 | return args.Error(0) 223 | } 224 | 225 | func (m *MockRows) Close() { 226 | m.Called() 227 | } 228 | 229 | func (m *MockRows) CommandTag() pgconn.CommandTag { 230 | args := m.Called() 231 | return args.Get(0).(pgconn.CommandTag) 232 | } 233 | 234 | func (m *MockRows) FieldDescriptions() []pgconn.FieldDescription { 235 | args := m.Called() 236 | return args.Get(0).([]pgconn.FieldDescription) 237 | } 238 | 239 | func (m *MockRows) Values() ([]interface{}, error) { 240 | args := m.Called() 241 | return args.Get(0).([]interface{}), args.Error(1) 242 | } 243 | 244 | func (m *MockRows) RawValues() [][]byte { 245 | args := m.Called() 246 | return args.Get(0).([][]byte) 247 | } 248 | 249 | func (m *MockRows) Conn() *pgx.Conn { 250 | args := m.Called() 251 | return args.Get(0).(*pgx.Conn) 252 | } 253 | 254 | // MockNATSClient mocks the NATSClient 255 | type MockNATSClient struct { 256 | mock.Mock 257 | } 258 | 259 | // PublishMessage mocks the PublishMessage method 260 | func (m *MockNATSClient) PublishMessage(subject string, data []byte) error { 261 | args := m.Called(subject, data) 262 | return args.Error(0) 263 | } 264 | 265 | // Close mocks the Close method 266 | func (m *MockNATSClient) Close() error { 267 | args := m.Called() 268 | return args.Error(0) 269 | } 270 | 271 | // SaveState mocks the SaveState method 272 | func (m *MockNATSClient) SaveState(state pgflonats.State) error { 273 | args := m.Called(state) 274 | return args.Error(0) 275 | } 276 | 277 | // GetState mocks the GetState method 278 | func (m *MockNATSClient) GetState() (pgflonats.State, error) { 279 | args := m.Called() 280 | return args.Get(0).(pgflonats.State), args.Error(1) 281 | } 282 | 283 | // JetStream mocks the JetStream method 284 | func (m *MockNATSClient) JetStream() nats.JetStreamContext { 285 | args := m.Called() 286 | return args.Get(0).(nats.JetStreamContext) 287 | } 288 | -------------------------------------------------------------------------------- /pkg/routing/README.md: -------------------------------------------------------------------------------- 1 | # Message Routing 2 | 3 | Table routing allows you to map source tables and columns to different destinations while preserving data types. 4 | 5 | ## Configuration 6 | 7 | Create a YAML file (e.g., `routing.yaml`) with your routing rules: 8 | 9 | ```yaml 10 | users: 11 | source_table: users 12 | destination_table: customers 13 | column_mappings: 14 | - source: id 15 | destination: customer_id 16 | - source: username 17 | destination: customer_name 18 | operations: 19 | - INSERT 20 | - UPDATE 21 | 22 | orders: 23 | source_table: orders 24 | destination_table: transactions 25 | column_mappings: 26 | - source: id 27 | destination: transaction_id 28 | - source: total_amount 29 | destination: amount 30 | operations: 31 | - INSERT 32 | - UPDATE 33 | - DELETE 34 | ``` 35 | 36 | ## Usage with Routing 37 | 38 | Start the worker with the routing configuration: 39 | 40 | ```shell 41 | pg_flo worker postgres --routing-config routing.yaml ... 
42 | ``` 43 | 44 | ## Routing Rules 45 | 46 | Each table configuration supports: 47 | 48 | - `source_table`: Original table name (required) 49 | - `destination_table`: Target table name (optional, defaults to source_table) 50 | - `column_mappings`: List of column name mappings (optional) 51 | - `source`: Original column name 52 | - `destination`: New column name in target 53 | - `operations`: List of operations to replicate (required) 54 | - Supported: `INSERT`, `UPDATE`, `DELETE` 55 | 56 | ## Important Notes 57 | 58 | - Column data types must match between source and destination 59 | - Primary keys are automatically mapped 60 | - All specified columns must exist in both tables 61 | - Operations not listed in `operations` will be ignored. Defaults to all operations. 62 | - Unlisted columns are preserved with their original names 63 | - Complex types (jsonb, arrays) are preserved during mapping 64 | 65 | ## Examples 66 | 67 | ### Basic Table Mapping 68 | 69 | ```yaml 70 | users: 71 | source_table: users 72 | destination_table: customers 73 | operations: 74 | - INSERT 75 | - UPDATE 76 | ``` 77 | 78 | ### Column Remapping 79 | 80 | ```yaml 81 | products: 82 | source_table: products 83 | destination_table: items 84 | column_mappings: 85 | - source: id 86 | destination: item_id 87 | - source: name 88 | destination: item_name 89 | operations: 90 | - INSERT 91 | - UPDATE 92 | - DELETE 93 | ``` 94 | -------------------------------------------------------------------------------- /pkg/routing/router.go: -------------------------------------------------------------------------------- 1 | package routing 2 | 3 | import ( 4 | "sync" 5 | 6 | "github.com/jackc/pglogrepl" 7 | "github.com/pgflo/pg_flo/pkg/utils" 8 | "github.com/rs/zerolog" 9 | "github.com/rs/zerolog/log" 10 | ) 11 | 12 | type ColumnMapping struct { 13 | Source string `yaml:"source"` 14 | Destination string `yaml:"destination"` 15 | } 16 | 17 | type TableRoute struct { 18 | SourceTable string `yaml:"source_table"` 19 | DestinationTable string `yaml:"destination_table"` 20 | ColumnMappings []ColumnMapping `yaml:"column_mappings"` 21 | Operations []utils.OperationType `yaml:"operations"` 22 | } 23 | 24 | type Router struct { 25 | Routes map[string]TableRoute 26 | mutex sync.RWMutex 27 | logger zerolog.Logger 28 | } 29 | 30 | func NewRouter() *Router { 31 | return &Router{ 32 | Routes: make(map[string]TableRoute), 33 | logger: log.With().Str("component", "router").Logger(), 34 | } 35 | } 36 | 37 | func (r *Router) AddRoute(route TableRoute) { 38 | r.mutex.Lock() 39 | defer r.mutex.Unlock() 40 | r.Routes[route.SourceTable] = route 41 | } 42 | 43 | func (r *Router) ApplyRouting(message *utils.CDCMessage) (*utils.CDCMessage, error) { 44 | r.mutex.RLock() 45 | defer r.mutex.RUnlock() 46 | route, exists := r.Routes[message.Table] 47 | if !exists { 48 | return message, nil 49 | } 50 | 51 | if !ContainsOperation(route.Operations, message.Type) { 52 | return nil, nil 53 | } 54 | 55 | routedMessage := *message 56 | routedMessage.Table = route.DestinationTable 57 | 58 | if len(route.ColumnMappings) > 0 { 59 | newColumns := make([]*pglogrepl.RelationMessageColumn, len(message.Columns)) 60 | for i, col := range message.Columns { 61 | newCol := *col 62 | mappedName := GetMappedColumnName(route.ColumnMappings, col.Name) 63 | if mappedName != "" { 64 | newCol.Name = mappedName 65 | } 66 | newColumns[i] = &newCol 67 | } 68 | routedMessage.Columns = newColumns 69 | 70 | if routedMessage.ReplicationKey.Type != utils.ReplicationKeyFull { 71 | mappedColumns := 
make([]string, len(routedMessage.ReplicationKey.Columns)) 72 | for i, keyCol := range routedMessage.ReplicationKey.Columns { 73 | mappedName := GetMappedColumnName(route.ColumnMappings, keyCol) 74 | if mappedName != "" { 75 | mappedColumns[i] = mappedName 76 | } else { 77 | mappedColumns[i] = keyCol 78 | } 79 | } 80 | routedMessage.ReplicationKey.Columns = mappedColumns 81 | } 82 | } 83 | 84 | return &routedMessage, nil 85 | } 86 | 87 | // ContainsOperation checks if the given operation is in the list of operations 88 | func ContainsOperation(operations []utils.OperationType, operation utils.OperationType) bool { 89 | for _, op := range operations { 90 | if op == operation { 91 | return true 92 | } 93 | } 94 | return false 95 | } 96 | 97 | // GetMappedColumnName returns the destination column name for a given source column name 98 | func GetMappedColumnName(mappings []ColumnMapping, sourceName string) string { 99 | for _, mapping := range mappings { 100 | if mapping.Source == sourceName { 101 | return mapping.Destination 102 | } 103 | } 104 | return "" 105 | } 106 | 107 | // LoadRoutes loads routes from the provided configuration 108 | func (r *Router) LoadRoutes(config map[string]TableRoute) error { 109 | for sourceName, route := range config { 110 | r.logger.Info(). 111 | Str("source_table", sourceName). 112 | Str("destination_table", route.DestinationTable). 113 | Any("operations", route.Operations). 114 | Any("column_mappings", route.ColumnMappings). 115 | Msg("Loading route") 116 | 117 | route.SourceTable = sourceName 118 | if route.DestinationTable == "" { 119 | route.DestinationTable = sourceName 120 | } 121 | r.AddRoute(route) 122 | } 123 | return nil 124 | } 125 | -------------------------------------------------------------------------------- /pkg/routing/tests/routing_test.go: -------------------------------------------------------------------------------- 1 | package routing_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/jackc/pglogrepl" 7 | "github.com/pgflo/pg_flo/pkg/routing" 8 | "github.com/pgflo/pg_flo/pkg/utils" 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func TestRouter_ApplyRouting(t *testing.T) { 13 | tests := []struct { 14 | name string 15 | routes map[string]routing.TableRoute 16 | inputMessage *utils.CDCMessage 17 | expectedOutput *utils.CDCMessage 18 | expectNil bool 19 | }{ 20 | { 21 | name: "Simple table routing", 22 | routes: map[string]routing.TableRoute{ 23 | "source_table": { 24 | SourceTable: "source_table", 25 | DestinationTable: "dest_table", 26 | Operations: []utils.OperationType{utils.OperationInsert, utils.OperationUpdate, utils.OperationDelete}, 27 | }, 28 | }, 29 | inputMessage: &utils.CDCMessage{ 30 | Type: utils.OperationInsert, 31 | Table: "source_table", 32 | Columns: []*pglogrepl.RelationMessageColumn{ 33 | {Name: "id", DataType: 23}, 34 | {Name: "name", DataType: 25}, 35 | }, 36 | }, 37 | expectedOutput: &utils.CDCMessage{ 38 | Type: utils.OperationInsert, 39 | Table: "dest_table", 40 | Columns: []*pglogrepl.RelationMessageColumn{ 41 | {Name: "id", DataType: 23}, 42 | {Name: "name", DataType: 25}, 43 | }, 44 | }, 45 | }, 46 | { 47 | name: "Column mapping", 48 | routes: map[string]routing.TableRoute{ 49 | "users": { 50 | SourceTable: "users", 51 | DestinationTable: "customers", 52 | ColumnMappings: []routing.ColumnMapping{ 53 | {Source: "user_id", Destination: "customer_id"}, 54 | {Source: "user_name", Destination: "customer_name"}, 55 | }, 56 | Operations: []utils.OperationType{utils.OperationInsert, utils.OperationUpdate, 
utils.OperationDelete}, 57 | }, 58 | }, 59 | inputMessage: &utils.CDCMessage{ 60 | Type: utils.OperationUpdate, 61 | Table: "users", 62 | Columns: []*pglogrepl.RelationMessageColumn{ 63 | {Name: "user_id", DataType: 23}, 64 | {Name: "user_name", DataType: 25}, 65 | {Name: "email", DataType: 25}, 66 | }, 67 | }, 68 | expectedOutput: &utils.CDCMessage{ 69 | Type: utils.OperationUpdate, 70 | Table: "customers", 71 | Columns: []*pglogrepl.RelationMessageColumn{ 72 | {Name: "customer_id", DataType: 23}, 73 | {Name: "customer_name", DataType: 25}, 74 | {Name: "email", DataType: 25}, 75 | }, 76 | }, 77 | }, 78 | { 79 | name: "Operation filtering - allowed", 80 | routes: map[string]routing.TableRoute{ 81 | "orders": { 82 | SourceTable: "orders", 83 | DestinationTable: "processed_orders", 84 | Operations: []utils.OperationType{utils.OperationInsert, utils.OperationUpdate}, 85 | }, 86 | }, 87 | inputMessage: &utils.CDCMessage{ 88 | Type: utils.OperationUpdate, 89 | Table: "orders", 90 | }, 91 | expectedOutput: &utils.CDCMessage{ 92 | Type: utils.OperationUpdate, 93 | Table: "processed_orders", 94 | }, 95 | }, 96 | { 97 | name: "Operation filtering - not allowed", 98 | routes: map[string]routing.TableRoute{ 99 | "orders": { 100 | SourceTable: "orders", 101 | DestinationTable: "processed_orders", 102 | Operations: []utils.OperationType{utils.OperationInsert, utils.OperationUpdate}, 103 | }, 104 | }, 105 | inputMessage: &utils.CDCMessage{ 106 | Type: utils.OperationDelete, 107 | Table: "orders", 108 | }, 109 | expectNil: true, 110 | }, 111 | { 112 | name: "No route for table", 113 | routes: map[string]routing.TableRoute{}, 114 | inputMessage: &utils.CDCMessage{ 115 | Type: utils.OperationInsert, 116 | Table: "unknown_table", 117 | }, 118 | expectedOutput: &utils.CDCMessage{ 119 | Type: utils.OperationInsert, 120 | Table: "unknown_table", 121 | }, 122 | }, 123 | } 124 | 125 | for _, tt := range tests { 126 | t.Run(tt.name, func(t *testing.T) { 127 | router := routing.NewRouter() 128 | for _, route := range tt.routes { 129 | router.AddRoute(route) 130 | } 131 | 132 | result, err := router.ApplyRouting(tt.inputMessage) 133 | 134 | assert.NoError(t, err) 135 | 136 | if tt.expectNil { 137 | assert.Nil(t, result) 138 | } else { 139 | assert.NotNil(t, result) 140 | assert.Equal(t, tt.expectedOutput.Type, result.Type) 141 | assert.Equal(t, tt.expectedOutput.Table, result.Table) 142 | assert.Equal(t, len(tt.expectedOutput.Columns), len(result.Columns)) 143 | for i, col := range tt.expectedOutput.Columns { 144 | assert.Equal(t, col.Name, result.Columns[i].Name) 145 | assert.Equal(t, col.DataType, result.Columns[i].DataType) 146 | } 147 | } 148 | }) 149 | } 150 | } 151 | 152 | func TestRouter_LoadRoutes(t *testing.T) { 153 | router := routing.NewRouter() 154 | config := map[string]routing.TableRoute{ 155 | "table1": { 156 | SourceTable: "table1", 157 | DestinationTable: "dest_table1", 158 | ColumnMappings: []routing.ColumnMapping{ 159 | {Source: "col1", Destination: "dest_col1"}, 160 | }, 161 | Operations: []utils.OperationType{utils.OperationInsert, utils.OperationUpdate}, 162 | }, 163 | "table2": { 164 | SourceTable: "table2", 165 | DestinationTable: "dest_table2", 166 | Operations: []utils.OperationType{utils.OperationInsert, utils.OperationUpdate, utils.OperationDelete}, 167 | }, 168 | } 169 | 170 | err := router.LoadRoutes(config) 171 | assert.NoError(t, err) 172 | 173 | assert.Len(t, router.Routes, 2) 174 | assert.Contains(t, router.Routes, "table1") 175 | assert.Contains(t, router.Routes, "table2") 176 | 
177 | assert.Equal(t, "dest_table1", router.Routes["table1"].DestinationTable) 178 | assert.Equal(t, "dest_table2", router.Routes["table2"].DestinationTable) 179 | 180 | assert.Len(t, router.Routes["table1"].ColumnMappings, 1) 181 | assert.Len(t, router.Routes["table1"].Operations, 2) 182 | assert.Len(t, router.Routes["table2"].Operations, 3) 183 | } 184 | 185 | func TestRouter_AddRoute(t *testing.T) { 186 | router := routing.NewRouter() 187 | route := routing.TableRoute{ 188 | SourceTable: "source", 189 | DestinationTable: "destination", 190 | ColumnMappings: []routing.ColumnMapping{ 191 | {Source: "src_col", Destination: "dest_col"}, 192 | }, 193 | Operations: []utils.OperationType{utils.OperationInsert}, 194 | } 195 | 196 | router.AddRoute(route) 197 | 198 | assert.Len(t, router.Routes, 1) 199 | assert.Contains(t, router.Routes, "source") 200 | assert.Equal(t, route, router.Routes["source"]) 201 | } 202 | 203 | func TestContainsOperation(t *testing.T) { 204 | operations := []utils.OperationType{utils.OperationInsert, utils.OperationUpdate} 205 | 206 | assert.True(t, routing.ContainsOperation(operations, utils.OperationInsert)) 207 | assert.True(t, routing.ContainsOperation(operations, utils.OperationUpdate)) 208 | assert.False(t, routing.ContainsOperation(operations, utils.OperationDelete)) 209 | } 210 | 211 | func TestGetMappedColumnName(t *testing.T) { 212 | mappings := []routing.ColumnMapping{ 213 | {Source: "col1", Destination: "mapped_col1"}, 214 | {Source: "col2", Destination: "mapped_col2"}, 215 | } 216 | 217 | assert.Equal(t, "mapped_col1", routing.GetMappedColumnName(mappings, "col1")) 218 | assert.Equal(t, "mapped_col2", routing.GetMappedColumnName(mappings, "col2")) 219 | assert.Equal(t, "", routing.GetMappedColumnName(mappings, "col3")) 220 | } 221 | -------------------------------------------------------------------------------- /pkg/rules/README.md: -------------------------------------------------------------------------------- 1 | ## Transformation Rules 2 | 3 | There are two types of transformation rules available: 4 | 5 | 1. **Regex Transform** 6 | 7 | - Type: `"regex"` 8 | - Parameters: 9 | - `pattern`: The regular expression pattern to match 10 | - `replace`: The replacement string 11 | - Description: Applies a regular expression replacement on string values in the specified column. 12 | 13 | 2. **Mask Transform** 14 | - Type: `"mask"` 15 | - Parameters: 16 | - `mask_char`: The character to use for masking 17 | - Description: Masks the content of string values, keeping the first and last characters visible and replacing the rest with the specified mask character. 18 | 19 | ## Filtering Rules 20 | 21 | Filtering rules use various comparison operators to determine whether a row should be included in the output. The available operators are: 22 | 23 | 1. **Equality** (`"eq"`) 24 | 2. **Inequality** (`"ne"`) 25 | 3. **Greater Than** (`"gt"`) 26 | 4. **Less Than** (`"lt"`) 27 | 5. **Greater Than or Equal To** (`"gte"`) 28 | 6. **Less Than or Equal To** (`"lte"`) 29 | 7. **Contains** (`"contains"`) 30 | 31 | ## Rule Properties 32 | 33 | Both transformation and filtering rules share these common properties: 34 | 35 | - `type`: Specifies whether it's a "transform" or "filter" rule. 36 | - `column`: The name of the column to apply the rule to. 37 | - `operations`: An array of operations (INSERT, UPDATE, DELETE) to which the rule should be applied. If not specified, it applies to all operations. 
38 | - `allow_empty_deletes`: A boolean flag that, when set to true, allows the rule to process delete operations even if the column value is empty. 39 | 40 | ## Additional Notes 41 | 42 | - The rules support various data types, including integers, floats, strings, timestamps, booleans, and numeric (decimal) values. 43 | - For filtering rules, the comparison is type-aware, ensuring that values are compared appropriately based on their data type. 44 | - The `contains` operator for filtering only works on string values. 45 | - Transformation rules currently only work on string values. If a non-string value is encountered, the transformation is skipped and a warning is logged. 46 | 47 | To use these rules, you would define them in a YAML configuration file and specify the path to this file using the `--rules-config` flag when running `pg_flo`. The exact structure of the YAML file should match the rule properties and parameters described above. 48 | -------------------------------------------------------------------------------- /pkg/rules/engine.go: -------------------------------------------------------------------------------- 1 | package rules 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/pgflo/pg_flo/pkg/utils" 7 | ) 8 | 9 | // AddRule adds a new rule for the specified table 10 | func (re *RuleEngine) AddRule(tableName string, rule Rule) { 11 | re.mutex.Lock() 12 | defer re.mutex.Unlock() 13 | re.Rules[tableName] = append(re.Rules[tableName], rule) 14 | } 15 | 16 | // ApplyRules applies all rules for the specified table to the given data 17 | func (re *RuleEngine) ApplyRules(message *utils.CDCMessage) (*utils.CDCMessage, error) { 18 | re.mutex.RLock() 19 | defer re.mutex.RUnlock() 20 | 21 | rules, exists := re.Rules[message.Table] 22 | if !exists { 23 | return message, nil // No rules for this table 24 | } 25 | 26 | logger.Info(). 27 | Str("table", message.Table). 28 | Str("operation", string(message.Type)). 29 | Int("ruleCount", len(rules)). 30 | Msg("Applying rules") 31 | 32 | var err error 33 | for _, rule := range rules { 34 | message, err = rule.Apply(message) 35 | if err != nil { 36 | return nil, err 37 | } 38 | if message == nil { 39 | // Message filtered out 40 | return nil, nil 41 | } 42 | } 43 | return message, nil 44 | } 45 | 46 | // LoadRules loads rules from the provided configuration 47 | func (re *RuleEngine) LoadRules(config Config) error { 48 | for tableName, ruleConfigs := range config.Tables { 49 | logger.Info().Str("table", tableName).Msg("Loading rules for table") 50 | for i, ruleConfig := range ruleConfigs { 51 | rule, err := createRule(tableName, ruleConfig) 52 | if err != nil { 53 | return fmt.Errorf("error creating rule for table %s: %w", tableName, err) 54 | } 55 | logger.Info(). 56 | Str("table", tableName). 57 | Int("ruleIndex", i+1). 58 | Str("ruleType", fmt.Sprintf("%T", rule)). 59 | Msg("Created rule") 60 | re.AddRule(tableName, rule) 61 | } 62 | } 63 | return nil 64 | } 65 | 66 | // createRule creates a new rule based on the provided configuration 67 | func createRule(tableName string, config RuleConfig) (Rule, error) { 68 | logger.Info(). 69 | Str("table", tableName). 70 | Str("ruleType", config.Type). 
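	// Illustrative (hypothetical) rules-config YAML that would reach this
	// function one RuleConfig at a time; the structure follows Config and
	// RuleConfig in types.go, and the parameter keys mirror those used in the
	// engine tests below:
	//
	//   tables:
	//     users:
	//       - type: transform
	//         column: test_column
	//         parameters:
	//           type: mask
	//           mask_char: "*"
	//       - type: filter
	//         column: id
	//         parameters:
	//           operator: gt
	//           value: 100
	//         operations:
	//           - DELETE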
71 | Msg("Creating rule") 72 | switch config.Type { 73 | case "transform": 74 | return NewTransformRule(tableName, config.Column, config.Parameters) 75 | case "filter": 76 | return NewFilterRule(tableName, config.Column, config.Parameters) 77 | default: 78 | return nil, fmt.Errorf("unknown rule type: %s", config.Type) 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /pkg/rules/tests/engine_test.go: -------------------------------------------------------------------------------- 1 | package rules_test 2 | 3 | import ( 4 | "log" 5 | "os" 6 | "testing" 7 | 8 | "github.com/jackc/pglogrepl" 9 | "github.com/jackc/pgtype" 10 | "github.com/pgflo/pg_flo/pkg/rules" 11 | "github.com/pgflo/pg_flo/pkg/utils" 12 | "github.com/stretchr/testify/assert" 13 | ) 14 | 15 | func TestMain(m *testing.M) { 16 | log.SetOutput(os.Stdout) 17 | os.Exit(m.Run()) 18 | } 19 | 20 | func TestRuleEngine_AddRule(t *testing.T) { 21 | re := rules.NewRuleEngine() 22 | rule := &MockRule{ 23 | TableName: "users", 24 | ColumnName: "test_column", 25 | ApplyFunc: func(message *utils.CDCMessage) (*utils.CDCMessage, error) { 26 | return message, nil 27 | }, 28 | } 29 | re.AddRule("users", rule) 30 | 31 | message := &utils.CDCMessage{ 32 | Type: utils.OperationInsert, 33 | Schema: "public", 34 | Table: "users", 35 | Columns: []*pglogrepl.RelationMessageColumn{ 36 | {Name: "test_column", DataType: pgtype.TextOID}, 37 | }, 38 | NewTuple: &pglogrepl.TupleData{ 39 | Columns: []*pglogrepl.TupleDataColumn{ 40 | {Data: []byte("original")}, 41 | }, 42 | }, 43 | } 44 | 45 | result, err := re.ApplyRules(message) 46 | assert.NoError(t, err) 47 | assert.NotNil(t, result) 48 | } 49 | 50 | func TestRuleEngine_ApplyRules(t *testing.T) { 51 | re := rules.NewRuleEngine() 52 | rule := &MockRule{ 53 | TableName: "users", 54 | ColumnName: "test_column", 55 | ApplyFunc: func(message *utils.CDCMessage) (*utils.CDCMessage, error) { 56 | message.NewTuple.Columns[0].Data = []byte("transformed") 57 | return message, nil 58 | }, 59 | } 60 | re.AddRule("users", rule) 61 | 62 | message := &utils.CDCMessage{ 63 | Type: utils.OperationInsert, 64 | Schema: "public", 65 | Table: "users", 66 | Columns: []*pglogrepl.RelationMessageColumn{ 67 | {Name: "test_column", DataType: pgtype.TextOID}, 68 | }, 69 | NewTuple: &pglogrepl.TupleData{ 70 | Columns: []*pglogrepl.TupleDataColumn{ 71 | {Data: []byte("original")}, 72 | }, 73 | }, 74 | } 75 | 76 | result, err := re.ApplyRules(message) 77 | 78 | assert.NoError(t, err) 79 | value, err := result.GetColumnValue("test_column", false) 80 | assert.NoError(t, err) 81 | assert.Equal(t, "transformed", value) 82 | } 83 | 84 | func TestRuleEngine_ApplyRules_NoRules(t *testing.T) { 85 | re := rules.NewRuleEngine() 86 | message := &utils.CDCMessage{ 87 | Type: utils.OperationInsert, 88 | Schema: "public", 89 | Table: "users", 90 | Columns: []*pglogrepl.RelationMessageColumn{ 91 | {Name: "test_column", DataType: pgtype.TextOID}, 92 | }, 93 | NewTuple: &pglogrepl.TupleData{ 94 | Columns: []*pglogrepl.TupleDataColumn{ 95 | {Data: []byte("original")}, 96 | }, 97 | }, 98 | } 99 | 100 | result, err := re.ApplyRules(message) 101 | 102 | assert.NoError(t, err) 103 | assert.Equal(t, message, result) 104 | } 105 | 106 | func TestRuleEngine_LoadRules_Transform(t *testing.T) { 107 | re := rules.NewRuleEngine() 108 | config := rules.Config{ 109 | Tables: map[string][]rules.RuleConfig{ 110 | "users": { 111 | { 112 | Type: "transform", 113 | Column: "test_column", 114 | Parameters: 
map[string]interface{}{ 115 | "type": "mask", 116 | "mask_char": "*", 117 | }, 118 | Operations: []utils.OperationType{utils.OperationInsert, utils.OperationUpdate}, 119 | }, 120 | }, 121 | }, 122 | } 123 | 124 | err := re.LoadRules(config) 125 | assert.NoError(t, err) 126 | 127 | message := &utils.CDCMessage{ 128 | Type: utils.OperationInsert, 129 | Schema: "public", 130 | Table: "users", 131 | Columns: []*pglogrepl.RelationMessageColumn{ 132 | {Name: "test_column", DataType: pgtype.TextOID}, 133 | }, 134 | NewTuple: &pglogrepl.TupleData{ 135 | Columns: []*pglogrepl.TupleDataColumn{ 136 | {Data: []byte("test")}, 137 | }, 138 | }, 139 | } 140 | 141 | result, err := re.ApplyRules(message) 142 | assert.NoError(t, err) 143 | assert.NotNil(t, result) 144 | value, err := result.GetColumnValue("test_column", false) 145 | assert.NoError(t, err) 146 | assert.Equal(t, "t**t", value) 147 | } 148 | 149 | func TestRuleEngine_LoadRules_Filter(t *testing.T) { 150 | re := rules.NewRuleEngine() 151 | config := rules.Config{ 152 | Tables: map[string][]rules.RuleConfig{ 153 | "users": { 154 | { 155 | Type: "filter", 156 | Column: "id", 157 | Parameters: map[string]interface{}{ 158 | "operator": "gt", 159 | "value": int64(100), 160 | }, 161 | Operations: []utils.OperationType{utils.OperationDelete}, 162 | }, 163 | }, 164 | }, 165 | } 166 | 167 | err := re.LoadRules(config) 168 | assert.NoError(t, err) 169 | 170 | message := &utils.CDCMessage{ 171 | Type: utils.OperationDelete, 172 | Schema: "public", 173 | Table: "users", 174 | Columns: []*pglogrepl.RelationMessageColumn{ 175 | {Name: "id", DataType: pgtype.Int8OID}, 176 | }, 177 | OldTuple: &pglogrepl.TupleData{ 178 | Columns: []*pglogrepl.TupleDataColumn{ 179 | {Data: []byte("101")}, 180 | }, 181 | }, 182 | } 183 | 184 | result, err := re.ApplyRules(message) 185 | assert.NoError(t, err) 186 | assert.NotNil(t, result) 187 | value, err := result.GetColumnValue("id", true) 188 | assert.NoError(t, err) 189 | assert.Equal(t, int64(101), value) 190 | 191 | message.OldTuple.Columns[0].Data = []byte("99") 192 | result, err = re.ApplyRules(message) 193 | assert.NoError(t, err) 194 | assert.Nil(t, result) 195 | } 196 | 197 | func TestRuleEngine_LoadRules_EmptyDeletes(t *testing.T) { 198 | re := rules.NewRuleEngine() 199 | config := rules.Config{ 200 | Tables: map[string][]rules.RuleConfig{ 201 | "users": { 202 | { 203 | Type: "filter", 204 | Column: "id", 205 | AllowEmptyDeletes: true, 206 | Parameters: map[string]interface{}{ 207 | "operator": "eq", 208 | "value": int64(101), 209 | }, 210 | Operations: []utils.OperationType{utils.OperationDelete}, 211 | }, 212 | }, 213 | }, 214 | } 215 | 216 | err := re.LoadRules(config) 217 | assert.NoError(t, err) 218 | 219 | message := &utils.CDCMessage{ 220 | Type: utils.OperationDelete, 221 | Schema: "public", 222 | Table: "users", 223 | Columns: []*pglogrepl.RelationMessageColumn{ 224 | {Name: "id", DataType: pgtype.Int8OID}, 225 | }, 226 | OldTuple: &pglogrepl.TupleData{ 227 | Columns: []*pglogrepl.TupleDataColumn{ 228 | {Data: []byte("101")}, 229 | }, 230 | }, 231 | } 232 | 233 | result, err := re.ApplyRules(message) 234 | assert.NoError(t, err) 235 | assert.NotNil(t, result) 236 | value, err := result.GetColumnValue("id", true) 237 | assert.NoError(t, err) 238 | assert.Equal(t, int64(101), value) 239 | } 240 | 241 | func TestRuleEngine_ApplyRules_FilterRule(t *testing.T) { 242 | re := rules.NewRuleEngine() 243 | config := rules.Config{ 244 | Tables: map[string][]rules.RuleConfig{ 245 | "users": { 246 | { 247 | Type: 
"filter", 248 | Column: "id", 249 | Parameters: map[string]interface{}{ 250 | "operator": "gt", 251 | "value": int64(100), 252 | }, 253 | Operations: []utils.OperationType{utils.OperationUpdate}, 254 | }, 255 | }, 256 | }, 257 | } 258 | 259 | err := re.LoadRules(config) 260 | assert.NoError(t, err) 261 | 262 | message := &utils.CDCMessage{ 263 | Type: utils.OperationUpdate, 264 | Schema: "public", 265 | Table: "users", 266 | Columns: []*pglogrepl.RelationMessageColumn{ 267 | {Name: "id", DataType: pgtype.Int8OID}, 268 | }, 269 | NewTuple: &pglogrepl.TupleData{ 270 | Columns: []*pglogrepl.TupleDataColumn{ 271 | {Data: []byte("101")}, 272 | }, 273 | }, 274 | } 275 | result, err := re.ApplyRules(message) 276 | 277 | assert.NoError(t, err) 278 | assert.NotNil(t, result) 279 | idValue, err := result.GetColumnValue("id", false) 280 | assert.NoError(t, err) 281 | assert.Equal(t, int64(101), idValue) 282 | 283 | message.NewTuple.Columns[0].Data = []byte("99") 284 | result, err = re.ApplyRules(message) 285 | 286 | assert.NoError(t, err) 287 | assert.Nil(t, result) 288 | 289 | message.Type = utils.OperationInsert 290 | message.NewTuple.Columns[0].Data = []byte("101") 291 | result, err = re.ApplyRules(message) 292 | 293 | assert.NoError(t, err) 294 | assert.NotNil(t, result) 295 | idValue, err = result.GetColumnValue("id", false) 296 | assert.NoError(t, err) 297 | assert.Equal(t, int64(101), idValue) 298 | } 299 | -------------------------------------------------------------------------------- /pkg/rules/tests/mocks_test.go: -------------------------------------------------------------------------------- 1 | package rules_test 2 | 3 | import ( 4 | "github.com/pgflo/pg_flo/pkg/utils" 5 | ) 6 | 7 | type MockRule struct { 8 | TableName string 9 | ColumnName string 10 | ApplyFunc func(*utils.CDCMessage) (*utils.CDCMessage, error) 11 | } 12 | 13 | func (r *MockRule) Apply(message *utils.CDCMessage) (*utils.CDCMessage, error) { 14 | return r.ApplyFunc(message) 15 | } 16 | -------------------------------------------------------------------------------- /pkg/rules/types.go: -------------------------------------------------------------------------------- 1 | package rules 2 | 3 | import ( 4 | "sync" 5 | 6 | "github.com/pgflo/pg_flo/pkg/utils" 7 | ) 8 | 9 | // Rule interface defines the methods that all rules must implement 10 | type Rule interface { 11 | Apply(message *utils.CDCMessage) (*utils.CDCMessage, error) 12 | } 13 | 14 | // RuleConfig represents the configuration for a single rule 15 | type RuleConfig struct { 16 | Type string `yaml:"type"` 17 | Column string `yaml:"column"` 18 | Parameters map[string]interface{} `yaml:"parameters"` 19 | Operations []utils.OperationType `yaml:"operations,omitempty"` 20 | AllowEmptyDeletes bool `yaml:"allow_empty_deletes,omitempty"` 21 | } 22 | 23 | // Config represents the overall configuration for rules 24 | type Config struct { 25 | Tables map[string][]RuleConfig `yaml:"tables"` 26 | } 27 | 28 | // TransformRule represents a rule that transforms data 29 | type TransformRule struct { 30 | TableName string 31 | ColumnName string 32 | Transform func(*utils.CDCMessage) (*utils.CDCMessage, error) 33 | Operations []utils.OperationType 34 | AllowEmptyDeletes bool 35 | } 36 | 37 | // FilterRule represents a rule that filters data 38 | type FilterRule struct { 39 | TableName string 40 | ColumnName string 41 | Condition func(*utils.CDCMessage) bool 42 | Operations []utils.OperationType 43 | AllowEmptyDeletes bool 44 | } 45 | 46 | // RuleEngine manages and applies rules to 
data 47 | type RuleEngine struct { 48 | Rules map[string][]Rule // map of table name to slice of rules 49 | mutex sync.RWMutex 50 | } 51 | 52 | // NewRuleEngine creates a new RuleEngine instance 53 | func NewRuleEngine() *RuleEngine { 54 | return &RuleEngine{ 55 | Rules: make(map[string][]Rule), 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /pkg/sinks/README.md: -------------------------------------------------------------------------------- 1 | # Supported Sinks in pg_flo 2 | 3 | pg_flo supports various sink types (destinations) for streaming data changes. This document provides an overview of the supported sinks and how to use them via the command-line interface. 4 | 5 | - [Available Sinks](#available-sinks) 6 | - [Common Flags](#common-flags) 7 | - [STDOUT Sink](#stdout-sink) 8 | - [Usage](#usage) 9 | - [Example](#example) 10 | - [File Sink](#file-sink) 11 | - [Usage](#usage-1) 12 | - [Additional Flags](#additional-flags) 13 | - [Example](#example-1) 14 | - [PostgreSQL Sink](#postgresql-sink) 15 | - [Usage](#usage-2) 16 | - [Additional Flags](#additional-flags-1) 17 | - [Example](#example-2) 18 | - [Additional Behavior](#additional-behavior) 19 | - [Webhook Sink](#webhook-sink) 20 | - [Usage](#usage-3) 21 | - [Additional Flags](#additional-flags-2) 22 | - [Example](#example-3) 23 | - [Additional Behavior](#additional-behavior-1) 24 | - [Sink Interface](#sink-interface) 25 | 26 | ## Available Sinks 27 | 28 | 1. STDOUT 29 | 2. File 30 | 3. PostgreSQL 31 | 4. Webhook 32 | 33 | ## Common Flags 34 | 35 | These flags are common to all sink types: 36 | 37 | - `--host`: PostgreSQL source host 38 | - `--port`: PostgreSQL source port 39 | - `--dbname`: PostgreSQL source database name 40 | - `--user`: PostgreSQL source user 41 | - `--password`: PostgreSQL source password 42 | - `--group`: Group name for replication 43 | - `--tables`: Tables to replicate (comma-separated) 44 | - `--status-dir`: Directory to store status files 45 | 46 | ## STDOUT Sink 47 | 48 | The STDOUT sink writes changes directly to the console output. 49 | 50 | ### Usage 51 | 52 | ```shell 53 | pg_flo stream stdout [common flags] 54 | ``` 55 | 56 | ### Example 57 | 58 | ```shell 59 | pg_flo stream stdout \ 60 | --host localhost \ 61 | --port 5432 \ 62 | --dbname your_database \ 63 | --user your_user \ 64 | --password your_password \ 65 | --group your_group \ 66 | --tables table1,table2 \ 67 | --status-dir /tmp/pg_flo-status 68 | ``` 69 | 70 | ## File Sink 71 | 72 | The File sink writes changes to files in the specified output directory. 73 | 74 | ### Usage 75 | 76 | ```shell 77 | pg_flo stream file [common flags] --output-dir 78 | ``` 79 | 80 | ### Additional Flags 81 | 82 | - `--output-dir`: Output directory for file sink 83 | 84 | ### Example 85 | 86 | ```shell 87 | pg_flo stream file \ 88 | --host localhost \ 89 | --port 5432 \ 90 | --dbname your_database \ 91 | --user your_user \ 92 | --password your_password \ 93 | --group your_group \ 94 | --tables table1,table2 \ 95 | --status-dir /tmp/pg_flo-status \ 96 | --output-dir /tmp/pg_flo-output 97 | ``` 98 | 99 | ## PostgreSQL Sink 100 | 101 | The PostgreSQL sink replicates changes to another PostgreSQL database. To ensure accurate replication of updates and deletes, all tables must have a primary key defined. 
102 | 103 | ### Usage 104 | 105 | ```shell 106 | pg_flo stream postgres [common flags] [postgres sink flags] 107 | ``` 108 | 109 | ### Additional Flags 110 | 111 | - `--target-host`: Target PostgreSQL host 112 | - `--target-port`: Target PostgreSQL port 113 | - `--target-dbname`: Target PostgreSQL database name 114 | - `--target-user`: Target PostgreSQL user 115 | - `--target-password`: Target PostgreSQL password 116 | - `--sync-schema`: Sync schema from source to target via `pg_dump` (boolean flag) 117 | 118 | ### Example 119 | 120 | ```shell 121 | pg_flo stream postgres \ 122 | --host localhost \ 123 | --port 5432 \ 124 | --dbname source_db \ 125 | --user source_user \ 126 | --password source_password \ 127 | --group replication_group \ 128 | --tables table1,table2 \ 129 | --schema public \ 130 | --status-dir /tmp/pg_flo-status \ 131 | --target-host target.host.com \ 132 | --target-port 5433 \ 133 | --target-dbname target_db \ 134 | --target-user target_user \ 135 | --target-password target_password \ 136 | --sync-schema 137 | ``` 138 | 139 | ### Additional Behavior 140 | 141 | - Supports schema synchronization between source and target databases using `pg_dump` when the `--sync-schema` flag is set. 142 | - Creates an `internal_pg_flo` schema and `lsn_status` table to keep track of the last processed LSN. 143 | - Handles `INSERT`, `UPDATE`, `DELETE`, and `DDL` operations. 144 | - Uses `UPSERT` (`INSERT ... ON CONFLICT DO UPDATE`) for handling both `INSERT` and `UPDATE` operations efficiently. 145 | - Executes operations within a transaction for each batch of changes. 146 | - Rolls back the transaction and logs an error if any operation in the batch fails. 147 | 148 | ## Webhook Sink 149 | 150 | The Webhook sink sends changes as HTTP POST requests to a specified URL. 151 | 152 | ### Usage 153 | 154 | ```shell 155 | pg_flo stream webhook [common flags] --webhook-url 156 | ``` 157 | 158 | ### Additional Flags 159 | 160 | - `--webhook-url`: URL to send webhook POST requests 161 | 162 | ### Example 163 | 164 | ```shell 165 | pg_flo stream webhook \ 166 | --host localhost \ 167 | --port 5432 \ 168 | --dbname your_database \ 169 | --user your_user \ 170 | --password your_password \ 171 | --group your_group \ 172 | --tables table1,table2 \ 173 | --schema public \ 174 | --status-dir /tmp/pg_flo-status \ 175 | --webhook-url https://your-webhook-endpoint.com/receive 176 | ``` 177 | 178 | ### Additional Behavior 179 | 180 | - Sends each change as a separate HTTP POST request to the specified webhook URL. 181 | - Implements a retry mechanism with up to 3 attempts for failed requests. 182 | - Considers both network errors and non-2xx status codes as failures that trigger retries. 183 | - Maintains a status file to keep track of the last processed LSN. 184 | - The status file is stored in the specified status directory with the name `pg_flo_webhook_last_lsn.json`. 185 | 186 | ## Sink Interface 187 | 188 | `pg_flo` uses a common interface for all sink types, allowing for easy implementation of new sinks. The `Sink` interface defines the following methods: 189 | 190 | - `WriteBatch(data []interface{}) error`: Writes a batch of changes to the sink. 191 | - `Close() error`: Closes the sink, releasing any resources or connections. 192 | 193 | Sinks can save the last processed `LSN` at the destination (as appropriate). This ensures that if a `pg_flo` process shuts down (for example, during a deployment) and starts again, it knows where to resume from. 
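As a rough illustration of how a new sink could be added, the sketch below implements the interface as it is declared in `pkg/sinks/sink.go` (`WriteBatch(data []*utils.CDCMessage) error`), plus the `Close` method that the built-in file and webhook sinks also provide. The `CountingSink` type and its constructor are hypothetical names invented for this example; they are not part of pg_flo.

```go
package sinks

import (
	"fmt"

	"github.com/pgflo/pg_flo/pkg/utils"
)

// CountingSink is an illustrative sink that tallies changes per table
// instead of persisting them anywhere.
type CountingSink struct {
	counts map[string]int
}

// NewCountingSink creates a new CountingSink instance.
func NewCountingSink() (*CountingSink, error) {
	return &CountingSink{counts: make(map[string]int)}, nil
}

// WriteBatch records each message in the batch; a real sink would write
// the batch to its destination and return an error on failure.
func (s *CountingSink) WriteBatch(messages []*utils.CDCMessage) error {
	for _, message := range messages {
		key := fmt.Sprintf("%s.%s", message.Schema, message.Table)
		s.counts[key]++
	}
	return nil
}

// Close releases any resources; this sink has none, so it only reports totals.
func (s *CountingSink) Close() error {
	for table, n := range s.counts {
		fmt.Printf("%s: %d changes\n", table, n)
	}
	return nil
}
```

A sink written this way satisfies the `Sink` interface and could be handed to the worker in place of the built-in stdout, file, PostgreSQL, or webhook sinks.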
194 | -------------------------------------------------------------------------------- /pkg/sinks/file.go: -------------------------------------------------------------------------------- 1 | package sinks 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | "sync" 8 | "time" 9 | 10 | "github.com/goccy/go-json" 11 | "github.com/pgflo/pg_flo/pkg/utils" 12 | 13 | "github.com/rs/zerolog" 14 | "github.com/rs/zerolog/log" 15 | ) 16 | 17 | func init() { 18 | log.Logger = log.Output(zerolog.ConsoleWriter{ 19 | Out: os.Stderr, 20 | TimeFormat: "15:04:05.000", 21 | }) 22 | } 23 | 24 | // FileSink represents a sink that writes data to files 25 | type FileSink struct { 26 | outputDir string 27 | currentFile *os.File 28 | currentSize int64 29 | maxFileSize int64 30 | rotateInterval time.Duration 31 | lastRotation time.Time 32 | mutex sync.Mutex 33 | } 34 | 35 | // NewFileSink creates a new FileSink instance 36 | func NewFileSink(outputDir string) (*FileSink, error) { 37 | sink := &FileSink{ 38 | outputDir: outputDir, 39 | maxFileSize: 100 * 1024 * 1024, // 100 MB 40 | rotateInterval: time.Hour, // Rotate every hour if size limit not reached 41 | } 42 | 43 | if err := os.MkdirAll(outputDir, 0755); err != nil { 44 | return nil, fmt.Errorf("failed to create output directory: %v", err) 45 | } 46 | 47 | if err := sink.rotateFile(); err != nil { 48 | return nil, fmt.Errorf("failed to create initial log file: %v", err) 49 | } 50 | 51 | return sink, nil 52 | } 53 | 54 | // rotateFile creates a new log file and updates the current file pointer 55 | func (s *FileSink) rotateFile() error { 56 | if s.currentFile != nil { 57 | err := s.currentFile.Close() 58 | if err != nil { 59 | return err 60 | } 61 | s.currentFile = nil 62 | } 63 | 64 | timestamp := time.Now().UTC().Format("20060102T150405Z") 65 | filename := fmt.Sprintf("pg_flo_log_%s.jsonl", timestamp) 66 | filepath := filepath.Join(s.outputDir, filename) 67 | 68 | file, err := os.OpenFile(filepath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) 69 | if err != nil { 70 | return fmt.Errorf("failed to create new log file: %v", err) 71 | } 72 | 73 | s.currentFile = file 74 | s.currentSize = 0 75 | s.lastRotation = time.Now() 76 | 77 | log.Info().Str("file", filepath).Msg("Rotated to new log file") 78 | return nil 79 | } 80 | 81 | // WriteBatch writes a batch of data to the current log file 82 | func (s *FileSink) WriteBatch(messages []*utils.CDCMessage) error { 83 | s.mutex.Lock() 84 | defer s.mutex.Unlock() 85 | 86 | for _, message := range messages { 87 | decodedMessage, err := buildDecodedMessage(message) 88 | if err != nil { 89 | return fmt.Errorf("failed to build decoded message: %v", err) 90 | } 91 | 92 | jsonData, err := json.Marshal(decodedMessage) 93 | if err != nil { 94 | return fmt.Errorf("failed to marshal data to JSON: %v", err) 95 | } 96 | 97 | if s.currentFile == nil || s.currentSize >= s.maxFileSize || time.Since(s.lastRotation) >= s.rotateInterval { 98 | if err := s.rotateFile(); err != nil { 99 | return err 100 | } 101 | } 102 | 103 | jsonData = append(jsonData, '\n') 104 | n, err := s.currentFile.Write(jsonData) 105 | if err != nil { 106 | return fmt.Errorf("failed to write to log file: %v", err) 107 | } 108 | 109 | s.currentSize += int64(n) 110 | } 111 | return nil 112 | } 113 | 114 | // Close closes the current log file and performs any necessary cleanup 115 | func (s *FileSink) Close() error { 116 | s.mutex.Lock() 117 | defer s.mutex.Unlock() 118 | 119 | if s.currentFile != nil { 120 | err := s.currentFile.Close() 121 | s.currentFile = 
nil 122 | return err 123 | } 124 | return nil 125 | } 126 | -------------------------------------------------------------------------------- /pkg/sinks/shared.go: -------------------------------------------------------------------------------- 1 | package sinks 2 | 3 | import "github.com/pgflo/pg_flo/pkg/utils" 4 | 5 | func buildDecodedMessage(message *utils.CDCMessage) (map[string]interface{}, error) { 6 | decodedMessage := make(map[string]interface{}) 7 | decodedMessage["Type"] = message.Type 8 | decodedMessage["Schema"] = message.Schema 9 | decodedMessage["Table"] = message.Table 10 | decodedMessage["ReplicationKey"] = message.ReplicationKey 11 | decodedMessage["LSN"] = message.LSN 12 | decodedMessage["EmittedAt"] = message.EmittedAt 13 | 14 | if message.NewTuple != nil { 15 | newTuple := make(map[string]interface{}) 16 | for _, col := range message.Columns { 17 | value, err := message.GetColumnValue(col.Name, false) 18 | if err != nil { 19 | return nil, err 20 | } 21 | newTuple[col.Name] = value 22 | } 23 | decodedMessage["NewTuple"] = newTuple 24 | } 25 | 26 | if message.OldTuple != nil { 27 | oldTuple := make(map[string]interface{}) 28 | for _, col := range message.Columns { 29 | value, err := message.GetColumnValue(col.Name, true) 30 | if err != nil { 31 | return nil, err 32 | } 33 | oldTuple[col.Name] = value 34 | } 35 | decodedMessage["OldTuple"] = oldTuple 36 | } 37 | 38 | return decodedMessage, nil 39 | } 40 | -------------------------------------------------------------------------------- /pkg/sinks/sink.go: -------------------------------------------------------------------------------- 1 | package sinks 2 | 3 | import ( 4 | "github.com/pgflo/pg_flo/pkg/utils" 5 | ) 6 | 7 | type Sink interface { 8 | WriteBatch(data []*utils.CDCMessage) error 9 | } 10 | -------------------------------------------------------------------------------- /pkg/sinks/stdout.go: -------------------------------------------------------------------------------- 1 | package sinks 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/goccy/go-json" 7 | "github.com/pgflo/pg_flo/pkg/utils" 8 | ) 9 | 10 | // StdoutSink represents a sink that writes data to standard output 11 | type StdoutSink struct{} 12 | 13 | // NewStdoutSink creates a new StdoutSink instance 14 | func NewStdoutSink() (*StdoutSink, error) { 15 | return &StdoutSink{}, nil 16 | } 17 | 18 | // WriteBatch writes a batch of data to standard output 19 | func (s *StdoutSink) WriteBatch(messages []*utils.CDCMessage) error { 20 | for _, message := range messages { 21 | decodedMessage, err := buildDecodedMessage(message) 22 | if err != nil { 23 | return fmt.Errorf("failed to build decoded message: %v", err) 24 | } 25 | 26 | jsonData, err := json.Marshal(decodedMessage) 27 | if err != nil { 28 | return fmt.Errorf("failed to marshal data to JSON: %v", err) 29 | } 30 | 31 | if _, err := fmt.Println(string(jsonData)); err != nil { 32 | return err 33 | } 34 | } 35 | return nil 36 | } 37 | -------------------------------------------------------------------------------- /pkg/sinks/types.go: -------------------------------------------------------------------------------- 1 | package sinks 2 | 3 | import "github.com/jackc/pglogrepl" 4 | 5 | type Status struct { 6 | LastLSN pglogrepl.LSN `json:"last_lsn"` 7 | } 8 | -------------------------------------------------------------------------------- /pkg/sinks/webhooks.go: -------------------------------------------------------------------------------- 1 | package sinks 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "net/http" 
7 | "os" 8 | 9 | "github.com/goccy/go-json" 10 | "github.com/pgflo/pg_flo/pkg/utils" 11 | "github.com/rs/zerolog" 12 | "github.com/rs/zerolog/log" 13 | ) 14 | 15 | func init() { 16 | log.Logger = log.Output(zerolog.ConsoleWriter{ 17 | Out: os.Stderr, 18 | TimeFormat: "15:04:05.000", 19 | }) 20 | } 21 | 22 | // WebhookSink represents a sink that sends data to a webhook endpoint 23 | type WebhookSink struct { 24 | webhookURL string 25 | client *http.Client 26 | } 27 | 28 | // NewWebhookSink creates a new WebhookSink instance 29 | func NewWebhookSink(webhookURL string) (*WebhookSink, error) { 30 | sink := &WebhookSink{ 31 | webhookURL: webhookURL, 32 | client: &http.Client{}, 33 | } 34 | 35 | return sink, nil 36 | } 37 | 38 | // WriteBatch sends a batch of data to the webhook endpoint 39 | func (s *WebhookSink) WriteBatch(messages []*utils.CDCMessage) error { 40 | for _, message := range messages { 41 | decodedMessage, err := buildDecodedMessage(message) 42 | if err != nil { 43 | return fmt.Errorf("failed to build decoded message: %v", err) 44 | } 45 | 46 | jsonData, err := json.Marshal(decodedMessage) 47 | if err != nil { 48 | return fmt.Errorf("failed to marshal data to JSON: %v", err) 49 | } 50 | 51 | if err = s.sendWithRetry(jsonData); err != nil { 52 | return err 53 | } 54 | } 55 | return nil 56 | } 57 | 58 | // sendWithRetry sends data to the webhook endpoint with retry logic 59 | func (s *WebhookSink) sendWithRetry(jsonData []byte) error { 60 | maxRetries := 3 61 | for attempt := 1; attempt <= maxRetries; attempt++ { 62 | req, err := http.NewRequest("POST", s.webhookURL, bytes.NewBuffer(jsonData)) 63 | if err != nil { 64 | return fmt.Errorf("failed to create request: %v", err) 65 | } 66 | 67 | req.Header.Set("Content-Type", "application/json") 68 | 69 | resp, err := s.client.Do(req) 70 | if err != nil { 71 | if attempt == maxRetries { 72 | return fmt.Errorf("failed to send webhook after %d attempts: %v", maxRetries, err) 73 | } 74 | log.Warn().Err(err).Int("attempt", attempt).Msg("Webhook request failed, retrying...") 75 | continue 76 | } 77 | defer resp.Body.Close() 78 | 79 | if resp.StatusCode >= 200 && resp.StatusCode < 300 { 80 | return nil 81 | } 82 | 83 | if attempt == maxRetries { 84 | return fmt.Errorf("webhook request failed with status code: %d after %d attempts", resp.StatusCode, maxRetries) 85 | } 86 | log.Warn().Int("statusCode", resp.StatusCode).Int("attempt", attempt).Msg("Received non-2xx status code, retrying...") 87 | } 88 | return nil 89 | } 90 | 91 | // Close performs any necessary cleanup (no-op for WebhookSink) 92 | func (s *WebhookSink) Close() error { 93 | return nil 94 | } 95 | -------------------------------------------------------------------------------- /pkg/utils/cdc_encoding.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "reflect" 7 | "strconv" 8 | "strings" 9 | "time" 10 | 11 | "github.com/jackc/pgx/v5/pgtype" 12 | ) 13 | 14 | // ConvertToPgCompatibleOutput converts a Go value to its PostgreSQL output format. 
15 | func ConvertToPgCompatibleOutput(value interface{}, oid uint32) ([]byte, error) { 16 | if value == nil { 17 | return nil, nil 18 | } 19 | 20 | switch oid { 21 | case pgtype.BoolOID: 22 | return strconv.AppendBool(nil, value.(bool)), nil 23 | case pgtype.Int2OID, pgtype.Int4OID, pgtype.Int8OID: 24 | switch v := value.(type) { 25 | case int: 26 | return []byte(strconv.FormatInt(int64(v), 10)), nil 27 | case int32: 28 | return []byte(strconv.FormatInt(int64(v), 10)), nil 29 | case int64: 30 | return []byte(strconv.FormatInt(v, 10)), nil 31 | default: 32 | return []byte(fmt.Sprintf("%d", value)), nil 33 | } 34 | case pgtype.Float4OID, pgtype.Float8OID: 35 | return []byte(strconv.FormatFloat(value.(float64), 'f', -1, 64)), nil 36 | case pgtype.NumericOID: 37 | return []byte(fmt.Sprintf("%v", value)), nil 38 | case pgtype.TextOID, pgtype.VarcharOID: 39 | return []byte(value.(string)), nil 40 | case pgtype.ByteaOID: 41 | if byteaData, ok := value.([]byte); ok { 42 | return byteaData, nil 43 | } 44 | return nil, fmt.Errorf("invalid bytea data type") 45 | case pgtype.TimestampOID, pgtype.TimestamptzOID: 46 | return []byte(value.(time.Time).Format(time.RFC3339Nano)), nil 47 | case pgtype.DateOID: 48 | return []byte(value.(time.Time).Format("2006-01-02")), nil 49 | case pgtype.JSONOID: 50 | switch v := value.(type) { 51 | case string: 52 | return []byte(v), nil 53 | case []byte: 54 | return v, nil 55 | default: 56 | return nil, fmt.Errorf("unsupported type for JSON data: %T", value) 57 | } 58 | case pgtype.JSONBOID: 59 | if jsonBytes, ok := value.([]byte); ok { 60 | return jsonBytes, nil 61 | } 62 | return json.Marshal(value) 63 | case pgtype.TextArrayOID, pgtype.VarcharArrayOID, 64 | pgtype.Int2ArrayOID, pgtype.Int4ArrayOID, pgtype.Int8ArrayOID, 65 | pgtype.Float4ArrayOID, pgtype.Float8ArrayOID, pgtype.BoolArrayOID: 66 | return EncodeArray(value) 67 | default: 68 | return []byte(fmt.Sprintf("%v", value)), nil 69 | } 70 | } 71 | 72 | // EncodeArray encodes a slice of values into a PostgreSQL array format. 73 | func EncodeArray(value interface{}) ([]byte, error) { 74 | var elements []string 75 | 76 | switch slice := value.(type) { 77 | case []interface{}: 78 | for _, v := range slice { 79 | elem, err := encodeArrayElement(v) 80 | if err != nil { 81 | return nil, err 82 | } 83 | elements = append(elements, elem) 84 | } 85 | case []string: 86 | elements = append(elements, slice...) 87 | case []int, []int32, []int64, []float32, []float64, []bool: 88 | sliceValue := reflect.ValueOf(slice) 89 | for i := 0; i < sliceValue.Len(); i++ { 90 | elem, err := encodeArrayElement(sliceValue.Index(i).Interface()) 91 | if err != nil { 92 | return nil, err 93 | } 94 | elements = append(elements, elem) 95 | } 96 | default: 97 | return nil, fmt.Errorf("unsupported slice type: %T", value) 98 | } 99 | 100 | return []byte("{" + strings.Join(elements, ",") + "}"), nil 101 | } 102 | 103 | // encodeArrayElement encodes a single array element into a string representation. 
104 | func encodeArrayElement(v interface{}) (string, error) { 105 | if v == nil { 106 | return "NULL", nil 107 | } 108 | 109 | switch val := v.(type) { 110 | case string: 111 | return val, nil 112 | case int, int32, int64, float32, float64: 113 | return fmt.Sprintf("%v", val), nil 114 | case bool: 115 | return strconv.FormatBool(val), nil 116 | case time.Time: 117 | return val.Format(time.RFC3339Nano), nil 118 | case []byte: 119 | return fmt.Sprintf("\\x%x", val), nil 120 | default: 121 | jsonBytes, err := json.Marshal(val) 122 | if err != nil { 123 | return "", fmt.Errorf("failed to marshal array element to JSON: %w", err) 124 | } 125 | return string(jsonBytes), nil 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /pkg/utils/cdc_message.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "bytes" 5 | "encoding/gob" 6 | "encoding/hex" 7 | "encoding/json" 8 | "fmt" 9 | "strconv" 10 | "strings" 11 | "time" 12 | 13 | "github.com/jackc/pglogrepl" 14 | "github.com/jackc/pgx/v5/pgtype" 15 | ) 16 | 17 | // init registers types with the gob package for encoding/decoding 18 | func init() { 19 | gob.Register(json.RawMessage{}) 20 | gob.Register(time.Time{}) 21 | gob.Register(map[string]interface{}{}) 22 | gob.Register(pglogrepl.RelationMessageColumn{}) 23 | gob.Register(pglogrepl.LSN(0)) 24 | 25 | gob.Register(CDCMessage{}) 26 | gob.Register(pglogrepl.TupleData{}) 27 | gob.Register(pglogrepl.TupleDataColumn{}) 28 | } 29 | 30 | // CDCMessage represents a full message for Change Data Capture 31 | type CDCMessage struct { 32 | Type OperationType 33 | Schema string 34 | Table string 35 | Columns []*pglogrepl.RelationMessageColumn 36 | NewTuple *pglogrepl.TupleData 37 | OldTuple *pglogrepl.TupleData 38 | ReplicationKey ReplicationKey 39 | LSN string 40 | EmittedAt time.Time 41 | ToastedColumns map[string]bool 42 | } 43 | 44 | // MarshalBinary implements the encoding.BinaryMarshaler interface 45 | func (m CDCMessage) MarshalBinary() ([]byte, error) { 46 | return EncodeCDCMessage(m) 47 | } 48 | 49 | // UnmarshalBinary implements the encoding.BinaryUnmarshaler interface 50 | func (m *CDCMessage) UnmarshalBinary(data []byte) error { 51 | decodedMessage, err := DecodeCDCMessage(data) 52 | if err != nil { 53 | return err 54 | } 55 | *m = *decodedMessage 56 | return nil 57 | } 58 | 59 | func (m *CDCMessage) GetColumnIndex(columnName string) int { 60 | for i, col := range m.Columns { 61 | if col.Name == columnName { 62 | return i 63 | } 64 | } 65 | return -1 66 | } 67 | 68 | // GetColumnValue gets a column value, optionally using old values for DELETE/UPDATE 69 | func (m *CDCMessage) GetColumnValue(columnName string, useOldValues bool) (interface{}, error) { 70 | colIndex := m.GetColumnIndex(columnName) 71 | if colIndex == -1 { 72 | return nil, fmt.Errorf("column %s not found", columnName) 73 | } 74 | 75 | var data []byte 76 | if useOldValues && m.OldTuple != nil { 77 | data = m.OldTuple.Columns[colIndex].Data 78 | } else if m.NewTuple != nil { 79 | data = m.NewTuple.Columns[colIndex].Data 80 | } else { 81 | return nil, fmt.Errorf("no data available for column %s", columnName) 82 | } 83 | 84 | return DecodeValue(data, m.Columns[colIndex].DataType) 85 | } 86 | 87 | // SetColumnValue sets the value of a column, respecting its type 88 | func (m *CDCMessage) SetColumnValue(columnName string, value interface{}) error { 89 | colIndex := m.GetColumnIndex(columnName) 90 | if colIndex == -1 { 91 | 
return fmt.Errorf("column %s not found", columnName) 92 | } 93 | 94 | column := m.Columns[colIndex] 95 | encodedValue, err := EncodeValue(value, column.DataType) 96 | if err != nil { 97 | return err 98 | } 99 | 100 | if m.Type == OperationDelete { 101 | m.OldTuple.Columns[colIndex] = &pglogrepl.TupleDataColumn{Data: encodedValue} 102 | } else { 103 | m.NewTuple.Columns[colIndex] = &pglogrepl.TupleDataColumn{Data: encodedValue} 104 | } 105 | 106 | return nil 107 | } 108 | 109 | // EncodeCDCMessage encodes a CDCMessage into a byte slice 110 | func EncodeCDCMessage(m CDCMessage) ([]byte, error) { 111 | var buf bytes.Buffer 112 | enc := gob.NewEncoder(&buf) 113 | 114 | if err := enc.Encode(m.Type); err != nil { 115 | return nil, err 116 | } 117 | if err := enc.Encode(m.Schema); err != nil { 118 | return nil, err 119 | } 120 | if err := enc.Encode(m.Table); err != nil { 121 | return nil, err 122 | } 123 | if err := enc.Encode(m.Columns); err != nil { 124 | return nil, err 125 | } 126 | 127 | if err := enc.Encode(m.NewTuple != nil); err != nil { 128 | return nil, err 129 | } 130 | if m.NewTuple != nil { 131 | if err := enc.Encode(m.NewTuple); err != nil { 132 | return nil, err 133 | } 134 | } 135 | 136 | if err := enc.Encode(m.OldTuple != nil); err != nil { 137 | return nil, err 138 | } 139 | 140 | if m.OldTuple != nil { 141 | if err := enc.Encode(m.OldTuple); err != nil { 142 | return nil, err 143 | } 144 | } 145 | 146 | if err := enc.Encode(m.ReplicationKey); err != nil { 147 | return nil, err 148 | } 149 | 150 | if err := enc.Encode(m.LSN); err != nil { 151 | return nil, err 152 | } 153 | 154 | if err := enc.Encode(m.EmittedAt); err != nil { 155 | return nil, err 156 | } 157 | 158 | if err := enc.Encode(m.ToastedColumns); err != nil { 159 | return nil, err 160 | } 161 | 162 | return buf.Bytes(), nil 163 | } 164 | 165 | // DecodeCDCMessage decodes a byte slice into a CDCMessage 166 | func DecodeCDCMessage(data []byte) (*CDCMessage, error) { 167 | buf := bytes.NewBuffer(data) 168 | dec := gob.NewDecoder(buf) 169 | m := &CDCMessage{} 170 | 171 | if err := dec.Decode(&m.Type); err != nil { 172 | return nil, err 173 | } 174 | if err := dec.Decode(&m.Schema); err != nil { 175 | return nil, err 176 | } 177 | if err := dec.Decode(&m.Table); err != nil { 178 | return nil, err 179 | } 180 | if err := dec.Decode(&m.Columns); err != nil { 181 | return nil, err 182 | } 183 | 184 | var newTupleExists bool 185 | if err := dec.Decode(&newTupleExists); err != nil { 186 | return nil, err 187 | } 188 | if newTupleExists { 189 | m.NewTuple = &pglogrepl.TupleData{} 190 | if err := dec.Decode(m.NewTuple); err != nil { 191 | return nil, err 192 | } 193 | } 194 | 195 | var oldTupleExists bool 196 | if err := dec.Decode(&oldTupleExists); err != nil { 197 | return nil, err 198 | } 199 | if oldTupleExists { 200 | m.OldTuple = &pglogrepl.TupleData{} 201 | if err := dec.Decode(m.OldTuple); err != nil { 202 | return nil, err 203 | } 204 | } 205 | 206 | if err := dec.Decode(&m.ReplicationKey); err != nil { 207 | return nil, err 208 | } 209 | 210 | if err := dec.Decode(&m.LSN); err != nil { 211 | return nil, err 212 | } 213 | 214 | if err := dec.Decode(&m.EmittedAt); err != nil { 215 | return nil, err 216 | } 217 | 218 | if err := dec.Decode(&m.ToastedColumns); err != nil { 219 | return nil, err 220 | } 221 | 222 | return m, nil 223 | } 224 | 225 | // DecodeValue decodes a byte slice into a Go value based on the PostgreSQL data type 226 | func DecodeValue(data []byte, dataType uint32) (interface{}, error) { 227 | if data == 
nil { 228 | return nil, nil 229 | } 230 | strData := string(data) 231 | switch dataType { 232 | case pgtype.BoolOID: 233 | return strconv.ParseBool(string(data)) 234 | case pgtype.Int2OID, pgtype.Int4OID, pgtype.Int8OID: 235 | return strconv.ParseInt(string(data), 10, 64) 236 | case pgtype.Float4OID, pgtype.Float8OID: 237 | if strings.EqualFold(strData, "NULL") { 238 | return nil, nil 239 | } 240 | return strconv.ParseFloat(strData, 64) 241 | case pgtype.NumericOID: 242 | return string(data), nil 243 | case pgtype.TextOID, pgtype.VarcharOID: 244 | return string(data), nil 245 | case pgtype.ByteaOID: 246 | if strings.HasPrefix(strData, "\\x") { 247 | hexString := strData[2:] 248 | byteData, err := hex.DecodeString(hexString) 249 | if err != nil { 250 | return nil, fmt.Errorf("failed to decode bytea hex string: %v", err) 251 | } 252 | return byteData, nil 253 | } 254 | return data, nil 255 | case pgtype.TimestampOID, pgtype.TimestamptzOID: 256 | return ParseTimestamp(string(data)) 257 | case pgtype.DateOID: 258 | return time.Parse("2006-01-02", string(data)) 259 | case pgtype.JSONOID: 260 | return string(data), nil 261 | case pgtype.JSONBOID: 262 | var result interface{} 263 | err := json.Unmarshal(data, &result) 264 | return result, err 265 | case pgtype.TextArrayOID, pgtype.VarcharArrayOID: 266 | return DecodeTextArray(data) 267 | case pgtype.Int2ArrayOID, pgtype.Int4ArrayOID, pgtype.Int8ArrayOID, pgtype.Float4ArrayOID, pgtype.Float8ArrayOID, pgtype.BoolArrayOID: 268 | return DecodeArray(data, dataType) 269 | default: 270 | return string(data), nil 271 | } 272 | } 273 | 274 | // DecodeTextArray decodes a PostgreSQL text array into a []string 275 | func DecodeTextArray(data []byte) ([]string, error) { 276 | if len(data) < 2 || data[0] != '{' || data[len(data)-1] != '}' { 277 | return nil, fmt.Errorf("invalid array format") 278 | } 279 | elements := strings.Split(string(data[1:len(data)-1]), ",") 280 | for i, elem := range elements { 281 | elements[i] = strings.Trim(elem, "\"") 282 | } 283 | return elements, nil 284 | } 285 | 286 | // DecodeArray decodes a PostgreSQL array into a slice of the appropriate type 287 | func DecodeArray(data []byte, dataType uint32) (interface{}, error) { 288 | if len(data) < 2 || data[0] != '{' || data[len(data)-1] != '}' { 289 | return nil, fmt.Errorf("invalid array format") 290 | } 291 | elements := strings.Split(string(data[1:len(data)-1]), ",") 292 | 293 | switch dataType { 294 | case pgtype.Int2ArrayOID, pgtype.Int4ArrayOID, pgtype.Int8ArrayOID: 295 | result := make([]interface{}, len(elements)) 296 | for i, elem := range elements { 297 | if elem == "NULL" { 298 | result[i] = nil 299 | continue 300 | } 301 | val, err := strconv.ParseInt(elem, 10, 64) 302 | if err != nil { 303 | return nil, err 304 | } 305 | result[i] = val 306 | } 307 | return result, nil 308 | case pgtype.Float4ArrayOID, pgtype.Float8ArrayOID: 309 | result := make([]interface{}, len(elements)) 310 | for i, elem := range elements { 311 | if elem == "NULL" { 312 | result[i] = nil 313 | continue 314 | } 315 | val, err := strconv.ParseFloat(elem, 64) 316 | if err != nil { 317 | return nil, err 318 | } 319 | result[i] = val 320 | } 321 | return result, nil 322 | case pgtype.BoolArrayOID: 323 | result := make([]interface{}, len(elements)) 324 | for i, elem := range elements { 325 | if elem == "NULL" { 326 | result[i] = nil 327 | continue 328 | } 329 | val, err := strconv.ParseBool(elem) 330 | if err != nil { 331 | return nil, err 332 | } 333 | result[i] = val 334 | } 335 | return result, nil 336 
| default: 337 | return elements, nil 338 | } 339 | } 340 | 341 | // EncodeValue encodes a Go value into a byte slice based on the PostgreSQL data type 342 | func EncodeValue(value interface{}, dataType uint32) ([]byte, error) { 343 | return ConvertToPgCompatibleOutput(value, dataType) 344 | } 345 | 346 | // IsColumnToasted checks if a column was TOASTed 347 | func (m *CDCMessage) IsColumnToasted(columnName string) bool { 348 | return m.ToastedColumns[columnName] 349 | } 350 | -------------------------------------------------------------------------------- /pkg/utils/retry.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "context" 5 | "time" 6 | ) 7 | 8 | type RetryConfig struct { 9 | MaxAttempts int 10 | InitialWait time.Duration 11 | MaxWait time.Duration 12 | } 13 | 14 | func WithRetry(ctx context.Context, cfg RetryConfig, operation func() error) error { 15 | wait := cfg.InitialWait 16 | for attempt := 1; attempt <= cfg.MaxAttempts; attempt++ { 17 | err := operation() 18 | if err == nil { 19 | return nil 20 | } 21 | 22 | if attempt == cfg.MaxAttempts { 23 | return err 24 | } 25 | 26 | select { 27 | case <-ctx.Done(): 28 | return ctx.Err() 29 | case <-time.After(wait): 30 | // Exponential backoff with max wait 31 | wait *= 2 32 | if wait > cfg.MaxWait { 33 | wait = cfg.MaxWait 34 | } 35 | } 36 | } 37 | return nil 38 | } 39 | -------------------------------------------------------------------------------- /pkg/utils/shared.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "strconv" 7 | "time" 8 | 9 | "github.com/jackc/pgtype" 10 | ) 11 | 12 | // ParseTimestamp attempts to parse a timestamp string using multiple layouts 13 | func ParseTimestamp(value string) (time.Time, error) { 14 | layouts := []string{ 15 | time.RFC3339Nano, 16 | "2006-01-02 15:04:05.999999-07", 17 | "2006-01-02 15:04:05.999999Z07:00", 18 | "2006-01-02 15:04:05.999999", 19 | "2006-01-02T15:04:05.999999Z", 20 | "2006-01-02 15:04:05", 21 | "2006-01-02T15:04:05Z", 22 | } 23 | 24 | for _, layout := range layouts { 25 | if t, err := time.Parse(layout, value); err == nil { 26 | return t, nil 27 | } 28 | } 29 | 30 | return time.Time{}, fmt.Errorf("unable to parse timestamp: %s", value) 31 | } 32 | 33 | // OidToTypeName maps PostgreSQL OIDs to their corresponding type names 34 | var OidToTypeName = map[uint32]string{ 35 | pgtype.BoolOID: "bool", 36 | pgtype.ByteaOID: "bytea", 37 | pgtype.Int8OID: "int8", 38 | pgtype.Int2OID: "int2", 39 | pgtype.Int4OID: "int4", 40 | pgtype.TextOID: "text", 41 | pgtype.JSONOID: "json", 42 | pgtype.Float4OID: "float4", 43 | pgtype.Float8OID: "float8", 44 | pgtype.BoolArrayOID: "bool[]", 45 | pgtype.Int2ArrayOID: "int2[]", 46 | pgtype.Int4ArrayOID: "int4[]", 47 | pgtype.TextArrayOID: "text[]", 48 | pgtype.ByteaArrayOID: "bytea[]", 49 | pgtype.Int8ArrayOID: "int8[]", 50 | pgtype.Float4ArrayOID: "float4[]", 51 | pgtype.Float8ArrayOID: "float8[]", 52 | pgtype.BPCharOID: "bpchar", 53 | pgtype.VarcharOID: "varchar", 54 | pgtype.DateOID: "date", 55 | pgtype.TimeOID: "time", 56 | pgtype.TimestampOID: "timestamp", 57 | pgtype.TimestampArrayOID: "timestamp[]", 58 | pgtype.DateArrayOID: "date[]", 59 | pgtype.TimestamptzOID: "timestamptz", 60 | pgtype.TimestamptzArrayOID: "timestamptz[]", 61 | pgtype.IntervalOID: "interval", 62 | pgtype.NumericArrayOID: "numeric[]", 63 | pgtype.BitOID: "bit", 64 | pgtype.VarbitOID: "varbit", 65 | 
pgtype.NumericOID: "numeric", 66 | pgtype.UUIDOID: "uuid", 67 | pgtype.UUIDArrayOID: "uuid[]", 68 | pgtype.JSONBOID: "jsonb", 69 | pgtype.JSONBArrayOID: "jsonb[]", 70 | } 71 | 72 | // OIDToString converts a PostgreSQL OID to its string representation 73 | func OIDToString(oid uint32) string { 74 | if typeName, ok := OidToTypeName[oid]; ok { 75 | return typeName 76 | } 77 | return fmt.Sprintf("unknown_%d", oid) 78 | } 79 | 80 | // StringToOID converts a type name to its PostgreSQL OID 81 | func StringToOID(typeName string) uint32 { 82 | for oid, name := range OidToTypeName { 83 | if name == typeName { 84 | return oid 85 | } 86 | } 87 | return 0 88 | } 89 | 90 | // ToInt64 converts an interface{} to int64 91 | func ToInt64(v interface{}) (int64, bool) { 92 | switch v := v.(type) { 93 | case int, int8, int16, int32, int64: 94 | return reflect.ValueOf(v).Int(), true 95 | case uint, uint8, uint16, uint32, uint64: 96 | return int64(reflect.ValueOf(v).Uint()), true 97 | case string: 98 | if i, err := strconv.ParseInt(v, 10, 64); err == nil { 99 | return i, true 100 | } 101 | } 102 | return 0, false 103 | } 104 | 105 | // ToFloat64 converts an interface{} to float64 106 | func ToFloat64(v interface{}) (float64, bool) { 107 | switch v := v.(type) { 108 | case int, int8, int16, int32, int64: 109 | return float64(reflect.ValueOf(v).Int()), true 110 | case uint, uint8, uint16, uint32, uint64: 111 | return float64(reflect.ValueOf(v).Uint()), true 112 | case float32, float64: 113 | return reflect.ValueOf(v).Float(), true 114 | case string: 115 | if f, err := strconv.ParseFloat(v, 64); err == nil { 116 | return f, true 117 | } 118 | } 119 | return 0, false 120 | } 121 | 122 | // ToBool converts various types to bool 123 | func ToBool(v interface{}) (bool, bool) { 124 | switch v := v.(type) { 125 | case bool: 126 | return v, true 127 | case string: 128 | if v == "true" || v == "1" { 129 | return true, true 130 | } 131 | if v == "false" || v == "0" { 132 | return false, true 133 | } 134 | case int, int8, int16, int32, int64: 135 | return reflect.ValueOf(v).Int() != 0, true 136 | case uint, uint8, uint16, uint32, uint64: 137 | return reflect.ValueOf(v).Uint() != 0, true 138 | case float32, float64: 139 | return reflect.ValueOf(v).Float() != 0, true 140 | } 141 | return false, false 142 | } 143 | 144 | // IsValid checks if the replication key is properly configured 145 | func (rk *ReplicationKey) IsValid() bool { 146 | if rk.Type == ReplicationKeyFull { 147 | return true // FULL doesn't require specific columns 148 | } 149 | 150 | return len(rk.Columns) > 0 && 151 | (rk.Type == ReplicationKeyPK || rk.Type == ReplicationKeyUnique) 152 | } 153 | -------------------------------------------------------------------------------- /pkg/utils/shared_types.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | // OperationType represents the type of database operation 4 | type OperationType string 5 | 6 | const ( 7 | OperationInsert OperationType = "INSERT" 8 | OperationUpdate OperationType = "UPDATE" 9 | OperationDelete OperationType = "DELETE" 10 | OperationDDL OperationType = "DDL" 11 | ) 12 | 13 | // ReplicationKeyType represents the type of replication key 14 | type ReplicationKeyType string 15 | 16 | const ( 17 | ReplicationKeyPK ReplicationKeyType = "PRIMARY KEY" 18 | ReplicationKeyUnique ReplicationKeyType = "UNIQUE" 19 | ReplicationKeyFull ReplicationKeyType = "FULL" // Replica identity full 20 | ) 21 | 22 | // ReplicationKey represents a key used for 
replication (either PK or unique constraint) 23 | type ReplicationKey struct { 24 | Type ReplicationKeyType 25 | Columns []string 26 | } 27 | 28 | type Logger interface { 29 | Debug() LogEvent 30 | Info() LogEvent 31 | Warn() LogEvent 32 | Error() LogEvent 33 | Err(err error) LogEvent 34 | } 35 | 36 | type LogEvent interface { 37 | Str(key, val string) LogEvent 38 | Int(key string, val int) LogEvent 39 | Int64(key string, val int64) LogEvent 40 | Uint8(key string, val uint8) LogEvent 41 | Uint32(key string, val uint32) LogEvent 42 | Interface(key string, val interface{}) LogEvent 43 | Err(err error) LogEvent 44 | Strs(key string, vals []string) LogEvent 45 | Any(key string, val interface{}) LogEvent 46 | Type(key string, val interface{}) LogEvent 47 | Msg(msg string) 48 | Msgf(format string, v ...interface{}) 49 | } 50 | -------------------------------------------------------------------------------- /pkg/utils/zerolog_logger.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "github.com/rs/zerolog" 5 | ) 6 | 7 | type ZerologLogger struct { 8 | logger zerolog.Logger 9 | } 10 | 11 | func NewZerologLogger(logger zerolog.Logger) Logger { 12 | return &ZerologLogger{logger: logger} 13 | } 14 | 15 | type ZerologLogEvent struct { 16 | event *zerolog.Event 17 | } 18 | 19 | func (z *ZerologLogger) Debug() LogEvent { 20 | return &ZerologLogEvent{event: z.logger.Debug()} 21 | } 22 | 23 | func (z *ZerologLogger) Info() LogEvent { 24 | return &ZerologLogEvent{event: z.logger.Info()} 25 | } 26 | 27 | func (z *ZerologLogger) Warn() LogEvent { 28 | return &ZerologLogEvent{event: z.logger.Warn()} 29 | } 30 | 31 | func (z *ZerologLogger) Error() LogEvent { 32 | return &ZerologLogEvent{event: z.logger.Error()} 33 | } 34 | 35 | func (z *ZerologLogger) Err(err error) LogEvent { 36 | return &ZerologLogEvent{event: z.logger.Err(err)} 37 | } 38 | 39 | func (e *ZerologLogEvent) Str(key, val string) LogEvent { 40 | e.event = e.event.Str(key, val) 41 | return e 42 | } 43 | 44 | func (e *ZerologLogEvent) Int(key string, val int) LogEvent { 45 | e.event = e.event.Int(key, val) 46 | return e 47 | } 48 | 49 | func (e *ZerologLogEvent) Int64(key string, val int64) LogEvent { 50 | e.event = e.event.Int64(key, val) 51 | return e 52 | } 53 | 54 | func (e *ZerologLogEvent) Uint32(key string, val uint32) LogEvent { 55 | e.event = e.event.Uint32(key, val) 56 | return e 57 | } 58 | 59 | func (e *ZerologLogEvent) Interface(key string, val interface{}) LogEvent { 60 | e.event = e.event.Interface(key, val) 61 | return e 62 | } 63 | 64 | func (e *ZerologLogEvent) Err(err error) LogEvent { 65 | e.event = e.event.Err(err) 66 | return e 67 | } 68 | 69 | func (e *ZerologLogEvent) Msg(msg string) { 70 | e.event.Msg(msg) 71 | } 72 | 73 | func (e *ZerologLogEvent) Msgf(format string, v ...interface{}) { 74 | e.event.Msgf(format, v...) 
75 | } 76 | 77 | func (e *ZerologLogEvent) Strs(key string, vals []string) LogEvent { 78 | e.event = e.event.Strs(key, vals) 79 | return e 80 | } 81 | 82 | func (e *ZerologLogEvent) Any(key string, val interface{}) LogEvent { 83 | e.event = e.event.Interface(key, val) 84 | return e 85 | } 86 | 87 | func (e *ZerologLogEvent) Uint8(key string, val uint8) LogEvent { 88 | e.event = e.event.Uint8(key, val) 89 | return e 90 | } 91 | 92 | func (e *ZerologLogEvent) Type(key string, val interface{}) LogEvent { 93 | e.event = e.event.Type(key, val) 94 | return e 95 | } 96 | -------------------------------------------------------------------------------- /pkg/worker/worker.go: -------------------------------------------------------------------------------- 1 | package worker 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "os" 8 | "sync" 9 | "time" 10 | 11 | "github.com/nats-io/nats.go" // Use the standard NATS package 12 | "github.com/pgflo/pg_flo/pkg/pgflonats" 13 | "github.com/pgflo/pg_flo/pkg/routing" 14 | "github.com/pgflo/pg_flo/pkg/rules" 15 | "github.com/pgflo/pg_flo/pkg/sinks" 16 | "github.com/pgflo/pg_flo/pkg/utils" 17 | "github.com/rs/zerolog" 18 | "github.com/rs/zerolog/log" 19 | ) 20 | 21 | // Worker represents a worker that processes messages from NATS. 22 | type Worker struct { 23 | natsClient *pgflonats.NATSClient 24 | ruleEngine *rules.RuleEngine 25 | router *routing.Router 26 | sink sinks.Sink 27 | group string 28 | logger utils.Logger 29 | batchSize int 30 | buffer []*utils.CDCMessage 31 | lastSavedState uint64 32 | flushInterval time.Duration 33 | shutdownCh chan struct{} 34 | wg sync.WaitGroup 35 | } 36 | 37 | // Option is a function type that modifies Worker configuration 38 | type Option func(*Worker) 39 | 40 | // WithBatchSize sets the batch size for the worker 41 | func WithBatchSize(size int) Option { 42 | return func(w *Worker) { 43 | w.batchSize = size 44 | } 45 | } 46 | 47 | func init() { 48 | log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stdout, TimeFormat: "15:04:05.000"}) 49 | zerolog.TimeFieldFormat = "2006-01-02T15:04:05.000Z07:00" 50 | } 51 | 52 | // NewWorker creates and returns a new Worker instance with the provided NATS client 53 | func NewWorker(natsClient *pgflonats.NATSClient, ruleEngine *rules.RuleEngine, router *routing.Router, sink sinks.Sink, group string, opts ...Option) *Worker { 54 | logger := utils.NewZerologLogger(log.With().Str("component", "worker").Logger()) 55 | 56 | w := &Worker{ 57 | natsClient: natsClient, 58 | ruleEngine: ruleEngine, 59 | router: router, 60 | sink: sink, 61 | group: group, 62 | logger: logger, 63 | batchSize: 1000, 64 | buffer: make([]*utils.CDCMessage, 0, 1000), 65 | lastSavedState: 0, 66 | flushInterval: 500 * time.Millisecond, 67 | shutdownCh: make(chan struct{}), 68 | } 69 | 70 | for _, opt := range opts { 71 | opt(w) 72 | } 73 | w.buffer = make([]*utils.CDCMessage, 0, w.batchSize) 74 | 75 | return w 76 | } 77 | 78 | // Start begins the worker's message processing loop, setting up the NATS consumer and processing messages. 79 | func (w *Worker) Start(ctx context.Context) error { 80 | stream := fmt.Sprintf("pgflo_%s_stream", w.group) 81 | subject := fmt.Sprintf("pgflo.%s", w.group) 82 | 83 | w.logger.Info(). 84 | Str("stream", stream). 85 | Str("subject", subject). 86 | Str("group", w.group). 
87 | Msg("Starting worker") 88 | 89 | js := w.natsClient.JetStream() 90 | 91 | consumerName := fmt.Sprintf("pgflo_%s_consumer", w.group) 92 | 93 | consumerConfig := &nats.ConsumerConfig{ 94 | Durable: consumerName, 95 | FilterSubject: subject, 96 | AckPolicy: nats.AckExplicitPolicy, 97 | MaxDeliver: 1, 98 | AckWait: 25 * time.Minute, 99 | DeliverPolicy: nats.DeliverAllPolicy, 100 | } 101 | 102 | _, err := js.AddConsumer(stream, consumerConfig) 103 | if err != nil && !errors.Is(err, nats.ErrConsumerNameAlreadyInUse) { 104 | w.logger.Error().Err(err).Msg("Failed to add or update consumer") 105 | return fmt.Errorf("failed to add or update consumer: %w", err) 106 | } 107 | 108 | sub, err := js.PullSubscribe(subject, consumerName) 109 | if err != nil { 110 | w.logger.Error().Err(err).Msg("Failed to subscribe to subject") 111 | return fmt.Errorf("failed to subscribe to subject: %w", err) 112 | } 113 | 114 | w.wg.Add(1) 115 | go func() { 116 | defer w.wg.Done() 117 | if err := w.processMessages(ctx, sub); err != nil && err != context.Canceled { 118 | w.logger.Error().Err(err).Msg("Error processing messages") 119 | } 120 | }() 121 | 122 | <-ctx.Done() 123 | w.logger.Info().Msg("Received shutdown signal. Initiating graceful shutdown...") 124 | 125 | w.wg.Wait() 126 | w.logger.Debug().Msg("All goroutines finished") 127 | 128 | return w.flushBuffer() 129 | } 130 | 131 | // processMessages continuously processes messages from the NATS consumer. 132 | func (w *Worker) processMessages(ctx context.Context, sub *nats.Subscription) error { 133 | flushTicker := time.NewTicker(w.flushInterval) 134 | defer flushTicker.Stop() 135 | 136 | for { 137 | select { 138 | case <-ctx.Done(): 139 | w.logger.Info().Msg("Flushing remaining messages") 140 | return w.flushBuffer() 141 | case <-flushTicker.C: 142 | if err := w.flushBuffer(); err != nil { 143 | w.logger.Error().Err(err).Msg("Failed to flush buffer on interval") 144 | } 145 | default: 146 | msgs, err := sub.Fetch(10, nats.MaxWait(500*time.Millisecond)) 147 | if err != nil && !errors.Is(err, nats.ErrTimeout) { 148 | w.logger.Error().Err(err).Msg("Error fetching messages") 149 | continue 150 | } 151 | 152 | for _, msg := range msgs { 153 | if err := w.processMessage(msg); err != nil { 154 | w.logger.Error().Err(err).Msg("Failed to process message") 155 | } 156 | if err := msg.Ack(); err != nil { 157 | w.logger.Error().Err(err).Msg("Failed to acknowledge message") 158 | } 159 | } 160 | if len(w.buffer) >= w.batchSize { 161 | if err := w.flushBuffer(); err != nil { 162 | w.logger.Error().Err(err).Msg("Failed to flush buffer") 163 | } 164 | } 165 | } 166 | } 167 | } 168 | 169 | // processMessage handles a single message, applying rules, writing to the sink, and updating the last processed sequence. 
170 | func (w *Worker) processMessage(msg *nats.Msg) error { 171 | metadata, err := msg.Metadata() 172 | if err != nil { 173 | w.logger.Error().Err(err).Msg("Failed to get message metadata") 174 | return err 175 | } 176 | 177 | var cdcMessage utils.CDCMessage 178 | err = cdcMessage.UnmarshalBinary(msg.Data) 179 | if err != nil { 180 | w.logger.Error().Err(err).Msg("Failed to unmarshal message") 181 | return err 182 | } 183 | 184 | if w.ruleEngine != nil { 185 | processedMessage, err := w.ruleEngine.ApplyRules(&cdcMessage) 186 | if err != nil { 187 | w.logger.Error().Err(err).Msg("Failed to apply rules") 188 | return err 189 | } 190 | if processedMessage == nil { 191 | w.logger.Debug().Msg("Message filtered out by rules") 192 | return nil 193 | } 194 | cdcMessage = *processedMessage 195 | } 196 | 197 | if w.router != nil { 198 | routedMessage, err := w.router.ApplyRouting(&cdcMessage) 199 | if err != nil { 200 | w.logger.Error().Err(err).Msg("Failed to apply routing") 201 | return err 202 | } 203 | if routedMessage == nil { 204 | w.logger.Debug().Msg("Message filtered out by routing") 205 | return nil 206 | } 207 | cdcMessage = *routedMessage 208 | } 209 | 210 | w.buffer = append(w.buffer, &cdcMessage) 211 | w.lastSavedState = metadata.Sequence.Stream 212 | 213 | return nil 214 | } 215 | 216 | // flushBuffer writes the buffered messages to the sink and updates the last processed sequence. 217 | func (w *Worker) flushBuffer() error { 218 | if len(w.buffer) == 0 { 219 | return nil 220 | } 221 | 222 | w.logger.Debug(). 223 | Int("messages", len(w.buffer)). 224 | Int("batch_size", w.batchSize). 225 | Msg("Flushing buffer") 226 | 227 | err := w.sink.WriteBatch(w.buffer) 228 | if err != nil { 229 | w.logger.Error().Err(err).Msg("Failed to write batch to sink") 230 | return err 231 | } 232 | 233 | state, err := w.natsClient.GetState() 234 | if err != nil { 235 | w.logger.Error().Err(err).Msg("Failed to get current state") 236 | return err 237 | } 238 | 239 | state.LastProcessedSeq[w.group] = w.lastSavedState 240 | if err := w.natsClient.SaveState(state); err != nil { 241 | w.logger.Error().Err(err).Msg("Failed to save state") 242 | return err 243 | } 244 | 245 | w.buffer = w.buffer[:0] 246 | return nil 247 | } 248 | --------------------------------------------------------------------------------
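The worker above pulls batches from the NATS consumer, applies rules and routing, buffers messages, and flushes them to the configured sink. As a minimal sketch of how these pieces fit together outside the CLI, the hypothetical `runWorker` helper below wires a worker to the stdout sink; it assumes a `*pgflonats.NATSClient` has already been constructed elsewhere (its setup is not shown in this listing) and passes `nil` for the optional rule engine and router.

```go
package example

import (
	"context"
	"os/signal"
	"syscall"

	"github.com/pgflo/pg_flo/pkg/pgflonats"
	"github.com/pgflo/pg_flo/pkg/sinks"
	"github.com/pgflo/pg_flo/pkg/worker"
)

// runWorker is a hypothetical helper: it builds a stdout sink, creates a worker
// for the given group with a smaller batch size, and runs it until interrupted.
func runWorker(natsClient *pgflonats.NATSClient, group string) error {
	sink, err := sinks.NewStdoutSink()
	if err != nil {
		return err
	}

	// nil rule engine and nil router are allowed; processMessage skips both.
	w := worker.NewWorker(natsClient, nil, nil, sink, group, worker.WithBatchSize(500))

	// Start blocks until the context is cancelled, then flushes the buffer.
	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
	defer stop()

	return w.Start(ctx)
}
```

With this wiring, batching behaves exactly as in `pkg/worker/worker.go`: messages are buffered up to the batch size or the 500 ms flush interval, whichever comes first, and the last processed sequence is saved back to NATS after each successful flush.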