├── .github
├── dependabot.yml
└── workflows
│ └── build.yml
├── .gitignore
├── .golangci.yml
├── .goreleaser.yaml
├── .pre-commit-config.yaml
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── brand-kit
├── README.md
├── banner
│ ├── pgstream-banner.svg
│ └── pgstream-banner@2x.png
└── logo
│ ├── symbol
│ ├── pgstream-black-symbol.svg
│ └── pgstream-white-symbol.svg
│ └── wordmark
│ ├── pgstream-black-wordmark.svg
│ └── pgstream-white-wordmark.svg
├── build
└── docker
│ ├── docker-compose-signoz.yml
│ ├── docker-compose.yml
│ ├── postgres
│ ├── Dockerfile
│ └── postgresql.conf
│ └── signoz
│ ├── clickhouse
│ ├── cluster.xml
│ ├── config.xml
│ ├── custom-function.xml
│ ├── storage.xml
│ ├── user_scripts
│ │ └── histogramQuantile
│ └── users.xml
│ ├── dashboards
│ └── pgstream.json
│ ├── otel-collector-config.yaml
│ ├── otel-collector-opamp-config.yaml
│ └── prometheus.yml
├── cli-definition.json
├── cmd
├── config
│ ├── config.go
│ ├── config_env.go
│ ├── config_env_test.go
│ ├── config_yaml.go
│ ├── config_yaml_test.go
│ ├── helper_test.go
│ └── test
│ │ ├── test_config.env
│ │ ├── test_config.yaml
│ │ └── test_transformer_rules.yaml
├── init_cmd.go
├── root_cmd.go
├── run_cmd.go
├── snapshot_cmd.go
└── status_cmd.go
├── config_template.yaml
├── docs
├── README.md
├── img
│ ├── pgstream_arch_v1.png
│ ├── pgstream_diagram_v2.svg
│ ├── pgstream_diagram_v2_kafka.svg
│ ├── pgstream_snapshot_diagram.svg
│ ├── pgstream_snapshot_sequence.svg
│ ├── pgstream_transformer_diagram.svg
│ ├── pgstream_tutorial_kafka.svg
│ ├── pgstream_tutorial_pg2os.svg
│ ├── pgstream_tutorial_pg2pg.svg
│ ├── pgstream_tutorial_pg2webhooks.svg
│ ├── pgstream_tutorial_snapshot2pg.svg
│ └── pgstream_tutorial_transformer.svg
└── tutorials
│ ├── kafka2os_tutorial.env
│ ├── kafka2os_tutorial.yaml
│ ├── kafka2pg_tutorial.env
│ ├── kafka2pg_tutorial.yaml
│ ├── pg2kafka_tutorial.env
│ ├── pg2kafka_tutorial.yaml
│ ├── pg2os_tutorial.env
│ ├── pg2os_tutorial.yaml
│ ├── pg2pg_transformer_tutorial.env
│ ├── pg2pg_transformer_tutorial.yaml
│ ├── pg2pg_tutorial.env
│ ├── pg2pg_tutorial.yaml
│ ├── pg2webhook_tutorial.env
│ ├── pg2webhook_tutorial.yaml
│ ├── postgres_kafka.md
│ ├── postgres_snapshot.md
│ ├── postgres_to_opensearch.md
│ ├── postgres_to_postgres.md
│ ├── postgres_to_webhooks.md
│ ├── postgres_transformer.md
│ ├── snapshot2pg_tutorial.env
│ ├── snapshot2pg_tutorial.yaml
│ └── tutorial_transformer_rules.yaml
├── go.mod
├── go.sum
├── internal
├── http
│ ├── http.go
│ └── mocks
│ │ └── mock_http_client.go
├── json
│ └── json.go
├── log
│ └── zerolog
│ │ └── zerolog.go
├── postgres
│ ├── errors.go
│ ├── instrumentation
│ │ ├── instrumented_pg_dump_restore.go
│ │ ├── instrumented_querier.go
│ │ ├── instrumented_querier_builder.go
│ │ └── instrumented_tx.go
│ ├── mocks
│ │ ├── mock_pg_mapper.go
│ │ ├── mock_pg_querier.go
│ │ ├── mock_pg_replication_conn.go
│ │ ├── mock_row.go
│ │ ├── mock_rows.go
│ │ └── mock_tx.go
│ ├── pg_conn.go
│ ├── pg_conn_pool.go
│ ├── pg_dump.go
│ ├── pg_dump_pg_restore_integration_test.go
│ ├── pg_mapper.go
│ ├── pg_querier.go
│ ├── pg_querier_builder.go
│ ├── pg_replication_conn.go
│ ├── pg_restore.go
│ ├── pg_test.go
│ ├── pg_tx.go
│ ├── pg_utils.go
│ └── pg_utils_test.go
├── profiling
│ └── profiling.go
├── searchstore
│ ├── elasticsearch
│ │ ├── elasticsearch_client.go
│ │ └── elasticsearch_mapper.go
│ ├── mocks
│ │ ├── mock_client.go
│ │ └── mock_mapper.go
│ ├── opensearch
│ │ ├── opensearch_client.go
│ │ └── opensearch_mapper.go
│ ├── search_api.go
│ ├── search_client.go
│ ├── search_errors.go
│ └── search_mapper.go
├── sync
│ ├── mocks
│ │ └── mock_weighted_semaphore.go
│ └── semaphore.go
└── testcontainers
│ ├── test_elasticsearch_container.go
│ ├── test_kafka_container.go
│ ├── test_opensearch_container.go
│ └── test_postgres_container.go
├── kafka2os.env
├── kafka2os.yaml
├── license-header.txt
├── main.go
├── migrations
└── postgres
│ ├── 1_create_pgstream_xid.down.sql
│ ├── 1_create_pgstream_xid.up.sql
│ ├── 2_create_pgstream_schemalog_table.down.sql
│ ├── 2_create_pgstream_schemalog_table.up.sql
│ ├── 3_create_pgstream_tableids_table.down.sql
│ ├── 3_create_pgstream_tableids_table.up.sql
│ ├── 4_create_pgstream_get_schema_function.down.sql
│ ├── 4_create_pgstream_get_schema_function.up.sql
│ ├── 5_create_pgstream_log_schema_function.down.sql
│ ├── 5_create_pgstream_log_schema_function.up.sql
│ ├── 6_create_pgstream_refresh_schema_function.down.sql
│ ├── 6_create_pgstream_refresh_schema_function.up.sql
│ ├── 7_create_pgstream_event_triggers.down.sql
│ ├── 7_create_pgstream_event_triggers.up.sql
│ └── migrations.go
├── pg2kafka.env
├── pg2kafka.yaml
├── pg2os.env
├── pg2os.yaml
├── pg2pg.env
├── pg2pg.yaml
├── pg2webhook.env
├── pg2webhook.yaml
├── pkg
├── backoff
│ ├── backoff.go
│ └── mocks
│ │ └── mock_backoff.go
├── kafka
│ ├── config.go
│ ├── conn.go
│ ├── instrumentation
│ │ ├── instrumented_kafka_reader.go
│ │ └── instrumented_kafka_writer.go
│ ├── kafka_offset_parser.go
│ ├── kafka_offset_parser_test.go
│ ├── kafka_reader.go
│ ├── kafka_writer.go
│ ├── log.go
│ └── mocks
│ │ ├── mock_kafka_parser.go
│ │ ├── mock_kafka_reader.go
│ │ └── mock_kafka_writer.go
├── log
│ ├── logger.go
│ └── zerolog
│ │ └── logger.go
├── otel
│ ├── config.go
│ ├── otel_instrumentation.go
│ ├── otel_provider.go
│ ├── span.go
│ └── version.go
├── schemalog
│ ├── instrumentation
│ │ └── instrumented_store.go
│ ├── log_entry.go
│ ├── mocks
│ │ └── store_mock.go
│ ├── postgres
│ │ ├── helper_test.go
│ │ ├── pg_schemalog_store.go
│ │ └── pg_schemalog_store_test.go
│ ├── schema.go
│ ├── schema_diff.go
│ ├── schema_diff_test.go
│ ├── schema_test.go
│ ├── store.go
│ ├── store_cache.go
│ └── store_cache_test.go
├── snapshot
│ ├── errors.go
│ ├── errors_test.go
│ ├── generator
│ │ ├── helper_test.go
│ │ ├── instrumentation
│ │ │ └── instrumented_snapshot_generator.go
│ │ ├── mocks
│ │ │ └── mock_snapshot_generator.go
│ │ ├── postgres
│ │ │ ├── data
│ │ │ │ ├── config.go
│ │ │ │ ├── helper_test.go
│ │ │ │ ├── instrumented_table_snapshot_generator.go
│ │ │ │ ├── pg_snapshot_generator.go
│ │ │ │ ├── pg_snapshot_generator_integration_test.go
│ │ │ │ └── pg_snapshot_generator_test.go
│ │ │ ├── schema
│ │ │ │ ├── pgdumprestore
│ │ │ │ │ ├── helper_test.go
│ │ │ │ │ ├── snapshot_pg_dump_restore_generator.go
│ │ │ │ │ ├── snapshot_pg_dump_restore_generator_test.go
│ │ │ │ │ └── test
│ │ │ │ │ │ ├── test_dump.sql
│ │ │ │ │ │ ├── test_dump_constraints.sql
│ │ │ │ │ │ └── test_dump_filtered.sql
│ │ │ │ └── schemalog
│ │ │ │ │ ├── snapshot_schemalog_generator.go
│ │ │ │ │ └── snapshot_schemalog_generator_test.go
│ │ │ └── tablefinder
│ │ │ │ ├── instrumented_table_discovery.go
│ │ │ │ ├── pg_snapshot_table_finder.go
│ │ │ │ └── pg_snapshot_table_finder_test.go
│ │ ├── snapshot_generator.go
│ │ ├── snapshot_generator_recorder.go
│ │ └── snapshot_generator_recorder_test.go
│ ├── snapshot.go
│ └── store
│ │ ├── instrumentation
│ │ └── instrumented_snapshot_store.go
│ │ ├── mocks
│ │ └── mock_snapshot_store.go
│ │ ├── postgres
│ │ ├── pg_snapshot_store.go
│ │ └── pg_snapshot_store_test.go
│ │ └── snapshot_store.go
├── stream
│ ├── config.go
│ ├── helper_test.go
│ ├── integration
│ │ ├── config
│ │ │ └── postgresql.conf
│ │ ├── helper_test.go
│ │ ├── pg_kafka_integration_test.go
│ │ ├── pg_pg_integration_test.go
│ │ ├── pg_pg_integration_transformer_test.go
│ │ ├── pg_search_integration_test.go
│ │ ├── pg_webhook_integration_test.go
│ │ ├── setup_test.go
│ │ └── snapshot_pg_integration_test.go
│ ├── stream.go
│ ├── stream_init.go
│ ├── stream_run.go
│ ├── stream_snapshot.go
│ ├── stream_status.go
│ ├── stream_status_checker.go
│ ├── stream_status_checker_test.go
│ └── stream_status_test.go
├── tls
│ ├── test
│ │ ├── test.csr
│ │ ├── test.key
│ │ └── test.pem
│ ├── tls.go
│ └── tls_test.go
├── transformers
│ ├── builder
│ │ ├── transformer_builder.go
│ │ └── transformer_builder_test.go
│ ├── generators
│ │ ├── deterministic_bytes_generator.go
│ │ ├── generator.go
│ │ └── random_bytes_generator.go
│ ├── greenmask
│ │ ├── greenmask_boolean_transformer.go
│ │ ├── greenmask_boolean_transformer_test.go
│ │ ├── greenmask_choice_transformer.go
│ │ ├── greenmask_choice_transformer_test.go
│ │ ├── greenmask_date_transformer.go
│ │ ├── greenmask_date_transformer_test.go
│ │ ├── greenmask_firstname_transformer.go
│ │ ├── greenmask_firstname_transformer_test.go
│ │ ├── greenmask_float_transformer.go
│ │ ├── greenmask_float_transformer_test.go
│ │ ├── greenmask_integer_transformer.go
│ │ ├── greenmask_integer_transformer_test.go
│ │ ├── greenmask_string_transformer.go
│ │ ├── greenmask_string_transformer_test.go
│ │ ├── greenmask_timestamp_transformer.go
│ │ ├── greenmask_timestamp_transformer_test.go
│ │ ├── greenmask_transformer.go
│ │ ├── greenmask_unix_timestamp_transformer.go
│ │ ├── greenmask_unix_timestamp_transformer_test.go
│ │ ├── greenmask_uuid_transformer.go
│ │ └── greenmask_uuid_transformer_test.go
│ ├── instrumentation
│ │ └── instrumented_transformer.go
│ ├── literal_string_transformer.go
│ ├── literal_string_transformer_test.go
│ ├── masking_transformer.go
│ ├── masking_transformer_test.go
│ ├── mocks
│ │ ├── mock_builder.go
│ │ └── mock_transformer.go
│ ├── neosync
│ │ ├── neosync_email_transformer.go
│ │ ├── neosync_email_transformer_test.go
│ │ ├── neosync_firstname_transformer.go
│ │ ├── neosync_firstname_transformer_test.go
│ │ ├── neosync_fullname_transformer.go
│ │ ├── neosync_fullname_transformer_test.go
│ │ ├── neosync_lastname_transformer.go
│ │ ├── neosync_lastname_transformer_test.go
│ │ ├── neosync_string_transformer.go
│ │ ├── neosync_string_transformer_test.go
│ │ └── neosync_transformer.go
│ ├── phone_number_transformer.go
│ ├── phone_number_transformer_test.go
│ ├── string_transformer.go
│ ├── string_transformer_test.go
│ ├── template_transformer.go
│ ├── template_transformer_test.go
│ ├── transformer.go
│ └── transformer_test.go
└── wal
│ ├── checkpointer
│ ├── kafka
│ │ ├── wal_kafka_checkpointer.go
│ │ └── wal_kafka_checkpointer_test.go
│ ├── postgres
│ │ ├── helper_test.go
│ │ ├── wal_pg_checkpointer.go
│ │ └── wal_pg_checkpointer_test.go
│ └── wal_checkpointer.go
│ ├── listener
│ ├── kafka
│ │ ├── wal_kafka_reader.go
│ │ └── wal_kafka_reader_test.go
│ ├── postgres
│ │ ├── helper_test.go
│ │ ├── wal_pg_listener.go
│ │ └── wal_pg_listener_test.go
│ ├── snapshot
│ │ ├── adapter
│ │ │ ├── config.go
│ │ │ ├── config_test.go
│ │ │ ├── wal_process_event_adapter.go
│ │ │ ├── wal_process_event_adapter_test.go
│ │ │ ├── wal_snapshot_generator_adapter.go
│ │ │ └── wal_snapshot_generator_adapter_test.go
│ │ ├── builder
│ │ │ ├── config.go
│ │ │ └── wal_listener_snapshot_generator_builder.go
│ │ └── wal_snapshot_listener.go
│ └── wal_listener.go
│ ├── processor
│ ├── batch
│ │ ├── helper_test.go
│ │ ├── mocks
│ │ │ └── mock_batch_sender.go
│ │ ├── wal_batch.go
│ │ ├── wal_batch_sender.go
│ │ ├── wal_batch_sender_config.go
│ │ ├── wal_batch_sender_test.go
│ │ └── wal_message.go
│ ├── errors.go
│ ├── filter
│ │ ├── wal_filter.go
│ │ └── wal_filter_test.go
│ ├── injector
│ │ ├── helper_test.go
│ │ ├── wal_injector.go
│ │ └── wal_injector_test.go
│ ├── instrumentation
│ │ └── instrumented_wal_processor.go
│ ├── kafka
│ │ ├── config.go
│ │ ├── wal_kafka_batch_writer.go
│ │ └── wal_kafka_batch_writer_test.go
│ ├── mocks
│ │ └── mock_processor.go
│ ├── postgres
│ │ ├── config.go
│ │ ├── helper_test.go
│ │ ├── instrumented_wal_adapter.go
│ │ ├── postgres_batch_writer.go
│ │ ├── postgres_batch_writer_test.go
│ │ ├── postgres_bulk_ingest_writer.go
│ │ ├── postgres_bulk_ingest_writer_test.go
│ │ ├── postgres_query_msg.go
│ │ ├── postgres_wal_adapter.go
│ │ ├── postgres_wal_ddl_adapter.go
│ │ ├── postgres_wal_ddl_adapter_test.go
│ │ ├── postgres_wal_dml_adapter.go
│ │ ├── postgres_wal_dml_adapter_test.go
│ │ └── postgres_writer.go
│ ├── search
│ │ ├── config.go
│ │ ├── errors.go
│ │ ├── helper_test.go
│ │ ├── instrumentation
│ │ │ └── instrumented_search_store.go
│ │ ├── mocks
│ │ │ └── mock_search_mapper.go
│ │ ├── search_adapter.go
│ │ ├── search_adapter_test.go
│ │ ├── search_batch_indexer.go
│ │ ├── search_batch_indexer_test.go
│ │ ├── search_msg_batch.go
│ │ ├── search_store_retrier.go
│ │ ├── search_store_retrier_test.go
│ │ ├── store.go
│ │ └── store
│ │ │ ├── helper_test.go
│ │ │ ├── search_adapter.go
│ │ │ ├── search_index_name.go
│ │ │ ├── search_pg_mapper.go
│ │ │ ├── search_pg_mapper_test.go
│ │ │ ├── search_store.go
│ │ │ └── search_store_test.go
│ ├── transformer
│ │ ├── wal_postgres_transformer_parser.go
│ │ ├── wal_postgres_transformer_parser_test.go
│ │ ├── wal_transformer.go
│ │ ├── wal_transformer_parser.go
│ │ ├── wal_transformer_parser_test.go
│ │ ├── wal_transformer_rules.go
│ │ └── wal_transformer_test.go
│ ├── wal_processor.go
│ ├── wal_processor_test.go
│ └── webhook
│ │ ├── notifier
│ │ ├── config.go
│ │ ├── helper_test.go
│ │ ├── webhook_notifier.go
│ │ ├── webhook_notifier_test.go
│ │ └── webhook_notify_msg.go
│ │ ├── subscription
│ │ ├── server
│ │ │ ├── config.go
│ │ │ ├── subscription_server.go
│ │ │ └── subscription_server_test.go
│ │ ├── store
│ │ │ ├── cache
│ │ │ │ ├── config.go
│ │ │ │ ├── helper_test.go
│ │ │ │ ├── subscription_store_cache.go
│ │ │ │ └── subscription_store_cache_test.go
│ │ │ ├── mocks
│ │ │ │ └── mock_subscription_store.go
│ │ │ ├── postgres
│ │ │ │ ├── pg_subscription_store.go
│ │ │ │ └── pg_subscription_store_test.go
│ │ │ └── subscription_store.go
│ │ ├── subscription.go
│ │ └── subscription_test.go
│ │ └── webhook.go
│ ├── replication
│ ├── instrumentation
│ │ └── instrumented_replication_handler.go
│ ├── mocks
│ │ ├── mock_replication_handler.go
│ │ └── mock_replication_lsn_parser.go
│ ├── postgres
│ │ ├── helper_test.go
│ │ ├── pg_lsn_parser.go
│ │ ├── pg_replication_handler.go
│ │ └── pg_replication_handler_test.go
│ └── replication_handler.go
│ └── wal_data.go
├── snapshot2pg.env
├── snapshot2pg.yaml
├── tools
├── build-cli-definition.go
├── transformer-definition
│ └── build-transformers-definition.go
└── webhook
│ └── webhook_server.go
├── transformer_rules.yaml
└── transformers-definition.json
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # To get started with Dependabot version updates, you'll need to specify which
2 | # package ecosystems to update and where the package manifests are located.
3 | # Please see the documentation for all configuration options:
4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
5 |
6 | version: 2
7 | updates:
8 | - package-ecosystem: "gomod"
9 | directory: "/"
10 | schedule:
11 | interval: "weekly"
12 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | bin/license-header-checker
2 | pgstream
3 | tools/webhook/webhook
4 |
5 | *.prof
6 |
7 | coverage
8 |
9 | # misc
10 | .DS_Store
11 |
12 | .idea/
13 |
--------------------------------------------------------------------------------
/.golangci.yml:
--------------------------------------------------------------------------------
1 | version: "2"
2 | run:
3 | go: "1.24"
4 | linters:
5 | default: none
6 | enable:
7 | - copyloopvar
8 | - errorlint
9 | - forbidigo
10 | - forcetypeassert
11 | - gochecksumtype
12 | - goconst
13 | - gocritic
14 | - gosec
15 | - govet
16 | - ineffassign
17 | - makezero
18 | - misspell
19 | - nakedret
20 | - nolintlint
21 | - prealloc
22 | - staticcheck
23 | - unused
24 | settings:
25 | errorlint:
26 | errorf: true
27 | forbidigo:
28 | forbid:
29 | - pattern: fmt.Print*
30 | goconst:
31 | numbers: true
32 | gocritic:
33 | disabled-checks:
34 | - exitAfterDefer
35 | - ifElseChain
36 | - commentFormatting
37 | gomodguard:
38 | blocked:
39 | modules:
40 | - github.com/pkg/errors:
41 | recommendations:
42 | - errors
43 | - fmt
44 | reason: This package is deprecated, use `fmt.Errorf` with `%w` instead
45 | gosec:
46 | excludes:
47 | - G115 # Potential integer overflow when converting between integer types
48 | - G108 # Profiling endpoint automatically exposed on /debug/pprof
49 | severity: low
50 | makezero:
51 | always: false
52 | staticcheck:
53 | checks:
54 | - -SA1019
55 | - -ST1000
56 | - -ST1005
57 | - all
58 | exclusions:
59 | generated: lax
60 | presets:
61 | - comments
62 | - common-false-positives
63 | - legacy
64 | - std-error-handling
65 | rules:
66 | - linters:
67 | - goconst
68 | path: (.+)_test\.go
69 | paths:
70 | - third_party$
71 | - builtin$
72 | - examples$
73 | formatters:
74 | enable:
75 | - gofumpt
76 | settings:
77 | gofumpt:
78 | module-path: pgstream
79 | extra-rules: false
80 | exclusions:
81 | generated: lax
82 | paths:
83 | - third_party$
84 | - builtin$
85 | - examples$
86 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | # See https://pre-commit.com for more information
2 | # See https://pre-commit.com/hooks.html for more hooks
3 | repos:
4 | - repo: https://github.com/pre-commit/pre-commit-hooks
5 | rev: v4.4.0
6 | hooks:
7 | - id: trailing-whitespace
8 | - id: end-of-file-fixer
9 | - id: check-yaml
10 | - id: check-shebang-scripts-are-executable
11 | - id: check-executables-have-shebangs
12 | - id: check-merge-conflict
13 | - repo: https://github.com/golangci/golangci-lint
14 | rev: v2.0.2
15 | hooks:
16 | - id: golangci-lint-full
17 | args: ["--timeout=10m", "--config=.golangci.yml"]
18 | - repo: local
19 | hooks:
20 | - id: license-header-checker
21 | name: license header check
22 | language: system
23 | entry: make license-check
24 | pass_filenames: false
25 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM scratch
2 | COPY pgstream /usr/bin/pgstream
3 | ENTRYPOINT [ "/usr/bin/pgstream" ]
4 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: lint
2 | lint: ## Lint source code
3 | @echo "Linting source code..."
4 | @go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.0.2
5 | @golangci-lint run
6 |
7 | .PHONY: test
8 | test:
9 | @go test -coverprofile=coverage -timeout 10m -race -cover -failfast ./...
10 |
11 | .PHONY: integration-test
12 | integration-test:
13 | @PGSTREAM_INTEGRATION_TESTS=true go test -timeout 180s github.com/xataio/pgstream/pkg/stream/integration
14 |
15 | .PHONY: license-check
16 | license-check:
17 | @curl -s https://raw.githubusercontent.com/lluissm/license-header-checker/master/install.sh | bash
18 | @./bin/license-header-checker -a -r ./license-header.txt . go
19 |
20 | .PHONY: gen-migrations
21 | gen-migrations:
22 | @go install github.com/go-bindata/go-bindata/...
23 | @go-bindata -o migrations/postgres/migrations.go -pkg pgmigrations -ignore migrations.go -prefix "migrations/postgres/" migrations/postgres/
24 |
25 | .PHONY: generate
26 | generate:
27 | # Generate the cli-definition.json file
28 | go run tools/build-cli-definition.go
29 | go run tools/transformer-definition/build-transformers-definition.go
30 |
31 | GIT_COMMIT := $(shell git rev-parse --short HEAD)
32 | .PHONY: build
33 | build:
34 | @go build -ldflags "-X github.com/xataio/pgstream/cmd.Env=development -X github.com/xataio/pgstream/cmd.Version=$(GIT_COMMIT)" .
35 |
36 | .PHONY: build-linux-amd64
37 | build-linux-amd64:
38 | @GOOS=linux GOARCH=amd64 go build -ldflags "-X github.com/xataio/pgstream/cmd.Env=development -X github.com/xataio/pgstream/cmd.Version=$(GIT_COMMIT)" .
39 |
40 | .PHONY: build-linux-arm64
41 | build-linux-arm64:
42 | @GOOS=linux GOARCH=arm64 go build -ldflags "-X github.com/xataio/pgstream/cmd.Env=development -X github.com/xataio/pgstream/cmd.Version=$(GIT_COMMIT)" .
43 |
--------------------------------------------------------------------------------
/brand-kit/README.md:
--------------------------------------------------------------------------------
1 | ## Logos
2 |
3 | Our logo combines a symbol and wordmark. For dark backgrounds, use the inverted (white) logo, and for light backgrounds, use the black logo. Maintain the logo's aspect ratio, provide clear space, and avoid unauthorized modifications.
4 |
5 | | Symbol | Wordmark |
6 | |-------------------------------------------------------|-------------------------------------------------------------|
7 | |  |  |
8 | |  |  |
9 |
10 | ## Banner
11 |
12 | The project banner is a key branding element that can be prominently featured at the top of project documentation, websites, social media profiles, and even on swag stickers. It serves as a visual representation of our project's identity and can be used to create a strong connection with our brand. Ensure that the banner is displayed at its original size to maintain clarity and visibility across various applications.
13 |
14 | 
15 |
--------------------------------------------------------------------------------
/brand-kit/banner/pgstream-banner@2x.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xataio/pgstream/4e5d92e02c61380bad5b9b75adb2f532e88b7675/brand-kit/banner/pgstream-banner@2x.png
--------------------------------------------------------------------------------
/build/docker/postgres/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM postgres:17.2
2 |
3 | RUN apt-get update \
4 | && apt-get install -y postgresql-17-wal2json \
5 |
--------------------------------------------------------------------------------
/build/docker/postgres/postgresql.conf:
--------------------------------------------------------------------------------
1 | # CONNECTION
2 | listen_addresses = '*'
3 |
4 | # MODULES
5 | shared_preload_libraries = 'wal2json'
6 |
7 | # REPLICATION
8 | wal_level = logical # minimal, archive, hot_standby, or logical (change requires restart)
9 | max_wal_senders = 4 # max number of walsender processes (change requires restart)
10 | #wal_keep_segments = 4 # in logfile segments, 16MB each; 0 disables
11 | #wal_sender_timeout = 60s # in milliseconds; 0 disables
12 | max_replication_slots = 4 # max number of replication slots (change requires restart)
13 |
--------------------------------------------------------------------------------
/build/docker/signoz/clickhouse/custom-function.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | executable
4 | histogramQuantile
5 | Float64
6 |
7 | Array(Float64)
8 | buckets
9 |
10 |
11 | Array(Float64)
12 | counts
13 |
14 |
15 | Float64
16 | quantile
17 |
18 | CSV
19 | ./histogramQuantile
20 |
21 |
22 |
--------------------------------------------------------------------------------
/build/docker/signoz/clickhouse/storage.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | 10485760
7 |
8 |
9 | s3
10 |
16 | https://BUCKET-NAME.s3-REGION-NAME.amazonaws.com/data/
17 | ACCESS-KEY-ID
18 | SECRET-ACCESS-KEY
19 |
21 |
22 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 | default
32 |
33 |
34 | s3
35 | 0
36 |
37 |
38 |
39 |
40 |
41 |
42 |
--------------------------------------------------------------------------------
/build/docker/signoz/clickhouse/user_scripts/histogramQuantile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xataio/pgstream/4e5d92e02c61380bad5b9b75adb2f532e88b7675/build/docker/signoz/clickhouse/user_scripts/histogramQuantile
--------------------------------------------------------------------------------
/build/docker/signoz/otel-collector-opamp-config.yaml:
--------------------------------------------------------------------------------
1 | server_endpoint: ws://signoz:4320/v1/opamp
2 |
--------------------------------------------------------------------------------
/build/docker/signoz/prometheus.yml:
--------------------------------------------------------------------------------
1 | # my global config
2 | global:
3 | scrape_interval: 5s # Set the scrape interval to every 5 seconds. Default is every 1 minute.
4 | evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
5 | # scrape_timeout is set to the global default (10s).
6 |
7 | # Alertmanager configuration
8 | alerting:
9 | alertmanagers:
10 | - static_configs:
11 | - targets:
12 | - alertmanager:9093
13 |
14 | # Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
15 | rule_files: []
16 | # - "first_rules.yml"
17 | # - "second_rules.yml"
18 | # - 'alerts.yml'
19 |
20 | # A scrape configuration containing exactly one endpoint to scrape:
21 | # Here it's Prometheus itself.
22 | scrape_configs: []
23 |
24 | remote_read:
25 | - url: tcp://clickhouse:9000/signoz_metrics
26 |
--------------------------------------------------------------------------------
/cmd/config/test/test_transformer_rules.yaml:
--------------------------------------------------------------------------------
1 | transformations:
2 | validation_mode: relaxed
3 | table_transformers:
4 | - schema: public
5 | table: test
6 | column_transformers:
7 | name:
8 | name: greenmask_firstname
9 | dynamic_parameters:
10 | gender:
11 | column: sex
12 |
--------------------------------------------------------------------------------
/docs/img/pgstream_arch_v1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xataio/pgstream/4e5d92e02c61380bad5b9b75adb2f532e88b7675/docs/img/pgstream_arch_v1.png
--------------------------------------------------------------------------------
/docs/tutorials/kafka2os_tutorial.env:
--------------------------------------------------------------------------------
1 | # Listener config
2 | PGSTREAM_KAFKA_READER_SERVERS="localhost:9092"
3 | PGSTREAM_KAFKA_TOPIC_NAME=pgstream
4 | PGSTREAM_KAFKA_READER_CONSUMER_GROUP_ID=pgstream-opensearch-consumer-group
5 |
6 | # Processor config
7 | PGSTREAM_OPENSEARCH_STORE_URL="http://admin:admin@localhost:9200"
8 | PGSTREAM_SEARCH_INDEXER_BATCH_SIZE=25
9 | PGSTREAM_SEARCH_INDEXER_BATCH_TIMEOUT=5s
10 |
--------------------------------------------------------------------------------
/docs/tutorials/kafka2os_tutorial.yaml:
--------------------------------------------------------------------------------
1 | source:
2 | kafka:
3 | servers: ["localhost:9092"]
4 | topic:
5 | name: "pgstream"
6 | consumer_group:
7 | id: "pgstream-opensearch-consumer-group"
8 | start_offset: "earliest" # options are earliest or latest
9 | target:
10 | search:
11 | engine: "opensearch" # options are elasticsearch or opensearch
12 | url: "http://admin:admin@localhost:9200" # URL of the search engine
13 | batch:
14 | timeout: 5000 # batch timeout in milliseconds
15 | size: 25 # number of messages in a batch
16 |
--------------------------------------------------------------------------------
/docs/tutorials/kafka2pg_tutorial.env:
--------------------------------------------------------------------------------
1 | # Listener config
2 | PGSTREAM_KAFKA_READER_SERVERS="localhost:9092"
3 | PGSTREAM_KAFKA_TOPIC_NAME=pgstream
4 | PGSTREAM_KAFKA_READER_CONSUMER_GROUP_ID=pgstream-postgres-consumer-group
5 |
6 | # Processor config
7 | PGSTREAM_POSTGRES_WRITER_TARGET_URL="postgres://postgres:postgres@localhost:7654?sslmode=disable"
8 | PGSTREAM_POSTGRES_WRITER_BATCH_SIZE=25
9 | PGSTREAM_POSTGRES_WRITER_BATCH_TIMEOUT=5s
10 | PGSTREAM_POSTGRES_WRITER_SCHEMALOG_STORE_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable"
11 |
--------------------------------------------------------------------------------
/docs/tutorials/kafka2pg_tutorial.yaml:
--------------------------------------------------------------------------------
1 | source:
2 | kafka:
3 | servers: ["localhost:9092"]
4 | topic:
5 | name: "pgstream"
6 | consumer_group:
7 | id: "pgstream-postgres-consumer-group"
8 | start_offset: "earliest" # options are earliest or latest
9 | target:
10 | postgres:
11 | url: "postgres://postgres:postgres@localhost:7654?sslmode=disable"
12 | batch:
13 | timeout: 5000 # batch timeout in milliseconds
14 | size: 25 # number of messages in a batch
15 | schema_log_store_url: "postgres://postgres:postgres@localhost:5432?sslmode=disable"
16 | disable_triggers: false # whether to disable triggers on the target database
17 | on_conflict_action: "nothing" # options are update, nothing or error
18 |
--------------------------------------------------------------------------------
/docs/tutorials/pg2kafka_tutorial.env:
--------------------------------------------------------------------------------
1 | # Listener config
2 | PGSTREAM_POSTGRES_LISTENER_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable"
3 | PGSTREAM_POSTGRES_REPLICATION_SLOT_NAME=pgstream_tutorial_slot
4 |
5 | # Processor config
6 | PGSTREAM_INJECTOR_STORE_POSTGRES_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable"
7 | PGSTREAM_KAFKA_WRITER_SERVERS="localhost:9092"
8 | PGSTREAM_KAFKA_TOPIC_NAME=pgstream
9 | PGSTREAM_KAFKA_TOPIC_PARTITIONS=1
10 | PGSTREAM_KAFKA_TOPIC_REPLICATION_FACTOR=1
11 | PGSTREAM_KAFKA_TOPIC_AUTO_CREATE=true
12 |
--------------------------------------------------------------------------------
/docs/tutorials/pg2kafka_tutorial.yaml:
--------------------------------------------------------------------------------
1 | source:
2 | postgres:
3 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable"
4 | mode: replication
5 | replication:
6 | replication_slot: "pgstream_tutorial_slot"
7 | target:
8 | kafka:
9 | servers: ["localhost:9092"]
10 | topic:
11 | name: "pgstream" # name of the Kafka topic
12 | partitions: 1 # number of partitions for the topic
13 | replication_factor: 1 # replication factor for the topic
14 | auto_create: true # whether to automatically create the topic if it doesn't exist
15 | modifiers:
16 | injector:
17 | enabled: true # whether to inject pgstream metadata into the WAL events
18 |
--------------------------------------------------------------------------------
/docs/tutorials/pg2os_tutorial.env:
--------------------------------------------------------------------------------
1 | # Listener config
2 | PGSTREAM_POSTGRES_LISTENER_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable"
3 | PGSTREAM_POSTGRES_REPLICATION_SLOT_NAME=pgstream_tutorial_slot
4 |
5 | # Processor config
6 | PGSTREAM_INJECTOR_STORE_POSTGRES_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable"
7 | PGSTREAM_OPENSEARCH_STORE_URL="http://admin:admin@localhost:9200"
8 | PGSTREAM_SEARCH_INDEXER_BATCH_SIZE=25
9 | PGSTREAM_SEARCH_INDEXER_BATCH_TIMEOUT=5s
10 |
--------------------------------------------------------------------------------
/docs/tutorials/pg2os_tutorial.yaml:
--------------------------------------------------------------------------------
1 | source:
2 | postgres:
3 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable"
4 | mode: replication
5 | replication:
6 | replication_slot: "pgstream_tutorial_slot"
7 | target:
8 | search:
9 | engine: "opensearch" # options are elasticsearch or opensearch
10 | url: "http://admin:admin@localhost:9200" # URL of the search engine
11 | batch:
12 | timeout: 5000 # batch timeout in milliseconds
13 | size: 25 # number of messages in a batch
14 | modifiers:
15 | injector:
16 | enabled: true # whether to inject pgstream metadata into the WAL events
17 |
--------------------------------------------------------------------------------
/docs/tutorials/pg2pg_transformer_tutorial.env:
--------------------------------------------------------------------------------
1 | # Listener config
2 | PGSTREAM_POSTGRES_LISTENER_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable"
3 | PGSTREAM_POSTGRES_REPLICATION_SLOT_NAME=pgstream_tutorial_slot
4 |
5 | # Processor config
6 | PGSTREAM_TRANSFORMER_RULES_FILE="docs/tutorials/tutorial_transformer_rules.yaml"
7 | PGSTREAM_POSTGRES_WRITER_TARGET_URL="postgres://postgres:postgres@localhost:7654?sslmode=disable"
8 | PGSTREAM_POSTGRES_WRITER_BATCH_SIZE=25
9 | PGSTREAM_POSTGRES_WRITER_BATCH_TIMEOUT=5s
10 | PGSTREAM_POSTGRES_WRITER_SCHEMALOG_STORE_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable"
11 |
--------------------------------------------------------------------------------
/docs/tutorials/pg2pg_transformer_tutorial.yaml:
--------------------------------------------------------------------------------
1 | source:
2 | postgres:
3 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable"
4 | mode: replication # options are replication, snapshot or snapshot_and_replication
5 | replication:
6 | replication_slot: pgstream_tutorial_slot
7 |
8 | target:
9 | postgres:
10 | url: "postgres://postgres:postgres@localhost:7654?sslmode=disable"
11 | batch:
12 | timeout: 5000 # batch timeout in milliseconds
13 | size: 25 # number of messages in a batch
14 | disable_triggers: false # whether to disable triggers on the target database
15 | on_conflict_action: "nothing" # options are update, nothing or error
16 |
17 | modifiers:
18 | transformations:
19 | validation_mode: relaxed
20 | table_transformers:
21 | - schema: public
22 | table: test
23 | column_transformers:
24 | email:
25 | name: neosync_email
26 | parameters:
27 | preserve_length: true
28 | preserve_domain: true
29 | email_type: fullname
30 | name:
31 | name: greenmask_firstname
32 | parameters:
33 | generator: deterministic
34 | gender: Female
35 |
--------------------------------------------------------------------------------
/docs/tutorials/pg2pg_tutorial.env:
--------------------------------------------------------------------------------
1 | # Listener config
2 | PGSTREAM_POSTGRES_LISTENER_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable"
3 | PGSTREAM_POSTGRES_REPLICATION_SLOT_NAME=pgstream_tutorial_slot
4 |
5 | # Processor config
6 | PGSTREAM_POSTGRES_WRITER_TARGET_URL="postgres://postgres:postgres@localhost:7654?sslmode=disable"
7 | PGSTREAM_POSTGRES_WRITER_BATCH_SIZE=25
8 | PGSTREAM_POSTGRES_WRITER_BATCH_TIMEOUT=5s
9 | PGSTREAM_POSTGRES_WRITER_SCHEMALOG_STORE_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable"
10 |
--------------------------------------------------------------------------------
/docs/tutorials/pg2pg_tutorial.yaml:
--------------------------------------------------------------------------------
1 | source:
2 | postgres:
3 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable"
4 | mode: replication # options are replication, snapshot or snapshot_and_replication
5 | replication:
6 | replication_slot: pgstream_tutorial_slot
7 |
8 | target:
9 | postgres:
10 | url: "postgres://postgres:postgres@localhost:7654?sslmode=disable"
11 | batch:
12 | timeout: 5000 # batch timeout in milliseconds
13 | size: 25 # number of messages in a batch
14 | disable_triggers: false # whether to disable triggers on the target database
15 | on_conflict_action: "nothing" # options are update, nothing or error
16 |
--------------------------------------------------------------------------------
/docs/tutorials/pg2webhook_tutorial.env:
--------------------------------------------------------------------------------
1 | # Listener config
2 | PGSTREAM_POSTGRES_LISTENER_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable"
3 | PGSTREAM_POSTGRES_REPLICATION_SLOT_NAME=pgstream_tutorial_slot
4 |
5 | # Processor config
6 | PGSTREAM_WEBHOOK_SUBSCRIPTION_STORE_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable"
7 | PGSTREAM_WEBHOOK_SUBSCRIPTION_STORE_CACHE_ENABLED=true
8 | PGSTREAM_WEBHOOK_SUBSCRIPTION_STORE_CACHE_REFRESH_INTERVAL="60s"
9 | PGSTREAM_INJECTOR_STORE_POSTGRES_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable"
10 |
--------------------------------------------------------------------------------
/docs/tutorials/pg2webhook_tutorial.yaml:
--------------------------------------------------------------------------------
1 | source:
2 | postgres:
3 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable"
4 | mode: replication # options are replication, snapshot or snapshot_and_replication
5 | replication:
6 | replication_slot: pgstream_tutorial_slot
7 |
8 | target:
9 | webhooks:
10 | subscriptions:
11 | store:
12 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" # URL of the database where the webhook subscriptions are stored
13 | cache:
14 | enabled: true # whether to enable caching for the subscription store
15 | refresh_interval: 60 # interval in seconds to refresh the cache
16 |
17 | modifiers:
18 | injector:
19 | enabled: true # whether to inject pgstream metadata into the WAL events
20 |
--------------------------------------------------------------------------------
/docs/tutorials/snapshot2pg_tutorial.env:
--------------------------------------------------------------------------------
1 | # Listener config
2 | PGSTREAM_POSTGRES_SNAPSHOT_LISTENER_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable"
3 | PGSTREAM_POSTGRES_SNAPSHOT_TABLES="*"
4 | PGSTREAM_POSTGRES_SNAPSHOT_SCHEMA_WORKERS=4
5 | PGSTREAM_POSTGRES_SNAPSHOT_TABLE_WORKERS=4
6 | PGSTREAM_POSTGRES_SNAPSHOT_BATCH_PAGE_SIZE=1000
7 | PGSTREAM_POSTGRES_SNAPSHOT_WORKERS=1
8 | PGSTREAM_POSTGRES_SNAPSHOT_STORE_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable"
9 |
10 | # Processor config
11 | PGSTREAM_POSTGRES_WRITER_TARGET_URL="postgres://postgres:postgres@localhost:7654?sslmode=disable"
12 | PGSTREAM_POSTGRES_WRITER_BATCH_SIZE=25
13 | PGSTREAM_POSTGRES_WRITER_BATCH_TIMEOUT=5s
14 | PGSTREAM_POSTGRES_WRITER_DISABLE_TRIGGERS=true
15 | PGSTREAM_POSTGRES_WRITER_ON_CONFLICT_ACTION=nothing
16 |
--------------------------------------------------------------------------------
/docs/tutorials/snapshot2pg_tutorial.yaml:
--------------------------------------------------------------------------------
1 | source:
2 | postgres:
3 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable"
4 | mode: snapshot # options are replication, snapshot or snapshot_and_replication
5 | snapshot: # when mode is snapshot or snapshot_and_replication
6 | mode: full # options are full, schema or data
7 | tables: ["*"] # tables to snapshot, can be a list of table names or a pattern
8 | recorder:
9 | repeatable_snapshots: true # whether to repeat snapshots that have already been taken
10 | postgres_url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" # URL of the database where the snapshot status is recorded
11 | snapshot_workers: 4 # number of schemas to be snapshotted in parallel
12 | data: # when mode is full or data
13 | schema_workers: 4 # number of schema tables to be snapshotted in parallel
14 | table_workers: 4 # number of workers to snapshot a table in parallel
15 | batch_page_size: 1000 # number of pages to read per batch
16 | schema: # when mode is full or schema
17 | mode: pgdump_pgrestore # options are pgdump_pgrestore or schemalog
18 | pgdump_pgrestore:
19 | clean_target_db: false # whether to clean the target database before restoring
20 |
21 | target:
22 | postgres:
23 | url: "postgres://postgres:postgres@localhost:7654?sslmode=disable"
24 | batch:
25 | timeout: 5000 # batch timeout in milliseconds
26 | size: 25 # number of messages in a batch
27 | disable_triggers: true # whether to disable triggers on the target database (matches PGSTREAM_POSTGRES_WRITER_DISABLE_TRIGGERS=true in snapshot2pg_tutorial.env)
28 | on_conflict_action: "nothing" # options are update, nothing or error
29 |
--------------------------------------------------------------------------------
/docs/tutorials/tutorial_transformer_rules.yaml:
--------------------------------------------------------------------------------
1 | transformations:
2 | validation_mode: relaxed
3 | table_transformers:
4 | - schema: public
5 | table: test
6 | column_transformers:
7 | email:
8 | name: neosync_email
9 | parameters:
10 | preserve_length: true
11 | preserve_domain: true
12 | email_type: fullname
13 | name:
14 | name: greenmask_firstname
15 | parameters:
16 | generator: deterministic
17 | gender: Female
18 |
--------------------------------------------------------------------------------
/internal/http/http.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package http
4 |
5 | import (
6 | "context"
7 | "net/http"
8 | )
9 |
10 | type Client interface { // minimal HTTP client abstraction; satisfied by *http.Client and by the package mocks
11 | Do(*http.Request) (*http.Response, error)
12 | }
13 | 
14 | type Server interface { // minimal HTTP server abstraction with context-aware shutdown
15 | Start(address string) error
16 | Shutdown(context.Context) error
17 | }
18 |
--------------------------------------------------------------------------------
/internal/http/mocks/mock_http_client.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package mocks
4 |
5 | import "net/http"
6 |
7 | type Client struct { // mock of the internal http Client interface; tests set DoFn to script responses
8 | DoFn func(*http.Request) (*http.Response, error)
9 | }
10 | 
11 | func (m *Client) Do(req *http.Request) (*http.Response, error) {
12 | return m.DoFn(req) // panics if DoFn is unset — tests must provide it
13 | }
14 |
--------------------------------------------------------------------------------
/internal/json/json.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package json
4 |
5 | import (
6 | json "github.com/bytedance/sonic"
7 | )
8 |
9 | func Unmarshal(b []byte, v any) error { // Unmarshal decodes JSON via bytedance/sonic, mirroring encoding/json's signature
10 | return json.Unmarshal(b, v)
11 | }
12 | 
13 | func Marshal(v any) ([]byte, error) { // Marshal encodes JSON via bytedance/sonic, mirroring encoding/json's signature
14 | return json.Marshal(v)
15 | }
16 |
--------------------------------------------------------------------------------
/internal/postgres/errors.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package postgres
4 |
5 | import (
6 | "errors"
7 | "fmt"
8 | "strings"
9 |
10 | "github.com/jackc/pgx/v5"
11 | "github.com/jackc/pgx/v5/pgconn"
12 | )
13 |
14 | var (
15 | ErrConnTimeout = errors.New("connection timeout") // sentinel returned for any pgconn-reported timeout
16 | ErrNoRows = errors.New("no rows") // sentinel standing in for pgx.ErrNoRows
17 | )
18 | 
19 | type ErrRelationDoesNotExist struct { // typed error for SQLSTATE 42P01 (undefined_table)
20 | Details string
21 | }
22 | 
23 | func (e *ErrRelationDoesNotExist) Error() string {
24 | return fmt.Sprintf("relation does not exist: %s", e.Details)
25 | }
26 | 
27 | type ErrConstraintViolation struct { // typed error for SQLSTATE class 23 (integrity constraint violations)
28 | Details string
29 | }
30 | 
31 | func (e *ErrConstraintViolation) Error() string {
32 | return fmt.Sprintf("constraint violation: %s", e.Details)
33 | }
34 | 
35 | type ErrSyntaxError struct { // typed error for SQLSTATE 42601 (syntax_error)
36 | Details string
37 | }
38 | 
39 | func (e *ErrSyntaxError) Error() string {
40 | return fmt.Sprintf("syntax error: %s", e.Details)
41 | }
42 | 
43 | type ErrDataException struct { // typed error for SQLSTATE class 22 (data exceptions)
44 | Details string
45 | }
46 | 
47 | func (e *ErrDataException) Error() string {
48 | return fmt.Sprintf("data exception: %s", e.Details)
49 | }
50 | 
51 | type ErrRelationAlreadyExists struct { // NOTE(review): mapError below never returns this type (SQLSTATE 42P07 is not mapped); presumably constructed elsewhere — confirm
52 | Details string
53 | }
54 | 
55 | func (e *ErrRelationAlreadyExists) Error() string {
56 | return fmt.Sprintf("relation already exists: %v", e.Details) // %v on a string; the sibling types use %s
57 | }
58 | 
59 | func mapError(err error) error { // mapError normalizes pgx/pgconn errors into this package's sentinel/typed errors; unrecognized errors are returned unchanged
60 | if pgconn.Timeout(err) {
61 | return ErrConnTimeout
62 | }
63 | 
64 | if errors.Is(err, pgx.ErrNoRows) {
65 | return ErrNoRows
66 | }
67 | 
68 | var pgErr *pgconn.PgError
69 | if errors.As(err, &pgErr) {
70 | if pgErr.Code == "42P01" { // undefined_table
71 | return &ErrRelationDoesNotExist{
72 | Details: pgErr.Message,
73 | }
74 | }
75 | if pgErr.Code == "42601" { // syntax_error
76 | return &ErrSyntaxError{
77 | Details: pgErr.Message,
78 | }
79 | }
80 | // Class 22 — Data Exception
81 | if strings.HasPrefix(pgErr.Code, "22") {
82 | return &ErrDataException{
83 | Details: pgErr.Message,
84 | }
85 | }
86 | // Class 23 — Integrity Constraint Violation
87 | if strings.HasPrefix(pgErr.Code, "23") {
88 | return &ErrConstraintViolation{
89 | Details: pgErr.Message,
90 | }
91 | }
92 | }
93 | 
94 | return err
95 | }
96 |
--------------------------------------------------------------------------------
/internal/postgres/instrumentation/instrumented_pg_dump_restore.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package instrumentation
4 |
5 | import (
6 | "context"
7 |
8 | pglib "github.com/xataio/pgstream/internal/postgres"
9 | "github.com/xataio/pgstream/pkg/otel"
10 | "go.opentelemetry.io/otel/attribute"
11 | "go.opentelemetry.io/otel/trace"
12 | )
13 |
14 | // PGDumpRestore wraps pg_dump/pg_restore function values with OpenTelemetry tracing.
15 | type PGDumpRestore struct {
16 | pgdumpFn pglib.PGDumpFn
17 | pgrestoreFn pglib.PGRestoreFn
18 | tracer trace.Tracer
19 | }
20 | 
21 | // NewPGDumpFn returns a pg_dump function that traces each invocation of pgdumpFn.
22 | func NewPGDumpFn(pgdumpFn pglib.PGDumpFn, instrumentation *otel.Instrumentation) pglib.PGDumpFn {
23 | pgdr := &PGDumpRestore{
24 | pgdumpFn: pgdumpFn,
25 | tracer: instrumentation.Tracer,
26 | }
27 | return pgdr.PGDump
28 | }
29 | 
30 | // NewPGRestoreFn returns a pg_restore function that traces each invocation of pgrestoreFn.
31 | func NewPGRestoreFn(pgrestoreFn pglib.PGRestoreFn, instrumentation *otel.Instrumentation) pglib.PGRestoreFn {
32 | pgdr := &PGDumpRestore{
33 | pgrestoreFn: pgrestoreFn,
34 | tracer: instrumentation.Tracer,
35 | }
36 | return pgdr.PGRestore
37 | }
38 | 
39 | // PGDump runs the wrapped pg_dump function inside a "pgdump" span recording the
40 | // dump scope (schemas, tables, exclusions, clean flag) as attributes.
41 | func (i *PGDumpRestore) PGDump(ctx context.Context, opts pglib.PGDumpOptions) (dump []byte, err error) {
42 | ctx, span := otel.StartSpan(ctx, i.tracer, "pgdump", trace.WithAttributes([]attribute.KeyValue{
43 | {Key: "schemas", Value: attribute.StringSliceValue(opts.Schemas)},
44 | {Key: "tables", Value: attribute.StringSliceValue(opts.Tables)},
45 | {Key: "exclude_tables", Value: attribute.StringSliceValue(opts.ExcludeTables)},
46 | {Key: "clean", Value: attribute.BoolValue(opts.Clean)},
47 | }...))
48 | // Close the span via a closure so it sees the *final* value of the named
49 | // return err: a plain `defer otel.CloseSpan(span, err)` evaluates err at
50 | // defer time (always nil here), so failures were never recorded on the span.
51 | defer func() { otel.CloseSpan(span, err) }()
52 | return i.pgdumpFn(ctx, opts)
53 | }
54 | 
55 | // PGRestore runs the wrapped pg_restore function inside a "pgrestore" span.
56 | func (i *PGDumpRestore) PGRestore(ctx context.Context, opts pglib.PGRestoreOptions, dump []byte) (out string, err error) {
57 | ctx, span := otel.StartSpan(ctx, i.tracer, "pgrestore")
58 | defer func() { otel.CloseSpan(span, err) }() // closure for the same eager-evaluation reason as PGDump
59 | return i.pgrestoreFn(ctx, opts, dump)
60 | }
52 |
--------------------------------------------------------------------------------
/internal/postgres/instrumentation/instrumented_querier_builder.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package instrumentation
4 |
5 | import (
6 | "context"
7 |
8 | pglib "github.com/xataio/pgstream/internal/postgres"
9 | "github.com/xataio/pgstream/pkg/otel"
10 | )
11 |
12 | func NewQuerierBuilder(b pglib.QuerierBuilder, i *otel.Instrumentation) (pglib.QuerierBuilder, error) { // NewQuerierBuilder wraps b so every Querier it builds is instrumented
13 | return func(ctx context.Context, url string) (pglib.Querier, error) {
14 | querier, err := b(ctx, url)
15 | if err != nil {
16 | return nil, err
17 | }
18 | return NewQuerier(querier, i) // instrumented counterpart, defined elsewhere in this package
19 | }, nil
20 | }
21 |
--------------------------------------------------------------------------------
/internal/postgres/instrumentation/instrumented_tx.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package instrumentation
4 |
5 | import (
6 | "context"
7 |
8 | pglib "github.com/xataio/pgstream/internal/postgres"
9 | "github.com/xataio/pgstream/pkg/otel"
10 | "go.opentelemetry.io/otel/trace"
11 | )
12 |
13 | // Tx wraps a pglib.Tx, starting an OpenTelemetry span around every operation.
14 | type Tx struct {
15 | inner pglib.Tx
16 | tracer trace.Tracer
17 | }
18 | 
19 | // NewTx decorates t with tracing; it returns t unchanged when instrumentation
20 | // is disabled (nil).
21 | func NewTx(t pglib.Tx, instrumentation *otel.Instrumentation) pglib.Tx {
22 | if instrumentation == nil {
23 | return t
24 | }
25 | 
26 | return &Tx{
27 | inner: t,
28 | tracer: instrumentation.Tracer,
29 | }
30 | }
31 | 
32 | // Query runs the inner Query inside a "tx.Query" span annotated with query attributes.
33 | func (i *Tx) Query(ctx context.Context, query string, args ...any) (rows pglib.Rows, err error) {
34 | queryAttrs := queryAttributes(query)
35 | ctx, span := otel.StartSpan(ctx, i.tracer, "tx.Query", trace.WithAttributes(queryAttrs...))
36 | // Close the span via a closure so it records the *final* value of the named
37 | // return err: a plain `defer otel.CloseSpan(span, err)` evaluates err eagerly
38 | // (always nil at this point), so query failures were never attached to the span.
39 | defer func() { otel.CloseSpan(span, err) }()
40 | return i.inner.Query(ctx, query, args...)
41 | }
42 | 
43 | // QueryRow runs the inner QueryRow inside a "tx.QueryRow" span. There is no
44 | // error to record here: errors surface later via Row.Scan.
45 | func (i *Tx) QueryRow(ctx context.Context, query string, args ...any) pglib.Row {
46 | queryAttrs := queryAttributes(query)
47 | ctx, span := otel.StartSpan(ctx, i.tracer, "tx.QueryRow", trace.WithAttributes(queryAttrs...))
48 | defer otel.CloseSpan(span, nil)
49 | return i.inner.QueryRow(ctx, query, args...)
50 | }
51 | 
52 | // Exec runs the inner Exec inside a "tx.Exec" span annotated with query attributes.
53 | func (i *Tx) Exec(ctx context.Context, query string, args ...any) (tag pglib.CommandTag, err error) {
54 | queryAttrs := queryAttributes(query)
55 | ctx, span := otel.StartSpan(ctx, i.tracer, "tx.Exec", trace.WithAttributes(queryAttrs...))
56 | defer func() { otel.CloseSpan(span, err) }() // closure for the same eager-evaluation reason as Query
57 | return i.inner.Exec(ctx, query, args...)
58 | }
59 | 
60 | // CopyFrom runs the inner CopyFrom inside a "tx.CopyFrom" span.
61 | func (i *Tx) CopyFrom(ctx context.Context, tableName string, columnNames []string, srcRows [][]any) (rowCount int64, err error) {
62 | ctx, span := otel.StartSpan(ctx, i.tracer, "tx.CopyFrom")
63 | defer func() { otel.CloseSpan(span, err) }() // closure for the same eager-evaluation reason as Query
64 | return i.inner.CopyFrom(ctx, tableName, columnNames, srcRows)
65 | }
55 |
--------------------------------------------------------------------------------
/internal/postgres/mocks/mock_pg_mapper.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package mocks
4 |
5 | type Mapper struct { // mock for the pg type mapper; tests set TypeForOIDFn
6 | TypeForOIDFn func(oid uint32) (string, error) // NOTE(review): postgres.Mapper.TypeForOID takes (ctx, oid) — this mock's signature lacks ctx; confirm which interface it is meant to satisfy
7 | }
8 | 
9 | func (m *Mapper) TypeForOID(oid uint32) (string, error) {
10 | return m.TypeForOIDFn(oid)
11 | }
12 |
--------------------------------------------------------------------------------
/internal/postgres/mocks/mock_pg_replication_conn.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package mocks
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/internal/postgres"
9 | )
10 |
11 | type ReplicationConn struct { // mock of the postgres replication connection; each method delegates to its matching Fn field (a nil Fn panics)
12 | IdentifySystemFn func(ctx context.Context) (postgres.IdentifySystemResult, error)
13 | StartReplicationFn func(ctx context.Context, cfg postgres.ReplicationConfig) error
14 | SendStandbyStatusUpdateFn func(ctx context.Context, lsn uint64) error
15 | ReceiveMessageFn func(ctx context.Context) (*postgres.ReplicationMessage, error)
16 | CloseFn func(ctx context.Context) error
17 | }
18 | 
19 | func (m *ReplicationConn) IdentifySystem(ctx context.Context) (postgres.IdentifySystemResult, error) {
20 | return m.IdentifySystemFn(ctx)
21 | }
22 | 
23 | func (m *ReplicationConn) StartReplication(ctx context.Context, cfg postgres.ReplicationConfig) error {
24 | return m.StartReplicationFn(ctx, cfg)
25 | }
26 | 
27 | func (m *ReplicationConn) SendStandbyStatusUpdate(ctx context.Context, lsn uint64) error {
28 | return m.SendStandbyStatusUpdateFn(ctx, lsn)
29 | }
30 | 
31 | func (m *ReplicationConn) ReceiveMessage(ctx context.Context) (*postgres.ReplicationMessage, error) {
32 | return m.ReceiveMessageFn(ctx)
33 | }
34 | 
35 | func (m *ReplicationConn) Close(ctx context.Context) error {
36 | return m.CloseFn(ctx)
37 | }
38 |
--------------------------------------------------------------------------------
/internal/postgres/mocks/mock_row.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package mocks
4 |
5 | type Row struct { // mock of postgres.Row; tests must set ScanFn
6 | ScanFn func(args ...any) error
7 | }
8 | 
9 | func (m *Row) Scan(args ...any) error {
10 | return m.ScanFn(args...)
11 | }
12 |
--------------------------------------------------------------------------------
/internal/postgres/mocks/mock_rows.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package mocks
4 |
5 | import (
6 | "github.com/jackc/pgx/v5"
7 | "github.com/jackc/pgx/v5/pgconn"
8 | )
9 |
10 | type Rows struct { // mock of postgres.Rows; methods delegate to the corresponding Fn fields
11 | CloseFn func()
12 | ErrFn func() error
13 | FieldDescriptionsFn func() []pgconn.FieldDescription
14 | NextFn func(i uint) bool // i is the 1-based count of Next calls so far
15 | ScanFn func(dest ...any) error
16 | ValuesFn func() ([]any, error)
17 | RawValuesFn func() [][]byte
18 | nextCalls uint // number of times Next has been invoked
19 | }
20 | 
21 | func (m *Rows) Close() {
22 | m.CloseFn()
23 | }
24 | 
25 | func (m *Rows) Err() error {
26 | return m.ErrFn()
27 | }
28 | 
29 | func (m *Rows) CommandTag() pgconn.CommandTag {
30 | return pgconn.CommandTag{} // zero value; not configurable in this mock
31 | }
32 | 
33 | func (m *Rows) FieldDescriptions() []pgconn.FieldDescription {
34 | return m.FieldDescriptionsFn()
35 | }
36 | 
37 | func (m *Rows) Next() bool {
38 | m.nextCalls++
39 | return m.NextFn(m.nextCalls) // pass the 1-based call number so tests can script a row sequence
40 | }
41 | 
42 | func (m *Rows) Scan(dest ...any) error {
43 | return m.ScanFn(dest...)
44 | }
45 | 
46 | func (m *Rows) Values() ([]any, error) {
47 | return m.ValuesFn()
48 | }
49 | 
50 | func (m *Rows) RawValues() [][]byte {
51 | return m.RawValuesFn()
52 | }
53 | 
54 | func (m *Rows) Conn() *pgx.Conn {
55 | return &pgx.Conn{} // dummy connection; not configurable in this mock
56 | }
57 |
--------------------------------------------------------------------------------
/internal/postgres/mocks/mock_tx.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package mocks
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/internal/postgres"
9 | )
10 |
11 | type Tx struct { // mock of postgres.Tx; Exec passes a 1-based call count to ExecFn
12 | QueryRowFn func(ctx context.Context, query string, args ...any) postgres.Row
13 | QueryFn func(ctx context.Context, query string, args ...any) (postgres.Rows, error)
14 | ExecFn func(ctx context.Context, i uint, query string, args ...any) (postgres.CommandTag, error)
15 | CopyFromFn func(ctx context.Context, tableName string, columnNames []string, srcRows [][]any) (int64, error)
16 | execCallCount uint // incremented on every Exec; plain uint, so the mock is not safe for concurrent use
17 | }
18 | 
19 | func (m *Tx) QueryRow(ctx context.Context, query string, args ...any) postgres.Row {
20 | return m.QueryRowFn(ctx, query, args...)
21 | }
22 | 
23 | func (m *Tx) Query(ctx context.Context, query string, args ...any) (postgres.Rows, error) {
24 | return m.QueryFn(ctx, query, args...)
25 | }
26 | 
27 | func (m *Tx) Exec(ctx context.Context, query string, args ...any) (postgres.CommandTag, error) {
28 | m.execCallCount++
29 | return m.ExecFn(ctx, m.execCallCount, query, args...) // expose the call number so tests can vary behavior per call
30 | }
31 | 
32 | func (m *Tx) CopyFrom(ctx context.Context, tableName string, columnNames []string, srcRows [][]any) (rowCount int64, err error) {
33 | return m.CopyFromFn(ctx, tableName, columnNames, srcRows)
34 | }
35 |
--------------------------------------------------------------------------------
/internal/postgres/pg_mapper.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package postgres
4 |
5 | import (
6 | "context"
7 | "fmt"
8 |
9 | "github.com/jackc/pgx/v5/pgtype"
10 | )
11 |
12 | // Mapper resolves PostgreSQL type names from OIDs, using the static pgtype
13 | // registry first and falling back to a database lookup for unknown OIDs.
14 | type Mapper struct {
15 | querier Querier
16 | pgMap *pgtype.Map
17 | }
18 | 
19 | // NewMapper returns a Mapper that resolves unknown OIDs over the given connection.
20 | func NewMapper(conn Querier) *Mapper {
21 | return &Mapper{
22 | querier: conn,
23 | pgMap: pgtype.NewMap(),
24 | }
25 | }
26 | 
27 | // TypeForOID returns the name of the data type identified by oid. Well-known
28 | // OIDs are resolved locally without a round trip; anything else is queried
29 | // from the database.
30 | func (m *Mapper) TypeForOID(ctx context.Context, oid uint32) (string, error) {
31 | dataType, found := m.pgMap.TypeForOID(oid)
32 | if !found {
33 | return m.queryType(ctx, oid)
34 | }
35 | return dataType.Name, nil
36 | }
37 | 
38 | // queryType asks the database for the name of the type whose OID is oid.
39 | func (m *Mapper) queryType(ctx context.Context, oid uint32) (string, error) {
40 | var dataType string
41 | // format_type(oid, NULL) returns the name of the type with the given OID. The
42 | // previous query, fmt.Sprintf("SELECT pg_typeof(%d)", oid), was wrong:
43 | // pg_typeof reports the type of its argument *expression*, so for an integer
44 | // literal it always returned "integer" regardless of the oid value.
45 | if err := m.querier.QueryRow(ctx, "SELECT format_type($1, NULL)", oid).Scan(&dataType); err != nil {
46 | return "", fmt.Errorf("selecting type for OID %d: %w", oid, err)
47 | }
48 | return dataType, nil
49 | }
39 |
--------------------------------------------------------------------------------
/internal/postgres/pg_querier.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package postgres
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/jackc/pgx/v5"
9 | "github.com/jackc/pgx/v5/pgconn"
10 | )
11 |
12 | type Querier interface { // shared query contract implemented by single connections and connection pools
13 | Query(ctx context.Context, query string, args ...any) (Rows, error)
14 | QueryRow(ctx context.Context, query string, args ...any) Row
15 | Exec(ctx context.Context, query string, args ...any) (CommandTag, error)
16 | ExecInTx(ctx context.Context, fn func(tx Tx) error) error
17 | ExecInTxWithOptions(ctx context.Context, fn func(tx Tx) error, txOpts TxOptions) error
18 | CopyFrom(ctx context.Context, tableName string, columnNames []string, srcRows [][]any) (int64, error)
19 | Ping(ctx context.Context) error
20 | Close(ctx context.Context) error
21 | }
22 | 
23 | type Row interface { // re-export of pgx.Row so callers don't depend on pgx directly
24 | pgx.Row
25 | }
26 | 
27 | type Rows interface { // re-export of pgx.Rows so callers don't depend on pgx directly
28 | pgx.Rows
29 | }
30 | 
31 | type CommandTag struct { // thin wrapper around pgconn.CommandTag
32 | pgconn.CommandTag
33 | }
34 | 
35 | type mappedRow struct { // Row decorator that routes Scan errors through mapError
36 | inner Row
37 | }
38 | 
39 | func (mr *mappedRow) Scan(dest ...any) error {
40 | err := mr.inner.Scan(dest...)
41 | return mapError(err) // nil stays nil; driver errors become package sentinels/typed errors
42 | }
43 |
--------------------------------------------------------------------------------
/internal/postgres/pg_querier_builder.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package postgres
4 |
5 | import "context"
6 |
7 | type QuerierBuilder func(context.Context, string) (Querier, error) // builds a Querier from a connection URL
8 | 
9 | func ConnBuilder(ctx context.Context, url string) (Querier, error) { // ConnBuilder builds a single-connection Querier
10 | return NewConn(ctx, url)
11 | }
12 | 
13 | func ConnPoolBuilder(ctx context.Context, url string) (Querier, error) { // ConnPoolBuilder builds a pooled Querier
14 | return NewConnPool(ctx, url)
15 | }
16 |
--------------------------------------------------------------------------------
/internal/postgres/pg_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package postgres
4 |
5 | import (
6 | "testing"
7 |
8 | "github.com/stretchr/testify/require"
9 | )
10 |
11 | func Test_DefaultReplicationSlotName(t *testing.T) { // verifies DB names are wrapped in pgstream_/_slot and dots become underscores
12 | t.Parallel()
13 | 
14 | tests := []struct {
15 | name string // database name fed to DefaultReplicationSlotName (also the subtest name)
16 | wantSlotName string
17 | }{
18 | {
19 | name: "example",
20 | wantSlotName: "pgstream_example_slot",
21 | },
22 | {
23 | name: "example.com",
24 | wantSlotName: "pgstream_example_com_slot",
25 | },
26 | {
27 | name: "example.test.com",
28 | wantSlotName: "pgstream_example_test_com_slot",
29 | },
30 | }
31 | 
32 | for _, tc := range tests {
33 | t.Run(tc.name, func(t *testing.T) {
34 | t.Parallel()
35 | 
36 | slotName := DefaultReplicationSlotName(tc.name)
37 | require.Equal(t, tc.wantSlotName, slotName)
38 | })
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/internal/postgres/pg_tx.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package postgres
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/jackc/pgx/v5"
9 | )
10 |
11 | type Tx interface { // subset of Querier operations available within a transaction
12 | Query(ctx context.Context, query string, args ...any) (Rows, error)
13 | QueryRow(ctx context.Context, query string, args ...any) Row
14 | Exec(ctx context.Context, query string, args ...any) (CommandTag, error)
15 | CopyFrom(ctx context.Context, tableName string, columnNames []string, srcRows [][]any) (int64, error)
16 | }
17 | 
18 | type TxIsolationLevel string // string values map directly onto pgx.TxIsoLevel
19 | 
20 | const (
21 | Serializable TxIsolationLevel = "serializable"
22 | RepeatableRead TxIsolationLevel = "repeatable read"
23 | ReadCommitted TxIsolationLevel = "read committed"
24 | ReadUncommitted TxIsolationLevel = "read uncommitted"
25 | )
26 | 
27 | type TxAccessMode string // string values map directly onto pgx.TxAccessMode
28 | 
29 | const (
30 | ReadWrite TxAccessMode = "read write"
31 | ReadOnly TxAccessMode = "read only"
32 | )
33 | 
34 | type TxOptions struct { // transaction options, translated to pgx.TxOptions by toTxOptions
35 | IsolationLevel TxIsolationLevel
36 | AccessMode TxAccessMode
37 | }
38 | 
39 | type Txn struct { // pgx.Tx adapter implementing this package's Tx interface with error mapping
40 | pgx.Tx
41 | }
42 | 
43 | func (t *Txn) QueryRow(ctx context.Context, query string, args ...any) Row {
44 | row := t.Tx.QueryRow(ctx, query, args...)
45 | return &mappedRow{inner: row} // wrap so Scan errors are routed through mapError
46 | }
47 | 
48 | func (t *Txn) Query(ctx context.Context, query string, args ...any) (Rows, error) {
49 | rows, err := t.Tx.Query(ctx, query, args...)
50 | return rows, mapError(err)
51 | }
52 | 
53 | func (t *Txn) Exec(ctx context.Context, query string, args ...any) (CommandTag, error) {
54 | tag, err := t.Tx.Exec(ctx, query, args...)
55 | return CommandTag{tag}, mapError(err)
56 | }
57 | 
58 | func (t *Txn) CopyFrom(ctx context.Context, tableName string, columnNames []string, srcRows [][]any) (int64, error) {
59 | identifier, err := newIdentifier(tableName) // accepts "table" or "schema.table", optionally quoted
60 | if err != nil {
61 | return -1, err
62 | }
63 | 
64 | // sanitize the input, removing any added quotes. The CopyFrom will sanitize
65 | // them and double quotes will cause errors.
66 | for i, c := range columnNames {
67 | columnNames[i] = removeQuotes(c) // NOTE(review): mutates the caller's slice in place — confirm no caller reuses the quoted names afterwards
68 | }
69 | 
70 | return t.Tx.CopyFrom(ctx, identifier, columnNames, pgx.CopyFromRows(srcRows))
71 | }
72 | 
73 | func toTxOptions(opts TxOptions) pgx.TxOptions { // converts package-level options to their pgx equivalents
74 | return pgx.TxOptions{
75 | IsoLevel: pgx.TxIsoLevel(opts.IsolationLevel),
76 | AccessMode: pgx.TxAccessMode(opts.AccessMode),
77 | }
78 | }
79 |
--------------------------------------------------------------------------------
/internal/postgres/pg_utils_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package postgres
4 |
5 | import (
6 | "errors"
7 | "testing"
8 |
9 | "github.com/jackc/pgx/v5"
10 | "github.com/stretchr/testify/require"
11 | )
12 |
13 | func Test_newIdentifier(t *testing.T) { // covers plain, schema-qualified, quoted, and invalid (3-part) table names
14 | t.Parallel()
15 | 
16 | tests := []struct {
17 | name string
18 | tableName string
19 | 
20 | wantIdentifier pgx.Identifier
21 | wantErr error
22 | }{
23 | {
24 | name: "ok - table name",
25 | tableName: "test_table",
26 | 
27 | wantIdentifier: pgx.Identifier{"test_table"},
28 | wantErr: nil,
29 | },
30 | {
31 | name: "ok - qualified table name",
32 | tableName: "test_schema.test_table",
33 | 
34 | wantIdentifier: pgx.Identifier{"test_schema", "test_table"},
35 | wantErr: nil,
36 | },
37 | {
38 | name: "ok - quoted qualified table name",
39 | tableName: `"test_schema"."test_table"`, // quotes are stripped by newIdentifier
40 | 
41 | wantIdentifier: pgx.Identifier{"test_schema", "test_table"},
42 | wantErr: nil,
43 | },
44 | {
45 | name: "error - invalid table name",
46 | tableName: "invalid.test.table", // more than two dot-separated parts is rejected
47 | 
48 | wantIdentifier: nil,
49 | wantErr: errors.New("invalid table name: invalid.test.table"),
50 | },
51 | }
52 | 
53 | for _, tc := range tests {
54 | t.Run(tc.name, func(t *testing.T) {
55 | t.Parallel()
56 | 
57 | id, err := newIdentifier(tc.tableName)
58 | require.Equal(t, tc.wantErr, err)
59 | require.Equal(t, tc.wantIdentifier, id)
60 | })
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/internal/profiling/profiling.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package profiling
4 |
5 | import (
6 | "fmt"
7 | "net/http"
8 | _ "net/http/pprof"
9 | "os"
10 | "runtime"
11 | "runtime/pprof"
12 | )
13 |
14 | // StartProfilingServer starts an http server exposing /debug/pprof endpoint
15 | // with profiling insights.
16 | func StartProfilingServer(address string) {
17 | // by adding _ "net/http/pprof" the profiling endpoint attaches to the
18 | // default mux used below
19 | go func() {
20 | http.ListenAndServe(address, nil) //nolint:gosec // best-effort debug server; a failed bind is deliberately non-fatal
21 | }()
22 | }
23 | 
24 | // StartCPUProfile begins writing a CPU profile to fileName ("cpu.prof" when
25 | // empty). It returns a stop function that ends profiling and closes the file;
26 | // callers must invoke it to flush the profile.
27 | func StartCPUProfile(fileName string) (func(), error) {
28 | if fileName == "" {
29 | fileName = "cpu.prof"
30 | }
31 | cpuFile, err := os.Create(fileName)
32 | if err != nil {
33 | return nil, fmt.Errorf("could not create CPU profile file: %w", err)
34 | }
35 | 
36 | if err := pprof.StartCPUProfile(cpuFile); err != nil {
37 | cpuFile.Close() // don't leak the file descriptor when profiling can't start
38 | return nil, fmt.Errorf("could not start CPU profile: %w", err)
39 | }
40 | 
41 | return func() {
42 | pprof.StopCPUProfile()
43 | cpuFile.Close()
44 | }, nil
45 | }
46 | 
47 | // CreateMemoryProfile writes a snapshot of allocation profiling data to
48 | // fileName ("mem.prof" when empty).
49 | func CreateMemoryProfile(fileName string) error {
50 | if fileName == "" {
51 | fileName = "mem.prof"
52 | }
53 | memFile, err := os.Create(fileName)
54 | if err != nil {
55 | return fmt.Errorf("could not create memory profile file: %w", err)
56 | }
57 | defer memFile.Close()
58 | 
59 | runtime.GC() // get up-to-date statistics
60 | // Lookup("allocs") creates a profile similar to go test -memprofile.
61 | // Alternatively, use Lookup("heap") for a profile
62 | // that has inuse_space as the default index.
63 | if err := pprof.Lookup("allocs").WriteTo(memFile, 0); err != nil {
64 | return fmt.Errorf("could not write memory profile: %w", err)
65 | }
66 | 
67 | return nil
68 | }
63 |
--------------------------------------------------------------------------------
/internal/searchstore/mocks/mock_mapper.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package mocks
4 |
5 | import "github.com/xataio/pgstream/internal/searchstore"
6 |
7 | type Mapper struct {
8 | GetDefaultIndexSettingsFn func() map[string]any
9 | FieldMappingFn func(*searchstore.Field) (map[string]any, error)
10 | }
11 |
12 | func (m *Mapper) GetDefaultIndexSettings() map[string]any {
13 | if m.GetDefaultIndexSettingsFn == nil {
14 | return map[string]any{}
15 | }
16 | return m.GetDefaultIndexSettingsFn()
17 | }
18 |
19 | func (m *Mapper) FieldMapping(f *searchstore.Field) (map[string]any, error) {
20 | return m.FieldMappingFn(f)
21 | }
22 |
--------------------------------------------------------------------------------
/internal/searchstore/search_mapper.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package searchstore
4 |
// Mapper translates field definitions into store-specific index settings and
// field mappings.
type Mapper interface {
	GetDefaultIndexSettings() map[string]any
	FieldMapping(*Field) (map[string]any, error)
}

// Field describes a column to be mapped into a search index.
type Field struct {
	SearchType Type
	IsArray bool
	Metadata Metadata
}

// Metadata carries type-specific mapping details.
type Metadata struct {
	// VectorDimension is the vector dimension — presumably only meaningful
	// for PGVectorType fields; confirm in the store mapper implementations.
	VectorDimension int
}

// Type enumerates the search field types supported by the store mappers.
type Type uint

// NOTE: values are positional (iota); do not reorder or insert entries.
const (
	IntegerType Type = iota
	FloatType
	BoolType
	StringType
	DateTimeTZType
	DateTimeType
	DateType
	TimeType
	JSONType
	TextType
	PGVectorType
)
35 |
--------------------------------------------------------------------------------
/internal/sync/mocks/mock_weighted_semaphore.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package mocks
4 |
5 | import (
6 | "context"
7 | "sync/atomic"
8 | )
9 |
// WeightedSemaphore is a configurable mock implementation of the internal
// sync.WeightedSemaphore interface. It counts Release invocations and passes
// the running count to ReleaseFn so tests can assert on call ordering.
type WeightedSemaphore struct {
	TryAcquireFn func(int64) bool
	AcquireFn    func(context.Context, int64) error
	ReleaseFn    func(uint64, int64)
	// releaseCalls tracks how many times Release has been invoked. The typed
	// atomic (Go 1.19+) guarantees 64-bit alignment and lets vet flag
	// accidental copies, unlike the bare atomic.AddUint64 on a plain uint64.
	releaseCalls atomic.Uint64
}

// TryAcquire delegates to TryAcquireFn.
func (m *WeightedSemaphore) TryAcquire(i int64) bool {
	return m.TryAcquireFn(i)
}

// Acquire delegates to AcquireFn.
func (m *WeightedSemaphore) Acquire(ctx context.Context, i int64) error {
	return m.AcquireFn(ctx, i)
}

// Release increments the release counter and delegates to ReleaseFn, passing
// the post-increment call count alongside the released weight.
func (m *WeightedSemaphore) Release(i int64) {
	m.releaseCalls.Add(1)
	m.ReleaseFn(m.GetReleaseCalls(), i)
}

// GetReleaseCalls returns how many times Release has been called.
func (m *WeightedSemaphore) GetReleaseCalls() uint64 {
	return m.releaseCalls.Load()
}
33 |
--------------------------------------------------------------------------------
/internal/sync/semaphore.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package sync
4 |
5 | import (
6 | "context"
7 |
8 | "golang.org/x/sync/semaphore"
9 | )
10 |
// WeightedSemaphore is the subset of weighted-semaphore operations used to
// bound concurrent work by weight.
type WeightedSemaphore interface {
	TryAcquire(int64) bool
	Acquire(context.Context, int64) error
	Release(int64)
}

// NewWeightedSemaphore returns a semaphore with the given maximum combined
// weight. The returned *semaphore.Weighted satisfies WeightedSemaphore.
func NewWeightedSemaphore(size int64) *semaphore.Weighted {
	return semaphore.NewWeighted(size)
}
20 |
--------------------------------------------------------------------------------
/internal/testcontainers/test_elasticsearch_container.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package testcontainers
4 |
5 | import (
6 | "context"
7 | "fmt"
8 |
9 | "github.com/testcontainers/testcontainers-go"
10 | "github.com/testcontainers/testcontainers-go/modules/elasticsearch"
11 | )
12 |
13 | const elasticsearchImage = "docker.elastic.co/elasticsearch/elasticsearch:8.9.0"
14 |
15 | func SetupElasticsearchContainer(ctx context.Context, url *string) (cleanup, error) {
16 | ctr, err := elasticsearch.Run(ctx, elasticsearchImage,
17 | testcontainers.WithEnv(map[string]string{"xpack.security.enabled": "false"})) // disable TLS
18 | if err != nil {
19 | return nil, fmt.Errorf("failed to start elasticsearch container: %w", err)
20 | }
21 |
22 | *url = ctr.Settings.Address
23 |
24 | return func() error {
25 | return ctr.Terminate(ctx)
26 | }, nil
27 | }
28 |
--------------------------------------------------------------------------------
/internal/testcontainers/test_kafka_container.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package testcontainers
4 |
5 | import (
6 | "context"
7 | "fmt"
8 | "time"
9 |
10 | "github.com/testcontainers/testcontainers-go"
11 | "github.com/testcontainers/testcontainers-go/modules/kafka"
12 | "github.com/testcontainers/testcontainers-go/wait"
13 | )
14 |
15 | const kafkaImage = "confluentinc/confluent-local:7.5.0"
16 |
17 | func SetupKafkaContainer(ctx context.Context, brokers *[]string) (cleanup, error) {
18 | opts := []testcontainers.ContainerCustomizer{
19 | kafka.WithClusterID("test-cluster"),
20 | testcontainers.WithWaitStrategy(
21 | wait.ForLog("Kafka Server started").
22 | WithOccurrence(1).
23 | WithStartupTimeout(5 * time.Second),
24 | ),
25 | }
26 |
27 | ctr, err := kafka.Run(ctx, kafkaImage, opts...)
28 | if err != nil {
29 | return nil, fmt.Errorf("failed to start kafka container: %w", err)
30 | }
31 |
32 | *brokers, err = ctr.Brokers(ctx)
33 | if err != nil {
34 | return nil, fmt.Errorf("retrieving brokers for kafka container: %w", err)
35 | }
36 |
37 | return func() error {
38 | return ctr.Terminate(ctx)
39 | }, nil
40 | }
41 |
--------------------------------------------------------------------------------
/internal/testcontainers/test_opensearch_container.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package testcontainers
4 |
5 | import (
6 | "context"
7 | "fmt"
8 |
9 | "github.com/testcontainers/testcontainers-go/modules/opensearch"
10 | )
11 |
12 | const opensearchImage = "opensearchproject/opensearch:2.11.1"
13 |
14 | func SetupOpenSearchContainer(ctx context.Context, url *string) (cleanup, error) {
15 | ctr, err := opensearch.Run(ctx, opensearchImage)
16 | if err != nil {
17 | return nil, fmt.Errorf("failed to start opensearch container: %w", err)
18 | }
19 |
20 | *url, err = ctr.Address(ctx)
21 | if err != nil {
22 | return nil, fmt.Errorf("retrieving url for opensearch container: %w", err)
23 | }
24 |
25 | return func() error {
26 | return ctr.Terminate(ctx)
27 | }, nil
28 | }
29 |
--------------------------------------------------------------------------------
/internal/testcontainers/test_postgres_container.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package testcontainers
4 |
5 | import (
6 | "context"
7 | "fmt"
8 | "time"
9 |
10 | "github.com/testcontainers/testcontainers-go"
11 | "github.com/testcontainers/testcontainers-go/modules/postgres"
12 | "github.com/testcontainers/testcontainers-go/wait"
13 | )
14 |
15 | type cleanup func() error
16 |
17 | type PostgresImage string
18 |
19 | const (
20 | Postgres14 PostgresImage = "debezium/postgres:14-alpine"
21 | Postgres17 PostgresImage = "debezium/postgres:17-alpine"
22 | )
23 |
24 | func SetupPostgresContainer(ctx context.Context, url *string, image PostgresImage, configFile ...string) (cleanup, error) {
25 | waitForLogs := wait.
26 | ForLog("database system is ready to accept connections").
27 | WithOccurrence(2).
28 | WithStartupTimeout(5 * time.Second)
29 |
30 | opts := []testcontainers.ContainerCustomizer{
31 | testcontainers.WithWaitStrategy(waitForLogs),
32 | }
33 | if len(configFile) > 0 {
34 | opts = append(opts, postgres.WithConfigFile(configFile[0]))
35 | }
36 |
37 | ctr, err := postgres.Run(ctx, string(image), opts...)
38 | if err != nil {
39 | return nil, fmt.Errorf("failed to start postgres container: %w", err)
40 | }
41 |
42 | *url, err = ctr.ConnectionString(ctx, "sslmode=disable")
43 | if err != nil {
44 | return nil, fmt.Errorf("retrieving connection string for postgres container: %w", err)
45 | }
46 |
47 | return func() error {
48 | return ctr.Terminate(ctx)
49 | }, nil
50 | }
51 |
--------------------------------------------------------------------------------
/kafka2os.env:
--------------------------------------------------------------------------------
1 | # Listener config
2 | PGSTREAM_KAFKA_READER_SERVERS="localhost:9092"
3 | PGSTREAM_KAFKA_TOPIC_NAME=pgstream
4 | PGSTREAM_KAFKA_READER_CONSUMER_GROUP_ID=pgstream-consumer-group
5 | PGSTREAM_KAFKA_READER_CONSUMER_GROUP_START_OFFSET=earliest
6 | PGSTREAM_KAFKA_COMMIT_BACKOFF_INITIAL_INTERVAL=1s
7 | PGSTREAM_KAFKA_COMMIT_BACKOFF_MAX_INTERVAL=1m
8 | PGSTREAM_KAFKA_COMMIT_BACKOFF_MAX_RETRIES=60
9 |
10 | # Processor config
11 | PGSTREAM_SEARCH_INDEXER_BATCH_SIZE=100
12 | PGSTREAM_SEARCH_INDEXER_BATCH_TIMEOUT=5s
13 | PGSTREAM_OPENSEARCH_STORE_URL="http://admin:admin@localhost:9200"
14 | PGSTREAM_SEARCH_STORE_BACKOFF_INITIAL_INTERVAL=1s
15 | PGSTREAM_SEARCH_STORE_BACKOFF_MAX_INTERVAL=1m
16 | PGSTREAM_SEARCH_STORE_BACKOFF_MAX_RETRIES=0
17 |
--------------------------------------------------------------------------------
/kafka2os.yaml:
--------------------------------------------------------------------------------
1 | source:
2 | kafka:
3 | servers: ["localhost:9092"]
4 | topic:
5 | name: "pgstream"
6 | consumer_group:
7 | id: "pgstream-consumer-group"
8 | start_offset: "earliest" # options are earliest or latest
9 | backoff: # one of exponential or constant
10 | exponential:
11 | max_retries: 60 # maximum number of retries
12 | initial_interval: 1000 # initial interval in milliseconds
13 | max_interval: 60000 # maximum interval in milliseconds
14 |
15 | target:
16 | search:
17 | engine: "opensearch" # options are elasticsearch or opensearch
18 | url: "http://admin:admin@localhost:9200" # URL of the search engine
19 | batch:
20 | timeout: 5000 # batch timeout in milliseconds
21 | size: 100 # number of messages in a batch
22 | backoff: # one of exponential or constant
23 | exponential:
24 | max_retries: 0 # maximum number of retries
25 | initial_interval: 1000 # initial interval in milliseconds
26 | max_interval: 60000 # maximum interval in milliseconds
27 |
28 | modifiers:
29 | injector:
30 | enabled: true # whether to inject pgstream metadata into the WAL events
31 | schemalog_url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" # URL of the schemalog database, if different from the source database
32 |
--------------------------------------------------------------------------------
/license-header.txt:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package main
4 |
5 | import (
6 | "os"
7 |
8 | "github.com/xataio/pgstream/cmd"
9 | )
10 |
// main runs the pgstream root command and exits with status 1 on error.
// NOTE(review): the error is not printed here — presumably cmd.Execute
// reports it before returning; confirm in cmd/root_cmd.go.
func main() {
	if err := cmd.Execute(); err != nil {
		os.Exit(1)
	}
}
16 |
--------------------------------------------------------------------------------
/migrations/postgres/1_create_pgstream_xid.down.sql:
--------------------------------------------------------------------------------
-- Reverts migration 1: drops the pgstream xid helper functions, then the
-- sequence and domain.
DROP FUNCTION IF EXISTS pgstream.xid_counter;
DROP FUNCTION IF EXISTS pgstream.xid_pid;
DROP FUNCTION IF EXISTS pgstream.xid_machine;
DROP FUNCTION IF EXISTS pgstream.xid_time;
DROP FUNCTION IF EXISTS pgstream.xid;
DROP FUNCTION IF EXISTS pgstream.xid_decode;
DROP FUNCTION IF EXISTS pgstream.xid_encode;
DROP FUNCTION IF EXISTS pgstream._xid_machine_id;
DROP SEQUENCE IF EXISTS pgstream.xid_serial ;
DROP DOMAIN IF EXISTS pgstream.xid;
11 |
--------------------------------------------------------------------------------
/migrations/postgres/2_create_pgstream_schemalog_table.down.sql:
--------------------------------------------------------------------------------
-- Reverts migration 2: drops the schema change log table.
DROP TABLE IF EXISTS pgstream.schema_log;
2 |
--------------------------------------------------------------------------------
/migrations/postgres/2_create_pgstream_schemalog_table.up.sql:
--------------------------------------------------------------------------------
-- schema_log stores one row per recorded schema change: a version number per
-- schema_name, the schema snapshot as JSONB, and an acked flag marking
-- entries that have been processed.
CREATE TABLE IF NOT EXISTS pgstream.schema_log (
	id pgstream.xid PRIMARY KEY DEFAULT pgstream.xid(),
	version BIGINT NOT NULL,
	schema_name TEXT NOT NULL,
	schema JSONB NOT NULL,
	created_at TIMESTAMP NOT NULL DEFAULT NOW(),
	acked BOOLEAN NOT NULL DEFAULT FALSE
);

-- most lookups look like:
-- `SELECT id, schema FROM pgstream.schema_log WHERE schema_name = 'foo' AND NOT acked ORDER BY id DESC LIMIT 1`
CREATE INDEX IF NOT EXISTS schema_log_name_acked ON pgstream.schema_log (schema_name, acked, id);
-- enforce a single row per (schema_name, version) pair
CREATE UNIQUE INDEX IF NOT EXISTS schema_log_version_uniq ON pgstream.schema_log(schema_name, version);
14 |
--------------------------------------------------------------------------------
/migrations/postgres/3_create_pgstream_tableids_table.down.sql:
--------------------------------------------------------------------------------
-- Reverts migration 3: drops the table-id mapping helper and table.
DROP FUNCTION IF EXISTS pgstream.create_table_mapping;
DROP TABLE IF EXISTS pgstream.table_ids;
3 |
--------------------------------------------------------------------------------
/migrations/postgres/3_create_pgstream_tableids_table.up.sql:
--------------------------------------------------------------------------------
-- table_ids stores the mapping between the pgstream table id and the Postgres table oid
CREATE TABLE IF NOT EXISTS pgstream.table_ids (
	id pgstream.xid PRIMARY KEY DEFAULT pgstream.xid(),
	oid BIGINT NOT NULL UNIQUE
);

-- create_table_mapping inserts a mapping row for the given table oid and
-- returns the generated pgstream id. The search_path is pinned so object
-- resolution cannot be redirected through untrusted schemas.
CREATE OR REPLACE FUNCTION pgstream.create_table_mapping(table_oid oid) RETURNS pgstream.xid
	LANGUAGE SQL
	SET search_path = pg_catalog,pg_temp
AS $$
	INSERT INTO pgstream.table_ids (oid) VALUES (table_oid) RETURNING id;
$$;
13 |
--------------------------------------------------------------------------------
/migrations/postgres/4_create_pgstream_get_schema_function.down.sql:
--------------------------------------------------------------------------------
-- Reverts migration 4: drops the get_schema helper function.
DROP FUNCTION IF EXISTS pgstream.get_schema;
2 |
--------------------------------------------------------------------------------
/migrations/postgres/5_create_pgstream_log_schema_function.down.sql:
--------------------------------------------------------------------------------
-- Reverts migration 5: drops the schema logging helper functions.
DROP FUNCTION IF EXISTS pgstream.log_schema;
DROP FUNCTION IF EXISTS pgstream.is_system_schema;
3 |
--------------------------------------------------------------------------------
/migrations/postgres/6_create_pgstream_refresh_schema_function.down.sql:
--------------------------------------------------------------------------------
-- Reverts migration 6: drops the refresh_schema helper function.
-- Fix: the statement was missing its terminating semicolon, unlike every
-- other down migration in this directory.
DROP FUNCTION IF EXISTS pgstream.refresh_schema;
2 |
--------------------------------------------------------------------------------
/migrations/postgres/6_create_pgstream_refresh_schema_function.up.sql:
--------------------------------------------------------------------------------
-- refresh_schema inserts a fresh snapshot of the given schema into
-- pgstream.schema_log, setting the version to one past the number of existing
-- entries for that schema. NULL input and system schemas are ignored.
CREATE OR REPLACE FUNCTION pgstream.refresh_schema(schema_to_refresh text) RETURNS void
	LANGUAGE plpgsql
	SECURITY DEFINER
	-- Pin the search path so this SECURITY DEFINER body cannot be hijacked
	-- via attacker-controlled schemas.
	SET search_path = pg_catalog,pg_temp
AS $$
DECLARE
	schema_version bigint;
	is_system_schema boolean;
BEGIN
	is_system_schema := pgstream.is_system_schema(schema_to_refresh);

	-- plpgsql identifiers are case-insensitive: IS_SYSTEM_SCHEMA refers to
	-- the local variable declared above.
	IF schema_to_refresh IS NOT NULL and NOT IS_SYSTEM_SCHEMA THEN
		SELECT COUNT(*)+1 INTO schema_version
		FROM "pgstream"."schema_log"
		where schema_name = schema_to_refresh;

		INSERT INTO "pgstream"."schema_log" (version, schema_name, schema)
		VALUES (schema_version, schema_to_refresh, pgstream.get_schema(schema_to_refresh));

		-- NOTE(review): pgstream.skip_log is reset here but never set in this
		-- function — confirm callers set it before invoking refresh_schema.
		RESET pgstream.skip_log;
	END IF;
END;
$$;
24 |
--------------------------------------------------------------------------------
/migrations/postgres/7_create_pgstream_event_triggers.down.sql:
--------------------------------------------------------------------------------
-- Reverts migration 7: drops the DDL event triggers that record schema changes.
DROP EVENT TRIGGER IF EXISTS pgstream_log_schema_create_alter_table;
DROP EVENT TRIGGER IF EXISTS pgstream_log_schema_drop_schema_table;
3 |
--------------------------------------------------------------------------------
/migrations/postgres/7_create_pgstream_event_triggers.up.sql:
--------------------------------------------------------------------------------
-- Record schema changes after any DDL command completes.
CREATE EVENT TRIGGER pgstream_log_schema_create_alter_table ON ddl_command_end EXECUTE FUNCTION pgstream.log_schema();
-- Record table/schema drops as well, which ddl_command_end does not cover.
CREATE EVENT TRIGGER pgstream_log_schema_drop_schema_table ON sql_drop WHEN tag IN ('DROP TABLE', 'DROP SCHEMA') EXECUTE FUNCTION pgstream.log_schema();
3 |
--------------------------------------------------------------------------------
/pg2kafka.env:
--------------------------------------------------------------------------------
1 | # Listener config
2 | PGSTREAM_POSTGRES_LISTENER_URL="postgres://postgres:postgres@localhost?sslmode=disable"
3 | PGSTREAM_POSTGRES_SNAPSHOT_TABLES="*"
4 | PGSTREAM_POSTGRES_SNAPSHOT_SCHEMA_WORKERS=4
5 | PGSTREAM_POSTGRES_SNAPSHOT_TABLE_WORKERS=4
6 | PGSTREAM_POSTGRES_SNAPSHOT_BATCH_PAGE_SIZE=1000
7 | PGSTREAM_POSTGRES_SNAPSHOT_WORKERS=1
8 |
9 | # Processor config
10 | PGSTREAM_INJECTOR_STORE_POSTGRES_URL="postgres://postgres:postgres@localhost?sslmode=disable"
11 | PGSTREAM_KAFKA_WRITER_SERVERS="localhost:9092"
12 | PGSTREAM_KAFKA_TOPIC_NAME=pgstream
13 | PGSTREAM_KAFKA_TOPIC_PARTITIONS=1
14 | PGSTREAM_KAFKA_TOPIC_REPLICATION_FACTOR=1
15 | PGSTREAM_KAFKA_TOPIC_AUTO_CREATE=true
16 | PGSTREAM_KAFKA_WRITER_BATCH_TIMEOUT=2s
17 | PGSTREAM_KAFKA_WRITER_BATCH_SIZE=100
18 | PGSTREAM_KAFKA_WRITER_BATCH_BYTES=1572864
19 |
--------------------------------------------------------------------------------
/pg2kafka.yaml:
--------------------------------------------------------------------------------
1 | source:
2 | postgres:
3 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable"
4 | mode: snapshot_and_replication # options are replication, snapshot or snapshot_and_replication
5 | snapshot: # when mode is snapshot or snapshot_and_replication
      mode: full # options are full, schema or data
7 | tables: ["*"] # tables to snapshot, can be a list of table names or a pattern
8 | recorder:
9 | postgres_url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" # URL of the database where the snapshot status is recorded
10 | snapshot_workers: 1 # number of schemas to be snapshotted in parallel
11 | data: # when mode is full or data
12 | schema_workers: 4 # number of schema tables to be snapshotted in parallel
13 | table_workers: 4 # number of workers to snapshot a table in parallel
14 | batch_page_size: 1000 # number of pages to read per batch
15 | schema: # when mode is full or schema
16 | mode: schemalog # options are pgdump_pgrestore or schemalog
17 |
18 | target:
19 | kafka:
20 | servers: ["localhost:9092"]
21 | topic:
22 | name: "pgstream" # name of the Kafka topic
23 | partitions: 1 # number of partitions for the topic
24 | replication_factor: 1 # replication factor for the topic
25 | auto_create: true # whether to automatically create the topic if it doesn't exist
26 | batch:
27 | timeout: 2000 # batch timeout in milliseconds
28 | size: 100 # number of messages in a batch
29 | max_bytes: 1572864 # max size of batch in bytes (1.5MiB)
30 |
31 | modifiers:
32 | injector:
33 | enabled: true # whether to inject pgstream metadata into the WAL events
34 |
--------------------------------------------------------------------------------
/pg2os.env:
--------------------------------------------------------------------------------
1 | # Listener config
2 | PGSTREAM_POSTGRES_LISTENER_URL="postgres://postgres:postgres@localhost?sslmode=disable"
3 | PGSTREAM_POSTGRES_SNAPSHOT_TABLES="*"
4 | PGSTREAM_POSTGRES_SNAPSHOT_SCHEMA_WORKERS=4
5 | PGSTREAM_POSTGRES_SNAPSHOT_TABLE_WORKERS=4
6 | PGSTREAM_POSTGRES_SNAPSHOT_BATCH_PAGE_SIZE=1000
7 | PGSTREAM_POSTGRES_SNAPSHOT_WORKERS=1
8 |
9 | # Processor config
10 | PGSTREAM_INJECTOR_STORE_POSTGRES_URL="postgres://postgres:postgres@localhost?sslmode=disable"
11 | PGSTREAM_SEARCH_INDEXER_BATCH_SIZE=100
12 | PGSTREAM_SEARCH_INDEXER_BATCH_TIMEOUT=5s
13 | PGSTREAM_OPENSEARCH_STORE_URL="http://admin:admin@localhost:9200"
14 | PGSTREAM_SEARCH_STORE_EXP_BACKOFF_INITIAL_INTERVAL=1s
15 | PGSTREAM_SEARCH_STORE_EXP_BACKOFF_MAX_INTERVAL=1m
16 | PGSTREAM_SEARCH_STORE_EXP_BACKOFF_MAX_RETRIES=0
17 |
--------------------------------------------------------------------------------
/pg2os.yaml:
--------------------------------------------------------------------------------
1 | source:
2 | postgres:
3 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable"
4 | mode: snapshot_and_replication # options are replication, snapshot or snapshot_and_replication
5 | snapshot: # when mode is snapshot or snapshot_and_replication
      mode: full # options are full, schema or data
7 | tables: ["*"] # tables to snapshot, can be a list of table names or a pattern
8 | recorder:
9 | postgres_url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" # URL of the database where the snapshot status is recorded
10 | snapshot_workers: 1 # number of schemas to be snapshotted in parallel
11 | data: # when mode is full or data
12 | schema_workers: 4 # number of schema tables to be snapshotted in parallel
13 | table_workers: 4 # number of workers to snapshot a table in parallel
14 | batch_page_size: 1000 # number of pages to read per batch
15 | schema: # when mode is full or schema
16 | mode: schemalog # options are pgdump_pgrestore or schemalog
17 |
18 | target:
19 | search:
20 | engine: "opensearch" # options are elasticsearch or opensearch
21 | url: "http://admin:admin@localhost:9200" # URL of the search engine
22 | batch:
23 | timeout: 5000 # batch timeout in milliseconds
24 | size: 100 # number of messages in a batch
25 | backoff: # one of exponential or constant
26 | exponential:
27 | max_retries: 0 # maximum number of retries
28 | initial_interval: 1000 # initial interval in milliseconds
29 | max_interval: 60000 # maximum interval in milliseconds
30 |
31 |
32 | modifiers:
33 | injector:
34 | enabled: true # whether to inject pgstream metadata into the WAL events
35 |
--------------------------------------------------------------------------------
/pg2pg.env:
--------------------------------------------------------------------------------
1 | # Listener config
2 | PGSTREAM_POSTGRES_LISTENER_URL="postgres://postgres:postgres@localhost?sslmode=disable"
3 | PGSTREAM_POSTGRES_SNAPSHOT_STORE_URL="postgres://postgres:postgres@localhost?sslmode=disable"
4 | PGSTREAM_POSTGRES_SNAPSHOT_TABLES="*"
5 | PGSTREAM_POSTGRES_SNAPSHOT_SCHEMA_WORKERS=4
6 | PGSTREAM_POSTGRES_SNAPSHOT_TABLE_WORKERS=4
7 | PGSTREAM_POSTGRES_SNAPSHOT_BATCH_PAGE_SIZE=1000
8 | PGSTREAM_POSTGRES_SNAPSHOT_WORKERS=1
9 |
10 | # Processor config
11 | PGSTREAM_TRANSFORMER_RULES_FILE="transformer_rules.yaml"
12 | PGSTREAM_INJECTOR_STORE_POSTGRES_URL="postgres://postgres:postgres@localhost?sslmode=disable"
13 | PGSTREAM_FILTER_INCLUDE_TABLES="test"
14 | PGSTREAM_POSTGRES_WRITER_TARGET_URL="postgres://postgres:postgres@localhost:7654?sslmode=disable"
15 | PGSTREAM_POSTGRES_WRITER_BATCH_SIZE=100
16 | PGSTREAM_POSTGRES_WRITER_BATCH_TIMEOUT=5s
17 | PGSTREAM_POSTGRES_WRITER_SCHEMALOG_STORE_URL="postgres://postgres:postgres@localhost?sslmode=disable"
18 |
--------------------------------------------------------------------------------
/pg2pg.yaml:
--------------------------------------------------------------------------------
1 | source:
2 | postgres:
3 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable"
4 | mode: snapshot_and_replication # options are replication, snapshot or snapshot_and_replication
5 | snapshot: # when mode is snapshot or snapshot_and_replication
      mode: full # options are full, schema or data
7 | tables: ["*"] # tables to snapshot, can be a list of table names or a pattern
8 | recorder:
9 | postgres_url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" # URL of the database where the snapshot status is recorded
10 | snapshot_workers: 1 # number of schemas to be snapshotted in parallel
11 | data: # when mode is full or data
12 | schema_workers: 4 # number of schema tables to be snapshotted in parallel
13 | table_workers: 4 # number of workers to snapshot a table in parallel
14 | batch_page_size: 1000 # number of pages to read per batch
15 | schema: # when mode is full or schema
16 | mode: pgdump_pgrestore # options are pgdump_pgrestore or schemalog
17 | pgdump_pgrestore:
18 | clean_target_db: true # whether to clean the target database before restoring
19 |
20 | target:
21 | postgres:
22 | url: "postgres://postgres:postgres@localhost:7654?sslmode=disable"
23 | batch:
24 | timeout: 5000 # batch timeout in milliseconds
25 | size: 100 # number of messages in a batch
26 | disable_triggers: false # whether to disable triggers on the target database
27 | on_conflict_action: "nothing" # options are update, nothing or error
28 |
29 | modifiers:
30 | injector:
31 | enabled: true # whether to inject pgstream metadata into the WAL events
32 | filter:
33 | include_tables:
34 | - test
35 | transformations:
36 | validation_mode: relaxed
37 | table_transformers:
38 | - schema: public
39 | table: test
40 | column_transformers:
41 | name:
42 | name: greenmask_firstname
43 | dynamic_parameters:
44 | gender:
45 | column: sex
46 |
--------------------------------------------------------------------------------
/pg2webhook.env:
--------------------------------------------------------------------------------
1 | # Listener config
2 | PGSTREAM_POSTGRES_LISTENER_URL="postgres://postgres:postgres@localhost?sslmode=disable"
3 |
4 | # Processor config
5 | PGSTREAM_INJECTOR_STORE_POSTGRES_URL="postgres://postgres:postgres@localhost?sslmode=disable"
6 | PGSTREAM_WEBHOOK_SUBSCRIPTION_STORE_URL="postgres://postgres:postgres@localhost?sslmode=disable"
7 | PGSTREAM_WEBHOOK_SUBSCRIPTION_STORE_CACHE_ENABLED=false
8 | PGSTREAM_WEBHOOK_SUBSCRIPTION_STORE_CACHE_REFRESH_INTERVAL="60s"
9 |
--------------------------------------------------------------------------------
/pg2webhook.yaml:
--------------------------------------------------------------------------------
1 | source:
2 | postgres:
3 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable"
4 | mode: replication # options are replication, snapshot or snapshot_and_replication
5 |
6 | target:
7 | webhooks:
8 | subscriptions:
9 | store:
10 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" # URL of the database where the webhook subscriptions are stored
11 | cache:
12 | enabled: false # whether to enable caching for the subscription store
13 | server:
14 | address: "localhost:9090" # address of the subscription server
15 | read_timeout: 60 # read timeout in seconds
16 | write_timeout: 60 # write timeout in seconds
17 | notifier:
18 | worker_count: 4 # number of notifications to be processed in parallel
19 | client_timeout: 10000 # timeout for the webhook client in milliseconds
20 |
21 | modifiers:
22 | injector:
23 | enabled: true # whether to inject pgstream metadata into the WAL events
24 | schemalog_url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" # URL of the schemalog database, if different from the source database
25 |
--------------------------------------------------------------------------------
/pkg/backoff/mocks/mock_backoff.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package mocks
4 |
5 | import "github.com/xataio/pgstream/pkg/backoff"
6 |
7 | type Backoff struct {
8 | RetryNotifyFn func(backoff.Operation, backoff.Notify) error
9 | RetryFn func(backoff.Operation) error
10 | }
11 |
12 | func (m *Backoff) RetryNotify(op backoff.Operation, not backoff.Notify) error {
13 | return m.RetryNotifyFn(op, not)
14 | }
15 |
16 | func (m *Backoff) Retry(op backoff.Operation) error {
17 | return m.RetryFn(op)
18 | }
19 |
--------------------------------------------------------------------------------
/pkg/kafka/config.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package kafka
4 |
5 | import tlslib "github.com/xataio/pgstream/pkg/tls"
6 |
// ConnConfig holds the connection settings shared by kafka readers and
// writers: the bootstrap servers, the topic, and the TLS configuration.
type ConnConfig struct {
	Servers []string
	Topic TopicConfig
	TLS tlslib.Config
}

// TopicConfig holds the kafka topic settings.
type TopicConfig struct {
	Name string
	// Number of partitions to be created for the topic. Defaults to 1.
	NumPartitions int
	// Replication factor for the topic. Defaults to 1.
	ReplicationFactor int
	// AutoCreate defines if the topic should be created if it doesn't exist.
	// Defaults to false.
	AutoCreate bool
}

// ReaderConfig holds the settings for a kafka consumer.
type ReaderConfig struct {
	Conn ConnConfig
	// ConsumerGroupID is the ID of the consumer group to use. If not set,
	// defaults to "pgstream-consumer-group".
	ConsumerGroupID string
	// ConsumerGroupStartOffset is the offset to start consuming from. If not
	// set, defaults to "earliest".
	ConsumerGroupStartOffset string
}

// Defaults applied when the corresponding configuration value is unset.
const (
	defaultNumPartitions = 1
	defaultReplicationFactor = 1
	defaultConsumerGroupOffset = earliestOffset
	defaultConsumerGroupID = "pgstream-consumer-group"
)
40 |
41 | func (c *TopicConfig) numPartitions() int {
42 | if c.NumPartitions > 0 {
43 | return c.NumPartitions
44 | }
45 | return defaultNumPartitions
46 | }
47 |
48 | func (c *TopicConfig) replicationFactor() int {
49 | if c.NumPartitions > 0 {
50 | return c.ReplicationFactor
51 | }
52 | return defaultReplicationFactor
53 | }
54 |
55 | func (c *ReaderConfig) consumerGroupID() string {
56 | if c.ConsumerGroupID != "" {
57 | return c.ConsumerGroupID
58 | }
59 | return defaultConsumerGroupID
60 | }
61 |
62 | func (c *ReaderConfig) consumerGroupStartOffset() string {
63 | if c.ConsumerGroupStartOffset != "" {
64 | return c.ConsumerGroupStartOffset
65 | }
66 | return defaultConsumerGroupOffset
67 | }
68 |
--------------------------------------------------------------------------------
/pkg/kafka/conn.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package kafka
4 |
5 | import (
6 | "errors"
7 | "fmt"
8 | "net"
9 | "strconv"
10 | "time"
11 |
12 | tlslib "github.com/xataio/pgstream/pkg/tls"
13 |
14 | "github.com/segmentio/kafka-go"
15 | )
16 |
// withConnection creates a connection that can be used by the kafka operation
// passed in the parameters. This ensures the cleanup of all connection resources.
// It dials the configured bootstrap servers in order until one succeeds, then
// looks up the cluster controller and runs the operation against a direct
// connection to it.
func withConnection(config *ConnConfig, kafkaOperation func(conn *kafka.Conn) error) error {
	dialer, err := buildDialer(&config.TLS)
	if err != nil {
		return err
	}

	var conn *kafka.Conn
	for _, server := range config.Servers {
		conn, err = dialer.Dial("tcp", server)
		if err != nil {
			// Try next server in the list
			continue
		}
		// Closed when withConnection returns; at most one connection is
		// deferred since the loop exits right after a successful dial.
		defer conn.Close()

		// Successfully connected. Do not try the other servers
		break
	}

	// conn is nil when every dial attempt failed.
	// NOTE(review): the individual dial errors are discarded here — consider
	// wrapping the last one into the returned error.
	if conn == nil {
		return errors.New("error connecting to kafka, all servers failed")
	}

	controller, err := conn.Controller()
	if err != nil {
		return fmt.Errorf("controller: %w", err)
	}
	var controllerConn *kafka.Conn

	controllerConn, err = dialer.Dial("tcp", net.JoinHostPort(controller.Host, strconv.Itoa(controller.Port)))
	if err != nil {
		return fmt.Errorf("controller connection: %w", err)
	}
	defer controllerConn.Close()

	return kafkaOperation(controllerConn)
}
56 |
57 | func buildDialer(cfg *tlslib.Config) (*kafka.Dialer, error) {
58 | timeout := 10 * time.Second
59 |
60 | tlsConfig, err := tlslib.NewConfig(cfg)
61 | if err != nil {
62 | return nil, fmt.Errorf("loading TLS configuration: %w", err)
63 | }
64 |
65 | return &kafka.Dialer{
66 | Timeout: timeout,
67 | DualStack: true,
68 | TLS: tlsConfig,
69 | }, nil
70 | }
71 |
--------------------------------------------------------------------------------
/pkg/kafka/kafka_offset_parser.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package kafka
4 |
5 | import (
6 | "errors"
7 | "fmt"
8 | "strconv"
9 | "strings"
10 | )
11 |
// Offset identifies a consumer position: a topic, a partition within that
// topic, and the numeric offset within the partition.
type Offset struct {
	Topic     string
	Partition int
	Offset    int64
}

// OffsetParser converts offsets to and from their string representation.
type OffsetParser interface {
	ToString(o *Offset) string
	FromString(s string) (*Offset, error)
}
22 |
// Parser is the default OffsetParser implementation.
type Parser struct{}

var (
	// ErrInvalidOffsetFormat is returned by FromString when the input does
	// not match the "<topic>/<partition>/<offset>" layout.
	ErrInvalidOffsetFormat = errors.New("invalid format for kafka offset")

	// "/" is used as a separator to concatenate the topic, partition and
	// offset. The partition and offset are integers, and the topic allowed
	// characters are [a-zA-Z0-9\._\-].
	//
	// See https://github.com/apache/kafka/blob/0.10.2/core/src/main/scala/kafka/common/Topic.scala#L29
	separator = "/"
)
35 |
36 | func NewOffsetParser() *Parser {
37 | return &Parser{}
38 | }
39 |
40 | func (p *Parser) ToString(o *Offset) string {
41 | return fmt.Sprintf("%s%s%d%s%d", o.Topic, separator, o.Partition, separator, o.Offset)
42 | }
43 |
44 | func (p *Parser) FromString(s string) (*Offset, error) {
45 | parts := strings.Split(s, separator)
46 | if len(parts) != 3 {
47 | return nil, ErrInvalidOffsetFormat
48 | }
49 | topic := parts[0]
50 | partition, err := strconv.Atoi(parts[1])
51 | if err != nil {
52 | return nil, fmt.Errorf("parsing partition from string: %w: %w", ErrInvalidOffsetFormat, err)
53 | }
54 | offset, err := strconv.Atoi(parts[2])
55 | if err != nil {
56 | return nil, fmt.Errorf("parsing offset from string: %w: %w", ErrInvalidOffsetFormat, err)
57 | }
58 |
59 | return &Offset{
60 | Topic: topic,
61 | Partition: partition,
62 | Offset: int64(offset),
63 | }, nil
64 | }
65 |
--------------------------------------------------------------------------------
/pkg/kafka/kafka_offset_parser_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package kafka
4 |
5 | import (
6 | "testing"
7 |
8 | "github.com/stretchr/testify/require"
9 | )
10 |
11 | func TestParser_ToString(t *testing.T) {
12 | t.Parallel()
13 |
14 | o := &Offset{
15 | Topic: "test_topic",
16 | Partition: 0,
17 | Offset: 1,
18 | }
19 |
20 | parser := Parser{}
21 | str := parser.ToString(o)
22 | require.Equal(t, "test_topic/0/1", str)
23 | }
24 |
25 | func TestParser_FromString(t *testing.T) {
26 | t.Parallel()
27 |
28 | tests := []struct {
29 | name string
30 | str string
31 |
32 | wantOffset *Offset
33 | wantErr error
34 | }{
35 | {
36 | name: "ok",
37 | str: "test_topic/0/1",
38 |
39 | wantOffset: &Offset{
40 | Topic: "test_topic",
41 | Partition: 0,
42 | Offset: 1,
43 | },
44 | wantErr: nil,
45 | },
46 | {
47 | name: "error - invalid format",
48 | str: "test_topic01",
49 |
50 | wantOffset: nil,
51 | wantErr: ErrInvalidOffsetFormat,
52 | },
53 | {
54 | name: "error - invalid partition",
55 | str: "test_topic/zero/1",
56 |
57 | wantOffset: nil,
58 | wantErr: ErrInvalidOffsetFormat,
59 | },
60 | {
61 | name: "error - invalid offset",
62 | str: "test_topic/0/one",
63 |
64 | wantOffset: nil,
65 | wantErr: ErrInvalidOffsetFormat,
66 | },
67 | }
68 |
69 | for _, tc := range tests {
70 | t.Run(tc.name, func(t *testing.T) {
71 | t.Parallel()
72 |
73 | parser := NewOffsetParser()
74 | offset, err := parser.FromString(tc.str)
75 | require.ErrorIs(t, err, tc.wantErr)
76 | require.Equal(t, offset, tc.wantOffset)
77 | })
78 | }
79 | }
80 |
--------------------------------------------------------------------------------
/pkg/kafka/log.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package kafka
4 |
5 | import (
6 | "fmt"
7 |
8 | "github.com/segmentio/kafka-go"
9 | loglib "github.com/xataio/pgstream/pkg/log"
10 | )
11 |
12 | func makeLogger(logFn func(msg string, fields ...loglib.Fields)) kafka.LoggerFunc {
13 | return func(msg string, args ...interface{}) {
14 | logFn(fmt.Sprintf(msg, args...), nil)
15 | }
16 | }
17 |
18 | func makeErrLogger(logFn func(err error, msg string, fields ...loglib.Fields)) kafka.LoggerFunc {
19 | return func(msg string, args ...interface{}) {
20 | logFn(nil, fmt.Sprintf(msg, args...), nil)
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/pkg/kafka/mocks/mock_kafka_parser.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package mocks
4 |
5 | import "github.com/xataio/pgstream/pkg/kafka"
6 |
7 | type OffsetParser struct {
8 | ToStringFn func(o *kafka.Offset) string
9 | FromStringFn func(string) (*kafka.Offset, error)
10 | }
11 |
12 | func (m *OffsetParser) ToString(o *kafka.Offset) string {
13 | return m.ToStringFn(o)
14 | }
15 |
16 | func (m *OffsetParser) FromString(s string) (*kafka.Offset, error) {
17 | return m.FromStringFn(s)
18 | }
19 |
--------------------------------------------------------------------------------
/pkg/kafka/mocks/mock_kafka_reader.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package mocks
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/pkg/kafka"
9 | )
10 |
11 | type Reader struct {
12 | FetchMessageFn func(ctx context.Context) (*kafka.Message, error)
13 | CommitOffsetsFn func(ctx context.Context, offsets ...*kafka.Offset) error
14 | CloseFn func() error
15 | }
16 |
17 | func (m *Reader) FetchMessage(ctx context.Context) (*kafka.Message, error) {
18 | return m.FetchMessageFn(ctx)
19 | }
20 |
21 | func (m *Reader) CommitOffsets(ctx context.Context, offsets ...*kafka.Offset) error {
22 | return m.CommitOffsetsFn(ctx, offsets...)
23 | }
24 |
25 | func (m *Reader) Close() error {
26 | return m.CloseFn()
27 | }
28 |
--------------------------------------------------------------------------------
/pkg/kafka/mocks/mock_kafka_writer.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package mocks
4 |
5 | import (
6 | "context"
7 | "sync/atomic"
8 |
9 | "github.com/xataio/pgstream/pkg/kafka"
10 | )
11 |
12 | type Writer struct {
13 | WriteMessagesFn func(context.Context, uint64, ...kafka.Message) error
14 | CloseFn func() error
15 | WriteCalls uint64
16 | }
17 |
18 | func (m *Writer) WriteMessages(ctx context.Context, msgs ...kafka.Message) error {
19 | atomic.AddUint64(&m.WriteCalls, 1)
20 | return m.WriteMessagesFn(ctx, m.GetWriteCalls(), msgs...)
21 | }
22 |
23 | func (m *Writer) Close() error {
24 | if m.CloseFn != nil {
25 | return m.CloseFn()
26 | }
27 | return nil
28 | }
29 |
30 | func (m *Writer) GetWriteCalls() uint64 {
31 | return atomic.LoadUint64(&m.WriteCalls)
32 | }
33 |
--------------------------------------------------------------------------------
/pkg/log/logger.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package log
4 |
// Logger is the leveled logging interface used across pgstream packages.
// Implementations accept optional structured Fields on every call; Warn and
// Error additionally take the triggering error (which may be nil).
type Logger interface {
	Trace(msg string, fields ...Fields)
	Debug(msg string, fields ...Fields)
	Info(msg string, fields ...Fields)
	Warn(err error, msg string, fields ...Fields)
	Error(err error, msg string, fields ...Fields)
	Panic(msg string, fields ...Fields)
	WithFields(fields Fields) Logger
}

// Fields holds structured logging key/value pairs.
type Fields map[string]any
16 |
// NoopLogger discards every log call; used as a safe default when no logger
// is configured.
type NoopLogger struct{}

func (l *NoopLogger) Trace(msg string, fields ...Fields) {}
func (l *NoopLogger) Debug(msg string, fields ...Fields) {}
func (l *NoopLogger) Info(msg string, fields ...Fields) {}
func (l *NoopLogger) Warn(err error, msg string, fields ...Fields) {}
func (l *NoopLogger) Error(err error, msg string, fields ...Fields) {}
func (l *NoopLogger) Panic(msg string, fields ...Fields) {}

// WithFields returns the receiver unchanged, since all output is discarded.
func (l *NoopLogger) WithFields(fields Fields) Logger {
	return l
}

// ModuleField is the conventional field key used to tag log entries with the
// emitting module's name.
const ModuleField = "module"

// NewNoopLogger returns a logger that discards everything.
func NewNoopLogger() *NoopLogger {
	return &NoopLogger{}
}
34 |
35 | // NewLogger will return the logger on input if not nil, or a noop logger
36 | // otherwise.
37 | func NewLogger(l Logger) Logger {
38 | if l == nil {
39 | return &NoopLogger{}
40 | }
41 | return l
42 | }
43 |
44 | func MergeFields(f1, f2 Fields) Fields {
45 | allFields := make(Fields, len(f1)+len(f2))
46 | fieldMaps := []Fields{f1, f2}
47 | for _, fmap := range fieldMaps {
48 | for k, v := range fmap {
49 | allFields[k] = v
50 | }
51 | }
52 | return allFields
53 | }
54 |
--------------------------------------------------------------------------------
/pkg/otel/config.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package otel
4 |
5 | import "time"
6 |
// Config holds the OTEL instrumentation configuration. A nil Metrics or
// Traces disables that signal.
type Config struct {
	Metrics *MetricsConfig
	Traces  *TracesConfig
}
11 |
12 | type MetricsConfig struct {
13 | Endpoint string
14 | CollectionInterval time.Duration
15 | }
16 |
17 | type TracesConfig struct {
18 | Endpoint string
19 | SampleRatio float64
20 | }
21 |
22 | const defaultCollectionInterval = 60 * time.Second
23 |
24 | func (c *MetricsConfig) collectionInterval() time.Duration {
25 | if c.CollectionInterval != 0 {
26 | return c.CollectionInterval
27 | }
28 | return defaultCollectionInterval
29 | }
30 |
--------------------------------------------------------------------------------
/pkg/otel/otel_instrumentation.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package otel
4 |
5 | import (
6 | "go.opentelemetry.io/otel/metric"
7 | "go.opentelemetry.io/otel/trace"
8 | )
9 |
10 | type InstrumentationProvider interface {
11 | NewInstrumentation(name string) *Instrumentation
12 | Close() error
13 | }
14 |
15 | type Instrumentation struct {
16 | Meter metric.Meter
17 | Tracer trace.Tracer
18 | }
19 |
20 | func (i *Instrumentation) IsEnabled() bool {
21 | return i != nil && (i.Meter != nil || i.Tracer != nil)
22 | }
23 |
// noopProvider implements InstrumentationProvider with instrumentation
// disabled: it hands out nil Instrumentation handles and has nothing to
// close.
type noopProvider struct{}

func (p *noopProvider) NewInstrumentation(name string) *Instrumentation {
	return nil
}

func (p *noopProvider) Close() error {
	return nil
}
33 |
34 | func NewInstrumentationProvider(cfg *Config) (InstrumentationProvider, error) {
35 | // if neither metrics or traces are configured, instrumentation is not
36 | // enabled. Return a noop with disabled instrumentation
37 | if cfg.Metrics == nil && cfg.Traces == nil {
38 | return &noopProvider{}, nil
39 | }
40 | return NewProvider(cfg)
41 | }
42 |
--------------------------------------------------------------------------------
/pkg/otel/span.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package otel
4 |
5 | import (
6 | "context"
7 |
8 | "go.opentelemetry.io/otel/codes"
9 | "go.opentelemetry.io/otel/trace"
10 | )
11 |
12 | // StartSpan will start a span using the tracer on input. If the tracer is nil,
13 | // the context returned is the same as on input, and the span will be nil.
14 | func StartSpan(ctx context.Context, tracer trace.Tracer, name string, opts ...trace.SpanStartOption) (context.Context, trace.Span) {
15 | if tracer == nil {
16 | return ctx, nil
17 | }
18 | return tracer.Start(ctx, name, opts...)
19 | }
20 |
21 | // CloseSpan closes a span and records the given error if not nil. If the span
22 | // is nil, this is a noop.
23 | func CloseSpan(span trace.Span, err error) {
24 | if span == nil {
25 | return
26 | }
27 | recordSpanResult(span, err)
28 | span.End()
29 | }
30 |
31 | func recordSpanResult(span trace.Span, err error) {
32 | if err == nil {
33 | return
34 | }
35 |
36 | span.RecordError(err)
37 | span.SetStatus(codes.Error, "")
38 | }
39 |
--------------------------------------------------------------------------------
/pkg/otel/version.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package otel
4 |
5 | import (
6 | "runtime/debug"
7 | "sync"
8 | )
9 |
// commitOnce lazily resolves the git commit hash recorded in the binary's
// build info, falling back to "unknown" when unavailable. NOTE(review):
// `vcs.revision` is only stamped for package-path builds (e.g.
// `go build ./cmd/...`), not file-list builds like `go build main.go`; the
// original comment referenced `cmd/xerver`, which looks copied from another
// project — confirm the intended build command for this repo.
var commitOnce = sync.OnceValue(func() string {
	const unknownCommitName = "unknown"

	info, ok := debug.ReadBuildInfo()
	if !ok {
		return unknownCommitName
	}

	for _, v := range info.Settings {
		if v.Key == "vcs.revision" {
			return v.Value
		}
	}

	return unknownCommitName
})

// version returns the commit hash used to identify this build in telemetry.
func version() string {
	return commitOnce()
}
33 |
--------------------------------------------------------------------------------
/pkg/schemalog/instrumentation/instrumented_store.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package instrumentation
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/pkg/otel"
9 | "github.com/xataio/pgstream/pkg/schemalog"
10 | "go.opentelemetry.io/otel/attribute"
11 | "go.opentelemetry.io/otel/trace"
12 | )
13 |
14 | type Store struct {
15 | inner schemalog.Store
16 | tracer trace.Tracer
17 | }
18 |
19 | func NewStore(inner schemalog.Store, instrumentation *otel.Instrumentation) schemalog.Store {
20 | if instrumentation == nil {
21 | return inner
22 | }
23 |
24 | return &Store{
25 | inner: inner,
26 | tracer: instrumentation.Tracer,
27 | }
28 | }
29 |
30 | func (s *Store) Insert(ctx context.Context, schemaName string) (le *schemalog.LogEntry, err error) {
31 | ctx, span := otel.StartSpan(ctx, s.tracer, "schemalogstore.Insert", trace.WithAttributes(attribute.String("schema", schemaName)))
32 | defer otel.CloseSpan(span, err)
33 | return s.inner.Insert(ctx, schemaName)
34 | }
35 |
36 | func (s *Store) FetchLast(ctx context.Context, schemaName string, acked bool) (le *schemalog.LogEntry, err error) {
37 | ctx, span := otel.StartSpan(ctx, s.tracer, "schemalogstore.FetchLast", trace.WithAttributes(attribute.String("schema", schemaName)))
38 | defer otel.CloseSpan(span, err)
39 | return s.inner.FetchLast(ctx, schemaName, acked)
40 | }
41 |
42 | func (s *Store) Fetch(ctx context.Context, schemaName string, version int) (le *schemalog.LogEntry, err error) {
43 | ctx, span := otel.StartSpan(ctx, s.tracer, "schemalogstore.Fetch", trace.WithAttributes(attribute.String("schema", schemaName)))
44 | defer otel.CloseSpan(span, err)
45 | return s.inner.Fetch(ctx, schemaName, version)
46 | }
47 |
48 | func (s *Store) Ack(ctx context.Context, le *schemalog.LogEntry) (err error) {
49 | ctx, span := otel.StartSpan(ctx, s.tracer, "schemalogstore.Ack", trace.WithAttributes(attribute.String("schema", le.SchemaName)))
50 | defer otel.CloseSpan(span, err)
51 | return s.inner.Ack(ctx, le)
52 | }
53 |
// Close closes the wrapped store. No span is recorded for Close.
func (s *Store) Close() error {
	return s.inner.Close()
}
57 |
--------------------------------------------------------------------------------
/pkg/schemalog/mocks/store_mock.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package mocks
4 |
5 | import (
6 | "context"
7 | "sync/atomic"
8 |
9 | "github.com/xataio/pgstream/pkg/schemalog"
10 | )
11 |
// Store is a configurable schemalog.Store test double that counts calls to
// each operation.
type Store struct {
	InsertFn    func(ctx context.Context, schemaName string) (*schemalog.LogEntry, error)
	FetchLastFn func(ctx context.Context, schemaName string, ackedOnly bool) (*schemalog.LogEntry, error)
	FetchFn     func(ctx context.Context, schemaName string, version int) (*schemalog.LogEntry, error)
	AckFn       func(ctx context.Context, le *schemalog.LogEntry) error
	CloseFn     func() error
	// call counters, accessed atomically via the Get*Calls accessors
	insertCalls uint64
	fetchCalls  uint64
	ackCalls    uint64
}

// compile-time check that the mock satisfies the real interface
var _ schemalog.Store = (*Store)(nil)
24 |
25 | func (m *Store) Insert(ctx context.Context, schemaName string) (*schemalog.LogEntry, error) {
26 | atomic.AddUint64(&m.insertCalls, 1)
27 | return m.InsertFn(ctx, schemaName)
28 | }
29 |
30 | func (m *Store) FetchLast(ctx context.Context, schemaName string, ackedOnly bool) (*schemalog.LogEntry, error) {
31 | atomic.AddUint64(&m.fetchCalls, 1)
32 | return m.FetchLastFn(ctx, schemaName, ackedOnly)
33 | }
34 |
35 | func (m *Store) Fetch(ctx context.Context, schemaName string, version int) (*schemalog.LogEntry, error) {
36 | atomic.AddUint64(&m.fetchCalls, 1)
37 | return m.FetchFn(ctx, schemaName, version)
38 | }
39 |
40 | func (m *Store) Ack(ctx context.Context, le *schemalog.LogEntry) error {
41 | atomic.AddUint64(&m.ackCalls, 1)
42 | return m.AckFn(ctx, le)
43 | }
44 |
45 | func (m *Store) Close() error {
46 | if m.CloseFn != nil {
47 | return m.CloseFn()
48 | }
49 | return nil
50 | }
51 |
52 | func (m *Store) GetInsertCalls() uint64 {
53 | return atomic.LoadUint64(&m.insertCalls)
54 | }
55 |
56 | func (m *Store) GetFetchCalls() uint64 {
57 | return atomic.LoadUint64(&m.fetchCalls)
58 | }
59 |
60 | func (m *Store) GetAckCalls() uint64 {
61 | return atomic.LoadUint64(&m.ackCalls)
62 | }
63 |
--------------------------------------------------------------------------------
/pkg/schemalog/postgres/helper_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package postgres
4 |
5 | import (
6 | "fmt"
7 |
8 | "github.com/rs/xid"
9 | "github.com/xataio/pgstream/pkg/schemalog"
10 | )
11 |
12 | type mockRow struct {
13 | logEntry *schemalog.LogEntry
14 | version *int
15 | scanFn func(args ...any) error
16 | }
17 |
18 | func (m *mockRow) Scan(args ...any) error {
19 | if m.scanFn != nil {
20 | return m.scanFn(args...)
21 | }
22 |
23 | if m.logEntry != nil {
24 | id, ok := args[0].(*xid.ID)
25 | if !ok {
26 | return fmt.Errorf("unexpected type for xid.ID in scan: %T", args[0])
27 | }
28 | *id = m.logEntry.ID
29 | }
30 |
31 | if m.version != nil {
32 | version, ok := args[0].(*int)
33 | if !ok {
34 | return fmt.Errorf("unexpected type for version in scan: %T", args[0])
35 | }
36 | *version = *m.version
37 | }
38 |
39 | return nil
40 | }
41 |
--------------------------------------------------------------------------------
/pkg/schemalog/schema_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package schemalog
4 |
5 | import (
6 | "testing"
7 |
8 | "github.com/stretchr/testify/require"
9 | )
10 |
// TestTable_GetFirstUniqueNotNullColumn verifies that the first column that
// is both unique and not nullable is returned, and nil when no column
// qualifies.
func TestTable_GetFirstUniqueNotNullColumn(t *testing.T) {
	t.Parallel()

	tests := []struct {
		name  string
		table *Table

		wantCol *Column
	}{
		{
			// unique-but-nullable and not-null-but-not-unique don't qualify
			name: "no unique not null columns",
			table: &Table{
				Columns: []Column{
					{PgstreamID: "1", Name: "col-1", Unique: false, Nullable: true},
					{PgstreamID: "2", Name: "col-2", Unique: true, Nullable: true},
					{PgstreamID: "3", Name: "col-3", Unique: false, Nullable: false},
				},
			},

			wantCol: nil,
		},
		{
			name: "single unique not null column",
			table: &Table{
				Columns: []Column{
					{PgstreamID: "1", Name: "col-1", Unique: false, Nullable: true},
					{PgstreamID: "2", Name: "col-2", Unique: true, Nullable: false},
					{PgstreamID: "3", Name: "col-3", Unique: false, Nullable: false},
				},
			},

			wantCol: &Column{PgstreamID: "2", Name: "col-2", Unique: true, Nullable: false},
		},
		{
			// the first qualifying column (col-2) wins over later ones
			name: "multiple unique not null columns",
			table: &Table{
				Columns: []Column{
					{PgstreamID: "1", Name: "col-1", Unique: false, Nullable: true},
					{PgstreamID: "2", Name: "col-2", Unique: true, Nullable: false},
					{PgstreamID: "3", Name: "col-3", Unique: true, Nullable: false},
				},
			},

			wantCol: &Column{PgstreamID: "2", Name: "col-2", Unique: true, Nullable: false},
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			col := tc.table.GetFirstUniqueNotNullColumn()
			require.Equal(t, tc.wantCol, col)
		})
	}
}
67 |
--------------------------------------------------------------------------------
/pkg/schemalog/store.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package schemalog
4 |
5 | import (
6 | "context"
7 | "errors"
8 | )
9 |
// Store is the persistence interface for the pgstream schema log.
type Store interface {
	Insert(ctx context.Context, schemaName string) (*LogEntry, error)
	FetchLast(ctx context.Context, schemaName string, ackedOnly bool) (*LogEntry, error)
	Fetch(ctx context.Context, schemaName string, version int) (*LogEntry, error)
	Ack(ctx context.Context, le *LogEntry) error
	Close() error
}

// ErrNoRows is the sentinel error for an empty result.
var ErrNoRows = errors.New("no rows")

// Postgres schema and table where the schema log lives.
const (
	SchemaName = "pgstream"
	TableName  = "schema_log"
)
24 |
--------------------------------------------------------------------------------
/pkg/schemalog/store_cache.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package schemalog
4 |
5 | import (
6 | "context"
7 | "fmt"
8 | "sync"
9 | )
10 |
// StoreCache is a wrapper around a schemalog Store that keeps an in-memory
// cache of the last log entry per schema to reduce database calls. Map access
// is guarded by the RWMutex. NOTE(review): the original comment claimed the
// type is "not concurrency safe" — the cache map itself is mutex-guarded,
// though concurrent FetchLast calls for an uncached schema may each hit the
// underlying store; confirm the intended contract.
type StoreCache struct {
	store Store
	mutex *sync.RWMutex
	cache map[string]*LogEntry
}
19 |
20 | func NewStoreCache(store Store) *StoreCache {
21 | return &StoreCache{
22 | store: store,
23 | cache: make(map[string]*LogEntry),
24 | mutex: &sync.RWMutex{},
25 | }
26 | }
27 |
// Insert delegates directly to the underlying store without touching the
// cache.
func (s *StoreCache) Insert(ctx context.Context, schemaName string) (*LogEntry, error) {
	return s.store.Insert(ctx, schemaName)
}
31 |
32 | func (s *StoreCache) FetchLast(ctx context.Context, schemaName string, ackedOnly bool) (*LogEntry, error) {
33 | logEntry := s.getCachedLogEntry(schemaName)
34 | if logEntry == nil {
35 | var err error
36 | logEntry, err = s.store.FetchLast(ctx, schemaName, ackedOnly)
37 | if err != nil {
38 | return nil, fmt.Errorf("store cache fetch last schema log: %w", err)
39 | }
40 | s.updateCachedLogEntry(schemaName, logEntry)
41 | }
42 |
43 | return logEntry, nil
44 | }
45 |
// Fetch delegates directly to the underlying store, bypassing the cache.
func (s *StoreCache) Fetch(ctx context.Context, schemaName string, version int) (*LogEntry, error) {
	return s.store.Fetch(ctx, schemaName, version)
}

// Ack updates the cached entry for the schema, then acks it on the underlying
// store. NOTE(review): the cache is updated even when the store Ack fails —
// confirm this is intentional.
func (s *StoreCache) Ack(ctx context.Context, entry *LogEntry) error {
	s.updateCachedLogEntry(entry.SchemaName, entry)
	if err := s.store.Ack(ctx, entry); err != nil {
		return fmt.Errorf("store cache ack: %w", err)
	}
	return nil
}

// Close closes the underlying store.
func (s *StoreCache) Close() error {
	return s.store.Close()
}
61 |
62 | func (s *StoreCache) getCachedLogEntry(schema string) *LogEntry {
63 | s.mutex.RLock()
64 | defer s.mutex.RUnlock()
65 | return s.cache[schema]
66 | }
67 |
68 | func (s *StoreCache) updateCachedLogEntry(schema string, logEntry *LogEntry) {
69 | s.mutex.Lock()
70 | defer s.mutex.Unlock()
71 | s.cache[schema] = logEntry
72 | }
73 |
--------------------------------------------------------------------------------
/pkg/snapshot/generator/helper_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package generator
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/pkg/snapshot"
9 | )
10 |
11 | type mockGenerator struct {
12 | createSnapshotFn func(ctx context.Context, snapshot *snapshot.Snapshot) error
13 | closeFn func() error
14 | }
15 |
16 | func (m *mockGenerator) CreateSnapshot(ctx context.Context, snapshot *snapshot.Snapshot) error {
17 | return m.createSnapshotFn(ctx, snapshot)
18 | }
19 |
20 | func (m *mockGenerator) Close() error {
21 | if m.closeFn != nil {
22 | return m.closeFn()
23 | }
24 | return nil
25 | }
26 |
--------------------------------------------------------------------------------
/pkg/snapshot/generator/mocks/mock_snapshot_generator.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package mocks
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/pkg/snapshot"
9 | )
10 |
11 | type Generator struct {
12 | CreateSnapshotFn func(ctx context.Context, snapshot *snapshot.Snapshot) error
13 | CloseFn func() error
14 | createSnapshotCalls uint
15 | }
16 |
17 | func (m *Generator) CreateSnapshot(ctx context.Context, snapshot *snapshot.Snapshot) error {
18 | m.createSnapshotCalls++
19 | return m.CreateSnapshotFn(ctx, snapshot)
20 | }
21 |
22 | func (m *Generator) Close() error {
23 | if m.CloseFn != nil {
24 | return m.CloseFn()
25 | }
26 | return nil
27 | }
28 |
29 | func (m *Generator) CreateSnapshotCalls() uint {
30 | return m.createSnapshotCalls
31 | }
32 |
--------------------------------------------------------------------------------
/pkg/snapshot/generator/postgres/data/config.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package postgres
4 |
// Config controls the postgres data snapshot generator.
type Config struct {
	// Postgres connection URL. Required.
	URL string
	// BatchPageSize represents the size of the table page range that will be
	// processed concurrently by the table workers. Defaults to 1000.
	BatchPageSize uint
	// SchemaWorkers represents the number of tables the snapshot generator will
	// process concurrently per schema. Defaults to 4.
	SchemaWorkers uint
	// TableWorkers represents the number of concurrent workers per table. Each
	// worker will process a different page range in parallel. Defaults to 4.
	TableWorkers uint
}

// Defaults applied by the accessors below when a field is left at zero.
const (
	defaultBatchPageSize = 1000
	defaultTableWorkers  = 4
	defaultSchemaWorkers = 4
)

// batchPageSize returns BatchPageSize or its default when unset.
func (c *Config) batchPageSize() uint {
	if c.BatchPageSize == 0 {
		return defaultBatchPageSize
	}
	return c.BatchPageSize
}

// schemaWorkers returns SchemaWorkers or its default when unset.
func (c *Config) schemaWorkers() uint {
	if c.SchemaWorkers == 0 {
		return defaultSchemaWorkers
	}
	return c.SchemaWorkers
}

// tableWorkers returns TableWorkers or its default when unset.
func (c *Config) tableWorkers() uint {
	if c.TableWorkers == 0 {
		return defaultTableWorkers
	}
	return c.TableWorkers
}
45 |
--------------------------------------------------------------------------------
/pkg/snapshot/generator/postgres/data/helper_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package postgres
4 |
5 | import (
6 | "context"
7 | "sync"
8 | "testing"
9 |
10 | "github.com/stretchr/testify/require"
11 |
12 | pglib "github.com/xataio/pgstream/internal/postgres"
13 | "github.com/xataio/pgstream/pkg/snapshot"
14 | )
15 |
16 | type mockRowProcessor struct {
17 | rowChan chan *snapshot.Row
18 | once sync.Once
19 | }
20 |
21 | func (mp *mockRowProcessor) process(ctx context.Context, row *snapshot.Row) error {
22 | mp.rowChan <- row
23 | return nil
24 | }
25 |
26 | func (mp *mockRowProcessor) close() {
27 | mp.once.Do(func() { close(mp.rowChan) })
28 | }
29 |
// execQuery opens a connection to pgurl, runs query, and fails the test on
// any connection or execution error. The connection is closed on return.
func execQuery(t *testing.T, ctx context.Context, pgurl, query string) {
	conn, err := pglib.NewConn(ctx, pgurl)
	require.NoError(t, err)
	defer conn.Close(ctx)

	_, err = conn.Exec(ctx, query)
	require.NoError(t, err)
}
38 |
--------------------------------------------------------------------------------
/pkg/snapshot/generator/postgres/data/instrumented_table_snapshot_generator.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package postgres
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/pkg/otel"
9 | "go.opentelemetry.io/otel/attribute"
10 | "go.opentelemetry.io/otel/trace"
11 | )
12 |
// instrumentedTableSnapshotGenerator wraps a snapshotTableFn with a tracing
// span per table snapshot.
type instrumentedTableSnapshotGenerator struct {
	tracer          trace.Tracer
	snapshotTableFn snapshotTableFn
}

// newInstrumentedTableSnapshotGenerator wraps fn with the tracer from i.
// NOTE(review): unlike other instrumented constructors in this repo, i is not
// nil-checked here — callers presumably guarantee non-nil instrumentation;
// confirm.
func newInstrumentedTableSnapshotGenerator(fn snapshotTableFn, i *otel.Instrumentation) *instrumentedTableSnapshotGenerator {
	return &instrumentedTableSnapshotGenerator{
		tracer:          i.Tracer,
		snapshotTableFn: fn,
	}
}
24 |
25 | func (i *instrumentedTableSnapshotGenerator) snapshotTable(ctx context.Context, snapshotID string, schema, table string) (err error) {
26 | ctx, span := otel.StartSpan(ctx, i.tracer, "tableSnapshotGenerator.SnapshotTable", trace.WithAttributes([]attribute.KeyValue{
27 | {Key: "schema", Value: attribute.StringValue(schema)},
28 | {Key: "table", Value: attribute.StringValue(table)},
29 | }...))
30 | defer otel.CloseSpan(span, err)
31 | return i.snapshotTableFn(ctx, snapshotID, schema, table)
32 | }
33 |
--------------------------------------------------------------------------------
/pkg/snapshot/generator/postgres/schema/pgdumprestore/helper_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package pgdumprestore
4 |
5 | import (
6 | "context"
7 |
8 | pglib "github.com/xataio/pgstream/internal/postgres"
9 | )
10 |
11 | type mockPgDump struct {
12 | dumpFn func(context.Context, uint, pglib.PGDumpOptions) ([]byte, error)
13 | dumpCalls uint
14 | }
15 |
16 | func newMockPgdump(dumpFn func(context.Context, uint, pglib.PGDumpOptions) ([]byte, error)) pglib.PGDumpFn {
17 | m := &mockPgDump{
18 | dumpFn: dumpFn,
19 | }
20 | return m.dump
21 | }
22 |
23 | func (m *mockPgDump) dump(ctx context.Context, po pglib.PGDumpOptions) ([]byte, error) {
24 | m.dumpCalls++
25 | return m.dumpFn(ctx, m.dumpCalls, po)
26 | }
27 |
28 | type mockPgRestore struct {
29 | restoreFn func(context.Context, uint, pglib.PGRestoreOptions, []byte) (string, error)
30 | restoreCalls uint
31 | }
32 |
33 | func newMockPgrestore(restoreFn func(context.Context, uint, pglib.PGRestoreOptions, []byte) (string, error)) pglib.PGRestoreFn {
34 | m := &mockPgRestore{
35 | restoreFn: restoreFn,
36 | }
37 | return m.restore
38 | }
39 |
40 | func (m *mockPgRestore) restore(ctx context.Context, po pglib.PGRestoreOptions, dump []byte) (string, error) {
41 | m.restoreCalls++
42 | return m.restoreFn(ctx, m.restoreCalls, po, dump)
43 | }
44 |
--------------------------------------------------------------------------------
/pkg/snapshot/generator/postgres/schema/pgdumprestore/test/test_dump_constraints.sql:
--------------------------------------------------------------------------------
1 | \connect test
2 |
3 | ALTER TABLE ONLY musicbrainz.alternative_medium
4 | ADD CONSTRAINT alternative_medium_pkey PRIMARY KEY (id);
5 |
6 | ALTER TABLE ONLY musicbrainz.alternative_medium_track
7 | ADD CONSTRAINT alternative_medium_track_pkey PRIMARY KEY (alternative_medium, track);
8 |
9 | CREATE INDEX area_alias_idx_txt ON musicbrainz.area_alias USING gin (musicbrainz.mb_simple_tsvector((name)::text));
10 |
11 | CREATE UNIQUE INDEX area_alias_type_idx_gid ON musicbrainz.area_alias_type USING btree (gid);
12 |
13 | CREATE TRIGGER a_del_alternative_medium_track AFTER DELETE ON musicbrainz.alternative_medium_track FOR EACH ROW EXECUTE FUNCTION musicbrainz.a_del_alternative_medium_track();
14 |
15 | CREATE TRIGGER a_del_alternative_release AFTER DELETE ON musicbrainz.alternative_release FOR EACH ROW EXECUTE FUNCTION musicbrainz.a_del_alternative_release_or_track();
16 |
17 | CREATE CONSTRAINT TRIGGER apply_artist_release_group_pending_updates AFTER INSERT OR DELETE OR UPDATE ON musicbrainz.release DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE FUNCTION musicbrainz.apply_artist_release_group_pending_updates();
18 |
19 | CREATE CONSTRAINT TRIGGER apply_artist_release_group_pending_updates AFTER INSERT OR DELETE OR UPDATE ON musicbrainz.release_group DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE FUNCTION musicbrainz.apply_artist_release_group_pending_updates();
20 |
21 | ALTER TABLE ONLY musicbrainz.alternative_medium
22 | ADD CONSTRAINT alternative_medium_fk_alternative_release FOREIGN KEY (alternative_release) REFERENCES musicbrainz.alternative_release(id);
23 |
24 | ALTER TABLE ONLY musicbrainz.alternative_medium
25 | ADD CONSTRAINT alternative_medium_fk_medium FOREIGN KEY (medium) REFERENCES musicbrainz.medium(id);
26 |
--------------------------------------------------------------------------------
/pkg/snapshot/generator/postgres/tablefinder/instrumented_table_discovery.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package tablefinder
4 |
5 | import (
6 | "context"
7 |
8 | pglib "github.com/xataio/pgstream/internal/postgres"
9 | "github.com/xataio/pgstream/pkg/otel"
10 | "go.opentelemetry.io/otel/attribute"
11 | "go.opentelemetry.io/otel/trace"
12 | )
13 |
// tableDiscoverer wraps a tableDiscoveryFn so that each invocation is
// traced via the configured OpenTelemetry tracer.
type tableDiscoverer struct {
	fn     tableDiscoveryFn
	tracer trace.Tracer
}
18 |
19 | func newInstrumentedTableDiscoveryFn(fn tableDiscoveryFn, i *otel.Instrumentation) tableDiscoveryFn {
20 | td := tableDiscoverer{
21 | fn: fn,
22 | tracer: i.Tracer,
23 | }
24 | return td.discoverTables
25 | }
26 |
27 | func (i *tableDiscoverer) discoverTables(ctx context.Context, conn pglib.Querier, schema string) (tables []string, err error) {
28 | ctx, span := otel.StartSpan(ctx, i.tracer, "tableFinder.discoverTables", trace.WithAttributes(attribute.KeyValue{
29 | Key: "schema", Value: attribute.StringValue(schema),
30 | }))
31 | defer otel.CloseSpan(span, err)
32 |
33 | return i.fn(ctx, conn, schema)
34 | }
35 |
--------------------------------------------------------------------------------
/pkg/snapshot/generator/snapshot_generator.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package generator
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/pkg/snapshot"
9 | )
10 |
// SnapshotGenerator produces snapshots of the schema/tables described by a
// snapshot definition.
type SnapshotGenerator interface {
	// CreateSnapshot performs the snapshot described by the given
	// definition.
	CreateSnapshot(ctx context.Context, snapshot *snapshot.Snapshot) error
	// Close releases any resources held by the generator.
	Close() error
}
15 |
--------------------------------------------------------------------------------
/pkg/snapshot/snapshot.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package snapshot
4 |
5 | import (
6 | "context"
7 | )
8 |
// Snapshot describes the scope of a snapshot: a schema and the set of
// tables within it to be snapshotted.
type Snapshot struct {
	SchemaName string
	TableNames []string
}

// Request pairs a snapshot definition with its processing status and any
// errors recorded on completion.
type Request struct {
	Snapshot Snapshot
	Status   Status
	Errors   *Errors
}

// Row represents a single table row captured during a snapshot.
type Row struct {
	Schema  string
	Table   string
	Columns []Column
}

// Column is a single column value within a snapshot Row.
type Column struct {
	Name  string
	Type  string
	Value any
}

// RowProcessor handles a single snapshot row.
type RowProcessor func(context.Context, *Row) error

// Status is the lifecycle state of a snapshot request.
type Status string

const (
	// StatusRequested marks a request that has been created but not yet
	// picked up for processing.
	StatusRequested = Status("requested")
	// StatusInProgress marks a request that is currently being processed.
	StatusInProgress = Status("in progress")
	// StatusCompleted marks a request that finished, successfully or not.
	StatusCompleted = Status("completed")
)
41 |
42 | func (s *Snapshot) IsValid() bool {
43 | return s != nil && s.SchemaName != "" && len(s.TableNames) > 0
44 | }
45 |
46 | func (r *Request) MarkCompleted(err error) {
47 | r.Status = StatusCompleted
48 | r.Errors = NewErrors(err)
49 | }
50 |
51 | func (r *Request) MarkInProgress() {
52 | r.Status = StatusInProgress
53 | }
54 |
55 | func (r *Request) IsPending() bool {
56 | return r.Status == StatusRequested
57 | }
58 |
59 | func (r *Request) HasFailed() bool {
60 | return r.Status == StatusCompleted && r.Errors != nil
61 | }
62 |
63 | func (r *Request) HasFailedForTable(table string) bool {
64 | return r.Status == StatusCompleted && r.Errors.IsTableError(table)
65 | }
66 |
--------------------------------------------------------------------------------
/pkg/snapshot/store/mocks/mock_snapshot_store.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package mocks
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/pkg/snapshot"
9 | )
10 |
// Store is a configurable mock of the snapshot store interface. Each
// method delegates to the corresponding function field; calling a method
// whose field has not been set panics (nil function call), surfacing
// unexpected calls in tests.
type Store struct {
	CreateSnapshotRequestFn       func(context.Context, *snapshot.Request) error
	UpdateSnapshotRequestFn       func(context.Context, uint, *snapshot.Request) error
	GetSnapshotRequestsByStatusFn func(ctx context.Context, status snapshot.Status) ([]*snapshot.Request, error)
	GetSnapshotRequestsBySchemaFn func(ctx context.Context, s string) ([]*snapshot.Request, error)
	// updateSnapshotRequestCalls counts UpdateSnapshotRequest invocations.
	updateSnapshotRequestCalls uint
}

// CreateSnapshotRequest delegates to CreateSnapshotRequestFn.
func (m *Store) CreateSnapshotRequest(ctx context.Context, s *snapshot.Request) error {
	return m.CreateSnapshotRequestFn(ctx, s)
}

// UpdateSnapshotRequest counts invocations and passes the (1-based) call
// number to UpdateSnapshotRequestFn so tests can vary behaviour per call.
func (m *Store) UpdateSnapshotRequest(ctx context.Context, s *snapshot.Request) error {
	m.updateSnapshotRequestCalls++
	return m.UpdateSnapshotRequestFn(ctx, m.updateSnapshotRequestCalls, s)
}

// GetSnapshotRequestsByStatus delegates to GetSnapshotRequestsByStatusFn.
func (m *Store) GetSnapshotRequestsByStatus(ctx context.Context, status snapshot.Status) ([]*snapshot.Request, error) {
	return m.GetSnapshotRequestsByStatusFn(ctx, status)
}

// GetSnapshotRequestsBySchema delegates to GetSnapshotRequestsBySchemaFn.
func (m *Store) GetSnapshotRequestsBySchema(ctx context.Context, s string) ([]*snapshot.Request, error) {
	return m.GetSnapshotRequestsBySchemaFn(ctx, s)
}

// Close is a no-op; the mock holds no resources.
func (m *Store) Close() error {
	return nil
}
39 |
--------------------------------------------------------------------------------
/pkg/snapshot/store/snapshot_store.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package store
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/pkg/snapshot"
9 | )
10 |
// Store persists snapshot requests and their lifecycle status.
type Store interface {
	// CreateSnapshotRequest records a new snapshot request.
	CreateSnapshotRequest(context.Context, *snapshot.Request) error
	// UpdateSnapshotRequest updates an existing snapshot request.
	UpdateSnapshotRequest(context.Context, *snapshot.Request) error
	// GetSnapshotRequestsByStatus returns all requests in the given status.
	GetSnapshotRequestsByStatus(ctx context.Context, status snapshot.Status) ([]*snapshot.Request, error)
	// GetSnapshotRequestsBySchema returns all requests for the given schema.
	GetSnapshotRequestsBySchema(ctx context.Context, schema string) ([]*snapshot.Request, error)
	// Close releases any resources held by the store.
	Close() error
}

const (
	// SchemaName is the Postgres schema used by pgstream for its internal
	// snapshot bookkeeping.
	SchemaName = "pgstream"
	// TableName is the table, within SchemaName, holding snapshot requests.
	TableName = "snapshot_requests"
)
23 |
--------------------------------------------------------------------------------
/pkg/stream/helper_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package stream
4 |
// mockMigrator is a test double for the stream migrator dependency. Each
// behaviour can be overridden through the corresponding function field;
// when a field is nil the method falls back to a zero-value response.
type mockMigrator struct {
	versionFn func() (uint, bool, error)
	closeFn   func() (error, error)
}

// Version returns the configured version response, or (0, false, nil) when
// no versionFn has been set.
func (m *mockMigrator) Version() (uint, bool, error) {
	if fn := m.versionFn; fn != nil {
		return fn()
	}
	return 0, false, nil
}

// Close returns the configured close response, or (nil, nil) when no
// closeFn has been set.
func (m *mockMigrator) Close() (error, error) {
	if fn := m.closeFn; fn != nil {
		return fn()
	}
	return nil, nil
}
23 |
--------------------------------------------------------------------------------
/pkg/stream/integration/config/postgresql.conf:
--------------------------------------------------------------------------------
1 | # CONNECTION
2 | listen_addresses = '*'
3 |
4 | # MODULES
5 | shared_preload_libraries = 'wal2json'
6 |
7 | # REPLICATION
8 | wal_level = logical # minimal, archive, hot_standby, or logical (change requires restart)
9 | max_wal_senders = 4 # max number of walsender processes (change requires restart)
10 | #wal_keep_segments = 4 # in logfile segments, 16MB each; 0 disables
11 | #wal_sender_timeout = 60s # in milliseconds; 0 disables
12 | max_replication_slots = 4 # max number of replication slots (change requires restart)
13 |
--------------------------------------------------------------------------------
/pkg/stream/integration/setup_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package integration
4 |
5 | import (
6 | "context"
7 | "log"
8 | "os"
9 | "testing"
10 |
11 | "github.com/xataio/pgstream/internal/testcontainers"
12 | "github.com/xataio/pgstream/pkg/stream"
13 | )
14 |
15 | func TestMain(m *testing.M) {
16 | // if integration tests are not enabled, nothing to setup
17 | if os.Getenv("PGSTREAM_INTEGRATION_TESTS") != "" {
18 | ctx := context.Background()
19 | pgcleanup, err := testcontainers.SetupPostgresContainer(ctx, &pgurl, testcontainers.Postgres14, "config/postgresql.conf")
20 | if err != nil {
21 | log.Fatal(err)
22 | }
23 | defer pgcleanup()
24 |
25 | if err := stream.Init(ctx, pgurl, ""); err != nil {
26 | log.Fatal(err)
27 | }
28 |
29 | kafkacleanup, err := testcontainers.SetupKafkaContainer(ctx, &kafkaBrokers)
30 | if err != nil {
31 | log.Fatal(err)
32 | }
33 | defer kafkacleanup()
34 |
35 | oscleanup, err := testcontainers.SetupOpenSearchContainer(ctx, &opensearchURL)
36 | if err != nil {
37 | log.Fatal(err)
38 | }
39 | defer oscleanup()
40 |
41 | escleanup, err := testcontainers.SetupElasticsearchContainer(ctx, &elasticsearchURL)
42 | if err != nil {
43 | log.Fatal(err)
44 | }
45 | defer escleanup()
46 |
47 | targetPGCleanup, err := testcontainers.SetupPostgresContainer(ctx, &targetPGURL, testcontainers.Postgres17)
48 | if err != nil {
49 | log.Fatal(err)
50 | }
51 | defer targetPGCleanup()
52 | }
53 |
54 | os.Exit(m.Run())
55 | }
56 |
--------------------------------------------------------------------------------
/pkg/stream/stream_snapshot.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package stream
4 |
5 | import (
6 | "context"
7 | "errors"
8 | "fmt"
9 |
10 | loglib "github.com/xataio/pgstream/pkg/log"
11 | "github.com/xataio/pgstream/pkg/otel"
12 | snapshotlistener "github.com/xataio/pgstream/pkg/wal/listener/snapshot"
13 | snapshotbuilder "github.com/xataio/pgstream/pkg/wal/listener/snapshot/builder"
14 | "golang.org/x/sync/errgroup"
15 | )
16 |
// Snapshot performs a one-off snapshot: it builds the configured processor
// chain, wires it into a snapshot-generator-backed listener, and blocks
// until the listener finishes or the context is cancelled. Context
// cancellation is treated as a normal shutdown rather than an error.
func Snapshot(ctx context.Context, logger loglib.Logger, config *Config, instrumentation *otel.Instrumentation) error {
	if config.Listener.Snapshot == nil {
		return errors.New("source snapshot not configured")
	}

	if err := config.IsValid(); err != nil {
		return fmt.Errorf("incompatible configuration: %w", err)
	}

	eg, ctx := errgroup.WithContext(ctx)

	// Processor

	processor, err := buildProcessor(ctx, logger, &config.Processor, nil, instrumentation)
	if err != nil {
		return err
	}
	defer processor.Close()

	// wrap the processor with any configured modifiers; the returned closer
	// tears the wrappers down on exit
	var closer closerFn
	processor, closer, err = addProcessorModifiers(ctx, config, logger, processor, instrumentation)
	if err != nil {
		return err
	}
	defer closer()

	// Listener

	// the generator feeds captured WAL events straight into the processor
	snapshotGenerator, err := snapshotbuilder.NewSnapshotGenerator(
		ctx,
		config.Listener.Snapshot,
		processor.ProcessWALEvent,
		logger,
		instrumentation)
	if err != nil {
		return err
	}
	listener := snapshotlistener.New(snapshotGenerator)
	defer listener.Close()

	eg.Go(func() error {
		defer logger.Info("stopping postgres snapshot listener...")
		logger.Info("running postgres snapshot listener...")
		return listener.Listen(ctx)
	})

	// wait for the listener to complete; context cancellation is expected
	// on shutdown and is deliberately swallowed
	if err := eg.Wait(); err != nil {
		if !errors.Is(err, context.Canceled) {
			return err
		}
	}

	return nil
}
71 |
--------------------------------------------------------------------------------
/pkg/tls/test/test.csr:
--------------------------------------------------------------------------------
1 | -----BEGIN CERTIFICATE REQUEST-----
2 | MIIEjTCCAnUCAQAwSDELMAkGA1UEBhMCU1AxEzARBgNVBAgMClNvbWUtU3RhdGUx
3 | FTATBgNVBAoMDFBnc3RyZWFtIEx0ZDENMAsGA1UECwwEWGF0YTCCAiIwDQYJKoZI
4 | hvcNAQEBBQADggIPADCCAgoCggIBAMZG8/obpyvJ+WkGkdO/hbExSN1nWR206/Dh
5 | pSYzcZyI1Jj0R4Af0gD/EFVM+4KTDr20nOofmfBWOYHV+KwiKtWQQ+oT0+xVcTT6
6 | IC5I5K9+AXERuTu/NbnjkxuC/1u7K511RrUK0lxUra2/B8mGTc9nu2g415GVk2hU
7 | rJjWEX09hVH7xBSmnzYN+IfepsftxgnR5m2YzqOSnsphBBfyyOsL+3Jo5Uv4yY22
8 | bnJCDxx/TPG37EcGMb4Q/aCWk5mXm4Io5mBfcl7SNxy867JpBO2CCP6fFaWRUGmF
9 | /O+YBD5z0cSb1wZrMBRgezggpa2gacYVtsWrQYzAxMtCf3MwM89z8i+W9ME3cMhg
10 | T7b6T0XCQ5gkqmLxDCsg9ocNV0W0wb5EEPlt4/TeqMmLbpLkIa6rnR2C6gg+wFSQ
11 | Vk8c/aBtm9BjKFLWWUwDUPPzp1RdIVAfuobDK37dVqr+1m6oTDl+BoDuM5PsgTS4
12 | bFW2ZJVZB+d4IFmGhqTtKKmYa7QbwhzF8i5ShV+419KOt6hRkJ/jREdea7ZS/uyK
13 | wBAvPHZVkUk8tfTpcuKTFVKsgXXV8uwwnI2W6safYbTXaN+7gfA88wntaigjeij/
14 | LrO+itAiv9GTGkpMlXyuAX/d5+4j0EHcV64NYL61GvMcbJ5G0SeQrXuBlgMTYaNr
15 | O7miEujnAgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEAGYFGSgMARWuH5VXK0/Fl
16 | nNg+/rzdff1NYkY9QrvFxCQeVJ9rD1ml7VLZXDtXhMNEGJyYbkouc1Ehx0BsihT6
17 | YztcnQ6TzWAzvr3Ns9X3riADzXxdDHV5xs+8VPV8RvT3XNcrlw2NQmzJ4Juc8PkT
18 | 4ZfguZBywAmFTw1oX8JqlQSp5pYtP7popsvGPS6ieUm0Kmv8kK3sRDs+JSc7iXtB
19 | /HymqeSylFNHgFZsdbYmu32v2qbcqimAitB/v5tGNhuiMXx6vEeQnB69V+AV70Rl
20 | 9dnvAo7ihTRMzecUVsoDFtc8OWSPdTm6t8vDI2JqmeDN25Xhyf89cKwoY97NhA99
21 | ds5WHs6TzsHohPZAsaxtZnjwxMEne7Y4FLFmTHVk5o0POTZcOC/sMB1iBDsd/YJe
22 | AYsoiqLYtu6x6Avfe6LXWYWYa/R4/UXh8H6WiChsFXzOIilp3apjaeHM3z7iKx2S
23 | VtGyVTrrcbzRiF0ShKVnbDXnvcoNZxPiXfh6Zz4SkBbV01T3hluBwzp4mjcWpiv3
24 | AOAWChMnbmkg/T+OME6e1JVHDR5tAC/7vF2QkZYpiH2RVnZmCTDWBcRGpMkhkRgF
25 | eycowzKBkgIOcJ99p0sGEqQ3W0J1M4bzuumncLID08EG/dEp1eIdunahcHHyhnnv
26 | BcGFr2/OxuaVmxcy5/QQjAg=
27 | -----END CERTIFICATE REQUEST-----
28 |
--------------------------------------------------------------------------------
/pkg/tls/test/test.pem:
--------------------------------------------------------------------------------
1 | -----BEGIN CERTIFICATE-----
2 | MIIFPzCCAyegAwIBAgIULV6zejFwt/Tri8WKZFBxj15uvaIwDQYJKoZIhvcNAQEL
3 | BQAwSDELMAkGA1UEBhMCU1AxEzARBgNVBAgMClNvbWUtU3RhdGUxFTATBgNVBAoM
4 | DFBnc3RyZWFtIEx0ZDENMAsGA1UECwwEWGF0YTAeFw0yNDA4MDUxNTE3NDBaFw0y
5 | NTA4MDUxNTE3NDBaMEgxCzAJBgNVBAYTAlNQMRMwEQYDVQQIDApTb21lLVN0YXRl
6 | MRUwEwYDVQQKDAxQZ3N0cmVhbSBMdGQxDTALBgNVBAsMBFhhdGEwggIiMA0GCSqG
7 | SIb3DQEBAQUAA4ICDwAwggIKAoICAQDGRvP6G6cryflpBpHTv4WxMUjdZ1kdtOvw
8 | 4aUmM3GciNSY9EeAH9IA/xBVTPuCkw69tJzqH5nwVjmB1fisIirVkEPqE9PsVXE0
9 | +iAuSOSvfgFxEbk7vzW545Mbgv9buyuddUa1CtJcVK2tvwfJhk3PZ7toONeRlZNo
10 | VKyY1hF9PYVR+8QUpp82DfiH3qbH7cYJ0eZtmM6jkp7KYQQX8sjrC/tyaOVL+MmN
11 | tm5yQg8cf0zxt+xHBjG+EP2glpOZl5uCKOZgX3Je0jccvOuyaQTtggj+nxWlkVBp
12 | hfzvmAQ+c9HEm9cGazAUYHs4IKWtoGnGFbbFq0GMwMTLQn9zMDPPc/IvlvTBN3DI
13 | YE+2+k9FwkOYJKpi8QwrIPaHDVdFtMG+RBD5beP03qjJi26S5CGuq50dguoIPsBU
14 | kFZPHP2gbZvQYyhS1llMA1Dz86dUXSFQH7qGwyt+3Vaq/tZuqEw5fgaA7jOT7IE0
15 | uGxVtmSVWQfneCBZhoak7SipmGu0G8IcxfIuUoVfuNfSjreoUZCf40RHXmu2Uv7s
16 | isAQLzx2VZFJPLX06XLikxVSrIF11fLsMJyNlurGn2G012jfu4HwPPMJ7WooI3oo
17 | /y6zvorQIr/RkxpKTJV8rgF/3efuI9BB3FeuDWC+tRrzHGyeRtEnkK17gZYDE2Gj
18 | azu5ohLo5wIDAQABoyEwHzAdBgNVHQ4EFgQUcj2UaSuSsgkex5hfS0eDAkPdbJ0w
19 | DQYJKoZIhvcNAQELBQADggIBAJK6fMa4L7iIQSlPzG3pHTSSLQd9Unev2naX9/S1
20 | Yo55Tj9VCBhViGa7CbDtaW7ZYr/fXydZVcthXYZzZ7QEVyYaguWlzXLjy/qF8kgk
21 | cDwinFa8hiJnP+BJUGnzq3LYQJ2labI4YUscc6p4inh9y8JZ3n33VqX2YjqCdHMA
22 | j8nw5xpThdQ/a8z3Z8ugFCLO09Hts1eKFhs5PwaQvjkoX+dSE2FeX51OMlLOPDsu
23 | C6ScDU7FG0J5JE36nRqp2XwSdGAfc5pHKmsuomxnoE/d/hL7O6zouo/jvQyCNFtn
24 | 5/pzhkhhOjUTP2gIW5ueNn8oQF9F32GWRNJGQVTBiK17dWvHxiSvIzgKqUyrD8lI
25 | VefVEQgRbfHD3nSk6G30gAeWzt8T10lI8MtQWTtoFJGFBaSVr/lSyHo4QS4SyTmK
26 | uvnFGJVivRtaAP4d+u/6/1Mvy5sNsSiWRRKfKTB/FEerbe7blhnqJhrBp98nKBv/
27 | IJtmewD7lVGGDY8sWnxnpNyqLVhvRilO9d+4oQWqKgN8m1PXAI2jDYA0RZ+Qs/ko
28 | 5FB88mRi8hNOhmADXguKlnCid/X0StK6wpphvFIaNnGLFzjeZXc65BoV1c2+Boxe
29 | cNwpkrW5tKaf/Ox1ntHnnQBpUhM4AxGoczfIj0dEnYio54gagsAMK/Pjq2KiX7Bi
30 | RUm4
31 | -----END CERTIFICATE-----
32 |
--------------------------------------------------------------------------------
/pkg/transformers/builder/transformer_builder_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package builder
4 |
5 | import (
6 | "testing"
7 |
8 | "github.com/stretchr/testify/require"
9 | "github.com/xataio/pgstream/pkg/transformers"
10 | )
11 |
12 | func TestTransformerBuilder_New(t *testing.T) {
13 | tests := []struct {
14 | name string
15 | config *transformers.Config
16 | wantErr error
17 | }{
18 | {
19 | name: "valid greenmask string transformer",
20 | config: &transformers.Config{
21 | Name: transformers.GreenmaskString,
22 | Parameters: map[string]any{"max_length": 10},
23 | },
24 | wantErr: nil,
25 | },
26 | {
27 | name: "invalid parameter for phone number transformer",
28 | config: &transformers.Config{
29 | Name: transformers.String,
30 | Parameters: map[string]any{"invalid": "param"},
31 | },
32 | wantErr: transformers.ErrUnknownParameter,
33 | },
34 | {
35 | name: "unsupported transformer",
36 | config: &transformers.Config{
37 | Name: "unsupported",
38 | Parameters: map[string]any{},
39 | },
40 | wantErr: transformers.ErrUnsupportedTransformer,
41 | },
42 | }
43 |
44 | for _, tt := range tests {
45 | t.Run(tt.name, func(t *testing.T) {
46 | t.Parallel()
47 | tb := NewTransformerBuilder()
48 | _, err := tb.New(tt.config)
49 | require.ErrorIs(t, err, tt.wantErr)
50 | })
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/pkg/transformers/generators/deterministic_bytes_generator.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package generators
4 |
5 | import (
6 | greenmaskgenerators "github.com/eminano/greenmask/pkg/generators"
7 | )
8 |
// NewDeterministicBytesGenerator returns a hash-based byte generator of
// the given size, so equal inputs produce equal outputs across runs.
func NewDeterministicBytesGenerator(size int) (Generator, error) {
	return greenmaskgenerators.GetHashBytesGen([]byte{}, size)
}
12 |
--------------------------------------------------------------------------------
/pkg/transformers/generators/generator.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package generators
4 |
// Generator produces byte slices, optionally derived from an input byte
// slice (see the random and deterministic implementations in this
// package).
type Generator interface {
	// Generate returns generated bytes for the given input.
	Generate([]byte) ([]byte, error)
	// Size returns the byte length associated with the generator.
	Size() int
}
9 |
--------------------------------------------------------------------------------
/pkg/transformers/generators/random_bytes_generator.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package generators
4 |
5 | import (
6 | "time"
7 |
8 | greenmaskgenerators "github.com/eminano/greenmask/pkg/generators"
9 | )
10 |
// NewRandomBytesGenerator returns a random byte generator of the given
// size, seeded with the current time.
func NewRandomBytesGenerator(size int) Generator {
	return greenmaskgenerators.NewRandomBytes(time.Now().UnixNano(), size)
}
14 |
--------------------------------------------------------------------------------
/pkg/transformers/greenmask/greenmask_boolean_transformer.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package greenmask
4 |
5 | import (
6 | "context"
7 |
8 | greenmasktransformers "github.com/eminano/greenmask/pkg/generators/transformers"
9 | "github.com/xataio/pgstream/pkg/transformers"
10 | )
11 |
// BooleanTransformer anonymizes boolean values using greenmask's random
// boolean transformer.
type BooleanTransformer struct {
	transformer *greenmasktransformers.RandomBoolean
}

var (
	// booleanParams declares the accepted configuration parameters: an
	// optional "generator" that selects random or deterministic output.
	booleanParams = []transformers.Parameter{
		{
			Name:          "generator",
			SupportedType: "string",
			Default:       "random",
			Dynamic:       false,
			Required:      false,
			Values:        []any{"random", "deterministic"},
		},
	}
	// booleanCompatibleTypes lists the input types Transform accepts.
	booleanCompatibleTypes = []transformers.SupportedDataType{
		transformers.BooleanDataType,
		transformers.ByteArrayDataType,
	}
)
32 |
33 | func NewBooleanTransformer(params transformers.ParameterValues) (*BooleanTransformer, error) {
34 | t := greenmasktransformers.NewRandomBoolean()
35 | if err := setGenerator(t, params); err != nil {
36 | return nil, err
37 | }
38 | return &BooleanTransformer{
39 | transformer: t,
40 | }, nil
41 | }
42 |
43 | func (bt *BooleanTransformer) Transform(_ context.Context, value transformers.Value) (any, error) {
44 | var toTransform []byte
45 | switch val := value.TransformValue.(type) {
46 | case bool:
47 | if val {
48 | toTransform = []byte{1}
49 | } else {
50 | toTransform = []byte{0}
51 | }
52 | case []byte:
53 | toTransform = val
54 | default:
55 | return nil, transformers.ErrUnsupportedValueType
56 | }
57 |
58 | ret, err := bt.transformer.Transform(toTransform)
59 | if err != nil {
60 | return nil, err
61 | }
62 | return bool(ret), nil
63 | }
64 |
// CompatibleTypes returns the data types this transformer can receive.
func (bt *BooleanTransformer) CompatibleTypes() []transformers.SupportedDataType {
	return booleanCompatibleTypes
}

// Type returns the transformer type identifier.
func (bt *BooleanTransformer) Type() transformers.TransformerType {
	return transformers.GreenmaskBoolean
}

// BooleanTransformerDefinition describes the supported types and
// configuration parameters of the boolean transformer.
func BooleanTransformerDefinition() *transformers.Definition {
	return &transformers.Definition{
		SupportedTypes: booleanCompatibleTypes,
		Parameters:     booleanParams,
	}
}
79 |
--------------------------------------------------------------------------------
/pkg/transformers/greenmask/greenmask_transformer.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package greenmask
4 |
5 | import (
6 | "time"
7 |
8 | greenmaskgenerators "github.com/eminano/greenmask/pkg/generators"
9 | greenmasktransformers "github.com/eminano/greenmask/pkg/generators/transformers"
10 | "github.com/xataio/pgstream/pkg/transformers"
11 | )
12 |
// Names of the supported value generator strategies: "random" produces a
// fresh value each run, while "deterministic" derives the value from the
// input via hashing, so equal inputs map to equal outputs.
const (
	random        = "random"
	deterministic = "deterministic"
)
17 |
18 | func setGenerator(t greenmasktransformers.Transformer, params transformers.ParameterValues) error {
19 | generatorType, err := getGeneratorType(params)
20 | if err != nil {
21 | return err
22 | }
23 |
24 | var greenmaskGenerator greenmaskgenerators.Generator
25 | switch generatorType {
26 | case random:
27 | greenmaskGenerator = greenmaskgenerators.NewRandomBytes(time.Now().UnixNano(), t.GetRequiredGeneratorByteLength())
28 | case deterministic:
29 | var err error
30 | greenmaskGenerator, err = greenmaskgenerators.GetHashBytesGen([]byte{}, t.GetRequiredGeneratorByteLength())
31 | if err != nil {
32 | return err
33 | }
34 | default:
35 | return transformers.ErrUnsupportedGenerator
36 | }
37 |
38 | return t.SetGenerator(greenmaskGenerator)
39 | }
40 |
// getGeneratorType reads the "generator" parameter, falling back to the
// random generator when it is not set.
func getGeneratorType(params transformers.ParameterValues) (string, error) {
	// default to using the random generator
	return findParameter(params, "generator", random)
}

// findParameter returns the value of the named parameter, or defaultVal
// when the parameter is absent.
func findParameter[T any](params transformers.ParameterValues, name string, defaultVal T) (T, error) {
	return transformers.FindParameterWithDefault(params, name, defaultVal)
}
49 |
50 | func findParameterArray[T any](params transformers.ParameterValues, name string, defaultVal []T) ([]T, error) {
51 | val, found, err := transformers.FindParameterArray[T](params, name)
52 | if err != nil {
53 | return val, err
54 | }
55 | if !found {
56 | return defaultVal, nil
57 | }
58 | return val, nil
59 | }
60 |
--------------------------------------------------------------------------------
/pkg/transformers/greenmask/greenmask_uuid_transformer.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package greenmask
4 |
5 | import (
6 | "context"
7 |
8 | greenmasktransformers "github.com/eminano/greenmask/pkg/generators/transformers"
9 | "github.com/google/uuid"
10 | "github.com/xataio/pgstream/pkg/transformers"
11 | )
12 |
// UUIDTransformer anonymizes UUID values using greenmask's random UUID
// transformer.
type UUIDTransformer struct {
	transformer *greenmasktransformers.RandomUuidTransformer
}

var (
	// uuidParams declares the accepted configuration parameters: an
	// optional "generator" that selects random or deterministic output.
	uuidParams = []transformers.Parameter{
		{
			Name:          "generator",
			SupportedType: "string",
			Default:       "random",
			Dynamic:       false,
			Required:      false,
			Values:        []any{"random", "deterministic"},
		},
	}
	// uuidCompatibleTypes lists the input types Transform accepts.
	uuidCompatibleTypes = []transformers.SupportedDataType{
		transformers.StringDataType,
		transformers.ByteArrayDataType,
		transformers.UUIDDataType,
		transformers.UInt8ArrayOf16DataType,
	}
)
35 |
36 | func NewUUIDTransformer(params transformers.ParameterValues) (*UUIDTransformer, error) {
37 | t := greenmasktransformers.NewRandomUuidTransformer()
38 | if err := setGenerator(t, params); err != nil {
39 | return nil, err
40 | }
41 | return &UUIDTransformer{
42 | transformer: t,
43 | }, nil
44 | }
45 |
46 | func (ut *UUIDTransformer) Transform(_ context.Context, value transformers.Value) (any, error) {
47 | var toTransform []byte
48 | switch val := value.TransformValue.(type) {
49 | case string:
50 | toTransform = []byte(val)
51 | case uuid.UUID:
52 | toTransform = val[:]
53 | case []byte:
54 | toTransform = val
55 | case [16]uint8:
56 | toTransform = val[:]
57 | default:
58 | return nil, transformers.ErrUnsupportedValueType
59 | }
60 | ret, err := ut.transformer.Transform(toTransform)
61 | if err != nil {
62 | return nil, err
63 | }
64 | return ret, nil
65 | }
66 |
// CompatibleTypes returns the data types this transformer can receive.
func (ut *UUIDTransformer) CompatibleTypes() []transformers.SupportedDataType {
	return uuidCompatibleTypes
}

// Type returns the transformer type identifier.
func (ut *UUIDTransformer) Type() transformers.TransformerType {
	return transformers.GreenmaskUUID
}

// UUIDTransformerDefinition describes the supported types and
// configuration parameters of the UUID transformer.
func UUIDTransformerDefinition() *transformers.Definition {
	return &transformers.Definition{
		SupportedTypes: uuidCompatibleTypes,
		Parameters:     uuidParams,
	}
}
81 |
--------------------------------------------------------------------------------
/pkg/transformers/literal_string_transformer.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package transformers
4 |
5 | import (
6 | "context"
7 | "errors"
8 | "fmt"
9 | )
10 |
// LiteralStringTransformer replaces every input value with a fixed,
// preconfigured string.
type LiteralStringTransformer struct {
	literal string
}

var (
	// errLiteralStringNotFound is returned when the required "literal"
	// parameter is missing from the configuration.
	errLiteralStringNotFound = errors.New("literal_string_transformer: literal parameter not found")
	// the transformer accepts any data type, since it ignores the input
	literalStringCompatibleTypes = []SupportedDataType{
		AllDataTypes,
	}
	// literalStringParams declares the single required "literal" parameter.
	literalStringParams = []Parameter{
		{
			Name:          "literal",
			SupportedType: "string",
			Default:       nil,
			Dynamic:       false,
			Required:      true,
		},
	}
)
30 |
31 | func NewLiteralStringTransformer(params ParameterValues) (*LiteralStringTransformer, error) {
32 | literal, found, err := FindParameter[string](params, "literal")
33 | if err != nil {
34 | return nil, fmt.Errorf("literal_string_transformer: literal must be a string: %w", err)
35 | }
36 | if !found {
37 | return nil, errLiteralStringNotFound
38 | }
39 |
40 | return &LiteralStringTransformer{
41 | literal: literal,
42 | }, nil
43 | }
44 |
// Transform ignores the input value and returns the configured literal.
func (lst *LiteralStringTransformer) Transform(_ context.Context, value Value) (any, error) {
	return lst.literal, nil
}

// CompatibleTypes returns the data types this transformer can receive
// (all of them, since the input is ignored).
func (lst *LiteralStringTransformer) CompatibleTypes() []SupportedDataType {
	return literalStringCompatibleTypes
}

// Type returns the transformer type identifier.
func (lst *LiteralStringTransformer) Type() TransformerType {
	return LiteralString
}

// LiteralStringTransformerDefinition describes the supported types and
// configuration parameters of the literal string transformer.
func LiteralStringTransformerDefinition() *Definition {
	return &Definition{
		SupportedTypes: literalStringCompatibleTypes,
		Parameters:     literalStringParams,
	}
}
63 |
--------------------------------------------------------------------------------
/pkg/transformers/literal_string_transformer_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package transformers
4 |
5 | import (
6 | "context"
7 | "testing"
8 |
9 | "github.com/stretchr/testify/require"
10 | )
11 |
12 | func TestLiteralStringTransformer(t *testing.T) {
13 | t.Parallel()
14 | tests := []struct {
15 | name string
16 | params ParameterValues
17 | wantErr error
18 | }{
19 | {
20 | name: "ok - valid",
21 | params: ParameterValues{
22 | "literal": "test",
23 | },
24 | wantErr: nil,
25 | },
26 | {
27 | name: "error - invalid literal",
28 | params: ParameterValues{
29 | "literal": 123,
30 | },
31 | wantErr: ErrInvalidParameters,
32 | },
33 | {
34 | name: "error - empty literal",
35 | params: ParameterValues{},
36 | wantErr: errLiteralStringNotFound,
37 | },
38 | }
39 | for _, tc := range tests {
40 | t.Run(tc.name, func(t *testing.T) {
41 | t.Parallel()
42 | lst, err := NewLiteralStringTransformer(tc.params)
43 | require.ErrorIs(t, err, tc.wantErr)
44 | if tc.wantErr != nil {
45 | return
46 | }
47 | require.NoError(t, err)
48 | require.NotNil(t, lst)
49 | })
50 | }
51 | }
52 |
53 | func TestLiteralStringTransformer_Transform(t *testing.T) {
54 | t.Parallel()
55 | wantOutput := "{'output': 'testoutput'"
56 | lst, err := NewLiteralStringTransformer(ParameterValues{"literal": wantOutput})
57 | require.NoError(t, err)
58 | tests := []struct {
59 | name string
60 | params ParameterValues
61 | input any
62 | want any
63 | wantErr error
64 | }{
65 | {
66 | name: "ok - string",
67 | input: "testinput",
68 | wantErr: nil,
69 | },
70 | {
71 | name: "ok - JSON",
72 | input: "{'json': 'jsoninput'}",
73 | wantErr: nil,
74 | },
75 | }
76 | for _, tc := range tests {
77 | t.Run(tc.name, func(t *testing.T) {
78 | t.Parallel()
79 |
80 | got, err := lst.Transform(context.Background(), Value{TransformValue: tc.input})
81 | require.ErrorIs(t, err, tc.wantErr)
82 | if tc.wantErr != nil {
83 | return
84 | }
85 | require.Equal(t, wantOutput, got)
86 | })
87 | }
88 | }
89 |
--------------------------------------------------------------------------------
/pkg/transformers/mocks/mock_builder.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package mocks
4 |
5 | import (
6 | "github.com/xataio/pgstream/pkg/transformers"
7 | )
8 |
// TransformerBuilder is a mock transformer builder that delegates New to
// the configurable NewFn field.
type TransformerBuilder struct {
	NewFn func(*transformers.Config) (transformers.Transformer, error)
}

// New delegates to NewFn. It panics (nil function call) if NewFn is unset.
func (m *TransformerBuilder) New(cfg *transformers.Config) (transformers.Transformer, error) {
	return m.NewFn(cfg)
}
16 |
--------------------------------------------------------------------------------
/pkg/transformers/mocks/mock_transformer.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package mocks
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/pkg/transformers"
9 | )
10 |
// Transformer is a mock transformer whose behaviour is supplied through
// function fields. Methods panic (nil function call) when the
// corresponding field is unset.
type Transformer struct {
	TransformFn       func(transformers.Value) (any, error)
	CompatibleTypesFn func() []transformers.SupportedDataType
}

// Transform delegates to TransformFn, dropping the context.
func (m *Transformer) Transform(_ context.Context, val transformers.Value) (any, error) {
	return m.TransformFn(val)
}

// CompatibleTypes delegates to CompatibleTypesFn.
func (m *Transformer) CompatibleTypes() []transformers.SupportedDataType {
	return m.CompatibleTypesFn()
}

// Type returns a fixed "mock" transformer type identifier.
func (m *Transformer) Type() transformers.TransformerType {
	return transformers.TransformerType("mock")
}
27 |
--------------------------------------------------------------------------------
/pkg/transformers/neosync/neosync_firstname_transformer_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package neosync
4 |
5 | import (
6 | "context"
7 | "testing"
8 |
9 | "github.com/stretchr/testify/require"
10 | "github.com/xataio/pgstream/pkg/transformers"
11 | )
12 |
13 | func TestFirstnameTransformer_Transform(t *testing.T) {
14 | t.Parallel()
15 |
16 | tests := []struct {
17 | name string
18 | value any
19 | params transformers.ParameterValues
20 |
21 | wantName string
22 | wantErr error
23 | }{
24 | {
25 | name: "ok",
26 | value: "alice",
27 | params: map[string]any{
28 | "preserve_length": false,
29 | "max_length": 4,
30 | "seed": 0,
31 | },
32 |
33 | wantName: "Ute",
34 | wantErr: nil,
35 | },
36 | {
37 | name: "error - invalid preserve length",
38 | value: "alice",
39 | params: map[string]any{
40 | "preserve_length": 1,
41 | },
42 |
43 | wantName: "",
44 | wantErr: transformers.ErrInvalidParameters,
45 | },
46 | {
47 | name: "error - invalid max length",
48 | value: "alice",
49 | params: map[string]any{
50 | "max_length": "1",
51 | },
52 |
53 | wantName: "",
54 | wantErr: transformers.ErrInvalidParameters,
55 | },
56 | {
57 | name: "error - invalid seed",
58 | value: "alice",
59 | params: map[string]any{
60 | "seed": "1",
61 | },
62 |
63 | wantName: "",
64 | wantErr: transformers.ErrInvalidParameters,
65 | },
66 | }
67 |
68 | for _, tc := range tests {
69 | t.Run(tc.name, func(t *testing.T) {
70 | t.Parallel()
71 |
72 | transformer, err := NewFirstNameTransformer(tc.params)
73 | require.ErrorIs(t, err, tc.wantErr)
74 |
75 | if err != nil {
76 | return
77 | }
78 |
79 | got, err := transformer.Transform(context.Background(), transformers.Value{TransformValue: tc.value})
80 | require.ErrorIs(t, err, tc.wantErr)
81 | require.Equal(t, tc.wantName, got)
82 | })
83 | }
84 | }
85 |
--------------------------------------------------------------------------------
/pkg/transformers/neosync/neosync_fullname_transformer_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package neosync
4 |
5 | import (
6 | "context"
7 | "testing"
8 |
9 | "github.com/stretchr/testify/require"
10 | "github.com/xataio/pgstream/pkg/transformers"
11 | )
12 |
13 | func TestNewFullnameTransformer(t *testing.T) {
14 | t.Parallel()
15 | tests := []struct {
16 | name string
17 | params transformers.ParameterValues
18 | input any
19 | wantErr error
20 | wantName string
21 | }{
22 | {
23 | name: "ok - valid",
24 | params: transformers.ParameterValues{
25 | "preserve_length": false,
26 | "max_length": 20,
27 | "seed": 1234,
28 | },
29 | input: "name surname",
30 | wantErr: nil,
31 | wantName: "Flav Di Chiara",
32 | },
33 | {
34 | name: "error - invalid preserve_length",
35 | params: transformers.ParameterValues{
36 | "preserve_length": 123,
37 | "max_length": 10,
38 | "seed": 123,
39 | },
40 | wantErr: transformers.ErrInvalidParameters,
41 | },
42 | {
43 | name: "error - invalid max_length",
44 | params: transformers.ParameterValues{
45 | "preserve_length": true,
46 | "max_length": "invalid",
47 | "seed": 123,
48 | },
49 | wantErr: transformers.ErrInvalidParameters,
50 | },
51 | {
52 | name: "error - invalid seed",
53 | params: transformers.ParameterValues{
54 | "preserve_length": true,
55 | "max_length": 10,
56 | "seed": "invalid",
57 | },
58 | wantErr: transformers.ErrInvalidParameters,
59 | },
60 | }
61 | for _, tc := range tests {
62 | t.Run(tc.name, func(t *testing.T) {
63 | t.Parallel()
64 | lst, err := NewFullNameTransformer(tc.params)
65 | require.ErrorIs(t, err, tc.wantErr)
66 | if tc.wantErr != nil {
67 | return
68 | }
69 | require.NoError(t, err)
70 | require.NotNil(t, lst)
71 | got, _ := lst.Transform(context.Background(), transformers.Value{TransformValue: tc.input})
72 | require.Equal(t, tc.wantName, got)
73 | })
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/pkg/transformers/neosync/neosync_lastname_transformer_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package neosync
4 |
5 | import (
6 | "context"
7 | "testing"
8 |
9 | "github.com/stretchr/testify/require"
10 | "github.com/xataio/pgstream/pkg/transformers"
11 | )
12 |
13 | func TestNewLastnameTransformer(t *testing.T) {
14 | t.Parallel()
15 | tests := []struct {
16 | name string
17 | params transformers.ParameterValues
18 | input any
19 | wantErr error
20 | wantName string
21 | }{
22 | {
23 | name: "ok - valid",
24 | params: transformers.ParameterValues{
25 | "preserve_length": false,
26 | "max_length": 10,
27 | "seed": 123,
28 | },
29 | input: "lastname",
30 | wantErr: nil,
31 | wantName: "Fournaris",
32 | },
33 | {
34 | name: "error - invalid preserve_length",
35 | params: transformers.ParameterValues{
36 | "preserve_length": 123,
37 | "max_length": 10,
38 | "seed": 123,
39 | },
40 | wantErr: transformers.ErrInvalidParameters,
41 | },
42 | {
43 | name: "error - invalid max_length",
44 | params: transformers.ParameterValues{
45 | "preserve_length": true,
46 | "max_length": "invalid",
47 | "seed": 123,
48 | },
49 | wantErr: transformers.ErrInvalidParameters,
50 | },
51 | {
52 | name: "error - invalid seed",
53 | params: transformers.ParameterValues{
54 | "preserve_length": true,
55 | "max_length": 10,
56 | "seed": "invalid",
57 | },
58 | wantErr: transformers.ErrInvalidParameters,
59 | },
60 | }
61 | for _, tc := range tests {
62 | t.Run(tc.name, func(t *testing.T) {
63 | t.Parallel()
64 | lst, err := NewLastNameTransformer(tc.params)
65 | require.ErrorIs(t, err, tc.wantErr)
66 | if tc.wantErr != nil {
67 | return
68 | }
69 | require.NoError(t, err)
70 | require.NotNil(t, lst)
71 | got, _ := lst.Transform(context.Background(), transformers.Value{TransformValue: tc.input})
72 | require.Equal(t, tc.wantName, got)
73 | })
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/pkg/transformers/neosync/neosync_string_transformer_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package neosync
4 |
5 | import (
6 | "context"
7 | "testing"
8 |
9 | "github.com/stretchr/testify/require"
10 | "github.com/xataio/pgstream/pkg/transformers"
11 | )
12 |
13 | func TestStringTransformer_Transform(t *testing.T) {
14 | t.Parallel()
15 |
16 | tests := []struct {
17 | name string
18 | value any
19 | params transformers.ParameterValues
20 |
21 | wantString string
22 | wantErr error
23 | }{
24 | {
25 | name: "ok",
26 | value: "hello",
27 | params: map[string]any{
28 | "preserve_length": false,
29 | "min_length": 2,
30 | "max_length": 2,
31 | "seed": 0,
32 | },
33 |
34 | wantString: "np",
35 | wantErr: nil,
36 | },
37 | {
38 | name: "error - invalid preserve length",
39 | value: "hello",
40 | params: map[string]any{
41 | "preserve_length": 1,
42 | },
43 |
44 | wantString: "",
45 | wantErr: transformers.ErrInvalidParameters,
46 | },
47 | {
48 | name: "error - invalid min length",
49 | value: "hello",
50 | params: map[string]any{
51 | "min_length": "1",
52 | },
53 |
54 | wantString: "",
55 | wantErr: transformers.ErrInvalidParameters,
56 | },
57 | {
58 | name: "error - invalid max length",
59 | value: "hello",
60 | params: map[string]any{
61 | "max_length": "1",
62 | },
63 |
64 | wantString: "",
65 | wantErr: transformers.ErrInvalidParameters,
66 | },
67 | {
68 | name: "error - invalid seed",
69 | value: "hello",
70 | params: map[string]any{
71 | "seed": "1",
72 | },
73 |
74 | wantString: "",
75 | wantErr: transformers.ErrInvalidParameters,
76 | },
77 | }
78 |
79 | for _, tc := range tests {
80 | t.Run(tc.name, func(t *testing.T) {
81 | t.Parallel()
82 |
83 | transformer, err := NewStringTransformer(tc.params)
84 | require.ErrorIs(t, err, tc.wantErr)
85 |
86 | if err != nil {
87 | return
88 | }
89 |
90 | got, err := transformer.Transform(context.Background(), transformers.Value{TransformValue: tc.value})
91 | require.ErrorIs(t, err, tc.wantErr)
92 | require.Equal(t, tc.wantString, got)
93 | })
94 | }
95 | }
96 |
--------------------------------------------------------------------------------
/pkg/transformers/neosync/neosync_transformer.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package neosync
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/pkg/transformers"
9 | )
10 |
// transformer is a wrapper around a neosync transformer. Neosync transformers
// return a pointer to the type, so this implementation is generic to ensure
// different types are supported.
type transformer[T any] struct {
	neosyncTransformer neosyncTransformer
	// opts holds the transformer-specific options forwarded on every
	// Transform call.
	opts any
}

// neosyncTransformer is the minimal surface of a neosync transformer used by
// this wrapper: transform a value with the given options.
type neosyncTransformer interface {
	Transform(value any, opts any) (any, error)
}
22 |
23 | func New[T any](t neosyncTransformer, opts any) *transformer[T] {
24 | return &transformer[T]{
25 | opts: opts,
26 | neosyncTransformer: t,
27 | }
28 | }
29 |
30 | func (t *transformer[T]) Transform(_ context.Context, value transformers.Value) (any, error) {
31 | retPtr, err := t.neosyncTransformer.Transform(value.TransformValue, t.opts)
32 | if err != nil {
33 | return nil, err
34 | }
35 |
36 | ret, ok := retPtr.(*T)
37 | if !ok {
38 | return nil, transformers.ErrUnsupportedValueType
39 | }
40 | return *ret, nil
41 | }
42 |
43 | func findParameter[T any](params transformers.ParameterValues, name string) (*T, error) {
44 | var found bool
45 | var err error
46 |
47 | val := new(T)
48 | *val, found, err = transformers.FindParameter[T](params, name)
49 | if err != nil {
50 | return nil, err
51 | }
52 | if !found {
53 | val = nil
54 | }
55 | return val, nil
56 | }
57 |
58 | func findParameterArray[T any](params transformers.ParameterValues, name string) ([]T, error) {
59 | val, found, err := transformers.FindParameterArray[T](params, name)
60 | if err != nil {
61 | return val, err
62 | }
63 | if !found {
64 | val = nil
65 | }
66 | return val, nil
67 | }
68 |
// toInt64Ptr converts an optional int into an optional int64, preserving nil.
func toInt64Ptr(i *int) *int64 {
	if i == nil {
		return nil
	}
	converted := int64(*i)
	return &converted
}
77 |
// toAnyPtr wraps a non-empty string slice into a *any; empty or nil slices
// yield nil.
func toAnyPtr(strArray []string) *any {
	var result *any
	if len(strArray) > 0 {
		wrapped := any(strArray)
		result = &wrapped
	}
	return result
}
86 |
--------------------------------------------------------------------------------
/pkg/transformers/string_transformer.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package transformers
4 |
5 | import (
6 | "context"
7 | "fmt"
8 |
9 | "golang.org/x/exp/rand"
10 | )
11 |
// StringTransformer replaces string (or byte slice) values with random ASCII
// letter strings of the same length.
type StringTransformer struct {
	// todo: add buffer pool
	// maxLength int
	// minLength int
}

var (
	// stringParams is empty: this transformer accepts no parameters.
	stringParams = []Parameter{}
	// stringCompatibleTypes lists the input types Transform accepts.
	stringCompatibleTypes = []SupportedDataType{
		StringDataType,
		ByteArrayDataType,
	}
)
25 |
26 | func NewStringTransformer(params ParameterValues) (*StringTransformer, error) {
27 | return &StringTransformer{}, nil
28 | }
29 |
30 | func (st *StringTransformer) Transform(_ context.Context, v Value) (any, error) {
31 | switch str := v.TransformValue.(type) {
32 | case string:
33 | return st.transform(str), nil
34 | case []byte:
35 | return st.transform(string(str)), nil
36 | default:
37 | return v, fmt.Errorf("expected string, got %T: %w", v, ErrUnsupportedValueType)
38 | }
39 | }
40 |
41 | func (st *StringTransformer) transform(str string) string {
42 | const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
43 |
44 | b := make([]byte, len(str))
45 | for i := range b {
46 | b[i] = letterBytes[rand.Intn(len(letterBytes))]
47 | }
48 | return string(b)
49 | }
50 |
// CompatibleTypes returns the data types this transformer can operate on:
// strings and byte arrays.
func (st *StringTransformer) CompatibleTypes() []SupportedDataType {
	return stringCompatibleTypes
}

// Type returns the transformer type identifier for this transformer.
func (st *StringTransformer) Type() TransformerType {
	return String
}

// StringTransformerDefinition describes the string transformer: its supported
// input types and its (empty) parameter list.
func StringTransformerDefinition() *Definition {
	return &Definition{
		SupportedTypes: stringCompatibleTypes,
		Parameters: stringParams,
	}
}
65 |
--------------------------------------------------------------------------------
/pkg/transformers/string_transformer_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package transformers
4 |
5 | import (
6 | "context"
7 | "testing"
8 |
9 | "github.com/stretchr/testify/require"
10 | )
11 |
12 | func TestStringTransformer_Transform(t *testing.T) {
13 | t.Parallel()
14 |
15 | tests := []struct {
16 | name string
17 | value any
18 |
19 | wantLen int
20 | wantErr error
21 | }{
22 | {
23 | name: "ok - string",
24 | value: "hello",
25 |
26 | wantLen: 5,
27 | wantErr: nil,
28 | },
29 | {
30 | name: "ok - []byte",
31 | value: []byte("hello"),
32 |
33 | wantLen: 5,
34 | wantErr: nil,
35 | },
36 | {
37 | name: "unsupported type",
38 | value: 1,
39 |
40 | wantLen: 0,
41 | wantErr: ErrUnsupportedValueType,
42 | },
43 | }
44 |
45 | for _, tc := range tests {
46 | t.Run(tc.name, func(t *testing.T) {
47 | t.Parallel()
48 |
49 | st, err := NewStringTransformer(nil)
50 | require.NoError(t, err)
51 | got, err := st.Transform(context.Background(), Value{TransformValue: tc.value})
52 | require.ErrorIs(t, err, tc.wantErr)
53 | if tc.wantErr != nil {
54 | return
55 | }
56 |
57 | require.Len(t, got, tc.wantLen)
58 | require.NotEqual(t, got, tc.value)
59 | })
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/pkg/transformers/template_transformer.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package transformers
4 |
5 | import (
6 | "context"
7 | "errors"
8 | "fmt"
9 | "strings"
10 | "text/template"
11 |
12 | "github.com/Masterminds/sprig/v3"
13 | greenmasktoolkit "github.com/eminano/greenmask/pkg/toolkit"
14 | )
15 |
// TemplateTransformer renders values through a user-provided Go text/template,
// with greenmask toolkit and sprig helper functions available to the template.
type TemplateTransformer struct {
	template *template.Template
}

var (
	// errTemplateMustBeProvided is returned when the required "template"
	// parameter is missing.
	errTemplateMustBeProvided = errors.New("template_transformer: template parameter must be provided")
	// templateCompatibleTypes lists the input types this transformer accepts.
	templateCompatibleTypes = []SupportedDataType{
		StringDataType,
		ByteArrayDataType,
	}
	// templateParams declares the single required "template" string parameter.
	templateParams = []Parameter{
		{
			Name: "template",
			SupportedType: "string",
			Default: nil,
			Dynamic: false,
			Required: true,
		},
	}
)
36 |
// NewTemplateTransformer builds a TemplateTransformer from the required
// "template" string parameter. The template is parsed with the greenmask
// toolkit and sprig function maps registered (sprig is registered last, so
// its entries win on name collisions, per text/template Funcs semantics).
// An error is returned when the parameter is missing, has the wrong type, or
// fails to parse.
func NewTemplateTransformer(params ParameterValues) (*TemplateTransformer, error) {
	templateStr, found, err := FindParameter[string](params, "template")
	if err != nil {
		return nil, fmt.Errorf("template_transformer: template must be a string: %w", err)
	}
	if !found {
		return nil, errTemplateMustBeProvided
	}

	tmpl, err := template.New("").Funcs(greenmasktoolkit.FuncMap()).Funcs(sprig.FuncMap()).Parse(templateStr)
	if err != nil {
		return nil, fmt.Errorf("template_transformer: error parsing template: %w", err)
	}
	return &TemplateTransformer{template: tmpl}, nil
}
52 |
53 | func (t *TemplateTransformer) Transform(_ context.Context, value Value) (any, error) {
54 | var buf strings.Builder
55 | if err := t.template.Execute(&buf, &value); err != nil {
56 | return nil, fmt.Errorf("template_transformer: error executing template: %w", err)
57 | }
58 | return buf.String(), nil
59 | }
60 |
// CompatibleTypes returns the data types this transformer can operate on:
// strings and byte arrays.
func (t *TemplateTransformer) CompatibleTypes() []SupportedDataType {
	return templateCompatibleTypes
}

// Type returns the transformer type identifier for this transformer.
func (t *TemplateTransformer) Type() TransformerType {
	return Template
}

// TemplateTransformerDefinition describes the template transformer: its
// supported input types and its required "template" parameter.
func TemplateTransformerDefinition() *Definition {
	return &Definition{
		SupportedTypes: templateCompatibleTypes,
		Parameters: templateParams,
	}
}
75 |
--------------------------------------------------------------------------------
/pkg/wal/checkpointer/postgres/helper_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package postgres
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/pkg/wal/replication"
9 | )
10 |
// mockSyncer is a test double for the lsnSyncer interface, delegating SyncLSN
// to the configurable syncLSNFn callback.
type mockSyncer struct {
	syncLSNFn func(context.Context, replication.LSN) error
}

// SyncLSN delegates to syncLSNFn. It panics if syncLSNFn is unset.
func (m *mockSyncer) SyncLSN(ctx context.Context, lsn replication.LSN) error {
	return m.syncLSNFn(ctx, lsn)
}

// Close is a no-op and always succeeds.
func (m *mockSyncer) Close() error {
	return nil
}
22 |
--------------------------------------------------------------------------------
/pkg/wal/checkpointer/postgres/wal_pg_checkpointer.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package postgres
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/pkg/wal"
9 | "github.com/xataio/pgstream/pkg/wal/replication"
10 | pgreplication "github.com/xataio/pgstream/pkg/wal/replication/postgres"
11 | )
12 |
// Checkpointer is a postgres implementation of a wal checkpointer. It syncs the
// LSN to postgres.
type Checkpointer struct {
	// syncer persists the LSN to postgres.
	syncer lsnSyncer
	// parser converts commit position strings into LSNs.
	parser replication.LSNParser
}

// Config holds the configuration for the postgres checkpointer.
type Config struct {
	Replication pgreplication.Config
}

// lsnSyncer abstracts the component that persists an LSN and releases its
// resources on Close.
type lsnSyncer interface {
	SyncLSN(ctx context.Context, lsn replication.LSN) error
	Close() error
}
28 |
// New returns a postgres checkpointer that syncs the LSN to postgres on demand.
func New(syncer lsnSyncer) *Checkpointer {
	return &Checkpointer{
		syncer: syncer,
		// positions are parsed with the postgres LSN parser
		parser: pgreplication.NewLSNParser(),
	}
}
36 |
37 | func (c *Checkpointer) SyncLSN(ctx context.Context, positions []wal.CommitPosition) error {
38 | if len(positions) == 0 {
39 | return nil
40 | }
41 |
42 | // we only need the max pg wal offset
43 | var max replication.LSN
44 | for _, position := range positions {
45 | lsn, err := c.parser.FromString(string(position))
46 | if err != nil {
47 | return err
48 | }
49 | if lsn > max {
50 | max = lsn
51 | }
52 | }
53 |
54 | return c.syncer.SyncLSN(ctx, replication.LSN(max))
55 | }
56 |
// Close releases the resources held by the underlying LSN syncer.
func (c *Checkpointer) Close() error {
	return c.syncer.Close()
}
60 |
--------------------------------------------------------------------------------
/pkg/wal/checkpointer/wal_checkpointer.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package checkpointer
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/pkg/wal"
9 | )
10 |
11 | // Checkpoint defines the way to confirm the positions that have been read.
12 | // The actual implementation depends on the source of events (postgres, kafka,...)
13 | type Checkpoint func(ctx context.Context, positions []wal.CommitPosition) error
14 |
--------------------------------------------------------------------------------
/pkg/wal/listener/postgres/helper_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package postgres
4 |
5 | import (
6 | "context"
7 | "time"
8 |
9 | "github.com/xataio/pgstream/pkg/wal/replication"
10 | replicationmocks "github.com/xataio/pgstream/pkg/wal/replication/mocks"
11 | )
12 |
const (
	// testLSN is the fixed LSN reported by all replication mocks below.
	testLSN = replication.LSN(7773397064)
	// testLSNStr is the string form produced by the mock LSN parser.
	testLSNStr = "1/CF54A048"
)

// newMockReplicationHandler returns a replication handler mock whose
// operations all succeed, reporting testLSN and returning a mock data message
// on every receive.
func newMockReplicationHandler() *replicationmocks.Handler {
	return &replicationmocks.Handler{
		StartReplicationFn: func(context.Context) error { return nil },
		StartReplicationFromLSNFn: func(context.Context, replication.LSN) error { return nil },
		GetCurrentLSNFn: func(ctx context.Context) (replication.LSN, error) { return testLSN, nil },
		GetLSNParserFn: func() replication.LSNParser { return newMockLSNParser() },
		SyncLSNFn: func(ctx context.Context, lsn replication.LSN) error { return nil },
		ReceiveMessageFn: func(ctx context.Context, i uint64) (*replication.Message, error) {
			return newMockMessage(), nil
		},
	}
}
30 |
// newMockMessage returns a replication data message carrying testLSN and a
// fixed payload.
func newMockMessage() *replication.Message {
	return &replication.Message{
		LSN: testLSN,
		Data: []byte("test-data"),
		ReplyRequested: false,
		ServerTime: time.Now(),
	}
}

// newMockKeepAliveMessage returns a data-less message carrying testLSN with
// the given reply-requested flag.
func newMockKeepAliveMessage(replyRequested bool) *replication.Message {
	return &replication.Message{
		LSN: testLSN,
		ReplyRequested: replyRequested,
	}
}

// newMockLSNParser returns an LSN parser mock that maps any input to the
// fixed testLSN/testLSNStr pair.
func newMockLSNParser() *replicationmocks.LSNParser {
	return &replicationmocks.LSNParser{
		ToStringFn: func(replication.LSN) string { return testLSNStr },
		FromStringFn: func(s string) (replication.LSN, error) { return testLSN, nil },
	}
}
53 |
// mockGenerator is a snapshot generator test double, delegating
// CreateSnapshot to the configurable createSnapshotFn callback.
type mockGenerator struct {
	createSnapshotFn func(context.Context) error
}

// CreateSnapshot delegates to createSnapshotFn. It panics if the callback is
// unset.
func (m *mockGenerator) CreateSnapshot(ctx context.Context) error {
	return m.createSnapshotFn(ctx)
}
61 |
--------------------------------------------------------------------------------
/pkg/wal/listener/snapshot/adapter/config.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package adapter
4 |
5 | import (
6 | "strings"
7 | )
8 |
// SnapshotConfig configures the snapshot listener adapter.
type SnapshotConfig struct {
	// Tables lists tables to snapshot as "table" (assumed to live in the
	// public schema) or "schema.table".
	Tables []string
	// SnapshotWorkers represents the number of snapshots the generator will
	// process concurrently. This doesn't affect the parallelism of the tables
	// within each individual snapshot request. It defaults to 1.
	SnapshotWorkers uint
}

const defaultSnapshotWorkers = 1

const publicSchema = "public"

// schemaTableMap groups the configured tables by schema. Entries without an
// explicit schema qualifier are assigned to the public schema.
func (c *SnapshotConfig) schemaTableMap() map[string][]string {
	result := make(map[string][]string, len(c.Tables))
	for _, entry := range c.Tables {
		schemaName, tableName := publicSchema, entry
		if parts := strings.Split(entry, "."); len(parts) == 2 {
			schemaName, tableName = parts[0], parts[1]
		}
		result[schemaName] = append(result[schemaName], tableName)
	}
	return result
}

// snapshotWorkers returns the configured worker count, falling back to the
// default of 1 when unset.
func (c *SnapshotConfig) snapshotWorkers() uint {
	if c.SnapshotWorkers == 0 {
		return defaultSnapshotWorkers
	}
	return c.SnapshotWorkers
}
42 |
--------------------------------------------------------------------------------
/pkg/wal/listener/snapshot/adapter/config_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package adapter
4 |
5 | import (
6 | "testing"
7 |
8 | "github.com/stretchr/testify/require"
9 | )
10 |
11 | func TestSnapshotConfig_schemaTableMap(t *testing.T) {
12 | t.Parallel()
13 |
14 | tests := []struct {
15 | name string
16 | tables []string
17 |
18 | wantMap map[string][]string
19 | }{
20 | {
21 | name: "ok",
22 | tables: []string{"a", "public.b", "test_schema.c"},
23 | wantMap: map[string][]string{
24 | "public": {"a", "b"},
25 | "test_schema": {"c"},
26 | },
27 | },
28 | }
29 |
30 | for _, tc := range tests {
31 | t.Run(tc.name, func(t *testing.T) {
32 | t.Parallel()
33 |
34 | config := SnapshotConfig{
35 | Tables: tc.tables,
36 | }
37 | got := config.schemaTableMap()
38 | require.Equal(t, tc.wantMap, got)
39 | })
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/pkg/wal/listener/snapshot/adapter/wal_process_event_adapter.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package adapter
4 |
5 | import (
6 | "context"
7 | "time"
8 |
9 | "github.com/jonboulle/clockwork"
10 | "github.com/xataio/pgstream/pkg/snapshot"
11 | "github.com/xataio/pgstream/pkg/wal"
12 | "github.com/xataio/pgstream/pkg/wal/listener"
13 | )
14 |
// ProcessEventAdapter adapts a wal event processing function so it can
// consume snapshot rows, converting each row into an insert wal event.
type ProcessEventAdapter struct {
	processEvent listener.ProcessWalEvent
	// clock timestamps the generated wal events; injectable for tests.
	clock clockwork.Clock
}

// NewProcessEventAdapter returns an adapter wrapping the given wal event
// processing function, using the real (wall) clock.
func NewProcessEventAdapter(processEvent listener.ProcessWalEvent) *ProcessEventAdapter {
	return &ProcessEventAdapter{
		processEvent: processEvent,
		clock: clockwork.NewRealClock(),
	}
}

// ProcessRow converts the snapshot row into a wal event and forwards it to
// the wrapped processor.
func (a *ProcessEventAdapter) ProcessRow(ctx context.Context, row *snapshot.Row) error {
	return a.processEvent(ctx, a.snapshotRowToWalEvent(row))
}
30 |
31 | func (a *ProcessEventAdapter) snapshotRowToWalEvent(row *snapshot.Row) *wal.Event {
32 | if row == nil {
33 | return nil
34 | }
35 |
36 | columns := make([]wal.Column, 0, len(row.Columns))
37 | for _, col := range row.Columns {
38 | columns = append(columns, a.snapshotColumnToWalColumn(col))
39 | }
40 | // use 0 since there's no LSN associated, but it can be used as the
41 | // initial version downstream
42 | const zeroLSN = "0/0"
43 | return &wal.Event{
44 | CommitPosition: wal.CommitPosition(zeroLSN),
45 | Data: &wal.Data{
46 | Action: "I",
47 | Timestamp: a.clock.Now().UTC().Format(time.RFC3339),
48 | LSN: zeroLSN,
49 | Schema: row.Schema,
50 | Table: row.Table,
51 | Columns: columns,
52 | },
53 | }
54 | }
55 |
// snapshotColumnToWalColumn maps a snapshot column onto the equivalent wal
// column, carrying name, type and value over as-is.
func (a *ProcessEventAdapter) snapshotColumnToWalColumn(col snapshot.Column) wal.Column {
	return wal.Column{
		Name: col.Name,
		Type: col.Type,
		Value: col.Value,
	}
}
63 |
--------------------------------------------------------------------------------
/pkg/wal/listener/snapshot/adapter/wal_process_event_adapter_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package adapter
4 |
5 | import (
6 | "testing"
7 | "time"
8 |
9 | "github.com/jonboulle/clockwork"
10 | "github.com/stretchr/testify/require"
11 | "github.com/xataio/pgstream/pkg/snapshot"
12 | "github.com/xataio/pgstream/pkg/wal"
13 | )
14 |
// TestProcessEventAdapter_snapshotRowToWalEvent verifies the snapshot row to
// wal event conversion: nil rows map to nil events, and populated rows map to
// insert ("I") events with the fixed "0/0" LSN and a clock-derived timestamp.
func TestProcessEventAdapter_snapshotRowToWalEvent(t *testing.T) {
	t.Parallel()

	now := time.Now()
	// fixed clock so the generated event timestamp is deterministic
	fakeClock := clockwork.NewFakeClockAt(now)
	testTable := "table1"
	zeroLSN := "0/0"

	tests := []struct {
		name string
		row *snapshot.Row

		wantEvent *wal.Event
	}{
		{
			name: "ok - nil row",
			row: nil,

			wantEvent: nil,
		},
		{
			name: "ok",
			row: &snapshot.Row{
				Schema: publicSchema,
				Table: testTable,
				Columns: []snapshot.Column{
					{Name: "id", Type: "int4", Value: 1},
					{Name: "name", Type: "text", Value: "alice"},
				},
			},

			wantEvent: &wal.Event{
				CommitPosition: wal.CommitPosition(zeroLSN),
				Data: &wal.Data{
					Action: "I",
					Timestamp: fakeClock.Now().UTC().Format(time.RFC3339),
					LSN: zeroLSN,
					Schema: publicSchema,
					Table: testTable,
					Columns: []wal.Column{
						{Name: "id", Type: "int4", Value: 1},
						{Name: "name", Type: "text", Value: "alice"},
					},
				},
			},
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			a := ProcessEventAdapter{
				clock: fakeClock,
			}
			event := a.snapshotRowToWalEvent(tc.row)

			require.Equal(t, tc.wantEvent, event)
		})
	}
}
76 |
--------------------------------------------------------------------------------
/pkg/wal/listener/snapshot/adapter/wal_snapshot_generator_adapter_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package adapter
4 |
5 | import (
6 | "context"
7 | "errors"
8 | "testing"
9 |
10 | "github.com/stretchr/testify/require"
11 | "github.com/xataio/pgstream/pkg/log"
12 | "github.com/xataio/pgstream/pkg/snapshot"
13 | "github.com/xataio/pgstream/pkg/snapshot/generator"
14 | generatormocks "github.com/xataio/pgstream/pkg/snapshot/generator/mocks"
15 | )
16 |
17 | func TestSnapshotGeneratorAdapter_CreateSnapshot(t *testing.T) {
18 | t.Parallel()
19 |
20 | errTest := errors.New("oh noes")
21 |
22 | tests := []struct {
23 | name string
24 | generator generator.SnapshotGenerator
25 | schemaTables map[string][]string
26 |
27 | wantErr error
28 | }{
29 | {
30 | name: "ok",
31 | generator: &generatormocks.Generator{
32 | CreateSnapshotFn: func(ctx context.Context, ss *snapshot.Snapshot) error {
33 | require.Equal(t, &snapshot.Snapshot{
34 | SchemaName: publicSchema,
35 | TableNames: []string{"*"},
36 | }, ss)
37 | return nil
38 | },
39 | },
40 | schemaTables: map[string][]string{
41 | publicSchema: {"*"},
42 | },
43 |
44 | wantErr: nil,
45 | },
46 | {
47 | name: "error",
48 | generator: &generatormocks.Generator{
49 | CreateSnapshotFn: func(ctx context.Context, ss *snapshot.Snapshot) error {
50 | return errTest
51 | },
52 | },
53 | schemaTables: map[string][]string{
54 | publicSchema: {"*"},
55 | },
56 |
57 | wantErr: errTest,
58 | },
59 | }
60 |
61 | for _, tc := range tests {
62 | t.Run(tc.name, func(t *testing.T) {
63 | t.Parallel()
64 |
65 | ga := SnapshotGeneratorAdapter{
66 | logger: log.NewNoopLogger(),
67 | generator: tc.generator,
68 | schemaTables: tc.schemaTables,
69 | snapshotWorkers: 1,
70 | }
71 | defer ga.Close()
72 |
73 | err := ga.CreateSnapshot(context.Background())
74 | require.ErrorIs(t, err, tc.wantErr)
75 | })
76 | }
77 | }
78 |
--------------------------------------------------------------------------------
/pkg/wal/listener/snapshot/builder/config.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package builder
4 |
5 | import (
6 | schemalogpg "github.com/xataio/pgstream/pkg/schemalog/postgres"
7 | pgsnapshotgenerator "github.com/xataio/pgstream/pkg/snapshot/generator/postgres/data"
8 | "github.com/xataio/pgstream/pkg/snapshot/generator/postgres/schema/pgdumprestore"
9 | "github.com/xataio/pgstream/pkg/wal/listener/snapshot/adapter"
10 | )
11 |
// SnapshotListenerConfig aggregates the configuration for the snapshot
// listener: data generation, adapter behavior, optional snapshot recording
// and schema snapshotting.
type SnapshotListenerConfig struct {
	Generator pgsnapshotgenerator.Config
	Adapter adapter.SnapshotConfig
	// Recorder is optional (may be nil).
	Recorder *SnapshotRecorderConfig
	Schema SchemaSnapshotConfig
}

// SchemaSnapshotConfig configures schema snapshotting; both fields are
// pointers and may be nil.
type SchemaSnapshotConfig struct {
	SchemaLogStore *schemalogpg.Config
	DumpRestore *pgdumprestore.Config
}

// SnapshotRecorderConfig configures snapshot recording.
type SnapshotRecorderConfig struct {
	// RepeatableSnapshots presumably allows re-running snapshots that were
	// already recorded — confirm against the recorder implementation.
	RepeatableSnapshots bool
	// SnapshotStoreURL is the URL of the snapshot store.
	SnapshotStoreURL string
}
28 |
--------------------------------------------------------------------------------
/pkg/wal/listener/snapshot/wal_snapshot_listener.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package snapshot
4 |
5 | import (
6 | "context"
7 | )
8 |
// Generator creates snapshots and releases its resources on Close.
type Generator interface {
	CreateSnapshot(context.Context) error
	Close() error
}

// Listener runs a snapshot generator as its listen process.
type Listener struct {
	generator Generator
}

// New returns a snapshot listener backed by the given generator.
func New(generator Generator) *Listener {
	return &Listener{
		generator: generator,
	}
}

// Listen starts the snapshot generation process.
func (l *Listener) Listen(ctx context.Context) error {
	return l.generator.CreateSnapshot(ctx)
}

// Close closes the listener internal resources
func (l *Listener) Close() error {
	return l.generator.Close()
}
33 |
--------------------------------------------------------------------------------
/pkg/wal/listener/wal_listener.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package listener
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/pkg/wal"
9 | )
10 |
// Listener represents a process that listens to WAL events.
type Listener interface {
	// Listen starts listening for WAL events.
	Listen(ctx context.Context) error
	// Close releases the listener's internal resources.
	Close() error
}

// ProcessWalEvent is the callback used to process a single wal event.
type ProcessWalEvent func(context.Context, *wal.Event) error
18 |
--------------------------------------------------------------------------------
/pkg/wal/processor/batch/helper_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package batch
4 |
// mockMessage is a configurable batch.Message implementation for the
// package tests. The sizeFn/isEmptyFn hooks let individual tests override
// the reported size and emptiness; unset hooks fall back to fixed
// defaults.
type mockMessage struct {
	id        uint
	isEmptyFn func() bool
	sizeFn    func() int
}

// Size reports the message size, delegating to sizeFn when provided and
// defaulting to 1 so every mock message counts towards batch limits.
func (m *mockMessage) Size() int {
	if m.sizeFn == nil {
		return 1
	}
	return m.sizeFn()
}

// IsEmpty reports whether the message is empty, delegating to isEmptyFn
// when provided and defaulting to false (non-empty).
func (m *mockMessage) IsEmpty() bool {
	if m.isEmptyFn == nil {
		return false
	}
	return m.isEmptyFn()
}
24 |
--------------------------------------------------------------------------------
/pkg/wal/processor/batch/mocks/mock_batch_sender.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package mocks
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/pkg/wal/processor/batch"
9 | )
10 |
11 | type BatchSender[T batch.Message] struct {
12 | SendMessageFn func(context.Context, *batch.WALMessage[T]) error
13 | CloseFn func()
14 | msgChan chan *batch.WALMessage[T]
15 | }
16 |
17 | func NewBatchSender[T batch.Message]() *BatchSender[T] {
18 | return &BatchSender[T]{
19 | msgChan: make(chan *batch.WALMessage[T]),
20 | }
21 | }
22 |
23 | func (m *BatchSender[T]) SendMessage(ctx context.Context, msg *batch.WALMessage[T]) error {
24 | if m.SendMessageFn != nil {
25 | return m.SendMessageFn(ctx, msg)
26 | }
27 |
28 | m.msgChan <- msg
29 | return nil
30 | }
31 |
32 | func (m *BatchSender[T]) Close() {
33 | close(m.msgChan)
34 | if m.CloseFn != nil {
35 | m.CloseFn()
36 | }
37 | }
38 |
39 | func (m *BatchSender[T]) GetWALMessages() []*batch.WALMessage[T] {
40 | msgs := []*batch.WALMessage[T]{}
41 | for msg := range m.msgChan {
42 | msgs = append(msgs, msg)
43 | }
44 | return msgs
45 | }
46 |
--------------------------------------------------------------------------------
/pkg/wal/processor/batch/wal_batch.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package batch
4 |
5 | import (
6 | "github.com/xataio/pgstream/pkg/wal"
7 | )
8 |
// Batch accumulates messages and their wal commit positions until it is
// drained for sending.
type Batch[T Message] struct {
	messages   []T
	positions  []wal.CommitPosition
	totalBytes int // running sum of Size() over the accumulated messages
}

// zeroLSN is the zero postgres LSN value; positions carrying it are not
// tracked in batches (see add).
const zeroLSN = "0/0"
16 |
17 | func NewBatch[T Message](messages []T, positions []wal.CommitPosition) *Batch[T] {
18 | return &Batch[T]{
19 | messages: messages,
20 | positions: positions,
21 | }
22 | }
23 |
// GetMessages returns the messages accumulated in the batch.
func (b *Batch[T]) GetMessages() []T {
	return b.messages
}

// GetCommitPositions returns the wal commit positions accumulated in the
// batch.
func (b *Batch[T]) GetCommitPositions() []wal.CommitPosition {
	return b.positions
}
31 |
32 | func (b *Batch[T]) add(m *WALMessage[T]) {
33 | if !m.message.IsEmpty() {
34 | b.messages = append(b.messages, m.message)
35 | b.totalBytes += m.message.Size()
36 | }
37 |
38 | if m.position != "" && m.position != zeroLSN {
39 | b.positions = append(b.positions, m.position)
40 | }
41 | }
42 |
43 | func (b *Batch[T]) drain() *Batch[T] {
44 | batch := &Batch[T]{
45 | messages: b.messages,
46 | positions: b.positions,
47 | totalBytes: b.totalBytes,
48 | }
49 |
50 | b.messages = []T{}
51 | b.totalBytes = 0
52 | b.positions = []wal.CommitPosition{}
53 | return batch
54 | }
55 |
// isEmpty reports whether the batch holds neither messages nor commit
// positions.
func (b *Batch[T]) isEmpty() bool {
	return len(b.messages) == 0 && len(b.positions) == 0
}

// maxBatchBytesReached reports whether adding msg would reach or exceed
// maxBatchBytes. A non-positive maxBatchBytes disables the byte limit.
func (b *Batch[T]) maxBatchBytesReached(maxBatchBytes int64, msg T) bool {
	return maxBatchBytes > 0 && b.totalBytes+msg.Size() >= int(maxBatchBytes)
}
63 |
--------------------------------------------------------------------------------
/pkg/wal/processor/batch/wal_batch_sender_config.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package batch
4 |
5 | import (
6 | "errors"
7 | "time"
8 | )
9 |
// Config controls the batching behaviour of a batch sender: how often
// batches are flushed, how large they may grow, and the overall memory
// budget for inflight batches.
type Config struct {
	// BatchTimeout is the max time interval at which the batch sending is
	// triggered. Defaults to 1s
	BatchTimeout time.Duration
	// MaxBatchBytes is the max size in bytes for a given batch. When this size is
	// reached, the batch is sent. Defaults to 1572864 bytes.
	MaxBatchBytes int64
	// MaxBatchSize is the max number of messages to be sent per batch. When this
	// size is reached, the batch is sent. Defaults to 100.
	MaxBatchSize int64
	// MaxQueueBytes is the max memory used by the batch writer for inflight
	// batches. Defaults to 100MiB
	MaxQueueBytes int64
}

const (
	defaultMaxQueueBytes = int64(100 * 1024 * 1024) // 100MiB
	defaultBatchTimeout  = time.Second
	defaultMaxBatchSize  = 100
	defaultMaxBatchBytes = int64(1572864)
)

// GetMaxBatchBytes returns the configured max batch byte size, falling
// back to the default when unset or non-positive.
func (c *Config) GetMaxBatchBytes() int64 {
	if c.MaxBatchBytes <= 0 {
		return defaultMaxBatchBytes
	}
	return c.MaxBatchBytes
}

// GetMaxBatchSize returns the configured max message count per batch,
// falling back to the default when unset or non-positive.
func (c *Config) GetMaxBatchSize() int64 {
	if c.MaxBatchSize <= 0 {
		return defaultMaxBatchSize
	}
	return c.MaxBatchSize
}

// GetBatchTimeout returns the configured batch send interval, falling
// back to the default when unset or non-positive.
func (c *Config) GetBatchTimeout() time.Duration {
	if c.BatchTimeout <= 0 {
		return defaultBatchTimeout
	}
	return c.BatchTimeout
}

// GetMaxQueueBytes returns the configured inflight memory budget, falling
// back to the default when unset or non-positive. It errors when the
// budget is smaller than the max batch byte size, since a single batch
// could then never fit in the queue.
func (c *Config) GetMaxQueueBytes() (int64, error) {
	switch {
	case c.MaxQueueBytes <= 0:
		return defaultMaxQueueBytes, nil
	case c.MaxQueueBytes < c.GetMaxBatchBytes():
		return -1, errors.New("max queue bytes must be equal or bigger than max batch bytes")
	default:
		return c.MaxQueueBytes, nil
	}
}
63 |
--------------------------------------------------------------------------------
/pkg/wal/processor/batch/wal_message.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package batch
4 |
5 | import "github.com/xataio/pgstream/pkg/wal"
6 |
// Message is the unit batched by this package. Implementations report
// their byte size (used for batch byte limits) and whether they are empty
// (carrying no payload, eg keep-alive messages that only track a commit
// position).
type Message interface {
	Size() int
	IsEmpty() bool
}
11 |
// WALMessage is a wrapper around any kind of message implementing the Message
// interface which contains a wal commit position.
type WALMessage[T Message] struct {
	message  T
	position wal.CommitPosition
}
18 |
19 | func NewWALMessage[T Message](msg T, pos wal.CommitPosition) *WALMessage[T] {
20 | return &WALMessage[T]{
21 | message: msg,
22 | position: pos,
23 | }
24 | }
25 |
// GetMessage returns the wrapped message.
func (m *WALMessage[T]) GetMessage() T {
	return m.message
}

// GetPosition returns the wal commit position for the message.
func (m *WALMessage[T]) GetPosition() wal.CommitPosition {
	return m.position
}

// Size returns the size of the wrapped message.
func (m *WALMessage[T]) Size() int {
	return m.message.Size()
}
37 |
// isKeepAlive reports whether the message carries only a commit position
// (empty payload with a non-empty position).
func (m *WALMessage[T]) isKeepAlive() bool {
	return m.message.IsEmpty() && m.position != ""
}
41 |
--------------------------------------------------------------------------------
/pkg/wal/processor/errors.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package processor
4 |
5 | import "errors"
6 |
// Sentinel errors shared by wal processors when resolving event columns
// and tables.
var (
	ErrVersionNotFound = errors.New("version column not found")
	ErrIDNotFound      = errors.New("id column not found")
	ErrTableNotFound   = errors.New("table not found")
	ErrColumnNotFound  = errors.New("column not found")
)
13 |
--------------------------------------------------------------------------------
/pkg/wal/processor/kafka/config.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package kafka
4 |
5 | import (
6 | "github.com/xataio/pgstream/pkg/kafka"
7 | "github.com/xataio/pgstream/pkg/wal/processor/batch"
8 | )
9 |
// Config groups the kafka connection settings with the batching behaviour
// used by the kafka wal processor.
type Config struct {
	Kafka kafka.ConnConfig
	Batch batch.Config
}
14 |
--------------------------------------------------------------------------------
/pkg/wal/processor/mocks/mock_processor.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package mocks
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/pkg/wal"
9 | )
10 |
11 | type Processor struct {
12 | ProcessWALEventFn func(ctx context.Context, walEvent *wal.Event) error
13 | CloseFn func() error
14 | processCalls uint
15 | }
16 |
17 | func (m *Processor) ProcessWALEvent(ctx context.Context, walEvent *wal.Event) error {
18 | m.processCalls++
19 | return m.ProcessWALEventFn(ctx, walEvent)
20 | }
21 |
22 | func (m *Processor) GetProcessCalls() uint {
23 | return m.processCalls
24 | }
25 |
26 | func (m *Processor) Close() error {
27 | if m.CloseFn != nil {
28 | return m.CloseFn()
29 | }
30 | return nil
31 | }
32 |
33 | func (m *Processor) Name() string {
34 | return "mock"
35 | }
36 |
--------------------------------------------------------------------------------
/pkg/wal/processor/postgres/config.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package postgres
4 |
5 | import (
6 | schemalogpg "github.com/xataio/pgstream/pkg/schemalog/postgres"
7 | "github.com/xataio/pgstream/pkg/wal/processor/batch"
8 | )
9 |
// Config configures the postgres wal processor.
type Config struct {
	// URL of the target postgres database.
	URL string
	// BatchConfig controls the batching of queries written to the target.
	BatchConfig batch.Config
	// SchemaLogStore configures the postgres schema log store.
	SchemaLogStore schemalogpg.Config
	// DisableTriggers presumably disables triggers on the target while
	// writing — TODO confirm where this flag is consumed.
	DisableTriggers bool
	// OnConflictAction is the action applied to conflicting writes; it is
	// fed to the DML adapter (see newAdapter/newDMLAdapter).
	OnConflictAction string
	// BulkIngestEnabled toggles bulk ingestion mode — TODO confirm
	// behaviour at the consumer.
	BulkIngestEnabled bool
}
18 |
--------------------------------------------------------------------------------
/pkg/wal/processor/postgres/helper_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package postgres
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/pkg/wal"
9 | )
10 |
// mockAdapter is a test double for the walAdapter interface.
type mockAdapter struct {
	walEventToQueriesFn func(*wal.Event) ([]*query, error)
}

// walEventToQueries delegates to the configured hook, ignoring the
// context.
func (m *mockAdapter) walEventToQueries(_ context.Context, e *wal.Event) ([]*query, error) {
	return m.walEventToQueriesFn(e)
}
18 |
--------------------------------------------------------------------------------
/pkg/wal/processor/postgres/instrumented_wal_adapter.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package postgres
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/pkg/otel"
9 | "github.com/xataio/pgstream/pkg/wal"
10 |
11 | "go.opentelemetry.io/otel/trace"
12 | )
13 |
// instrumentedWalAdapter decorates a walAdapter with opentelemetry
// tracing.
type instrumentedWalAdapter struct {
	inner  walAdapter
	tracer trace.Tracer
}
18 |
19 | func newInstrumentedWalAdapter(a walAdapter, i *otel.Instrumentation) walAdapter {
20 | if i == nil {
21 | return a
22 | }
23 |
24 | return &instrumentedWalAdapter{
25 | inner: a,
26 | tracer: i.Tracer,
27 | }
28 | }
29 |
30 | func (i *instrumentedWalAdapter) walEventToQueries(ctx context.Context, event *wal.Event) (queries []*query, err error) {
31 | ctx, span := otel.StartSpan(ctx, i.tracer, "walAdapter.walEventToQueries")
32 | defer otel.CloseSpan(span, err)
33 |
34 | return i.inner.walEventToQueries(ctx, event)
35 | }
36 |
--------------------------------------------------------------------------------
/pkg/wal/processor/postgres/postgres_query_msg.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package postgres
4 |
// query represents a SQL statement derived from a wal event, along with
// its arguments and the metadata needed to batch and execute it.
type query struct {
	schema      string
	table       string
	sql         string
	columnNames []string
	args        []any
	isDDL       bool
}

// Size returns the size of the message sql query (does not include the
// parameters)
func (m *query) Size() int {
	return len(m.sql)
}

// IsEmpty reports whether there is no SQL to execute. A nil query counts
// as empty.
func (m *query) IsEmpty() bool {
	if m == nil {
		return true
	}
	return m.sql == ""
}

// getSQL returns the SQL string; safe to call on a nil query.
func (m *query) getSQL() string {
	if m != nil {
		return m.sql
	}
	return ""
}

// getArgs returns the query arguments; safe to call on a nil query.
func (m *query) getArgs() []any {
	if m != nil {
		return m.args
	}
	return nil
}
37 |
--------------------------------------------------------------------------------
/pkg/wal/processor/postgres/postgres_wal_adapter.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package postgres
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/pkg/wal"
9 | "github.com/xataio/pgstream/pkg/wal/processor"
10 | )
11 |
// walAdapter translates wal events into the SQL queries to run against
// the target database.
type walAdapter interface {
	walEventToQueries(ctx context.Context, e *wal.Event) ([]*query, error)
}

// adapter implements walAdapter by combining a DML adapter with an
// optional DDL adapter (nil when no schemalog querier is configured).
type adapter struct {
	dmlAdapter *dmlAdapter
	ddlAdapter *ddlAdapter
}
20 |
21 | func newAdapter(schemaQuerier schemalogQuerier, onConflictAction string) (*adapter, error) {
22 | dmlAdapter, err := newDMLAdapter(onConflictAction)
23 | if err != nil {
24 | return nil, err
25 | }
26 |
27 | var ddl *ddlAdapter
28 | if schemaQuerier != nil {
29 | ddl = newDDLAdapter(schemaQuerier)
30 | }
31 | return &adapter{
32 | dmlAdapter: dmlAdapter,
33 | ddlAdapter: ddl,
34 | }, nil
35 | }
36 |
37 | func (a *adapter) walEventToQueries(ctx context.Context, e *wal.Event) ([]*query, error) {
38 | if e.Data == nil {
39 | return []*query{{}}, nil
40 | }
41 |
42 | if processor.IsSchemaLogEvent(e.Data) {
43 | // there's no ddl adapter, the ddl query will not be processed
44 | if a.ddlAdapter == nil {
45 | return []*query{{}}, nil
46 | }
47 |
48 | return a.ddlAdapter.walDataToQueries(ctx, e.Data)
49 | }
50 |
51 | return []*query{a.dmlAdapter.walDataToQuery(e.Data)}, nil
52 | }
53 |
--------------------------------------------------------------------------------
/pkg/wal/processor/search/config.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package search
4 |
5 | import (
6 | "github.com/xataio/pgstream/pkg/wal/processor/batch"
7 | )
8 |
// IndexerConfig configures the batching behaviour of the search indexer.
type IndexerConfig struct {
	Batch batch.Config
}
12 |
--------------------------------------------------------------------------------
/pkg/wal/processor/search/errors.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package search
4 |
5 | import (
6 | "errors"
7 | "fmt"
8 | "time"
9 | )
10 |
// ErrTypeInvalid is returned when a column type is not supported by the
// search mapping.
type ErrTypeInvalid struct {
	Input string
}

// Error implements the error interface.
func (e ErrTypeInvalid) Error() string {
	return fmt.Sprintf("unsupported type: %s", e.Input)
}
18 |
// ErrSchemaNotFound is returned when the named schema does not exist in
// the search store.
type ErrSchemaNotFound struct {
	SchemaName string
}

// Error implements the error interface.
func (e ErrSchemaNotFound) Error() string {
	return fmt.Sprintf("schema [%s] not found", e.SchemaName)
}
26 |
// ErrSchemaAlreadyExists is returned when the named schema already exists
// in the search store.
type ErrSchemaAlreadyExists struct {
	SchemaName string
}

// Error implements the error interface.
func (e ErrSchemaAlreadyExists) Error() string {
	return fmt.Sprintf("schema [%s] already exists", e.SchemaName)
}
34 |
// ErrSchemaUpdateOutOfOrder is returned when a schema update arrives with
// an older version/creation time than the one currently known.
type ErrSchemaUpdateOutOfOrder struct {
	SchemaName       string
	SchemaID         string
	NewVersion       int
	CurrentVersion   int
	CurrentCreatedAt time.Time
	NewCreatedAt     time.Time
}

// Error implements the error interface. The message previously read
// "our of order"; fixed to "out of order".
func (e ErrSchemaUpdateOutOfOrder) Error() string {
	return fmt.Sprintf("out of order schema update detected for schema [%s] with id [%s]: incoming version: %d, created at: %v, current version: %d, created at: %v",
		e.SchemaName, e.SchemaID, e.NewVersion, e.NewCreatedAt, e.CurrentVersion, e.CurrentCreatedAt)
}
48 |
var (
	// ErrRetriable flags indexing errors the caller may retry.
	ErrRetriable = errors.New("retriable error")
	// ErrInvalidQuery flags malformed search queries.
	ErrInvalidQuery = errors.New("invalid query")

	// internal sentinel errors used while translating wal events into
	// search documents
	errNilIDValue      = errors.New("id has nil value")
	errNilVersionValue = errors.New("version has nil value")
	errMetadataMissing = errors.New("missing wal event metadata")
	errEmptyQueueMsg   = errors.New("invalid empty queue message")
	errIncompatibleLSN = errors.New("incompatible LSN value")
)
59 |
--------------------------------------------------------------------------------
/pkg/wal/processor/search/mocks/mock_search_mapper.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package mocks
4 |
5 | import "github.com/xataio/pgstream/pkg/schemalog"
6 |
// Mapper is a mock implementation of the search.Mapper interface; each
// method delegates to its corresponding hook.
type Mapper struct {
	ColumnToSearchMappingFn func(column schemalog.Column) (map[string]any, error)
	MapColumnValueFn        func(column schemalog.Column, value any) (any, error)
}

// ColumnToSearchMapping delegates to ColumnToSearchMappingFn.
func (m *Mapper) ColumnToSearchMapping(column schemalog.Column) (map[string]any, error) {
	return m.ColumnToSearchMappingFn(column)
}

// MapColumnValue delegates to MapColumnValueFn.
func (m *Mapper) MapColumnValue(column schemalog.Column, value any) (any, error) {
	return m.MapColumnValueFn(column, value)
}
19 |
--------------------------------------------------------------------------------
/pkg/wal/processor/search/search_msg_batch.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package search
4 |
5 | import (
6 | "github.com/xataio/pgstream/pkg/schemalog"
7 | )
8 |
// msg is the unit batched by the search indexer. It appears to carry at
// most one of write/truncate/schemaChange; all nil means an empty
// (keep-alive) message — see IsEmpty.
type msg struct {
	write        *Document
	truncate     *truncateItem
	schemaChange *schemalog.LogEntry
	bytesSize    int // precomputed payload size reported by Size
}

// truncateItem identifies a table whose documents should be deleted.
type truncateItem struct {
	schemaName string
	tableID    string
}
20 |
// Size returns the precomputed byte size of the message payload.
func (m *msg) Size() int {
	return m.bytesSize
}
24 |
25 | func (m *msg) IsEmpty() bool {
26 | return m != nil && m.write == nil && m.schemaChange == nil && m.truncate == nil
27 | }
28 |
--------------------------------------------------------------------------------
/pkg/wal/processor/search/store.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package search
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/pkg/schemalog"
9 | )
10 |
// Store is the search backend interface used by the indexer: it applies
// schema changes and writes/deletes documents.
type Store interface {
	GetMapper() Mapper
	// schema operations
	ApplySchemaChange(ctx context.Context, logEntry *schemalog.LogEntry) error
	DeleteSchema(ctx context.Context, schemaName string) error
	// data operations
	DeleteTableDocuments(ctx context.Context, schemaName string, tableIDs []string) error
	SendDocuments(ctx context.Context, docs []Document) ([]DocumentError, error)
}

// Mapper translates schemalog column definitions and values into their
// search store representation.
type Mapper interface {
	ColumnToSearchMapping(column schemalog.Column) (map[string]any, error)
	MapColumnValue(column schemalog.Column, value any) (any, error)
}
25 |
// Document represents a versioned search store document write (or a
// deletion when Delete is set).
type Document struct {
	ID      string
	Schema  string
	Data    map[string]any
	Version int
	Delete  bool
}

// DocumentError pairs a document with the severity and description of the
// error encountered while indexing it.
type DocumentError struct {
	Document Document
	Severity Severity
	Error    string
}
39 |
// Severity classifies document indexing errors so callers can decide how
// to react to the affected document.
type Severity uint

const (
	SeverityNone Severity = iota
	SeverityDataLoss
	SeverityIgnored
	SeverityRetriable
)

// String returns the upper-case name of the severity. It returns the
// empty string for a nil receiver or an unknown value.
func (s *Severity) String() string {
	if s == nil {
		return ""
	}
	names := [...]string{
		SeverityNone:      "NONE",
		SeverityDataLoss:  "DATALOSS",
		SeverityIgnored:   "IGNORED",
		SeverityRetriable: "RETRIABLE",
	}
	if int(*s) < len(names) {
		return names[*s]
	}
	return ""
}
66 |
--------------------------------------------------------------------------------
/pkg/wal/processor/search/store/helper_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package store
4 |
5 | import (
6 | "github.com/xataio/pgstream/internal/searchstore"
7 | "github.com/xataio/pgstream/pkg/schemalog"
8 | "github.com/xataio/pgstream/pkg/wal/processor/search"
9 | )
10 |
// mockAdapter is a test double for the store adapter, with one hook per
// adapter method.
type mockAdapter struct {
	recordToLogEntryFn         func(map[string]any) (*schemalog.LogEntry, error)
	schemaNameToIndexFn        func(schemaName string) IndexName
	indexToSchemaNameFn        func(index string) string
	searchDocToBulkItemFn      func(docs search.Document) searchstore.BulkItem
	bulkItemsToSearchDocErrsFn func(items []searchstore.BulkItem) []search.DocumentError
}

// RecordToLogEntry delegates to recordToLogEntryFn.
func (m *mockAdapter) RecordToLogEntry(rec map[string]any) (*schemalog.LogEntry, error) {
	return m.recordToLogEntryFn(rec)
}

// SchemaNameToIndex delegates to schemaNameToIndexFn.
func (m *mockAdapter) SchemaNameToIndex(schemaName string) IndexName {
	return m.schemaNameToIndexFn(schemaName)
}

// IndexToSchemaName delegates to indexToSchemaNameFn.
func (m *mockAdapter) IndexToSchemaName(index string) string {
	return m.indexToSchemaNameFn(index)
}

// SearchDocToBulkItem delegates to searchDocToBulkItemFn.
func (m *mockAdapter) SearchDocToBulkItem(docs search.Document) searchstore.BulkItem {
	return m.searchDocToBulkItemFn(docs)
}

// BulkItemsToSearchDocErrs delegates to bulkItemsToSearchDocErrsFn.
func (m *mockAdapter) BulkItemsToSearchDocErrs(items []searchstore.BulkItem) []search.DocumentError {
	return m.bulkItemsToSearchDocErrsFn(items)
}
38 |
--------------------------------------------------------------------------------
/pkg/wal/processor/search/store/search_index_name.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package store
4 |
5 | import (
6 | "fmt"
7 | "strings"
8 | )
9 |
// IndexNameAdapter translates between pgstream schema names and search
// store index names.
type IndexNameAdapter interface {
	SchemaNameToIndex(schemaName string) IndexName
	IndexToSchemaName(index string) string
}

// IndexName represents an opensearch index name constructed from a schema name.
type IndexName interface {
	Name() string
	Version() int
	NameWithVersion() string
	SchemaName() string
}

// defaultIndexNameAdapter implements the default "<schema>-<version>"
// naming scheme.
type defaultIndexNameAdapter struct{}

func newDefaultIndexNameAdapter() IndexNameAdapter {
	return &defaultIndexNameAdapter{}
}

// SchemaNameToIndex returns the default index name for the given schema.
func (i *defaultIndexNameAdapter) SchemaNameToIndex(schemaName string) IndexName {
	return newDefaultIndexName(schemaName)
}

// IndexToSchemaName derives the schema name from an index name by
// stripping the default version suffix ("-1").
func (i *defaultIndexNameAdapter) IndexToSchemaName(index string) string {
	return strings.TrimSuffix(index, "-1")
}

// defaultIndexName is the IndexName implementation used by the default
// adapter; the version is currently fixed to 1.
//
// Fix: methods previously mixed value and pointer receivers; they are now
// uniformly pointer receivers (safe, since the type is only ever used via
// the pointer returned by newDefaultIndexName).
type defaultIndexName struct {
	schemaName string
	version    int
}

func newDefaultIndexName(schemaName string) IndexName {
	return &defaultIndexName{
		schemaName: schemaName,
		version:    1,
	}
}

// SchemaName returns the schema name the index was derived from.
func (i *defaultIndexName) SchemaName() string {
	return i.schemaName
}

// NameWithVersion represents the name of the index with the version number. This should
// generally not be needed, in favour of `Name`.
func (i *defaultIndexName) NameWithVersion() string {
	return fmt.Sprintf("%s-%d", i.schemaName, i.version)
}

// Name returns the name we should use for querying the index.
func (i *defaultIndexName) Name() string {
	return i.schemaName
}

// Version returns the index version.
func (i *defaultIndexName) Version() int {
	return i.version
}
67 |
--------------------------------------------------------------------------------
/pkg/wal/processor/transformer/wal_transformer_parser.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package transformer
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/pkg/transformers"
9 | )
10 |
11 | type transformerParser struct {
12 | builder transformerBuilder
13 | }
14 |
15 | func newTransformerParser(b transformerBuilder) *transformerParser {
16 | return &transformerParser{
17 | builder: b,
18 | }
19 | }
20 |
21 | func (p *transformerParser) parse(_ context.Context, rules Rules) (map[string]ColumnTransformers, error) {
22 | var err error
23 | transformerMap := map[string]ColumnTransformers{}
24 | for _, table := range rules.Transformers {
25 | if table.ValidationMode == validationModeStrict {
26 | return nil, errValidatorRequiredForStrictMode
27 | }
28 | schemaTableTransformers := make(map[string]transformers.Transformer)
29 | transformerMap[schemaTableKey(table.Schema, table.Table)] = schemaTableTransformers
30 | for colName, transformerRules := range table.ColumnRules {
31 | cfg := transformerRulesToConfig(transformerRules)
32 | if cfg.Name == "" || cfg.Name == "noop" {
33 | // noop transformer, skip
34 | continue
35 | }
36 | if schemaTableTransformers[colName], err = p.builder.New(cfg); err != nil {
37 | return nil, err
38 | }
39 | }
40 | }
41 | return transformerMap, nil
42 | }
43 |
44 | func transformerRulesToConfig(rules TransformerRules) *transformers.Config {
45 | return &transformers.Config{
46 | Name: transformers.TransformerType(rules.Name),
47 | Parameters: rules.Parameters,
48 | DynamicParameters: rules.DynamicParameters,
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/pkg/wal/processor/transformer/wal_transformer_rules.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package transformer
4 |
// Rules is the root of the yaml transformation rules document.
type Rules struct {
	Transformers   []TableRules `yaml:"transformations"`
	ValidationMode string       `yaml:"validation_mode"`
}

// TableRules holds per-table transformation settings, with transformer
// rules keyed by column name.
type TableRules struct {
	Schema         string                      `yaml:"schema"`
	Table          string                      `yaml:"table"`
	ColumnRules    map[string]TransformerRules `yaml:"column_transformers"`
	ValidationMode string                      `yaml:"validation_mode"`
}

// TransformerRules names a transformer and its static and dynamic
// parameters for a single column.
type TransformerRules struct {
	Name              string         `yaml:"name"`
	Parameters        map[string]any `yaml:"parameters"`
	DynamicParameters map[string]any `yaml:"dynamic_parameters"`
}
22 |
--------------------------------------------------------------------------------
/pkg/wal/processor/wal_processor.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package processor
4 |
5 | import (
6 | "context"
7 | "encoding/json"
8 | "errors"
9 | "fmt"
10 |
11 | "github.com/xataio/pgstream/pkg/schemalog"
12 | "github.com/xataio/pgstream/pkg/wal"
13 | )
14 |
// Processor is a general interface to receive and process a wal event
type Processor interface {
	ProcessWALEvent(ctx context.Context, walEvent *wal.Event) error
	Close() error
	Name() string
}

var (
	// ErrPanic signals a panic occurred while processing a wal event.
	ErrPanic = errors.New("panic while processing wal event")
	// ErrIncompatibleWalData is returned when wal event data that does not
	// originate from the schema log table is converted into a log entry.
	ErrIncompatibleWalData = errors.New("wal data event is not a schema log entry")
)
26 |
// IsSchemaLogEvent will return true if the wal event data originates from the
// pgstream schema and the pgstream schema_log table. The data must be
// non-nil; callers guard against nil data before invoking it.
func IsSchemaLogEvent(d *wal.Data) bool {
	return d.Schema == schemalog.SchemaName && d.Table == schemalog.TableName
}
32 |
33 | // WalDataToLogEntry will convert the wal event data on input into the
34 | // equivalent schemalog entry. It will return an error if the wal event data is
35 | // not from the schema log table.
36 | func WalDataToLogEntry(d *wal.Data) (*schemalog.LogEntry, error) {
37 | if !IsSchemaLogEvent(d) {
38 | return nil, ErrIncompatibleWalData
39 | }
40 |
41 | intermediateRec := make(map[string]any, len(d.Columns))
42 | for _, col := range d.Columns { // we only process inserts, so identity columns should never be set
43 | intermediateRec[col.Name] = col.Value
44 | }
45 |
46 | intermediateRecBytes, err := json.Marshal(intermediateRec)
47 | if err != nil {
48 | return nil, fmt.Errorf("parsing wal event into schema log entry, intermediate record is not valid JSON: %w", err)
49 | }
50 |
51 | var le schemalog.LogEntry
52 | if err := json.Unmarshal(intermediateRecBytes, &le); err != nil {
53 | return nil, fmt.Errorf("parsing wal event into schema, intermediate record is not valid JSON: %w", err)
54 | }
55 |
56 | return &le, nil
57 | }
58 |
--------------------------------------------------------------------------------
/pkg/wal/processor/wal_processor_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package processor
4 |
5 | import (
6 | "testing"
7 | "time"
8 |
9 | "github.com/google/go-cmp/cmp"
10 | "github.com/google/go-cmp/cmp/cmpopts"
11 | "github.com/rs/xid"
12 | "github.com/stretchr/testify/require"
13 | "github.com/xataio/pgstream/pkg/schemalog"
14 | "github.com/xataio/pgstream/pkg/wal"
15 | )
16 |
// Test_WalDataToLogEntry verifies the wal data -> schemalog entry
// conversion for a valid schema_log insert event and for event data from
// an unrelated table.
func Test_WalDataToLogEntry(t *testing.T) {
	t.Parallel()

	// Round to seconds: the wal column carries a second-precision
	// timestamp string.
	now := time.Now().UTC().Round(time.Second)
	nowStr := now.Format("2006-01-02 15:04:05")
	id := xid.New()

	// Valid schema_log insert event carrying the columns the conversion
	// maps into the log entry.
	testWalData := &wal.Data{
		Action: "I",
		Schema: schemalog.SchemaName,
		Table:  schemalog.TableName,
		Columns: []wal.Column{
			{ID: "id", Name: "id", Type: "text", Value: id.String()},
			{ID: "version", Name: "version", Type: "integer", Value: 0},
			{ID: "schema_name", Name: "schema_name", Type: "text", Value: "test_schema_1"},
			{ID: "created_at", Name: "created_at", Type: "timestamp", Value: nowStr},
		},
	}

	tests := []struct {
		name string
		data *wal.Data

		wantLogEntry *schemalog.LogEntry
		wantErr      error
	}{
		{
			name: "ok",
			data: testWalData,

			wantLogEntry: &schemalog.LogEntry{
				ID:         id,
				Version:    0,
				SchemaName: "test_schema_1",
				CreatedAt:  schemalog.NewSchemaCreatedAtTimestamp(now),
			},
			wantErr: nil,
		},
		{
			name: "error - invalid data",
			data: &wal.Data{
				Schema: "test_schema",
				Table:  "test_table",
			},

			wantLogEntry: nil,
			wantErr:      ErrIncompatibleWalData,
		},
	}

	// NOTE(review): tc is captured by the parallel subtest closure; this
	// relies on Go 1.22+ per-iteration loop variables — confirm go.mod,
	// otherwise shadow with tc := tc before t.Run.
	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			logEntry, err := WalDataToLogEntry(tc.data)
			require.ErrorIs(t, err, tc.wantErr)
			if diff := cmp.Diff(logEntry, tc.wantLogEntry, cmpopts.IgnoreUnexported(schemalog.LogEntry{})); diff != "" {
				t.Errorf("got: \n%v, \nwant \n%v, \ndiff: \n%s", logEntry, tc.wantLogEntry, diff)
			}
		})
	}
}
79 |
--------------------------------------------------------------------------------
/pkg/wal/processor/webhook/notifier/config.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package notifier
4 |
5 | import "time"
6 |
// Config tunes the webhook notifier: memory budget, worker concurrency
// and the per-request client timeout.
type Config struct {
	// MaxQueueBytes is the max memory used by the webhook notifier for inflight
	// events. Defaults to 100MiB
	MaxQueueBytes int64
	// URLWorkerCount is the max number of concurrent workers that will send
	// webhooks for a given event. Defaults to 10.
	URLWorkerCount uint
	// ClientTimeout is the max time the notifier will wait for a response from
	// a webhook url before it times out. Defaults to 10s.
	ClientTimeout time.Duration
}

const (
	defaultMaxQueueBytes  = int64(100 * 1024 * 1024) // 100MiB
	defaultURLWorkerCount = 10
	defaultClientTimeout  = 10 * time.Second
)

// maxQueueBytes returns the configured memory budget, falling back to the
// default when unset or non-positive.
func (c *Config) maxQueueBytes() int64 {
	if c.MaxQueueBytes <= 0 {
		return defaultMaxQueueBytes
	}
	return c.MaxQueueBytes
}

// workerCount returns the configured worker concurrency, falling back to
// the default when unset.
func (c *Config) workerCount() uint {
	if c.URLWorkerCount == 0 {
		return defaultURLWorkerCount
	}
	return c.URLWorkerCount
}

// clientTimeout returns the configured webhook client timeout, falling
// back to the default when unset or non-positive.
func (c *Config) clientTimeout() time.Duration {
	if c.ClientTimeout <= 0 {
		return defaultClientTimeout
	}
	return c.ClientTimeout
}
48 |
--------------------------------------------------------------------------------
/pkg/wal/processor/webhook/notifier/helper_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package notifier
4 |
5 | import (
6 | "errors"
7 |
8 | "github.com/xataio/pgstream/pkg/wal"
9 | "github.com/xataio/pgstream/pkg/wal/processor/webhook/subscription"
10 | )
11 |
var (
	// testCommitPos is the WAL commit position shared by the test fixtures
	// in this package.
	testCommitPos = wal.CommitPosition("test-pos")
	// errTest is a sentinel error used to simulate failures in tests.
	errTest = errors.New("oh noes")
)
16 |
17 | func newTestSubscription(url, schema, table string, eventTypes []string) *subscription.Subscription {
18 | return &subscription.Subscription{
19 | URL: url,
20 | Schema: schema,
21 | Table: table,
22 | EventTypes: eventTypes,
23 | }
24 | }
25 |
26 | func testNotifyMsg(urls []string, payload []byte) *notifyMsg {
27 | return ¬ifyMsg{
28 | urls: urls,
29 | payload: payload,
30 | commitPosition: testCommitPos,
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/pkg/wal/processor/webhook/notifier/webhook_notify_msg.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package notifier
4 |
5 | import (
6 | "fmt"
7 |
8 | "github.com/xataio/pgstream/pkg/wal"
9 | "github.com/xataio/pgstream/pkg/wal/processor/webhook"
10 | "github.com/xataio/pgstream/pkg/wal/processor/webhook/subscription"
11 | )
12 |
// notifyMsg is the unit of work produced for a WAL event: the serialised
// webhook payload together with the subscriber URLs it must be sent to,
// and the event's commit position for checkpointing.
type notifyMsg struct {
	urls           []string
	payload        []byte
	commitPosition wal.CommitPosition
}

// serialiser encodes a value into its byte representation.
type serialiser func(any) ([]byte, error)
20 |
21 | func newNotifyMsg(event *wal.Event, subscriptions []*subscription.Subscription, serialiser serialiser) (*notifyMsg, error) {
22 | var payload []byte
23 | urls := make([]string, 0, len(subscriptions))
24 | if len(subscriptions) > 0 {
25 | var err error
26 | payload, err = serialiser(&webhook.Payload{Data: event.Data})
27 | if err != nil {
28 | return nil, fmt.Errorf("serialising webhook payload: %w", err)
29 | }
30 |
31 | for _, s := range subscriptions {
32 | urls = append(urls, s.URL)
33 | }
34 | }
35 |
36 | return ¬ifyMsg{
37 | urls: urls,
38 | payload: payload,
39 | commitPosition: event.CommitPosition,
40 | }, nil
41 | }
42 |
43 | func (m *notifyMsg) size() int {
44 | urlSize := 0
45 | for _, url := range m.urls {
46 | urlSize += len(url)
47 | }
48 | return len(m.payload) + urlSize
49 | }
50 |
--------------------------------------------------------------------------------
/pkg/wal/processor/webhook/subscription/server/config.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package server
4 |
5 | import "time"
6 |
// Config holds the subscription server settings. The zero value is valid:
// every field falls back to a sensible default.
type Config struct {
	// Address for the server to listen on. The format is "host:port". Defaults
	// to ":9900".
	Address string
	// ReadTimeout is the maximum duration for reading the entire request,
	// including the body. Defaults to 5s.
	ReadTimeout time.Duration
	// WriteTimeout is the maximum duration before timing out writes of the
	// response. It is reset whenever a new request's header is read. Defaults
	// to 10s.
	WriteTimeout time.Duration
}

// Defaults applied when the corresponding Config field is unset.
const (
	defaultServerReadTimeout  = 5 * time.Second
	defaultServerWriteTimeout = 10 * time.Second
	defaultServerAddress      = ":9900"
)

// readTimeout returns the configured read timeout, falling back to 5s.
func (c *Config) readTimeout() time.Duration {
	if c.ReadTimeout <= 0 {
		return defaultServerReadTimeout
	}
	return c.ReadTimeout
}

// writeTimeout returns the configured write timeout, falling back to 10s.
func (c *Config) writeTimeout() time.Duration {
	if c.WriteTimeout <= 0 {
		return defaultServerWriteTimeout
	}
	return c.WriteTimeout
}

// address returns the configured listen address, falling back to ":9900".
func (c *Config) address() string {
	if c.Address == "" {
		return defaultServerAddress
	}
	return c.Address
}
46 |
--------------------------------------------------------------------------------
/pkg/wal/processor/webhook/subscription/store/cache/config.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package cache
4 |
5 | import "time"
6 |
// Config holds the subscription cache settings. The zero value is valid.
type Config struct {
	// SyncInterval represents how frequently the cache will attempt to sync
	// with the internal subscription store to retrieve the latest data. It
	// defaults to 60s.
	SyncInterval time.Duration
}

// defaultSyncInterval is used when Config.SyncInterval is unset.
const defaultSyncInterval = 60 * time.Second

// syncInterval returns the configured sync interval, falling back to 60s.
func (c *Config) syncInterval() time.Duration {
	if c.SyncInterval <= 0 {
		return defaultSyncInterval
	}
	return c.SyncInterval
}
24 |
--------------------------------------------------------------------------------
/pkg/wal/processor/webhook/subscription/store/cache/helper_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package cache
4 |
5 | import (
6 | "errors"
7 |
8 | "github.com/xataio/pgstream/pkg/wal/processor/webhook/subscription"
9 | )
10 |
// errTest is a sentinel error used to simulate failures in tests.
var errTest = errors.New("oh noes")
12 |
13 | func newTestSubscription(url, schema, table string, eventTypes []string) *subscription.Subscription {
14 | return &subscription.Subscription{
15 | URL: url,
16 | Schema: schema,
17 | Table: table,
18 | EventTypes: eventTypes,
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/pkg/wal/processor/webhook/subscription/store/mocks/mock_subscription_store.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package mocks
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/pkg/wal/processor/webhook/subscription"
9 | )
10 |
11 | type Store struct {
12 | CreateSubscriptionFn func(ctx context.Context, s *subscription.Subscription) error
13 | DeleteSubscriptionFn func(ctx context.Context, s *subscription.Subscription) error
14 | GetSubscriptionsFn func(ctx context.Context, action, schema, table string) ([]*subscription.Subscription, error)
15 | }
16 |
17 | func (m *Store) CreateSubscription(ctx context.Context, s *subscription.Subscription) error {
18 | return m.CreateSubscriptionFn(ctx, s)
19 | }
20 |
21 | func (m *Store) DeleteSubscription(ctx context.Context, s *subscription.Subscription) error {
22 | return m.DeleteSubscriptionFn(ctx, s)
23 | }
24 |
25 | func (m *Store) GetSubscriptions(ctx context.Context, action, schema, table string) ([]*subscription.Subscription, error) {
26 | return m.GetSubscriptionsFn(ctx, action, schema, table)
27 | }
28 |
--------------------------------------------------------------------------------
/pkg/wal/processor/webhook/subscription/store/postgres/pg_subscription_store_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package postgres
4 |
5 | import (
6 | "fmt"
7 | "testing"
8 |
9 | "github.com/stretchr/testify/require"
10 | )
11 |
// TestStore_buildGetQuery verifies the SQL text and positional parameters
// produced by buildGetQuery for each combination of action/schema/table
// filters: no filters, each filter on its own, and all filters combined.
func TestStore_buildGetQuery(t *testing.T) {
	t.Parallel()

	tests := []struct {
		name   string
		action string
		schema string
		table  string

		wantQuery  string
		wantParams []any
	}{
		{
			name:       "no filters",
			wantQuery:  fmt.Sprintf(`SELECT url, schema_name, table_name, event_types FROM %s LIMIT 1000`, subscriptionsTable()),
			wantParams: nil,
		},
		{
			name:       "with action filter",
			action:     "I",
			wantQuery:  fmt.Sprintf(`SELECT url, schema_name, table_name, event_types FROM %s WHERE ($1=ANY(event_types) OR event_types IS NULL) LIMIT 1000`, subscriptionsTable()),
			wantParams: []any{"I"},
		},
		{
			name:       "with schema filter",
			schema:     "test_schema",
			wantQuery:  fmt.Sprintf(`SELECT url, schema_name, table_name, event_types FROM %s WHERE (schema_name=$1 OR schema_name='') LIMIT 1000`, subscriptionsTable()),
			wantParams: []any{"test_schema"},
		},
		{
			name:       "with table filter",
			table:      "test_table",
			wantQuery:  fmt.Sprintf(`SELECT url, schema_name, table_name, event_types FROM %s WHERE (table_name=$1 OR table_name='') LIMIT 1000`, subscriptionsTable()),
			wantParams: []any{"test_table"},
		},
		{
			name:   "with all filters",
			action: "I",
			schema: "test_schema",
			table:  "test_table",
			// Filters are applied in schema, table, action order, which
			// fixes the expected parameter numbering ($1..$3).
			wantQuery: fmt.Sprintf(`SELECT url, schema_name, table_name, event_types FROM %s `, subscriptionsTable()) +
				"WHERE (schema_name=$1 OR schema_name='') " +
				"AND (table_name=$2 OR table_name='') " +
				"AND ($3=ANY(event_types) OR event_types IS NULL) LIMIT 1000",
			wantParams: []any{"test_schema", "test_table", "I"},
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			s := &Store{}
			query, params := s.buildGetQuery(tc.action, tc.schema, tc.table)
			require.Equal(t, tc.wantQuery, query)
			require.Equal(t, tc.wantParams, params)
		})
	}
}
71 |
--------------------------------------------------------------------------------
/pkg/wal/processor/webhook/subscription/store/subscription_store.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package store
4 |
5 | import (
6 | "context"
7 |
8 | "github.com/xataio/pgstream/pkg/wal/processor/webhook/subscription"
9 | )
10 |
// Store defines the operations supported by a webhook subscription store.
type Store interface {
	// CreateSubscription persists the given subscription.
	CreateSubscription(ctx context.Context, s *subscription.Subscription) error
	// DeleteSubscription removes the given subscription from the store.
	DeleteSubscription(ctx context.Context, s *subscription.Subscription) error
	// GetSubscriptions returns the subscriptions matching the given action,
	// schema and table filters; empty filter values act as wildcards.
	GetSubscriptions(ctx context.Context, action, schema, table string) ([]*subscription.Subscription, error)
}
16 |
--------------------------------------------------------------------------------
/pkg/wal/processor/webhook/subscription/subscription.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package subscription
4 |
5 | import (
6 | "fmt"
7 | "slices"
8 | )
9 |
// Subscription represents a webhook subscription: a target URL along with
// the event types, schema and table it wants to be notified about. Empty
// scope fields act as wildcards.
type Subscription struct {
	URL        string   `json:"url"`
	EventTypes []string `json:"event_types"`
	Schema     string   `json:"schema"`
	Table      string   `json:"table"`
}

// IsFor reports whether the subscription matches the given action, schema
// and table. Empty filter arguments and empty subscription fields match
// anything.
func (s *Subscription) IsFor(action, schema, table string) bool {
	if action == "" && schema == "" && table == "" {
		return true
	}

	switch {
	case action != "" && len(s.EventTypes) > 0 && !slices.Contains(s.EventTypes, action):
		return false
	case schema != "" && s.Schema != "" && schema != s.Schema:
		return false
	case table != "" && s.Table != "" && table != s.Table:
		return false
	default:
		return true
	}
}

// Key returns an identifier for the subscription built from its URL,
// schema and table.
func (s *Subscription) Key() string {
	return fmt.Sprintf("%s/%s/%s", s.URL, s.Schema, s.Table)
}
40 |
--------------------------------------------------------------------------------
/pkg/wal/processor/webhook/webhook.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package webhook
4 |
5 | import "github.com/xataio/pgstream/pkg/wal"
6 |
// Payload is the body serialised and delivered to webhook subscribers,
// wrapping the WAL data of a single event.
type Payload struct {
	Data *wal.Data
}
10 |
--------------------------------------------------------------------------------
/pkg/wal/replication/mocks/mock_replication_handler.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package mocks
4 |
5 | import (
6 | "context"
7 | "sync/atomic"
8 |
9 | "github.com/xataio/pgstream/pkg/wal/replication"
10 | )
11 |
// Handler is a mock of the replication Handler interface. Each method
// delegates to the corresponding Fn field; SyncLSN and ReceiveMessage
// additionally count their invocations atomically so tests can assert on
// call counts from concurrent goroutines.
type Handler struct {
	StartReplicationFn        func(context.Context) error
	StartReplicationFromLSNFn func(context.Context, replication.LSN) error
	ReceiveMessageFn          func(context.Context, uint64) (*replication.Message, error)
	SyncLSNFn                 func(context.Context, replication.LSN) error
	DropReplicationSlotFn     func(ctx context.Context) error
	GetLSNParserFn            func() replication.LSNParser
	GetCurrentLSNFn           func(context.Context) (replication.LSN, error)
	CloseFn                   func() error
	// SyncLSNCalls and ReceiveMessageCalls track invocation counts; read
	// them through the atomic Get*Calls accessors.
	SyncLSNCalls        uint64
	ReceiveMessageCalls uint64
}

// StartReplication delegates to StartReplicationFn.
func (m *Handler) StartReplication(ctx context.Context) error {
	return m.StartReplicationFn(ctx)
}

// StartReplicationFromLSN delegates to StartReplicationFromLSNFn.
func (m *Handler) StartReplicationFromLSN(ctx context.Context, lsn replication.LSN) error {
	return m.StartReplicationFromLSNFn(ctx, lsn)
}

// ReceiveMessage increments the call counter and delegates to
// ReceiveMessageFn, passing the updated call count so the mock can vary
// behaviour per invocation.
func (m *Handler) ReceiveMessage(ctx context.Context) (*replication.Message, error) {
	atomic.AddUint64(&m.ReceiveMessageCalls, 1)
	return m.ReceiveMessageFn(ctx, m.GetReceiveMessageCalls())
}

// SyncLSN increments the call counter and delegates to SyncLSNFn.
func (m *Handler) SyncLSN(ctx context.Context, lsn replication.LSN) error {
	atomic.AddUint64(&m.SyncLSNCalls, 1)
	return m.SyncLSNFn(ctx, lsn)
}

// DropReplicationSlot delegates to DropReplicationSlotFn.
func (m *Handler) DropReplicationSlot(ctx context.Context) error {
	return m.DropReplicationSlotFn(ctx)
}

// GetCurrentLSN delegates to GetCurrentLSNFn.
func (m *Handler) GetCurrentLSN(ctx context.Context) (replication.LSN, error) {
	return m.GetCurrentLSNFn(ctx)
}

// GetLSNParser delegates to GetLSNParserFn.
func (m *Handler) GetLSNParser() replication.LSNParser {
	return m.GetLSNParserFn()
}

// Close delegates to CloseFn.
func (m *Handler) Close() error {
	return m.CloseFn()
}

// GetSyncLSNCalls returns the number of SyncLSN invocations so far.
func (m *Handler) GetSyncLSNCalls() uint64 {
	return atomic.LoadUint64(&m.SyncLSNCalls)
}

// GetReceiveMessageCalls returns the number of ReceiveMessage invocations
// so far.
func (m *Handler) GetReceiveMessageCalls() uint64 {
	return atomic.LoadUint64(&m.ReceiveMessageCalls)
}
66 |
--------------------------------------------------------------------------------
/pkg/wal/replication/mocks/mock_replication_lsn_parser.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package mocks
4 |
5 | import (
6 | "github.com/xataio/pgstream/pkg/wal/replication"
7 | )
8 |
// LSNParser is a mock of the replication LSNParser interface, delegating
// each conversion to a configurable function field.
type LSNParser struct {
	ToStringFn   func(replication.LSN) string
	FromStringFn func(string) (replication.LSN, error)
}

// ToString delegates to ToStringFn.
func (m *LSNParser) ToString(lsn replication.LSN) string {
	return m.ToStringFn(lsn)
}

// FromString delegates to FromStringFn.
func (m *LSNParser) FromString(lsn string) (replication.LSN, error) {
	return m.FromStringFn(lsn)
}
21 |
--------------------------------------------------------------------------------
/pkg/wal/replication/postgres/helper_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package postgres
4 |
5 | import (
6 | "errors"
7 | "fmt"
8 | "time"
9 | )
10 |
// mockRow is a single-column row stub for the postgres replication tests.
// Scan fills the one destination argument from the field matching its type
// (lsn for *string, lag for *int64, exists for *bool), unless a custom
// scanFn is provided, in which case that takes over entirely.
type mockRow struct {
	lsn    string
	lag    int64
	exists bool
	scanFn func(args ...any) error
}

// Scan copies the stubbed value into the single destination pointer, or
// delegates to scanFn when set.
func (m *mockRow) Scan(dest ...any) error {
	if m.scanFn != nil {
		return m.scanFn(dest...)
	}

	if len(dest) != 1 {
		return fmt.Errorf("expected 1 argument, got %d", len(dest))
	}

	switch d := dest[0].(type) {
	case *string:
		*d = m.lsn
	case *int64:
		*d = m.lag
	case *bool:
		*d = m.exists
	default:
		return fmt.Errorf("unexpected argument type in scan: %T", dest[0])
	}

	return nil
}
40 |
// Fixture values shared by the postgres replication tests.
const (
	testDBName = "test-db"
	testSlot   = "test_slot"
	// testLSN and testLSNStr represent the same LSN in numeric and textual
	// postgres form respectively (0x1CF54A048 == 7773397064).
	testLSN    = uint64(7773397064)
	testLSNStr = "1/CF54A048"
)

var (
	// errTest is a sentinel error used to simulate failures in tests.
	errTest = errors.New("oh noes")

	// now is a single timestamp captured at package init so tests share a
	// consistent time value.
	now = time.Now()
)
53 |
--------------------------------------------------------------------------------
/pkg/wal/replication/postgres/pg_lsn_parser.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package postgres
4 |
5 | import (
6 | "github.com/jackc/pglogrepl"
7 |
8 | "github.com/xataio/pgstream/pkg/wal/replication"
9 | )
10 |
// LSNParser is the postgres implementation of the replication.LSNParser
type LSNParser struct{}

// NewLSNParser returns a parser that converts between pgstream's numeric
// LSN type and the textual postgres LSN representation via pglogrepl.
func NewLSNParser() *LSNParser {
	return &LSNParser{}
}

// FromString parses a textual postgres LSN (e.g. "1/CF54A048") into a
// replication.LSN, returning 0 and an error when the string is invalid.
func (p *LSNParser) FromString(lsnStr string) (replication.LSN, error) {
	lsn, err := pglogrepl.ParseLSN(lsnStr)
	if err != nil {
		return 0, err
	}
	return replication.LSN(lsn), nil
}

// ToString renders the LSN in the textual postgres format.
func (p *LSNParser) ToString(lsn replication.LSN) string {
	return pglogrepl.LSN(lsn).String()
}
29 |
--------------------------------------------------------------------------------
/pkg/wal/replication/replication_handler.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package replication
4 |
5 | import (
6 | "context"
7 | "errors"
8 | "time"
9 | )
10 |
// Handler manages the replication operations
type Handler interface {
	// StartReplication starts the replication stream.
	StartReplication(ctx context.Context) error
	// StartReplicationFromLSN starts the replication stream from the given LSN.
	StartReplicationFromLSN(ctx context.Context, lsn LSN) error
	// ReceiveMessage returns the next available replication message.
	ReceiveMessage(ctx context.Context) (*Message, error)
	// SyncLSN syncs the given LSN with the replication source (semantics are
	// defined by each implementation).
	SyncLSN(ctx context.Context, lsn LSN) error
	// GetReplicationLag returns the current replication lag.
	GetReplicationLag(ctx context.Context) (int64, error)
	// GetCurrentLSN returns the current LSN.
	GetCurrentLSN(ctx context.Context) (LSN, error)
	// GetLSNParser returns a parser to convert LSNs to/from their string form.
	GetLSNParser() LSNParser
	// Close releases the handler resources.
	Close() error
}

// Message contains the replication data
type Message struct {
	LSN            LSN
	Data           []byte
	ServerTime     time.Time
	ReplyRequested bool
}

// LSNParser handles the LSN type conversion
type LSNParser interface {
	ToString(LSN) string
	FromString(string) (LSN, error)
}

// LSN is a log sequence number, represented as a 64-bit unsigned integer.
type LSN uint64

// ErrConnTimeout signals a timeout on the replication connection.
var ErrConnTimeout = errors.New("connection timeout")
40 |
--------------------------------------------------------------------------------
/snapshot2pg.env:
--------------------------------------------------------------------------------
1 | # Listener config
2 | PGSTREAM_POSTGRES_SNAPSHOT_LISTENER_URL="postgres://postgres:postgres@localhost?sslmode=disable"
3 | PGSTREAM_POSTGRES_SNAPSHOT_TABLES="test"
4 | PGSTREAM_POSTGRES_SNAPSHOT_SCHEMA_WORKERS=4
5 | PGSTREAM_POSTGRES_SNAPSHOT_TABLE_WORKERS=4
6 | PGSTREAM_POSTGRES_SNAPSHOT_BATCH_PAGE_SIZE=1000
7 | PGSTREAM_POSTGRES_SNAPSHOT_WORKERS=1
8 |
9 | # Processor config
10 | PGSTREAM_TRANSFORMER_RULES_FILE="transformer_rules.yaml"
11 | PGSTREAM_POSTGRES_WRITER_TARGET_URL="postgres://postgres:postgres@localhost:7654?sslmode=disable"
12 | PGSTREAM_POSTGRES_WRITER_BATCH_SIZE=100
13 | PGSTREAM_POSTGRES_WRITER_BATCH_TIMEOUT=5s
14 | PGSTREAM_POSTGRES_WRITER_SCHEMALOG_STORE_URL="postgres://postgres:postgres@localhost?sslmode=disable"
15 | PGSTREAM_POSTGRES_WRITER_DISABLE_TRIGGERS=true
16 | PGSTREAM_POSTGRES_WRITER_ON_CONFLICT_ACTION="nothing"
17 |
--------------------------------------------------------------------------------
/snapshot2pg.yaml:
--------------------------------------------------------------------------------
1 | source:
2 | postgres:
3 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable"
4 | mode: snapshot # options are replication, snapshot or snapshot_and_replication
5 | snapshot: # when mode is snapshot or snapshot_and_replication
6 | mode: full # options are full, schema or data
7 | tables: ["*"] # tables to snapshot, can be a list of table names or a pattern
8 | recorder:
9 | postgres_url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" # URL of the database where the snapshot status is recorded
10 | snapshot_workers: 1 # number of schemas to be snapshotted in parallel
11 | data: # when mode is full or data
12 | schema_workers: 4 # number of schema tables to be snapshotted in parallel
13 | table_workers: 4 # number of workers to snapshot a table in parallel
14 | batch_page_size: 1000 # number of pages to read per batch
15 | schema: # when mode is full or schema
16 | mode: pgdump_pgrestore # options are pgdump_pgrestore or schemalog
17 | pgdump_pgrestore:
18 | clean_target_db: true # whether to clean the target database before restoring
19 |
20 | target:
21 | postgres:
22 | url: "postgres://postgres:postgres@localhost:7654?sslmode=disable"
23 | batch:
24 | timeout: 5000 # batch timeout in milliseconds
25 | size: 100 # number of messages in a batch
26 | disable_triggers: false # whether to disable triggers on the target database
27 | on_conflict_action: "nothing" # options are update, nothing or error
28 |
29 | modifiers:
30 | injector:
31 | enabled: false # whether to inject pgstream metadata into the WAL events
32 | transformations:
33 | validation_mode: relaxed
34 | table_transformers:
35 | - schema: public
36 | table: test
37 | column_transformers:
38 | name:
39 | name: greenmask_firstname
40 | dynamic_parameters:
41 | gender:
42 | column: sex
43 |
--------------------------------------------------------------------------------
/tools/webhook/webhook_server.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 |
3 | package main
4 |
5 | import (
6 | "bytes"
7 | "encoding/json"
8 | "flag"
9 | "fmt"
10 | "io"
11 | "net/http"
12 | "os"
13 | "time"
14 |
15 | "github.com/xataio/pgstream/internal/log/zerolog"
16 | loglib "github.com/xataio/pgstream/pkg/log"
17 | )
18 |
19 | var logger loglib.Logger
20 |
21 | func main() {
22 | address := flag.String("address", ":9910", "Webhook server address")
23 | logLevel := flag.String("log-level", "debug", "Webhook server log level")
24 | flag.Parse()
25 |
26 | logger = zerolog.NewStdLogger(zerolog.NewLogger(&zerolog.Config{
27 | LogLevel: *logLevel,
28 | }))
29 |
30 | mux := http.NewServeMux()
31 | mux.HandleFunc("/webhook", processWebhook)
32 |
33 | server := &http.Server{
34 | Handler: mux,
35 | Addr: *address,
36 | ReadTimeout: 5 * time.Second,
37 | WriteTimeout: 5 * time.Second,
38 | }
39 |
40 | logger.Info(fmt.Sprintf("listening on %s...", *address))
41 | if err := server.ListenAndServe(); err != nil {
42 | logger.Error(err, "listening on http server", loglib.Fields{"address": *address})
43 | os.Exit(1)
44 | }
45 | }
46 |
47 | func processWebhook(w http.ResponseWriter, r *http.Request) {
48 | if r.Method != http.MethodPost {
49 | http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
50 | return
51 | }
52 |
53 | logger.Debug("got /webhook request")
54 |
55 | bodyBytes, err := io.ReadAll(r.Body)
56 | if err != nil {
57 | http.Error(w, err.Error(), http.StatusBadRequest)
58 | return
59 | }
60 | defer r.Body.Close()
61 |
62 | var prettyJSON bytes.Buffer
63 | if err = json.Indent(&prettyJSON, bodyBytes, "", " "); err != nil {
64 | http.Error(w, err.Error(), http.StatusBadRequest)
65 | return
66 | }
67 | logger.Info(prettyJSON.String())
68 |
69 | w.WriteHeader(http.StatusOK)
70 | }
71 |
--------------------------------------------------------------------------------
/transformer_rules.yaml:
--------------------------------------------------------------------------------
1 | transformations:
2 | validation_mode: relaxed
3 | table_transformers:
4 | - schema: public
5 | table: test
6 | column_transformers:
7 | name:
8 | name: greenmask_firstname
9 | dynamic_parameters:
10 | gender:
11 | column: sex
12 |
--------------------------------------------------------------------------------