├── .github ├── dependabot.yml └── workflows │ └── build.yml ├── .gitignore ├── .golangci.yml ├── .goreleaser.yaml ├── .pre-commit-config.yaml ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── brand-kit ├── README.md ├── banner │ ├── pgstream-banner.svg │ └── pgstream-banner@2x.png └── logo │ ├── symbol │ ├── pgstream-black-symbol.svg │ └── pgstream-white-symbol.svg │ └── wordmark │ ├── pgstream-black-wordmark.svg │ └── pgstream-white-wordmark.svg ├── build └── docker │ ├── docker-compose-signoz.yml │ ├── docker-compose.yml │ ├── postgres │ ├── Dockerfile │ └── postgresql.conf │ └── signoz │ ├── clickhouse │ ├── cluster.xml │ ├── config.xml │ ├── custom-function.xml │ ├── storage.xml │ ├── user_scripts │ │ └── histogramQuantile │ └── users.xml │ ├── dashboards │ └── pgstream.json │ ├── otel-collector-config.yaml │ ├── otel-collector-opamp-config.yaml │ └── prometheus.yml ├── cli-definition.json ├── cmd ├── config │ ├── config.go │ ├── config_env.go │ ├── config_env_test.go │ ├── config_yaml.go │ ├── config_yaml_test.go │ ├── helper_test.go │ └── test │ │ ├── test_config.env │ │ ├── test_config.yaml │ │ └── test_transformer_rules.yaml ├── init_cmd.go ├── root_cmd.go ├── run_cmd.go ├── snapshot_cmd.go └── status_cmd.go ├── config_template.yaml ├── docs ├── README.md ├── img │ ├── pgstream_arch_v1.png │ ├── pgstream_diagram_v2.svg │ ├── pgstream_diagram_v2_kafka.svg │ ├── pgstream_snapshot_diagram.svg │ ├── pgstream_snapshot_sequence.svg │ ├── pgstream_transformer_diagram.svg │ ├── pgstream_tutorial_kafka.svg │ ├── pgstream_tutorial_pg2os.svg │ ├── pgstream_tutorial_pg2pg.svg │ ├── pgstream_tutorial_pg2webhooks.svg │ ├── pgstream_tutorial_snapshot2pg.svg │ └── pgstream_tutorial_transformer.svg └── tutorials │ ├── kafka2os_tutorial.env │ ├── kafka2os_tutorial.yaml │ ├── kafka2pg_tutorial.env │ ├── kafka2pg_tutorial.yaml │ ├── pg2kafka_tutorial.env │ ├── pg2kafka_tutorial.yaml │ ├── pg2os_tutorial.env │ ├── pg2os_tutorial.yaml │ ├── 
pg2pg_transformer_tutorial.env │ ├── pg2pg_transformer_tutorial.yaml │ ├── pg2pg_tutorial.env │ ├── pg2pg_tutorial.yaml │ ├── pg2webhook_tutorial.env │ ├── pg2webhook_tutorial.yaml │ ├── postgres_kafka.md │ ├── postgres_snapshot.md │ ├── postgres_to_opensearch.md │ ├── postgres_to_postgres.md │ ├── postgres_to_webhooks.md │ ├── postgres_transformer.md │ ├── snapshot2pg_tutorial.env │ ├── snapshot2pg_tutorial.yaml │ └── tutorial_transformer_rules.yaml ├── go.mod ├── go.sum ├── internal ├── http │ ├── http.go │ └── mocks │ │ └── mock_http_client.go ├── json │ └── json.go ├── log │ └── zerolog │ │ └── zerolog.go ├── postgres │ ├── errors.go │ ├── instrumentation │ │ ├── instrumented_pg_dump_restore.go │ │ ├── instrumented_querier.go │ │ ├── instrumented_querier_builder.go │ │ └── instrumented_tx.go │ ├── mocks │ │ ├── mock_pg_mapper.go │ │ ├── mock_pg_querier.go │ │ ├── mock_pg_replication_conn.go │ │ ├── mock_row.go │ │ ├── mock_rows.go │ │ └── mock_tx.go │ ├── pg_conn.go │ ├── pg_conn_pool.go │ ├── pg_dump.go │ ├── pg_dump_pg_restore_integration_test.go │ ├── pg_mapper.go │ ├── pg_querier.go │ ├── pg_querier_builder.go │ ├── pg_replication_conn.go │ ├── pg_restore.go │ ├── pg_test.go │ ├── pg_tx.go │ ├── pg_utils.go │ └── pg_utils_test.go ├── profiling │ └── profiling.go ├── searchstore │ ├── elasticsearch │ │ ├── elasticsearch_client.go │ │ └── elasticsearch_mapper.go │ ├── mocks │ │ ├── mock_client.go │ │ └── mock_mapper.go │ ├── opensearch │ │ ├── opensearch_client.go │ │ └── opensearch_mapper.go │ ├── search_api.go │ ├── search_client.go │ ├── search_errors.go │ └── search_mapper.go ├── sync │ ├── mocks │ │ └── mock_weighted_semaphore.go │ └── semaphore.go └── testcontainers │ ├── test_elasticsearch_container.go │ ├── test_kafka_container.go │ ├── test_opensearch_container.go │ └── test_postgres_container.go ├── kafka2os.env ├── kafka2os.yaml ├── license-header.txt ├── main.go ├── migrations └── postgres │ ├── 1_create_pgstream_xid.down.sql │ ├── 
1_create_pgstream_xid.up.sql │ ├── 2_create_pgstream_schemalog_table.down.sql │ ├── 2_create_pgstream_schemalog_table.up.sql │ ├── 3_create_pgstream_tableids_table.down.sql │ ├── 3_create_pgstream_tableids_table.up.sql │ ├── 4_create_pgstream_get_schema_function.down.sql │ ├── 4_create_pgstream_get_schema_function.up.sql │ ├── 5_create_pgstream_log_schema_function.down.sql │ ├── 5_create_pgstream_log_schema_function.up.sql │ ├── 6_create_pgstream_refresh_schema_function.down.sql │ ├── 6_create_pgstream_refresh_schema_function.up.sql │ ├── 7_create_pgstream_event_triggers.down.sql │ ├── 7_create_pgstream_event_triggers.up.sql │ └── migrations.go ├── pg2kafka.env ├── pg2kafka.yaml ├── pg2os.env ├── pg2os.yaml ├── pg2pg.env ├── pg2pg.yaml ├── pg2webhook.env ├── pg2webhook.yaml ├── pkg ├── backoff │ ├── backoff.go │ └── mocks │ │ └── mock_backoff.go ├── kafka │ ├── config.go │ ├── conn.go │ ├── instrumentation │ │ ├── instrumented_kafka_reader.go │ │ └── instrumented_kafka_writer.go │ ├── kafka_offset_parser.go │ ├── kafka_offset_parser_test.go │ ├── kafka_reader.go │ ├── kafka_writer.go │ ├── log.go │ └── mocks │ │ ├── mock_kafka_parser.go │ │ ├── mock_kafka_reader.go │ │ └── mock_kafka_writer.go ├── log │ ├── logger.go │ └── zerolog │ │ └── logger.go ├── otel │ ├── config.go │ ├── otel_instrumentation.go │ ├── otel_provider.go │ ├── span.go │ └── version.go ├── schemalog │ ├── instrumentation │ │ └── instrumented_store.go │ ├── log_entry.go │ ├── mocks │ │ └── store_mock.go │ ├── postgres │ │ ├── helper_test.go │ │ ├── pg_schemalog_store.go │ │ └── pg_schemalog_store_test.go │ ├── schema.go │ ├── schema_diff.go │ ├── schema_diff_test.go │ ├── schema_test.go │ ├── store.go │ ├── store_cache.go │ └── store_cache_test.go ├── snapshot │ ├── errors.go │ ├── errors_test.go │ ├── generator │ │ ├── helper_test.go │ │ ├── instrumentation │ │ │ └── instrumented_snapshot_generator.go │ │ ├── mocks │ │ │ └── mock_snapshot_generator.go │ │ ├── postgres │ │ │ ├── data │ │ │ │ ├── 
config.go │ │ │ │ ├── helper_test.go │ │ │ │ ├── instrumented_table_snapshot_generator.go │ │ │ │ ├── pg_snapshot_generator.go │ │ │ │ ├── pg_snapshot_generator_integration_test.go │ │ │ │ └── pg_snapshot_generator_test.go │ │ │ ├── schema │ │ │ │ ├── pgdumprestore │ │ │ │ │ ├── helper_test.go │ │ │ │ │ ├── snapshot_pg_dump_restore_generator.go │ │ │ │ │ ├── snapshot_pg_dump_restore_generator_test.go │ │ │ │ │ └── test │ │ │ │ │ │ ├── test_dump.sql │ │ │ │ │ │ ├── test_dump_constraints.sql │ │ │ │ │ │ └── test_dump_filtered.sql │ │ │ │ └── schemalog │ │ │ │ │ ├── snapshot_schemalog_generator.go │ │ │ │ │ └── snapshot_schemalog_generator_test.go │ │ │ └── tablefinder │ │ │ │ ├── instrumented_table_discovery.go │ │ │ │ ├── pg_snapshot_table_finder.go │ │ │ │ └── pg_snapshot_table_finder_test.go │ │ ├── snapshot_generator.go │ │ ├── snapshot_generator_recorder.go │ │ └── snapshot_generator_recorder_test.go │ ├── snapshot.go │ └── store │ │ ├── instrumentation │ │ └── instrumented_snapshot_store.go │ │ ├── mocks │ │ └── mock_snapshot_store.go │ │ ├── postgres │ │ ├── pg_snapshot_store.go │ │ └── pg_snapshot_store_test.go │ │ └── snapshot_store.go ├── stream │ ├── config.go │ ├── helper_test.go │ ├── integration │ │ ├── config │ │ │ └── postgresql.conf │ │ ├── helper_test.go │ │ ├── pg_kafka_integration_test.go │ │ ├── pg_pg_integration_test.go │ │ ├── pg_pg_integration_transformer_test.go │ │ ├── pg_search_integration_test.go │ │ ├── pg_webhook_integration_test.go │ │ ├── setup_test.go │ │ └── snapshot_pg_integration_test.go │ ├── stream.go │ ├── stream_init.go │ ├── stream_run.go │ ├── stream_snapshot.go │ ├── stream_status.go │ ├── stream_status_checker.go │ ├── stream_status_checker_test.go │ └── stream_status_test.go ├── tls │ ├── test │ │ ├── test.csr │ │ ├── test.key │ │ └── test.pem │ ├── tls.go │ └── tls_test.go ├── transformers │ ├── builder │ │ ├── transformer_builder.go │ │ └── transformer_builder_test.go │ ├── generators │ │ ├── 
deterministic_bytes_generator.go │ │ ├── generator.go │ │ └── random_bytes_generator.go │ ├── greenmask │ │ ├── greenmask_boolean_transformer.go │ │ ├── greenmask_boolean_transformer_test.go │ │ ├── greenmask_choice_transformer.go │ │ ├── greenmask_choice_transformer_test.go │ │ ├── greenmask_date_transformer.go │ │ ├── greenmask_date_transformer_test.go │ │ ├── greenmask_firstname_transformer.go │ │ ├── greenmask_firstname_transformer_test.go │ │ ├── greenmask_float_transformer.go │ │ ├── greenmask_float_transformer_test.go │ │ ├── greenmask_integer_transformer.go │ │ ├── greenmask_integer_transformer_test.go │ │ ├── greenmask_string_transformer.go │ │ ├── greenmask_string_transformer_test.go │ │ ├── greenmask_timestamp_transformer.go │ │ ├── greenmask_timestamp_transformer_test.go │ │ ├── greenmask_transformer.go │ │ ├── greenmask_unix_timestamp_transformer.go │ │ ├── greenmask_unix_timestamp_transformer_test.go │ │ ├── greenmask_uuid_transformer.go │ │ └── greenmask_uuid_transformer_test.go │ ├── instrumentation │ │ └── instrumented_transformer.go │ ├── literal_string_transformer.go │ ├── literal_string_transformer_test.go │ ├── masking_transformer.go │ ├── masking_transformer_test.go │ ├── mocks │ │ ├── mock_builder.go │ │ └── mock_transformer.go │ ├── neosync │ │ ├── neosync_email_transformer.go │ │ ├── neosync_email_transformer_test.go │ │ ├── neosync_firstname_transformer.go │ │ ├── neosync_firstname_transformer_test.go │ │ ├── neosync_fullname_transformer.go │ │ ├── neosync_fullname_transformer_test.go │ │ ├── neosync_lastname_transformer.go │ │ ├── neosync_lastname_transformer_test.go │ │ ├── neosync_string_transformer.go │ │ ├── neosync_string_transformer_test.go │ │ └── neosync_transformer.go │ ├── phone_number_transformer.go │ ├── phone_number_transformer_test.go │ ├── string_transformer.go │ ├── string_transformer_test.go │ ├── template_transformer.go │ ├── template_transformer_test.go │ ├── transformer.go │ └── transformer_test.go └── wal │ ├── 
checkpointer │ ├── kafka │ │ ├── wal_kafka_checkpointer.go │ │ └── wal_kafka_checkpointer_test.go │ ├── postgres │ │ ├── helper_test.go │ │ ├── wal_pg_checkpointer.go │ │ └── wal_pg_checkpointer_test.go │ └── wal_checkpointer.go │ ├── listener │ ├── kafka │ │ ├── wal_kafka_reader.go │ │ └── wal_kafka_reader_test.go │ ├── postgres │ │ ├── helper_test.go │ │ ├── wal_pg_listener.go │ │ └── wal_pg_listener_test.go │ ├── snapshot │ │ ├── adapter │ │ │ ├── config.go │ │ │ ├── config_test.go │ │ │ ├── wal_process_event_adapter.go │ │ │ ├── wal_process_event_adapter_test.go │ │ │ ├── wal_snapshot_generator_adapter.go │ │ │ └── wal_snapshot_generator_adapter_test.go │ │ ├── builder │ │ │ ├── config.go │ │ │ └── wal_listener_snapshot_generator_builder.go │ │ └── wal_snapshot_listener.go │ └── wal_listener.go │ ├── processor │ ├── batch │ │ ├── helper_test.go │ │ ├── mocks │ │ │ └── mock_batch_sender.go │ │ ├── wal_batch.go │ │ ├── wal_batch_sender.go │ │ ├── wal_batch_sender_config.go │ │ ├── wal_batch_sender_test.go │ │ └── wal_message.go │ ├── errors.go │ ├── filter │ │ ├── wal_filter.go │ │ └── wal_filter_test.go │ ├── injector │ │ ├── helper_test.go │ │ ├── wal_injector.go │ │ └── wal_injector_test.go │ ├── instrumentation │ │ └── instrumented_wal_processor.go │ ├── kafka │ │ ├── config.go │ │ ├── wal_kafka_batch_writer.go │ │ └── wal_kafka_batch_writer_test.go │ ├── mocks │ │ └── mock_processor.go │ ├── postgres │ │ ├── config.go │ │ ├── helper_test.go │ │ ├── instrumented_wal_adapter.go │ │ ├── postgres_batch_writer.go │ │ ├── postgres_batch_writer_test.go │ │ ├── postgres_bulk_ingest_writer.go │ │ ├── postgres_bulk_ingest_writer_test.go │ │ ├── postgres_query_msg.go │ │ ├── postgres_wal_adapter.go │ │ ├── postgres_wal_ddl_adapter.go │ │ ├── postgres_wal_ddl_adapter_test.go │ │ ├── postgres_wal_dml_adapter.go │ │ ├── postgres_wal_dml_adapter_test.go │ │ └── postgres_writer.go │ ├── search │ │ ├── config.go │ │ ├── errors.go │ │ ├── helper_test.go │ │ ├── 
instrumentation │ │ │ └── instrumented_search_store.go │ │ ├── mocks │ │ │ └── mock_search_mapper.go │ │ ├── search_adapter.go │ │ ├── search_adapter_test.go │ │ ├── search_batch_indexer.go │ │ ├── search_batch_indexer_test.go │ │ ├── search_msg_batch.go │ │ ├── search_store_retrier.go │ │ ├── search_store_retrier_test.go │ │ ├── store.go │ │ └── store │ │ │ ├── helper_test.go │ │ │ ├── search_adapter.go │ │ │ ├── search_index_name.go │ │ │ ├── search_pg_mapper.go │ │ │ ├── search_pg_mapper_test.go │ │ │ ├── search_store.go │ │ │ └── search_store_test.go │ ├── transformer │ │ ├── wal_postgres_transformer_parser.go │ │ ├── wal_postgres_transformer_parser_test.go │ │ ├── wal_transformer.go │ │ ├── wal_transformer_parser.go │ │ ├── wal_transformer_parser_test.go │ │ ├── wal_transformer_rules.go │ │ └── wal_transformer_test.go │ ├── wal_processor.go │ ├── wal_processor_test.go │ └── webhook │ │ ├── notifier │ │ ├── config.go │ │ ├── helper_test.go │ │ ├── webhook_notifier.go │ │ ├── webhook_notifier_test.go │ │ └── webhook_notify_msg.go │ │ ├── subscription │ │ ├── server │ │ │ ├── config.go │ │ │ ├── subscription_server.go │ │ │ └── subscription_server_test.go │ │ ├── store │ │ │ ├── cache │ │ │ │ ├── config.go │ │ │ │ ├── helper_test.go │ │ │ │ ├── subscription_store_cache.go │ │ │ │ └── subscription_store_cache_test.go │ │ │ ├── mocks │ │ │ │ └── mock_subscription_store.go │ │ │ ├── postgres │ │ │ │ ├── pg_subscription_store.go │ │ │ │ └── pg_subscription_store_test.go │ │ │ └── subscription_store.go │ │ ├── subscription.go │ │ └── subscription_test.go │ │ └── webhook.go │ ├── replication │ ├── instrumentation │ │ └── instrumented_replication_handler.go │ ├── mocks │ │ ├── mock_replication_handler.go │ │ └── mock_replication_lsn_parser.go │ ├── postgres │ │ ├── helper_test.go │ │ ├── pg_lsn_parser.go │ │ ├── pg_replication_handler.go │ │ └── pg_replication_handler_test.go │ └── replication_handler.go │ └── wal_data.go ├── snapshot2pg.env ├── snapshot2pg.yaml ├── 
tools ├── build-cli-definition.go ├── transformer-definition │ └── build-transformers-definition.go └── webhook │ └── webhook_server.go ├── transformer_rules.yaml └── transformers-definition.json /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "gomod" 9 | directory: "/" 10 | schedule: 11 | interval: "weekly" 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bin/license-header-checker 2 | pgstream 3 | tools/webhook/webhook 4 | 5 | *.prof 6 | 7 | coverage 8 | 9 | # misc 10 | .DS_Store 11 | 12 | .idea/ 13 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | run: 3 | go: "1.24" 4 | linters: 5 | default: none 6 | enable: 7 | - copyloopvar 8 | - errorlint 9 | - forbidigo 10 | - forcetypeassert 11 | - gochecksumtype 12 | - goconst 13 | - gocritic 14 | - gosec 15 | - govet 16 | - ineffassign 17 | - makezero 18 | - misspell 19 | - nakedret 20 | - nolintlint 21 | - prealloc 22 | - staticcheck 23 | - unused 24 | settings: 25 | errorlint: 26 | errorf: true 27 | forbidigo: 28 | forbid: 29 | - pattern: fmt.Print* 30 | goconst: 31 | numbers: true 32 | gocritic: 33 | disabled-checks: 34 | - exitAfterDefer 35 | - ifElseChain 36 | - commentFormatting 37 | gomodguard: 38 | blocked: 39 | modules: 40 | - github.com/pkg/errors: 41 | 
recommendations: 42 | - errors 43 | - fmt 44 | reason: This package is deprecated, use `fmt.Errorf` with `%w` instead 45 | gosec: 46 | excludes: 47 | - G115 # Potential integer overflow when converting between integer types 48 | - G108 # Profiling endpoint automatically exposed on /debug/pprof 49 | severity: low 50 | makezero: 51 | always: false 52 | staticcheck: 53 | checks: 54 | - -SA1019 55 | - -ST1000 56 | - -ST1005 57 | - all 58 | exclusions: 59 | generated: lax 60 | presets: 61 | - comments 62 | - common-false-positives 63 | - legacy 64 | - std-error-handling 65 | rules: 66 | - linters: 67 | - goconst 68 | path: (.+)_test\.go 69 | paths: 70 | - third_party$ 71 | - builtin$ 72 | - examples$ 73 | formatters: 74 | enable: 75 | - gofumpt 76 | settings: 77 | gofumpt: 78 | module-path: pgstream 79 | extra-rules: false 80 | exclusions: 81 | generated: lax 82 | paths: 83 | - third_party$ 84 | - builtin$ 85 | - examples$ 86 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v4.4.0 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - id: check-yaml 10 | - id: check-shebang-scripts-are-executable 11 | - id: check-executables-have-shebangs 12 | - id: check-merge-conflict 13 | - repo: https://github.com/golangci/golangci-lint 14 | rev: v2.0.2 15 | hooks: 16 | - id: golangci-lint-full 17 | args: ["--timeout=10m", "--config=.golangci.yml"] 18 | - repo: local 19 | hooks: 20 | - id: license-header-checker 21 | name: license header check 22 | language: system 23 | entry: make license-check 24 | pass_filenames: false 25 | -------------------------------------------------------------------------------- /Dockerfile: 
-------------------------------------------------------------------------------- 1 | FROM scratch 2 | COPY pgstream /usr/bin/pgstream 3 | ENTRYPOINT [ "/usr/bin/pgstream" ] 4 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: lint 2 | lint: ## Lint source code 3 | @echo "Linting source code..." 4 | @go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.0.2 5 | @golangci-lint run 6 | 7 | .PHONY: test 8 | test: 9 | @go test -coverprofile=coverage -timeout 10m -race -cover -failfast ./... 10 | 11 | .PHONY: integration-test 12 | integration-test: 13 | @PGSTREAM_INTEGRATION_TESTS=true go test -timeout 180s github.com/xataio/pgstream/pkg/stream/integration 14 | 15 | .PHONY: license-check 16 | license-check: 17 | @curl -s https://raw.githubusercontent.com/lluissm/license-header-checker/master/install.sh | bash 18 | @./bin/license-header-checker -a -r ./license-header.txt . go 19 | 20 | .PHONY: gen-migrations 21 | gen-migrations: 22 | @go install github.com/go-bindata/go-bindata/... 23 | @go-bindata -o migrations/postgres/migrations.go -pkg pgmigrations -ignore migrations.go -prefix "migrations/postgres/" migrations/postgres/ 24 | 25 | .PHONY: generate 26 | generate: 27 | # Generate the cli-definition.json file 28 | go run tools/build-cli-definition.go 29 | go run tools/transformer-definition/build-transformers-definition.go 30 | 31 | GIT_COMMIT := $(shell git rev-parse --short HEAD) 32 | .PHONY: build 33 | build: 34 | @go build -ldflags "-X github.com/xataio/pgstream/cmd.Env=development -X github.com/xataio/pgstream/cmd.Version=$(GIT_COMMIT)" . 35 | 36 | .PHONY: build-linux-amd64 37 | build-linux-amd64: 38 | @GOOS=linux GOARCH=amd64 go build -ldflags "-X github.com/xataio/pgstream/cmd.Env=development -X github.com/xataio/pgstream/cmd.Version=$(GIT_COMMIT)" . 
39 | 40 | .PHONY: build-linux-arm64 41 | build-linux-arm64: 42 | @GOOS=linux GOARCH=arm64 go build -ldflags "-X github.com/xataio/pgstream/cmd.Env=development -X github.com/xataio/pgstream/cmd.Version=$(GIT_COMMIT)" . 43 | -------------------------------------------------------------------------------- /brand-kit/README.md: -------------------------------------------------------------------------------- 1 | ## Logos 2 | 3 | Our logo combines a symbol and wordmark. For dark backgrounds, use the inverted (white) logo, and for light backgrounds, use the black logo. Maintain the logo's aspect ratio, provide clear space, and avoid unauthorized modifications. 4 | 5 | | Symbol | Wordmark | 6 | |-------------------------------------------------------|-------------------------------------------------------------| 7 | | ![SymbolWhite](logo/symbol/pgstream-white-symbol.svg) | ![WordmarkWhite](logo/wordmark/pgstream-white-wordmark.svg) | 8 | | ![SymbolBlack](logo/symbol/pgstream-black-symbol.svg) | ![WordmarkBlack](logo/wordmark/pgstream-black-wordmark.svg) | 9 | 10 | ## Banner 11 | 12 | The project banner is a key branding element that can be prominently featured at the top of project documentation, websites, social media profiles, and even on swag stickers. It serves as a visual representation of our project's identity and can be used to create a strong connection with our brand. Ensure that the banner is displayed at its original size to maintain clarity and visibility across various applications. 
13 | 14 | ![Banner](banner/pgstream-banner.svg) 15 | -------------------------------------------------------------------------------- /brand-kit/banner/pgstream-banner@2x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xataio/pgstream/4e5d92e02c61380bad5b9b75adb2f532e88b7675/brand-kit/banner/pgstream-banner@2x.png -------------------------------------------------------------------------------- /build/docker/postgres/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM postgres:17.2 2 | 3 | RUN apt-get update \ 4 | && apt-get install -y postgresql-17-wal2json \ 5 | -------------------------------------------------------------------------------- /build/docker/postgres/postgresql.conf: -------------------------------------------------------------------------------- 1 | # CONNECTION 2 | listen_addresses = '*' 3 | 4 | # MODULES 5 | shared_preload_libraries = 'wal2json' 6 | 7 | # REPLICATION 8 | wal_level = logical # minimal, archive, hot_standby, or logical (change requires restart) 9 | max_wal_senders = 4 # max number of walsender processes (change requires restart) 10 | #wal_keep_segments = 4 # in logfile segments, 16MB each; 0 disables 11 | #wal_sender_timeout = 60s # in milliseconds; 0 disables 12 | max_replication_slots = 4 # max number of replication slots (change requires restart) 13 | -------------------------------------------------------------------------------- /build/docker/signoz/clickhouse/custom-function.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | executable 4 | histogramQuantile 5 | Float64 6 | 7 | Array(Float64) 8 | buckets 9 | 10 | 11 | Array(Float64) 12 | counts 13 | 14 | 15 | Float64 16 | quantile 17 | 18 | CSV 19 | ./histogramQuantile 20 | 21 | 22 | -------------------------------------------------------------------------------- /build/docker/signoz/clickhouse/storage.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 10485760 7 | 8 | 9 | s3 10 | 16 | https://BUCKET-NAME.s3-REGION-NAME.amazonaws.com/data/ 17 | ACCESS-KEY-ID 18 | SECRET-ACCESS-KEY 19 | 21 | 22 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | default 32 | 33 | 34 | s3 35 | 0 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /build/docker/signoz/clickhouse/user_scripts/histogramQuantile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xataio/pgstream/4e5d92e02c61380bad5b9b75adb2f532e88b7675/build/docker/signoz/clickhouse/user_scripts/histogramQuantile -------------------------------------------------------------------------------- /build/docker/signoz/otel-collector-opamp-config.yaml: -------------------------------------------------------------------------------- 1 | server_endpoint: ws://signoz:4320/v1/opamp 2 | -------------------------------------------------------------------------------- /build/docker/signoz/prometheus.yml: -------------------------------------------------------------------------------- 1 | # my global config 2 | global: 3 | scrape_interval: 5s # Set the scrape interval to every 15 seconds. Default is every 1 minute. 4 | evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. 5 | # scrape_timeout is set to the global default (10s). 6 | 7 | # Alertmanager configuration 8 | alerting: 9 | alertmanagers: 10 | - static_configs: 11 | - targets: 12 | - alertmanager:9093 13 | 14 | # Load rules once and periodically evaluate them according to the global 'evaluation_interval'. 15 | rule_files: [] 16 | # - "first_rules.yml" 17 | # - "second_rules.yml" 18 | # - 'alerts.yml' 19 | 20 | # A scrape configuration containing exactly one endpoint to scrape: 21 | # Here it's Prometheus itself. 
22 | scrape_configs: [] 23 | 24 | remote_read: 25 | - url: tcp://clickhouse:9000/signoz_metrics 26 | -------------------------------------------------------------------------------- /cmd/config/test/test_transformer_rules.yaml: -------------------------------------------------------------------------------- 1 | transformations: 2 | validation_mode: relaxed 3 | table_transformers: 4 | - schema: public 5 | table: test 6 | column_transformers: 7 | name: 8 | name: greenmask_firstname 9 | dynamic_parameters: 10 | gender: 11 | column: sex 12 | -------------------------------------------------------------------------------- /docs/img/pgstream_arch_v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xataio/pgstream/4e5d92e02c61380bad5b9b75adb2f532e88b7675/docs/img/pgstream_arch_v1.png -------------------------------------------------------------------------------- /docs/tutorials/kafka2os_tutorial.env: -------------------------------------------------------------------------------- 1 | # Listener config 2 | PGSTREAM_KAFKA_READER_SERVERS="localhost:9092" 3 | PGSTREAM_KAFKA_TOPIC_NAME=pgstream 4 | PGSTREAM_KAFKA_READER_CONSUMER_GROUP_ID=pgstream-opensearch-consumer-group 5 | 6 | # Processor config 7 | PGSTREAM_OPENSEARCH_STORE_URL="http://admin:admin@localhost:9200" 8 | PGSTREAM_SEARCH_INDEXER_BATCH_SIZE=25 9 | PGSTREAM_SEARCH_INDEXER_BATCH_TIMEOUT=5s 10 | -------------------------------------------------------------------------------- /docs/tutorials/kafka2os_tutorial.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | kafka: 3 | servers: ["localhost:9092"] 4 | topic: 5 | name: "pgstream" 6 | consumer_group: 7 | id: "pgstream-opensearch-consumer-group" 8 | start_offset: "earliest" # options are earliest or latest 9 | target: 10 | search: 11 | engine: "opensearch" # options are elasticsearch or opensearch 12 | url: "http://admin:admin@localhost:9200" # 
URL of the search engine 13 | batch: 14 | timeout: 5000 # batch timeout in milliseconds 15 | size: 25 # number of messages in a batch 16 | -------------------------------------------------------------------------------- /docs/tutorials/kafka2pg_tutorial.env: -------------------------------------------------------------------------------- 1 | # Listener config 2 | PGSTREAM_KAFKA_READER_SERVERS="localhost:9092" 3 | PGSTREAM_KAFKA_TOPIC_NAME=pgstream 4 | PGSTREAM_KAFKA_READER_CONSUMER_GROUP_ID=pgstream-postgres-consumer-group 5 | 6 | # Processor config 7 | PGSTREAM_POSTGRES_WRITER_TARGET_URL="postgres://postgres:postgres@localhost:7654?sslmode=disable" 8 | PGSTREAM_POSTGRES_WRITER_BATCH_SIZE=25 9 | PGSTREAM_POSTGRES_WRITER_BATCH_TIMEOUT=5s 10 | PGSTREAM_POSTGRES_WRITER_SCHEMALOG_STORE_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable" 11 | -------------------------------------------------------------------------------- /docs/tutorials/kafka2pg_tutorial.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | kafka: 3 | servers: ["localhost:9092"] 4 | topic: 5 | name: "pgstream" 6 | consumer_group: 7 | id: "pgstream-postgres-consumer-group" 8 | start_offset: "earliest" # options are earliest or latest 9 | target: 10 | postgres: 11 | url: "postgres://postgres:postgres@localhost:7654?sslmode=disable" 12 | batch: 13 | timeout: 5000 # batch timeout in milliseconds 14 | size: 25 # number of messages in a batch 15 | schema_log_store_url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" 16 | disable_triggers: false # whether to disable triggers on the target database 17 | on_conflict_action: "nothing" # options are update, nothing or error 18 | -------------------------------------------------------------------------------- /docs/tutorials/pg2kafka_tutorial.env: -------------------------------------------------------------------------------- 1 | # Listener config 2 | 
PGSTREAM_POSTGRES_LISTENER_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable" 3 | PGSTREAM_POSTGRES_REPLICATION_SLOT_NAME=pgstream_tutorial_slot 4 | 5 | # Processor config 6 | PGSTREAM_INJECTOR_STORE_POSTGRES_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable" 7 | PGSTREAM_KAFKA_WRITER_SERVERS="localhost:9092" 8 | PGSTREAM_KAFKA_TOPIC_NAME=pgstream 9 | PGSTREAM_KAFKA_TOPIC_PARTITIONS=1 10 | PGSTREAM_KAFKA_TOPIC_REPLICATION_FACTOR=1 11 | PGSTREAM_KAFKA_TOPIC_AUTO_CREATE=true 12 | -------------------------------------------------------------------------------- /docs/tutorials/pg2kafka_tutorial.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | postgres: 3 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" 4 | mode: replication 5 | replication: 6 | replication_slot: "pgstream_tutorial_slot" 7 | target: 8 | kafka: 9 | servers: ["localhost:9092"] 10 | topic: 11 | name: "pgstream" # name of the Kafka topic 12 | partitions: 1 # number of partitions for the topic 13 | replication_factor: 1 # replication factor for the topic 14 | auto_create: true # whether to automatically create the topic if it doesn't exist 15 | modifiers: 16 | injector: 17 | enabled: true # whether to inject pgstream metadata into the WAL events 18 | -------------------------------------------------------------------------------- /docs/tutorials/pg2os_tutorial.env: -------------------------------------------------------------------------------- 1 | # Listener config 2 | PGSTREAM_POSTGRES_LISTENER_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable" 3 | PGSTREAM_POSTGRES_REPLICATION_SLOT_NAME=pgstream_tutorial_slot 4 | 5 | # Processor config 6 | PGSTREAM_INJECTOR_STORE_POSTGRES_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable" 7 | PGSTREAM_OPENSEARCH_STORE_URL="http://admin:admin@localhost:9200" 8 | PGSTREAM_SEARCH_INDEXER_BATCH_SIZE=25 9 | 
PGSTREAM_SEARCH_INDEXER_BATCH_TIMEOUT=5s 10 | -------------------------------------------------------------------------------- /docs/tutorials/pg2os_tutorial.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | postgres: 3 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" 4 | mode: replication 5 | replication: 6 | replication_slot: "pgstream_tutorial_slot" 7 | target: 8 | search: 9 | engine: "opensearch" # options are elasticsearch or opensearch 10 | url: "http://localhost:9200" # URL of the search engine 11 | batch: 12 | timeout: 5000 # batch timeout in milliseconds 13 | size: 25 # number of messages in a batch 14 | modifiers: 15 | injector: 16 | enabled: true # whether to inject pgstream metadata into the WAL events 17 | -------------------------------------------------------------------------------- /docs/tutorials/pg2pg_transformer_tutorial.env: -------------------------------------------------------------------------------- 1 | # Listener config 2 | PGSTREAM_POSTGRES_LISTENER_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable" 3 | PGSTREAM_POSTGRES_REPLICATION_SLOT_NAME=pgstream_tutorial_slot 4 | 5 | # Processor config 6 | PGSTREAM_TRANSFORMER_RULES_FILE="docs/tutorials/tutorial_transformer_rules.yaml" 7 | PGSTREAM_POSTGRES_WRITER_TARGET_URL="postgres://postgres:postgres@localhost:7654?sslmode=disable" 8 | PGSTREAM_POSTGRES_WRITER_BATCH_SIZE=25 9 | PGSTREAM_POSTGRES_WRITER_BATCH_TIMEOUT=5s 10 | PGSTREAM_POSTGRES_WRITER_SCHEMALOG_STORE_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable" 11 | -------------------------------------------------------------------------------- /docs/tutorials/pg2pg_transformer_tutorial.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | postgres: 3 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" 4 | mode: replication # options are replication, snapshot or 
snapshot_and_replication 5 | replication: 6 | replication_slot: pgstream_tutorial_slot 7 | 8 | target: 9 | postgres: 10 | url: "postgres://postgres:postgres@localhost:7654?sslmode=disable" 11 | batch: 12 | timeout: 5000 # batch timeout in milliseconds 13 | size: 25 # number of messages in a batch 14 | disable_triggers: false # whether to disable triggers on the target database 15 | on_conflict_action: "nothing" # options are update, nothing or error 16 | 17 | modifiers: 18 | transformations: 19 | validation_mode: relaxed 20 | table_transformers: 21 | - schema: public 22 | table: test 23 | column_transformers: 24 | email: 25 | name: neosync_email 26 | parameters: 27 | preserve_length: true 28 | preserve_domain: true 29 | email_type: fullname 30 | name: 31 | name: greenmask_firstname 32 | parameters: 33 | generator: deterministic 34 | gender: Female 35 | -------------------------------------------------------------------------------- /docs/tutorials/pg2pg_tutorial.env: -------------------------------------------------------------------------------- 1 | # Listener config 2 | PGSTREAM_POSTGRES_LISTENER_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable" 3 | PGSTREAM_POSTGRES_REPLICATION_SLOT_NAME=pgstream_tutorial_slot 4 | 5 | # Processor config 6 | PGSTREAM_POSTGRES_WRITER_TARGET_URL="postgres://postgres:postgres@localhost:7654?sslmode=disable" 7 | PGSTREAM_POSTGRES_WRITER_BATCH_SIZE=25 8 | PGSTREAM_POSTGRES_WRITER_BATCH_TIMEOUT=5s 9 | PGSTREAM_POSTGRES_WRITER_SCHEMALOG_STORE_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable" 10 | -------------------------------------------------------------------------------- /docs/tutorials/pg2pg_tutorial.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | postgres: 3 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" 4 | mode: replication # options are replication, snapshot or snapshot_and_replication 5 | replication: 6 | 
replication_slot: pgstream_tutorial_slot 7 | 8 | target: 9 | postgres: 10 | url: "postgres://postgres:postgres@localhost:7654?sslmode=disable" 11 | batch: 12 | timeout: 5000 # batch timeout in milliseconds 13 | size: 25 # number of messages in a batch 14 | disable_triggers: false # whether to disable triggers on the target database 15 | on_conflict_action: "nothing" # options are update, nothing or error 16 | -------------------------------------------------------------------------------- /docs/tutorials/pg2webhook_tutorial.env: -------------------------------------------------------------------------------- 1 | # Listener config 2 | PGSTREAM_POSTGRES_LISTENER_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable" 3 | PGSTREAM_POSTGRES_REPLICATION_SLOT_NAME=pgstream_tutorial_slot 4 | 5 | # Processor config 6 | PGSTREAM_WEBHOOK_SUBSCRIPTION_STORE_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable" 7 | PGSTREAM_WEBHOOK_SUBSCRIPTION_STORE_CACHE_ENABLED=true 8 | PGSTREAM_WEBHOOK_SUBSCRIPTION_STORE_CACHE_REFRESH_INTERVAL="60s" 9 | PGSTREAM_INJECTOR_STORE_POSTGRES_URL="postgres://postgres:postgres@localhost:5432?sslmode=disable" 10 | -------------------------------------------------------------------------------- /docs/tutorials/pg2webhook_tutorial.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | postgres: 3 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" 4 | mode: replication # options are replication, snapshot or snapshot_and_replication 5 | replication: 6 | replication_slot: pgstream_tutorial_slot 7 | 8 | target: 9 | webhooks: 10 | subscriptions: 11 | store: 12 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" # URL of the database where the webhook subscriptions are stored 13 | cache: 14 | enabled: true # whether to enable caching for the subscription store 15 | refresh_interval: 60 # interval in seconds to refresh the cache 16 | 17 | modifiers: 18 | 
mode: full # options are full, schema or data
// Client is the minimal HTTP client contract used by pgstream. Its single
// Do method matches (*net/http.Client).Do, so the standard client satisfies
// it directly and tests can swap in a mock.
type Client interface {
	Do(*http.Request) (*http.Response, error)
}

// Server is the minimal HTTP server lifecycle contract: Start begins
// serving on the given address, and Shutdown stops the server, bounded by
// the supplied context.
type Server interface {
	Start(address string) error
	Shutdown(context.Context) error
}
-------------------------------------------------------------------------------- /internal/http/mocks/mock_http_client.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package mocks 4 | 5 | import "net/http" 6 | 7 | type Client struct { 8 | DoFn func(*http.Request) (*http.Response, error) 9 | } 10 | 11 | func (m *Client) Do(req *http.Request) (*http.Response, error) { 12 | return m.DoFn(req) 13 | } 14 | -------------------------------------------------------------------------------- /internal/json/json.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package json 4 | 5 | import ( 6 | json "github.com/bytedance/sonic" 7 | ) 8 | 9 | func Unmarshal(b []byte, v any) error { 10 | return json.Unmarshal(b, v) 11 | } 12 | 13 | func Marshal(v any) ([]byte, error) { 14 | return json.Marshal(v) 15 | } 16 | -------------------------------------------------------------------------------- /internal/postgres/errors.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package postgres 4 | 5 | import ( 6 | "errors" 7 | "fmt" 8 | "strings" 9 | 10 | "github.com/jackc/pgx/v5" 11 | "github.com/jackc/pgx/v5/pgconn" 12 | ) 13 | 14 | var ( 15 | ErrConnTimeout = errors.New("connection timeout") 16 | ErrNoRows = errors.New("no rows") 17 | ) 18 | 19 | type ErrRelationDoesNotExist struct { 20 | Details string 21 | } 22 | 23 | func (e *ErrRelationDoesNotExist) Error() string { 24 | return fmt.Sprintf("relation does not exist: %s", e.Details) 25 | } 26 | 27 | type ErrConstraintViolation struct { 28 | Details string 29 | } 30 | 31 | func (e *ErrConstraintViolation) Error() string { 32 | return fmt.Sprintf("constraint violation: %s", e.Details) 33 | } 34 | 35 | type ErrSyntaxError struct { 36 | Details string 37 | } 38 | 39 | func (e 
*ErrSyntaxError) Error() string { 40 | return fmt.Sprintf("syntax error: %s", e.Details) 41 | } 42 | 43 | type ErrDataException struct { 44 | Details string 45 | } 46 | 47 | func (e *ErrDataException) Error() string { 48 | return fmt.Sprintf("data exception: %s", e.Details) 49 | } 50 | 51 | type ErrRelationAlreadyExists struct { 52 | Details string 53 | } 54 | 55 | func (e *ErrRelationAlreadyExists) Error() string { 56 | return fmt.Sprintf("relation already exists: %v", e.Details) 57 | } 58 | 59 | func mapError(err error) error { 60 | if pgconn.Timeout(err) { 61 | return ErrConnTimeout 62 | } 63 | 64 | if errors.Is(err, pgx.ErrNoRows) { 65 | return ErrNoRows 66 | } 67 | 68 | var pgErr *pgconn.PgError 69 | if errors.As(err, &pgErr) { 70 | if pgErr.Code == "42P01" { 71 | return &ErrRelationDoesNotExist{ 72 | Details: pgErr.Message, 73 | } 74 | } 75 | if pgErr.Code == "42601" { 76 | return &ErrSyntaxError{ 77 | Details: pgErr.Message, 78 | } 79 | } 80 | // Class 22 — Data Exception 81 | if strings.HasPrefix(pgErr.Code, "22") { 82 | return &ErrDataException{ 83 | Details: pgErr.Message, 84 | } 85 | } 86 | // Class 23 — Integrity Constraint Violation 87 | if strings.HasPrefix(pgErr.Code, "23") { 88 | return &ErrConstraintViolation{ 89 | Details: pgErr.Message, 90 | } 91 | } 92 | } 93 | 94 | return err 95 | } 96 | -------------------------------------------------------------------------------- /internal/postgres/instrumentation/instrumented_pg_dump_restore.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package instrumentation 4 | 5 | import ( 6 | "context" 7 | 8 | pglib "github.com/xataio/pgstream/internal/postgres" 9 | "github.com/xataio/pgstream/pkg/otel" 10 | "go.opentelemetry.io/otel/attribute" 11 | "go.opentelemetry.io/otel/trace" 12 | ) 13 | 14 | type PGDumpRestore struct { 15 | pgdumpFn pglib.PGDumpFn 16 | pgrestoreFn pglib.PGRestoreFn 17 | tracer trace.Tracer 18 | } 19 | 20 | 
func NewPGDumpFn(pgdumpFn pglib.PGDumpFn, instrumentation *otel.Instrumentation) pglib.PGDumpFn { 21 | pgdr := &PGDumpRestore{ 22 | pgdumpFn: pgdumpFn, 23 | tracer: instrumentation.Tracer, 24 | } 25 | return pgdr.PGDump 26 | } 27 | 28 | func NewPGRestoreFn(pgrestoreFn pglib.PGRestoreFn, instrumentation *otel.Instrumentation) pglib.PGRestoreFn { 29 | pgdr := &PGDumpRestore{ 30 | pgrestoreFn: pgrestoreFn, 31 | tracer: instrumentation.Tracer, 32 | } 33 | return pgdr.PGRestore 34 | } 35 | 36 | func (i *PGDumpRestore) PGDump(ctx context.Context, opts pglib.PGDumpOptions) (dump []byte, err error) { 37 | ctx, span := otel.StartSpan(ctx, i.tracer, "pgdump", trace.WithAttributes([]attribute.KeyValue{ 38 | {Key: "schemas", Value: attribute.StringSliceValue(opts.Schemas)}, 39 | {Key: "tables", Value: attribute.StringSliceValue(opts.Tables)}, 40 | {Key: "exclude_tables", Value: attribute.StringSliceValue(opts.ExcludeTables)}, 41 | {Key: "clean", Value: attribute.BoolValue(opts.Clean)}, 42 | }...)) 43 | defer otel.CloseSpan(span, err) 44 | return i.pgdumpFn(ctx, opts) 45 | } 46 | 47 | func (i *PGDumpRestore) PGRestore(ctx context.Context, opts pglib.PGRestoreOptions, dump []byte) (out string, err error) { 48 | ctx, span := otel.StartSpan(ctx, i.tracer, "pgrestore") 49 | defer otel.CloseSpan(span, err) 50 | return i.pgrestoreFn(ctx, opts, dump) 51 | } 52 | -------------------------------------------------------------------------------- /internal/postgres/instrumentation/instrumented_querier_builder.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package instrumentation 4 | 5 | import ( 6 | "context" 7 | 8 | pglib "github.com/xataio/pgstream/internal/postgres" 9 | "github.com/xataio/pgstream/pkg/otel" 10 | ) 11 | 12 | func NewQuerierBuilder(b pglib.QuerierBuilder, i *otel.Instrumentation) (pglib.QuerierBuilder, error) { 13 | return func(ctx context.Context, url string) (pglib.Querier, error) { 
14 | querier, err := b(ctx, url) 15 | if err != nil { 16 | return nil, err 17 | } 18 | return NewQuerier(querier, i) 19 | }, nil 20 | } 21 | -------------------------------------------------------------------------------- /internal/postgres/instrumentation/instrumented_tx.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package instrumentation 4 | 5 | import ( 6 | "context" 7 | 8 | pglib "github.com/xataio/pgstream/internal/postgres" 9 | "github.com/xataio/pgstream/pkg/otel" 10 | "go.opentelemetry.io/otel/trace" 11 | ) 12 | 13 | type Tx struct { 14 | inner pglib.Tx 15 | tracer trace.Tracer 16 | } 17 | 18 | func NewTx(t pglib.Tx, instrumentation *otel.Instrumentation) pglib.Tx { 19 | if instrumentation == nil { 20 | return t 21 | } 22 | 23 | return &Tx{ 24 | inner: t, 25 | tracer: instrumentation.Tracer, 26 | } 27 | } 28 | 29 | func (i *Tx) Query(ctx context.Context, query string, args ...any) (rows pglib.Rows, err error) { 30 | queryAttrs := queryAttributes(query) 31 | ctx, span := otel.StartSpan(ctx, i.tracer, "tx.Query", trace.WithAttributes(queryAttrs...)) 32 | defer otel.CloseSpan(span, err) 33 | return i.inner.Query(ctx, query, args...) 34 | } 35 | 36 | func (i *Tx) QueryRow(ctx context.Context, query string, args ...any) pglib.Row { 37 | queryAttrs := queryAttributes(query) 38 | ctx, span := otel.StartSpan(ctx, i.tracer, "tx.QueryRow", trace.WithAttributes(queryAttrs...)) 39 | defer otel.CloseSpan(span, nil) 40 | return i.inner.QueryRow(ctx, query, args...) 41 | } 42 | 43 | func (i *Tx) Exec(ctx context.Context, query string, args ...any) (tag pglib.CommandTag, err error) { 44 | queryAttrs := queryAttributes(query) 45 | ctx, span := otel.StartSpan(ctx, i.tracer, "tx.Exec", trace.WithAttributes(queryAttrs...)) 46 | defer otel.CloseSpan(span, err) 47 | return i.inner.Exec(ctx, query, args...) 
// Mapper is a hand-rolled mock of the OID-to-type mapper; TypeForOID simply
// delegates to the configurable TypeForOIDFn.
type Mapper struct {
	TypeForOIDFn func(oid uint32) (string, error)
}

// TypeForOID forwards the lookup to TypeForOIDFn.
func (m *Mapper) TypeForOID(oid uint32) (string, error) {
	return m.TypeForOIDFn(oid)
}
// Row mocks a single database row; Scan hands its destinations to ScanFn.
type Row struct {
	ScanFn func(args ...any) error
}

// Scan forwards the destination arguments to ScanFn.
func (m *Row) Scan(args ...any) error {
	return m.ScanFn(args...)
}
44 | } 45 | 46 | func (m *Rows) Values() ([]any, error) { 47 | return m.ValuesFn() 48 | } 49 | 50 | func (m *Rows) RawValues() [][]byte { 51 | return m.RawValuesFn() 52 | } 53 | 54 | func (m *Rows) Conn() *pgx.Conn { 55 | return &pgx.Conn{} 56 | } 57 | -------------------------------------------------------------------------------- /internal/postgres/mocks/mock_tx.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package mocks 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/xataio/pgstream/internal/postgres" 9 | ) 10 | 11 | type Tx struct { 12 | QueryRowFn func(ctx context.Context, query string, args ...any) postgres.Row 13 | QueryFn func(ctx context.Context, query string, args ...any) (postgres.Rows, error) 14 | ExecFn func(ctx context.Context, i uint, query string, args ...any) (postgres.CommandTag, error) 15 | CopyFromFn func(ctx context.Context, tableName string, columnNames []string, srcRows [][]any) (int64, error) 16 | execCallCount uint 17 | } 18 | 19 | func (m *Tx) QueryRow(ctx context.Context, query string, args ...any) postgres.Row { 20 | return m.QueryRowFn(ctx, query, args...) 21 | } 22 | 23 | func (m *Tx) Query(ctx context.Context, query string, args ...any) (postgres.Rows, error) { 24 | return m.QueryFn(ctx, query, args...) 25 | } 26 | 27 | func (m *Tx) Exec(ctx context.Context, query string, args ...any) (postgres.CommandTag, error) { 28 | m.execCallCount++ 29 | return m.ExecFn(ctx, m.execCallCount, query, args...) 
30 | } 31 | 32 | func (m *Tx) CopyFrom(ctx context.Context, tableName string, columnNames []string, srcRows [][]any) (rowCount int64, err error) { 33 | return m.CopyFromFn(ctx, tableName, columnNames, srcRows) 34 | } 35 | -------------------------------------------------------------------------------- /internal/postgres/pg_mapper.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package postgres 4 | 5 | import ( 6 | "context" 7 | "fmt" 8 | 9 | "github.com/jackc/pgx/v5/pgtype" 10 | ) 11 | 12 | type Mapper struct { 13 | querier Querier 14 | pgMap *pgtype.Map 15 | } 16 | 17 | func NewMapper(conn Querier) *Mapper { 18 | return &Mapper{ 19 | querier: conn, 20 | pgMap: pgtype.NewMap(), 21 | } 22 | } 23 | 24 | func (m *Mapper) TypeForOID(ctx context.Context, oid uint32) (string, error) { 25 | dataType, found := m.pgMap.TypeForOID(oid) 26 | if !found { 27 | return m.queryType(ctx, oid) 28 | } 29 | return dataType.Name, nil 30 | } 31 | 32 | func (m *Mapper) queryType(ctx context.Context, oid uint32) (string, error) { 33 | var dataType string 34 | if err := m.querier.QueryRow(ctx, fmt.Sprintf("SELECT pg_typeof(%d)", oid)).Scan(&dataType); err != nil { 35 | return "", fmt.Errorf("selecting type for OID %d: %w", oid, err) 36 | } 37 | return dataType, nil 38 | } 39 | -------------------------------------------------------------------------------- /internal/postgres/pg_querier.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package postgres 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/jackc/pgx/v5" 9 | "github.com/jackc/pgx/v5/pgconn" 10 | ) 11 | 12 | type Querier interface { 13 | Query(ctx context.Context, query string, args ...any) (Rows, error) 14 | QueryRow(ctx context.Context, query string, args ...any) Row 15 | Exec(ctx context.Context, query string, args ...any) (CommandTag, error) 16 | 
ExecInTx(ctx context.Context, fn func(tx Tx) error) error 17 | ExecInTxWithOptions(ctx context.Context, fn func(tx Tx) error, txOpts TxOptions) error 18 | CopyFrom(ctx context.Context, tableName string, columnNames []string, srcRows [][]any) (int64, error) 19 | Ping(ctx context.Context) error 20 | Close(ctx context.Context) error 21 | } 22 | 23 | type Row interface { 24 | pgx.Row 25 | } 26 | 27 | type Rows interface { 28 | pgx.Rows 29 | } 30 | 31 | type CommandTag struct { 32 | pgconn.CommandTag 33 | } 34 | 35 | type mappedRow struct { 36 | inner Row 37 | } 38 | 39 | func (mr *mappedRow) Scan(dest ...any) error { 40 | err := mr.inner.Scan(dest...) 41 | return mapError(err) 42 | } 43 | -------------------------------------------------------------------------------- /internal/postgres/pg_querier_builder.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package postgres 4 | 5 | import "context" 6 | 7 | type QuerierBuilder func(context.Context, string) (Querier, error) 8 | 9 | func ConnBuilder(ctx context.Context, url string) (Querier, error) { 10 | return NewConn(ctx, url) 11 | } 12 | 13 | func ConnPoolBuilder(ctx context.Context, url string) (Querier, error) { 14 | return NewConnPool(ctx, url) 15 | } 16 | -------------------------------------------------------------------------------- /internal/postgres/pg_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package postgres 4 | 5 | import ( 6 | "testing" 7 | 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func Test_DefaultReplicationSlotName(t *testing.T) { 12 | t.Parallel() 13 | 14 | tests := []struct { 15 | name string 16 | wantSlotName string 17 | }{ 18 | { 19 | name: "example", 20 | wantSlotName: "pgstream_example_slot", 21 | }, 22 | { 23 | name: "example.com", 24 | wantSlotName: "pgstream_example_com_slot", 25 | }, 26 | { 27 | 
name: "example.test.com", 28 | wantSlotName: "pgstream_example_test_com_slot", 29 | }, 30 | } 31 | 32 | for _, tc := range tests { 33 | t.Run(tc.name, func(t *testing.T) { 34 | t.Parallel() 35 | 36 | slotName := DefaultReplicationSlotName(tc.name) 37 | require.Equal(t, tc.wantSlotName, slotName) 38 | }) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /internal/postgres/pg_tx.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package postgres 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/jackc/pgx/v5" 9 | ) 10 | 11 | type Tx interface { 12 | Query(ctx context.Context, query string, args ...any) (Rows, error) 13 | QueryRow(ctx context.Context, query string, args ...any) Row 14 | Exec(ctx context.Context, query string, args ...any) (CommandTag, error) 15 | CopyFrom(ctx context.Context, tableName string, columnNames []string, srcRows [][]any) (int64, error) 16 | } 17 | 18 | type TxIsolationLevel string 19 | 20 | const ( 21 | Serializable TxIsolationLevel = "serializable" 22 | RepeatableRead TxIsolationLevel = "repeatable read" 23 | ReadCommitted TxIsolationLevel = "read committed" 24 | ReadUncommitted TxIsolationLevel = "read uncommitted" 25 | ) 26 | 27 | type TxAccessMode string 28 | 29 | const ( 30 | ReadWrite TxAccessMode = "read write" 31 | ReadOnly TxAccessMode = "read only" 32 | ) 33 | 34 | type TxOptions struct { 35 | IsolationLevel TxIsolationLevel 36 | AccessMode TxAccessMode 37 | } 38 | 39 | type Txn struct { 40 | pgx.Tx 41 | } 42 | 43 | func (t *Txn) QueryRow(ctx context.Context, query string, args ...any) Row { 44 | row := t.Tx.QueryRow(ctx, query, args...) 45 | return &mappedRow{inner: row} 46 | } 47 | 48 | func (t *Txn) Query(ctx context.Context, query string, args ...any) (Rows, error) { 49 | rows, err := t.Tx.Query(ctx, query, args...) 
50 | return rows, mapError(err) 51 | } 52 | 53 | func (t *Txn) Exec(ctx context.Context, query string, args ...any) (CommandTag, error) { 54 | tag, err := t.Tx.Exec(ctx, query, args...) 55 | return CommandTag{tag}, mapError(err) 56 | } 57 | 58 | func (t *Txn) CopyFrom(ctx context.Context, tableName string, columnNames []string, srcRows [][]any) (int64, error) { 59 | identifier, err := newIdentifier(tableName) 60 | if err != nil { 61 | return -1, err 62 | } 63 | 64 | // sanitize the input, removing any added quotes. The CopyFrom will sanitize 65 | // them and double quotes will cause errors. 66 | for i, c := range columnNames { 67 | columnNames[i] = removeQuotes(c) 68 | } 69 | 70 | return t.Tx.CopyFrom(ctx, identifier, columnNames, pgx.CopyFromRows(srcRows)) 71 | } 72 | 73 | func toTxOptions(opts TxOptions) pgx.TxOptions { 74 | return pgx.TxOptions{ 75 | IsoLevel: pgx.TxIsoLevel(opts.IsolationLevel), 76 | AccessMode: pgx.TxAccessMode(opts.AccessMode), 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /internal/postgres/pg_utils_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package postgres 4 | 5 | import ( 6 | "errors" 7 | "testing" 8 | 9 | "github.com/jackc/pgx/v5" 10 | "github.com/stretchr/testify/require" 11 | ) 12 | 13 | func Test_newIdentifier(t *testing.T) { 14 | t.Parallel() 15 | 16 | tests := []struct { 17 | name string 18 | tableName string 19 | 20 | wantIdentifier pgx.Identifier 21 | wantErr error 22 | }{ 23 | { 24 | name: "ok - table name", 25 | tableName: "test_table", 26 | 27 | wantIdentifier: pgx.Identifier{"test_table"}, 28 | wantErr: nil, 29 | }, 30 | { 31 | name: "ok - qualified table name", 32 | tableName: "test_schema.test_table", 33 | 34 | wantIdentifier: pgx.Identifier{"test_schema", "test_table"}, 35 | wantErr: nil, 36 | }, 37 | { 38 | name: "ok - quoted qualified table name", 39 | tableName: 
`"test_schema"."test_table"`, 40 | 41 | wantIdentifier: pgx.Identifier{"test_schema", "test_table"}, 42 | wantErr: nil, 43 | }, 44 | { 45 | name: "error - invalid table name", 46 | tableName: "invalid.test.table", 47 | 48 | wantIdentifier: nil, 49 | wantErr: errors.New("invalid table name: invalid.test.table"), 50 | }, 51 | } 52 | 53 | for _, tc := range tests { 54 | t.Run(tc.name, func(t *testing.T) { 55 | t.Parallel() 56 | 57 | id, err := newIdentifier(tc.tableName) 58 | require.Equal(t, tc.wantErr, err) 59 | require.Equal(t, tc.wantIdentifier, id) 60 | }) 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /internal/profiling/profiling.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package profiling 4 | 5 | import ( 6 | "fmt" 7 | "net/http" 8 | _ "net/http/pprof" 9 | "os" 10 | "runtime" 11 | "runtime/pprof" 12 | ) 13 | 14 | // StartProfilingServer starts an http server exposing /debug/pprof endpoint 15 | // with profiling insights. 
// StartProfilingServer exposes the net/http/pprof handlers (/debug/pprof)
// on the given address in a background goroutine. Best effort: any server
// error is intentionally ignored.
func StartProfilingServer(address string) {
	// The blank import of net/http/pprof registers the profiling handlers
	// on http.DefaultServeMux, which ListenAndServe(address, nil) serves.
	go func() {
		http.ListenAndServe(address, nil) //nolint:gosec
	}()
}

// StartCPUProfile begins writing a CPU profile to fileName (defaulting to
// "cpu.prof" when empty) and returns a stop function that finishes the
// profile and closes the file.
func StartCPUProfile(fileName string) (func(), error) {
	if fileName == "" {
		fileName = "cpu.prof"
	}
	cpuFile, err := os.Create(fileName)
	if err != nil {
		return nil, fmt.Errorf("could not create CPU profile file: %w", err)
	}

	if err := pprof.StartCPUProfile(cpuFile); err != nil {
		// Fix: previously the file was leaked (left open) on this path.
		cpuFile.Close()
		return nil, fmt.Errorf("could not start CPU profile: %w", err)
	}

	return func() {
		pprof.StopCPUProfile()
		cpuFile.Close()
	}, nil
}

// CreateMemoryProfile writes an allocation profile to fileName (defaulting
// to "mem.prof" when empty).
func CreateMemoryProfile(fileName string) error {
	if fileName == "" {
		fileName = "mem.prof"
	}
	memFile, err := os.Create(fileName)
	if err != nil {
		return fmt.Errorf("could not create memory profile file: %w", err)
	}
	defer memFile.Close()

	runtime.GC() // get up-to-date statistics
	// Lookup("allocs") creates a profile similar to go test -memprofile.
	// Alternatively, use Lookup("heap") for a profile that has inuse_space
	// as the default index.
	if err := pprof.Lookup("allocs").WriteTo(memFile, 0); err != nil {
		return fmt.Errorf("could not write memory profile: %w", err)
	}

	return nil
}
// WeightedSemaphore is a mock implementation of the package's
// WeightedSemaphore interface. Behaviour is injected through the *Fn function
// fields, and the number of Release invocations is tracked atomically so
// tests can assert on call ordering.
type WeightedSemaphore struct {
	TryAcquireFn func(int64) bool
	AcquireFn    func(context.Context, int64) error
	ReleaseFn    func(uint64, int64)
	releaseCalls uint64
}

// TryAcquire delegates to TryAcquireFn.
func (m *WeightedSemaphore) TryAcquire(weight int64) bool {
	return m.TryAcquireFn(weight)
}

// Acquire delegates to AcquireFn.
func (m *WeightedSemaphore) Acquire(ctx context.Context, weight int64) error {
	return m.AcquireFn(ctx, weight)
}

// Release records the invocation, then calls ReleaseFn with the total number
// of release calls observed so far and the released weight.
func (m *WeightedSemaphore) Release(weight int64) {
	atomic.AddUint64(&m.releaseCalls, 1)
	m.ReleaseFn(m.GetReleaseCalls(), weight)
}

// GetReleaseCalls reports how many times Release has been invoked.
func (m *WeightedSemaphore) GetReleaseCalls() uint64 {
	return atomic.LoadUint64(&m.releaseCalls)
}
// SetupKafkaContainer starts a disposable Kafka test container and writes the
// resulting broker addresses into the provided slice pointer. It returns a
// cleanup function that terminates the container, which callers must invoke
// when the test finishes.
func SetupKafkaContainer(ctx context.Context, brokers *[]string) (cleanup, error) {
	opts := []testcontainers.ContainerCustomizer{
		kafka.WithClusterID("test-cluster"),
		// wait until the broker logs readiness before handing it to the test
		testcontainers.WithWaitStrategy(
			wait.ForLog("Kafka Server started").
				WithOccurrence(1).
				WithStartupTimeout(5 * time.Second),
		),
	}

	ctr, err := kafka.Run(ctx, kafkaImage, opts...)
	if err != nil {
		return nil, fmt.Errorf("failed to start kafka container: %w", err)
	}

	// expose the dynamically mapped broker addresses to the caller
	*brokers, err = ctr.Brokers(ctx)
	if err != nil {
		return nil, fmt.Errorf("retrieving brokers for kafka container: %w", err)
	}

	return func() error {
		return ctr.Terminate(ctx)
	}, nil
}
// SetupPostgresContainer starts a disposable Postgres test container using
// the given image and writes its connection string (with sslmode=disable)
// into url. An optional configuration file can be supplied to customize the
// server. It returns a cleanup function that terminates the container.
func SetupPostgresContainer(ctx context.Context, url *string, image PostgresImage, configFile ...string) (cleanup, error) {
	// the "ready to accept connections" line is logged twice during startup
	// (once before the post-initdb restart), so wait for the second occurrence
	waitForLogs := wait.
		ForLog("database system is ready to accept connections").
		WithOccurrence(2).
		WithStartupTimeout(5 * time.Second)

	opts := []testcontainers.ContainerCustomizer{
		testcontainers.WithWaitStrategy(waitForLogs),
	}
	if len(configFile) > 0 {
		// only the first config file is used when provided
		opts = append(opts, postgres.WithConfigFile(configFile[0]))
	}

	ctr, err := postgres.Run(ctx, string(image), opts...)
	if err != nil {
		return nil, fmt.Errorf("failed to start postgres container: %w", err)
	}

	*url, err = ctr.ConnectionString(ctx, "sslmode=disable")
	if err != nil {
		return nil, fmt.Errorf("retrieving connection string for postgres container: %w", err)
	}

	return func() error {
		return ctr.Terminate(ctx)
	}, nil
}
-------------------------------------------------------------------------------- /kafka2os.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | kafka: 3 | servers: ["localhost:9092"] 4 | topic: 5 | name: "pgstream" 6 | consumer_group: 7 | id: "pgstream-consumer-group" 8 | start_offset: "earliest" # options are earliest or latest 9 | backoff: # one of exponential or constant 10 | exponential: 11 | max_retries: 60 # maximum number of retries 12 | initial_interval: 1000 # initial interval in milliseconds 13 | max_interval: 60000 # maximum interval in milliseconds 14 | 15 | target: 16 | search: 17 | engine: "opensearch" # options are elasticsearch or opensearch 18 | url: "http://admin:admin@localhost:9200" # URL of the search engine 19 | batch: 20 | timeout: 5000 # batch timeout in milliseconds 21 | size: 100 # number of messages in a batch 22 | backoff: # one of exponential or constant 23 | exponential: 24 | max_retries: 0 # maximum number of retries 25 | initial_interval: 1000 # initial interval in milliseconds 26 | max_interval: 60000 # maximum interval in milliseconds 27 | 28 | modifiers: 29 | injector: 30 | enabled: true # whether to inject pgstream metadata into the WAL events 31 | schemalog_url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" # URL of the schemalog database, if different from the source database 32 | -------------------------------------------------------------------------------- /license-header.txt: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package main 4 | 5 | import ( 6 | "os" 7 | 8 | "github.com/xataio/pgstream/cmd" 9 | ) 10 | 11 | func main() { 12 | if err := cmd.Execute(); 
err != nil { 13 | os.Exit(1) 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /migrations/postgres/1_create_pgstream_xid.down.sql: -------------------------------------------------------------------------------- 1 | DROP FUNCTION IF EXISTS pgstream.xid_counter; 2 | DROP FUNCTION IF EXISTS pgstream.xid_pid; 3 | DROP FUNCTION IF EXISTS pgstream.xid_machine; 4 | DROP FUNCTION IF EXISTS pgstream.xid_time; 5 | DROP FUNCTION IF EXISTS pgstream.xid; 6 | DROP FUNCTION IF EXISTS pgstream.xid_decode; 7 | DROP FUNCTION IF EXISTS pgstream.xid_encode; 8 | DROP FUNCTION IF EXISTS pgstream._xid_machine_id; 9 | DROP SEQUENCE IF EXISTS pgstream.xid_serial ; 10 | DROP DOMAIN IF EXISTS pgstream.xid; 11 | -------------------------------------------------------------------------------- /migrations/postgres/2_create_pgstream_schemalog_table.down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS pgstream.schema_log; 2 | -------------------------------------------------------------------------------- /migrations/postgres/2_create_pgstream_schemalog_table.up.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS pgstream.schema_log ( 2 | id pgstream.xid PRIMARY KEY DEFAULT pgstream.xid(), 3 | version BIGINT NOT NULL, 4 | schema_name TEXT NOT NULL, 5 | schema JSONB NOT NULL, 6 | created_at TIMESTAMP NOT NULL DEFAULT NOW(), 7 | acked BOOLEAN NOT NULL DEFAULT FALSE 8 | ); 9 | 10 | -- most lookups look like: 11 | -- `SELECT id, schema FROM pgstream.schema_log WHERE schema_name = 'foo' AND NOT acked ORDER BY id DESC LIMIT 1` 12 | CREATE INDEX IF NOT EXISTS schema_log_name_acked ON pgstream.schema_log (schema_name, acked, id); 13 | CREATE UNIQUE INDEX IF NOT EXISTS schema_log_version_uniq ON pgstream.schema_log(schema_name, version); 14 | -------------------------------------------------------------------------------- 
/migrations/postgres/3_create_pgstream_tableids_table.down.sql: -------------------------------------------------------------------------------- 1 | DROP FUNCTION IF EXISTS pgstream.create_table_mapping; 2 | DROP TABLE IF EXISTS pgstream.table_ids; 3 | -------------------------------------------------------------------------------- /migrations/postgres/3_create_pgstream_tableids_table.up.sql: -------------------------------------------------------------------------------- 1 | -- table_ids stores the mapping between the pgstream table id and the Postgres table oid 2 | CREATE TABLE IF NOT EXISTS pgstream.table_ids ( 3 | id pgstream.xid PRIMARY KEY DEFAULT pgstream.xid(), 4 | oid BIGINT NOT NULL UNIQUE 5 | ); 6 | 7 | CREATE OR REPLACE FUNCTION pgstream.create_table_mapping(table_oid oid) RETURNS pgstream.xid 8 | LANGUAGE SQL 9 | SET search_path = pg_catalog,pg_temp 10 | AS $$ 11 | INSERT INTO pgstream.table_ids (oid) VALUES (table_oid) RETURNING id; 12 | $$; 13 | -------------------------------------------------------------------------------- /migrations/postgres/4_create_pgstream_get_schema_function.down.sql: -------------------------------------------------------------------------------- 1 | DROP FUNCTION IF EXISTS pgstream.get_schema; 2 | -------------------------------------------------------------------------------- /migrations/postgres/5_create_pgstream_log_schema_function.down.sql: -------------------------------------------------------------------------------- 1 | DROP FUNCTION IF EXISTS pgstream.log_schema; 2 | DROP FUNCTION IF EXISTS pgstream.is_system_schema; 3 | -------------------------------------------------------------------------------- /migrations/postgres/6_create_pgstream_refresh_schema_function.down.sql: -------------------------------------------------------------------------------- 1 | DROP FUNCTION IF EXISTS pgstream.refresh_schema 2 | -------------------------------------------------------------------------------- 
/migrations/postgres/6_create_pgstream_refresh_schema_function.up.sql: -------------------------------------------------------------------------------- 1 | CREATE OR REPLACE FUNCTION pgstream.refresh_schema(schema_to_refresh text) RETURNS void 2 | LANGUAGE plpgsql 3 | SECURITY DEFINER 4 | SET search_path = pg_catalog,pg_temp 5 | AS $$ 6 | DECLARE 7 | schema_version bigint; 8 | is_system_schema boolean; 9 | BEGIN 10 | is_system_schema := pgstream.is_system_schema(schema_to_refresh); 11 | 12 | IF schema_to_refresh IS NOT NULL and NOT IS_SYSTEM_SCHEMA THEN 13 | SELECT COUNT(*)+1 INTO schema_version 14 | FROM "pgstream"."schema_log" 15 | where schema_name = schema_to_refresh; 16 | 17 | INSERT INTO "pgstream"."schema_log" (version, schema_name, schema) 18 | VALUES (schema_version, schema_to_refresh, pgstream.get_schema(schema_to_refresh)); 19 | 20 | RESET pgstream.skip_log; 21 | END IF; 22 | END; 23 | $$; 24 | -------------------------------------------------------------------------------- /migrations/postgres/7_create_pgstream_event_triggers.down.sql: -------------------------------------------------------------------------------- 1 | DROP EVENT TRIGGER IF EXISTS pgstream_log_schema_create_alter_table; 2 | DROP EVENT TRIGGER IF EXISTS pgstream_log_schema_drop_schema_table; 3 | -------------------------------------------------------------------------------- /migrations/postgres/7_create_pgstream_event_triggers.up.sql: -------------------------------------------------------------------------------- 1 | CREATE EVENT TRIGGER pgstream_log_schema_create_alter_table ON ddl_command_end EXECUTE FUNCTION pgstream.log_schema(); 2 | CREATE EVENT TRIGGER pgstream_log_schema_drop_schema_table ON sql_drop WHEN tag IN ('DROP TABLE', 'DROP SCHEMA') EXECUTE FUNCTION pgstream.log_schema(); 3 | -------------------------------------------------------------------------------- /pg2kafka.env: -------------------------------------------------------------------------------- 1 | # Listener 
config 2 | PGSTREAM_POSTGRES_LISTENER_URL="postgres://postgres:postgres@localhost?sslmode=disable" 3 | PGSTREAM_POSTGRES_SNAPSHOT_TABLES="*" 4 | PGSTREAM_POSTGRES_SNAPSHOT_SCHEMA_WORKERS=4 5 | PGSTREAM_POSTGRES_SNAPSHOT_TABLE_WORKERS=4 6 | PGSTREAM_POSTGRES_SNAPSHOT_BATCH_PAGE_SIZE=1000 7 | PGSTREAM_POSTGRES_SNAPSHOT_WORKERS=1 8 | 9 | # Processor config 10 | PGSTREAM_INJECTOR_STORE_POSTGRES_URL="postgres://postgres:postgres@localhost?sslmode=disable" 11 | PGSTREAM_KAFKA_WRITER_SERVERS="localhost:9092" 12 | PGSTREAM_KAFKA_TOPIC_NAME=pgstream 13 | PGSTREAM_KAFKA_TOPIC_PARTITIONS=1 14 | PGSTREAM_KAFKA_TOPIC_REPLICATION_FACTOR=1 15 | PGSTREAM_KAFKA_TOPIC_AUTO_CREATE=true 16 | PGSTREAM_KAFKA_WRITER_BATCH_TIMEOUT=2s 17 | PGSTREAM_KAFKA_WRITER_BATCH_SIZE=100 18 | PGSTREAM_KAFKA_WRITER_BATCH_BYTES=1572864 19 | -------------------------------------------------------------------------------- /pg2kafka.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | postgres: 3 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" 4 | mode: snapshot_and_replication # options are replication, snapshot or snapshot_and_replication 5 | snapshot: # when mode is snapshot or snapshot_and_replication 6 | mode: full # options are data_and, schema or data 7 | tables: ["*"] # tables to snapshot, can be a list of table names or a pattern 8 | recorder: 9 | postgres_url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" # URL of the database where the snapshot status is recorded 10 | snapshot_workers: 1 # number of schemas to be snapshotted in parallel 11 | data: # when mode is full or data 12 | schema_workers: 4 # number of schema tables to be snapshotted in parallel 13 | table_workers: 4 # number of workers to snapshot a table in parallel 14 | batch_page_size: 1000 # number of pages to read per batch 15 | schema: # when mode is full or schema 16 | mode: schemalog # options are pgdump_pgrestore or schemalog 17 | 18 | target: 
19 | kafka: 20 | servers: ["localhost:9092"] 21 | topic: 22 | name: "pgstream" # name of the Kafka topic 23 | partitions: 1 # number of partitions for the topic 24 | replication_factor: 1 # replication factor for the topic 25 | auto_create: true # whether to automatically create the topic if it doesn't exist 26 | batch: 27 | timeout: 2000 # batch timeout in milliseconds 28 | size: 100 # number of messages in a batch 29 | max_bytes: 1572864 # max size of batch in bytes (1.5MiB) 30 | 31 | modifiers: 32 | injector: 33 | enabled: true # whether to inject pgstream metadata into the WAL events 34 | -------------------------------------------------------------------------------- /pg2os.env: -------------------------------------------------------------------------------- 1 | # Listener config 2 | PGSTREAM_POSTGRES_LISTENER_URL="postgres://postgres:postgres@localhost?sslmode=disable" 3 | PGSTREAM_POSTGRES_SNAPSHOT_TABLES="*" 4 | PGSTREAM_POSTGRES_SNAPSHOT_SCHEMA_WORKERS=4 5 | PGSTREAM_POSTGRES_SNAPSHOT_TABLE_WORKERS=4 6 | PGSTREAM_POSTGRES_SNAPSHOT_BATCH_PAGE_SIZE=1000 7 | PGSTREAM_POSTGRES_SNAPSHOT_WORKERS=1 8 | 9 | # Processor config 10 | PGSTREAM_INJECTOR_STORE_POSTGRES_URL="postgres://postgres:postgres@localhost?sslmode=disable" 11 | PGSTREAM_SEARCH_INDEXER_BATCH_SIZE=100 12 | PGSTREAM_SEARCH_INDEXER_BATCH_TIMEOUT=5s 13 | PGSTREAM_OPENSEARCH_STORE_URL="http://admin:admin@localhost:9200" 14 | PGSTREAM_SEARCH_STORE_EXP_BACKOFF_INITIAL_INTERVAL=1s 15 | PGSTREAM_SEARCH_STORE_EXP_BACKOFF_MAX_INTERVAL=1m 16 | PGSTREAM_SEARCH_STORE_EXP_BACKOFF_MAX_RETRIES=0 17 | -------------------------------------------------------------------------------- /pg2os.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | postgres: 3 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" 4 | mode: snapshot_and_replication # options are replication, snapshot or snapshot_and_replication 5 | snapshot: # when mode is snapshot or 
snapshot_and_replication 6 | mode: full # options are data_and, schema or data 7 | tables: ["*"] # tables to snapshot, can be a list of table names or a pattern 8 | recorder: 9 | postgres_url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" # URL of the database where the snapshot status is recorded 10 | snapshot_workers: 1 # number of schemas to be snapshotted in parallel 11 | data: # when mode is full or data 12 | schema_workers: 4 # number of schema tables to be snapshotted in parallel 13 | table_workers: 4 # number of workers to snapshot a table in parallel 14 | batch_page_size: 1000 # number of pages to read per batch 15 | schema: # when mode is full or schema 16 | mode: schemalog # options are pgdump_pgrestore or schemalog 17 | 18 | target: 19 | search: 20 | engine: "opensearch" # options are elasticsearch or opensearch 21 | url: "http://admin:admin@localhost:9200" # URL of the search engine 22 | batch: 23 | timeout: 5000 # batch timeout in milliseconds 24 | size: 100 # number of messages in a batch 25 | backoff: # one of exponential or constant 26 | exponential: 27 | max_retries: 0 # maximum number of retries 28 | initial_interval: 1000 # initial interval in milliseconds 29 | max_interval: 60000 # maximum interval in milliseconds 30 | 31 | 32 | modifiers: 33 | injector: 34 | enabled: true # whether to inject pgstream metadata into the WAL events 35 | -------------------------------------------------------------------------------- /pg2pg.env: -------------------------------------------------------------------------------- 1 | # Listener config 2 | PGSTREAM_POSTGRES_LISTENER_URL="postgres://postgres:postgres@localhost?sslmode=disable" 3 | PGSTREAM_POSTGRES_SNAPSHOT_STORE_URL="postgres://postgres:postgres@localhost?sslmode=disable" 4 | PGSTREAM_POSTGRES_SNAPSHOT_TABLES="*" 5 | PGSTREAM_POSTGRES_SNAPSHOT_SCHEMA_WORKERS=4 6 | PGSTREAM_POSTGRES_SNAPSHOT_TABLE_WORKERS=4 7 | PGSTREAM_POSTGRES_SNAPSHOT_BATCH_PAGE_SIZE=1000 8 | 
PGSTREAM_POSTGRES_SNAPSHOT_WORKERS=1 9 | 10 | # Processor config 11 | PGSTREAM_TRANSFORMER_RULES_FILE="transformer_rules.yaml" 12 | PGSTREAM_INJECTOR_STORE_POSTGRES_URL="postgres://postgres:postgres@localhost?sslmode=disable" 13 | PGSTREAM_FILTER_INCLUDE_TABLES="test" 14 | PGSTREAM_POSTGRES_WRITER_TARGET_URL="postgres://postgres:postgres@localhost:7654?sslmode=disable" 15 | PGSTREAM_POSTGRES_WRITER_BATCH_SIZE=100 16 | PGSTREAM_POSTGRES_WRITER_BATCH_TIMEOUT=5s 17 | PGSTREAM_POSTGRES_WRITER_SCHEMALOG_STORE_URL="postgres://postgres:postgres@localhost?sslmode=disable" 18 | -------------------------------------------------------------------------------- /pg2pg.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | postgres: 3 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" 4 | mode: snapshot_and_replication # options are replication, snapshot or snapshot_and_replication 5 | snapshot: # when mode is snapshot or snapshot_and_replication 6 | mode: full # options are data_and, schema or data 7 | tables: ["*"] # tables to snapshot, can be a list of table names or a pattern 8 | recorder: 9 | postgres_url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" # URL of the database where the snapshot status is recorded 10 | snapshot_workers: 1 # number of schemas to be snapshotted in parallel 11 | data: # when mode is full or data 12 | schema_workers: 4 # number of schema tables to be snapshotted in parallel 13 | table_workers: 4 # number of workers to snapshot a table in parallel 14 | batch_page_size: 1000 # number of pages to read per batch 15 | schema: # when mode is full or schema 16 | mode: pgdump_pgrestore # options are pgdump_pgrestore or schemalog 17 | pgdump_pgrestore: 18 | clean_target_db: true # whether to clean the target database before restoring 19 | 20 | target: 21 | postgres: 22 | url: "postgres://postgres:postgres@localhost:7654?sslmode=disable" 23 | batch: 24 | timeout: 5000 # batch 
timeout in milliseconds 25 | size: 100 # number of messages in a batch 26 | disable_triggers: false # whether to disable triggers on the target database 27 | on_conflict_action: "nothing" # options are update, nothing or error 28 | 29 | modifiers: 30 | injector: 31 | enabled: true # whether to inject pgstream metadata into the WAL events 32 | filter: 33 | include_tables: 34 | - test 35 | transformations: 36 | validation_mode: relaxed 37 | table_transformers: 38 | - schema: public 39 | table: test 40 | column_transformers: 41 | name: 42 | name: greenmask_firstname 43 | dynamic_parameters: 44 | gender: 45 | column: sex 46 | -------------------------------------------------------------------------------- /pg2webhook.env: -------------------------------------------------------------------------------- 1 | # Listener config 2 | PGSTREAM_POSTGRES_LISTENER_URL="postgres://postgres:postgres@localhost?sslmode=disable" 3 | 4 | # Processor config 5 | PGSTREAM_INJECTOR_STORE_POSTGRES_URL="postgres://postgres:postgres@localhost?sslmode=disable" 6 | PGSTREAM_WEBHOOK_SUBSCRIPTION_STORE_URL="postgres://postgres:postgres@localhost?sslmode=disable" 7 | PGSTREAM_WEBHOOK_SUBSCRIPTION_STORE_CACHE_ENABLED=false 8 | PGSTREAM_WEBHOOK_SUBSCRIPTION_STORE_CACHE_REFRESH_INTERVAL="60s" 9 | -------------------------------------------------------------------------------- /pg2webhook.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | postgres: 3 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" 4 | mode: replication # options are replication, snapshot or snapshot_and_replication 5 | 6 | target: 7 | webhooks: 8 | subscriptions: 9 | store: 10 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" # URL of the database where the webhook subscriptions are stored 11 | cache: 12 | enabled: false # whether to enable caching for the subscription store 13 | server: 14 | address: "localhost:9090" # address of the 
// Backoff is a mock implementation of the backoff interface used by pgstream.
// Each method delegates to the corresponding *Fn function field, which must
// be set by any test that triggers the call.
type Backoff struct {
	RetryNotifyFn func(backoff.Operation, backoff.Notify) error
	RetryFn       func(backoff.Operation) error
}

// RetryNotify delegates to RetryNotifyFn; it panics if the override is not set.
func (m *Backoff) RetryNotify(op backoff.Operation, not backoff.Notify) error {
	return m.RetryNotifyFn(op, not)
}

// Retry delegates to RetryFn; it panics if the override is not set.
func (m *Backoff) Retry(op backoff.Operation) error {
	return m.RetryFn(op)
}
20 | // Defaults to false. 21 | AutoCreate bool 22 | } 23 | 24 | type ReaderConfig struct { 25 | Conn ConnConfig 26 | // ConsumerGroupID is the ID of the consumer group to use. If not set, 27 | // defaults to "pgstream-consumer-group". 28 | ConsumerGroupID string 29 | // ConsumerGroupStartOffset is the offset to start consuming from. If not 30 | // set, defaults to "earliest". 31 | ConsumerGroupStartOffset string 32 | } 33 | 34 | const ( 35 | defaultNumPartitions = 1 36 | defaultReplicationFactor = 1 37 | defaultConsumerGroupOffset = earliestOffset 38 | defaultConsumerGroupID = "pgstream-consumer-group" 39 | ) 40 | 41 | func (c *TopicConfig) numPartitions() int { 42 | if c.NumPartitions > 0 { 43 | return c.NumPartitions 44 | } 45 | return defaultNumPartitions 46 | } 47 | 48 | func (c *TopicConfig) replicationFactor() int { 49 | if c.NumPartitions > 0 { 50 | return c.ReplicationFactor 51 | } 52 | return defaultReplicationFactor 53 | } 54 | 55 | func (c *ReaderConfig) consumerGroupID() string { 56 | if c.ConsumerGroupID != "" { 57 | return c.ConsumerGroupID 58 | } 59 | return defaultConsumerGroupID 60 | } 61 | 62 | func (c *ReaderConfig) consumerGroupStartOffset() string { 63 | if c.ConsumerGroupStartOffset != "" { 64 | return c.ConsumerGroupStartOffset 65 | } 66 | return defaultConsumerGroupOffset 67 | } 68 | -------------------------------------------------------------------------------- /pkg/kafka/conn.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package kafka 4 | 5 | import ( 6 | "errors" 7 | "fmt" 8 | "net" 9 | "strconv" 10 | "time" 11 | 12 | tlslib "github.com/xataio/pgstream/pkg/tls" 13 | 14 | "github.com/segmentio/kafka-go" 15 | ) 16 | 17 | // withConnection creates a connection that can be used by the kafka operation 18 | // passed in the parameters. This ensures the cleanup of all connection resources. 
19 | func withConnection(config *ConnConfig, kafkaOperation func(conn *kafka.Conn) error) error { 20 | dialer, err := buildDialer(&config.TLS) 21 | if err != nil { 22 | return err 23 | } 24 | 25 | var conn *kafka.Conn 26 | for _, server := range config.Servers { 27 | conn, err = dialer.Dial("tcp", server) 28 | if err != nil { 29 | // Try next server in the list 30 | continue 31 | } 32 | defer conn.Close() 33 | 34 | // Successfully connected. Do not try the other servers 35 | break 36 | } 37 | 38 | if conn == nil { 39 | return errors.New("error connecting to kafka, all servers failed") 40 | } 41 | 42 | controller, err := conn.Controller() 43 | if err != nil { 44 | return fmt.Errorf("controller: %w", err) 45 | } 46 | var controllerConn *kafka.Conn 47 | 48 | controllerConn, err = dialer.Dial("tcp", net.JoinHostPort(controller.Host, strconv.Itoa(controller.Port))) 49 | if err != nil { 50 | return fmt.Errorf("controller connection: %w", err) 51 | } 52 | defer controllerConn.Close() 53 | 54 | return kafkaOperation(controllerConn) 55 | } 56 | 57 | func buildDialer(cfg *tlslib.Config) (*kafka.Dialer, error) { 58 | timeout := 10 * time.Second 59 | 60 | tlsConfig, err := tlslib.NewConfig(cfg) 61 | if err != nil { 62 | return nil, fmt.Errorf("loading TLS configuration: %w", err) 63 | } 64 | 65 | return &kafka.Dialer{ 66 | Timeout: timeout, 67 | DualStack: true, 68 | TLS: tlsConfig, 69 | }, nil 70 | } 71 | -------------------------------------------------------------------------------- /pkg/kafka/kafka_offset_parser.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package kafka 4 | 5 | import ( 6 | "errors" 7 | "fmt" 8 | "strconv" 9 | "strings" 10 | ) 11 | 12 | type Offset struct { 13 | Topic string 14 | Partition int 15 | Offset int64 16 | } 17 | 18 | type OffsetParser interface { 19 | ToString(o *Offset) string 20 | FromString(s string) (*Offset, error) 21 | } 22 | 23 | type Parser 
struct{} 24 | 25 | var ( 26 | ErrInvalidOffsetFormat = errors.New("invalid format for kafka offset") 27 | 28 | // "/" is used as a separator to concatenate the topic, partition and 29 | // offset. The partition and offset are integers, and the topic allowed 30 | // characters are [a-zA-Z0-9\._\-]. 31 | // 32 | // See https://github.com/apache/kafka/blob/0.10.2/core/src/main/scala/kafka/common/Topic.scala#L29 33 | separator = "/" 34 | ) 35 | 36 | func NewOffsetParser() *Parser { 37 | return &Parser{} 38 | } 39 | 40 | func (p *Parser) ToString(o *Offset) string { 41 | return fmt.Sprintf("%s%s%d%s%d", o.Topic, separator, o.Partition, separator, o.Offset) 42 | } 43 | 44 | func (p *Parser) FromString(s string) (*Offset, error) { 45 | parts := strings.Split(s, separator) 46 | if len(parts) != 3 { 47 | return nil, ErrInvalidOffsetFormat 48 | } 49 | topic := parts[0] 50 | partition, err := strconv.Atoi(parts[1]) 51 | if err != nil { 52 | return nil, fmt.Errorf("parsing partition from string: %w: %w", ErrInvalidOffsetFormat, err) 53 | } 54 | offset, err := strconv.Atoi(parts[2]) 55 | if err != nil { 56 | return nil, fmt.Errorf("parsing offset from string: %w: %w", ErrInvalidOffsetFormat, err) 57 | } 58 | 59 | return &Offset{ 60 | Topic: topic, 61 | Partition: partition, 62 | Offset: int64(offset), 63 | }, nil 64 | } 65 | -------------------------------------------------------------------------------- /pkg/kafka/kafka_offset_parser_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package kafka 4 | 5 | import ( 6 | "testing" 7 | 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestParser_ToString(t *testing.T) { 12 | t.Parallel() 13 | 14 | o := &Offset{ 15 | Topic: "test_topic", 16 | Partition: 0, 17 | Offset: 1, 18 | } 19 | 20 | parser := Parser{} 21 | str := parser.ToString(o) 22 | require.Equal(t, "test_topic/0/1", str) 23 | } 24 | 25 | func TestParser_FromString(t 
*testing.T) { 26 | t.Parallel() 27 | 28 | tests := []struct { 29 | name string 30 | str string 31 | 32 | wantOffset *Offset 33 | wantErr error 34 | }{ 35 | { 36 | name: "ok", 37 | str: "test_topic/0/1", 38 | 39 | wantOffset: &Offset{ 40 | Topic: "test_topic", 41 | Partition: 0, 42 | Offset: 1, 43 | }, 44 | wantErr: nil, 45 | }, 46 | { 47 | name: "error - invalid format", 48 | str: "test_topic01", 49 | 50 | wantOffset: nil, 51 | wantErr: ErrInvalidOffsetFormat, 52 | }, 53 | { 54 | name: "error - invalid partition", 55 | str: "test_topic/zero/1", 56 | 57 | wantOffset: nil, 58 | wantErr: ErrInvalidOffsetFormat, 59 | }, 60 | { 61 | name: "error - invalid offset", 62 | str: "test_topic/0/one", 63 | 64 | wantOffset: nil, 65 | wantErr: ErrInvalidOffsetFormat, 66 | }, 67 | } 68 | 69 | for _, tc := range tests { 70 | t.Run(tc.name, func(t *testing.T) { 71 | t.Parallel() 72 | 73 | parser := NewOffsetParser() 74 | offset, err := parser.FromString(tc.str) 75 | require.ErrorIs(t, err, tc.wantErr) 76 | require.Equal(t, offset, tc.wantOffset) 77 | }) 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /pkg/kafka/log.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package kafka 4 | 5 | import ( 6 | "fmt" 7 | 8 | "github.com/segmentio/kafka-go" 9 | loglib "github.com/xataio/pgstream/pkg/log" 10 | ) 11 | 12 | func makeLogger(logFn func(msg string, fields ...loglib.Fields)) kafka.LoggerFunc { 13 | return func(msg string, args ...interface{}) { 14 | logFn(fmt.Sprintf(msg, args...), nil) 15 | } 16 | } 17 | 18 | func makeErrLogger(logFn func(err error, msg string, fields ...loglib.Fields)) kafka.LoggerFunc { 19 | return func(msg string, args ...interface{}) { 20 | logFn(nil, fmt.Sprintf(msg, args...), nil) 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /pkg/kafka/mocks/mock_kafka_parser.go: 
-------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package mocks 4 | 5 | import "github.com/xataio/pgstream/pkg/kafka" 6 | 7 | type OffsetParser struct { 8 | ToStringFn func(o *kafka.Offset) string 9 | FromStringFn func(string) (*kafka.Offset, error) 10 | } 11 | 12 | func (m *OffsetParser) ToString(o *kafka.Offset) string { 13 | return m.ToStringFn(o) 14 | } 15 | 16 | func (m *OffsetParser) FromString(s string) (*kafka.Offset, error) { 17 | return m.FromStringFn(s) 18 | } 19 | -------------------------------------------------------------------------------- /pkg/kafka/mocks/mock_kafka_reader.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package mocks 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/xataio/pgstream/pkg/kafka" 9 | ) 10 | 11 | type Reader struct { 12 | FetchMessageFn func(ctx context.Context) (*kafka.Message, error) 13 | CommitOffsetsFn func(ctx context.Context, offsets ...*kafka.Offset) error 14 | CloseFn func() error 15 | } 16 | 17 | func (m *Reader) FetchMessage(ctx context.Context) (*kafka.Message, error) { 18 | return m.FetchMessageFn(ctx) 19 | } 20 | 21 | func (m *Reader) CommitOffsets(ctx context.Context, offsets ...*kafka.Offset) error { 22 | return m.CommitOffsetsFn(ctx, offsets...) 
23 | } 24 | 25 | func (m *Reader) Close() error { 26 | return m.CloseFn() 27 | } 28 | -------------------------------------------------------------------------------- /pkg/kafka/mocks/mock_kafka_writer.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package mocks 4 | 5 | import ( 6 | "context" 7 | "sync/atomic" 8 | 9 | "github.com/xataio/pgstream/pkg/kafka" 10 | ) 11 | 12 | type Writer struct { 13 | WriteMessagesFn func(context.Context, uint64, ...kafka.Message) error 14 | CloseFn func() error 15 | WriteCalls uint64 16 | } 17 | 18 | func (m *Writer) WriteMessages(ctx context.Context, msgs ...kafka.Message) error { 19 | atomic.AddUint64(&m.WriteCalls, 1) 20 | return m.WriteMessagesFn(ctx, m.GetWriteCalls(), msgs...) 21 | } 22 | 23 | func (m *Writer) Close() error { 24 | if m.CloseFn != nil { 25 | return m.CloseFn() 26 | } 27 | return nil 28 | } 29 | 30 | func (m *Writer) GetWriteCalls() uint64 { 31 | return atomic.LoadUint64(&m.WriteCalls) 32 | } 33 | -------------------------------------------------------------------------------- /pkg/log/logger.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package log 4 | 5 | type Logger interface { 6 | Trace(msg string, fields ...Fields) 7 | Debug(msg string, fields ...Fields) 8 | Info(msg string, fields ...Fields) 9 | Warn(err error, msg string, fields ...Fields) 10 | Error(err error, msg string, fields ...Fields) 11 | Panic(msg string, fields ...Fields) 12 | WithFields(fields Fields) Logger 13 | } 14 | 15 | type Fields map[string]any 16 | 17 | type NoopLogger struct{} 18 | 19 | func (l *NoopLogger) Trace(msg string, fields ...Fields) {} 20 | func (l *NoopLogger) Debug(msg string, fields ...Fields) {} 21 | func (l *NoopLogger) Info(msg string, fields ...Fields) {} 22 | func (l *NoopLogger) Warn(err error, msg string, fields ...Fields) {} 23 | func (l 
*NoopLogger) Error(err error, msg string, fields ...Fields) {} 24 | func (l *NoopLogger) Panic(msg string, fields ...Fields) {} 25 | func (l *NoopLogger) WithFields(fields Fields) Logger { 26 | return l 27 | } 28 | 29 | const ModuleField = "module" 30 | 31 | func NewNoopLogger() *NoopLogger { 32 | return &NoopLogger{} 33 | } 34 | 35 | // NewLogger will return the logger on input if not nil, or a noop logger 36 | // otherwise. 37 | func NewLogger(l Logger) Logger { 38 | if l == nil { 39 | return &NoopLogger{} 40 | } 41 | return l 42 | } 43 | 44 | func MergeFields(f1, f2 Fields) Fields { 45 | allFields := make(Fields, len(f1)+len(f2)) 46 | fieldMaps := []Fields{f1, f2} 47 | for _, fmap := range fieldMaps { 48 | for k, v := range fmap { 49 | allFields[k] = v 50 | } 51 | } 52 | return allFields 53 | } 54 | -------------------------------------------------------------------------------- /pkg/otel/config.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package otel 4 | 5 | import "time" 6 | 7 | type Config struct { 8 | Metrics *MetricsConfig 9 | Traces *TracesConfig 10 | } 11 | 12 | type MetricsConfig struct { 13 | Endpoint string 14 | CollectionInterval time.Duration 15 | } 16 | 17 | type TracesConfig struct { 18 | Endpoint string 19 | SampleRatio float64 20 | } 21 | 22 | const defaultCollectionInterval = 60 * time.Second 23 | 24 | func (c *MetricsConfig) collectionInterval() time.Duration { 25 | if c.CollectionInterval != 0 { 26 | return c.CollectionInterval 27 | } 28 | return defaultCollectionInterval 29 | } 30 | -------------------------------------------------------------------------------- /pkg/otel/otel_instrumentation.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package otel 4 | 5 | import ( 6 | "go.opentelemetry.io/otel/metric" 7 | "go.opentelemetry.io/otel/trace" 8 | ) 9 | 10 | type 
InstrumentationProvider interface { 11 | NewInstrumentation(name string) *Instrumentation 12 | Close() error 13 | } 14 | 15 | type Instrumentation struct { 16 | Meter metric.Meter 17 | Tracer trace.Tracer 18 | } 19 | 20 | func (i *Instrumentation) IsEnabled() bool { 21 | return i != nil && (i.Meter != nil || i.Tracer != nil) 22 | } 23 | 24 | type noopProvider struct{} 25 | 26 | func (p *noopProvider) NewInstrumentation(name string) *Instrumentation { 27 | return nil 28 | } 29 | 30 | func (p *noopProvider) Close() error { 31 | return nil 32 | } 33 | 34 | func NewInstrumentationProvider(cfg *Config) (InstrumentationProvider, error) { 35 | // if neither metrics or traces are configured, instrumentation is not 36 | // enabled. Return a noop with disabled instrumentation 37 | if cfg.Metrics == nil && cfg.Traces == nil { 38 | return &noopProvider{}, nil 39 | } 40 | return NewProvider(cfg) 41 | } 42 | -------------------------------------------------------------------------------- /pkg/otel/span.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package otel 4 | 5 | import ( 6 | "context" 7 | 8 | "go.opentelemetry.io/otel/codes" 9 | "go.opentelemetry.io/otel/trace" 10 | ) 11 | 12 | // StartSpan will start a span using the tracer on input. If the tracer is nil, 13 | // the context returned is the same as on input, and the span will be nil. 14 | func StartSpan(ctx context.Context, tracer trace.Tracer, name string, opts ...trace.SpanStartOption) (context.Context, trace.Span) { 15 | if tracer == nil { 16 | return ctx, nil 17 | } 18 | return tracer.Start(ctx, name, opts...) 19 | } 20 | 21 | // CloseSpan closes a span and records the given error if not nil. If the span 22 | // is nil, this is a noop. 
23 | func CloseSpan(span trace.Span, err error) { 24 | if span == nil { 25 | return 26 | } 27 | recordSpanResult(span, err) 28 | span.End() 29 | } 30 | 31 | func recordSpanResult(span trace.Span, err error) { 32 | if err == nil { 33 | return 34 | } 35 | 36 | span.RecordError(err) 37 | span.SetStatus(codes.Error, "") 38 | } 39 | -------------------------------------------------------------------------------- /pkg/otel/version.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package otel 4 | 5 | import ( 6 | "runtime/debug" 7 | "sync" 8 | ) 9 | 10 | // commitOnce returns either unknownCommitName or the git commit hash for the code that built the 11 | // binary. NOTE: `vcs.revision`, used below, is not available unless one does a `go build` and 12 | // specifically a `go build cmd/xerver` rather than `go build cmd/xerver/*.go`. 13 | var commitOnce = sync.OnceValue(func() string { 14 | const unknownCommitName = "unknown" 15 | 16 | info, ok := debug.ReadBuildInfo() 17 | if !ok { 18 | return unknownCommitName 19 | } 20 | 21 | for _, v := range info.Settings { 22 | if v.Key == "vcs.revision" { 23 | return v.Value 24 | } 25 | } 26 | 27 | return unknownCommitName 28 | }) 29 | 30 | func version() string { 31 | return commitOnce() 32 | } 33 | -------------------------------------------------------------------------------- /pkg/schemalog/instrumentation/instrumented_store.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package instrumentation 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/xataio/pgstream/pkg/otel" 9 | "github.com/xataio/pgstream/pkg/schemalog" 10 | "go.opentelemetry.io/otel/attribute" 11 | "go.opentelemetry.io/otel/trace" 12 | ) 13 | 14 | type Store struct { 15 | inner schemalog.Store 16 | tracer trace.Tracer 17 | } 18 | 19 | func NewStore(inner schemalog.Store, instrumentation 
*otel.Instrumentation) schemalog.Store { 20 | if instrumentation == nil { 21 | return inner 22 | } 23 | 24 | return &Store{ 25 | inner: inner, 26 | tracer: instrumentation.Tracer, 27 | } 28 | } 29 | 30 | func (s *Store) Insert(ctx context.Context, schemaName string) (le *schemalog.LogEntry, err error) { 31 | ctx, span := otel.StartSpan(ctx, s.tracer, "schemalogstore.Insert", trace.WithAttributes(attribute.String("schema", schemaName))) 32 | defer otel.CloseSpan(span, err) 33 | return s.inner.Insert(ctx, schemaName) 34 | } 35 | 36 | func (s *Store) FetchLast(ctx context.Context, schemaName string, acked bool) (le *schemalog.LogEntry, err error) { 37 | ctx, span := otel.StartSpan(ctx, s.tracer, "schemalogstore.FetchLast", trace.WithAttributes(attribute.String("schema", schemaName))) 38 | defer otel.CloseSpan(span, err) 39 | return s.inner.FetchLast(ctx, schemaName, acked) 40 | } 41 | 42 | func (s *Store) Fetch(ctx context.Context, schemaName string, version int) (le *schemalog.LogEntry, err error) { 43 | ctx, span := otel.StartSpan(ctx, s.tracer, "schemalogstore.Fetch", trace.WithAttributes(attribute.String("schema", schemaName))) 44 | defer otel.CloseSpan(span, err) 45 | return s.inner.Fetch(ctx, schemaName, version) 46 | } 47 | 48 | func (s *Store) Ack(ctx context.Context, le *schemalog.LogEntry) (err error) { 49 | ctx, span := otel.StartSpan(ctx, s.tracer, "schemalogstore.Ack", trace.WithAttributes(attribute.String("schema", le.SchemaName))) 50 | defer otel.CloseSpan(span, err) 51 | return s.inner.Ack(ctx, le) 52 | } 53 | 54 | func (s *Store) Close() error { 55 | return s.inner.Close() 56 | } 57 | -------------------------------------------------------------------------------- /pkg/schemalog/mocks/store_mock.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package mocks 4 | 5 | import ( 6 | "context" 7 | "sync/atomic" 8 | 9 | "github.com/xataio/pgstream/pkg/schemalog" 10 | ) 11 
| 12 | type Store struct { 13 | InsertFn func(ctx context.Context, schemaName string) (*schemalog.LogEntry, error) 14 | FetchLastFn func(ctx context.Context, schemaName string, ackedOnly bool) (*schemalog.LogEntry, error) 15 | FetchFn func(ctx context.Context, schemaName string, version int) (*schemalog.LogEntry, error) 16 | AckFn func(ctx context.Context, le *schemalog.LogEntry) error 17 | CloseFn func() error 18 | insertCalls uint64 19 | fetchCalls uint64 20 | ackCalls uint64 21 | } 22 | 23 | var _ schemalog.Store = (*Store)(nil) 24 | 25 | func (m *Store) Insert(ctx context.Context, schemaName string) (*schemalog.LogEntry, error) { 26 | atomic.AddUint64(&m.insertCalls, 1) 27 | return m.InsertFn(ctx, schemaName) 28 | } 29 | 30 | func (m *Store) FetchLast(ctx context.Context, schemaName string, ackedOnly bool) (*schemalog.LogEntry, error) { 31 | atomic.AddUint64(&m.fetchCalls, 1) 32 | return m.FetchLastFn(ctx, schemaName, ackedOnly) 33 | } 34 | 35 | func (m *Store) Fetch(ctx context.Context, schemaName string, version int) (*schemalog.LogEntry, error) { 36 | atomic.AddUint64(&m.fetchCalls, 1) 37 | return m.FetchFn(ctx, schemaName, version) 38 | } 39 | 40 | func (m *Store) Ack(ctx context.Context, le *schemalog.LogEntry) error { 41 | atomic.AddUint64(&m.ackCalls, 1) 42 | return m.AckFn(ctx, le) 43 | } 44 | 45 | func (m *Store) Close() error { 46 | if m.CloseFn != nil { 47 | return m.CloseFn() 48 | } 49 | return nil 50 | } 51 | 52 | func (m *Store) GetInsertCalls() uint64 { 53 | return atomic.LoadUint64(&m.insertCalls) 54 | } 55 | 56 | func (m *Store) GetFetchCalls() uint64 { 57 | return atomic.LoadUint64(&m.fetchCalls) 58 | } 59 | 60 | func (m *Store) GetAckCalls() uint64 { 61 | return atomic.LoadUint64(&m.ackCalls) 62 | } 63 | -------------------------------------------------------------------------------- /pkg/schemalog/postgres/helper_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: 
Apache-2.0 2 | 3 | package postgres 4 | 5 | import ( 6 | "fmt" 7 | 8 | "github.com/rs/xid" 9 | "github.com/xataio/pgstream/pkg/schemalog" 10 | ) 11 | 12 | type mockRow struct { 13 | logEntry *schemalog.LogEntry 14 | version *int 15 | scanFn func(args ...any) error 16 | } 17 | 18 | func (m *mockRow) Scan(args ...any) error { 19 | if m.scanFn != nil { 20 | return m.scanFn(args...) 21 | } 22 | 23 | if m.logEntry != nil { 24 | id, ok := args[0].(*xid.ID) 25 | if !ok { 26 | return fmt.Errorf("unexpected type for xid.ID in scan: %T", args[0]) 27 | } 28 | *id = m.logEntry.ID 29 | } 30 | 31 | if m.version != nil { 32 | version, ok := args[0].(*int) 33 | if !ok { 34 | return fmt.Errorf("unexpected type for version in scan: %T", args[0]) 35 | } 36 | *version = *m.version 37 | } 38 | 39 | return nil 40 | } 41 | -------------------------------------------------------------------------------- /pkg/schemalog/schema_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package schemalog 4 | 5 | import ( 6 | "testing" 7 | 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestTable_GetFirstUniqueNotNullColumn(t *testing.T) { 12 | t.Parallel() 13 | 14 | tests := []struct { 15 | name string 16 | table *Table 17 | 18 | wantCol *Column 19 | }{ 20 | { 21 | name: "no unique not null columns", 22 | table: &Table{ 23 | Columns: []Column{ 24 | {PgstreamID: "1", Name: "col-1", Unique: false, Nullable: true}, 25 | {PgstreamID: "2", Name: "col-2", Unique: true, Nullable: true}, 26 | {PgstreamID: "3", Name: "col-3", Unique: false, Nullable: false}, 27 | }, 28 | }, 29 | 30 | wantCol: nil, 31 | }, 32 | { 33 | name: "single unique not null column", 34 | table: &Table{ 35 | Columns: []Column{ 36 | {PgstreamID: "1", Name: "col-1", Unique: false, Nullable: true}, 37 | {PgstreamID: "2", Name: "col-2", Unique: true, Nullable: false}, 38 | {PgstreamID: "3", Name: "col-3", Unique: false, Nullable: false}, 
39 | }, 40 | }, 41 | 42 | wantCol: &Column{PgstreamID: "2", Name: "col-2", Unique: true, Nullable: false}, 43 | }, 44 | { 45 | name: "multiple unique not null columns", 46 | table: &Table{ 47 | Columns: []Column{ 48 | {PgstreamID: "1", Name: "col-1", Unique: false, Nullable: true}, 49 | {PgstreamID: "2", Name: "col-2", Unique: true, Nullable: false}, 50 | {PgstreamID: "3", Name: "col-3", Unique: true, Nullable: false}, 51 | }, 52 | }, 53 | 54 | wantCol: &Column{PgstreamID: "2", Name: "col-2", Unique: true, Nullable: false}, 55 | }, 56 | } 57 | 58 | for _, tc := range tests { 59 | t.Run(tc.name, func(t *testing.T) { 60 | t.Parallel() 61 | 62 | col := tc.table.GetFirstUniqueNotNullColumn() 63 | require.Equal(t, tc.wantCol, col) 64 | }) 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /pkg/schemalog/store.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package schemalog 4 | 5 | import ( 6 | "context" 7 | "errors" 8 | ) 9 | 10 | type Store interface { 11 | Insert(ctx context.Context, schemaName string) (*LogEntry, error) 12 | FetchLast(ctx context.Context, schemaName string, ackedOnly bool) (*LogEntry, error) 13 | Fetch(ctx context.Context, schemaName string, version int) (*LogEntry, error) 14 | Ack(ctx context.Context, le *LogEntry) error 15 | Close() error 16 | } 17 | 18 | var ErrNoRows = errors.New("no rows") 19 | 20 | const ( 21 | SchemaName = "pgstream" 22 | TableName = "schema_log" 23 | ) 24 | -------------------------------------------------------------------------------- /pkg/schemalog/store_cache.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package schemalog 4 | 5 | import ( 6 | "context" 7 | "fmt" 8 | "sync" 9 | ) 10 | 11 | // StoreCache is a wrapper around a schemalog Store that provides an in memory 12 | // caching mechanism to 
reduce the amount of calls to the database. It is not 13 | // concurrency safe. 14 | type StoreCache struct { 15 | store Store 16 | mutex *sync.RWMutex 17 | cache map[string]*LogEntry 18 | } 19 | 20 | func NewStoreCache(store Store) *StoreCache { 21 | return &StoreCache{ 22 | store: store, 23 | cache: make(map[string]*LogEntry), 24 | mutex: &sync.RWMutex{}, 25 | } 26 | } 27 | 28 | func (s *StoreCache) Insert(ctx context.Context, schemaName string) (*LogEntry, error) { 29 | return s.store.Insert(ctx, schemaName) 30 | } 31 | 32 | func (s *StoreCache) FetchLast(ctx context.Context, schemaName string, ackedOnly bool) (*LogEntry, error) { 33 | logEntry := s.getCachedLogEntry(schemaName) 34 | if logEntry == nil { 35 | var err error 36 | logEntry, err = s.store.FetchLast(ctx, schemaName, ackedOnly) 37 | if err != nil { 38 | return nil, fmt.Errorf("store cache fetch last schema log: %w", err) 39 | } 40 | s.updateCachedLogEntry(schemaName, logEntry) 41 | } 42 | 43 | return logEntry, nil 44 | } 45 | 46 | func (s *StoreCache) Fetch(ctx context.Context, schemaName string, version int) (*LogEntry, error) { 47 | return s.store.Fetch(ctx, schemaName, version) 48 | } 49 | 50 | func (s *StoreCache) Ack(ctx context.Context, entry *LogEntry) error { 51 | s.updateCachedLogEntry(entry.SchemaName, entry) 52 | if err := s.store.Ack(ctx, entry); err != nil { 53 | return fmt.Errorf("store cache ack: %w", err) 54 | } 55 | return nil 56 | } 57 | 58 | func (s *StoreCache) Close() error { 59 | return s.store.Close() 60 | } 61 | 62 | func (s *StoreCache) getCachedLogEntry(schema string) *LogEntry { 63 | s.mutex.RLock() 64 | defer s.mutex.RUnlock() 65 | return s.cache[schema] 66 | } 67 | 68 | func (s *StoreCache) updateCachedLogEntry(schema string, logEntry *LogEntry) { 69 | s.mutex.Lock() 70 | defer s.mutex.Unlock() 71 | s.cache[schema] = logEntry 72 | } 73 | -------------------------------------------------------------------------------- /pkg/snapshot/generator/helper_test.go: 
-------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package generator 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/xataio/pgstream/pkg/snapshot" 9 | ) 10 | 11 | type mockGenerator struct { 12 | createSnapshotFn func(ctx context.Context, snapshot *snapshot.Snapshot) error 13 | closeFn func() error 14 | } 15 | 16 | func (m *mockGenerator) CreateSnapshot(ctx context.Context, snapshot *snapshot.Snapshot) error { 17 | return m.createSnapshotFn(ctx, snapshot) 18 | } 19 | 20 | func (m *mockGenerator) Close() error { 21 | if m.closeFn != nil { 22 | return m.closeFn() 23 | } 24 | return nil 25 | } 26 | -------------------------------------------------------------------------------- /pkg/snapshot/generator/mocks/mock_snapshot_generator.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package mocks 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/xataio/pgstream/pkg/snapshot" 9 | ) 10 | 11 | type Generator struct { 12 | CreateSnapshotFn func(ctx context.Context, snapshot *snapshot.Snapshot) error 13 | CloseFn func() error 14 | createSnapshotCalls uint 15 | } 16 | 17 | func (m *Generator) CreateSnapshot(ctx context.Context, snapshot *snapshot.Snapshot) error { 18 | m.createSnapshotCalls++ 19 | return m.CreateSnapshotFn(ctx, snapshot) 20 | } 21 | 22 | func (m *Generator) Close() error { 23 | if m.CloseFn != nil { 24 | return m.CloseFn() 25 | } 26 | return nil 27 | } 28 | 29 | func (m *Generator) CreateSnapshotCalls() uint { 30 | return m.createSnapshotCalls 31 | } 32 | -------------------------------------------------------------------------------- /pkg/snapshot/generator/postgres/data/config.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package postgres 4 | 5 | type Config struct { 6 | // Postgres connection URL. 
Required. 7 | URL string 8 | // BatchPageSize represents the size of the table page range that will be 9 | // processed concurrently by the table workers. Defaults to 1000. 10 | BatchPageSize uint 11 | // SchemaWorkers represents the number of tables the snapshot generator will 12 | // process concurrently per schema. Defaults to 4. 13 | SchemaWorkers uint 14 | // TableWorkers represents the number of concurrent workers per table. Each 15 | // worker will process a different page range in parallel. Defaults to 4. 16 | TableWorkers uint 17 | } 18 | 19 | const ( 20 | defaultBatchPageSize = 1000 21 | defaultTableWorkers = 4 22 | defaultSchemaWorkers = 4 23 | ) 24 | 25 | func (c *Config) batchPageSize() uint { 26 | if c.BatchPageSize > 0 { 27 | return c.BatchPageSize 28 | } 29 | return defaultBatchPageSize 30 | } 31 | 32 | func (c *Config) schemaWorkers() uint { 33 | if c.SchemaWorkers > 0 { 34 | return c.SchemaWorkers 35 | } 36 | return defaultSchemaWorkers 37 | } 38 | 39 | func (c *Config) tableWorkers() uint { 40 | if c.TableWorkers > 0 { 41 | return c.TableWorkers 42 | } 43 | return defaultTableWorkers 44 | } 45 | -------------------------------------------------------------------------------- /pkg/snapshot/generator/postgres/data/helper_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package postgres 4 | 5 | import ( 6 | "context" 7 | "sync" 8 | "testing" 9 | 10 | "github.com/stretchr/testify/require" 11 | 12 | pglib "github.com/xataio/pgstream/internal/postgres" 13 | "github.com/xataio/pgstream/pkg/snapshot" 14 | ) 15 | 16 | type mockRowProcessor struct { 17 | rowChan chan *snapshot.Row 18 | once sync.Once 19 | } 20 | 21 | func (mp *mockRowProcessor) process(ctx context.Context, row *snapshot.Row) error { 22 | mp.rowChan <- row 23 | return nil 24 | } 25 | 26 | func (mp *mockRowProcessor) close() { 27 | mp.once.Do(func() { close(mp.rowChan) }) 28 | } 29 | 30 | func 
execQuery(t *testing.T, ctx context.Context, pgurl, query string) { 31 | conn, err := pglib.NewConn(ctx, pgurl) 32 | require.NoError(t, err) 33 | defer conn.Close(ctx) 34 | 35 | _, err = conn.Exec(ctx, query) 36 | require.NoError(t, err) 37 | } 38 | -------------------------------------------------------------------------------- /pkg/snapshot/generator/postgres/data/instrumented_table_snapshot_generator.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package postgres 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/xataio/pgstream/pkg/otel" 9 | "go.opentelemetry.io/otel/attribute" 10 | "go.opentelemetry.io/otel/trace" 11 | ) 12 | 13 | type instrumentedTableSnapshotGenerator struct { 14 | tracer trace.Tracer 15 | snapshotTableFn snapshotTableFn 16 | } 17 | 18 | func newInstrumentedTableSnapshotGenerator(fn snapshotTableFn, i *otel.Instrumentation) *instrumentedTableSnapshotGenerator { 19 | return &instrumentedTableSnapshotGenerator{ 20 | tracer: i.Tracer, 21 | snapshotTableFn: fn, 22 | } 23 | } 24 | 25 | func (i *instrumentedTableSnapshotGenerator) snapshotTable(ctx context.Context, snapshotID string, schema, table string) (err error) { 26 | ctx, span := otel.StartSpan(ctx, i.tracer, "tableSnapshotGenerator.SnapshotTable", trace.WithAttributes([]attribute.KeyValue{ 27 | {Key: "schema", Value: attribute.StringValue(schema)}, 28 | {Key: "table", Value: attribute.StringValue(table)}, 29 | }...)) 30 | defer otel.CloseSpan(span, err) 31 | return i.snapshotTableFn(ctx, snapshotID, schema, table) 32 | } 33 | -------------------------------------------------------------------------------- /pkg/snapshot/generator/postgres/schema/pgdumprestore/helper_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package pgdumprestore 4 | 5 | import ( 6 | "context" 7 | 8 | pglib 
"github.com/xataio/pgstream/internal/postgres" 9 | ) 10 | 11 | type mockPgDump struct { 12 | dumpFn func(context.Context, uint, pglib.PGDumpOptions) ([]byte, error) 13 | dumpCalls uint 14 | } 15 | 16 | func newMockPgdump(dumpFn func(context.Context, uint, pglib.PGDumpOptions) ([]byte, error)) pglib.PGDumpFn { 17 | m := &mockPgDump{ 18 | dumpFn: dumpFn, 19 | } 20 | return m.dump 21 | } 22 | 23 | func (m *mockPgDump) dump(ctx context.Context, po pglib.PGDumpOptions) ([]byte, error) { 24 | m.dumpCalls++ 25 | return m.dumpFn(ctx, m.dumpCalls, po) 26 | } 27 | 28 | type mockPgRestore struct { 29 | restoreFn func(context.Context, uint, pglib.PGRestoreOptions, []byte) (string, error) 30 | restoreCalls uint 31 | } 32 | 33 | func newMockPgrestore(restoreFn func(context.Context, uint, pglib.PGRestoreOptions, []byte) (string, error)) pglib.PGRestoreFn { 34 | m := &mockPgRestore{ 35 | restoreFn: restoreFn, 36 | } 37 | return m.restore 38 | } 39 | 40 | func (m *mockPgRestore) restore(ctx context.Context, po pglib.PGRestoreOptions, dump []byte) (string, error) { 41 | m.restoreCalls++ 42 | return m.restoreFn(ctx, m.restoreCalls, po, dump) 43 | } 44 | -------------------------------------------------------------------------------- /pkg/snapshot/generator/postgres/schema/pgdumprestore/test/test_dump_constraints.sql: -------------------------------------------------------------------------------- 1 | \connect test 2 | 3 | ALTER TABLE ONLY musicbrainz.alternative_medium 4 | ADD CONSTRAINT alternative_medium_pkey PRIMARY KEY (id); 5 | 6 | ALTER TABLE ONLY musicbrainz.alternative_medium_track 7 | ADD CONSTRAINT alternative_medium_track_pkey PRIMARY KEY (alternative_medium, track); 8 | 9 | CREATE INDEX area_alias_idx_txt ON musicbrainz.area_alias USING gin (musicbrainz.mb_simple_tsvector((name)::text)); 10 | 11 | CREATE UNIQUE INDEX area_alias_type_idx_gid ON musicbrainz.area_alias_type USING btree (gid); 12 | 13 | CREATE TRIGGER a_del_alternative_medium_track AFTER DELETE ON 
musicbrainz.alternative_medium_track FOR EACH ROW EXECUTE FUNCTION musicbrainz.a_del_alternative_medium_track(); 14 | 15 | CREATE TRIGGER a_del_alternative_release AFTER DELETE ON musicbrainz.alternative_release FOR EACH ROW EXECUTE FUNCTION musicbrainz.a_del_alternative_release_or_track(); 16 | 17 | CREATE CONSTRAINT TRIGGER apply_artist_release_group_pending_updates AFTER INSERT OR DELETE OR UPDATE ON musicbrainz.release DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE FUNCTION musicbrainz.apply_artist_release_group_pending_updates(); 18 | 19 | CREATE CONSTRAINT TRIGGER apply_artist_release_group_pending_updates AFTER INSERT OR DELETE OR UPDATE ON musicbrainz.release_group DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE FUNCTION musicbrainz.apply_artist_release_group_pending_updates(); 20 | 21 | ALTER TABLE ONLY musicbrainz.alternative_medium 22 | ADD CONSTRAINT alternative_medium_fk_alternative_release FOREIGN KEY (alternative_release) REFERENCES musicbrainz.alternative_release(id); 23 | 24 | ALTER TABLE ONLY musicbrainz.alternative_medium 25 | ADD CONSTRAINT alternative_medium_fk_medium FOREIGN KEY (medium) REFERENCES musicbrainz.medium(id); 26 | -------------------------------------------------------------------------------- /pkg/snapshot/generator/postgres/tablefinder/instrumented_table_discovery.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package tablefinder 4 | 5 | import ( 6 | "context" 7 | 8 | pglib "github.com/xataio/pgstream/internal/postgres" 9 | "github.com/xataio/pgstream/pkg/otel" 10 | "go.opentelemetry.io/otel/attribute" 11 | "go.opentelemetry.io/otel/trace" 12 | ) 13 | 14 | type tableDiscoverer struct { 15 | fn tableDiscoveryFn 16 | tracer trace.Tracer 17 | } 18 | 19 | func newInstrumentedTableDiscoveryFn(fn tableDiscoveryFn, i *otel.Instrumentation) tableDiscoveryFn { 20 | td := tableDiscoverer{ 21 | fn: fn, 22 | tracer: i.Tracer, 23 | } 24 | return 
td.discoverTables 25 | } 26 | 27 | func (i *tableDiscoverer) discoverTables(ctx context.Context, conn pglib.Querier, schema string) (tables []string, err error) { 28 | ctx, span := otel.StartSpan(ctx, i.tracer, "tableFinder.discoverTables", trace.WithAttributes(attribute.KeyValue{ 29 | Key: "schema", Value: attribute.StringValue(schema), 30 | })) 31 | defer otel.CloseSpan(span, err) 32 | 33 | return i.fn(ctx, conn, schema) 34 | } 35 | -------------------------------------------------------------------------------- /pkg/snapshot/generator/snapshot_generator.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package generator 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/xataio/pgstream/pkg/snapshot" 9 | ) 10 | 11 | type SnapshotGenerator interface { 12 | CreateSnapshot(ctx context.Context, snapshot *snapshot.Snapshot) error 13 | Close() error 14 | } 15 | -------------------------------------------------------------------------------- /pkg/snapshot/snapshot.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package snapshot 4 | 5 | import ( 6 | "context" 7 | ) 8 | 9 | type Snapshot struct { 10 | SchemaName string 11 | TableNames []string 12 | } 13 | 14 | type Request struct { 15 | Snapshot Snapshot 16 | Status Status 17 | Errors *Errors 18 | } 19 | 20 | type Row struct { 21 | Schema string 22 | Table string 23 | Columns []Column 24 | } 25 | 26 | type Column struct { 27 | Name string 28 | Type string 29 | Value any 30 | } 31 | 32 | type RowProcessor func(context.Context, *Row) error 33 | 34 | type Status string 35 | 36 | const ( 37 | StatusRequested = Status("requested") 38 | StatusInProgress = Status("in progress") 39 | StatusCompleted = Status("completed") 40 | ) 41 | 42 | func (s *Snapshot) IsValid() bool { 43 | return s != nil && s.SchemaName != "" && len(s.TableNames) > 0 44 | } 45 | 46 | 
func (r *Request) MarkCompleted(err error) { 47 | r.Status = StatusCompleted 48 | r.Errors = NewErrors(err) 49 | } 50 | 51 | func (r *Request) MarkInProgress() { 52 | r.Status = StatusInProgress 53 | } 54 | 55 | func (r *Request) IsPending() bool { 56 | return r.Status == StatusRequested 57 | } 58 | 59 | func (r *Request) HasFailed() bool { 60 | return r.Status == StatusCompleted && r.Errors != nil 61 | } 62 | 63 | func (r *Request) HasFailedForTable(table string) bool { 64 | return r.Status == StatusCompleted && r.Errors.IsTableError(table) 65 | } 66 | -------------------------------------------------------------------------------- /pkg/snapshot/store/mocks/mock_snapshot_store.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package mocks 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/xataio/pgstream/pkg/snapshot" 9 | ) 10 | 11 | type Store struct { 12 | CreateSnapshotRequestFn func(context.Context, *snapshot.Request) error 13 | UpdateSnapshotRequestFn func(context.Context, uint, *snapshot.Request) error 14 | GetSnapshotRequestsByStatusFn func(ctx context.Context, status snapshot.Status) ([]*snapshot.Request, error) 15 | GetSnapshotRequestsBySchemaFn func(ctx context.Context, s string) ([]*snapshot.Request, error) 16 | updateSnapshotRequestCalls uint 17 | } 18 | 19 | func (m *Store) CreateSnapshotRequest(ctx context.Context, s *snapshot.Request) error { 20 | return m.CreateSnapshotRequestFn(ctx, s) 21 | } 22 | 23 | func (m *Store) UpdateSnapshotRequest(ctx context.Context, s *snapshot.Request) error { 24 | m.updateSnapshotRequestCalls++ 25 | return m.UpdateSnapshotRequestFn(ctx, m.updateSnapshotRequestCalls, s) 26 | } 27 | 28 | func (m *Store) GetSnapshotRequestsByStatus(ctx context.Context, status snapshot.Status) ([]*snapshot.Request, error) { 29 | return m.GetSnapshotRequestsByStatusFn(ctx, status) 30 | } 31 | 32 | func (m *Store) GetSnapshotRequestsBySchema(ctx 
// mockMigrator is a test double for the schema migrator used by the stream
// package. Behavior is injected via the *Fn fields; when a field is nil the
// corresponding method returns zero values.
type mockMigrator struct {
	versionFn func() (uint, bool, error)
	closeFn   func() (error, error)
}

// Version returns the injected versionFn result, or (0, false, nil) when no
// override is configured.
func (m *mockMigrator) Version() (uint, bool, error) {
	if m.versionFn == nil {
		return 0, false, nil
	}
	return m.versionFn()
}

// Close returns the injected closeFn result, or (nil, nil) when no override
// is configured.
func (m *mockMigrator) Close() (error, error) {
	if m.closeFn == nil {
		return nil, nil
	}
	return m.closeFn()
}
shared_preload_libraries = 'wal2json' 6 | 7 | # REPLICATION 8 | wal_level = logical # minimal, archive, hot_standby, or logical (change requires restart) 9 | max_wal_senders = 4 # max number of walsender processes (change requires restart) 10 | #wal_keep_segments = 4 # in logfile segments, 16MB each; 0 disables 11 | #wal_sender_timeout = 60s # in milliseconds; 0 disables 12 | max_replication_slots = 4 # max number of replication slots (change requires restart) 13 | -------------------------------------------------------------------------------- /pkg/stream/integration/setup_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package integration 4 | 5 | import ( 6 | "context" 7 | "log" 8 | "os" 9 | "testing" 10 | 11 | "github.com/xataio/pgstream/internal/testcontainers" 12 | "github.com/xataio/pgstream/pkg/stream" 13 | ) 14 | 15 | func TestMain(m *testing.M) { 16 | // if integration tests are not enabled, nothing to setup 17 | if os.Getenv("PGSTREAM_INTEGRATION_TESTS") != "" { 18 | ctx := context.Background() 19 | pgcleanup, err := testcontainers.SetupPostgresContainer(ctx, &pgurl, testcontainers.Postgres14, "config/postgresql.conf") 20 | if err != nil { 21 | log.Fatal(err) 22 | } 23 | defer pgcleanup() 24 | 25 | if err := stream.Init(ctx, pgurl, ""); err != nil { 26 | log.Fatal(err) 27 | } 28 | 29 | kafkacleanup, err := testcontainers.SetupKafkaContainer(ctx, &kafkaBrokers) 30 | if err != nil { 31 | log.Fatal(err) 32 | } 33 | defer kafkacleanup() 34 | 35 | oscleanup, err := testcontainers.SetupOpenSearchContainer(ctx, &opensearchURL) 36 | if err != nil { 37 | log.Fatal(err) 38 | } 39 | defer oscleanup() 40 | 41 | escleanup, err := testcontainers.SetupElasticsearchContainer(ctx, &elasticsearchURL) 42 | if err != nil { 43 | log.Fatal(err) 44 | } 45 | defer escleanup() 46 | 47 | targetPGCleanup, err := testcontainers.SetupPostgresContainer(ctx, &targetPGURL, 
testcontainers.Postgres17) 48 | if err != nil { 49 | log.Fatal(err) 50 | } 51 | defer targetPGCleanup() 52 | } 53 | 54 | os.Exit(m.Run()) 55 | } 56 | -------------------------------------------------------------------------------- /pkg/stream/stream_snapshot.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package stream 4 | 5 | import ( 6 | "context" 7 | "errors" 8 | "fmt" 9 | 10 | loglib "github.com/xataio/pgstream/pkg/log" 11 | "github.com/xataio/pgstream/pkg/otel" 12 | snapshotlistener "github.com/xataio/pgstream/pkg/wal/listener/snapshot" 13 | snapshotbuilder "github.com/xataio/pgstream/pkg/wal/listener/snapshot/builder" 14 | "golang.org/x/sync/errgroup" 15 | ) 16 | 17 | func Snapshot(ctx context.Context, logger loglib.Logger, config *Config, instrumentation *otel.Instrumentation) error { 18 | if config.Listener.Snapshot == nil { 19 | return errors.New("source snapshot not configured") 20 | } 21 | 22 | if err := config.IsValid(); err != nil { 23 | return fmt.Errorf("incompatible configuration: %w", err) 24 | } 25 | 26 | eg, ctx := errgroup.WithContext(ctx) 27 | 28 | // Processor 29 | 30 | processor, err := buildProcessor(ctx, logger, &config.Processor, nil, instrumentation) 31 | if err != nil { 32 | return err 33 | } 34 | defer processor.Close() 35 | 36 | var closer closerFn 37 | processor, closer, err = addProcessorModifiers(ctx, config, logger, processor, instrumentation) 38 | if err != nil { 39 | return err 40 | } 41 | defer closer() 42 | 43 | // Listener 44 | 45 | snapshotGenerator, err := snapshotbuilder.NewSnapshotGenerator( 46 | ctx, 47 | config.Listener.Snapshot, 48 | processor.ProcessWALEvent, 49 | logger, 50 | instrumentation) 51 | if err != nil { 52 | return err 53 | } 54 | listener := snapshotlistener.New(snapshotGenerator) 55 | defer listener.Close() 56 | 57 | eg.Go(func() error { 58 | defer logger.Info("stopping postgres snapshot listener...") 59 | 
logger.Info("running postgres snapshot listener...") 60 | return listener.Listen(ctx) 61 | }) 62 | 63 | if err := eg.Wait(); err != nil { 64 | if !errors.Is(err, context.Canceled) { 65 | return err 66 | } 67 | } 68 | 69 | return nil 70 | } 71 | -------------------------------------------------------------------------------- /pkg/tls/test/test.csr: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE REQUEST----- 2 | MIIEjTCCAnUCAQAwSDELMAkGA1UEBhMCU1AxEzARBgNVBAgMClNvbWUtU3RhdGUx 3 | FTATBgNVBAoMDFBnc3RyZWFtIEx0ZDENMAsGA1UECwwEWGF0YTCCAiIwDQYJKoZI 4 | hvcNAQEBBQADggIPADCCAgoCggIBAMZG8/obpyvJ+WkGkdO/hbExSN1nWR206/Dh 5 | pSYzcZyI1Jj0R4Af0gD/EFVM+4KTDr20nOofmfBWOYHV+KwiKtWQQ+oT0+xVcTT6 6 | IC5I5K9+AXERuTu/NbnjkxuC/1u7K511RrUK0lxUra2/B8mGTc9nu2g415GVk2hU 7 | rJjWEX09hVH7xBSmnzYN+IfepsftxgnR5m2YzqOSnsphBBfyyOsL+3Jo5Uv4yY22 8 | bnJCDxx/TPG37EcGMb4Q/aCWk5mXm4Io5mBfcl7SNxy867JpBO2CCP6fFaWRUGmF 9 | /O+YBD5z0cSb1wZrMBRgezggpa2gacYVtsWrQYzAxMtCf3MwM89z8i+W9ME3cMhg 10 | T7b6T0XCQ5gkqmLxDCsg9ocNV0W0wb5EEPlt4/TeqMmLbpLkIa6rnR2C6gg+wFSQ 11 | Vk8c/aBtm9BjKFLWWUwDUPPzp1RdIVAfuobDK37dVqr+1m6oTDl+BoDuM5PsgTS4 12 | bFW2ZJVZB+d4IFmGhqTtKKmYa7QbwhzF8i5ShV+419KOt6hRkJ/jREdea7ZS/uyK 13 | wBAvPHZVkUk8tfTpcuKTFVKsgXXV8uwwnI2W6safYbTXaN+7gfA88wntaigjeij/ 14 | LrO+itAiv9GTGkpMlXyuAX/d5+4j0EHcV64NYL61GvMcbJ5G0SeQrXuBlgMTYaNr 15 | O7miEujnAgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEAGYFGSgMARWuH5VXK0/Fl 16 | nNg+/rzdff1NYkY9QrvFxCQeVJ9rD1ml7VLZXDtXhMNEGJyYbkouc1Ehx0BsihT6 17 | YztcnQ6TzWAzvr3Ns9X3riADzXxdDHV5xs+8VPV8RvT3XNcrlw2NQmzJ4Juc8PkT 18 | 4ZfguZBywAmFTw1oX8JqlQSp5pYtP7popsvGPS6ieUm0Kmv8kK3sRDs+JSc7iXtB 19 | /HymqeSylFNHgFZsdbYmu32v2qbcqimAitB/v5tGNhuiMXx6vEeQnB69V+AV70Rl 20 | 9dnvAo7ihTRMzecUVsoDFtc8OWSPdTm6t8vDI2JqmeDN25Xhyf89cKwoY97NhA99 21 | ds5WHs6TzsHohPZAsaxtZnjwxMEne7Y4FLFmTHVk5o0POTZcOC/sMB1iBDsd/YJe 22 | AYsoiqLYtu6x6Avfe6LXWYWYa/R4/UXh8H6WiChsFXzOIilp3apjaeHM3z7iKx2S 23 | 
VtGyVTrrcbzRiF0ShKVnbDXnvcoNZxPiXfh6Zz4SkBbV01T3hluBwzp4mjcWpiv3 24 | AOAWChMnbmkg/T+OME6e1JVHDR5tAC/7vF2QkZYpiH2RVnZmCTDWBcRGpMkhkRgF 25 | eycowzKBkgIOcJ99p0sGEqQ3W0J1M4bzuumncLID08EG/dEp1eIdunahcHHyhnnv 26 | BcGFr2/OxuaVmxcy5/QQjAg= 27 | -----END CERTIFICATE REQUEST----- 28 | -------------------------------------------------------------------------------- /pkg/tls/test/test.pem: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIFPzCCAyegAwIBAgIULV6zejFwt/Tri8WKZFBxj15uvaIwDQYJKoZIhvcNAQEL 3 | BQAwSDELMAkGA1UEBhMCU1AxEzARBgNVBAgMClNvbWUtU3RhdGUxFTATBgNVBAoM 4 | DFBnc3RyZWFtIEx0ZDENMAsGA1UECwwEWGF0YTAeFw0yNDA4MDUxNTE3NDBaFw0y 5 | NTA4MDUxNTE3NDBaMEgxCzAJBgNVBAYTAlNQMRMwEQYDVQQIDApTb21lLVN0YXRl 6 | MRUwEwYDVQQKDAxQZ3N0cmVhbSBMdGQxDTALBgNVBAsMBFhhdGEwggIiMA0GCSqG 7 | SIb3DQEBAQUAA4ICDwAwggIKAoICAQDGRvP6G6cryflpBpHTv4WxMUjdZ1kdtOvw 8 | 4aUmM3GciNSY9EeAH9IA/xBVTPuCkw69tJzqH5nwVjmB1fisIirVkEPqE9PsVXE0 9 | +iAuSOSvfgFxEbk7vzW545Mbgv9buyuddUa1CtJcVK2tvwfJhk3PZ7toONeRlZNo 10 | VKyY1hF9PYVR+8QUpp82DfiH3qbH7cYJ0eZtmM6jkp7KYQQX8sjrC/tyaOVL+MmN 11 | tm5yQg8cf0zxt+xHBjG+EP2glpOZl5uCKOZgX3Je0jccvOuyaQTtggj+nxWlkVBp 12 | hfzvmAQ+c9HEm9cGazAUYHs4IKWtoGnGFbbFq0GMwMTLQn9zMDPPc/IvlvTBN3DI 13 | YE+2+k9FwkOYJKpi8QwrIPaHDVdFtMG+RBD5beP03qjJi26S5CGuq50dguoIPsBU 14 | kFZPHP2gbZvQYyhS1llMA1Dz86dUXSFQH7qGwyt+3Vaq/tZuqEw5fgaA7jOT7IE0 15 | uGxVtmSVWQfneCBZhoak7SipmGu0G8IcxfIuUoVfuNfSjreoUZCf40RHXmu2Uv7s 16 | isAQLzx2VZFJPLX06XLikxVSrIF11fLsMJyNlurGn2G012jfu4HwPPMJ7WooI3oo 17 | /y6zvorQIr/RkxpKTJV8rgF/3efuI9BB3FeuDWC+tRrzHGyeRtEnkK17gZYDE2Gj 18 | azu5ohLo5wIDAQABoyEwHzAdBgNVHQ4EFgQUcj2UaSuSsgkex5hfS0eDAkPdbJ0w 19 | DQYJKoZIhvcNAQELBQADggIBAJK6fMa4L7iIQSlPzG3pHTSSLQd9Unev2naX9/S1 20 | Yo55Tj9VCBhViGa7CbDtaW7ZYr/fXydZVcthXYZzZ7QEVyYaguWlzXLjy/qF8kgk 21 | cDwinFa8hiJnP+BJUGnzq3LYQJ2labI4YUscc6p4inh9y8JZ3n33VqX2YjqCdHMA 22 | j8nw5xpThdQ/a8z3Z8ugFCLO09Hts1eKFhs5PwaQvjkoX+dSE2FeX51OMlLOPDsu 23 | 
C6ScDU7FG0J5JE36nRqp2XwSdGAfc5pHKmsuomxnoE/d/hL7O6zouo/jvQyCNFtn 24 | 5/pzhkhhOjUTP2gIW5ueNn8oQF9F32GWRNJGQVTBiK17dWvHxiSvIzgKqUyrD8lI 25 | VefVEQgRbfHD3nSk6G30gAeWzt8T10lI8MtQWTtoFJGFBaSVr/lSyHo4QS4SyTmK 26 | uvnFGJVivRtaAP4d+u/6/1Mvy5sNsSiWRRKfKTB/FEerbe7blhnqJhrBp98nKBv/ 27 | IJtmewD7lVGGDY8sWnxnpNyqLVhvRilO9d+4oQWqKgN8m1PXAI2jDYA0RZ+Qs/ko 28 | 5FB88mRi8hNOhmADXguKlnCid/X0StK6wpphvFIaNnGLFzjeZXc65BoV1c2+Boxe 29 | cNwpkrW5tKaf/Ox1ntHnnQBpUhM4AxGoczfIj0dEnYio54gagsAMK/Pjq2KiX7Bi 30 | RUm4 31 | -----END CERTIFICATE----- 32 | -------------------------------------------------------------------------------- /pkg/transformers/builder/transformer_builder_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package builder 4 | 5 | import ( 6 | "testing" 7 | 8 | "github.com/stretchr/testify/require" 9 | "github.com/xataio/pgstream/pkg/transformers" 10 | ) 11 | 12 | func TestTransformerBuilder_New(t *testing.T) { 13 | tests := []struct { 14 | name string 15 | config *transformers.Config 16 | wantErr error 17 | }{ 18 | { 19 | name: "valid greenmask string transformer", 20 | config: &transformers.Config{ 21 | Name: transformers.GreenmaskString, 22 | Parameters: map[string]any{"max_length": 10}, 23 | }, 24 | wantErr: nil, 25 | }, 26 | { 27 | name: "invalid parameter for phone number transformer", 28 | config: &transformers.Config{ 29 | Name: transformers.String, 30 | Parameters: map[string]any{"invalid": "param"}, 31 | }, 32 | wantErr: transformers.ErrUnknownParameter, 33 | }, 34 | { 35 | name: "unsupported transformer", 36 | config: &transformers.Config{ 37 | Name: "unsupported", 38 | Parameters: map[string]any{}, 39 | }, 40 | wantErr: transformers.ErrUnsupportedTransformer, 41 | }, 42 | } 43 | 44 | for _, tt := range tests { 45 | t.Run(tt.name, func(t *testing.T) { 46 | t.Parallel() 47 | tb := NewTransformerBuilder() 48 | _, err := tb.New(tt.config) 49 | require.ErrorIs(t, err, 
tt.wantErr) 50 | }) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /pkg/transformers/generators/deterministic_bytes_generator.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package generators 4 | 5 | import ( 6 | greenmaskgenerators "github.com/eminano/greenmask/pkg/generators" 7 | ) 8 | 9 | func NewDeterministicBytesGenerator(size int) (Generator, error) { 10 | return greenmaskgenerators.GetHashBytesGen([]byte{}, size) 11 | } 12 | -------------------------------------------------------------------------------- /pkg/transformers/generators/generator.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package generators 4 | 5 | type Generator interface { 6 | Generate([]byte) ([]byte, error) 7 | Size() int 8 | } 9 | -------------------------------------------------------------------------------- /pkg/transformers/generators/random_bytes_generator.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package generators 4 | 5 | import ( 6 | "time" 7 | 8 | greenmaskgenerators "github.com/eminano/greenmask/pkg/generators" 9 | ) 10 | 11 | func NewRandomBytesGenerator(size int) Generator { 12 | return greenmaskgenerators.NewRandomBytes(time.Now().UnixNano(), size) 13 | } 14 | -------------------------------------------------------------------------------- /pkg/transformers/greenmask/greenmask_boolean_transformer.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package greenmask 4 | 5 | import ( 6 | "context" 7 | 8 | greenmasktransformers "github.com/eminano/greenmask/pkg/generators/transformers" 9 | "github.com/xataio/pgstream/pkg/transformers" 10 | ) 11 | 12 | type BooleanTransformer 
struct { 13 | transformer *greenmasktransformers.RandomBoolean 14 | } 15 | 16 | var ( 17 | booleanParams = []transformers.Parameter{ 18 | { 19 | Name: "generator", 20 | SupportedType: "string", 21 | Default: "random", 22 | Dynamic: false, 23 | Required: false, 24 | Values: []any{"random", "deterministic"}, 25 | }, 26 | } 27 | booleanCompatibleTypes = []transformers.SupportedDataType{ 28 | transformers.BooleanDataType, 29 | transformers.ByteArrayDataType, 30 | } 31 | ) 32 | 33 | func NewBooleanTransformer(params transformers.ParameterValues) (*BooleanTransformer, error) { 34 | t := greenmasktransformers.NewRandomBoolean() 35 | if err := setGenerator(t, params); err != nil { 36 | return nil, err 37 | } 38 | return &BooleanTransformer{ 39 | transformer: t, 40 | }, nil 41 | } 42 | 43 | func (bt *BooleanTransformer) Transform(_ context.Context, value transformers.Value) (any, error) { 44 | var toTransform []byte 45 | switch val := value.TransformValue.(type) { 46 | case bool: 47 | if val { 48 | toTransform = []byte{1} 49 | } else { 50 | toTransform = []byte{0} 51 | } 52 | case []byte: 53 | toTransform = val 54 | default: 55 | return nil, transformers.ErrUnsupportedValueType 56 | } 57 | 58 | ret, err := bt.transformer.Transform(toTransform) 59 | if err != nil { 60 | return nil, err 61 | } 62 | return bool(ret), nil 63 | } 64 | 65 | func (bt *BooleanTransformer) CompatibleTypes() []transformers.SupportedDataType { 66 | return booleanCompatibleTypes 67 | } 68 | 69 | func (bt *BooleanTransformer) Type() transformers.TransformerType { 70 | return transformers.GreenmaskBoolean 71 | } 72 | 73 | func BooleanTransformerDefinition() *transformers.Definition { 74 | return &transformers.Definition{ 75 | SupportedTypes: booleanCompatibleTypes, 76 | Parameters: booleanParams, 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /pkg/transformers/greenmask/greenmask_transformer.go: 
-------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package greenmask 4 | 5 | import ( 6 | "time" 7 | 8 | greenmaskgenerators "github.com/eminano/greenmask/pkg/generators" 9 | greenmasktransformers "github.com/eminano/greenmask/pkg/generators/transformers" 10 | "github.com/xataio/pgstream/pkg/transformers" 11 | ) 12 | 13 | const ( 14 | random = "random" 15 | deterministic = "deterministic" 16 | ) 17 | 18 | func setGenerator(t greenmasktransformers.Transformer, params transformers.ParameterValues) error { 19 | generatorType, err := getGeneratorType(params) 20 | if err != nil { 21 | return err 22 | } 23 | 24 | var greenmaskGenerator greenmaskgenerators.Generator 25 | switch generatorType { 26 | case random: 27 | greenmaskGenerator = greenmaskgenerators.NewRandomBytes(time.Now().UnixNano(), t.GetRequiredGeneratorByteLength()) 28 | case deterministic: 29 | var err error 30 | greenmaskGenerator, err = greenmaskgenerators.GetHashBytesGen([]byte{}, t.GetRequiredGeneratorByteLength()) 31 | if err != nil { 32 | return err 33 | } 34 | default: 35 | return transformers.ErrUnsupportedGenerator 36 | } 37 | 38 | return t.SetGenerator(greenmaskGenerator) 39 | } 40 | 41 | func getGeneratorType(params transformers.ParameterValues) (string, error) { 42 | // default to using the random generator 43 | return findParameter(params, "generator", random) 44 | } 45 | 46 | func findParameter[T any](params transformers.ParameterValues, name string, defaultVal T) (T, error) { 47 | return transformers.FindParameterWithDefault(params, name, defaultVal) 48 | } 49 | 50 | func findParameterArray[T any](params transformers.ParameterValues, name string, defaultVal []T) ([]T, error) { 51 | val, found, err := transformers.FindParameterArray[T](params, name) 52 | if err != nil { 53 | return val, err 54 | } 55 | if !found { 56 | return defaultVal, nil 57 | } 58 | return val, nil 59 | } 60 | 
-------------------------------------------------------------------------------- /pkg/transformers/greenmask/greenmask_uuid_transformer.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package greenmask 4 | 5 | import ( 6 | "context" 7 | 8 | greenmasktransformers "github.com/eminano/greenmask/pkg/generators/transformers" 9 | "github.com/google/uuid" 10 | "github.com/xataio/pgstream/pkg/transformers" 11 | ) 12 | 13 | type UUIDTransformer struct { 14 | transformer *greenmasktransformers.RandomUuidTransformer 15 | } 16 | 17 | var ( 18 | uuidParams = []transformers.Parameter{ 19 | { 20 | Name: "generator", 21 | SupportedType: "string", 22 | Default: "random", 23 | Dynamic: false, 24 | Required: false, 25 | Values: []any{"random", "deterministic"}, 26 | }, 27 | } 28 | uuidCompatibleTypes = []transformers.SupportedDataType{ 29 | transformers.StringDataType, 30 | transformers.ByteArrayDataType, 31 | transformers.UUIDDataType, 32 | transformers.UInt8ArrayOf16DataType, 33 | } 34 | ) 35 | 36 | func NewUUIDTransformer(params transformers.ParameterValues) (*UUIDTransformer, error) { 37 | t := greenmasktransformers.NewRandomUuidTransformer() 38 | if err := setGenerator(t, params); err != nil { 39 | return nil, err 40 | } 41 | return &UUIDTransformer{ 42 | transformer: t, 43 | }, nil 44 | } 45 | 46 | func (ut *UUIDTransformer) Transform(_ context.Context, value transformers.Value) (any, error) { 47 | var toTransform []byte 48 | switch val := value.TransformValue.(type) { 49 | case string: 50 | toTransform = []byte(val) 51 | case uuid.UUID: 52 | toTransform = val[:] 53 | case []byte: 54 | toTransform = val 55 | case [16]uint8: 56 | toTransform = val[:] 57 | default: 58 | return nil, transformers.ErrUnsupportedValueType 59 | } 60 | ret, err := ut.transformer.Transform(toTransform) 61 | if err != nil { 62 | return nil, err 63 | } 64 | return ret, nil 65 | } 66 | 67 | func (ut *UUIDTransformer) 
CompatibleTypes() []transformers.SupportedDataType { 68 | return uuidCompatibleTypes 69 | } 70 | 71 | func (ut *UUIDTransformer) Type() transformers.TransformerType { 72 | return transformers.GreenmaskUUID 73 | } 74 | 75 | func UUIDTransformerDefinition() *transformers.Definition { 76 | return &transformers.Definition{ 77 | SupportedTypes: uuidCompatibleTypes, 78 | Parameters: uuidParams, 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /pkg/transformers/literal_string_transformer.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package transformers 4 | 5 | import ( 6 | "context" 7 | "errors" 8 | "fmt" 9 | ) 10 | 11 | type LiteralStringTransformer struct { 12 | literal string 13 | } 14 | 15 | var ( 16 | errLiteralStringNotFound = errors.New("literal_string_transformer: literal parameter not found") 17 | literalStringCompatibleTypes = []SupportedDataType{ 18 | AllDataTypes, 19 | } 20 | literalStringParams = []Parameter{ 21 | { 22 | Name: "literal", 23 | SupportedType: "string", 24 | Default: nil, 25 | Dynamic: false, 26 | Required: true, 27 | }, 28 | } 29 | ) 30 | 31 | func NewLiteralStringTransformer(params ParameterValues) (*LiteralStringTransformer, error) { 32 | literal, found, err := FindParameter[string](params, "literal") 33 | if err != nil { 34 | return nil, fmt.Errorf("literal_string_transformer: literal must be a string: %w", err) 35 | } 36 | if !found { 37 | return nil, errLiteralStringNotFound 38 | } 39 | 40 | return &LiteralStringTransformer{ 41 | literal: literal, 42 | }, nil 43 | } 44 | 45 | func (lst *LiteralStringTransformer) Transform(_ context.Context, value Value) (any, error) { 46 | return lst.literal, nil 47 | } 48 | 49 | func (lst *LiteralStringTransformer) CompatibleTypes() []SupportedDataType { 50 | return literalStringCompatibleTypes 51 | } 52 | 53 | func (lst *LiteralStringTransformer) Type() 
TransformerType { 54 | return LiteralString 55 | } 56 | 57 | func LiteralStringTransformerDefinition() *Definition { 58 | return &Definition{ 59 | SupportedTypes: literalStringCompatibleTypes, 60 | Parameters: literalStringParams, 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /pkg/transformers/literal_string_transformer_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package transformers 4 | 5 | import ( 6 | "context" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | func TestLiteralStringTransformer(t *testing.T) { 13 | t.Parallel() 14 | tests := []struct { 15 | name string 16 | params ParameterValues 17 | wantErr error 18 | }{ 19 | { 20 | name: "ok - valid", 21 | params: ParameterValues{ 22 | "literal": "test", 23 | }, 24 | wantErr: nil, 25 | }, 26 | { 27 | name: "error - invalid literal", 28 | params: ParameterValues{ 29 | "literal": 123, 30 | }, 31 | wantErr: ErrInvalidParameters, 32 | }, 33 | { 34 | name: "error - empty literal", 35 | params: ParameterValues{}, 36 | wantErr: errLiteralStringNotFound, 37 | }, 38 | } 39 | for _, tc := range tests { 40 | t.Run(tc.name, func(t *testing.T) { 41 | t.Parallel() 42 | lst, err := NewLiteralStringTransformer(tc.params) 43 | require.ErrorIs(t, err, tc.wantErr) 44 | if tc.wantErr != nil { 45 | return 46 | } 47 | require.NoError(t, err) 48 | require.NotNil(t, lst) 49 | }) 50 | } 51 | } 52 | 53 | func TestLiteralStringTransformer_Transform(t *testing.T) { 54 | t.Parallel() 55 | wantOutput := "{'output': 'testoutput'" 56 | lst, err := NewLiteralStringTransformer(ParameterValues{"literal": wantOutput}) 57 | require.NoError(t, err) 58 | tests := []struct { 59 | name string 60 | params ParameterValues 61 | input any 62 | want any 63 | wantErr error 64 | }{ 65 | { 66 | name: "ok - string", 67 | input: "testinput", 68 | wantErr: nil, 69 | }, 70 | { 71 | name: 
"ok - JSON", 72 | input: "{'json': 'jsoninput'}", 73 | wantErr: nil, 74 | }, 75 | } 76 | for _, tc := range tests { 77 | t.Run(tc.name, func(t *testing.T) { 78 | t.Parallel() 79 | 80 | got, err := lst.Transform(context.Background(), Value{TransformValue: tc.input}) 81 | require.ErrorIs(t, err, tc.wantErr) 82 | if tc.wantErr != nil { 83 | return 84 | } 85 | require.Equal(t, wantOutput, got) 86 | }) 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /pkg/transformers/mocks/mock_builder.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package mocks 4 | 5 | import ( 6 | "github.com/xataio/pgstream/pkg/transformers" 7 | ) 8 | 9 | type TransformerBuilder struct { 10 | NewFn func(*transformers.Config) (transformers.Transformer, error) 11 | } 12 | 13 | func (m *TransformerBuilder) New(cfg *transformers.Config) (transformers.Transformer, error) { 14 | return m.NewFn(cfg) 15 | } 16 | -------------------------------------------------------------------------------- /pkg/transformers/mocks/mock_transformer.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package mocks 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/xataio/pgstream/pkg/transformers" 9 | ) 10 | 11 | type Transformer struct { 12 | TransformFn func(transformers.Value) (any, error) 13 | CompatibleTypesFn func() []transformers.SupportedDataType 14 | } 15 | 16 | func (m *Transformer) Transform(_ context.Context, val transformers.Value) (any, error) { 17 | return m.TransformFn(val) 18 | } 19 | 20 | func (m *Transformer) CompatibleTypes() []transformers.SupportedDataType { 21 | return m.CompatibleTypesFn() 22 | } 23 | 24 | func (m *Transformer) Type() transformers.TransformerType { 25 | return transformers.TransformerType("mock") 26 | } 27 | 
-------------------------------------------------------------------------------- /pkg/transformers/neosync/neosync_firstname_transformer_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package neosync 4 | 5 | import ( 6 | "context" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/require" 10 | "github.com/xataio/pgstream/pkg/transformers" 11 | ) 12 | 13 | func TestFirstnameTransformer_Transform(t *testing.T) { 14 | t.Parallel() 15 | 16 | tests := []struct { 17 | name string 18 | value any 19 | params transformers.ParameterValues 20 | 21 | wantName string 22 | wantErr error 23 | }{ 24 | { 25 | name: "ok", 26 | value: "alice", 27 | params: map[string]any{ 28 | "preserve_length": false, 29 | "max_length": 4, 30 | "seed": 0, 31 | }, 32 | 33 | wantName: "Ute", 34 | wantErr: nil, 35 | }, 36 | { 37 | name: "error - invalid preserve length", 38 | value: "alice", 39 | params: map[string]any{ 40 | "preserve_length": 1, 41 | }, 42 | 43 | wantName: "", 44 | wantErr: transformers.ErrInvalidParameters, 45 | }, 46 | { 47 | name: "error - invalid max length", 48 | value: "alice", 49 | params: map[string]any{ 50 | "max_length": "1", 51 | }, 52 | 53 | wantName: "", 54 | wantErr: transformers.ErrInvalidParameters, 55 | }, 56 | { 57 | name: "error - invalid seed", 58 | value: "alice", 59 | params: map[string]any{ 60 | "seed": "1", 61 | }, 62 | 63 | wantName: "", 64 | wantErr: transformers.ErrInvalidParameters, 65 | }, 66 | } 67 | 68 | for _, tc := range tests { 69 | t.Run(tc.name, func(t *testing.T) { 70 | t.Parallel() 71 | 72 | transformer, err := NewFirstNameTransformer(tc.params) 73 | require.ErrorIs(t, err, tc.wantErr) 74 | 75 | if err != nil { 76 | return 77 | } 78 | 79 | got, err := transformer.Transform(context.Background(), transformers.Value{TransformValue: tc.value}) 80 | require.ErrorIs(t, err, tc.wantErr) 81 | require.Equal(t, tc.wantName, got) 82 | }) 83 | } 84 | } 85 | 
-------------------------------------------------------------------------------- /pkg/transformers/neosync/neosync_fullname_transformer_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package neosync 4 | 5 | import ( 6 | "context" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/require" 10 | "github.com/xataio/pgstream/pkg/transformers" 11 | ) 12 | 13 | func TestNewFullnameTransformer(t *testing.T) { 14 | t.Parallel() 15 | tests := []struct { 16 | name string 17 | params transformers.ParameterValues 18 | input any 19 | wantErr error 20 | wantName string 21 | }{ 22 | { 23 | name: "ok - valid", 24 | params: transformers.ParameterValues{ 25 | "preserve_length": false, 26 | "max_length": 20, 27 | "seed": 1234, 28 | }, 29 | input: "name surname", 30 | wantErr: nil, 31 | wantName: "Flav Di Chiara", 32 | }, 33 | { 34 | name: "error - invalid preserve_length", 35 | params: transformers.ParameterValues{ 36 | "preserve_length": 123, 37 | "max_length": 10, 38 | "seed": 123, 39 | }, 40 | wantErr: transformers.ErrInvalidParameters, 41 | }, 42 | { 43 | name: "error - invalid max_length", 44 | params: transformers.ParameterValues{ 45 | "preserve_length": true, 46 | "max_length": "invalid", 47 | "seed": 123, 48 | }, 49 | wantErr: transformers.ErrInvalidParameters, 50 | }, 51 | { 52 | name: "error - invalid seed", 53 | params: transformers.ParameterValues{ 54 | "preserve_length": true, 55 | "max_length": 10, 56 | "seed": "invalid", 57 | }, 58 | wantErr: transformers.ErrInvalidParameters, 59 | }, 60 | } 61 | for _, tc := range tests { 62 | t.Run(tc.name, func(t *testing.T) { 63 | t.Parallel() 64 | lst, err := NewFullNameTransformer(tc.params) 65 | require.ErrorIs(t, err, tc.wantErr) 66 | if tc.wantErr != nil { 67 | return 68 | } 69 | require.NoError(t, err) 70 | require.NotNil(t, lst) 71 | got, _ := lst.Transform(context.Background(), transformers.Value{TransformValue: tc.input}) 72 | 
require.Equal(t, tc.wantName, got) 73 | }) 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /pkg/transformers/neosync/neosync_lastname_transformer_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package neosync 4 | 5 | import ( 6 | "context" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/require" 10 | "github.com/xataio/pgstream/pkg/transformers" 11 | ) 12 | 13 | func TestNewLastnameTransformer(t *testing.T) { 14 | t.Parallel() 15 | tests := []struct { 16 | name string 17 | params transformers.ParameterValues 18 | input any 19 | wantErr error 20 | wantName string 21 | }{ 22 | { 23 | name: "ok - valid", 24 | params: transformers.ParameterValues{ 25 | "preserve_length": false, 26 | "max_length": 10, 27 | "seed": 123, 28 | }, 29 | input: "lastname", 30 | wantErr: nil, 31 | wantName: "Fournaris", 32 | }, 33 | { 34 | name: "error - invalid preserve_length", 35 | params: transformers.ParameterValues{ 36 | "preserve_length": 123, 37 | "max_length": 10, 38 | "seed": 123, 39 | }, 40 | wantErr: transformers.ErrInvalidParameters, 41 | }, 42 | { 43 | name: "error - invalid max_length", 44 | params: transformers.ParameterValues{ 45 | "preserve_length": true, 46 | "max_length": "invalid", 47 | "seed": 123, 48 | }, 49 | wantErr: transformers.ErrInvalidParameters, 50 | }, 51 | { 52 | name: "error - invalid seed", 53 | params: transformers.ParameterValues{ 54 | "preserve_length": true, 55 | "max_length": 10, 56 | "seed": "invalid", 57 | }, 58 | wantErr: transformers.ErrInvalidParameters, 59 | }, 60 | } 61 | for _, tc := range tests { 62 | t.Run(tc.name, func(t *testing.T) { 63 | t.Parallel() 64 | lst, err := NewLastNameTransformer(tc.params) 65 | require.ErrorIs(t, err, tc.wantErr) 66 | if tc.wantErr != nil { 67 | return 68 | } 69 | require.NoError(t, err) 70 | require.NotNil(t, lst) 71 | got, _ := lst.Transform(context.Background(), 
transformers.Value{TransformValue: tc.input}) 72 | require.Equal(t, tc.wantName, got) 73 | }) 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /pkg/transformers/neosync/neosync_string_transformer_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package neosync 4 | 5 | import ( 6 | "context" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/require" 10 | "github.com/xataio/pgstream/pkg/transformers" 11 | ) 12 | 13 | func TestStringTransformer_Transform(t *testing.T) { 14 | t.Parallel() 15 | 16 | tests := []struct { 17 | name string 18 | value any 19 | params transformers.ParameterValues 20 | 21 | wantString string 22 | wantErr error 23 | }{ 24 | { 25 | name: "ok", 26 | value: "hello", 27 | params: map[string]any{ 28 | "preserve_length": false, 29 | "min_length": 2, 30 | "max_length": 2, 31 | "seed": 0, 32 | }, 33 | 34 | wantString: "np", 35 | wantErr: nil, 36 | }, 37 | { 38 | name: "error - invalid preserve length", 39 | value: "hello", 40 | params: map[string]any{ 41 | "preserve_length": 1, 42 | }, 43 | 44 | wantString: "", 45 | wantErr: transformers.ErrInvalidParameters, 46 | }, 47 | { 48 | name: "error - invalid min length", 49 | value: "hello", 50 | params: map[string]any{ 51 | "min_length": "1", 52 | }, 53 | 54 | wantString: "", 55 | wantErr: transformers.ErrInvalidParameters, 56 | }, 57 | { 58 | name: "error - invalid max length", 59 | value: "hello", 60 | params: map[string]any{ 61 | "max_length": "1", 62 | }, 63 | 64 | wantString: "", 65 | wantErr: transformers.ErrInvalidParameters, 66 | }, 67 | { 68 | name: "error - invalid seed", 69 | value: "hello", 70 | params: map[string]any{ 71 | "seed": "1", 72 | }, 73 | 74 | wantString: "", 75 | wantErr: transformers.ErrInvalidParameters, 76 | }, 77 | } 78 | 79 | for _, tc := range tests { 80 | t.Run(tc.name, func(t *testing.T) { 81 | t.Parallel() 82 | 83 | transformer, err 
:= NewStringTransformer(tc.params) 84 | require.ErrorIs(t, err, tc.wantErr) 85 | 86 | if err != nil { 87 | return 88 | } 89 | 90 | got, err := transformer.Transform(context.Background(), transformers.Value{TransformValue: tc.value}) 91 | require.ErrorIs(t, err, tc.wantErr) 92 | require.Equal(t, tc.wantString, got) 93 | }) 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /pkg/transformers/neosync/neosync_transformer.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package neosync 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/xataio/pgstream/pkg/transformers" 9 | ) 10 | 11 | // transformer is a wrapper around a neosync transformer. Neosync transformers 12 | // return a pointer to the type, so this implementation is generic to ensure 13 | // different types are supported. 14 | type transformer[T any] struct { 15 | neosyncTransformer neosyncTransformer 16 | opts any 17 | } 18 | 19 | type neosyncTransformer interface { 20 | Transform(value any, opts any) (any, error) 21 | } 22 | 23 | func New[T any](t neosyncTransformer, opts any) *transformer[T] { 24 | return &transformer[T]{ 25 | opts: opts, 26 | neosyncTransformer: t, 27 | } 28 | } 29 | 30 | func (t *transformer[T]) Transform(_ context.Context, value transformers.Value) (any, error) { 31 | retPtr, err := t.neosyncTransformer.Transform(value.TransformValue, t.opts) 32 | if err != nil { 33 | return nil, err 34 | } 35 | 36 | ret, ok := retPtr.(*T) 37 | if !ok { 38 | return nil, transformers.ErrUnsupportedValueType 39 | } 40 | return *ret, nil 41 | } 42 | 43 | func findParameter[T any](params transformers.ParameterValues, name string) (*T, error) { 44 | var found bool 45 | var err error 46 | 47 | val := new(T) 48 | *val, found, err = transformers.FindParameter[T](params, name) 49 | if err != nil { 50 | return nil, err 51 | } 52 | if !found { 53 | val = nil 54 | } 55 | return val, 
nil 56 | } 57 | 58 | func findParameterArray[T any](params transformers.ParameterValues, name string) ([]T, error) { 59 | val, found, err := transformers.FindParameterArray[T](params, name) 60 | if err != nil { 61 | return val, err 62 | } 63 | if !found { 64 | val = nil 65 | } 66 | return val, nil 67 | } 68 | 69 | func toInt64Ptr(i *int) *int64 { 70 | if i == nil { 71 | return nil 72 | } 73 | 74 | i64 := int64(*i) 75 | return &i64 76 | } 77 | 78 | func toAnyPtr(strArray []string) *any { 79 | if len(strArray) == 0 { 80 | return nil 81 | } 82 | 83 | strArrayAny := any(strArray) 84 | return &strArrayAny 85 | } 86 | -------------------------------------------------------------------------------- /pkg/transformers/string_transformer.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package transformers 4 | 5 | import ( 6 | "context" 7 | "fmt" 8 | 9 | "golang.org/x/exp/rand" 10 | ) 11 | 12 | type StringTransformer struct { 13 | // todo: add buffer pool 14 | // maxLength int 15 | // minLength int 16 | } 17 | 18 | var ( 19 | stringParams = []Parameter{} 20 | stringCompatibleTypes = []SupportedDataType{ 21 | StringDataType, 22 | ByteArrayDataType, 23 | } 24 | ) 25 | 26 | func NewStringTransformer(params ParameterValues) (*StringTransformer, error) { 27 | return &StringTransformer{}, nil 28 | } 29 | 30 | func (st *StringTransformer) Transform(_ context.Context, v Value) (any, error) { 31 | switch str := v.TransformValue.(type) { 32 | case string: 33 | return st.transform(str), nil 34 | case []byte: 35 | return st.transform(string(str)), nil 36 | default: 37 | return v, fmt.Errorf("expected string, got %T: %w", v, ErrUnsupportedValueType) 38 | } 39 | } 40 | 41 | func (st *StringTransformer) transform(str string) string { 42 | const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 43 | 44 | b := make([]byte, len(str)) 45 | for i := range b { 46 | b[i] = 
letterBytes[rand.Intn(len(letterBytes))] 47 | } 48 | return string(b) 49 | } 50 | 51 | func (st *StringTransformer) CompatibleTypes() []SupportedDataType { 52 | return stringCompatibleTypes 53 | } 54 | 55 | func (st *StringTransformer) Type() TransformerType { 56 | return String 57 | } 58 | 59 | func StringTransformerDefinition() *Definition { 60 | return &Definition{ 61 | SupportedTypes: stringCompatibleTypes, 62 | Parameters: stringParams, 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /pkg/transformers/string_transformer_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package transformers 4 | 5 | import ( 6 | "context" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | func TestStringTransformer_Transform(t *testing.T) { 13 | t.Parallel() 14 | 15 | tests := []struct { 16 | name string 17 | value any 18 | 19 | wantLen int 20 | wantErr error 21 | }{ 22 | { 23 | name: "ok - string", 24 | value: "hello", 25 | 26 | wantLen: 5, 27 | wantErr: nil, 28 | }, 29 | { 30 | name: "ok - []byte", 31 | value: []byte("hello"), 32 | 33 | wantLen: 5, 34 | wantErr: nil, 35 | }, 36 | { 37 | name: "unsupported type", 38 | value: 1, 39 | 40 | wantLen: 0, 41 | wantErr: ErrUnsupportedValueType, 42 | }, 43 | } 44 | 45 | for _, tc := range tests { 46 | t.Run(tc.name, func(t *testing.T) { 47 | t.Parallel() 48 | 49 | st, err := NewStringTransformer(nil) 50 | require.NoError(t, err) 51 | got, err := st.Transform(context.Background(), Value{TransformValue: tc.value}) 52 | require.ErrorIs(t, err, tc.wantErr) 53 | if tc.wantErr != nil { 54 | return 55 | } 56 | 57 | require.Len(t, got, tc.wantLen) 58 | require.NotEqual(t, got, tc.value) 59 | }) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /pkg/transformers/template_transformer.go: 
-------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package transformers 4 | 5 | import ( 6 | "context" 7 | "errors" 8 | "fmt" 9 | "strings" 10 | "text/template" 11 | 12 | "github.com/Masterminds/sprig/v3" 13 | greenmasktoolkit "github.com/eminano/greenmask/pkg/toolkit" 14 | ) 15 | 16 | type TemplateTransformer struct { 17 | template *template.Template 18 | } 19 | 20 | var ( 21 | errTemplateMustBeProvided = errors.New("template_transformer: template parameter must be provided") 22 | templateCompatibleTypes = []SupportedDataType{ 23 | StringDataType, 24 | ByteArrayDataType, 25 | } 26 | templateParams = []Parameter{ 27 | { 28 | Name: "template", 29 | SupportedType: "string", 30 | Default: nil, 31 | Dynamic: false, 32 | Required: true, 33 | }, 34 | } 35 | ) 36 | 37 | func NewTemplateTransformer(params ParameterValues) (*TemplateTransformer, error) { 38 | templateStr, found, err := FindParameter[string](params, "template") 39 | if err != nil { 40 | return nil, fmt.Errorf("template_transformer: template must be a string: %w", err) 41 | } 42 | if !found { 43 | return nil, errTemplateMustBeProvided 44 | } 45 | 46 | tmpl, err := template.New("").Funcs(greenmasktoolkit.FuncMap()).Funcs(sprig.FuncMap()).Parse(templateStr) 47 | if err != nil { 48 | return nil, fmt.Errorf("template_transformer: error parsing template: %w", err) 49 | } 50 | return &TemplateTransformer{template: tmpl}, nil 51 | } 52 | 53 | func (t *TemplateTransformer) Transform(_ context.Context, value Value) (any, error) { 54 | var buf strings.Builder 55 | if err := t.template.Execute(&buf, &value); err != nil { 56 | return nil, fmt.Errorf("template_transformer: error executing template: %w", err) 57 | } 58 | return buf.String(), nil 59 | } 60 | 61 | func (t *TemplateTransformer) CompatibleTypes() []SupportedDataType { 62 | return templateCompatibleTypes 63 | } 64 | 65 | func (t *TemplateTransformer) Type() TransformerType { 66 | 
return Template 67 | } 68 | 69 | func TemplateTransformerDefinition() *Definition { 70 | return &Definition{ 71 | SupportedTypes: templateCompatibleTypes, 72 | Parameters: templateParams, 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /pkg/wal/checkpointer/postgres/helper_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package postgres 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/xataio/pgstream/pkg/wal/replication" 9 | ) 10 | 11 | type mockSyncer struct { 12 | syncLSNFn func(context.Context, replication.LSN) error 13 | } 14 | 15 | func (m *mockSyncer) SyncLSN(ctx context.Context, lsn replication.LSN) error { 16 | return m.syncLSNFn(ctx, lsn) 17 | } 18 | 19 | func (m *mockSyncer) Close() error { 20 | return nil 21 | } 22 | -------------------------------------------------------------------------------- /pkg/wal/checkpointer/postgres/wal_pg_checkpointer.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package postgres 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/xataio/pgstream/pkg/wal" 9 | "github.com/xataio/pgstream/pkg/wal/replication" 10 | pgreplication "github.com/xataio/pgstream/pkg/wal/replication/postgres" 11 | ) 12 | 13 | // Checkpointer is a postgres implementation of a wal checkpointer. It syncs the 14 | // LSN to postgres. 15 | type Checkpointer struct { 16 | syncer lsnSyncer 17 | parser replication.LSNParser 18 | } 19 | 20 | type Config struct { 21 | Replication pgreplication.Config 22 | } 23 | 24 | type lsnSyncer interface { 25 | SyncLSN(ctx context.Context, lsn replication.LSN) error 26 | Close() error 27 | } 28 | 29 | // New returns a postgres checkpointer that syncs the LSN to postgres on demand. 
30 | func New(syncer lsnSyncer) *Checkpointer { 31 | return &Checkpointer{ 32 | syncer: syncer, 33 | parser: pgreplication.NewLSNParser(), 34 | } 35 | } 36 | 37 | func (c *Checkpointer) SyncLSN(ctx context.Context, positions []wal.CommitPosition) error { 38 | if len(positions) == 0 { 39 | return nil 40 | } 41 | 42 | // we only need the max pg wal offset 43 | var max replication.LSN 44 | for _, position := range positions { 45 | lsn, err := c.parser.FromString(string(position)) 46 | if err != nil { 47 | return err 48 | } 49 | if lsn > max { 50 | max = lsn 51 | } 52 | } 53 | 54 | return c.syncer.SyncLSN(ctx, replication.LSN(max)) 55 | } 56 | 57 | func (c *Checkpointer) Close() error { 58 | return c.syncer.Close() 59 | } 60 | -------------------------------------------------------------------------------- /pkg/wal/checkpointer/wal_checkpointer.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package checkpointer 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/xataio/pgstream/pkg/wal" 9 | ) 10 | 11 | // Checkpoint defines the way to confirm the positions that have been read. 12 | // The actual implementation depends on the source of events (postgres, kafka,...) 
13 | type Checkpoint func(ctx context.Context, positions []wal.CommitPosition) error 14 | -------------------------------------------------------------------------------- /pkg/wal/listener/postgres/helper_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package postgres 4 | 5 | import ( 6 | "context" 7 | "time" 8 | 9 | "github.com/xataio/pgstream/pkg/wal/replication" 10 | replicationmocks "github.com/xataio/pgstream/pkg/wal/replication/mocks" 11 | ) 12 | 13 | const ( 14 | testLSN = replication.LSN(7773397064) 15 | testLSNStr = "1/CF54A048" 16 | ) 17 | 18 | func newMockReplicationHandler() *replicationmocks.Handler { 19 | return &replicationmocks.Handler{ 20 | StartReplicationFn: func(context.Context) error { return nil }, 21 | StartReplicationFromLSNFn: func(context.Context, replication.LSN) error { return nil }, 22 | GetCurrentLSNFn: func(ctx context.Context) (replication.LSN, error) { return testLSN, nil }, 23 | GetLSNParserFn: func() replication.LSNParser { return newMockLSNParser() }, 24 | SyncLSNFn: func(ctx context.Context, lsn replication.LSN) error { return nil }, 25 | ReceiveMessageFn: func(ctx context.Context, i uint64) (*replication.Message, error) { 26 | return newMockMessage(), nil 27 | }, 28 | } 29 | } 30 | 31 | func newMockMessage() *replication.Message { 32 | return &replication.Message{ 33 | LSN: testLSN, 34 | Data: []byte("test-data"), 35 | ReplyRequested: false, 36 | ServerTime: time.Now(), 37 | } 38 | } 39 | 40 | func newMockKeepAliveMessage(replyRequested bool) *replication.Message { 41 | return &replication.Message{ 42 | LSN: testLSN, 43 | ReplyRequested: replyRequested, 44 | } 45 | } 46 | 47 | func newMockLSNParser() *replicationmocks.LSNParser { 48 | return &replicationmocks.LSNParser{ 49 | ToStringFn: func(replication.LSN) string { return testLSNStr }, 50 | FromStringFn: func(s string) (replication.LSN, error) { return testLSN, nil }, 51 | } 52 | } 
// SnapshotConfig holds the user-facing settings for the snapshot listener.
type SnapshotConfig struct {
	Tables []string
	// SnapshotWorkers represents the number of snapshots the generator will
	// process concurrently. This doesn't affect the parallelism of the tables
	// within each individual snapshot request. It defaults to 1.
	SnapshotWorkers uint
}

const (
	defaultSnapshotWorkers = 1
	publicSchema           = "public"
)

// schemaTableMap groups the configured tables by schema. Entries of the form
// "schema.table" are split on the dot; entries without exactly one qualifier
// are assigned as-is to the public schema.
func (c *SnapshotConfig) schemaTableMap() map[string][]string {
	grouped := make(map[string][]string, len(c.Tables))
	for _, entry := range c.Tables {
		schema, table := publicSchema, entry
		if parts := strings.Split(entry, "."); len(parts) == 2 {
			schema, table = parts[0], parts[1]
		}
		grouped[schema] = append(grouped[schema], table)
	}
	return grouped
}

// snapshotWorkers returns the configured worker count, falling back to the
// default of 1 when unset.
func (c *SnapshotConfig) snapshotWorkers() uint {
	if c.SnapshotWorkers == 0 {
		return defaultSnapshotWorkers
	}
	return c.SnapshotWorkers
}
TestSnapshotConfig_schemaTableMap(t *testing.T) { 12 | t.Parallel() 13 | 14 | tests := []struct { 15 | name string 16 | tables []string 17 | 18 | wantMap map[string][]string 19 | }{ 20 | { 21 | name: "ok", 22 | tables: []string{"a", "public.b", "test_schema.c"}, 23 | wantMap: map[string][]string{ 24 | "public": {"a", "b"}, 25 | "test_schema": {"c"}, 26 | }, 27 | }, 28 | } 29 | 30 | for _, tc := range tests { 31 | t.Run(tc.name, func(t *testing.T) { 32 | t.Parallel() 33 | 34 | config := SnapshotConfig{ 35 | Tables: tc.tables, 36 | } 37 | got := config.schemaTableMap() 38 | require.Equal(t, tc.wantMap, got) 39 | }) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /pkg/wal/listener/snapshot/adapter/wal_process_event_adapter.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package adapter 4 | 5 | import ( 6 | "context" 7 | "time" 8 | 9 | "github.com/jonboulle/clockwork" 10 | "github.com/xataio/pgstream/pkg/snapshot" 11 | "github.com/xataio/pgstream/pkg/wal" 12 | "github.com/xataio/pgstream/pkg/wal/listener" 13 | ) 14 | 15 | type ProcessEventAdapter struct { 16 | processEvent listener.ProcessWalEvent 17 | clock clockwork.Clock 18 | } 19 | 20 | func NewProcessEventAdapter(processEvent listener.ProcessWalEvent) *ProcessEventAdapter { 21 | return &ProcessEventAdapter{ 22 | processEvent: processEvent, 23 | clock: clockwork.NewRealClock(), 24 | } 25 | } 26 | 27 | func (a *ProcessEventAdapter) ProcessRow(ctx context.Context, row *snapshot.Row) error { 28 | return a.processEvent(ctx, a.snapshotRowToWalEvent(row)) 29 | } 30 | 31 | func (a *ProcessEventAdapter) snapshotRowToWalEvent(row *snapshot.Row) *wal.Event { 32 | if row == nil { 33 | return nil 34 | } 35 | 36 | columns := make([]wal.Column, 0, len(row.Columns)) 37 | for _, col := range row.Columns { 38 | columns = append(columns, a.snapshotColumnToWalColumn(col)) 39 | } 40 | // use 0 
since there's no LSN associated, but it can be used as the 41 | // initial version downstream 42 | const zeroLSN = "0/0" 43 | return &wal.Event{ 44 | CommitPosition: wal.CommitPosition(zeroLSN), 45 | Data: &wal.Data{ 46 | Action: "I", 47 | Timestamp: a.clock.Now().UTC().Format(time.RFC3339), 48 | LSN: zeroLSN, 49 | Schema: row.Schema, 50 | Table: row.Table, 51 | Columns: columns, 52 | }, 53 | } 54 | } 55 | 56 | func (a *ProcessEventAdapter) snapshotColumnToWalColumn(col snapshot.Column) wal.Column { 57 | return wal.Column{ 58 | Name: col.Name, 59 | Type: col.Type, 60 | Value: col.Value, 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /pkg/wal/listener/snapshot/adapter/wal_process_event_adapter_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package adapter 4 | 5 | import ( 6 | "testing" 7 | "time" 8 | 9 | "github.com/jonboulle/clockwork" 10 | "github.com/stretchr/testify/require" 11 | "github.com/xataio/pgstream/pkg/snapshot" 12 | "github.com/xataio/pgstream/pkg/wal" 13 | ) 14 | 15 | func TestProcessEventAdapter_snapshotRowToWalEvent(t *testing.T) { 16 | t.Parallel() 17 | 18 | now := time.Now() 19 | fakeClock := clockwork.NewFakeClockAt(now) 20 | testTable := "table1" 21 | zeroLSN := "0/0" 22 | 23 | tests := []struct { 24 | name string 25 | row *snapshot.Row 26 | 27 | wantEvent *wal.Event 28 | }{ 29 | { 30 | name: "ok - nil row", 31 | row: nil, 32 | 33 | wantEvent: nil, 34 | }, 35 | { 36 | name: "ok", 37 | row: &snapshot.Row{ 38 | Schema: publicSchema, 39 | Table: testTable, 40 | Columns: []snapshot.Column{ 41 | {Name: "id", Type: "int4", Value: 1}, 42 | {Name: "name", Type: "text", Value: "alice"}, 43 | }, 44 | }, 45 | 46 | wantEvent: &wal.Event{ 47 | CommitPosition: wal.CommitPosition(zeroLSN), 48 | Data: &wal.Data{ 49 | Action: "I", 50 | Timestamp: fakeClock.Now().UTC().Format(time.RFC3339), 51 | LSN: zeroLSN, 52 | 
Schema: publicSchema, 53 | Table: testTable, 54 | Columns: []wal.Column{ 55 | {Name: "id", Type: "int4", Value: 1}, 56 | {Name: "name", Type: "text", Value: "alice"}, 57 | }, 58 | }, 59 | }, 60 | }, 61 | } 62 | 63 | for _, tc := range tests { 64 | t.Run(tc.name, func(t *testing.T) { 65 | t.Parallel() 66 | 67 | a := ProcessEventAdapter{ 68 | clock: fakeClock, 69 | } 70 | event := a.snapshotRowToWalEvent(tc.row) 71 | 72 | require.Equal(t, tc.wantEvent, event) 73 | }) 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /pkg/wal/listener/snapshot/adapter/wal_snapshot_generator_adapter_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package adapter 4 | 5 | import ( 6 | "context" 7 | "errors" 8 | "testing" 9 | 10 | "github.com/stretchr/testify/require" 11 | "github.com/xataio/pgstream/pkg/log" 12 | "github.com/xataio/pgstream/pkg/snapshot" 13 | "github.com/xataio/pgstream/pkg/snapshot/generator" 14 | generatormocks "github.com/xataio/pgstream/pkg/snapshot/generator/mocks" 15 | ) 16 | 17 | func TestSnapshotGeneratorAdapter_CreateSnapshot(t *testing.T) { 18 | t.Parallel() 19 | 20 | errTest := errors.New("oh noes") 21 | 22 | tests := []struct { 23 | name string 24 | generator generator.SnapshotGenerator 25 | schemaTables map[string][]string 26 | 27 | wantErr error 28 | }{ 29 | { 30 | name: "ok", 31 | generator: &generatormocks.Generator{ 32 | CreateSnapshotFn: func(ctx context.Context, ss *snapshot.Snapshot) error { 33 | require.Equal(t, &snapshot.Snapshot{ 34 | SchemaName: publicSchema, 35 | TableNames: []string{"*"}, 36 | }, ss) 37 | return nil 38 | }, 39 | }, 40 | schemaTables: map[string][]string{ 41 | publicSchema: {"*"}, 42 | }, 43 | 44 | wantErr: nil, 45 | }, 46 | { 47 | name: "error", 48 | generator: &generatormocks.Generator{ 49 | CreateSnapshotFn: func(ctx context.Context, ss *snapshot.Snapshot) error { 50 | return errTest 51 
| }, 52 | }, 53 | schemaTables: map[string][]string{ 54 | publicSchema: {"*"}, 55 | }, 56 | 57 | wantErr: errTest, 58 | }, 59 | } 60 | 61 | for _, tc := range tests { 62 | t.Run(tc.name, func(t *testing.T) { 63 | t.Parallel() 64 | 65 | ga := SnapshotGeneratorAdapter{ 66 | logger: log.NewNoopLogger(), 67 | generator: tc.generator, 68 | schemaTables: tc.schemaTables, 69 | snapshotWorkers: 1, 70 | } 71 | defer ga.Close() 72 | 73 | err := ga.CreateSnapshot(context.Background()) 74 | require.ErrorIs(t, err, tc.wantErr) 75 | }) 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /pkg/wal/listener/snapshot/builder/config.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package builder 4 | 5 | import ( 6 | schemalogpg "github.com/xataio/pgstream/pkg/schemalog/postgres" 7 | pgsnapshotgenerator "github.com/xataio/pgstream/pkg/snapshot/generator/postgres/data" 8 | "github.com/xataio/pgstream/pkg/snapshot/generator/postgres/schema/pgdumprestore" 9 | "github.com/xataio/pgstream/pkg/wal/listener/snapshot/adapter" 10 | ) 11 | 12 | type SnapshotListenerConfig struct { 13 | Generator pgsnapshotgenerator.Config 14 | Adapter adapter.SnapshotConfig 15 | Recorder *SnapshotRecorderConfig 16 | Schema SchemaSnapshotConfig 17 | } 18 | 19 | type SchemaSnapshotConfig struct { 20 | SchemaLogStore *schemalogpg.Config 21 | DumpRestore *pgdumprestore.Config 22 | } 23 | 24 | type SnapshotRecorderConfig struct { 25 | RepeatableSnapshots bool 26 | SnapshotStoreURL string 27 | } 28 | -------------------------------------------------------------------------------- /pkg/wal/listener/snapshot/wal_snapshot_listener.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package snapshot 4 | 5 | import ( 6 | "context" 7 | ) 8 | 9 | type Generator interface { 10 | 
// mockMessage is a configurable test double implementing the batch Message
// interface (Size/IsEmpty).
type mockMessage struct {
	id        uint
	isEmptyFn func() bool
	sizeFn    func() int
}

// Size returns the configured size, defaulting to 1 when no sizeFn is set.
func (m *mockMessage) Size() int {
	if m.sizeFn == nil {
		return 1
	}
	return m.sizeFn()
}

// IsEmpty reports the configured emptiness, defaulting to false when no
// isEmptyFn is set.
func (m *mockMessage) IsEmpty() bool {
	if m.isEmptyFn == nil {
		return false
	}
	return m.isEmptyFn()
}
// SPDX-License-Identifier: Apache-2.0 2 | 3 | package mocks 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/xataio/pgstream/pkg/wal/processor/batch" 9 | ) 10 | 11 | type BatchSender[T batch.Message] struct { 12 | SendMessageFn func(context.Context, *batch.WALMessage[T]) error 13 | CloseFn func() 14 | msgChan chan *batch.WALMessage[T] 15 | } 16 | 17 | func NewBatchSender[T batch.Message]() *BatchSender[T] { 18 | return &BatchSender[T]{ 19 | msgChan: make(chan *batch.WALMessage[T]), 20 | } 21 | } 22 | 23 | func (m *BatchSender[T]) SendMessage(ctx context.Context, msg *batch.WALMessage[T]) error { 24 | if m.SendMessageFn != nil { 25 | return m.SendMessageFn(ctx, msg) 26 | } 27 | 28 | m.msgChan <- msg 29 | return nil 30 | } 31 | 32 | func (m *BatchSender[T]) Close() { 33 | close(m.msgChan) 34 | if m.CloseFn != nil { 35 | m.CloseFn() 36 | } 37 | } 38 | 39 | func (m *BatchSender[T]) GetWALMessages() []*batch.WALMessage[T] { 40 | msgs := []*batch.WALMessage[T]{} 41 | for msg := range m.msgChan { 42 | msgs = append(msgs, msg) 43 | } 44 | return msgs 45 | } 46 | -------------------------------------------------------------------------------- /pkg/wal/processor/batch/wal_batch.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package batch 4 | 5 | import ( 6 | "github.com/xataio/pgstream/pkg/wal" 7 | ) 8 | 9 | type Batch[T Message] struct { 10 | messages []T 11 | positions []wal.CommitPosition 12 | totalBytes int 13 | } 14 | 15 | const zeroLSN = "0/0" 16 | 17 | func NewBatch[T Message](messages []T, positions []wal.CommitPosition) *Batch[T] { 18 | return &Batch[T]{ 19 | messages: messages, 20 | positions: positions, 21 | } 22 | } 23 | 24 | func (b *Batch[T]) GetMessages() []T { 25 | return b.messages 26 | } 27 | 28 | func (b *Batch[T]) GetCommitPositions() []wal.CommitPosition { 29 | return b.positions 30 | } 31 | 32 | func (b *Batch[T]) add(m *WALMessage[T]) { 33 | if 
!m.message.IsEmpty() { 34 | b.messages = append(b.messages, m.message) 35 | b.totalBytes += m.message.Size() 36 | } 37 | 38 | if m.position != "" && m.position != zeroLSN { 39 | b.positions = append(b.positions, m.position) 40 | } 41 | } 42 | 43 | func (b *Batch[T]) drain() *Batch[T] { 44 | batch := &Batch[T]{ 45 | messages: b.messages, 46 | positions: b.positions, 47 | totalBytes: b.totalBytes, 48 | } 49 | 50 | b.messages = []T{} 51 | b.totalBytes = 0 52 | b.positions = []wal.CommitPosition{} 53 | return batch 54 | } 55 | 56 | func (b *Batch[T]) isEmpty() bool { 57 | return len(b.messages) == 0 && len(b.positions) == 0 58 | } 59 | 60 | func (b *Batch[T]) maxBatchBytesReached(maxBatchBytes int64, msg T) bool { 61 | return maxBatchBytes > 0 && b.totalBytes+msg.Size() >= int(maxBatchBytes) 62 | } 63 | -------------------------------------------------------------------------------- /pkg/wal/processor/batch/wal_batch_sender_config.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package batch 4 | 5 | import ( 6 | "errors" 7 | "time" 8 | ) 9 | 10 | type Config struct { 11 | // BatchTime is the max time interval at which the batch sending is 12 | // triggered. Defaults to 1s 13 | BatchTimeout time.Duration 14 | // MaxBatchBytes is the max size in bytes for a given batch. When this size is 15 | // reached, the batch is sent. Defaults to 1572864 bytes. 16 | MaxBatchBytes int64 17 | // MaxBatchSize is the max number of messages to be sent per batch. When this 18 | // size is reached, the batch is sent. Defaults to 100. 19 | MaxBatchSize int64 20 | // MaxQueueBytes is the max memory used by the batch writer for inflight 21 | // batches. 
Defaults to 100MiB 22 | MaxQueueBytes int64 23 | } 24 | 25 | const ( 26 | defaultMaxQueueBytes = int64(100 * 1024 * 1024) // 100MiB 27 | defaultBatchTimeout = time.Second 28 | defaultMaxBatchSize = 100 29 | defaultMaxBatchBytes = int64(1572864) 30 | ) 31 | 32 | func (c *Config) GetMaxBatchBytes() int64 { 33 | if c.MaxBatchBytes > 0 { 34 | return c.MaxBatchBytes 35 | } 36 | return defaultMaxBatchBytes 37 | } 38 | 39 | func (c *Config) GetMaxBatchSize() int64 { 40 | if c.MaxBatchSize > 0 { 41 | return c.MaxBatchSize 42 | } 43 | return defaultMaxBatchSize 44 | } 45 | 46 | func (c *Config) GetBatchTimeout() time.Duration { 47 | if c.BatchTimeout > 0 { 48 | return c.BatchTimeout 49 | } 50 | return defaultBatchTimeout 51 | } 52 | 53 | func (c *Config) GetMaxQueueBytes() (int64, error) { 54 | if c.MaxQueueBytes > 0 { 55 | if c.MaxQueueBytes < c.GetMaxBatchBytes() { 56 | return -1, errors.New("max queue bytes must be equal or bigger than max batch bytes") 57 | } 58 | return c.MaxQueueBytes, nil 59 | } 60 | 61 | return defaultMaxQueueBytes, nil 62 | } 63 | -------------------------------------------------------------------------------- /pkg/wal/processor/batch/wal_message.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package batch 4 | 5 | import "github.com/xataio/pgstream/pkg/wal" 6 | 7 | type Message interface { 8 | Size() int 9 | IsEmpty() bool 10 | } 11 | 12 | // WALMessage is a wrapper around any kind of message implementing the Message 13 | // interface which contains a wal commit position. 
14 | type WALMessage[T Message] struct { 15 | message T 16 | position wal.CommitPosition 17 | } 18 | 19 | func NewWALMessage[T Message](msg T, pos wal.CommitPosition) *WALMessage[T] { 20 | return &WALMessage[T]{ 21 | message: msg, 22 | position: pos, 23 | } 24 | } 25 | 26 | func (m *WALMessage[T]) GetMessage() T { 27 | return m.message 28 | } 29 | 30 | func (m *WALMessage[T]) GetPosition() wal.CommitPosition { 31 | return m.position 32 | } 33 | 34 | func (m *WALMessage[T]) Size() int { 35 | return m.message.Size() 36 | } 37 | 38 | func (m *WALMessage[T]) isKeepAlive() bool { 39 | return m.message.IsEmpty() && m.position != "" 40 | } 41 | -------------------------------------------------------------------------------- /pkg/wal/processor/errors.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package processor 4 | 5 | import "errors" 6 | 7 | var ( 8 | ErrVersionNotFound = errors.New("version column not found") 9 | ErrIDNotFound = errors.New("id column not found") 10 | ErrTableNotFound = errors.New("table not found") 11 | ErrColumnNotFound = errors.New("column not found") 12 | ) 13 | -------------------------------------------------------------------------------- /pkg/wal/processor/kafka/config.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package kafka 4 | 5 | import ( 6 | "github.com/xataio/pgstream/pkg/kafka" 7 | "github.com/xataio/pgstream/pkg/wal/processor/batch" 8 | ) 9 | 10 | type Config struct { 11 | Kafka kafka.ConnConfig 12 | Batch batch.Config 13 | } 14 | -------------------------------------------------------------------------------- /pkg/wal/processor/mocks/mock_processor.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package mocks 4 | 5 | import ( 6 | "context" 7 | 8 | 
"github.com/xataio/pgstream/pkg/wal" 9 | ) 10 | 11 | type Processor struct { 12 | ProcessWALEventFn func(ctx context.Context, walEvent *wal.Event) error 13 | CloseFn func() error 14 | processCalls uint 15 | } 16 | 17 | func (m *Processor) ProcessWALEvent(ctx context.Context, walEvent *wal.Event) error { 18 | m.processCalls++ 19 | return m.ProcessWALEventFn(ctx, walEvent) 20 | } 21 | 22 | func (m *Processor) GetProcessCalls() uint { 23 | return m.processCalls 24 | } 25 | 26 | func (m *Processor) Close() error { 27 | if m.CloseFn != nil { 28 | return m.CloseFn() 29 | } 30 | return nil 31 | } 32 | 33 | func (m *Processor) Name() string { 34 | return "mock" 35 | } 36 | -------------------------------------------------------------------------------- /pkg/wal/processor/postgres/config.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package postgres 4 | 5 | import ( 6 | schemalogpg "github.com/xataio/pgstream/pkg/schemalog/postgres" 7 | "github.com/xataio/pgstream/pkg/wal/processor/batch" 8 | ) 9 | 10 | type Config struct { 11 | URL string 12 | BatchConfig batch.Config 13 | SchemaLogStore schemalogpg.Config 14 | DisableTriggers bool 15 | OnConflictAction string 16 | BulkIngestEnabled bool 17 | } 18 | -------------------------------------------------------------------------------- /pkg/wal/processor/postgres/helper_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package postgres 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/xataio/pgstream/pkg/wal" 9 | ) 10 | 11 | type mockAdapter struct { 12 | walEventToQueriesFn func(*wal.Event) ([]*query, error) 13 | } 14 | 15 | func (m *mockAdapter) walEventToQueries(_ context.Context, e *wal.Event) ([]*query, error) { 16 | return m.walEventToQueriesFn(e) 17 | } 18 | -------------------------------------------------------------------------------- 
/pkg/wal/processor/postgres/instrumented_wal_adapter.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package postgres 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/xataio/pgstream/pkg/otel" 9 | "github.com/xataio/pgstream/pkg/wal" 10 | 11 | "go.opentelemetry.io/otel/trace" 12 | ) 13 | 14 | type instrumentedWalAdapter struct { 15 | inner walAdapter 16 | tracer trace.Tracer 17 | } 18 | 19 | func newInstrumentedWalAdapter(a walAdapter, i *otel.Instrumentation) walAdapter { 20 | if i == nil { 21 | return a 22 | } 23 | 24 | return &instrumentedWalAdapter{ 25 | inner: a, 26 | tracer: i.Tracer, 27 | } 28 | } 29 | 30 | func (i *instrumentedWalAdapter) walEventToQueries(ctx context.Context, event *wal.Event) (queries []*query, err error) { 31 | ctx, span := otel.StartSpan(ctx, i.tracer, "walAdapter.walEventToQueries") 32 | defer otel.CloseSpan(span, err) 33 | 34 | return i.inner.walEventToQueries(ctx, event) 35 | } 36 | -------------------------------------------------------------------------------- /pkg/wal/processor/postgres/postgres_query_msg.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package postgres 4 | 5 | type query struct { 6 | schema string 7 | table string 8 | sql string 9 | columnNames []string 10 | args []any 11 | isDDL bool 12 | } 13 | 14 | // size returns the size of the message sql query (does not include the 15 | // parameters) 16 | func (m *query) Size() int { 17 | return len(m.sql) 18 | } 19 | 20 | func (m *query) IsEmpty() bool { 21 | return m == nil || m.sql == "" 22 | } 23 | 24 | func (m *query) getSQL() string { 25 | if m == nil { 26 | return "" 27 | } 28 | return m.sql 29 | } 30 | 31 | func (m *query) getArgs() []any { 32 | if m == nil { 33 | return nil 34 | } 35 | return m.args 36 | } 37 | 
-------------------------------------------------------------------------------- /pkg/wal/processor/postgres/postgres_wal_adapter.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package postgres 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/xataio/pgstream/pkg/wal" 9 | "github.com/xataio/pgstream/pkg/wal/processor" 10 | ) 11 | 12 | type walAdapter interface { 13 | walEventToQueries(ctx context.Context, e *wal.Event) ([]*query, error) 14 | } 15 | 16 | type adapter struct { 17 | dmlAdapter *dmlAdapter 18 | ddlAdapter *ddlAdapter 19 | } 20 | 21 | func newAdapter(schemaQuerier schemalogQuerier, onConflictAction string) (*adapter, error) { 22 | dmlAdapter, err := newDMLAdapter(onConflictAction) 23 | if err != nil { 24 | return nil, err 25 | } 26 | 27 | var ddl *ddlAdapter 28 | if schemaQuerier != nil { 29 | ddl = newDDLAdapter(schemaQuerier) 30 | } 31 | return &adapter{ 32 | dmlAdapter: dmlAdapter, 33 | ddlAdapter: ddl, 34 | }, nil 35 | } 36 | 37 | func (a *adapter) walEventToQueries(ctx context.Context, e *wal.Event) ([]*query, error) { 38 | if e.Data == nil { 39 | return []*query{{}}, nil 40 | } 41 | 42 | if processor.IsSchemaLogEvent(e.Data) { 43 | // there's no ddl adapter, the ddl query will not be processed 44 | if a.ddlAdapter == nil { 45 | return []*query{{}}, nil 46 | } 47 | 48 | return a.ddlAdapter.walDataToQueries(ctx, e.Data) 49 | } 50 | 51 | return []*query{a.dmlAdapter.walDataToQuery(e.Data)}, nil 52 | } 53 | -------------------------------------------------------------------------------- /pkg/wal/processor/search/config.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package search 4 | 5 | import ( 6 | "github.com/xataio/pgstream/pkg/wal/processor/batch" 7 | ) 8 | 9 | type IndexerConfig struct { 10 | Batch batch.Config 11 | } 12 | 
-------------------------------------------------------------------------------- /pkg/wal/processor/search/errors.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package search 4 | 5 | import ( 6 | "errors" 7 | "fmt" 8 | "time" 9 | ) 10 | 11 | type ErrTypeInvalid struct { 12 | Input string 13 | } 14 | 15 | func (e ErrTypeInvalid) Error() string { 16 | return fmt.Sprintf("unsupported type: %s", e.Input) 17 | } 18 | 19 | type ErrSchemaNotFound struct { 20 | SchemaName string 21 | } 22 | 23 | func (e ErrSchemaNotFound) Error() string { 24 | return fmt.Sprintf("schema [%s] not found", e.SchemaName) 25 | } 26 | 27 | type ErrSchemaAlreadyExists struct { 28 | SchemaName string 29 | } 30 | 31 | func (e ErrSchemaAlreadyExists) Error() string { 32 | return fmt.Sprintf("schema [%s] already exists", e.SchemaName) 33 | } 34 | 35 | type ErrSchemaUpdateOutOfOrder struct { 36 | SchemaName string 37 | SchemaID string 38 | NewVersion int 39 | CurrentVersion int 40 | CurrentCreatedAt time.Time 41 | NewCreatedAt time.Time 42 | } 43 | 44 | // Error reports an out-of-order schema update (typo "our of order" fixed). func (e ErrSchemaUpdateOutOfOrder) Error() string { 45 | return fmt.Sprintf("out of order schema update detected for schema [%s] with id [%s]: incoming version: %d, created at: %v, current version: %d, created at: %v", 46 | e.SchemaName, e.SchemaID, e.NewVersion, e.NewCreatedAt, e.CurrentVersion, e.CurrentCreatedAt) 47 | } 48 | 49 | var ( 50 | ErrRetriable = errors.New("retriable error") 51 | ErrInvalidQuery = errors.New("invalid query") 52 | 53 | errNilIDValue = errors.New("id has nil value") 54 | errNilVersionValue = errors.New("version has nil value") 55 | errMetadataMissing = errors.New("missing wal event metadata") 56 | errEmptyQueueMsg = errors.New("invalid empty queue message") 57 | errIncompatibleLSN = errors.New("incompatible LSN value") 58 | ) 59 | --------------------------------------------------------------------------------
/pkg/wal/processor/search/mocks/mock_search_mapper.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package mocks 4 | 5 | import "github.com/xataio/pgstream/pkg/schemalog" 6 | 7 | type Mapper struct { 8 | ColumnToSearchMappingFn func(column schemalog.Column) (map[string]any, error) 9 | MapColumnValueFn func(column schemalog.Column, value any) (any, error) 10 | } 11 | 12 | func (m *Mapper) ColumnToSearchMapping(column schemalog.Column) (map[string]any, error) { 13 | return m.ColumnToSearchMappingFn(column) 14 | } 15 | 16 | func (m *Mapper) MapColumnValue(column schemalog.Column, value any) (any, error) { 17 | return m.MapColumnValueFn(column, value) 18 | } 19 | -------------------------------------------------------------------------------- /pkg/wal/processor/search/search_msg_batch.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package search 4 | 5 | import ( 6 | "github.com/xataio/pgstream/pkg/schemalog" 7 | ) 8 | 9 | type msg struct { 10 | write *Document 11 | truncate *truncateItem 12 | schemaChange *schemalog.LogEntry 13 | bytesSize int 14 | } 15 | 16 | type truncateItem struct { 17 | schemaName string 18 | tableID string 19 | } 20 | 21 | func (m *msg) Size() int { 22 | return m.bytesSize 23 | } 24 | 25 | // IsEmpty reports whether the message carries no payload. A nil receiver is treated as empty, mirroring (*query).IsEmpty in the postgres processor; the previous "m != nil &&" let a nil msg into Batch.add, where Size() would panic. func (m *msg) IsEmpty() bool { 26 | return m == nil || (m.write == nil && m.schemaChange == nil && m.truncate == nil) 27 | } 28 | -------------------------------------------------------------------------------- /pkg/wal/processor/search/store.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package search 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/xataio/pgstream/pkg/schemalog" 9 | ) 10 | 11 | type Store interface { 12 | GetMapper() Mapper 13 | // schema operations 14 | ApplySchemaChange(ctx
context.Context, logEntry *schemalog.LogEntry) error 15 | DeleteSchema(ctx context.Context, schemaName string) error 16 | // data operations 17 | DeleteTableDocuments(ctx context.Context, schemaName string, tableIDs []string) error 18 | SendDocuments(ctx context.Context, docs []Document) ([]DocumentError, error) 19 | } 20 | 21 | type Mapper interface { 22 | ColumnToSearchMapping(column schemalog.Column) (map[string]any, error) 23 | MapColumnValue(column schemalog.Column, value any) (any, error) 24 | } 25 | 26 | type Document struct { 27 | ID string 28 | Schema string 29 | Data map[string]any 30 | Version int 31 | Delete bool 32 | } 33 | 34 | type DocumentError struct { 35 | Document Document 36 | Severity Severity 37 | Error string 38 | } 39 | 40 | type Severity uint 41 | 42 | const ( 43 | SeverityNone Severity = iota 44 | SeverityDataLoss 45 | SeverityIgnored 46 | SeverityRetriable 47 | ) 48 | 49 | func (s *Severity) String() string { 50 | if s == nil { 51 | return "" 52 | } 53 | switch *s { 54 | case SeverityNone: 55 | return "NONE" 56 | case SeverityDataLoss: 57 | return "DATALOSS" 58 | case SeverityIgnored: 59 | return "IGNORED" 60 | case SeverityRetriable: 61 | return "RETRIABLE" 62 | default: 63 | return "" 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /pkg/wal/processor/search/store/helper_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package store 4 | 5 | import ( 6 | "github.com/xataio/pgstream/internal/searchstore" 7 | "github.com/xataio/pgstream/pkg/schemalog" 8 | "github.com/xataio/pgstream/pkg/wal/processor/search" 9 | ) 10 | 11 | type mockAdapter struct { 12 | recordToLogEntryFn func(map[string]any) (*schemalog.LogEntry, error) 13 | schemaNameToIndexFn func(schemaName string) IndexName 14 | indexToSchemaNameFn func(index string) string 15 | searchDocToBulkItemFn func(docs search.Document) 
searchstore.BulkItem 16 | bulkItemsToSearchDocErrsFn func(items []searchstore.BulkItem) []search.DocumentError 17 | } 18 | 19 | func (m *mockAdapter) RecordToLogEntry(rec map[string]any) (*schemalog.LogEntry, error) { 20 | return m.recordToLogEntryFn(rec) 21 | } 22 | 23 | func (m *mockAdapter) SchemaNameToIndex(schemaName string) IndexName { 24 | return m.schemaNameToIndexFn(schemaName) 25 | } 26 | 27 | func (m *mockAdapter) IndexToSchemaName(index string) string { 28 | return m.indexToSchemaNameFn(index) 29 | } 30 | 31 | func (m *mockAdapter) SearchDocToBulkItem(docs search.Document) searchstore.BulkItem { 32 | return m.searchDocToBulkItemFn(docs) 33 | } 34 | 35 | func (m *mockAdapter) BulkItemsToSearchDocErrs(items []searchstore.BulkItem) []search.DocumentError { 36 | return m.bulkItemsToSearchDocErrsFn(items) 37 | } 38 | -------------------------------------------------------------------------------- /pkg/wal/processor/search/store/search_index_name.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package store 4 | 5 | import ( 6 | "fmt" 7 | "strings" 8 | ) 9 | 10 | type IndexNameAdapter interface { 11 | SchemaNameToIndex(schemaName string) IndexName 12 | IndexToSchemaName(index string) string 13 | } 14 | 15 | // IndexName represents an opensearch index name constructed from a schema name. 
16 | type IndexName interface { 17 | Name() string 18 | Version() int 19 | NameWithVersion() string 20 | SchemaName() string 21 | } 22 | 23 | type defaultIndexNameAdapter struct{} 24 | 25 | func newDefaultIndexNameAdapter() IndexNameAdapter { 26 | return &defaultIndexNameAdapter{} 27 | } 28 | 29 | func (i *defaultIndexNameAdapter) SchemaNameToIndex(schemaName string) IndexName { 30 | return newDefaultIndexName(schemaName) 31 | } 32 | 33 | func (i *defaultIndexNameAdapter) IndexToSchemaName(index string) string { 34 | return strings.TrimSuffix(index, "-1") 35 | } 36 | 37 | type defaultIndexName struct { 38 | schemaName string 39 | version int 40 | } 41 | 42 | func newDefaultIndexName(schemaName string) IndexName { 43 | return &defaultIndexName{ 44 | schemaName: schemaName, 45 | version: 1, 46 | } 47 | } 48 | 49 | // SchemaName returns the schema the index name was derived from. Pointer receiver for consistency with Name/Version (newDefaultIndexName always returns a pointer, so behavior is unchanged). func (i *defaultIndexName) SchemaName() string { 50 | return i.schemaName 51 | } 52 | 53 | // NameWithVersion represents the name of the index with the version number. This should 54 | // generally not be needed, in favour of `Name`. 55 | func (i *defaultIndexName) NameWithVersion() string { 56 | return fmt.Sprintf("%s-%d", i.schemaName, i.version) 57 | } 58 | 59 | // Name returns the name we should use for querying the index.
60 | func (i *defaultIndexName) Name() string { 61 | return i.schemaName 62 | } 63 | 64 | func (i *defaultIndexName) Version() int { 65 | return i.version 66 | } 67 | -------------------------------------------------------------------------------- /pkg/wal/processor/transformer/wal_transformer_parser.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package transformer 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/xataio/pgstream/pkg/transformers" 9 | ) 10 | 11 | type transformerParser struct { 12 | builder transformerBuilder 13 | } 14 | 15 | func newTransformerParser(b transformerBuilder) *transformerParser { 16 | return &transformerParser{ 17 | builder: b, 18 | } 19 | } 20 | 21 | func (p *transformerParser) parse(_ context.Context, rules Rules) (map[string]ColumnTransformers, error) { 22 | var err error 23 | transformerMap := map[string]ColumnTransformers{} 24 | for _, table := range rules.Transformers { 25 | if table.ValidationMode == validationModeStrict { 26 | return nil, errValidatorRequiredForStrictMode 27 | } 28 | schemaTableTransformers := make(map[string]transformers.Transformer) 29 | transformerMap[schemaTableKey(table.Schema, table.Table)] = schemaTableTransformers 30 | for colName, transformerRules := range table.ColumnRules { 31 | cfg := transformerRulesToConfig(transformerRules) 32 | if cfg.Name == "" || cfg.Name == "noop" { 33 | // noop transformer, skip 34 | continue 35 | } 36 | if schemaTableTransformers[colName], err = p.builder.New(cfg); err != nil { 37 | return nil, err 38 | } 39 | } 40 | } 41 | return transformerMap, nil 42 | } 43 | 44 | func transformerRulesToConfig(rules TransformerRules) *transformers.Config { 45 | return &transformers.Config{ 46 | Name: transformers.TransformerType(rules.Name), 47 | Parameters: rules.Parameters, 48 | DynamicParameters: rules.DynamicParameters, 49 | } 50 | } 51 | 
-------------------------------------------------------------------------------- /pkg/wal/processor/transformer/wal_transformer_rules.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package transformer 4 | 5 | type Rules struct { 6 | Transformers []TableRules `yaml:"transformations"` 7 | ValidationMode string `yaml:"validation_mode"` 8 | } 9 | 10 | type TableRules struct { 11 | Schema string `yaml:"schema"` 12 | Table string `yaml:"table"` 13 | ColumnRules map[string]TransformerRules `yaml:"column_transformers"` 14 | ValidationMode string `yaml:"validation_mode"` 15 | } 16 | 17 | type TransformerRules struct { 18 | Name string `yaml:"name"` 19 | Parameters map[string]any `yaml:"parameters"` 20 | DynamicParameters map[string]any `yaml:"dynamic_parameters"` 21 | } 22 | -------------------------------------------------------------------------------- /pkg/wal/processor/wal_processor.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package processor 4 | 5 | import ( 6 | "context" 7 | "encoding/json" 8 | "errors" 9 | "fmt" 10 | 11 | "github.com/xataio/pgstream/pkg/schemalog" 12 | "github.com/xataio/pgstream/pkg/wal" 13 | ) 14 | 15 | // Processor is a general interface to receive and process a wal event 16 | type Processor interface { 17 | ProcessWALEvent(ctx context.Context, walEvent *wal.Event) error 18 | Close() error 19 | Name() string 20 | } 21 | 22 | var ( 23 | ErrPanic = errors.New("panic while processing wal event") 24 | ErrIncompatibleWalData = errors.New("wal data event is not a schema log entry") 25 | ) 26 | 27 | // IsSchemaLogEvent will return true if the wal event data originates from the 28 | // pgstream schema and the pgstream schema_log table. 
29 | func IsSchemaLogEvent(d *wal.Data) bool { 30 | return d.Schema == schemalog.SchemaName && d.Table == schemalog.TableName 31 | } 32 | 33 | // WalDataToLogEntry will convert the wal event data on input into the 34 | // equivalent schemalog entry. It will return an error if the wal event data is 35 | // not from the schema log table. 36 | func WalDataToLogEntry(d *wal.Data) (*schemalog.LogEntry, error) { 37 | if !IsSchemaLogEvent(d) { 38 | return nil, ErrIncompatibleWalData 39 | } 40 | 41 | intermediateRec := make(map[string]any, len(d.Columns)) 42 | for _, col := range d.Columns { // we only process inserts, so identity columns should never be set 43 | intermediateRec[col.Name] = col.Value 44 | } 45 | 46 | intermediateRecBytes, err := json.Marshal(intermediateRec) 47 | if err != nil { 48 | return nil, fmt.Errorf("parsing wal event into schema log entry, intermediate record is not valid JSON: %w", err) 49 | } 50 | 51 | var le schemalog.LogEntry 52 | if err := json.Unmarshal(intermediateRecBytes, &le); err != nil { 53 | return nil, fmt.Errorf("parsing wal event into schema, intermediate record is not valid JSON: %w", err) 54 | } 55 | 56 | return &le, nil 57 | } 58 | -------------------------------------------------------------------------------- /pkg/wal/processor/wal_processor_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package processor 4 | 5 | import ( 6 | "testing" 7 | "time" 8 | 9 | "github.com/google/go-cmp/cmp" 10 | "github.com/google/go-cmp/cmp/cmpopts" 11 | "github.com/rs/xid" 12 | "github.com/stretchr/testify/require" 13 | "github.com/xataio/pgstream/pkg/schemalog" 14 | "github.com/xataio/pgstream/pkg/wal" 15 | ) 16 | 17 | func Test_WalDataToLogEntry(t *testing.T) { 18 | t.Parallel() 19 | 20 | now := time.Now().UTC().Round(time.Second) 21 | nowStr := now.Format("2006-01-02 15:04:05") 22 | id := xid.New() 23 | 24 | testWalData := &wal.Data{ 25 | Action: "I", 
26 | Schema: schemalog.SchemaName, 27 | Table: schemalog.TableName, 28 | Columns: []wal.Column{ 29 | {ID: "id", Name: "id", Type: "text", Value: id.String()}, 30 | {ID: "version", Name: "version", Type: "integer", Value: 0}, 31 | {ID: "schema_name", Name: "schema_name", Type: "text", Value: "test_schema_1"}, 32 | {ID: "created_at", Name: "created_at", Type: "timestamp", Value: nowStr}, 33 | }, 34 | } 35 | 36 | tests := []struct { 37 | name string 38 | data *wal.Data 39 | 40 | wantLogEntry *schemalog.LogEntry 41 | wantErr error 42 | }{ 43 | { 44 | name: "ok", 45 | data: testWalData, 46 | 47 | wantLogEntry: &schemalog.LogEntry{ 48 | ID: id, 49 | Version: 0, 50 | SchemaName: "test_schema_1", 51 | CreatedAt: schemalog.NewSchemaCreatedAtTimestamp(now), 52 | }, 53 | wantErr: nil, 54 | }, 55 | { 56 | name: "error - invalid data", 57 | data: &wal.Data{ 58 | Schema: "test_schema", 59 | Table: "test_table", 60 | }, 61 | 62 | wantLogEntry: nil, 63 | wantErr: ErrIncompatibleWalData, 64 | }, 65 | } 66 | 67 | for _, tc := range tests { 68 | t.Run(tc.name, func(t *testing.T) { 69 | t.Parallel() 70 | 71 | logEntry, err := WalDataToLogEntry(tc.data) 72 | require.ErrorIs(t, err, tc.wantErr) 73 | if diff := cmp.Diff(logEntry, tc.wantLogEntry, cmpopts.IgnoreUnexported(schemalog.LogEntry{})); diff != "" { 74 | t.Errorf("got: \n%v, \nwant \n%v, \ndiff: \n%s", logEntry, tc.wantLogEntry, diff) 75 | } 76 | }) 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /pkg/wal/processor/webhook/notifier/config.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package notifier 4 | 5 | import "time" 6 | 7 | type Config struct { 8 | // MaxQueueBytes is the max memory used by the webhook notifier for inflight 9 | // events. 
Defaults to 100MiB 10 | MaxQueueBytes int64 11 | // URLWorkerCount is the max number of concurrent workers that will send 12 | // webhooks for a given event. Defaults to 10. 13 | URLWorkerCount uint 14 | // ClientTimeout is the max time the notifier will wait for a response from 15 | // a webhook url before it times out. Defaults to 10s. 16 | ClientTimeout time.Duration 17 | } 18 | 19 | const ( 20 | defaultMaxQueueBytes = int64(100 * 1024 * 1024) // 100MiB 21 | defaultURLWorkerCount = 10 22 | defaultClientTimeout = 10 * time.Second 23 | ) 24 | 25 | func (c *Config) maxQueueBytes() int64 { 26 | if c.MaxQueueBytes > 0 { 27 | return c.MaxQueueBytes 28 | } 29 | 30 | return defaultMaxQueueBytes 31 | } 32 | 33 | func (c *Config) workerCount() uint { 34 | if c.URLWorkerCount > 0 { 35 | return c.URLWorkerCount 36 | } 37 | 38 | return defaultURLWorkerCount 39 | } 40 | 41 | func (c *Config) clientTimeout() time.Duration { 42 | if c.ClientTimeout > 0 { 43 | return c.ClientTimeout 44 | } 45 | 46 | return defaultClientTimeout 47 | } 48 | -------------------------------------------------------------------------------- /pkg/wal/processor/webhook/notifier/helper_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package notifier 4 | 5 | import ( 6 | "errors" 7 | 8 | "github.com/xataio/pgstream/pkg/wal" 9 | "github.com/xataio/pgstream/pkg/wal/processor/webhook/subscription" 10 | ) 11 | 12 | var ( 13 | testCommitPos = wal.CommitPosition("test-pos") 14 | errTest = errors.New("oh noes") 15 | ) 16 | 17 | func newTestSubscription(url, schema, table string, eventTypes []string) *subscription.Subscription { 18 | return &subscription.Subscription{ 19 | URL: url, 20 | Schema: schema, 21 | Table: table, 22 | EventTypes: eventTypes, 23 | } 24 | } 25 | 26 | func testNotifyMsg(urls []string, payload []byte) *notifyMsg { 27 | return ¬ifyMsg{ 28 | urls: urls, 29 | payload: payload, 30 | commitPosition: 
testCommitPos, 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /pkg/wal/processor/webhook/notifier/webhook_notify_msg.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package notifier 4 | 5 | import ( 6 | "fmt" 7 | 8 | "github.com/xataio/pgstream/pkg/wal" 9 | "github.com/xataio/pgstream/pkg/wal/processor/webhook" 10 | "github.com/xataio/pgstream/pkg/wal/processor/webhook/subscription" 11 | ) 12 | 13 | type notifyMsg struct { 14 | urls []string 15 | payload []byte 16 | commitPosition wal.CommitPosition 17 | } 18 | 19 | type serialiser func(any) ([]byte, error) 20 | 21 | func newNotifyMsg(event *wal.Event, subscriptions []*subscription.Subscription, serialiser serialiser) (*notifyMsg, error) { 22 | var payload []byte 23 | urls := make([]string, 0, len(subscriptions)) 24 | if len(subscriptions) > 0 { 25 | var err error 26 | payload, err = serialiser(&webhook.Payload{Data: event.Data}) 27 | if err != nil { 28 | return nil, fmt.Errorf("serialising webhook payload: %w", err) 29 | } 30 | 31 | for _, s := range subscriptions { 32 | urls = append(urls, s.URL) 33 | } 34 | } 35 | 36 | return ¬ifyMsg{ 37 | urls: urls, 38 | payload: payload, 39 | commitPosition: event.CommitPosition, 40 | }, nil 41 | } 42 | 43 | func (m *notifyMsg) size() int { 44 | urlSize := 0 45 | for _, url := range m.urls { 46 | urlSize += len(url) 47 | } 48 | return len(m.payload) + urlSize 49 | } 50 | -------------------------------------------------------------------------------- /pkg/wal/processor/webhook/subscription/server/config.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package server 4 | 5 | import "time" 6 | 7 | type Config struct { 8 | // Address for the server to listen on. The format is "host:port". Defaults 9 | // to ":9900". 
10 | Address string 11 | // ReadTimeout is the maximum duration for reading the entire request, 12 | // including the body. Defaults to 5s. 13 | ReadTimeout time.Duration 14 | // WriteTimeout is the maximum duration before timing out writes of the 15 | // response. It is reset whenever a new request's header is read. Defaults 16 | // to 10s. 17 | WriteTimeout time.Duration 18 | } 19 | 20 | const ( 21 | defaultServerReadTimeout = 5 * time.Second 22 | defaultServerWriteTimeout = 10 * time.Second 23 | defaultServerAddress = ":9900" 24 | ) 25 | 26 | func (c *Config) readTimeout() time.Duration { 27 | if c.ReadTimeout > 0 { 28 | return c.ReadTimeout 29 | } 30 | return defaultServerReadTimeout 31 | } 32 | 33 | func (c *Config) writeTimeout() time.Duration { 34 | if c.WriteTimeout > 0 { 35 | return c.WriteTimeout 36 | } 37 | return defaultServerWriteTimeout 38 | } 39 | 40 | func (c *Config) address() string { 41 | if c.Address != "" { 42 | return c.Address 43 | } 44 | return defaultServerAddress 45 | } 46 | -------------------------------------------------------------------------------- /pkg/wal/processor/webhook/subscription/store/cache/config.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package cache 4 | 5 | import "time" 6 | 7 | type Config struct { 8 | // SyncInterval represents how frequently the cache will attempt to sync 9 | // with the internal subscription store to retrieve the latest data. It 10 | // defaults to 60s. 
11 | SyncInterval time.Duration 12 | } 13 | 14 | const ( 15 | defaultSyncInterval = 60 * time.Second 16 | ) 17 | 18 | func (c *Config) syncInterval() time.Duration { 19 | if c.SyncInterval > 0 { 20 | return c.SyncInterval 21 | } 22 | return defaultSyncInterval 23 | } 24 | -------------------------------------------------------------------------------- /pkg/wal/processor/webhook/subscription/store/cache/helper_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package cache 4 | 5 | import ( 6 | "errors" 7 | 8 | "github.com/xataio/pgstream/pkg/wal/processor/webhook/subscription" 9 | ) 10 | 11 | var errTest = errors.New("oh noes") 12 | 13 | func newTestSubscription(url, schema, table string, eventTypes []string) *subscription.Subscription { 14 | return &subscription.Subscription{ 15 | URL: url, 16 | Schema: schema, 17 | Table: table, 18 | EventTypes: eventTypes, 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /pkg/wal/processor/webhook/subscription/store/mocks/mock_subscription_store.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package mocks 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/xataio/pgstream/pkg/wal/processor/webhook/subscription" 9 | ) 10 | 11 | type Store struct { 12 | CreateSubscriptionFn func(ctx context.Context, s *subscription.Subscription) error 13 | DeleteSubscriptionFn func(ctx context.Context, s *subscription.Subscription) error 14 | GetSubscriptionsFn func(ctx context.Context, action, schema, table string) ([]*subscription.Subscription, error) 15 | } 16 | 17 | func (m *Store) CreateSubscription(ctx context.Context, s *subscription.Subscription) error { 18 | return m.CreateSubscriptionFn(ctx, s) 19 | } 20 | 21 | func (m *Store) DeleteSubscription(ctx context.Context, s *subscription.Subscription) error { 22 | 
return m.DeleteSubscriptionFn(ctx, s) 23 | } 24 | 25 | func (m *Store) GetSubscriptions(ctx context.Context, action, schema, table string) ([]*subscription.Subscription, error) { 26 | return m.GetSubscriptionsFn(ctx, action, schema, table) 27 | } 28 | -------------------------------------------------------------------------------- /pkg/wal/processor/webhook/subscription/store/postgres/pg_subscription_store_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package postgres 4 | 5 | import ( 6 | "fmt" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | func TestStore_buildGetQuery(t *testing.T) { 13 | t.Parallel() 14 | 15 | tests := []struct { 16 | name string 17 | action string 18 | schema string 19 | table string 20 | 21 | wantQuery string 22 | wantParams []any 23 | }{ 24 | { 25 | name: "no filters", 26 | wantQuery: fmt.Sprintf(`SELECT url, schema_name, table_name, event_types FROM %s LIMIT 1000`, subscriptionsTable()), 27 | wantParams: nil, 28 | }, 29 | { 30 | name: "with action filter", 31 | action: "I", 32 | wantQuery: fmt.Sprintf(`SELECT url, schema_name, table_name, event_types FROM %s WHERE ($1=ANY(event_types) OR event_types IS NULL) LIMIT 1000`, subscriptionsTable()), 33 | wantParams: []any{"I"}, 34 | }, 35 | { 36 | name: "with schema filter", 37 | schema: "test_schema", 38 | wantQuery: fmt.Sprintf(`SELECT url, schema_name, table_name, event_types FROM %s WHERE (schema_name=$1 OR schema_name='') LIMIT 1000`, subscriptionsTable()), 39 | wantParams: []any{"test_schema"}, 40 | }, 41 | { 42 | name: "with table filter", 43 | table: "test_table", 44 | wantQuery: fmt.Sprintf(`SELECT url, schema_name, table_name, event_types FROM %s WHERE (table_name=$1 OR table_name='') LIMIT 1000`, subscriptionsTable()), 45 | wantParams: []any{"test_table"}, 46 | }, 47 | { 48 | name: "with all filters", 49 | action: "I", 50 | schema: "test_schema", 51 | table: 
"test_table", 52 | wantQuery: fmt.Sprintf(`SELECT url, schema_name, table_name, event_types FROM %s `, subscriptionsTable()) + 53 | "WHERE (schema_name=$1 OR schema_name='') " + 54 | "AND (table_name=$2 OR table_name='') " + 55 | "AND ($3=ANY(event_types) OR event_types IS NULL) LIMIT 1000", 56 | wantParams: []any{"test_schema", "test_table", "I"}, 57 | }, 58 | } 59 | 60 | for _, tc := range tests { 61 | t.Run(tc.name, func(t *testing.T) { 62 | t.Parallel() 63 | 64 | s := &Store{} 65 | query, params := s.buildGetQuery(tc.action, tc.schema, tc.table) 66 | require.Equal(t, tc.wantQuery, query) 67 | require.Equal(t, tc.wantParams, params) 68 | }) 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /pkg/wal/processor/webhook/subscription/store/subscription_store.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package store 4 | 5 | import ( 6 | "context" 7 | 8 | "github.com/xataio/pgstream/pkg/wal/processor/webhook/subscription" 9 | ) 10 | 11 | type Store interface { 12 | CreateSubscription(ctx context.Context, s *subscription.Subscription) error 13 | DeleteSubscription(ctx context.Context, s *subscription.Subscription) error 14 | GetSubscriptions(ctx context.Context, action, schema, table string) ([]*subscription.Subscription, error) 15 | } 16 | -------------------------------------------------------------------------------- /pkg/wal/processor/webhook/subscription/subscription.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package subscription 4 | 5 | import ( 6 | "fmt" 7 | "slices" 8 | ) 9 | 10 | type Subscription struct { 11 | URL string `json:"url"` 12 | EventTypes []string `json:"event_types"` 13 | Schema string `json:"schema"` 14 | Table string `json:"table"` 15 | } 16 | 17 | func (s *Subscription) IsFor(action, schema, table string) bool { 18 
| if action == "" && schema == "" && table == "" { 19 | return true 20 | } 21 | 22 | if action != "" && len(s.EventTypes) > 0 && !slices.Contains(s.EventTypes, action) { 23 | return false 24 | } 25 | 26 | if schema != "" && s.Schema != "" && s.Schema != schema { 27 | return false 28 | } 29 | 30 | if table != "" && s.Table != "" && s.Table != table { 31 | return false 32 | } 33 | 34 | return true 35 | } 36 | 37 | func (s *Subscription) Key() string { 38 | return fmt.Sprintf("%s/%s/%s", s.URL, s.Schema, s.Table) 39 | } 40 | -------------------------------------------------------------------------------- /pkg/wal/processor/webhook/webhook.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package webhook 4 | 5 | import "github.com/xataio/pgstream/pkg/wal" 6 | 7 | type Payload struct { 8 | Data *wal.Data 9 | } 10 | -------------------------------------------------------------------------------- /pkg/wal/replication/mocks/mock_replication_handler.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package mocks 4 | 5 | import ( 6 | "context" 7 | "sync/atomic" 8 | 9 | "github.com/xataio/pgstream/pkg/wal/replication" 10 | ) 11 | 12 | type Handler struct { 13 | StartReplicationFn func(context.Context) error 14 | StartReplicationFromLSNFn func(context.Context, replication.LSN) error 15 | ReceiveMessageFn func(context.Context, uint64) (*replication.Message, error) 16 | SyncLSNFn func(context.Context, replication.LSN) error 17 | DropReplicationSlotFn func(ctx context.Context) error 18 | GetLSNParserFn func() replication.LSNParser 19 | GetCurrentLSNFn func(context.Context) (replication.LSN, error) 20 | CloseFn func() error 21 | SyncLSNCalls uint64 22 | ReceiveMessageCalls uint64 23 | } 24 | 25 | func (m *Handler) StartReplication(ctx context.Context) error { 26 | return m.StartReplicationFn(ctx) 27 | } 28 | 29 
| func (m *Handler) StartReplicationFromLSN(ctx context.Context, lsn replication.LSN) error { 30 | return m.StartReplicationFromLSNFn(ctx, lsn) 31 | } 32 | 33 | func (m *Handler) ReceiveMessage(ctx context.Context) (*replication.Message, error) { 34 | atomic.AddUint64(&m.ReceiveMessageCalls, 1) 35 | return m.ReceiveMessageFn(ctx, m.GetReceiveMessageCalls()) 36 | } 37 | 38 | func (m *Handler) SyncLSN(ctx context.Context, lsn replication.LSN) error { 39 | atomic.AddUint64(&m.SyncLSNCalls, 1) 40 | return m.SyncLSNFn(ctx, lsn) 41 | } 42 | 43 | func (m *Handler) DropReplicationSlot(ctx context.Context) error { 44 | return m.DropReplicationSlotFn(ctx) 45 | } 46 | 47 | func (m *Handler) GetCurrentLSN(ctx context.Context) (replication.LSN, error) { 48 | return m.GetCurrentLSNFn(ctx) 49 | } 50 | 51 | func (m *Handler) GetLSNParser() replication.LSNParser { 52 | return m.GetLSNParserFn() 53 | } 54 | 55 | func (m *Handler) Close() error { 56 | return m.CloseFn() 57 | } 58 | 59 | func (m *Handler) GetSyncLSNCalls() uint64 { 60 | return atomic.LoadUint64(&m.SyncLSNCalls) 61 | } 62 | 63 | func (m *Handler) GetReceiveMessageCalls() uint64 { 64 | return atomic.LoadUint64(&m.ReceiveMessageCalls) 65 | } 66 | -------------------------------------------------------------------------------- /pkg/wal/replication/mocks/mock_replication_lsn_parser.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package mocks 4 | 5 | import ( 6 | "github.com/xataio/pgstream/pkg/wal/replication" 7 | ) 8 | 9 | type LSNParser struct { 10 | ToStringFn func(replication.LSN) string 11 | FromStringFn func(string) (replication.LSN, error) 12 | } 13 | 14 | func (m *LSNParser) ToString(lsn replication.LSN) string { 15 | return m.ToStringFn(lsn) 16 | } 17 | 18 | func (m *LSNParser) FromString(lsn string) (replication.LSN, error) { 19 | return m.FromStringFn(lsn) 20 | } 21 | 
-------------------------------------------------------------------------------- /pkg/wal/replication/postgres/helper_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package postgres 4 | 5 | import ( 6 | "errors" 7 | "fmt" 8 | "time" 9 | ) 10 | 11 | type mockRow struct { 12 | lsn string 13 | lag int64 14 | exists bool 15 | scanFn func(args ...any) error 16 | } 17 | 18 | func (m *mockRow) Scan(args ...any) error { 19 | if m.scanFn != nil { 20 | return m.scanFn(args...) 21 | } 22 | 23 | if len(args) != 1 { 24 | return fmt.Errorf("expected 1 argument, got %d", len(args)) 25 | } 26 | 27 | switch arg := args[0].(type) { 28 | case *string: 29 | *arg = m.lsn 30 | case *int64: 31 | *arg = m.lag 32 | case *bool: 33 | *arg = m.exists 34 | default: 35 | return fmt.Errorf("unexpected argument type in scan: %T", args[0]) 36 | } 37 | 38 | return nil 39 | } 40 | 41 | const ( 42 | testDBName = "test-db" 43 | testSlot = "test_slot" 44 | testLSN = uint64(7773397064) 45 | testLSNStr = "1/CF54A048" 46 | ) 47 | 48 | var ( 49 | errTest = errors.New("oh noes") 50 | 51 | now = time.Now() 52 | ) 53 | -------------------------------------------------------------------------------- /pkg/wal/replication/postgres/pg_lsn_parser.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package postgres 4 | 5 | import ( 6 | "github.com/jackc/pglogrepl" 7 | 8 | "github.com/xataio/pgstream/pkg/wal/replication" 9 | ) 10 | 11 | // LSNParser is the postgres implementation of the replication.LSNParser 12 | type LSNParser struct{} 13 | 14 | func NewLSNParser() *LSNParser { 15 | return &LSNParser{} 16 | } 17 | 18 | func (p *LSNParser) FromString(lsnStr string) (replication.LSN, error) { 19 | lsn, err := pglogrepl.ParseLSN(lsnStr) 20 | if err != nil { 21 | return 0, err 22 | } 23 | return replication.LSN(lsn), nil 24 | } 25 | 26 | func 
(p *LSNParser) ToString(lsn replication.LSN) string { 27 | return pglogrepl.LSN(lsn).String() 28 | } 29 | -------------------------------------------------------------------------------- /pkg/wal/replication/replication_handler.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package replication 4 | 5 | import ( 6 | "context" 7 | "errors" 8 | "time" 9 | ) 10 | 11 | // Handler manages the replication operations 12 | type Handler interface { 13 | StartReplication(ctx context.Context) error 14 | StartReplicationFromLSN(ctx context.Context, lsn LSN) error 15 | ReceiveMessage(ctx context.Context) (*Message, error) 16 | SyncLSN(ctx context.Context, lsn LSN) error 17 | GetReplicationLag(ctx context.Context) (int64, error) 18 | GetCurrentLSN(ctx context.Context) (LSN, error) 19 | GetLSNParser() LSNParser 20 | Close() error 21 | } 22 | 23 | // Message contains the replication data 24 | type Message struct { 25 | LSN LSN 26 | Data []byte 27 | ServerTime time.Time 28 | ReplyRequested bool 29 | } 30 | 31 | // LSNParser handles the LSN type conversion 32 | type LSNParser interface { 33 | ToString(LSN) string 34 | FromString(string) (LSN, error) 35 | } 36 | 37 | type LSN uint64 38 | 39 | var ErrConnTimeout = errors.New("connection timeout") 40 | -------------------------------------------------------------------------------- /snapshot2pg.env: -------------------------------------------------------------------------------- 1 | # Listener config 2 | PGSTREAM_POSTGRES_SNAPSHOT_LISTENER_URL="postgres://postgres:postgres@localhost?sslmode=disable" 3 | PGSTREAM_POSTGRES_SNAPSHOT_TABLES="test" 4 | PGSTREAM_POSTGRES_SNAPSHOT_SCHEMA_WORKERS=4 5 | PGSTREAM_POSTGRES_SNAPSHOT_TABLE_WORKERS=4 6 | PGSTREAM_POSTGRES_SNAPSHOT_BATCH_PAGE_SIZE=1000 7 | PGSTREAM_POSTGRES_SNAPSHOT_WORKERS=1 8 | 9 | # Processor config 10 | PGSTREAM_TRANSFORMER_RULES_FILE="transformer_rules.yaml" 11 | 
PGSTREAM_POSTGRES_WRITER_TARGET_URL="postgres://postgres:postgres@localhost:7654?sslmode=disable" 12 | PGSTREAM_POSTGRES_WRITER_BATCH_SIZE=100 13 | PGSTREAM_POSTGRES_WRITER_BATCH_TIMEOUT=5s 14 | PGSTREAM_POSTGRES_WRITER_SCHEMALOG_STORE_URL="postgres://postgres:postgres@localhost?sslmode=disable" 15 | PGSTREAM_POSTGRES_WRITER_DISABLE_TRIGGERS=true 16 | PGSTREAM_POSTGRES_WRITER_ON_CONFLICT_ACTION="nothing" 17 | -------------------------------------------------------------------------------- /snapshot2pg.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | postgres: 3 | url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" 4 | mode: snapshot # options are replication, snapshot or snapshot_and_replication 5 | snapshot: # when mode is snapshot or snapshot_and_replication 6 | mode: full # options are full, schema or data 7 | tables: ["*"] # tables to snapshot, can be a list of table names or a pattern 8 | recorder: 9 | postgres_url: "postgres://postgres:postgres@localhost:5432?sslmode=disable" # URL of the database where the snapshot status is recorded 10 | snapshot_workers: 1 # number of schemas to be snapshotted in parallel 11 | data: # when mode is full or data 12 | schema_workers: 4 # number of schema tables to be snapshotted in parallel 13 | table_workers: 4 # number of workers to snapshot a table in parallel 14 | batch_page_size: 1000 # number of pages to read per batch 15 | schema: # when mode is full or schema 16 | mode: pgdump_pgrestore # options are pgdump_pgrestore or schemalog 17 | pgdump_pgrestore: 18 | clean_target_db: true # whether to clean the target database before restoring 19 | 20 | target: 21 | postgres: 22 | url: "postgres://postgres:postgres@localhost:7654?sslmode=disable" 23 | batch: 24 | timeout: 5000 # batch timeout in milliseconds 25 | size: 100 # number of messages in a batch 26 | disable_triggers: false # whether to disable triggers on the target database 27 | 
on_conflict_action: "nothing" # options are update, nothing or error 28 | 29 | modifiers: 30 | injector: 31 | enabled: false # whether to inject pgstream metadata into the WAL events 32 | transformations: 33 | validation_mode: relaxed 34 | table_transformers: 35 | - schema: public 36 | table: test 37 | column_transformers: 38 | name: 39 | name: greenmask_firstname 40 | dynamic_parameters: 41 | gender: 42 | column: sex 43 | -------------------------------------------------------------------------------- /tools/webhook/webhook_server.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | 3 | package main 4 | 5 | import ( 6 | "bytes" 7 | "encoding/json" 8 | "flag" 9 | "fmt" 10 | "io" 11 | "net/http" 12 | "os" 13 | "time" 14 | 15 | "github.com/xataio/pgstream/internal/log/zerolog" 16 | loglib "github.com/xataio/pgstream/pkg/log" 17 | ) 18 | 19 | var logger loglib.Logger 20 | 21 | func main() { 22 | address := flag.String("address", ":9910", "Webhook server address") 23 | logLevel := flag.String("log-level", "debug", "Webhook server log level") 24 | flag.Parse() 25 | 26 | logger = zerolog.NewStdLogger(zerolog.NewLogger(&zerolog.Config{ 27 | LogLevel: *logLevel, 28 | })) 29 | 30 | mux := http.NewServeMux() 31 | mux.HandleFunc("/webhook", processWebhook) 32 | 33 | server := &http.Server{ 34 | Handler: mux, 35 | Addr: *address, 36 | ReadTimeout: 5 * time.Second, 37 | WriteTimeout: 5 * time.Second, 38 | } 39 | 40 | logger.Info(fmt.Sprintf("listening on %s...", *address)) 41 | if err := server.ListenAndServe(); err != nil { 42 | logger.Error(err, "listening on http server", loglib.Fields{"address": *address}) 43 | os.Exit(1) 44 | } 45 | } 46 | 47 | func processWebhook(w http.ResponseWriter, r *http.Request) { 48 | if r.Method != http.MethodPost { 49 | http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) 50 | return 51 | } 52 | 53 | logger.Debug("got /webhook request") 54 | 55 | 
bodyBytes, err := io.ReadAll(r.Body) 56 | if err != nil { 57 | http.Error(w, err.Error(), http.StatusBadRequest) 58 | return 59 | } 60 | defer r.Body.Close() 61 | 62 | var prettyJSON bytes.Buffer 63 | if err = json.Indent(&prettyJSON, bodyBytes, "", " "); err != nil { 64 | http.Error(w, err.Error(), http.StatusBadRequest) 65 | return 66 | } 67 | logger.Info(prettyJSON.String()) 68 | 69 | w.WriteHeader(http.StatusOK) 70 | } 71 | -------------------------------------------------------------------------------- /transformer_rules.yaml: -------------------------------------------------------------------------------- 1 | transformations: 2 | validation_mode: relaxed 3 | table_transformers: 4 | - schema: public 5 | table: test 6 | column_transformers: 7 | name: 8 | name: greenmask_firstname 9 | dynamic_parameters: 10 | gender: 11 | column: sex 12 | --------------------------------------------------------------------------------