├── .circleci └── config.yml ├── .github └── workflows │ └── go-binary-release.yml ├── .gitignore ├── Dockerfile ├── Dockerfile.build ├── LICENSE ├── Makefile ├── NOTICE ├── README.md ├── cmd ├── agent.go ├── agent_test.go ├── configure.go ├── controller.go ├── gateway.go ├── pg2pulsar.go ├── pulsar2pg.go ├── root.go └── version.go ├── docker-compose.yml ├── dockerbuild.sh ├── example ├── client │ └── main.go ├── const.go ├── generator │ └── main.go ├── reset │ └── main.go ├── schedule │ └── main.go ├── server │ └── main.go └── stop │ └── main.go ├── go.mod ├── go.sum ├── hack ├── images │ └── overview.png └── postgres │ ├── 11 │ └── Dockerfile │ ├── 12 │ └── Dockerfile │ ├── 13 │ └── Dockerfile │ ├── 14 │ ├── Dockerfile │ └── pglogical │ │ ├── new │ │ └── pglogical_output_plugin.c │ │ ├── old │ │ └── pglogical_output_plugin.c │ │ └── pglogical.patch │ ├── 15 │ ├── Dockerfile │ └── pglogical │ │ ├── new │ │ └── pglogical_output_plugin.c │ │ ├── old │ │ └── pglogical_output_plugin.c │ │ └── pglogical.patch │ ├── 16 │ ├── Dockerfile │ └── pglogical │ │ ├── new │ │ └── pglogical_output_plugin.c │ │ ├── old │ │ └── pglogical_output_plugin.c │ │ └── pglogical.patch │ ├── 17 │ ├── Dockerfile │ └── pglogical │ │ ├── new │ │ └── pglogical_output_plugin.c │ │ ├── old │ │ └── pglogical_output_plugin.c │ │ └── pglogical.patch │ ├── 96 │ └── Dockerfile │ ├── dockerbuild.sh │ ├── extension │ ├── LICENSE │ ├── Makefile │ ├── expected │ │ ├── 00_setup.out │ │ ├── 01_basic.out │ │ ├── 02_nested_ddl.out │ │ ├── 03_multi_query.out │ │ └── 04_temp.out │ ├── make.sh │ ├── pg_import.c │ ├── pg_import.h │ ├── pgcapture--0.1.sql │ ├── pgcapture.c │ ├── pgcapture.control │ ├── pgcapture.h │ └── sql │ │ ├── 00_setup.sql │ │ ├── 01_basic.sql │ │ ├── 02_nested_ddl.sql │ │ ├── 03_multi_query.sql │ │ └── 04_temp.sql │ ├── pg_hba.conf │ └── postgresql.conf ├── internal ├── cursormock │ ├── main.go │ └── mock.go ├── test │ ├── env.go │ ├── pg.go │ └── pulsar.go └── tool │ └── main.go ├── pb └── pgcapture.proto ├── pgcapture.go ├── pkg ├── cursor │ ├── main.go │ ├── main_test.go │ ├── pulsar.go │ ├── pulsar_sub.go │ ├── pulsar_sub_test.go │ ├── pulsar_test.go │ └── tracker.go ├── dblog │ ├── control.go │ ├── control_test.go │ ├── dumper.go │ ├── dumper_test.go │ ├── gateway.go │ ├── gateway_test.go │ ├── puller.go │ ├── puller_test.go │ ├── resolver.go │ ├── resolver_test.go │ ├── scheduler.go │ └── scheduler_test.go ├── decode │ ├── bytes.go │ ├── bytes_test.go │ ├── decoder.go │ ├── decoder_test.go │ ├── main_test.go │ ├── pglogical.go │ ├── pglogical_test.go │ ├── pgoutput.go │ ├── pgoutput_test.go │ ├── schema.go │ └── schema_test.go ├── pb │ ├── pgcapture.pb.go │ └── pgcapture_grpc.pb.go ├── pgcapture │ ├── consumer.go │ ├── consumer_test.go │ ├── dblog.go │ ├── debounce.go │ ├── json.go │ ├── json_test.go │ └── reflect.go ├── sink │ ├── main.go │ ├── main_test.go │ ├── postgres.go │ ├── postgres_test.go │ ├── pulsar.go │ └── pulsar_test.go ├── source │ ├── main.go │ ├── main_test.go │ ├── postgres.go │ ├── postgres_test.go │ ├── pulsar.go │ └── pulsar_test.go └── sql │ ├── builder.go │ ├── builder_test.go │ └── source.go └── python ├── README.md ├── example.py ├── pb ├── pgcapture_pb2.py └── pgcapture_pb2_grpc.py ├── pgcapture ├── __init__.py ├── client.py └── decoders.py └── setup.py /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | 4 | executors: 5 | default: 6 | machine: 7 | image: ubuntu-2204:2024.05.1 8 | 9 | commands: 10 | 
test-with-pg: 11 | parameters: 12 | version: 13 | type: integer 14 | default: 11 15 | steps: 16 | - run: curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter 17 | - run: chmod +x ./cc-test-reporter 18 | - run: ./cc-test-reporter before-build 19 | - run: PG_VERSION=<< parameters.version >> ./dockerbuild.sh test 20 | - run: ./cc-test-reporter after-build -p $(go list -m) --exit-code $? 21 | 22 | jobs: 23 | test-pg-11: 24 | executor: default 25 | steps: 26 | - checkout 27 | - test-with-pg: 28 | version: 11 29 | test-pg-12: 30 | executor: default 31 | steps: 32 | - checkout 33 | - test-with-pg: 34 | version: 12 35 | test-pg-13: 36 | executor: default 37 | steps: 38 | - checkout 39 | - test-with-pg: 40 | version: 13 41 | test-pg-14: 42 | executor: default 43 | steps: 44 | - checkout 45 | - test-with-pg: 46 | version: 14 47 | test-pg-15: 48 | executor: default 49 | steps: 50 | - checkout 51 | - test-with-pg: 52 | version: 15 53 | test-pg-16: 54 | executor: default 55 | steps: 56 | - checkout 57 | - test-with-pg: 58 | version: 16 59 | test-pg-17: 60 | executor: default 61 | steps: 62 | - checkout 63 | - test-with-pg: 64 | version: 17 65 | 66 | workflows: 67 | test: 68 | jobs: 69 | - test-pg-11 70 | - test-pg-12 71 | - test-pg-13 72 | - test-pg-14 73 | - test-pg-15 74 | - test-pg-16 75 | - test-pg-17 76 | -------------------------------------------------------------------------------- /.github/workflows/go-binary-release.yml: -------------------------------------------------------------------------------- 1 | name: Go Binary Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - "v*.*.*" 7 | 8 | permissions: 9 | contents: write 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout 16 | uses: actions/checkout@v3 17 | - name: Build 18 | run: ./dockerbuild.sh build && sudo mv ./bin/out/pgcapture ./bin/out/pgcapture-linux-amd64 19 | - name: Release 20 | uses: softprops/action-gh-release@v1 21 | with: 22 | files: ./bin/out/pgcapture-linux-amd64 23 | env: 24 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.o 3 | *.so 4 | __pycache__ 5 | bin 6 | results/ 7 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gcr.io/distroless/base-debian10 2 | 3 | COPY bin/out / 4 | 5 | ENTRYPOINT ["/pgcapture"] 6 | -------------------------------------------------------------------------------- /Dockerfile.build: -------------------------------------------------------------------------------- 1 | FROM golang:1.20-bullseye 2 | 3 | RUN apt-get update && apt-get install -y python3 python3-pip 4 | RUN python3 -m pip install grpcio-tools 5 | RUN git config --global --add safe.directory /src 6 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PATH := ${CURDIR}/bin:$(PATH) 2 | go_exe = $(shell go env GOEXE) 3 | protoc_version = 3.19.1 4 | protoc_arch = x86_64 5 | 6 | ifeq ($(shell uname -s),Darwin) 7 | protoc_os = osx 8 | else 9 | protoc_os = linux 10 | 11 | ifeq ($(shell uname -m),aarch64) 12 | protoc_arch = aarch_64 13 | endif 14 | endif 15 | 16 | .PHONY: build 17 | build: 18 | go build \ 19 | 
-ldflags="-X github.com/replicase/pgcapture.CommitSHA=${PGCAPTURE_SHA} -X github.com/replicase/pgcapture.Version=${PGCAPTURE_VERSION}" \ 20 | -x -o bin/out/pgcapture ./cmd 21 | 22 | .PHONY: test 23 | test: 24 | go test -v -race -p 1 -coverprofile=./c.out ./... 25 | 26 | .PHONY: codegen 27 | codegen: bin/protoc-gen-go$(go_exe) bin/protoc-gen-go-grpc$(go_exe) bin/mockgen$(go_exe) proto generate 28 | 29 | PHONY: proto 30 | proto: bin/protoc 31 | protoc --go_out=pkg --go_opt=paths=source_relative --go-grpc_out=pkg --go-grpc_opt=paths=source_relative pb/*.proto 32 | python3 -m grpc_tools.protoc -I. --python_out=./python --grpc_python_out=./python pb/*.proto 33 | 34 | .PHONY: generate 35 | generate: 36 | go generate ./... 37 | 38 | bin/mockgen$(go_exe): go.sum 39 | go build -o $@ github.com/golang/mock/mockgen 40 | 41 | bin/protoc-gen-go$(go_exe): go.sum 42 | go build -o $@ google.golang.org/protobuf/cmd/protoc-gen-go 43 | 44 | bin/protoc-gen-go-grpc$(go_exe): go.sum 45 | go build -o $@ google.golang.org/grpc/cmd/protoc-gen-go-grpc 46 | 47 | bin/protoc-$(protoc_version).zip: 48 | mkdir -p $(dir $@) 49 | curl -o $@ --location https://github.com/protocolbuffers/protobuf/releases/download/v$(protoc_version)/protoc-$(protoc_version)-$(protoc_os)-$(protoc_arch).zip 50 | 51 | bin/protoc-$(protoc_version): bin/protoc-$(protoc_version).zip 52 | mkdir -p $@ 53 | unzip -d $@ -o $< 54 | 55 | bin/protoc: bin/protoc-$(protoc_version) 56 | ln -s -f ./protoc-$(protoc_version)/bin/protoc $@ 57 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | pgcapture 2 | Copyright 2021 Rueian (https://github.com/rueian) 3 | 4 | This product includes software developed at 5 | The Apache Software Foundation (http://www.apache.org/). 
6 | -------------------------------------------------------------------------------- /cmd/agent_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | 7 | "google.golang.org/protobuf/types/known/structpb" 8 | ) 9 | 10 | func TestExtract_EmptyParams(t *testing.T) { 11 | actual, err := extract(&structpb.Struct{}, "a", "b", "c") 12 | if actual != nil { 13 | t.Errorf("actual: %v, want: nil", actual) 14 | } 15 | if err.Error() != "a key is required in parameters" { 16 | t.Errorf("unexpected error: %v", err) 17 | } 18 | } 19 | 20 | func TestExtract_MissingRequiredParams(t *testing.T) { 21 | params, err := structpb.NewStruct(map[string]interface{}{ 22 | "a": "foo", 23 | "b": "bar", 24 | }) 25 | if err != nil { 26 | t.Fatal(err) 27 | } 28 | 29 | actual, err := extract(params, "a", "b", "c") 30 | if actual != nil { 31 | t.Errorf("actual: %v, want: nil", actual) 32 | } 33 | if err.Error() != "c key is required in parameters" { 34 | t.Errorf("unexpected error: %v", err) 35 | } 36 | } 37 | 38 | func TestExtract_WithRequiredParams(t *testing.T) { 39 | params, err := structpb.NewStruct(map[string]interface{}{ 40 | "a": "foo", 41 | "b": 123, 42 | "c": true, 43 | }) 44 | if err != nil { 45 | t.Fatal(err) 46 | } 47 | 48 | actual, err := extract(params, "a", "b", "c") 49 | if err != nil { 50 | t.Fatal(err) 51 | } 52 | 53 | expected := map[string]*structpb.Value{"a": structpb.NewStringValue("foo"), "b": structpb.NewNumberValue(123), "c": structpb.NewBoolValue(true)} 54 | if !reflect.DeepEqual(actual, expected) { 55 | t.Errorf("actual: %v, want: %v", actual, expected) 56 | } 57 | } 58 | 59 | func TestExtract_MissingOptionalParams(t *testing.T) { 60 | params, err := structpb.NewStruct(map[string]interface{}{ 61 | "a": "foo", 62 | "b": "bar", 63 | "c": "", 64 | }) 65 | if err != nil { 66 | t.Fatal(err) 67 | } 68 | 69 | actual, err := extract(params, "a", "b", "?c", "?d") 70 | if err != nil { 71 | t.Fatal(err) 72 | } 73 | 74 | // c is a zero value, but it's optional, so its value is an empty string 75 | // d is not present, but it's optional, so its value is nil 76 | expected := map[string]*structpb.Value{"a": structpb.NewStringValue("foo"), "b": structpb.NewStringValue("bar"), "c": structpb.NewStringValue(""), "d": nil} 77 | if !reflect.DeepEqual(actual, expected) { 78 | t.Errorf("actual: %v, want: %v", actual, expected) 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /cmd/configure.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "log" 6 | "strconv" 7 | "time" 8 | 9 | "github.com/replicase/pgcapture/pkg/decode" 10 | "github.com/replicase/pgcapture/pkg/pb" 11 | "github.com/spf13/cobra" 12 | "google.golang.org/grpc" 13 | "google.golang.org/protobuf/types/known/structpb" 14 | ) 15 | 16 | var ( 17 | AgentAddr string 18 | AgentCommand string 19 | ConfigPGConnURL string 20 | ConfigPGReplURL string 21 | ConfigPulsarURL string 22 | ConfigPulsarTopic string 23 | ConfigPGLogPath string 24 | ConfigStartLSN string 25 | ConfigPulsarTracker string 26 | ConfigPulsarTrackerInterval string 27 | ConfigPulsarTrackerReplicateState bool 28 | ConfigDecodePlugin string 29 | ConfigBatchTXSize int 30 | ) 31 | 32 | func init() { 33 | rootCmd.AddCommand(configure) 34 | configure.Flags().StringVarP(&AgentAddr, "AgentAddr", "", "", "connection addr to the pgcapture agent") 35 |
configure.Flags().StringVarP(&AgentCommand, "AgentCommand", "", "", "agent command to configure") 36 | configure.Flags().StringVarP(&ConfigPGConnURL, "PGConnURL", "", "", "connection url for installing the pg extension and fetching schema information") 37 | configure.Flags().StringVarP(&ConfigPGReplURL, "PGReplURL", "", "", "connection url for fetching logs from the logical replication slot") 38 | configure.Flags().StringVarP(&ConfigPulsarURL, "PulsarURL", "", "", "connection url to the sink pulsar cluster") 39 | configure.Flags().StringVarP(&ConfigPulsarTopic, "PulsarTopic", "", "", "the sink pulsar topic name, which also serves as the logical replication slot name") 40 | configure.Flags().StringVarP(&ConfigPGLogPath, "PGLogPath", "", "", "pg log path for finding the last checkpoint lsn") 41 | configure.Flags().StringVarP(&ConfigStartLSN, "StartLSN", "", "", "the LSN position to start the pg2pulsar process, optional") 42 | configure.Flags().StringVarP(&ConfigPulsarTracker, "PulsarTracker", "", "", "the tracker type for pg2pulsar, optional") 43 | configure.Flags().StringVarP(&ConfigPulsarTrackerInterval, "PulsarTrackerInterval", "", "", "the commit interval for pg2pulsar, optional") 44 | configure.Flags().BoolVarP(&ConfigPulsarTrackerReplicateState, "PulsarTrackerReplicateState", "", false, "the replicate state for pg2pulsar, optional") 45 | configure.Flags().StringVarP(&ConfigDecodePlugin, "DecodePlugin", "", decode.PGOutputPlugin, "the logical decoding plugin name") 46 | configure.Flags().IntVarP(&ConfigBatchTXSize, "BatchTxSize", "", 100, "the max number of tx in a pipeline") 47 | configure.MarkFlagRequired("AgentAddr") 48 | configure.MarkFlagRequired("AgentCommand") 49 | configure.MarkFlagRequired("PGConnURL") 50 | configure.MarkFlagRequired("PulsarURL") 51 | configure.MarkFlagRequired("PulsarTopic") 52 | } 53 | 54 | var configure = &cobra.Command{ 55 | Use: "configure", 56 | Short: "Poke the agent's Configure endpoint repeatedly", 57 | RunE: func(cmd *cobra.Command, args []string) (err error) { 58 | params, err := structpb.NewStruct(map[string]interface{}{ 59 | "Command": AgentCommand, 60 | "PGConnURL": ConfigPGConnURL, 61 | "PGReplURL": ConfigPGReplURL, 62 | "PulsarURL": ConfigPulsarURL, 63 | "PulsarTopic": ConfigPulsarTopic, 64 | "PGLogPath": ConfigPGLogPath, 65 | "StartLSN": ConfigStartLSN, 66 | "PulsarTracker": ConfigPulsarTracker, 67 | "PulsarTrackerInterval": ConfigPulsarTrackerInterval, 68 | "PulsarTrackerReplicateState": strconv.FormatBool(ConfigPulsarTrackerReplicateState), 69 | "DecodePlugin": ConfigDecodePlugin, 70 | "BatchTxSize": ConfigBatchTXSize, 71 | }) 72 | if err != nil { 73 | panic(err) 74 | } 75 | 76 | for { 77 | if err := poke(AgentAddr, params); err != nil { 78 | log.Println("Err", err) 79 | } 80 | 81 | time.Sleep(5 * time.Second) 82 | } 83 | }, 84 | } 85 | 86 | func poke(addr string, params *structpb.Struct) error { 87 | conn, err := grpc.Dial(addr, grpc.WithInsecure(), grpc.WithBlock()) 88 | if err != nil { 89 | return err 90 | } 91 | defer conn.Close() 92 | 93 | client := pb.NewAgentClient(conn) 94 | 95 | resp, err := client.Configure(context.Background(), &pb.AgentConfigRequest{Parameters: params}) 96 | if err != nil { 97 | return err 98 | } 99 | 100 | log.Println("Success", resp.String()) 101 | return nil 102 | } 103 | -------------------------------------------------------------------------------- /cmd/controller.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "time" 5 |
"github.com/replicase/pgcapture/pkg/dblog" 7 | "github.com/replicase/pgcapture/pkg/pb" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | var ( 12 | ControllerListenAddr string 13 | ) 14 | 15 | func init() { 16 | rootCmd.AddCommand(controller) 17 | controller.Flags().StringVarP(&ControllerListenAddr, "ListenAddr", "", ":10000", "the tcp address for grpc server to listen") 18 | } 19 | 20 | var controller = &cobra.Command{ 21 | Use: "controller", 22 | Short: "grpc api for controlling the dump process", 23 | RunE: func(cmd *cobra.Command, args []string) (err error) { 24 | control := dblog.NewController(dblog.NewMemoryScheduler(time.Millisecond * 100)) 25 | return serveGRPC(&pb.DBLogController_ServiceDesc, ControllerListenAddr, control, func() {}) 26 | }, 27 | } 28 | -------------------------------------------------------------------------------- /cmd/gateway.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | 6 | "github.com/replicase/pgcapture/pkg/dblog" 7 | "github.com/replicase/pgcapture/pkg/pb" 8 | "github.com/spf13/cobra" 9 | "google.golang.org/grpc" 10 | ) 11 | 12 | var ( 13 | GatewayListenAddr string 14 | ControllerAddr string 15 | ResolverConfig string 16 | ) 17 | 18 | func init() { 19 | rootCmd.AddCommand(gateway) 20 | gateway.Flags().StringVarP(&GatewayListenAddr, "ListenAddr", "", ":10001", "the tcp address for grpc server to listen") 21 | gateway.Flags().StringVarP(&ControllerAddr, "ControllerAddr", "", "127.0.0.1:10000", "the tcp address of dblog controller") 22 | gateway.Flags().StringVarP(&ResolverConfig, "ResolverConfig", "", "", "json config for resolving where is the source pulsar and where is the dump source") 23 | gateway.MarkFlagRequired("ResolverConfig") 24 | } 25 | 26 | var gateway = &cobra.Command{ 27 | Use: "gateway", 28 | Short: "grpc api for downstream to consume changes and dumps", 29 | RunE: func(cmd *cobra.Command, args []string) (err error) { 30 | resolverConfig := map[string]dblog.StaticAgentPulsarURIConfig{} 31 | if err = json.Unmarshal([]byte(ResolverConfig), &resolverConfig); err != nil { 32 | return err 33 | } 34 | controlConn, err := grpc.Dial(ControllerAddr, grpc.WithInsecure()) 35 | if err != nil { 36 | return err 37 | } 38 | gateway := &dblog.Gateway{ 39 | SourceResolver: dblog.NewStaticAgentPulsarResolver(resolverConfig), 40 | DumpInfoPuller: &dblog.GRPCDumpInfoPuller{Client: pb.NewDBLogControllerClient(controlConn)}, 41 | } 42 | return serveGRPC(&pb.DBLogGateway_ServiceDesc, GatewayListenAddr, gateway, func() {}) 43 | }, 44 | } 45 | -------------------------------------------------------------------------------- /cmd/pg2pulsar.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/apache/pulsar-client-go/pulsar" 5 | "github.com/replicase/pgcapture/pkg/decode" 6 | "github.com/replicase/pgcapture/pkg/sink" 7 | "github.com/replicase/pgcapture/pkg/source" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | var ( 12 | SourcePGConnURL string 13 | SourcePGReplURL string 14 | SinkPulsarURL string 15 | SinkPulsarTopic string 16 | DecodePlugin string 17 | ) 18 | 19 | func init() { 20 | rootCmd.AddCommand(pg2pulsar) 21 | pg2pulsar.Flags().StringVarP(&SourcePGConnURL, "PGConnURL", "", "", "connection url to install pg extension and fetching schema information") 22 | pg2pulsar.Flags().StringVarP(&SourcePGReplURL, "PGReplURL", "", "", "connection url to fetching logs from logical replication slot") 23 | 
pg2pulsar.Flags().StringVarP(&SinkPulsarURL, "PulsarURL", "", "", "connection url to the sink pulsar cluster") 24 | pg2pulsar.Flags().StringVarP(&SinkPulsarTopic, "PulsarTopic", "", "", "the sink pulsar topic name, which also serves as the logical replication slot name") 25 | pg2pulsar.Flags().StringVar(&DecodePlugin, "DecodePlugin", decode.PGOutputPlugin, "the logical decoding plugin name") 26 | pg2pulsar.MarkFlagRequired("PGConnURL") 27 | pg2pulsar.MarkFlagRequired("PGReplURL") 28 | pg2pulsar.MarkFlagRequired("PulsarURL") 29 | pg2pulsar.MarkFlagRequired("PulsarTopic") 30 | } 31 | 32 | var pg2pulsar = &cobra.Command{ 33 | Use: "pg2pulsar", 34 | Short: "Capture logical replication logs from a PostgreSQL logical replication slot to a Pulsar Topic", 35 | RunE: func(cmd *cobra.Command, args []string) (err error) { 36 | pgSrc := &source.PGXSource{SetupConnStr: SourcePGConnURL, ReplConnStr: SourcePGReplURL, ReplSlot: trimSlot(SinkPulsarTopic), CreateSlot: true, CreatePublication: true, DecodePlugin: DecodePlugin} 37 | pulsarSink := &sink.PulsarSink{PulsarOption: pulsar.ClientOptions{URL: SinkPulsarURL}, PulsarTopic: SinkPulsarTopic} 38 | return sourceToSink(pgSrc, pulsarSink) 39 | }, 40 | } 41 | -------------------------------------------------------------------------------- /cmd/pulsar2pg.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/apache/pulsar-client-go/pulsar" 7 | "github.com/replicase/pgcapture/pkg/sink" 8 | "github.com/replicase/pgcapture/pkg/source" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | var ( 13 | SinkPGConnURL string 14 | SinkPGLogPath string 15 | SourcePulsarURL string 16 | SourcePulsarTopic string 17 | Renice int64 18 | BatchTXSize int 19 | ) 20 | 21 | func init() { 22 | rootCmd.AddCommand(pulsar2pg) 23 | pulsar2pg.Flags().StringVarP(&SinkPGConnURL, "PGConnURL", "", "", "connection url for installing the pg extension and fetching schema information") 24 | pulsar2pg.Flags().StringVarP(&SinkPGLogPath, "PGLogPath", "", "", "pg log path for finding the last checkpoint lsn") 25 | pulsar2pg.Flags().StringVarP(&SourcePulsarURL, "PulsarURL", "", "", "connection url to the source pulsar cluster") 26 | pulsar2pg.Flags().StringVarP(&SourcePulsarTopic, "PulsarTopic", "", "", "the source pulsar topic name, which also serves as the logical replication slot name") 27 | pulsar2pg.Flags().Int64VarP(&Renice, "Renice", "", -10, "try to renice the sink pg process") 28 | pulsar2pg.Flags().IntVarP(&BatchTXSize, "BatchTxSize", "", 100, "the max number of tx in a pipeline") 29 | pulsar2pg.MarkFlagRequired("PGConnURL") 30 | pulsar2pg.MarkFlagRequired("PulsarURL") 31 | pulsar2pg.MarkFlagRequired("PulsarTopic") 32 | } 33 | 34 | var pulsar2pg = &cobra.Command{ 35 | Use: "pulsar2pg", 36 | Short: "Apply logical replication logs from a Pulsar Topic to PostgreSQL", 37 | RunE: func(cmd *cobra.Command, args []string) (err error) { 38 | pgSink := &sink.PGXSink{ConnStr: SinkPGConnURL, SourceID: trimSlot(SourcePulsarTopic), Renice: Renice, LogReader: nil, BatchTXSize: BatchTXSize} 39 | if SinkPGLogPath != "" { 40 | pgLog, err := os.Open(SinkPGLogPath) 41 | if err != nil { 42 | return err 43 | } 44 | defer pgLog.Close() 45 | pgSink.LogReader = pgLog 46 | } 47 | pulsarSrc := &source.PulsarReaderSource{PulsarOption: pulsar.ClientOptions{URL: SourcePulsarURL}, PulsarTopic: SourcePulsarTopic} 48 | return sourceToSink(pulsarSrc, pgSink) 49 | }, 50 | } 51 | -------------------------------------------------------------------------------- /cmd/root.go:
-------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "net" 7 | "net/http" 8 | _ "net/http/pprof" 9 | "os" 10 | "os/signal" 11 | "strings" 12 | "syscall" 13 | 14 | "github.com/prometheus/client_golang/prometheus/promhttp" 15 | "github.com/replicase/pgcapture/pkg/sink" 16 | "github.com/replicase/pgcapture/pkg/source" 17 | "github.com/sirupsen/logrus" 18 | "github.com/spf13/cobra" 19 | "google.golang.org/grpc" 20 | ) 21 | 22 | var ProfilerListenAddr string 23 | 24 | func init() { 25 | rootCmd.Flags().StringVarP(&ProfilerListenAddr, "ProfilerListenAddr", "", "localhost:6060", "golang profiler http endpoint") 26 | } 27 | 28 | var rootCmd = &cobra.Command{ 29 | Use: "pgcapture", 30 | PersistentPreRun: func(cmd *cobra.Command, args []string) { 31 | go func() { 32 | if ProfilerListenAddr != "" { 33 | log.Println(http.ListenAndServe(ProfilerListenAddr, nil)) 34 | } 35 | }() 36 | }, 37 | } 38 | 39 | func main() { 40 | if err := rootCmd.Execute(); err != nil { 41 | fmt.Fprintln(os.Stderr, err) 42 | os.Exit(1) 43 | } 44 | } 45 | 46 | func sourceToSink(src source.Source, sk sink.Sink) (err error) { 47 | signals := make(chan os.Signal, 1) 48 | signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM) 49 | 50 | lastCheckPoint, err := sk.Setup() 51 | if err != nil { 52 | return err 53 | } 54 | 55 | changes, err := src.Capture(lastCheckPoint) 56 | if err != nil { 57 | return err 58 | } 59 | 60 | go func() { 61 | checkpoints := sk.Apply(changes) 62 | for cp := range checkpoints { 63 | src.Commit(cp) 64 | } 65 | }() 66 | 67 | <-signals 68 | logrus.Info("received signal, stopping...") 69 | sk.Stop() 70 | src.Stop() 71 | logrus.Info("received signal, stopped") 72 | if err := sk.Error(); err != nil { 73 | return err 74 | } 75 | if err := src.Error(); err != nil { 76 | return err 77 | } 78 | return nil 79 | } 80 | 81 | func serveGRPC(desc *grpc.ServiceDesc, addr string, impl interface{}, clean func()) (err error) { 82 | lis, err := net.Listen("tcp", addr) 83 | if err != nil { 84 | return err 85 | } 86 | 87 | server := grpc.NewServer() 88 | server.RegisterService(desc, impl) 89 | 90 | go func() { 91 | signals := make(chan os.Signal, 1) 92 | signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM) 93 | <-signals 94 | logrus.Info("received signal, stopping grpc server...") 95 | server.GracefulStop() 96 | logrus.Info("received signal, cleaning up...") 97 | clean() 98 | logrus.Info("received signal, stopped") 99 | }() 100 | 101 | return server.Serve(lis) 102 | } 103 | 104 | func trimSlot(topic string) string { 105 | topic = strings.TrimPrefix(topic, "persistent://public/") 106 | topic = strings.ReplaceAll(topic, "/", "_") 107 | topic = strings.ReplaceAll(topic, "-", "_") 108 | return topic 109 | } 110 | 111 | func startPrometheusServer(addr string) { 112 | handler := promhttp.Handler() 113 | server := &http.Server{ 114 | Addr: addr, 115 | Handler: http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { 116 | if req.Method == http.MethodGet && req.URL.Path == "/metrics" { 117 | handler.ServeHTTP(w, req) 118 | } else { 119 | http.NotFound(w, req) 120 | } 121 | }), 122 | } 123 | 124 | logrus.WithFields(logrus.Fields{"addr": addr}).Info("starting prometheus server") 125 | go func() { 126 | if err := server.ListenAndServe(); err != nil { 127 | logrus.Error(err) 128 | } 129 | }() 130 | } 131 | -------------------------------------------------------------------------------- /cmd/version.go:
-------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/replicase/pgcapture" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | func init() { 11 | rootCmd.AddCommand(version) 12 | } 13 | 14 | var version = &cobra.Command{ 15 | Use: "version", 16 | Short: "print the version and git commit", 17 | RunE: func(cmd *cobra.Command, args []string) (err error) { 18 | fmt.Printf("version: %s (%s)\n", pgcapture.Version, pgcapture.CommitSHA) 19 | return nil 20 | }, 21 | } 22 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.8" 2 | 3 | x-common-pg: &common-pg 4 | build: 5 | context: hack/postgres 6 | dockerfile: ${PG_VERSION}/Dockerfile 7 | platform: ${PLATFORM} 8 | ports: 9 | - "5432:5432" 10 | command: [ "postgres", "-c", "config_file=/pgc/postgresql.conf", "-c","hba_file=/pgc/pg_hba.conf" ] 11 | environment: 12 | POSTGRES_HOST_AUTH_METHOD: trust 13 | volumes: 14 | - ./hack/postgres:/pgc 15 | 16 | x-common: &common 17 | build: 18 | context: . 19 | dockerfile: Dockerfile.build 20 | working_dir: /src 21 | volumes: 22 | - .:/src 23 | - ${LOCAL_GOPATH}/pkg/mod/cache:/go/pkg/mod/cache 24 | 25 | x-common-test-env: &common-test-env 26 | PG_VERSION: ${PG_VERSION} 27 | POSTGRES_URL: postgres://postgres@postgres:5432/postgres?sslmode=disable 28 | PULSAR_URL: pulsar://pulsar:6650 29 | PULSAR_ADMIN_URL: http://pulsar:8080 30 | 31 | x-common-test: &common-test 32 | <<: *common 33 | environment: *common-test-env 34 | depends_on: 35 | - pg_${PG_VERSION} 36 | - pulsar 37 | 38 | x-common-build: &common-build 39 | <<: *common 40 | environment: 41 | PGCAPTURE_SHA: ${PGCAPTURE_SHA} 42 | PGCAPTURE_VERSION: ${PGCAPTURE_VERSION} 43 | 44 | services: 45 | pg_11: 46 | <<: *common-pg 47 | image: replicase/postgres:11-logical 48 | container_name: postgres11 49 | hostname: postgres 50 | pg_12: 51 | <<: *common-pg 52 | image: replicase/postgres:12-logical 53 | container_name: postgres12 54 | hostname: postgres 55 | pg_13: 56 | <<: *common-pg 57 | image: replicase/postgres:13-logical 58 | container_name: postgres13 59 | hostname: postgres 60 | pg_14: 61 | <<: *common-pg 62 | image: replicase/postgres:14-logical 63 | container_name: postgres14 64 | hostname: postgres 65 | pg_15: 66 | <<: *common-pg 67 | image: replicase/postgres:15-logical 68 | container_name: postgres15 69 | hostname: postgres 70 | pg_16: 71 | <<: *common-pg 72 | image: replicase/postgres:16-logical 73 | container_name: postgres16 74 | hostname: postgres 75 | pg_17: 76 | <<: *common-pg 77 | image: replicase/postgres:17-logical 78 | container_name: postgres17 79 | hostname: postgres 80 | pulsar: 81 | image: apachepulsar/pulsar:2.10.4 82 | container_name: pulsar 83 | platform: ${PLATFORM} 84 | command: ["bin/pulsar", "standalone"] 85 | ports: 86 | - 6650:6650 87 | - 8080:8080 88 | build: 89 | <<: *common-build 90 | command: [ "make" ] 91 | pgcapture: 92 | image: replicase/pgcapture:latest 93 | build: 94 | context: .
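# note: this image copies in bin/out (see the root Dockerfile), so run the "build" service first, as dockerbuild.sh does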
95 | test: 96 | <<: *common-test 97 | command: [ "make", "test" ] 98 | test-deps: 99 | image: dadarek/wait-for-dependencies 100 | depends_on: 101 | - pg_${PG_VERSION} 102 | - pulsar 103 | command: [ "pulsar:6650", "pulsar:8080", "postgres:5432" ] 104 | codegen: 105 | <<: *common 106 | command: [ "make", "codegen" ] 107 | 108 | -------------------------------------------------------------------------------- /dockerbuild.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | export LOCAL_GOPATH=$(echo "${GOPATH:-~/go}" | cut -d ':' -f 1) # take the first GOPATH to mount into docker compose 6 | export PLATFORM=${PLATFORM:-linux/amd64} 7 | export PG_VERSION=${PG_VERSION:-14} 8 | export PGCAPTURE_SHA=$(git rev-parse --short HEAD) 9 | export PGCAPTURE_VERSION=$(git describe --tags --abbrev=0) 10 | 11 | [ ! -d "$LOCAL_GOPATH/pkg/mod/cache" ] && mkdir -p "$LOCAL_GOPATH/pkg/mod/cache" 12 | 13 | case "$1" in 14 | build) 15 | docker compose run --rm build 16 | docker compose build --force-rm pgcapture 17 | ;; 18 | test) 19 | docker compose run --rm test-deps 20 | docker compose run --rm test 21 | ;; 22 | codegen) 23 | docker compose run --rm codegen 24 | ;; 25 | clean) 26 | docker compose down 27 | ;; 28 | *) 29 | echo "\"$1\" is an unknown command" 30 | ;; 31 | esac 32 | -------------------------------------------------------------------------------- /example/client/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "strconv" 7 | "time" 8 | 9 | "github.com/jackc/pgtype" 10 | "github.com/replicase/pgcapture" 11 | "github.com/replicase/pgcapture/example" 12 | "google.golang.org/grpc" 13 | ) 14 | 15 | type T1 struct { 16 | ID pgtype.Int4 `pg:"id"` 17 | V pgtype.Int4 `pg:"v"` 18 | } 19 | 20 | func (t *T1) TableName() (schema, table string) { 21 | return "public", example.TestTable 22 | } 23 | 24 | func (t *T1) DebounceKey() string { 25 | return strconv.Itoa(int(t.ID.Int)) 26 | } 27 | 28 | func main() { 29 | conn, err := grpc.Dial(example.GatewayAddr, grpc.WithInsecure()) 30 | if err != nil { 31 | panic(err) 32 | } 33 | defer conn.Close() 34 | 35 | consumer := pgcapture.NewDBLogConsumer(context.Background(), conn, pgcapture.ConsumerOption{ 36 | URI: example.SrcDB.DB, 37 | TableRegex: example.TestTable, 38 | DebounceInterval: time.Second, 39 | }) 40 | defer consumer.Stop() 41 | 42 | err = consumer.Consume(map[pgcapture.Model]pgcapture.ModelHandlerFunc{ 43 | &T1{}: func(change pgcapture.Change) error { 44 | fmt.Println(change.New) 45 | return nil 46 | }, 47 | }) 48 | if err != nil { 49 | panic(err) 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /example/const.go: -------------------------------------------------------------------------------- 1 | package example 2 | 3 | import "github.com/replicase/pgcapture/internal/test" 4 | 5 | const ( 6 | PGHost = "127.0.0.1" 7 | PulsarURL = "pulsar://127.0.0.1:6650" 8 | TestTable = "test" 9 | TestDBSrc = "db_src" 10 | TestDBSink = "db_sink" 11 | AgentAddr1 = "localhost:8888" 12 | AgentAddr2 = "localhost:8889" 13 | ControlAddr = "localhost:10000" 14 | GatewayAddr = "localhost:10001" 15 | ) 16 | 17 | var ( 18 | DefaultDB = test.DBURL{Host: PGHost, DB: "postgres"} 19 | SinkDB = test.DBURL{Host: PGHost, DB: TestDBSink} 20 | SrcDB = test.DBURL{Host: PGHost, DB: TestDBSrc} 21 | ) 22 | 
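// Illustrative note with a hypothetical topic name: cmd/root.go's trimSlot derives the
// replication slot name from the Pulsar topic by trimming the "persistent://public/"
// prefix and replacing "/" and "-" with "_", e.g.
// "persistent://public/default/db-src" becomes "default_db_src".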
-------------------------------------------------------------------------------- /example/generator/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/replicase/pgcapture/example" 5 | ) 6 | 7 | func main() { 8 | if err := example.SrcDB.RandomData(example.TestTable); err != nil { 9 | panic(err) 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /example/reset/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "net/http" 7 | 8 | "github.com/replicase/pgcapture/example" 9 | ) 10 | 11 | func main() { 12 | req, _ := http.NewRequest(http.MethodDelete, "http://localhost:8080/admin/v2/persistent/public/default/"+example.TestDBSrc, nil) 13 | resp, err := http.DefaultClient.Do(req) 14 | if err != nil { 15 | fmt.Println(err) 16 | } else { 17 | bs, _ := io.ReadAll(resp.Body) 18 | resp.Body.Close() 19 | if resp.StatusCode > 299 { 20 | fmt.Println(string(bs)) 21 | } 22 | } 23 | 24 | if err := example.DefaultDB.Exec(fmt.Sprintf("select pg_drop_replication_slot('%s')", example.TestDBSrc)); err != nil { 25 | fmt.Println(err) 26 | } 27 | if err := example.DefaultDB.Exec("drop database " + example.TestDBSrc); err != nil { 28 | fmt.Println(err) 29 | } 30 | if err := example.DefaultDB.Exec("drop database " + example.TestDBSink); err != nil { 31 | fmt.Println(err) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /example/schedule/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | "github.com/replicase/pgcapture" 8 | "github.com/replicase/pgcapture/example" 9 | "github.com/replicase/pgcapture/pkg/pb" 10 | "google.golang.org/grpc" 11 | "google.golang.org/protobuf/types/known/durationpb" 12 | ) 13 | 14 | func main() { 15 | conn, err := grpc.Dial(example.ControlAddr, grpc.WithInsecure()) 16 | if err != nil { 17 | panic(err) 18 | } 19 | defer conn.Close() 20 | 21 | client := pgcapture.NewDBLogControllerClient(conn) 22 | 23 | pages, err := example.SinkDB.TablePages(example.TestTable) 24 | if err != nil { 25 | panic(err) 26 | } 27 | 28 | batch := uint32(1) 29 | var dumps []*pb.DumpInfoResponse 30 | 31 | for i := uint32(0); i < uint32(pages); i += batch { 32 | dumps = append(dumps, &pb.DumpInfoResponse{Schema: "public", Table: example.TestTable, PageBegin: i, PageEnd: i + batch - 1}) 33 | } 34 | if _, err = client.Schedule(context.Background(), &pb.ScheduleRequest{Uri: example.TestDBSrc, Dumps: dumps}); err != nil { 35 | panic(err) 36 | } 37 | 38 | if _, err = client.SetScheduleCoolDown(context.Background(), &pb.SetScheduleCoolDownRequest{ 39 | Uri: example.TestDBSrc, Duration: durationpb.New(time.Second * 5), 40 | }); err != nil { 41 | panic(err) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /example/server/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "fmt" 7 | "log" 8 | "os" 9 | "os/exec" 10 | "os/signal" 11 | "path" 12 | "sync" 13 | "syscall" 14 | "time" 15 | 16 | "github.com/replicase/pgcapture/example" 17 | "github.com/replicase/pgcapture/internal/test" 18 | "github.com/replicase/pgcapture/pkg/dblog" 19 | "github.com/replicase/pgcapture/pkg/pb" 20 | 
"google.golang.org/grpc" 21 | "google.golang.org/protobuf/types/known/structpb" 22 | ) 23 | 24 | func main() { 25 | if _, err := test.CreateDB(example.DefaultDB, example.SrcDB); err != nil { 26 | panic(err) 27 | } 28 | 29 | if _, err := test.CreateDB(example.DefaultDB, example.SinkDB); err != nil { 30 | panic(err) 31 | } 32 | 33 | var wg sync.WaitGroup 34 | ctx := context.Background() 35 | signals := make(chan os.Signal, 1) 36 | signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM) 37 | 38 | resolver := map[string]dblog.StaticAgentPulsarURIConfig{ 39 | example.SrcDB.DB: { 40 | AgentURL: example.AgentAddr2, 41 | PulsarURL: example.PulsarURL, 42 | PulsarTopic: example.SrcDB.DB, 43 | PulsarSubscription: example.SrcDB.DB, 44 | }, 45 | } 46 | cfg, _ := json.Marshal(resolver) 47 | 48 | for _, cmd := range []cmd{ 49 | { 50 | Name: "agent", 51 | Flags: map[string]string{ 52 | "ListenAddr": example.AgentAddr1, 53 | }, 54 | }, 55 | { 56 | Name: "agent", 57 | Flags: map[string]string{ 58 | "ListenAddr": example.AgentAddr2, 59 | }, 60 | }, 61 | { 62 | Name: "controller", 63 | Flags: map[string]string{ 64 | "ListenAddr": example.ControlAddr, 65 | }, 66 | }, 67 | { 68 | Name: "gateway", 69 | Flags: map[string]string{ 70 | "ListenAddr": example.GatewayAddr, 71 | "ControllerAddr": example.ControlAddr, 72 | "ResolverConfig": string(cfg), 73 | }, 74 | }, 75 | } { 76 | wg.Add(1) 77 | go run(ctx, cmd, &wg) 78 | } 79 | 80 | var conn1, conn2 *grpc.ClientConn 81 | var agent1, agent2 pb.AgentClient 82 | 83 | conn1, err := grpc.Dial(example.AgentAddr1, grpc.WithBlock(), grpc.WithInsecure()) 84 | if err != nil { 85 | log.Println(err) 86 | goto wait 87 | } 88 | conn2, err = grpc.Dial(example.AgentAddr2, grpc.WithBlock(), grpc.WithInsecure()) 89 | if err != nil { 90 | log.Println(err) 91 | goto wait 92 | } 93 | 94 | agent1 = pb.NewAgentClient(conn1) 95 | agent2 = pb.NewAgentClient(conn2) 96 | 97 | for { 98 | time.Sleep(time.Second) 99 | params, _ := structpb.NewStruct(map[string]interface{}{ 100 | "Command": "pg2pulsar", 101 | "PGConnURL": example.SrcDB.URL(), 102 | "PGReplURL": example.SrcDB.Repl(), 103 | "PulsarURL": example.PulsarURL, 104 | "PulsarTopic": example.SrcDB.DB, 105 | }) 106 | if _, err := agent1.Configure(ctx, &pb.AgentConfigRequest{Parameters: params}); err != nil { 107 | log.Println(err) 108 | goto wait 109 | } 110 | params, _ = structpb.NewStruct(map[string]interface{}{ 111 | "Command": "pulsar2pg", 112 | "PGConnURL": example.SinkDB.URL(), 113 | "PulsarURL": example.PulsarURL, 114 | "PulsarTopic": example.SrcDB.DB, 115 | }) 116 | if _, err := agent2.Configure(ctx, &pb.AgentConfigRequest{Parameters: params}); err != nil { 117 | log.Println(err) 118 | goto wait 119 | } 120 | } 121 | wait: 122 | wg.Wait() 123 | } 124 | 125 | type cmd struct { 126 | Name string 127 | Flags map[string]string 128 | } 129 | 130 | func run(ctx context.Context, cmd cmd, wg *sync.WaitGroup) { 131 | args := []string{"run", path.Join(".", "cmd"), cmd.Name} 132 | for k, v := range cmd.Flags { 133 | args = append(args, fmt.Sprintf("--%s=%s", k, v)) 134 | } 135 | c := exec.CommandContext(ctx, "go", args...) 
136 | c.Stdout = os.Stdout 137 | c.Stderr = os.Stderr 138 | log.Println(cmd.Name, c.Run()) 139 | wg.Done() 140 | } 141 | -------------------------------------------------------------------------------- /example/stop/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/replicase/pgcapture" 7 | "github.com/replicase/pgcapture/example" 8 | "github.com/replicase/pgcapture/pkg/pb" 9 | "google.golang.org/grpc" 10 | ) 11 | 12 | func main() { 13 | conn, err := grpc.Dial(example.ControlAddr, grpc.WithInsecure()) 14 | if err != nil { 15 | panic(err) 16 | } 17 | defer conn.Close() 18 | 19 | client := pgcapture.NewDBLogControllerClient(conn) 20 | 21 | if _, err = client.StopSchedule(context.Background(), &pb.StopScheduleRequest{Uri: example.TestDBSrc}); err != nil { 22 | panic(err) 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/replicase/pgcapture 2 | 3 | go 1.20 4 | 5 | replace github.com/pganalyze/pg_query_go/v2 v2.0.2 => github.com/rueian/pg_query_go/v2 v2.0.3-0.20210404160231-00fbdb47649c 6 | 7 | require ( 8 | github.com/apache/pulsar-client-go v0.10.0 9 | github.com/golang/mock v1.6.0 10 | github.com/golang/protobuf v1.5.3 11 | github.com/jackc/pgconn v1.8.1 12 | github.com/jackc/pglogrepl v0.0.0-20230810221841-d0818e1fbef7 13 | github.com/jackc/pgtype v1.7.0 14 | github.com/jackc/pgx/v4 v4.10.1 15 | github.com/jackc/pgx/v5 v5.4.3 16 | github.com/pganalyze/pg_query_go/v2 v2.0.2 17 | github.com/prometheus/client_golang v1.11.1 18 | github.com/sirupsen/logrus v1.6.0 19 | github.com/spf13/cobra v1.7.0 20 | github.com/streamnative/pulsar-admin-go v0.1.0 21 | google.golang.org/grpc v1.38.0 22 | google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.3.0 23 | google.golang.org/protobuf v1.30.0 24 | ) 25 | 26 | require ( 27 | github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 // indirect 28 | github.com/99designs/keyring v1.2.2 // indirect 29 | github.com/AthenZ/athenz v1.10.39 // indirect 30 | github.com/DataDog/zstd v1.5.0 // indirect 31 | github.com/ardielle/ardielle-go v1.5.2 // indirect 32 | github.com/beorn7/perks v1.0.1 // indirect 33 | github.com/bits-and-blooms/bitset v1.4.0 // indirect 34 | github.com/cespare/xxhash/v2 v2.1.1 // indirect 35 | github.com/danieljoos/wincred v1.2.0 // indirect 36 | github.com/dvsekhvalnov/jose2go v1.5.0 // indirect 37 | github.com/godbus/dbus v0.0.0-20190726142602-4481cbc300e2 // indirect 38 | github.com/golang-jwt/jwt v3.2.2+incompatible // indirect 39 | github.com/golang/snappy v0.0.1 // indirect 40 | github.com/google/go-cmp v0.5.9 // indirect 41 | github.com/gsterjov/go-libsecret v0.0.0-20161001094733-a6f4afe4910c // indirect 42 | github.com/inconshreveable/mousetrap v1.1.0 // indirect 43 | github.com/jackc/chunkreader/v2 v2.0.1 // indirect 44 | github.com/jackc/pgio v1.0.0 // indirect 45 | github.com/jackc/pgpassfile v1.0.0 // indirect 46 | github.com/jackc/pgproto3/v2 v2.0.7 // indirect 47 | github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect 48 | github.com/klauspost/compress v1.14.4 // indirect 49 | github.com/konsorten/go-windows-terminal-sequences v1.0.3 // indirect 50 | github.com/linkedin/goavro/v2 v2.9.8 // indirect 51 | github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect 52 | github.com/mtibben/percent v0.2.1 // indirect 53 | 
github.com/onsi/gomega v1.27.10 // indirect 54 | github.com/pierrec/lz4 v2.0.5+incompatible // indirect 55 | github.com/pkg/errors v0.9.1 // indirect 56 | github.com/prometheus/client_model v0.2.0 // indirect 57 | github.com/prometheus/common v0.26.0 // indirect 58 | github.com/prometheus/procfs v0.6.0 // indirect 59 | github.com/spaolacci/murmur3 v1.1.0 // indirect 60 | github.com/spf13/pflag v1.0.5 // indirect 61 | go.uber.org/atomic v1.7.0 // indirect 62 | golang.org/x/crypto v0.12.0 // indirect 63 | golang.org/x/mod v0.10.0 // indirect 64 | golang.org/x/net v0.12.0 // indirect 65 | golang.org/x/oauth2 v0.8.0 // indirect 66 | golang.org/x/sys v0.11.0 // indirect 67 | golang.org/x/term v0.11.0 // indirect 68 | golang.org/x/text v0.12.0 // indirect 69 | golang.org/x/tools v0.9.3 // indirect 70 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect 71 | google.golang.org/appengine v1.6.7 // indirect 72 | google.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c // indirect 73 | ) 74 | -------------------------------------------------------------------------------- /hack/images/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/replicase/pgcapture/1a64198cd12b987801cebbb0bd568f093272ced5/hack/images/overview.png -------------------------------------------------------------------------------- /hack/postgres/11/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM postgres:11-alpine 2 | 3 | RUN wget https://github.com/rueian/pglogical/archive/REL2_3_4_no_filter.tar.gz && \ 4 | tar -zxvf REL2_3_4_no_filter.tar.gz && \ 5 | apk -U add --no-cache build-base libxslt-dev libxml2-dev openssl-dev libedit-dev zlib-dev $DOCKER_PG_LLVM_DEPS krb5-pkinit krb5-dev krb5 && \ 6 | cd /pglogical-REL2_3_4_no_filter && \ 7 | make USE_PGXS=1 CPPFLAGS="-DPGL_NO_STDIN_ASSIGN" clean all && \ 8 | make install && \ 9 | cd / && \ 10 | rm -rf /REL2_3_4_no_filter.tar.gz /pglogical-REL2_3_4_no_filter 11 | 12 | RUN chown -R postgres:postgres /usr/local/share/postgresql/extension 13 | RUN chown -R postgres:postgres /usr/local/lib/postgresql 14 | 15 | COPY ./extension /extension 16 | RUN cd /extension && ./make.sh 17 | -------------------------------------------------------------------------------- /hack/postgres/12/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM postgres:12-alpine 2 | 3 | RUN wget https://github.com/rueian/pglogical/archive/REL2_3_4_no_filter.tar.gz && \ 4 | tar -zxvf REL2_3_4_no_filter.tar.gz && \ 5 | apk -U add --no-cache build-base libxslt-dev libxml2-dev openssl-dev libedit-dev zlib-dev $DOCKER_PG_LLVM_DEPS krb5-pkinit krb5-dev krb5 && \ 6 | cd /pglogical-REL2_3_4_no_filter && \ 7 | make USE_PGXS=1 CPPFLAGS="-DPGL_NO_STDIN_ASSIGN" clean all && \ 8 | make install && \ 9 | cd / && \ 10 | rm -rf /REL2_3_4_no_filter.tar.gz /pglogical-REL2_3_4_no_filter 11 | 12 | RUN chown -R postgres:postgres /usr/local/share/postgresql/extension 13 | RUN chown -R postgres:postgres /usr/local/lib/postgresql 14 | 15 | COPY ./extension /extension 16 | RUN cd /extension && ./make.sh 17 | -------------------------------------------------------------------------------- /hack/postgres/13/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM postgres:13-alpine 2 | 3 | RUN wget https://github.com/rueian/pglogical/archive/REL2_3_4_no_filter.tar.gz && \ 4 | tar -zxvf 
REL2_3_4_no_filter.tar.gz && \ 5 | apk -U add --no-cache build-base libxslt-dev libxml2-dev openssl-dev libedit-dev zlib-dev $DOCKER_PG_LLVM_DEPS krb5-pkinit krb5-dev krb5 && \ 6 | cd /pglogical-REL2_3_4_no_filter && \ 7 | make USE_PGXS=1 CPPFLAGS="-DPGL_NO_STDIN_ASSIGN" clean all && \ 8 | make install && \ 9 | cd / && \ 10 | rm -rf /REL2_3_4_no_filter.tar.gz /pglogical-REL2_3_4_no_filter 11 | 12 | RUN chown -R postgres:postgres /usr/local/share/postgresql/extension 13 | RUN chown -R postgres:postgres /usr/local/lib/postgresql 14 | 15 | COPY ./extension /extension 16 | RUN cd /extension && ./make.sh 17 | -------------------------------------------------------------------------------- /hack/postgres/14/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM postgres:14-alpine 2 | 3 | RUN wget https://github.com/2ndQuadrant/pglogical/archive/REL2_4_2.tar.gz && \ 4 | tar -zxvf REL2_4_2.tar.gz && \ 5 | apk -U add --no-cache build-base libxslt-dev libxml2-dev openssl-dev libedit-dev zlib-dev lz4-dev patch $DOCKER_PG_LLVM_DEPS krb5-pkinit krb5-dev krb5 6 | 7 | COPY 14/pglogical/pglogical.patch ./pglogical-REL2_4_2 8 | 9 | RUN cd pglogical-REL2_4_2 && \ 10 | patch -p1 < pglogical.patch && \ 11 | make USE_PGXS=1 CPPFLAGS="-DPGL_NO_STDIN_ASSIGN" clean all && \ 12 | make install && \ 13 | cd / && \ 14 | rm -rf /REL2_4_2.tar.gz /pglogical-REL2_4_2 15 | 16 | RUN chown -R postgres:postgres /usr/local/share/postgresql/extension 17 | RUN chown -R postgres:postgres /usr/local/lib/postgresql 18 | 19 | COPY ./extension /extension 20 | RUN cd /extension && ./make.sh 21 | -------------------------------------------------------------------------------- /hack/postgres/14/pglogical/pglogical.patch: -------------------------------------------------------------------------------- 1 | diff -Naur old/pglogical_output_plugin.c new/pglogical_output_plugin.c 2 | --- old/pglogical_output_plugin.c 2023-07-24 16:14:59.000000000 +0800 3 | +++ new/pglogical_output_plugin.c 2023-07-24 16:15:55.000000000 +0800 4 | @@ -189,7 +189,7 @@ 5 | { 6 | int params_format; 7 | bool started_tx = false; 8 | - PGLogicalLocalNode *node; 9 | +// PGLogicalLocalNode *node; 10 | MemoryContext oldctx; 11 | 12 | /* 13 | @@ -208,8 +208,8 @@ 14 | StartTransactionCommand(); 15 | started_tx = true; 16 | } 17 | - node = get_local_node(false, false); 18 | - data->local_node_id = node->node->id; 19 | +// node = get_local_node(false, false); 20 | +// data->local_node_id = node->node->id; 21 | 22 | /* 23 | * Ideally we'd send the startup message immediately.
That way 24 | @@ -668,8 +668,8 @@ 25 | old = MemoryContextSwitchTo(data->context); 26 | 27 | /* First check the table filter */ 28 | - if (!pglogical_change_filter(data, relation, change, &att_list)) 29 | - return; 30 | +// if (!pglogical_change_filter(data, relation, change, &att_list)) 31 | +// return; 32 | 33 | /* 34 | * If the protocol wants to write relation information and the client 35 | -------------------------------------------------------------------------------- /hack/postgres/15/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM postgres:15-alpine 2 | 3 | RUN wget https://github.com/2ndQuadrant/pglogical/archive/REL2_4_2.tar.gz && \ 4 | tar -zxvf REL2_4_2.tar.gz && \ 5 | apk -U add --no-cache build-base libxslt-dev libxml2-dev openssl-dev libedit-dev zlib-dev lz4-dev patch $DOCKER_PG_LLVM_DEPS krb5-pkinit krb5-dev krb5 6 | 7 | COPY 15/pglogical/pglogical.patch ./pglogical-REL2_4_2 8 | 9 | RUN cd pglogical-REL2_4_2 && \ 10 | patch -p1 < pglogical.patch && \ 11 | make USE_PGXS=1 CPPFLAGS="-DPGL_NO_STDIN_ASSIGN" clean all && \ 12 | make install && \ 13 | cd / && \ 14 | rm -rf /REL2_4_2.tar.gz /pglogical-REL2_4_2 15 | 16 | RUN chown -R postgres:postgres /usr/local/share/postgresql/extension 17 | RUN chown -R postgres:postgres /usr/local/lib/postgresql 18 | 19 | COPY ./extension /extension 20 | RUN cd /extension && ./make.sh 21 | -------------------------------------------------------------------------------- /hack/postgres/15/pglogical/pglogical.patch: -------------------------------------------------------------------------------- 1 | diff -Naur old/pglogical_output_plugin.c new/pglogical_output_plugin.c 2 | --- old/pglogical_output_plugin.c 2023-07-24 16:14:59.000000000 +0800 3 | +++ new/pglogical_output_plugin.c 2023-07-24 16:15:55.000000000 +0800 4 | @@ -189,7 +189,7 @@ 5 | { 6 | int params_format; 7 | bool started_tx = false; 8 | - PGLogicalLocalNode *node; 9 | +// PGLogicalLocalNode *node; 10 | MemoryContext oldctx; 11 | 12 | /* 13 | @@ -208,8 +208,8 @@ 14 | StartTransactionCommand(); 15 | started_tx = true; 16 | } 17 | - node = get_local_node(false, false); 18 | - data->local_node_id = node->node->id; 19 | +// node = get_local_node(false, false); 20 | +// data->local_node_id = node->node->id; 21 | 22 | /* 23 | * Ideally we'd send the startup message immediately.
That way 24 | @@ -668,8 +668,8 @@ 25 | old = MemoryContextSwitchTo(data->context); 26 | 27 | /* First check the table filter */ 28 | - if (!pglogical_change_filter(data, relation, change, &att_list)) 29 | - return; 30 | +// if (!pglogical_change_filter(data, relation, change, &att_list)) 31 | +// return; 32 | 33 | /* 34 | * If the protocol wants to write relation information and the client 35 | -------------------------------------------------------------------------------- /hack/postgres/16/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM postgres:16-alpine 2 | 3 | RUN wget https://github.com/2ndQuadrant/pglogical/archive/REL2_4_4.tar.gz && \ 4 | tar -zxvf REL2_4_4.tar.gz && \ 5 | apk -U add --no-cache build-base libxslt-dev libxml2-dev openssl-dev libedit-dev zlib-dev lz4-dev patch $DOCKER_PG_LLVM_DEPS krb5-pkinit krb5-dev krb5 6 | 7 | COPY 16/pglogical/pglogical.patch ./pglogical-REL2_4_4 8 | 9 | RUN cd pglogical-REL2_4_4 && \ 10 | patch -p1 < pglogical.patch && \ 11 | make USE_PGXS=1 CPPFLAGS="-DPGL_NO_STDIN_ASSIGN" clean all && \ 12 | make install && \ 13 | cd / && \ 14 | rm -rf /REL2_4_4.tar.gz /pglogical-REL2_4_4 15 | 16 | RUN chown -R postgres:postgres /usr/local/share/postgresql/extension 17 | RUN chown -R postgres:postgres /usr/local/lib/postgresql 18 | 19 | COPY ./extension /extension 20 | RUN cd /extension && ./make.sh 21 | -------------------------------------------------------------------------------- /hack/postgres/16/pglogical/pglogical.patch: -------------------------------------------------------------------------------- 1 | diff -Naur old/pglogical_output_plugin.c new/pglogical_output_plugin.c 2 | --- old/pglogical_output_plugin.c 2023-10-04 09:54:00 3 | +++ new/pglogical_output_plugin.c 2023-10-24 21:27:44 4 | @@ -189,7 +189,7 @@ 5 | { 6 | int params_format; 7 | bool started_tx = false; 8 | - PGLogicalLocalNode *node; 9 | +// PGLogicalLocalNode *node; 10 | MemoryContext oldctx; 11 | 12 | /* 13 | @@ -208,8 +208,8 @@ 14 | StartTransactionCommand(); 15 | started_tx = true; 16 | } 17 | - node = get_local_node(false, false); 18 | - data->local_node_id = node->node->id; 19 | +// node = get_local_node(false, false); 20 | +// data->local_node_id = node->node->id; 21 | 22 | /* 23 | * Ideally we'd send the startup message immediately.
That way 24 | @@ -669,8 +669,8 @@ 25 | old = MemoryContextSwitchTo(data->context); 26 | 27 | /* First check the table filter */ 28 | - if (!pglogical_change_filter(data, relation, change, &att_list)) 29 | - goto cleanup; 30 | +// if (!pglogical_change_filter(data, relation, change, &att_list)) 31 | +// goto cleanup; 32 | 33 | /* 34 | * If the protocol wants to write relation information and the client 35 | -------------------------------------------------------------------------------- /hack/postgres/17/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM postgres:17-alpine 2 | 3 | RUN wget https://github.com/2ndQuadrant/pglogical/archive/REL2_4_5.tar.gz && \ 4 | tar -zxvf REL2_4_5.tar.gz && \ 5 | apk -U add --no-cache build-base libxslt-dev libxml2-dev openssl-dev libedit-dev zlib-dev lz4-dev patch $DOCKER_PG_LLVM_DEPS krb5-pkinit krb5-dev krb5 6 | 7 | COPY 17/pglogical/pglogical.patch ./pglogical-REL2_4_5 8 | 9 | RUN cd pglogical-REL2_4_5 && \ 10 | patch -p1 < pglogical.patch && \ 11 | make USE_PGXS=1 CPPFLAGS="-DPGL_NO_STDIN_ASSIGN" clean all && \ 12 | make install && \ 13 | cd / && \ 14 | rm -rf /REL2_4_5.tar.gz /pglogical-REL2_4_5 15 | 16 | RUN chown -R postgres:postgres /usr/local/share/postgresql/extension 17 | RUN chown -R postgres:postgres /usr/local/lib/postgresql 18 | 19 | COPY ./extension /extension 20 | RUN cd /extension && ./make.sh 21 | -------------------------------------------------------------------------------- /hack/postgres/17/pglogical/pglogical.patch: -------------------------------------------------------------------------------- 1 | diff -Naur old/pglogical_output_plugin.c new/pglogical_output_plugin.c 2 | --- old/pglogical_output_plugin.c 2023-10-04 09:54:00 3 | +++ new/pglogical_output_plugin.c 2023-10-24 21:27:44 4 | @@ -189,7 +189,7 @@ 5 | { 6 | int params_format; 7 | bool started_tx = false; 8 | - PGLogicalLocalNode *node; 9 | +// PGLogicalLocalNode *node; 10 | MemoryContext oldctx; 11 | 12 | /* 13 | @@ -208,8 +208,8 @@ 14 | StartTransactionCommand(); 15 | started_tx = true; 16 | } 17 | - node = get_local_node(false, false); 18 | - data->local_node_id = node->node->id; 19 | +// node = get_local_node(false, false); 20 | +// data->local_node_id = node->node->id; 21 | 22 | /* 23 | * Ideally we'd send the startup message immediately.
That way 24 | @@ -669,8 +669,8 @@ 25 | old = MemoryContextSwitchTo(data->context); 26 | 27 | /* First check the table filter */ 28 | - if (!pglogical_change_filter(data, relation, change, &att_list)) 29 | - goto cleanup; 30 | +// if (!pglogical_change_filter(data, relation, change, &att_list)) 31 | +// goto cleanup; 32 | 33 | /* 34 | * If the protocol wants to write relation information and the client 35 | -------------------------------------------------------------------------------- /hack/postgres/96/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM postgres:9.6-alpine 2 | 3 | RUN wget https://github.com/2ndQuadrant/pglogical/archive/REL1_2_2.tar.gz && \ 4 | tar -zxvf REL1_2_2.tar.gz && \ 5 | apk add --no-cache build-base libxslt-dev libxml2-dev openssl-dev libedit-dev zlib-dev && \ 6 | cd /pglogical-REL1_2_2 && \ 7 | make clean all && \ 8 | make install && \ 9 | cd / && \ 10 | rm -rf /REL1_2_2.tar.gz /pglogical-REL1_2_2 11 | 12 | RUN chown -R postgres:postgres /usr/local/share/postgresql/extension 13 | RUN chown -R postgres:postgres /usr/local/lib/postgresql 14 | 15 | COPY ./extension /extension 16 | RUN cd /extension && ./make.sh -------------------------------------------------------------------------------- /hack/postgres/dockerbuild.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | export DOCKER_BUILDKIT=1 4 | 5 | VERSION="${VERSION:-11}" 6 | TAG="dcard/postgres:$VERSION-logical" 7 | 8 | echo "build for $TAG" 9 | docker buildx build --push --platform=linux/amd64 -t $TAG -f "./$VERSION/Dockerfile" . 10 | -------------------------------------------------------------------------------- /hack/postgres/extension/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017 Enova International, Inc. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /hack/postgres/extension/Makefile: -------------------------------------------------------------------------------- 1 | # source: https://github.com/enova/pgl_ddl_deploy/blob/master/Makefile 2 | 3 | EXTENSION = pgcapture 4 | TESTS = $(wildcard sql/*) 5 | REGRESS = $(patsubst sql/%.sql,%,$(TESTS)) 6 | DATA = pgcapture--0.1.sql 7 | MODULE_big = pgcapture 8 | 9 | OBJS = pgcapture.o \ 10 | pg_import.o 11 | 12 | PG_CONFIG ?= pg_config 13 | PGXS := $(shell $(PG_CONFIG) --pgxs) 14 | include $(PGXS) 15 | 16 | # Prevent unintentional inheritance of PGSERVICE while running regression suite 17 | # with make installcheck. We typically use PGSERVICE in our shell environment but 18 | # not for dev. Require instead explicit PGPORT= or PGSERVICE= to do installcheck 19 | unexport PGSERVICE -------------------------------------------------------------------------------- /hack/postgres/extension/expected/00_setup.out: -------------------------------------------------------------------------------- 1 | CREATE EXTENSION pgcapture; 2 | -------------------------------------------------------------------------------- /hack/postgres/extension/expected/01_basic.out: -------------------------------------------------------------------------------- 1 | -- Test the DDL SRF 2 | SELECT * FROM pgcapture.current_query(); 3 | current_query 4 | --------------- 5 | 6 | (1 row) 7 | 8 | -- Test some basic DDL commands 9 | CREATE TABLE ctas AS SELECT 1 as id; 10 | CREATE TABLE ct(id integer); 11 | CREATE SCHEMA nsp1; 12 | CREATE TABLE nsp1.tbl(id integer, val text); 13 | CREATE INDEX ON nsp1.tbl (val) WHERE id % 2 = 0; 14 | -- DDL command that doesn't contain a trailing semicolon 15 | CREATE SCHEMA nsp2\g 16 | -- Check the results 17 | SELECT query, unnest(tags) FROM pgcapture.ddl_logs ORDER BY id; 18 | query | unnest 19 | -------------------------------------------------+----------------- 20 | CREATE TABLE ctas AS SELECT 1 as id | CREATE TABLE AS 21 | CREATE TABLE ct(id integer) | CREATE TABLE 22 | CREATE SCHEMA nsp1 | CREATE SCHEMA 23 | CREATE TABLE nsp1.tbl(id integer, val text) | CREATE TABLE 24 | CREATE INDEX ON nsp1.tbl (val) WHERE id % 2 = 0 | CREATE INDEX 25 | CREATE SCHEMA nsp2 | CREATE SCHEMA 26 | (6 rows) 27 | 28 | -------------------------------------------------------------------------------- /hack/postgres/extension/expected/02_nested_ddl.out: -------------------------------------------------------------------------------- 1 | LOAD 'pgcapture'; 2 | TRUNCATE pgcapture.ddl_logs; 3 | DO $$ 4 | BEGIN 5 | CREATE TABLE tbl1(); 6 | CREATE TABLE tbl2(); 7 | END; 8 | $$ LANGUAGE plpgsql; 9 | CREATE EXTENSION pg_stat_statements; 10 | -- Check the results 11 | SELECT query, unnest(tags) FROM pgcapture.ddl_logs ORDER BY id; 12 | query | unnest 13 | -------------------------------------+------------------ 14 | CREATE TABLE tbl1() | CREATE TABLE 15 | CREATE TABLE tbl2() | CREATE TABLE 16 | CREATE EXTENSION pg_stat_statements | CREATE EXTENSION 17 | (3 rows) 18 | 19 | -------------------------------------------------------------------------------- /hack/postgres/extension/expected/03_multi_query.out: -------------------------------------------------------------------------------- 1 | LOAD 'pgcapture'; 2 | TRUNCATE pgcapture.ddl_logs; 3 | -- test multi-query statements mixing DDL / DML 4 | \o /dev/null 5 | -- older versions of psql don't emit all the resultsets in multi-query 6 | SELECT 1\; CREATE TABLE multi(id integer)\; INSERT INTO multi SELECT 1;
7 | \o 8 | -- test multi-query statements mixing DDL 9 | DROP TABLE multi\; CREATE TABLE multi(val text); 10 | -- Check the results 11 | SELECT query, unnest(tags) FROM pgcapture.ddl_logs ORDER BY id; 12 | query | unnest 13 | --------------------------------+-------------- 14 | CREATE TABLE multi(id integer) | CREATE TABLE 15 | DROP TABLE multi | DROP TABLE 16 | CREATE TABLE multi(val text) | CREATE TABLE 17 | (3 rows) 18 | 19 | -------------------------------------------------------------------------------- /hack/postgres/extension/expected/04_temp.out: -------------------------------------------------------------------------------- 1 | LOAD 'pgcapture'; 2 | -- Create a normal table that will later be dropped 3 | CREATE TABLE dropme(); 4 | CREATE TABLE dropmetoo(); 5 | TRUNCATE pgcapture.ddl_logs; 6 | -- Plain TEMP table should be ignored 7 | CREATE TEMPORARY TABLE tmp1(); 8 | ALTER TABLE tmp1 ADD COLUMN id integer; 9 | -- Table created in the pg_temp schema should be ignored 10 | CREATE TABLE pg_temp.tmp2(); 11 | ALTER TABLE pg_temp.tmp2 ADD COLUMN id integer; 12 | -- Table created in the pg_temp schema using the search_path should also be 13 | -- ignored 14 | SET search_path TO not_a_schema, pg_temp, public; 15 | CREATE TABLE tmp3(); 16 | ALTER TABLE tmp3 ADD COLUMN id integer; 17 | RESET search_path; 18 | -- CTAS / SELECT INTO for temp tables should be ignored 19 | CREATE TABLE pg_temp.tmp4 AS SELECT 1 AS id; 20 | -- CREATE TEMP VIEW should be ignored 21 | CREATE TEMP VIEW v1 AS SELECT 1 AS id; 22 | -- Implicit temp view creation should be ignored 23 | CREATE VIEW v2 AS SELECT * FROM tmp3; 24 | NOTICE: view "v2" will be a temporary view 25 | -- ALTER ... RENAME should ignore temp relations 26 | ALTER TABLE tmp4 RENAME COLUMN id TO id2; 27 | ALTER TABLE tmp4 RENAME TO tmp4b; 28 | -- Check the results 29 | SELECT query, unnest(tags) FROM pgcapture.ddl_logs ORDER BY id; 30 | query | unnest 31 | -------+-------- 32 | (0 rows) 33 | 34 | -- Dropping only temp tables should be ignored 35 | DROP TABLE tmp1, tmp2; 36 | -- But dropping a mix of temp and regular table should preserve the regular 37 | -- table names 38 | DROP TABLE tmp3, pg_temp.tmp3, dropme, tmp4b, dropmetoo CASCADE; 39 | NOTICE: drop cascades to view v2 40 | -- Check the results 41 | SELECT query, unnest(tags) FROM pgcapture.ddl_logs ORDER BY id; 42 | query | unnest 43 | --------------------------------------+------------ 44 | DROP TABLE dropme, dropmetoo CASCADE | DROP TABLE 45 | (1 row) 46 | 47 | -------------------------------------------------------------------------------- /hack/postgres/extension/make.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd $(dirname $0) 4 | 5 | make 6 | make install -------------------------------------------------------------------------------- /hack/postgres/extension/pg_import.h: -------------------------------------------------------------------------------- 1 | /*-------------------------------------------------------------------- 2 | * pg_import.h 3 | * 4 | * Code imported from upstream.
5 | *-------------------------------------------------------------------- 6 | */ 7 | #ifndef PG_IMPORT_H 8 | #define PG_IMPORT_H 9 | 10 | #include "postgres.h" 11 | 12 | #include "catalog/catalog.h" 13 | #include "catalog/index.h" 14 | #include "catalog/partition.h" 15 | #include "miscadmin.h" 16 | #include "storage/lmgr.h" 17 | #include "utils/acl.h" 18 | #include "utils/syscache.h" 19 | 20 | /* Imported from src/backend/commands/tablecmds.c */ 21 | struct DropRelationCallbackState 22 | { 23 | /* These fields are set by RemoveRelations: */ 24 | char expected_relkind; 25 | LOCKMODE heap_lockmode; 26 | /* These fields are state to track which subsidiary locks are held: */ 27 | Oid heapOid; 28 | Oid partParentOid; 29 | /* These fields are passed back by RangeVarCallbackForDropRelation: */ 30 | char actual_relkind; 31 | char actual_relpersistence; 32 | }; 33 | 34 | /* Imported from src/backend/commands/tablecmds.c */ 35 | void RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, 36 | Oid oldrelid, void *arg); 37 | 38 | /* Imported from src/backend/commands/tablecmds.c */ 39 | void RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, 40 | Oid oldRelOid, void *arg); 41 | 42 | #endif /* PG_IMPORT_H */ 43 | -------------------------------------------------------------------------------- /hack/postgres/extension/pgcapture--0.1.sql: -------------------------------------------------------------------------------- 1 | -- complain if script is sourced in psql, rather than via CREATE EXTENSION 2 | \echo Use "CREATE EXTENSION pgcapture" to load this file. \quit 3 | 4 | CREATE TABLE pgcapture.ddl_logs (id SERIAL PRIMARY KEY, query TEXT, tags TEXT[], activity JSONB); 5 | CREATE TABLE pgcapture.sources (id TEXT PRIMARY KEY, commit pg_lsn, seq int, commit_ts timestamptz, mid bytea, apply_ts timestamptz DEFAULT CURRENT_TIMESTAMP); 6 | 7 | CREATE FUNCTION pgcapture.current_query() 8 | RETURNS TEXT AS 9 | 'MODULE_PATHNAME', 'pgl_ddl_deploy_current_query' 10 | LANGUAGE C VOLATILE STRICT; 11 | 12 | CREATE FUNCTION pgcapture.sql_command_tags(p_sql TEXT) 13 | RETURNS TEXT[] AS 14 | 'MODULE_PATHNAME', 'sql_command_tags' 15 | LANGUAGE C VOLATILE STRICT; 16 | 17 | CREATE FUNCTION pgcapture.log_ddl() RETURNS event_trigger AS $$ 18 | declare 19 | qstr TEXT; 20 | tags TEXT[]; 21 | acti JSONB; 22 | begin 23 | qstr = pgcapture.current_query(); 24 | tags = pgcapture.sql_command_tags(qstr); 25 | select row_to_json(a.*) into acti from (select datname,usename,application_name,client_addr,backend_start,xact_start from pg_stat_activity where pid = pg_backend_pid()) a; 26 | insert into pgcapture.ddl_logs(query, tags, activity) values (qstr,tags,acti); 27 | end; 28 | $$ LANGUAGE plpgsql STRICT; 29 | 30 | CREATE EVENT TRIGGER pgcapture_ddl_command_start ON ddl_command_start WHEN tag IN ( 31 | 'CREATE TABLE AS', 32 | 'SELECT INTO', 33 | 'DROP TRIGGER', 34 | 'DROP FUNCTION' 35 | ) EXECUTE PROCEDURE pgcapture.log_ddl(); 36 | CREATE EVENT TRIGGER pgcapture_ddl_command_end ON ddl_command_end WHEN TAG IN ( 37 | 'ALTER AGGREGATE', 38 | 'ALTER COLLATION', 39 | 'ALTER CONVERSION', 40 | 'ALTER DOMAIN', 41 | 'ALTER DEFAULT PRIVILEGES', 42 | 'ALTER EXTENSION', 43 | 'ALTER FOREIGN DATA WRAPPER', 44 | 'ALTER FOREIGN TABLE', 45 | 'ALTER FUNCTION', 46 | 'ALTER LANGUAGE', 47 | 'ALTER LARGE OBJECT', 48 | 'ALTER MATERIALIZED VIEW', 49 | 'ALTER OPERATOR', 50 | 'ALTER OPERATOR CLASS', 51 | 'ALTER OPERATOR FAMILY', 52 | 'ALTER POLICY', 53 | 'ALTER SCHEMA', 54 | 'ALTER SEQUENCE', 55 | 'ALTER SERVER', 56 | 'ALTER TABLE', 57 | 
'ALTER TEXT SEARCH CONFIGURATION', 58 | 'ALTER TEXT SEARCH DICTIONARY', 59 | 'ALTER TEXT SEARCH PARSER', 60 | 'ALTER TEXT SEARCH TEMPLATE', 61 | 'ALTER TRIGGER', 62 | 'ALTER TYPE', 63 | 'ALTER USER MAPPING', 64 | 'ALTER VIEW', 65 | 'COMMENT', 66 | 'CREATE ACCESS METHOD', 67 | 'CREATE AGGREGATE', 68 | 'CREATE CAST', 69 | 'CREATE COLLATION', 70 | 'CREATE CONVERSION', 71 | 'CREATE DOMAIN', 72 | 'CREATE EXTENSION', 73 | 'CREATE FOREIGN DATA WRAPPER', 74 | 'CREATE FOREIGN TABLE', 75 | 'CREATE FUNCTION', 76 | 'CREATE INDEX', 77 | 'CREATE LANGUAGE', 78 | 'CREATE MATERIALIZED VIEW', 79 | 'CREATE OPERATOR', 80 | 'CREATE OPERATOR CLASS', 81 | 'CREATE OPERATOR FAMILY', 82 | 'CREATE POLICY', 83 | 'CREATE RULE', 84 | 'CREATE SCHEMA', 85 | 'CREATE SEQUENCE', 86 | 'CREATE SERVER', 87 | 'CREATE TABLE', 88 | 'CREATE TEXT SEARCH CONFIGURATION', 89 | 'CREATE TEXT SEARCH DICTIONARY', 90 | 'CREATE TEXT SEARCH PARSER', 91 | 'CREATE TEXT SEARCH TEMPLATE', 92 | 'CREATE TRIGGER', 93 | 'CREATE TYPE', 94 | 'CREATE USER MAPPING', 95 | 'CREATE VIEW', 96 | 'DROP ACCESS METHOD', 97 | 'DROP AGGREGATE', 98 | 'DROP CAST', 99 | 'DROP COLLATION', 100 | 'DROP CONVERSION', 101 | 'DROP DOMAIN', 102 | 'DROP EXTENSION', 103 | 'DROP FOREIGN DATA WRAPPER', 104 | 'DROP FOREIGN TABLE', 105 | 'DROP INDEX', 106 | 'DROP LANGUAGE', 107 | 'DROP MATERIALIZED VIEW', 108 | 'DROP OPERATOR', 109 | 'DROP OPERATOR CLASS', 110 | 'DROP OPERATOR FAMILY', 111 | 'DROP OWNED', 112 | 'DROP POLICY', 113 | 'DROP RULE', 114 | 'DROP SCHEMA', 115 | 'DROP SEQUENCE', 116 | 'DROP SERVER', 117 | 'DROP TABLE', 118 | 'DROP TEXT SEARCH CONFIGURATION', 119 | 'DROP TEXT SEARCH DICTIONARY', 120 | 'DROP TEXT SEARCH PARSER', 121 | 'DROP TEXT SEARCH TEMPLATE', 122 | 'DROP TYPE', 123 | 'DROP USER MAPPING', 124 | 'DROP VIEW', 125 | 'GRANT', 126 | 'IMPORT FOREIGN SCHEMA', 127 | 'REVOKE', 128 | 'SECURITY LABEL' 129 | ) EXECUTE PROCEDURE pgcapture.log_ddl(); -------------------------------------------------------------------------------- /hack/postgres/extension/pgcapture.control: -------------------------------------------------------------------------------- 1 | # pgcapture extension 2 | module_pathname = '$libdir/pgcapture' 3 | default_version = '0.1' 4 | schema = 'pgcapture' 5 | -------------------------------------------------------------------------------- /hack/postgres/extension/pgcapture.h: -------------------------------------------------------------------------------- 1 | /*-------------------------------------------------------------------- 2 | * pgcapture.h 3 | * 4 | * Compatibility macros. 
5 | *-------------------------------------------------------------------- 6 | */ 7 | #ifndef PGCAPTURE_H 8 | #define PGCAPTURE_H 9 | 10 | #if PG_VERSION_NUM < 150000 11 | #define parse_analyze_fixedparams(a,b,c,d,e) parse_analyze(a,b,c,d,e) 12 | #endif 13 | 14 | /* ProcessUtility_hook */ 15 | #if PG_VERSION_NUM >= 140000 16 | #define UTILITY_HOOK_ARGS PlannedStmt *pstmt, const char *queryString, \ 17 | bool readOnlyTree, \ 18 | ProcessUtilityContext context, ParamListInfo params, \ 19 | QueryEnvironment *queryEnv, \ 20 | DestReceiver *dest, QueryCompletion *qc 21 | #define UTILITY_HOOK_ARG_NAMES pstmt, queryString, \ 22 | readOnlyTree, \ 23 | context, params, \ 24 | queryEnv, \ 25 | dest, qc 26 | #elif PG_VERSION_NUM >= 130000 27 | #define UTILITY_HOOK_ARGS PlannedStmt *pstmt, \ 28 | const char *queryString, \ 29 | ProcessUtilityContext context, \ 30 | ParamListInfo params, \ 31 | QueryEnvironment *queryEnv, \ 32 | DestReceiver *dest, QueryCompletion *qc 33 | #define UTILITY_HOOK_ARG_NAMES pstmt, \ 34 | queryString, \ 35 | context, \ 36 | params, \ 37 | queryEnv, \ 38 | dest, qc 39 | #elif PG_VERSION_NUM >= 100000 40 | #define UTILITY_HOOK_ARGS PlannedStmt *pstmt, \ 41 | const char *queryString, \ 42 | ProcessUtilityContext context, \ 43 | ParamListInfo params, \ 44 | QueryEnvironment *queryEnv, \ 45 | DestReceiver *dest, char *completionTag 46 | #define UTILITY_HOOK_ARG_NAMES pstmt, \ 47 | queryString, \ 48 | context, \ 49 | params, \ 50 | queryEnv, \ 51 | dest, completionTag 52 | #endif 53 | /* end of ProcessUtility_hook */ 54 | 55 | #endif /* PGCAPTURE_H */ 56 | -------------------------------------------------------------------------------- /hack/postgres/extension/sql/00_setup.sql: -------------------------------------------------------------------------------- 1 | CREATE EXTENSION pgcapture; 2 | -------------------------------------------------------------------------------- /hack/postgres/extension/sql/01_basic.sql: -------------------------------------------------------------------------------- 1 | -- Test the DDL SRF 2 | SELECT * FROM pgcapture.current_query(); 3 | 4 | -- Test some basic DDL commands 5 | CREATE TABLE ctas AS SELECT 1 as id; 6 | CREATE TABLE ct(id integer); 7 | 8 | CREATE SCHEMA nsp1; 9 | CREATE TABLE nsp1.tbl(id integer, val text); 10 | CREATE INDEX ON nsp1.tbl (val) WHERE id % 2 = 0; 11 | 12 | -- DDL command that doesn't contain a trailing semicolon 13 | CREATE SCHEMA nsp2\g 14 | 15 | -- Check the results 16 | SELECT query, unnest(tags) FROM pgcapture.ddl_logs ORDER BY id; 17 | -------------------------------------------------------------------------------- /hack/postgres/extension/sql/02_nested_ddl.sql: -------------------------------------------------------------------------------- 1 | LOAD 'pgcapture'; 2 | TRUNCATE pgcapture.ddl_logs; 3 | 4 | DO $$ 5 | BEGIN 6 | CREATE TABLE tbl1(); 7 | CREATE TABLE tbl2(); 8 | END; 9 | $$ LANGUAGE plpgsql; 10 | 11 | CREATE EXTENSION pg_stat_statements; 12 | 13 | -- Check the results 14 | SELECT query, unnest(tags) FROM pgcapture.ddl_logs ORDER BY id; 15 | -------------------------------------------------------------------------------- /hack/postgres/extension/sql/03_multi_query.sql: -------------------------------------------------------------------------------- 1 | LOAD 'pgcapture'; 2 | TRUNCATE pgcapture.ddl_logs; 3 | 4 | -- test multi-query statements mixing DDL / DML 5 | \o /dev/null 6 | -- older versions of psql don't emit all the resultsets in multi-query 7 | SELECT 1\; CREATE TABLE multi(id integer)\; INSERT INTO
multi SELECT 1; 8 | \o 9 | 10 | -- test multi-query statements mixing DDL 11 | DROP TABLE multi\; CREATE TABLE multi(val text); 12 | 13 | -- Check the results 14 | SELECT query, unnest(tags) FROM pgcapture.ddl_logs ORDER BY id; 15 | -------------------------------------------------------------------------------- /hack/postgres/extension/sql/04_temp.sql: -------------------------------------------------------------------------------- 1 | LOAD 'pgcapture'; 2 | 3 | -- Create a normal table that will later be dropped 4 | CREATE TABLE dropme(); 5 | CREATE TABLE dropmetoo(); 6 | 7 | TRUNCATE pgcapture.ddl_logs; 8 | 9 | -- Plain TEMP table should be ignored 10 | CREATE TEMPORARY TABLE tmp1(); 11 | ALTER TABLE tmp1 ADD COLUMN id integer; 12 | 13 | -- Table created in the pg_temp schema should be ignored 14 | CREATE TABLE pg_temp.tmp2(); 15 | ALTER TABLE pg_temp.tmp2 ADD COLUMN id integer; 16 | 17 | -- Table created in the pg_temp schema using the search_path should also be 18 | -- ignored 19 | SET search_path TO not_a_schema, pg_temp, public; 20 | CREATE TABLE tmp3(); 21 | ALTER TABLE tmp3 ADD COLUMN id integer; 22 | RESET search_path; 23 | 24 | -- CTAS / SELECT INTO for temp tables should be ignored 25 | CREATE TABLE pg_temp.tmp4 AS SELECT 1 AS id; 26 | 27 | -- CREATE TEMP VIEW should be ignored 28 | CREATE TEMP VIEW v1 AS SELECT 1 AS id; 29 | 30 | -- Implicit temp view creation should be ignored 31 | CREATE VIEW v2 AS SELECT * FROM tmp3; 32 | 33 | -- ALTER ... RENAME should ignore temp relations 34 | ALTER TABLE tmp4 RENAME COLUMN id TO id2; 35 | ALTER TABLE tmp4 RENAME TO tmp4b; 36 | 37 | -- Check the results 38 | SELECT query, unnest(tags) FROM pgcapture.ddl_logs ORDER BY id; 39 | 40 | -- Dropping only temp tables should be ignored 41 | DROP TABLE tmp1, tmp2; 42 | 43 | -- But dropping a mix of temp and regular table should preserve the regular 44 | -- table names 45 | DROP TABLE tmp3, pg_temp.tmp3, dropme, tmp4b, dropmetoo CASCADE; 46 | 47 | -- Check the results 48 | SELECT query, unnest(tags) FROM pgcapture.ddl_logs ORDER BY id; 49 | -------------------------------------------------------------------------------- /hack/postgres/pg_hba.conf: -------------------------------------------------------------------------------- 1 | # PostgreSQL Client Authentication Configuration File 2 | # =================================================== 3 | # 4 | # Refer to the "Client Authentication" section in the PostgreSQL 5 | # documentation for a complete description of this file. A short 6 | # synopsis follows. 7 | # 8 | # This file controls: which hosts are allowed to connect, how clients 9 | # are authenticated, which PostgreSQL user names they can use, which 10 | # databases they can access. Records take one of these forms: 11 | # 12 | # local DATABASE USER METHOD [OPTIONS] 13 | # host DATABASE USER ADDRESS METHOD [OPTIONS] 14 | # hostssl DATABASE USER ADDRESS METHOD [OPTIONS] 15 | # hostnossl DATABASE USER ADDRESS METHOD [OPTIONS] 16 | # 17 | # (The uppercase items must be replaced by actual values.) 18 | # 19 | # The first field is the connection type: "local" is a Unix-domain 20 | # socket, "host" is either a plain or SSL-encrypted TCP/IP socket, 21 | # "hostssl" is an SSL-encrypted TCP/IP socket, and "hostnossl" is a 22 | # plain TCP/IP socket. 23 | # 24 | # DATABASE can be "all", "sameuser", "samerole", "replication", a 25 | # database name, or a comma-separated list thereof. The "all" 26 | # keyword does not match "replication".
Access to replication 27 | # must be enabled in a separate record (see example below). 28 | # 29 | # USER can be "all", a user name, a group name prefixed with "+", or a 30 | # comma-separated list thereof. In both the DATABASE and USER fields 31 | # you can also write a file name prefixed with "@" to include names 32 | # from a separate file. 33 | # 34 | # ADDRESS specifies the set of hosts the record matches. It can be a 35 | # host name, or it is made up of an IP address and a CIDR mask that is 36 | # an integer (between 0 and 32 (IPv4) or 128 (IPv6) inclusive) that 37 | # specifies the number of significant bits in the mask. A host name 38 | # that starts with a dot (.) matches a suffix of the actual host name. 39 | # Alternatively, you can write an IP address and netmask in separate 40 | # columns to specify the set of hosts. Instead of a CIDR-address, you 41 | # can write "samehost" to match any of the server's own IP addresses, 42 | # or "samenet" to match any address in any subnet that the server is 43 | # directly connected to. 44 | # 45 | # METHOD can be "trust", "reject", "md5", "password", "gss", "sspi", 46 | # "krb5", "ident", "peer", "pam", "ldap", "radius" or "cert". Note that 47 | # "password" sends passwords in clear text; "md5" is preferred since 48 | # it sends encrypted passwords. 49 | # 50 | # OPTIONS are a set of options for the authentication in the format 51 | # NAME=VALUE. The available options depend on the different 52 | # authentication methods -- refer to the "Client Authentication" 53 | # section in the documentation for a list of which options are 54 | # available for which authentication methods. 55 | # 56 | # Database and user names containing spaces, commas, quotes and other 57 | # special characters must be quoted. Quoting one of the keywords 58 | # "all", "sameuser", "samerole" or "replication" makes the name lose 59 | # its special character, and just match a database or username with 60 | # that name. 61 | # 62 | # This file is read on server startup and when the postmaster receives 63 | # a SIGHUP signal. If you edit the file on a running system, you have 64 | # to SIGHUP the postmaster for the changes to take effect. You can 65 | # use "pg_ctl reload" to do that. 66 | 67 | # Put your actual configuration here 68 | # ---------------------------------- 69 | # 70 | # If you want to allow non-local connections, you need to add more 71 | # "host" records. In that case you will also need to make PostgreSQL 72 | # listen on a non-local interface via the listen_addresses 73 | # configuration parameter, or via the -i or -h command line switches. 74 | 75 | 76 | 77 | 78 | # DO NOT DISABLE! 79 | # If you change this first entry you will need to make sure that the 80 | # database superuser can access the database using some other method. 81 | # Noninteractive access to all databases is required during automatic 82 | # maintenance (custom daily cronjobs, replication, and similar tasks). 83 | # 84 | # Database administrative login by Unix domain socket 85 | local all postgres peer 86 | 87 | # TYPE DATABASE USER ADDRESS METHOD 88 | 89 | # "local" is for Unix domain socket connections only 90 | local all all peer 91 | # IPv4 local connections: 92 | host all all 127.0.0.1/32 md5 93 | # IPv6 local connections: 94 | host all all ::1/128 md5 95 | # Allow replication connections from localhost, by a user with the 96 | # replication privilege. 
97 | #local replication postgres peer 98 | #host replication postgres 127.0.0.1/32 md5 99 | #host replication postgres ::1/128 md5 100 | host replication postgres all trust 101 | host all postgres all trust -------------------------------------------------------------------------------- /internal/cursormock/main.go: -------------------------------------------------------------------------------- 1 | package cursormock 2 | 3 | //go:generate mockgen -destination=mock.go -package=$GOPACKAGE -source=../../pkg/cursor/tracker.go Tracker 4 | -------------------------------------------------------------------------------- /internal/cursormock/mock.go: -------------------------------------------------------------------------------- 1 | // Code generated by MockGen. DO NOT EDIT. 2 | // Source: ../../pkg/cursor/tracker.go 3 | 4 | // Package cursormock is a generated GoMock package. 5 | package cursormock 6 | 7 | import ( 8 | reflect "reflect" 9 | 10 | pulsar "github.com/apache/pulsar-client-go/pulsar" 11 | gomock "github.com/golang/mock/gomock" 12 | cursor "github.com/replicase/pgcapture/pkg/cursor" 13 | ) 14 | 15 | // MockTracker is a mock of Tracker interface. 16 | type MockTracker struct { 17 | ctrl *gomock.Controller 18 | recorder *MockTrackerMockRecorder 19 | } 20 | 21 | // MockTrackerMockRecorder is the mock recorder for MockTracker. 22 | type MockTrackerMockRecorder struct { 23 | mock *MockTracker 24 | } 25 | 26 | // NewMockTracker creates a new mock instance. 27 | func NewMockTracker(ctrl *gomock.Controller) *MockTracker { 28 | mock := &MockTracker{ctrl: ctrl} 29 | mock.recorder = &MockTrackerMockRecorder{mock} 30 | return mock 31 | } 32 | 33 | // EXPECT returns an object that allows the caller to indicate expected use. 34 | func (m *MockTracker) EXPECT() *MockTrackerMockRecorder { 35 | return m.recorder 36 | } 37 | 38 | // Close mocks base method. 39 | func (m *MockTracker) Close() { 40 | m.ctrl.T.Helper() 41 | m.ctrl.Call(m, "Close") 42 | } 43 | 44 | // Close indicates an expected call of Close. 45 | func (mr *MockTrackerMockRecorder) Close() *gomock.Call { 46 | mr.mock.ctrl.T.Helper() 47 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Close", reflect.TypeOf((*MockTracker)(nil).Close)) 48 | } 49 | 50 | // Commit mocks base method. 51 | func (m *MockTracker) Commit(cp cursor.Checkpoint, mid pulsar.MessageID) error { 52 | m.ctrl.T.Helper() 53 | ret := m.ctrl.Call(m, "Commit", cp, mid) 54 | ret0, _ := ret[0].(error) 55 | return ret0 56 | } 57 | 58 | // Commit indicates an expected call of Commit. 59 | func (mr *MockTrackerMockRecorder) Commit(cp, mid interface{}) *gomock.Call { 60 | mr.mock.ctrl.T.Helper() 61 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Commit", reflect.TypeOf((*MockTracker)(nil).Commit), cp, mid) 62 | } 63 | 64 | // Last mocks base method. 65 | func (m *MockTracker) Last() (cursor.Checkpoint, error) { 66 | m.ctrl.T.Helper() 67 | ret := m.ctrl.Call(m, "Last") 68 | ret0, _ := ret[0].(cursor.Checkpoint) 69 | ret1, _ := ret[1].(error) 70 | return ret0, ret1 71 | } 72 | 73 | // Last indicates an expected call of Last. 74 | func (mr *MockTrackerMockRecorder) Last() *gomock.Call { 75 | mr.mock.ctrl.T.Helper() 76 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Last", reflect.TypeOf((*MockTracker)(nil).Last)) 77 | } 78 | 79 | // Start mocks base method. 80 | func (m *MockTracker) Start() { 81 | m.ctrl.T.Helper() 82 | m.ctrl.Call(m, "Start") 83 | } 84 | 85 | // Start indicates an expected call of Start. 
86 | func (mr *MockTrackerMockRecorder) Start() *gomock.Call { 87 | mr.mock.ctrl.T.Helper() 88 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Start", reflect.TypeOf((*MockTracker)(nil).Start)) 89 | } 90 | -------------------------------------------------------------------------------- /internal/test/env.go: -------------------------------------------------------------------------------- 1 | package test 2 | 3 | import "os" 4 | 5 | func Env(key, fallback string) string { 6 | if v, ok := os.LookupEnv(key); ok { 7 | return v 8 | } 9 | return fallback 10 | } 11 | -------------------------------------------------------------------------------- /internal/test/pg.go: -------------------------------------------------------------------------------- 1 | package test 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "os" 8 | "strconv" 9 | "testing" 10 | 11 | "github.com/jackc/pgconn" 12 | "github.com/jackc/pgx/v4" 13 | ) 14 | 15 | type DBURL struct { 16 | Host string 17 | DB string 18 | } 19 | 20 | func (u DBURL) URL() string { 21 | return fmt.Sprintf("postgres://postgres@%s/%s?sslmode=disable", u.Host, u.DB) 22 | } 23 | 24 | func (u DBURL) Repl() string { 25 | return fmt.Sprintf("postgres://postgres@%s/%s?replication=database", u.Host, u.DB) 26 | } 27 | 28 | func (u DBURL) Exec(stmts ...string) error { 29 | ctx := context.Background() 30 | conn, err := pgx.Connect(ctx, u.URL()) 31 | if err != nil { 32 | return err 33 | } 34 | defer conn.Close(ctx) 35 | 36 | for _, stmt := range stmts { 37 | if _, err = conn.Exec(ctx, stmt); err != nil { 38 | return err 39 | } 40 | } 41 | return nil 42 | } 43 | 44 | func (u DBURL) RandomData(table string) error { 45 | return u.Exec( 46 | fmt.Sprintf("create table if not exists %s (id serial primary key, v int)", table), 47 | fmt.Sprintf("insert into %s (v) select * from generate_series(1,100) as v", table), 48 | fmt.Sprintf("analyze %s", table), 49 | ) 50 | } 51 | 52 | func (u DBURL) CleanData(table string) error { 53 | return u.Exec(fmt.Sprintf("delete from %s", table)) 54 | } 55 | 56 | func (u DBURL) TablePages(table string) (pages int, err error) { 57 | ctx := context.Background() 58 | conn, err := pgx.Connect(ctx, u.URL()) 59 | if err != nil { 60 | return 0, err 61 | } 62 | defer conn.Close(ctx) 63 | 64 | if err := u.Exec("analyze " + table); err != nil { 65 | return 0, err 66 | } 67 | 68 | if err = conn.QueryRow(ctx, fmt.Sprintf("select relpages from pg_class where relname = '%s'", table)).Scan(&pages); err != nil { 69 | return 0, err 70 | } 71 | return pages, nil 72 | } 73 | 74 | func CreateDB(u DBURL, n DBURL) (DBURL, error) { 75 | if err := u.Exec("create database " + n.DB); err != nil { 76 | var pge *pgconn.PgError 77 | if !errors.As(err, &pge) || pge.Code != "42P04" { 78 | return DBURL{}, err 79 | } 80 | } 81 | return n, nil 82 | } 83 | 84 | func ShouldSkipTestByPGVersion(t *testing.T, minimumVersion float64) { 85 | if v := os.Getenv("PG_VERSION"); v != "" && minimumVersion != 0 { 86 | version, _ := strconv.ParseFloat(v, 64) 87 | if version < minimumVersion { 88 | t.Skipf("Skip test for PG_VERSION %.1f < %.1f", version, minimumVersion) 89 | } 90 | } 91 | } 92 | 93 | func GetPostgresURL() string { 94 | return Env("POSTGRES_URL", "postgres://postgres@127.0.0.1:5432/postgres?sslmode=disable") 95 | } 96 | 97 | func GetPostgresReplURL() string { 98 | return fmt.Sprintf("%s&replication=database", GetPostgresURL()) 99 | } 100 | -------------------------------------------------------------------------------- /internal/test/pulsar.go: 
-------------------------------------------------------------------------------- 1 | package test 2 | 3 | func GetPulsarURL() string { 4 | return Env("PULSAR_URL", "pulsar://127.0.0.1:6650") 5 | } 6 | 7 | func GetPulsarAdminURL() string { 8 | return Env("PULSAR_ADMIN_URL", "http://127.0.0.1:8080") 9 | } 10 | -------------------------------------------------------------------------------- /internal/tool/main.go: -------------------------------------------------------------------------------- 1 | //go:build tools 2 | // +build tools 3 | 4 | package tool 5 | 6 | import ( 7 | _ "github.com/golang/mock/mockgen" 8 | _ "google.golang.org/grpc/cmd/protoc-gen-go-grpc" 9 | ) 10 | -------------------------------------------------------------------------------- /pb/pgcapture.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package pgcapture; 4 | option go_package = "github.com/replicase/pgcapture/pkg/pb"; 5 | 6 | import "google/protobuf/struct.proto"; 7 | import "google/protobuf/duration.proto"; 8 | 9 | message Checkpoint { 10 | uint64 lsn = 1; 11 | uint32 seq = 2; 12 | bytes data = 3; 13 | } 14 | 15 | message Message { 16 | oneof type { 17 | Begin begin = 1; 18 | Commit commit = 2; 19 | Change change = 3; 20 | KeepAlive keep_alive = 4; 21 | } 22 | } 23 | 24 | message Begin { 25 | uint64 final_lsn = 1; 26 | uint64 commit_time = 2; 27 | uint32 remote_xid = 3; 28 | } 29 | 30 | message Commit { 31 | uint64 commit_lsn = 1; 32 | uint64 end_lsn = 2; 33 | uint64 commit_time = 3; 34 | } 35 | 36 | message KeepAlive { 37 | 38 | } 39 | 40 | message Change { 41 | enum Operation { 42 | INSERT = 0; 43 | UPDATE = 1; 44 | DELETE = 2; 45 | } 46 | Operation op = 1; 47 | string schema = 2; 48 | string table = 3; 49 | repeated Field new = 4; 50 | repeated Field old = 5; 51 | } 52 | 53 | message Field { 54 | string name = 1; 55 | uint32 oid = 2; 56 | oneof value { 57 | bytes binary = 3; 58 | string text = 4; 59 | } 60 | } 61 | 62 | service DBLogGateway { 63 | rpc Capture(stream CaptureRequest) returns (stream CaptureMessage); 64 | } 65 | 66 | service DBLogController { 67 | rpc PullDumpInfo(stream DumpInfoRequest) returns (stream DumpInfoResponse); 68 | rpc Schedule(ScheduleRequest) returns (ScheduleResponse); 69 | rpc StopSchedule(StopScheduleRequest) returns (StopScheduleResponse); 70 | rpc SetScheduleCoolDown(SetScheduleCoolDownRequest) returns (SetScheduleCoolDownResponse); 71 | } 72 | 73 | message CaptureRequest { 74 | oneof type { 75 | CaptureInit init = 1; 76 | CaptureAck ack = 2; 77 | } 78 | } 79 | 80 | message CaptureInit { 81 | string uri = 1; 82 | google.protobuf.Struct parameters = 2; 83 | } 84 | 85 | message CaptureAck { 86 | Checkpoint checkpoint = 1; 87 | string requeue_reason = 2; 88 | } 89 | 90 | message CaptureMessage { 91 | Checkpoint checkpoint = 1; 92 | Change change = 2; 93 | } 94 | 95 | message DumpInfoRequest { 96 | string uri = 1; 97 | string requeue_reason = 2; 98 | } 99 | 100 | message DumpInfoResponse { 101 | string schema = 1; 102 | string table = 2; 103 | uint32 page_begin = 3; 104 | uint32 page_end = 4; 105 | } 106 | 107 | message ScheduleRequest { 108 | string uri = 1; 109 | repeated DumpInfoResponse dumps = 2; 110 | } 111 | 112 | message ScheduleResponse { 113 | 114 | } 115 | 116 | message StopScheduleRequest { 117 | string uri = 1; 118 | } 119 | 120 | message StopScheduleResponse { 121 | 122 | } 123 | 124 | message SetScheduleCoolDownRequest { 125 | string uri = 1; 126 | google.protobuf.Duration duration = 2; 127 | } 128 
| 129 | message SetScheduleCoolDownResponse { 130 | 131 | } 132 | 133 | service Agent { 134 | rpc Configure(AgentConfigRequest) returns (AgentConfigResponse) {} 135 | rpc Dump(AgentDumpRequest) returns (AgentDumpResponse) {} 136 | rpc StreamDump(AgentDumpRequest) returns (stream Change) {} 137 | } 138 | 139 | message AgentDumpRequest { 140 | uint64 min_lsn = 1; 141 | DumpInfoResponse info = 2; 142 | } 143 | 144 | message AgentDumpResponse { 145 | repeated Change change = 1; 146 | } 147 | 148 | message AgentConfigRequest { 149 | google.protobuf.Struct parameters = 1; 150 | } 151 | message AgentConfigResponse { 152 | google.protobuf.Struct report = 1; 153 | } -------------------------------------------------------------------------------- /pgcapture.go: -------------------------------------------------------------------------------- 1 | package pgcapture 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/replicase/pgcapture/pkg/dblog" 7 | "github.com/replicase/pgcapture/pkg/pb" 8 | "github.com/replicase/pgcapture/pkg/pgcapture" 9 | "github.com/replicase/pgcapture/pkg/source" 10 | "google.golang.org/grpc" 11 | ) 12 | 13 | var ( 14 | CommitSHA string 15 | Version string 16 | ) 17 | 18 | type ( 19 | Model = pgcapture.Model 20 | Change = pgcapture.Change 21 | ModelHandlerFunc = pgcapture.ModelHandlerFunc 22 | ConsumerOption = pgcapture.ConsumerOption 23 | SourceResolver = dblog.SourceResolver 24 | SourceDumper = dblog.SourceDumper 25 | RequeueSource = source.RequeueSource 26 | ) 27 | 28 | func NewDBLogConsumer(ctx context.Context, conn *grpc.ClientConn, option ConsumerOption) *pgcapture.Consumer { 29 | return pgcapture.NewDBLogConsumer(ctx, conn, option) 30 | } 31 | 32 | func NewDBLogGateway(conn *grpc.ClientConn, sourceResolver SourceResolver) *dblog.Gateway { 33 | return &dblog.Gateway{ 34 | SourceResolver: sourceResolver, 35 | DumpInfoPuller: &dblog.GRPCDumpInfoPuller{Client: pb.NewDBLogControllerClient(conn)}, 36 | } 37 | } 38 | 39 | func NewDBLogControllerClient(conn *grpc.ClientConn) pb.DBLogControllerClient { 40 | return pb.NewDBLogControllerClient(conn) 41 | } 42 | 43 | func MarshalJSON(m Model) ([]byte, error) { 44 | return pgcapture.MarshalJSON(m) 45 | } 46 | -------------------------------------------------------------------------------- /pkg/cursor/main.go: -------------------------------------------------------------------------------- 1 | package cursor 2 | 3 | import ( 4 | "errors" 5 | "strconv" 6 | "strings" 7 | "time" 8 | 9 | "github.com/apache/pulsar-client-go/pulsar" 10 | "github.com/jackc/pglogrepl" 11 | ) 12 | 13 | type Checkpoint struct { 14 | LSN uint64 15 | Seq uint32 16 | Data []byte 17 | ServerTime time.Time 18 | } 19 | 20 | func (cp *Checkpoint) Equal(cp2 Checkpoint) bool { 21 | return cp.LSN == cp2.LSN && cp.Seq == cp2.Seq 22 | } 23 | 24 | func (cp *Checkpoint) After(cp2 Checkpoint) bool { 25 | return (cp.LSN > cp2.LSN) || (cp.LSN == cp2.LSN && cp.Seq > cp2.Seq) 26 | } 27 | 28 | func (cp *Checkpoint) ToKey() string { 29 | return pglogrepl.LSN(cp.LSN).String() + "|" + strconv.FormatUint(uint64(cp.Seq), 16) 30 | } 31 | 32 | func (cp *Checkpoint) FromKey(str string) error { 33 | parts := strings.Split(str, "|") 34 | if len(parts) != 2 { 35 | return errors.New("malformed key, should be lsn|seq") 36 | } 37 | lsn, err := pglogrepl.ParseLSN(parts[0]) 38 | if err != nil { 39 | return err 40 | } 41 | seq, err := strconv.ParseUint(parts[1], 16, 32) 42 | if err != nil { 43 | return err 44 | } 45 | cp.LSN = uint64(lsn) 46 | cp.Seq = uint32(seq) 47 | return nil 48 | } 49 | 50 | 
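// Example (a sketch; the LSN and Seq values below are illustrative): ToKey
// and FromKey round-trip a Checkpoint through a Pulsar message key, encoding
// the LSN in pglogrepl's "%X/%X" text form and the Seq in hexadecimal:
//
//	cp := Checkpoint{LSN: 0x16B374D848, Seq: 10}
//	key := cp.ToKey() // "16/B374D848|a"
//	var parsed Checkpoint
//	if err := parsed.FromKey(key); err == nil {
//		_ = parsed.Equal(cp) // true: only LSN and Seq are part of the key
//	}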
func ToCheckpoint(msg pulsar.Message) (cp Checkpoint, err error) { 51 | if err = cp.FromKey(msg.Key()); err != nil { 52 | return 53 | } 54 | cp.Data = msg.ID().Serialize() 55 | return 56 | } 57 | 58 | // Since only the reader can guarantee not to create the partitioned topic(s), 59 | // we use the reader creation to ensure the existence of the specified topic 60 | func ensureTopic(client pulsar.Client, topic string) error { 61 | reader, err := client.CreateReader(pulsar.ReaderOptions{ 62 | Name: topic + "-temp-reader", 63 | Topic: topic, 64 | StartMessageID: pulsar.LatestMessageID(), 65 | }) 66 | if err != nil { 67 | return err 68 | } 69 | reader.Close() 70 | return nil 71 | } 72 | -------------------------------------------------------------------------------- /pkg/cursor/main_test.go: -------------------------------------------------------------------------------- 1 | package cursor 2 | 3 | import ( 4 | "encoding/binary" 5 | "errors" 6 | "fmt" 7 | "io" 8 | "net/http" 9 | "strconv" 10 | "strings" 11 | 12 | "github.com/golang/protobuf/proto" 13 | "github.com/replicase/pgcapture/internal/test" 14 | pulsaradmin "github.com/streamnative/pulsar-admin-go" 15 | "github.com/streamnative/pulsar-admin-go/pkg/utils" 16 | ) 17 | 18 | func NewAdminClient() (pulsaradmin.Client, error) { 19 | return pulsaradmin.NewClient(&pulsaradmin.Config{WebServiceURL: test.GetPulsarAdminURL()}) 20 | } 21 | 22 | func nextMessageID(cursor utils.CursorStats) string { 23 | comps := strings.Split(cursor.MarkDeletePosition, ":") 24 | e, _ := strconv.ParseInt(comps[1], 10, 64) 25 | return comps[0] + ":" + strconv.FormatInt(e+1, 10) 26 | } 27 | 28 | func CheckSubscriptionCursor(client pulsaradmin.Client, topicName string, subscriptionName string) (string, error) { 29 | topic, err := utils.GetTopicName("public/default/" + topicName) 30 | if err != nil { 31 | return "", err 32 | } 33 | 34 | stats, err := client.Topics().GetInternalStats(*topic) 35 | if err != nil { 36 | return "", err 37 | } 38 | 39 | for sub, c := range stats.Cursors { 40 | if sub == subscriptionName { 41 | return nextMessageID(c), nil 42 | } 43 | } 44 | return "", errors.New("subscription not found") 45 | } 46 | 47 | func GetCheckpointByMessageID(topicName string, messageID string) (cp Checkpoint, err error) { 48 | mid, err := utils.ParseMessageID(messageID) 49 | if err != nil { 50 | return cp, err 51 | } 52 | 53 | resp, err := http.Get( 54 | fmt.Sprintf("%s/admin/v2/persistent/public/default/", test.GetPulsarAdminURL()) + 55 | topicName + "/ledger/" + 56 | strconv.FormatInt(mid.LedgerID, 10) + 57 | "/entry/" + 58 | strconv.FormatInt(mid.EntryID, 10)) 59 | if err != nil { 60 | return cp, err 61 | } 62 | defer io.Copy(io.Discard, resp.Body) 63 | 64 | if resp.StatusCode != http.StatusOK { 65 | b, _ := io.ReadAll(resp.Body) 66 | return cp, errors.New(fmt.Sprintf("error response:\n %s", string(b))) 67 | } 68 | 69 | buf32 := make([]byte, 4) 70 | if _, err := io.ReadFull(resp.Body, buf32); err != nil { 71 | return cp, err 72 | } 73 | 74 | metaSize := binary.BigEndian.Uint32(buf32) 75 | metaBuf := make([]byte, metaSize) 76 | if _, err := io.ReadFull(resp.Body, metaBuf); err != nil { 77 | return cp, err 78 | } 79 | 80 | meta := new(utils.SingleMessageMetadata) 81 | if err := proto.Unmarshal(metaBuf, meta); err != nil { 82 | return cp, err 83 | } 84 | 85 | msgKey := *meta.PartitionKey 86 | err = cp.FromKey(msgKey) 87 | return cp, err 88 | } 89 | -------------------------------------------------------------------------------- /pkg/cursor/pulsar.go: 
-------------------------------------------------------------------------------- 1 | package cursor 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "time" 7 | 8 | "github.com/apache/pulsar-client-go/pulsar" 9 | ) 10 | 11 | var _ Tracker = (*PulsarTracker)(nil) 12 | 13 | type PulsarTracker struct { 14 | reader pulsar.Reader 15 | } 16 | 17 | func NewPulsarTracker(client pulsar.Client, topic string) (*PulsarTracker, error) { 18 | reader, err := client.CreateReader(pulsar.ReaderOptions{ 19 | Topic: topic, 20 | Name: topic + "-producer", 21 | StartMessageID: pulsar.LatestMessageID(), 22 | StartMessageIDInclusive: true, 23 | }) 24 | if err != nil { 25 | return nil, err 26 | } 27 | return &PulsarTracker{reader: reader}, nil 28 | } 29 | 30 | func (p *PulsarTracker) Last() (last Checkpoint, err error) { 31 | defer p.reader.Close() 32 | 33 | var ( 34 | msg pulsar.Message 35 | ) 36 | for { 37 | ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 38 | msg, err = p.reader.Next(ctx) 39 | cancel() 40 | if errors.Is(err, context.DeadlineExceeded) && !p.reader.HasNext() { 41 | err = nil 42 | return 43 | } 44 | if err != nil { 45 | return 46 | } 47 | if last, err = ToCheckpoint(msg); err != nil { 48 | return 49 | } 50 | } 51 | } 52 | 53 | func (p *PulsarTracker) Start() { 54 | // Do Nothing 55 | } 56 | 57 | func (p *PulsarTracker) Commit(_ Checkpoint, _ pulsar.MessageID) error { 58 | return nil 59 | } 60 | 61 | func (p *PulsarTracker) Close() { 62 | // Do Nothing 63 | } 64 | -------------------------------------------------------------------------------- /pkg/cursor/pulsar_sub.go: -------------------------------------------------------------------------------- 1 | package cursor 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "sync" 7 | "time" 8 | 9 | "github.com/apache/pulsar-client-go/pulsar" 10 | ) 11 | 12 | var _ Tracker = (*PulsarSubscriptionTracker)(nil) 13 | 14 | func (p *PulsarSubscriptionTracker) copyCursor() pulsar.MessageID { 15 | p.lock.RLock() 16 | defer p.lock.RUnlock() 17 | 18 | if p.cursor == nil { 19 | return nil 20 | } 21 | rst, _ := pulsar.DeserializeMessageID(p.cursor.Serialize()) 22 | return rst 23 | } 24 | 25 | func equalMessageID(a pulsar.MessageID, b pulsar.MessageID) bool { 26 | return a.LedgerID() == b.LedgerID() && 27 | a.EntryID() == b.EntryID() && 28 | a.PartitionIdx() == b.PartitionIdx() && 29 | a.BatchIdx() == b.BatchIdx() 30 | } 31 | 32 | func (p *PulsarSubscriptionTracker) tryAck() { 33 | current := p.copyCursor() 34 | if current == nil { 35 | return 36 | } 37 | if p.acked == nil || !equalMessageID(current, p.acked) { 38 | if err := p.consumer.AckIDCumulative(current); err != nil { 39 | return 40 | } 41 | p.acked = current 42 | } 43 | } 44 | 45 | func (p *PulsarSubscriptionTracker) drainMessages(ctx context.Context, messages <-chan pulsar.ConsumerMessage) { 46 | for { 47 | select { 48 | case <-ctx.Done(): 49 | p.stopDrain <- struct{}{} 50 | return 51 | case <-messages: 52 | // Do nothing 53 | continue 54 | } 55 | } 56 | } 57 | 58 | func (p *PulsarSubscriptionTracker) waitCommit(ctx context.Context, interval time.Duration) { 59 | ticker := time.NewTicker(interval) 60 | defer ticker.Stop() 61 | 62 | for { 63 | select { 64 | case <-ticker.C: 65 | p.tryAck() 66 | case <-ctx.Done(): 67 | p.stopCommit <- struct{}{} 68 | return 69 | } 70 | } 71 | } 72 | 73 | func NewPulsarSubscriptionTracker(client pulsar.Client, topic string, commitInterval time.Duration, replicateState bool) (*PulsarSubscriptionTracker, error) { 74 | if err := ensureTopic(client, topic); err 
!= nil { 75 | return nil, err 76 | } 77 | 78 | consumer, err := client.Subscribe(pulsar.ConsumerOptions{ 79 | Name: "pulsar-subscription-tracker", 80 | Topic: topic, 81 | SubscriptionName: "pulsar-subscription-tracker-consumer", 82 | Type: pulsar.Exclusive, 83 | ReplicateSubscriptionState: replicateState, 84 | }) 85 | 86 | if err != nil { 87 | return nil, err 88 | } 89 | 90 | if commitInterval == 0 { 91 | commitInterval = time.Minute 92 | } 93 | 94 | tracker := &PulsarSubscriptionTracker{ 95 | consumer: consumer, 96 | commitInterval: commitInterval, 97 | lock: sync.RWMutex{}, 98 | } 99 | return tracker, nil 100 | } 101 | 102 | type PulsarSubscriptionTracker struct { 103 | consumer pulsar.Consumer 104 | lock sync.RWMutex 105 | cursor pulsar.MessageID 106 | commitCancel context.CancelFunc 107 | commitInterval time.Duration 108 | stopCommit chan struct{} 109 | stopDrain chan struct{} 110 | acked pulsar.MessageID 111 | } 112 | 113 | func (p *PulsarSubscriptionTracker) read(ctx context.Context) (cp Checkpoint, err error) { 114 | ctx, cancel := context.WithTimeout(ctx, 5*time.Second) 115 | defer cancel() 116 | 117 | msg, err := p.consumer.Receive(ctx) 118 | if err != nil { 119 | return cp, err 120 | } 121 | return ToCheckpoint(msg) 122 | } 123 | 124 | func (p *PulsarSubscriptionTracker) Last() (Checkpoint, error) { 125 | var last Checkpoint 126 | if p.consumer != nil { 127 | for { 128 | cp, err := p.read(context.Background()) 129 | if err != nil { 130 | // most likely that there is no message in the topic 131 | if errors.Is(err, context.DeadlineExceeded) { 132 | return last, nil 133 | } 134 | return Checkpoint{}, err 135 | } 136 | last = cp 137 | } 138 | } 139 | return Checkpoint{}, nil 140 | } 141 | 142 | func (p *PulsarSubscriptionTracker) Start() { 143 | ctx, cancel := context.WithCancel(context.Background()) 144 | p.commitCancel = cancel 145 | p.stopCommit = make(chan struct{}) 146 | p.stopDrain = make(chan struct{}) 147 | 148 | // To bypass the consumer flow controls, we must drain the message in the queue 149 | go p.drainMessages(ctx, p.consumer.Chan()) 150 | go p.waitCommit(ctx, p.commitInterval) 151 | } 152 | 153 | func (p *PulsarSubscriptionTracker) Commit(_ Checkpoint, mid pulsar.MessageID) error { 154 | p.lock.Lock() 155 | defer p.lock.Unlock() 156 | p.cursor = mid 157 | return nil 158 | } 159 | 160 | func (p *PulsarSubscriptionTracker) Close() { 161 | if p.consumer != nil { 162 | if p.commitCancel != nil { 163 | p.commitCancel() 164 | <-p.stopDrain 165 | <-p.stopCommit 166 | } 167 | p.consumer.Close() 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /pkg/cursor/pulsar_sub_test.go: -------------------------------------------------------------------------------- 1 | package cursor 2 | 3 | import ( 4 | "context" 5 | "strconv" 6 | "testing" 7 | "time" 8 | 9 | "github.com/apache/pulsar-client-go/pulsar" 10 | "github.com/replicase/pgcapture/internal/test" 11 | ) 12 | 13 | func newPulsarSubscriptionTracker(topic string) (*PulsarSubscriptionTracker, func(), error) { 14 | client, err := pulsar.NewClient(pulsar.ClientOptions{ 15 | URL: test.GetPulsarURL(), 16 | }) 17 | if err != nil { 18 | return nil, nil, err 19 | } 20 | 21 | tracker, err := NewPulsarSubscriptionTracker(client, topic, 100*time.Millisecond, false) 22 | if err != nil { 23 | client.Close() 24 | return nil, nil, err 25 | } 26 | 27 | closeFunc := func() { 28 | tracker.Close() 29 | client.Close() 30 | } 31 | return tracker, closeFunc, nil 32 | } 33 | 34 | func 
TestPulsarSubscriptionTracker_Commit(t *testing.T) { 35 | topic := time.Now().Format("20060102150405") + "-commit" 36 | 37 | tracker, cancel, err := newPulsarSubscriptionTracker(topic) 38 | if err != nil { 39 | t.Fatal(err) 40 | } 41 | defer cancel() 42 | 43 | // so it will start the commit loop 44 | tracker.Start() 45 | 46 | client, err := pulsar.NewClient(pulsar.ClientOptions{ 47 | URL: test.GetPulsarURL(), 48 | }) 49 | if err != nil { 50 | t.Fatal(err) 51 | } 52 | defer client.Close() 53 | 54 | producer, err := client.CreateProducer(pulsar.ProducerOptions{ 55 | Topic: topic, 56 | Name: topic + "-producer", 57 | }) 58 | if err != nil { 59 | t.Fatal(err) 60 | } 61 | defer func() { 62 | defer producer.Close() 63 | _ = producer.Flush() 64 | }() 65 | 66 | var pos pulsar.MessageID 67 | 68 | for i := 0; i < 10; i++ { 69 | cp := Checkpoint{LSN: uint64(i + 100)} 70 | mid, err := producer.Send(context.Background(), &pulsar.ProducerMessage{ 71 | Key: cp.ToKey(), 72 | Payload: []byte("test-" + strconv.Itoa(i)), 73 | }) 74 | if err != nil { 75 | t.Fatal(err) 76 | } 77 | 78 | // set the position to the 5th message 79 | if i == 4 { 80 | pos = mid 81 | } 82 | } 83 | 84 | if err := tracker.Commit(Checkpoint{}, pos); err != nil { 85 | t.Fatal(err) 86 | } 87 | 88 | time.Sleep(time.Second) 89 | 90 | admin, err := NewAdminClient() 91 | if err != nil { 92 | t.Fatal(err) 93 | } 94 | 95 | cursor, err := CheckSubscriptionCursor(admin, topic, "pulsar-subscription-tracker-consumer") 96 | if err != nil { 97 | t.Fatal(err) 98 | } 99 | 100 | cp, err := GetCheckpointByMessageID(topic, cursor) 101 | if err != nil { 102 | t.Fatal(err) 103 | } 104 | 105 | // the next message should be the 6th message 106 | if cp.LSN != 105 { 107 | t.Fatalf("unexpected next position: %v", cp.LSN) 108 | } 109 | } 110 | 111 | func TestPulsarSubscriptionTracker_Last(t *testing.T) { 112 | topic := time.Now().Format("20060102150405") 113 | 114 | tracker, cancel, err := newPulsarSubscriptionTracker(topic) 115 | if err != nil { 116 | t.Fatal(err) 117 | } 118 | defer cancel() 119 | 120 | client, err := pulsar.NewClient(pulsar.ClientOptions{ 121 | URL: test.GetPulsarURL(), 122 | }) 123 | if err != nil { 124 | t.Fatal(err) 125 | } 126 | defer client.Close() 127 | 128 | producer, err := client.CreateProducer(pulsar.ProducerOptions{ 129 | Topic: topic, 130 | Name: topic + "-producer", 131 | }) 132 | if err != nil { 133 | t.Fatal(err) 134 | } 135 | defer func() { 136 | defer producer.Close() 137 | _ = producer.Flush() 138 | }() 139 | 140 | for i := 0; i < 10; i++ { 141 | cp := Checkpoint{LSN: uint64(i + 100)} 142 | _, err := producer.Send(context.Background(), &pulsar.ProducerMessage{ 143 | Key: cp.ToKey(), 144 | Payload: []byte("test-" + strconv.Itoa(i)), 145 | }) 146 | if err != nil { 147 | t.Fatal(err) 148 | } 149 | } 150 | 151 | last, err := tracker.Last() 152 | if err != nil { 153 | t.Fatal(err) 154 | } 155 | if last.LSN != 109 { 156 | t.Fatalf("unexpected checkpoint.LSN: %v", last.LSN) 157 | } 158 | } 159 | 160 | func TestPulsarSubscriptionTracker_LastWithEmptyTopic(t *testing.T) { 161 | topic := time.Now().Format("20060102150405") + "-empty" 162 | 163 | tracker, cancel, err := newPulsarSubscriptionTracker(topic) 164 | if err != nil { 165 | t.Fatal(err) 166 | } 167 | defer cancel() 168 | 169 | last, err := tracker.Last() 170 | if err != nil { 171 | t.Fatal(err) 172 | } 173 | if last.LSN != 0 || last.Seq != 0 { 174 | t.Fatal("checkpoint of empty topic should be zero") 175 | } 176 | } 177 | 
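The tests above exercise Commit and Last against a live broker. For orientation, a minimal lifecycle sketch of the same tracker follows: resume from Last, Start the background ack loop, Commit after each handled message, and Close on shutdown. The broker URL, topic name, and commit interval here are illustrative assumptions, not values from the package.

package main

import (
	"log"
	"time"

	"github.com/apache/pulsar-client-go/pulsar"
	"github.com/replicase/pgcapture/pkg/cursor"
)

func main() {
	client, err := pulsar.NewClient(pulsar.ClientOptions{URL: "pulsar://127.0.0.1:6650"})
	if err != nil {
		log.Fatal(err)
	}
	defer client.Close()

	tracker, err := cursor.NewPulsarSubscriptionTracker(client, "my-topic", time.Minute, false)
	if err != nil {
		log.Fatal(err)
	}
	defer tracker.Close()

	// Resume point: the zero Checkpoint when the topic has no messages yet.
	last, err := tracker.Last()
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("resuming after LSN %d", last.LSN)

	tracker.Start() // spawns the drain loop and the periodic cumulative-ack loop
	// ... consume messages; once a message is durably handled, record it:
	// _ = tracker.Commit(cp, msg.ID())
}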
-------------------------------------------------------------------------------- /pkg/cursor/pulsar_test.go: -------------------------------------------------------------------------------- 1 | package cursor 2 | 3 | import ( 4 | "context" 5 | "strconv" 6 | "testing" 7 | "time" 8 | 9 | "github.com/apache/pulsar-client-go/pulsar" 10 | "github.com/replicase/pgcapture/internal/test" 11 | ) 12 | 13 | func newPulsarTracker(topic string) (*PulsarTracker, func(), error) { 14 | client, err := pulsar.NewClient(pulsar.ClientOptions{ 15 | URL: test.GetPulsarURL(), 16 | }) 17 | if err != nil { 18 | return nil, nil, err 19 | } 20 | 21 | tracker, err := NewPulsarTracker(client, topic) 22 | if err != nil { 23 | return nil, nil, err 24 | } 25 | 26 | closeFunc := func() { 27 | tracker.Close() 28 | client.Close() 29 | } 30 | 31 | return tracker, closeFunc, nil 32 | } 33 | 34 | func TestPulsarTracker(t *testing.T) { 35 | topic := time.Now().Format("20060102150405") 36 | 37 | tracker, cancel, err := newPulsarTracker(topic) 38 | if err != nil { 39 | t.Fatal(err) 40 | } 41 | defer cancel() 42 | 43 | client, err := pulsar.NewClient(pulsar.ClientOptions{ 44 | URL: test.GetPulsarURL(), 45 | }) 46 | if err != nil { 47 | t.Fatal(err) 48 | } 49 | defer client.Close() 50 | 51 | producer, err := client.CreateProducer(pulsar.ProducerOptions{ 52 | Topic: topic, 53 | Name: topic + "-producer", 54 | }) 55 | if err != nil { 56 | t.Fatal(err) 57 | } 58 | defer producer.Close() 59 | 60 | for i := 0; i < 10; i++ { 61 | cp := Checkpoint{LSN: uint64(100 + i)} 62 | if _, err := producer.Send(context.Background(), &pulsar.ProducerMessage{ 63 | Key: cp.ToKey(), 64 | Payload: []byte("test-" + strconv.Itoa(i)), 65 | }); err != nil { 66 | t.Fatal(err) 67 | } 68 | } 69 | _ = producer.Flush() 70 | 71 | last, err := tracker.Last() 72 | if err != nil { 73 | t.Fatal(err) 74 | } 75 | 76 | if last.LSN != 109 { 77 | t.Fatalf("unexpected checkpoint.LSN: %v", last.LSN) 78 | } 79 | } 80 | 81 | func TestPulsarTracker_Empty(t *testing.T) { 82 | topic := time.Now().Format("20060102150405") + "-empty" 83 | 84 | tracker, cancel, err := newPulsarTracker(topic) 85 | if err != nil { 86 | t.Fatal(err) 87 | } 88 | defer cancel() 89 | 90 | last, err := tracker.Last() 91 | if err != nil { 92 | t.Fatal(err) 93 | } 94 | if last.LSN != 0 || last.Seq != 0 { 95 | t.Fatal("checkpoint of empty topic should be zero") 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /pkg/cursor/tracker.go: -------------------------------------------------------------------------------- 1 | package cursor 2 | 3 | import "github.com/apache/pulsar-client-go/pulsar" 4 | 5 | type Tracker interface { 6 | Last() (cp Checkpoint, err error) 7 | Start() 8 | Commit(cp Checkpoint, mid pulsar.MessageID) error 9 | Close() 10 | } 11 | -------------------------------------------------------------------------------- /pkg/dblog/control.go: -------------------------------------------------------------------------------- 1 | package dblog 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "strconv" 7 | "sync/atomic" 8 | 9 | "github.com/replicase/pgcapture/pkg/pb" 10 | "github.com/sirupsen/logrus" 11 | ) 12 | 13 | var ErrEmptyURI = errors.New("first request uri should not be empty") 14 | 15 | func NewController(scheduler Scheduler) *Controller { 16 | return &Controller{ 17 | Scheduler: scheduler, 18 | log: logrus.WithFields(logrus.Fields{"From": "Controller"}), 19 | } 20 | } 21 | 22 | type Controller struct { 23 | pb.UnimplementedDBLogControllerServer 24 | 
Scheduler Scheduler 25 | clients int64 26 | log *logrus.Entry 27 | } 28 | 29 | func (c *Controller) PullDumpInfo(server pb.DBLogController_PullDumpInfoServer) (err error) { 30 | id := strconv.FormatInt(atomic.AddInt64(&c.clients, 1), 10) 31 | 32 | msg, err := server.Recv() 33 | if err != nil { 34 | return err 35 | } 36 | uri := msg.Uri 37 | if uri == "" { 38 | return ErrEmptyURI 39 | } 40 | 41 | cancel, err := c.Scheduler.Register(uri, id, func(dump *pb.DumpInfoResponse) error { return server.Send(dump) }) 42 | if err != nil { 43 | return err 44 | } 45 | 46 | log := c.log.WithFields(logrus.Fields{"URI": uri, "client": id}) 47 | log.Infof("registered client %s from uri %s", id, uri) 48 | 49 | defer func() { 50 | cancel() 51 | log.Infof("unregistered client %s to uri %s", id, uri) 52 | }() 53 | 54 | for { 55 | msg, err = server.Recv() 56 | if err != nil { 57 | return err 58 | } 59 | c.Scheduler.Ack(uri, id, msg.RequeueReason) 60 | if msg.RequeueReason != "" { 61 | log.WithFields(logrus.Fields{"Reason": msg.RequeueReason}).Error("requeue") 62 | } 63 | } 64 | } 65 | 66 | func (c *Controller) Schedule(ctx context.Context, req *pb.ScheduleRequest) (*pb.ScheduleResponse, error) { 67 | log := c.log.WithFields(logrus.Fields{ 68 | "URI": req.Uri, 69 | "NumDump": len(req.Dumps), 70 | }) 71 | log.Infof("start scheduling dumps of %s", req.Uri) 72 | 73 | if err := c.Scheduler.Schedule(req.Uri, req.Dumps, func() { 74 | log.Infof("finish scheduling dumps of %s", req.Uri) 75 | }); err != nil { 76 | return nil, err 77 | } 78 | return &pb.ScheduleResponse{}, nil 79 | } 80 | 81 | func (c *Controller) StopSchedule(ctx context.Context, req *pb.StopScheduleRequest) (*pb.StopScheduleResponse, error) { 82 | c.log.WithFields(logrus.Fields{"URI": req.Uri}).Infof("stop scheduling dumps of %s", req.Uri) 83 | c.Scheduler.StopSchedule(req.Uri) 84 | return &pb.StopScheduleResponse{}, nil 85 | } 86 | 87 | func (c *Controller) SetScheduleCoolDown(ctx context.Context, req *pb.SetScheduleCoolDownRequest) (*pb.SetScheduleCoolDownResponse, error) { 88 | c.log.WithFields(logrus.Fields{"URI": req.Uri}).Infof("set scheduling cool down of %s to %v", req.Uri, req.Duration.AsDuration()) 89 | c.Scheduler.SetCoolDown(req.Uri, req.Duration.AsDuration()) 90 | return &pb.SetScheduleCoolDownResponse{}, nil 91 | } 92 | -------------------------------------------------------------------------------- /pkg/dblog/dumper.go: -------------------------------------------------------------------------------- 1 | package dblog 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "io" 8 | "sync" 9 | 10 | "github.com/jackc/pgconn" 11 | "github.com/jackc/pglogrepl" 12 | "github.com/jackc/pgx/v4" 13 | "github.com/replicase/pgcapture/pkg/pb" 14 | "google.golang.org/grpc" 15 | "google.golang.org/grpc/codes" 16 | "google.golang.org/grpc/status" 17 | ) 18 | 19 | type SourceDumper interface { 20 | LoadDump(minLSN uint64, info *pb.DumpInfoResponse) ([]*pb.Change, error) 21 | Stop() 22 | } 23 | 24 | func NewAgentSourceDumper(ctx context.Context, url string) (*AgentSource, error) { 25 | conn, err := grpc.DialContext(ctx, url, grpc.WithInsecure()) 26 | if err != nil { 27 | return nil, err 28 | } 29 | return &AgentSource{ 30 | conn: conn, 31 | client: pb.NewAgentClient(conn), 32 | }, nil 33 | } 34 | 35 | type AgentSource struct { 36 | conn *grpc.ClientConn 37 | client pb.AgentClient 38 | } 39 | 40 | func (a *AgentSource) LoadDump(minLSN uint64, info *pb.DumpInfoResponse) (changes []*pb.Change, err error) { 41 | stream, err := 
a.client.StreamDump(context.Background(), &pb.AgentDumpRequest{ 42 | MinLsn: minLSN, 43 | Info: info, 44 | }) 45 | if err != nil { 46 | if s, ok := status.FromError(err); ok { 47 | switch s.Code() { 48 | case codes.NotFound: 49 | return nil, ErrMissingTable 50 | case codes.Unavailable: 51 | return nil, ErrLSNMissing 52 | case codes.FailedPrecondition: 53 | return nil, ErrLSNFallBehind 54 | } 55 | } 56 | return nil, err 57 | } 58 | for { 59 | change, err := stream.Recv() 60 | if err != nil { 61 | if err == io.EOF { 62 | break 63 | } 64 | return nil, err 65 | } 66 | changes = append(changes, change) 67 | } 68 | stream.CloseSend() 69 | return changes, nil 70 | } 71 | 72 | func (a *AgentSource) Stop() { 73 | a.conn.Close() 74 | } 75 | 76 | func NewPGXSourceDumper(ctx context.Context, url string) (*PGXSourceDumper, error) { 77 | conn, err := pgx.Connect(ctx, url) 78 | if err != nil { 79 | return nil, err 80 | } 81 | return &PGXSourceDumper{conn: conn}, nil 82 | } 83 | 84 | type PGXSourceDumper struct { 85 | conn *pgx.Conn 86 | mu sync.Mutex 87 | 88 | SkipLSNCheck bool 89 | } 90 | 91 | func (p *PGXSourceDumper) LoadDump(minLSN uint64, info *pb.DumpInfoResponse) ([]*pb.Change, error) { 92 | if info.Schema == "" || info.Table == "" { 93 | return nil, ErrMissingTable 94 | } 95 | 96 | p.mu.Lock() 97 | changes, err := p.load(minLSN, info) 98 | p.mu.Unlock() 99 | 100 | if err != nil { 101 | return nil, err 102 | } 103 | return changes, nil 104 | } 105 | 106 | func (p *PGXSourceDumper) Stop() { 107 | p.mu.Lock() 108 | p.conn.Close(context.Background()) 109 | p.mu.Unlock() 110 | } 111 | 112 | // DumpQuery retrieves all the rows in the specified block range. 113 | // pg14 and above knows how to directly access those blocks using a TID Range 114 | // Scan node, so partial scans are efficient. 115 | // The tid format is (block_number, offset_number), the offset number 116 | // being an unsigned short integer (<= 65535). 117 | // Note that we have to use the upper bound as is (and therefore add knowledge 118 | // about the maximum offset number) rather than use (block_number + 1, 0), in 119 | // the unlikely event that we were provided the maximum block number 120 | // Note also that the caller is responsible for providing a properly quoted and 121 | // fully qualified relation name. 
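// For illustration (the parameter values here are hypothetical): rendered
// against the relation "Public"."T""1" with PageBegin=0 and PageEnd=4, the
// query is equivalent to
//
//	SELECT * FROM "Public"."T""1"
//	WHERE ctid >= (0, 0)::text::tid AND ctid <= (4, 65535)::text::tid
//
// which scans heap blocks 0 through 4 inclusive (40KB with the default
// 8KB block size).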
122 | const DumpQuery = `SELECT * FROM %s WHERE ctid >= ($1::bigint, 0)::text::tid AND ctid <= ($2::bigint, 65535)::text::tid` 123 | 124 | func (p *PGXSourceDumper) load(minLSN uint64, info *pb.DumpInfoResponse) ([]*pb.Change, error) { 125 | ctx := context.Background() 126 | 127 | tx, err := p.conn.BeginTx(ctx, pgx.TxOptions{}) 128 | if err != nil { 129 | return nil, err 130 | } 131 | defer tx.Rollback(ctx) 132 | 133 | if !p.SkipLSNCheck { 134 | if err = checkLSN(ctx, tx, minLSN); err != nil { 135 | return nil, err 136 | } 137 | } 138 | 139 | // Properly quote and escape the provided identifiers 140 | var identifiers pgx.Identifier = []string{info.Schema, info.Table} 141 | relation := identifiers.Sanitize() 142 | 143 | rows, err := tx.Query(ctx, fmt.Sprintf(DumpQuery, relation), info.PageBegin, info.PageEnd) 144 | if err != nil { 145 | var pge *pgconn.PgError 146 | if errors.As(err, &pge) && pge.Code == "42P01" { 147 | return nil, ErrMissingTable 148 | } 149 | return nil, err 150 | } 151 | 152 | var changes []*pb.Change 153 | for rows.Next() { 154 | values := rows.RawValues() 155 | change := &pb.Change{Op: pb.Change_UPDATE, Schema: info.Schema, Table: info.Table} 156 | for i, fd := range rows.FieldDescriptions() { 157 | if value := values[i]; value == nil { 158 | change.New = append(change.New, &pb.Field{Name: string(fd.Name), Oid: fd.DataTypeOID, Value: nil}) 159 | } else { 160 | if fd.Format == 0 { 161 | change.New = append(change.New, &pb.Field{Name: string(fd.Name), Oid: fd.DataTypeOID, Value: &pb.Field_Text{Text: string(value)}}) 162 | } else { 163 | change.New = append(change.New, &pb.Field{Name: string(fd.Name), Oid: fd.DataTypeOID, Value: &pb.Field_Binary{Binary: value}}) 164 | } 165 | } 166 | } 167 | changes = append(changes, change) 168 | } 169 | return changes, nil 170 | } 171 | 172 | func checkLSN(ctx context.Context, tx pgx.Tx, minLSN uint64) (err error) { 173 | var str string 174 | var lsn pglogrepl.LSN 175 | err = tx.QueryRow(ctx, "SELECT commit FROM pgcapture.sources WHERE commit IS NOT NULL ORDER BY commit DESC LIMIT 1").Scan(&str) 176 | if errors.Is(err, pgx.ErrNoRows) { 177 | return ErrLSNMissing 178 | } 179 | if err == nil { 180 | lsn, err = pglogrepl.ParseLSN(str) 181 | if err == nil && uint64(lsn) < minLSN { 182 | return ErrLSNFallBehind 183 | } 184 | } 185 | return err 186 | } 187 | 188 | var ErrMissingTable = errors.New("missing schema or table") 189 | var ErrLSNFallBehind = errors.New("lsn fall behind") 190 | var ErrLSNMissing = errors.New("missing lsn record") 191 | -------------------------------------------------------------------------------- /pkg/dblog/dumper_test.go: -------------------------------------------------------------------------------- 1 | package dblog 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "testing" 7 | 8 | "github.com/jackc/pglogrepl" 9 | "github.com/jackc/pgtype" 10 | "github.com/jackc/pgx/v4" 11 | "github.com/replicase/pgcapture/internal/test" 12 | "github.com/replicase/pgcapture/pkg/pb" 13 | "github.com/replicase/pgcapture/pkg/sql" 14 | ) 15 | 16 | func TestPGXSourceDumper(t *testing.T) { 17 | ctx := context.Background() 18 | postgresURL := test.GetPostgresURL() 19 | conn, err := pgx.Connect(ctx, postgresURL) 20 | if err != nil { 21 | t.Fatal(err) 22 | } 23 | defer conn.Close(ctx) 24 | 25 | // We explicitly use a schema name that requires quoting, and a table name 26 | // that also requires escaping to check that the dumper code properly 27 | // handles both of those.
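// With these names, pgx.Identifier.Sanitize renders the relation as
// "Public"."T""1", doubling the embedded double quote.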
28 | conn.Exec(ctx, `DROP SCHEMA IF EXISTS "Public" CASCADE; CREATE SCHEMA "Public"`) 29 | conn.Exec(ctx, "DROP EXTENSION IF EXISTS pgcapture") 30 | conn.Exec(ctx, sql.InstallExtension) 31 | conn.Exec(ctx, `CREATE TABLE "Public"."T""1" AS SELECT * FROM generate_series(1,100000) AS id; ANALYZE "Public"."T""1"`) 32 | 33 | dumper, err := NewPGXSourceDumper(ctx, postgresURL) 34 | if err != nil { 35 | t.Fatal(err) 36 | } 37 | defer dumper.Stop() 38 | 39 | if _, err := dumper.LoadDump(0, &pb.DumpInfoResponse{}); !errors.Is(err, ErrMissingTable) { 40 | t.Fatal(err) 41 | } 42 | if _, err := dumper.LoadDump(0, &pb.DumpInfoResponse{Schema: "Public", Table: `T"1`}); !errors.Is(err, ErrLSNMissing) { 43 | t.Fatal(err) 44 | } 45 | 46 | conn.Exec(ctx, "INSERT INTO pgcapture.sources (id,commit) VALUES ($1,$2)", `Public.T"1`, pglogrepl.LSN(0).String()) 47 | 48 | if _, err := dumper.LoadDump(0, &pb.DumpInfoResponse{Schema: "any", Table: "any"}); !errors.Is(err, ErrMissingTable) { 49 | t.Fatal(err) 50 | } 51 | if _, err := dumper.LoadDump(0, &pb.DumpInfoResponse{Schema: "Public", Table: "any"}); !errors.Is(err, ErrMissingTable) { 52 | t.Fatal(err) 53 | } 54 | 55 | if _, err := dumper.LoadDump(100, &pb.DumpInfoResponse{Schema: "Public", Table: `T"1`}); !errors.Is(err, ErrLSNFallBehind) { 56 | t.Fatal(err) 57 | } 58 | 59 | conn.Exec(ctx, "UPDATE pgcapture.sources SET commit=$2 WHERE id = $1", `Public.T"1`, pglogrepl.LSN(100).String()) 60 | 61 | var pages int 62 | if err := conn.QueryRow(ctx, `SELECT relpages FROM pg_class WHERE relname = 'T"1' AND relnamespace::regnamespace::text = '"Public"'`).Scan(&pages); err != nil || pages == 0 { 63 | t.Fatal(err) 64 | } 65 | 66 | seq := int32(1) 67 | for i := uint32(0); i < uint32(pages); i += 5 { 68 | changes, err := dumper.LoadDump(100, &pb.DumpInfoResponse{Schema: "Public", Table: `T"1`, PageBegin: i, PageEnd: i + 4}) 69 | if err != nil { 70 | t.Fatal(err) 71 | } 72 | for _, change := range changes { 73 | if change.Schema != "Public" { 74 | t.Fatal("unexpected") 75 | } 76 | if change.Table != `T"1` { 77 | t.Fatal("unexpected") 78 | } 79 | if change.Op != pb.Change_UPDATE { 80 | t.Fatal("unexpected") 81 | } 82 | if change.Old != nil { 83 | t.Fatal("unexpected") 84 | } 85 | if len(change.New) != 1 { 86 | t.Fatal("unexpected") 87 | } 88 | if change.New[0].Name != "id" { 89 | t.Fatal("unexpected") 90 | } 91 | if change.New[0].Oid != 23 { 92 | t.Fatal("unexpected") 93 | } 94 | var id pgtype.Int4 95 | if err := id.DecodeBinary(conn.ConnInfo(), change.New[0].GetBinary()); err != nil { 96 | t.Fatal(err) 97 | } 98 | if id.Int != seq { 99 | t.Fatal("unexpected") 100 | } 101 | seq++ 102 | } 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /pkg/dblog/puller.go: -------------------------------------------------------------------------------- 1 | package dblog 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | 7 | "github.com/replicase/pgcapture/pkg/pb" 8 | "google.golang.org/grpc/codes" 9 | "google.golang.org/grpc/status" 10 | ) 11 | 12 | type DumpInfoPuller interface { 13 | Pull(ctx context.Context, uri string) chan DumpInfo 14 | } 15 | 16 | type DumpInfo struct { 17 | Resp *pb.DumpInfoResponse 18 | client pb.DBLogController_PullDumpInfoClient 19 | } 20 | 21 | func (i *DumpInfo) Ack(requeueReason string) error { 22 | if i.client == nil { 23 | return nil 24 | } 25 | err := i.client.Send(&pb.DumpInfoRequest{RequeueReason: requeueReason}) 26 | i.client = nil 27 | return err 28 | } 29 | 30 | type GRPCDumpInfoPuller struct { 31 | 
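// Client is the controller's gRPC client, typically constructed with
// pb.NewDBLogControllerClient (an assumption; the wiring lives outside this
// file). Pull below re-establishes the PullDumpInfo stream until ctx is
// canceled.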
Client pb.DBLogControllerClient 32 | } 33 | 34 | func (p *GRPCDumpInfoPuller) Pull(ctx context.Context, uri string) chan DumpInfo { 35 | resp := make(chan DumpInfo, 1) 36 | 37 | go func() { 38 | defer close(resp) 39 | for { 40 | err := p.pulling(ctx, uri, resp) 41 | if e, ok := status.FromError(err); (ok && e.Code() == codes.Canceled) || errors.Is(err, context.Canceled) { 42 | return 43 | } 44 | } 45 | }() 46 | 47 | return resp 48 | } 49 | 50 | func (p *GRPCDumpInfoPuller) pulling(ctx context.Context, uri string, resp chan DumpInfo) error { 51 | client, err := p.Client.PullDumpInfo(ctx) 52 | if err != nil { 53 | return err 54 | } 55 | if err = client.Send(&pb.DumpInfoRequest{Uri: uri}); err != nil { 56 | return err 57 | } 58 | for { 59 | msg, err := client.Recv() 60 | if err != nil { 61 | return err 62 | } 63 | resp <- DumpInfo{ 64 | Resp: msg, 65 | client: client, 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /pkg/dblog/resolver.go: -------------------------------------------------------------------------------- 1 | package dblog 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | 7 | "github.com/apache/pulsar-client-go/pulsar" 8 | "github.com/replicase/pgcapture/pkg/source" 9 | ) 10 | 11 | type SourceResolver interface { 12 | Source(ctx context.Context, uri string) (source.RequeueSource, error) 13 | Dumper(ctx context.Context, uri string) (SourceDumper, error) 14 | } 15 | 16 | type StaticAgentPulsarURIConfig struct { 17 | PulsarURL string 18 | PulsarTopic string 19 | PulsarSubscription string 20 | PulsarReplicateState bool 21 | AgentURL string 22 | } 23 | 24 | func NewStaticAgentPulsarResolver(config map[string]StaticAgentPulsarURIConfig) *StaticAgentPulsarResolver { 25 | return &StaticAgentPulsarResolver{config: config} 26 | } 27 | 28 | type StaticAgentPulsarResolver struct { 29 | config map[string]StaticAgentPulsarURIConfig 30 | } 31 | 32 | func (r *StaticAgentPulsarResolver) Source(ctx context.Context, uri string) (source.RequeueSource, error) { 33 | config, ok := r.config[uri] 34 | if !ok { 35 | return nil, ErrURINotFound 36 | } 37 | return &source.PulsarConsumerSource{ 38 | PulsarOption: pulsar.ClientOptions{URL: config.PulsarURL}, 39 | PulsarTopic: config.PulsarTopic, 40 | PulsarSubscription: config.PulsarSubscription, 41 | PulsarReplicateState: config.PulsarReplicateState, 42 | }, nil 43 | } 44 | 45 | func (r *StaticAgentPulsarResolver) Dumper(ctx context.Context, uri string) (SourceDumper, error) { 46 | config, ok := r.config[uri] 47 | if !ok { 48 | return nil, ErrURINotFound 49 | } 50 | return NewAgentSourceDumper(ctx, config.AgentURL) 51 | } 52 | 53 | var ErrURINotFound = errors.New("requested uri not found") 54 | -------------------------------------------------------------------------------- /pkg/dblog/resolver_test.go: -------------------------------------------------------------------------------- 1 | package dblog 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "net" 7 | "testing" 8 | 9 | "github.com/replicase/pgcapture/pkg/pb" 10 | "google.golang.org/grpc" 11 | ) 12 | 13 | func TestStaticPGXPulsarResolver_ErrURINotFound(t *testing.T) { 14 | ctx := context.Background() 15 | r := NewStaticAgentPulsarResolver(nil) 16 | if _, err := r.Source(ctx, "any"); !errors.Is(err, ErrURINotFound) { 17 | t.Fatal("unexpected") 18 | } 19 | if _, err := r.Dumper(ctx, "any"); !errors.Is(err, ErrURINotFound) { 20 | t.Fatal("unexpected") 21 | } 22 | } 23 | 24 | type agent struct { 25 | pb.UnimplementedAgentServer 26 | } 27 | 28 | func 
TestStaticAgentPulsarResolver(t *testing.T) { 29 | ctx := context.Background() 30 | 31 | lis, err := net.Listen("tcp", "127.0.0.1:0") 32 | if err != nil { 33 | t.Fatal(err) 34 | } 35 | 36 | server := grpc.NewServer() 37 | server.RegisterService(&pb.Agent_ServiceDesc, &agent{}) 38 | go server.Serve(lis) 39 | 40 | r := NewStaticAgentPulsarResolver(map[string]StaticAgentPulsarURIConfig{ 41 | URI1: { 42 | PulsarURL: "", 43 | PulsarTopic: "", 44 | PulsarSubscription: "", 45 | AgentURL: lis.Addr().String(), 46 | }, 47 | }) 48 | source, err := r.Source(ctx, URI1) 49 | if source == nil || err != nil { 50 | t.Fatal(err) 51 | } 52 | dumper, err := r.Dumper(ctx, URI1) 53 | if err != nil { 54 | t.Fatal(err) 55 | } 56 | dumper.Stop() 57 | server.Stop() 58 | } 59 | -------------------------------------------------------------------------------- /pkg/dblog/scheduler.go: -------------------------------------------------------------------------------- 1 | package dblog 2 | 3 | import ( 4 | "errors" 5 | "sync" 6 | "time" 7 | 8 | "github.com/replicase/pgcapture/pkg/pb" 9 | ) 10 | 11 | type OnSchedule func(response *pb.DumpInfoResponse) error 12 | type AfterSchedule func() 13 | type CancelFunc func() 14 | 15 | var ErrAlreadyScheduled = errors.New("already scheduled") 16 | var ErrAlreadyRegistered = errors.New("already registered") 17 | 18 | type Scheduler interface { 19 | Schedule(uri string, dumps []*pb.DumpInfoResponse, fn AfterSchedule) error 20 | Register(uri string, client string, fn OnSchedule) (CancelFunc, error) 21 | Ack(uri string, client string, requeue string) 22 | SetCoolDown(uri string, dur time.Duration) 23 | StopSchedule(uri string) 24 | } 25 | 26 | func NewMemoryScheduler(interval time.Duration) *MemoryScheduler { 27 | return &MemoryScheduler{ 28 | interval: interval, 29 | pending: make(map[string]*pending), 30 | clients: make(map[string]map[string]*track), 31 | } 32 | } 33 | 34 | type MemoryScheduler struct { 35 | interval time.Duration 36 | pending map[string]*pending 37 | clients map[string]map[string]*track 38 | pendingMu sync.Mutex 39 | clientsMu sync.Mutex 40 | } 41 | 42 | func (s *MemoryScheduler) Schedule(uri string, dumps []*pb.DumpInfoResponse, fn AfterSchedule) error { 43 | s.pendingMu.Lock() 44 | defer s.pendingMu.Unlock() 45 | 46 | if _, ok := s.pending[uri]; ok { 47 | return ErrAlreadyScheduled 48 | } 49 | s.pending[uri] = &pending{dumps: dumps} 50 | 51 | go s.schedule(uri, fn) 52 | return nil 53 | } 54 | 55 | func (s *MemoryScheduler) schedule(uri string, fn AfterSchedule) { 56 | defer func() { 57 | s.pendingMu.Lock() 58 | delete(s.pending, uri) 59 | s.pendingMu.Unlock() 60 | fn() 61 | }() 62 | 63 | for { 64 | time.Sleep(s.interval) 65 | 66 | loops := 0 67 | s.clientsMu.Lock() 68 | loops = len(s.clients[uri]) 69 | s.clientsMu.Unlock() 70 | loops++ 71 | 72 | for i := 0; i < loops; i++ { 73 | if s.scheduleOne(uri) { 74 | return 75 | } 76 | } 77 | } 78 | } 79 | 80 | func (s *MemoryScheduler) scheduleOne(uri string) bool { 81 | var candidate *track 82 | var dump *pb.DumpInfoResponse 83 | 84 | busy := 0 85 | remain := 0 86 | stopping := false 87 | 88 | s.clientsMu.Lock() 89 | if clients, ok := s.clients[uri]; ok { 90 | for _, track := range clients { 91 | if track.dump == nil { 92 | candidate = track 93 | } else { 94 | busy++ 95 | } 96 | } 97 | } 98 | s.clientsMu.Unlock() 99 | 100 | s.pendingMu.Lock() 101 | if pending, ok := s.pending[uri]; ok { 102 | remain = pending.Remaining() 103 | stopping = pending.stopping 104 | coolDown := pending.CoolDown() 105 | if candidate != nil && 
!stopping && 106 | (coolDown == 0 || time.Now().Sub(candidate.ackTs) > coolDown) { 107 | dump = pending.Pop() 108 | } 109 | } 110 | s.pendingMu.Unlock() 111 | 112 | if candidate != nil && dump != nil { 113 | s.clientsMu.Lock() 114 | candidate.dump = dump 115 | s.clientsMu.Unlock() 116 | if err := candidate.schedule(dump); err != nil { 117 | candidate.cancel() 118 | } 119 | } 120 | 121 | if stopping { 122 | return busy == 0 123 | } 124 | 125 | return busy == 0 && remain == 0 126 | } 127 | 128 | func (s *MemoryScheduler) Register(uri string, client string, fn OnSchedule) (CancelFunc, error) { 129 | s.clientsMu.Lock() 130 | defer s.clientsMu.Unlock() 131 | 132 | clients, ok := s.clients[uri] 133 | if !ok { 134 | clients = make(map[string]*track) 135 | s.clients[uri] = clients 136 | } 137 | if _, ok = clients[client]; ok { 138 | return nil, ErrAlreadyRegistered 139 | } 140 | track := &track{schedule: fn, cancel: func() { 141 | s.Ack(uri, client, "canceled") 142 | s.clientsMu.Lock() 143 | delete(clients, client) 144 | s.clientsMu.Unlock() 145 | }} 146 | clients[client] = track 147 | 148 | return track.cancel, nil 149 | } 150 | 151 | func (s *MemoryScheduler) Ack(uri string, client string, requeue string) { 152 | var dump *pb.DumpInfoResponse 153 | 154 | s.clientsMu.Lock() 155 | if clients, ok := s.clients[uri]; ok { 156 | if track, ok := clients[client]; ok { 157 | dump = track.dump 158 | track.dump = nil 159 | track.ackTs = time.Now() 160 | } 161 | } 162 | s.clientsMu.Unlock() 163 | 164 | if dump != nil { 165 | s.pendingMu.Lock() 166 | if pending, ok := s.pending[uri]; ok { 167 | if requeue != "" { 168 | pending.Push(dump) 169 | pending.backoff++ 170 | } else { 171 | pending.backoff = 0 172 | } 173 | } 174 | s.pendingMu.Unlock() 175 | } 176 | } 177 | 178 | func (s *MemoryScheduler) SetCoolDown(uri string, dur time.Duration) { 179 | s.pendingMu.Lock() 180 | if pending, ok := s.pending[uri]; ok { 181 | pending.coolDown = dur 182 | } 183 | s.pendingMu.Unlock() 184 | } 185 | 186 | func (s *MemoryScheduler) StopSchedule(uri string) { 187 | s.pendingMu.Lock() 188 | if pending, ok := s.pending[uri]; ok { 189 | pending.stopping = true 190 | } 191 | s.pendingMu.Unlock() 192 | } 193 | 194 | type track struct { 195 | dump *pb.DumpInfoResponse 196 | ackTs time.Time 197 | schedule OnSchedule 198 | cancel CancelFunc 199 | } 200 | 201 | type pending struct { 202 | dumps []*pb.DumpInfoResponse 203 | coolDown time.Duration 204 | backoff int 205 | offset int 206 | stopping bool 207 | } 208 | 209 | const ( 210 | backoffGap time.Duration = 2 211 | backoffMax = 8 212 | ) 213 | 214 | func (p *pending) CoolDown() time.Duration { 215 | if p.backoff == 0 { 216 | return p.coolDown 217 | } 218 | step := backoffGap 219 | for i := 0; i < backoffMax && i < p.backoff-1; i++ { 220 | step *= 2 221 | } 222 | return p.coolDown + (step * time.Second) 223 | } 224 | 225 | func (p *pending) Remaining() int { 226 | return len(p.dumps) - p.offset 227 | } 228 | 229 | func (p *pending) Pop() *pb.DumpInfoResponse { 230 | if len(p.dumps) == p.offset { 231 | return nil 232 | } 233 | ret := p.dumps[p.offset] 234 | p.offset++ 235 | return ret 236 | } 237 | 238 | func (p *pending) Push(dump *pb.DumpInfoResponse) { 239 | if p.offset == 0 { 240 | return 241 | } 242 | p.offset-- 243 | p.dumps[p.offset] = dump 244 | } 245 | -------------------------------------------------------------------------------- /pkg/dblog/scheduler_test.go: -------------------------------------------------------------------------------- 1 | package dblog 2 | 3 | import 
( 4 | "errors" 5 | "strconv" 6 | "testing" 7 | "time" 8 | 9 | "github.com/replicase/pgcapture/pkg/pb" 10 | ) 11 | 12 | func TestMemoryScheduler_Schedule(t *testing.T) { 13 | URI1 := "URI1" 14 | URI2 := "URI2" 15 | 16 | groups := map[string]*group{ 17 | URI1: {dumps: makeDumps(URI1, 100), clients: make([]*client, 2), done: make(chan struct{})}, 18 | URI2: {dumps: makeDumps(URI2, 100), clients: make([]*client, 2), done: make(chan struct{})}, 19 | } 20 | 21 | s := NewMemoryScheduler(time.Millisecond) 22 | 23 | // start schedule each group without error 24 | for uri, group := range groups { 25 | group := group 26 | if err := s.Schedule(uri, group.dumps, func() { 27 | close(group.done) 28 | }); err != nil { 29 | t.Fatal(err) 30 | } 31 | } 32 | 33 | for uri, group := range groups { 34 | if err := s.Schedule(uri, group.dumps, nil); !errors.Is(err, ErrAlreadyScheduled) { 35 | t.Fatal("scheduled uri should be reject until finished") 36 | } 37 | } 38 | 39 | // callback of each group should not have race 40 | for uri, group := range groups { 41 | uri := uri 42 | group := group 43 | for i := range group.clients { 44 | i := i 45 | group.clients[i] = &client{} 46 | group.clients[i].cancel, _ = s.Register(uri, strconv.Itoa(i), func(dump *pb.DumpInfoResponse) error { 47 | if dump != group.dumps[group.counter] { 48 | t.Fatalf("dump should be delivered in order if no error") 49 | } 50 | group.counter++ 51 | group.clients[i].counter++ 52 | go func() { 53 | s.Ack(uri, strconv.Itoa(i), "") 54 | }() 55 | return nil 56 | }) 57 | } 58 | } 59 | 60 | for uri, group := range groups { 61 | for i := range group.clients { 62 | if _, err := s.Register(uri, strconv.Itoa(i), nil); !errors.Is(err, ErrAlreadyRegistered) { 63 | t.Fatal("client can't be registered twice until unregistered") 64 | } 65 | } 66 | } 67 | 68 | for _, group := range groups { 69 | <-group.done 70 | for _, client := range group.clients { 71 | if client.counter == 0 { 72 | t.Fatalf("all clients should be scheduled") 73 | } 74 | client.cancel() 75 | } 76 | } 77 | 78 | done := make(chan struct{}) 79 | // start a new schedule with the same uri 80 | dumps := []*pb.DumpInfoResponse{{Table: URI1, PageBegin: 777}} 81 | if err := s.Schedule(URI1, dumps, func() { 82 | done <- struct{}{} 83 | }); err != nil { 84 | t.Fatal(err) 85 | } 86 | 87 | // error client should be unregistered later 88 | scheduled := make(chan struct{}) 89 | if _, err := s.Register(URI1, "1", func(dump *pb.DumpInfoResponse) error { 90 | scheduled <- struct{}{} 91 | return errors.New("any error") 92 | }); err != nil { 93 | t.Fatal(err) 94 | } 95 | // wait for unregister error client 96 | <-scheduled 97 | // register again, and re-consume the fail dump 98 | var err error 99 | var cancel CancelFunc 100 | for { 101 | cancel, err = s.Register(URI1, "1", func(dump *pb.DumpInfoResponse) error { 102 | if dump != dumps[0] { 103 | t.Fatalf("dump not match") 104 | } 105 | go func() { 106 | s.Ack(URI1, "1", "") 107 | }() 108 | scheduled <- struct{}{} 109 | return nil 110 | }) 111 | if err == nil { 112 | break 113 | } 114 | if errors.Is(err, ErrAlreadyRegistered) { 115 | continue 116 | } 117 | t.Fatal(err) 118 | } 119 | <-scheduled 120 | <-done 121 | cancel() 122 | 123 | // start a new schedule with the same uri 124 | // and set cool down duration 125 | // stop schedule in the middle 126 | dumps = []*pb.DumpInfoResponse{ 127 | {Table: URI1, PageBegin: 777}, 128 | {Table: URI1, PageBegin: 999}, 129 | {Table: URI1, PageBegin: 777}, 130 | {Table: URI1, PageBegin: 999}, 131 | } 132 | if err := 
s.Schedule(URI1, dumps, func() { 133 | done <- struct{}{} 134 | }); err != nil { 135 | t.Fatal(err) 136 | } 137 | coolDown := time.Millisecond * 10 138 | s.SetCoolDown(URI1, coolDown) 139 | 140 | var received []time.Time 141 | if _, err := s.Register(URI1, "1", func(dump *pb.DumpInfoResponse) error { 142 | received = append(received, time.Now()) 143 | if len(received) == 2 { 144 | s.StopSchedule(URI1) 145 | } 146 | go func() { 147 | s.Ack(URI1, "1", "") 148 | }() 149 | return nil 150 | }); err != nil { 151 | t.Fatal(err) 152 | } 153 | <-done 154 | if received[1].Sub(received[0]) < coolDown { 155 | t.Fatalf("received gap should not be smaller than cool down interval") 156 | } 157 | if len(received) != 2 { 158 | t.Fatalf("scheduler is not stopped as requested") 159 | } 160 | } 161 | 162 | func makeDumps(table string, n int) (dumps []*pb.DumpInfoResponse) { 163 | for i := 0; i < n; i++ { 164 | dumps = append(dumps, &pb.DumpInfoResponse{Table: table, PageBegin: uint32(i)}) 165 | } 166 | return 167 | } 168 | 169 | type group struct { 170 | dumps []*pb.DumpInfoResponse 171 | counter int 172 | clients []*client 173 | done chan struct{} 174 | } 175 | 176 | type client struct { 177 | counter int 178 | cancel CancelFunc 179 | } 180 | -------------------------------------------------------------------------------- /pkg/decode/bytes.go: -------------------------------------------------------------------------------- 1 | package decode 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "io" 7 | ) 8 | 9 | func NewBytesReader(data []byte) *BytesReader { 10 | return &BytesReader{data: data} 11 | } 12 | 13 | type BytesReader struct { 14 | data []byte 15 | off int 16 | } 17 | 18 | func (b *BytesReader) Skip(n int) { 19 | b.off += n 20 | return 21 | } 22 | 23 | func (b *BytesReader) Byte() (v byte, err error) { 24 | if b.off >= len(b.data) { 25 | return 0, io.EOF 26 | } 27 | v = b.data[b.off] 28 | b.off++ 29 | return 30 | } 31 | 32 | func (b *BytesReader) Uint32() (v uint32, err error) { 33 | end := b.off + 4 34 | if end > len(b.data) { 35 | return 0, io.EOF 36 | } 37 | v = binary.BigEndian.Uint32(b.data[b.off:end]) 38 | b.off = end 39 | return 40 | } 41 | 42 | func (b *BytesReader) Uint16() (v uint16, err error) { 43 | end := b.off + 2 44 | if end > len(b.data) { 45 | return 0, io.EOF 46 | } 47 | v = binary.BigEndian.Uint16(b.data[b.off:end]) 48 | b.off = end 49 | return 50 | } 51 | 52 | func (b *BytesReader) Int32() (v int, err error) { 53 | uv, err := b.Uint32() 54 | return int(uv), err 55 | } 56 | 57 | func (b *BytesReader) Int16() (v int, err error) { 58 | uv, err := b.Uint16() 59 | return int(uv), err 60 | } 61 | 62 | func (b *BytesReader) Int8() (v int, err error) { 63 | uv, err := b.Byte() 64 | return int(uv), err 65 | } 66 | 67 | func (b *BytesReader) IntEndIdx() (v int, err error) { 68 | idx := bytes.IndexByte(b.data[b.off:], byte(0)) 69 | if idx == -1 { 70 | return 0, io.EOF 71 | } 72 | return idx, nil 73 | } 74 | 75 | func (b *BytesReader) stringN(n int) (v string, err error) { 76 | end := b.off + n 77 | if end > len(b.data) { 78 | return "", io.EOF 79 | } 80 | v = string(b.data[b.off : end-1]) 81 | b.off = end 82 | return 83 | } 84 | 85 | func (b *BytesReader) String8() (v string, err error) { 86 | n, err := b.Int8() 87 | if err != nil { 88 | return "", err 89 | } 90 | return b.stringN(n) 91 | } 92 | 93 | func (b *BytesReader) String16() (v string, err error) { 94 | n, err := b.Int16() 95 | if err != nil { 96 | return "", err 97 | } 98 | return b.stringN(n) 99 | } 100 | 101 | func (b
*BytesReader) Bytes32() (v []byte, err error) { 102 | n, err := b.Int32() 103 | if err != nil { 104 | return nil, err 105 | } 106 | end := b.off + n 107 | if end > len(b.data) { 108 | return nil, io.EOF 109 | } 110 | v = b.data[b.off:end] 111 | b.off = end 112 | return 113 | } 114 | 115 | func (b *BytesReader) StringEnd() (v string, err error) { 116 | idx, err := b.IntEndIdx() 117 | if err != nil { 118 | return "", err 119 | } 120 | 121 | return b.stringN(idx + 1) 122 | } 123 | -------------------------------------------------------------------------------- /pkg/decode/bytes_test.go: -------------------------------------------------------------------------------- 1 | package decode 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "io" 7 | "math" 8 | "testing" 9 | ) 10 | 11 | func TestBytesReader(t *testing.T) { 12 | buf := &bytes.Buffer{} 13 | binary.Write(buf, binary.BigEndian, int8(math.MaxInt8)) 14 | binary.Write(buf, binary.BigEndian, int16(math.MaxInt16)) 15 | binary.Write(buf, binary.BigEndian, int32(math.MaxInt32)) 16 | binary.Write(buf, binary.BigEndian, uint16(math.MaxUint16)) 17 | binary.Write(buf, binary.BigEndian, uint32(math.MaxUint32)) 18 | buf.Write([]byte{'B'}) 19 | buf.Write([]byte{0}) 20 | 21 | variable := []byte{'A', 'B', 'C', 'D', 'E', 'F', 'G', 0} 22 | 23 | binary.Write(buf, binary.BigEndian, int32(len(variable))) 24 | buf.Write(variable) 25 | 26 | binary.Write(buf, binary.BigEndian, int8(len(variable))) 27 | buf.Write(variable) 28 | 29 | binary.Write(buf, binary.BigEndian, int16(len(variable))) 30 | buf.Write(variable) 31 | 32 | reader := NewBytesReader(buf.Bytes()) 33 | if v, err := reader.Int8(); v != math.MaxInt8 || err != nil { 34 | t.Fatalf("unexpected %v, %v", v, err) 35 | } 36 | if v, err := reader.Int16(); v != math.MaxInt16 || err != nil { 37 | t.Fatalf("unexpected %v, %v", v, err) 38 | } 39 | if v, err := reader.Int32(); v != math.MaxInt32 || err != nil { 40 | t.Fatalf("unexpected %v, %v", v, err) 41 | } 42 | if v, err := reader.Uint16(); v != uint16(math.MaxUint16) || err != nil { 43 | t.Fatalf("unexpected %v, %v", v, err) 44 | } 45 | if v, err := reader.Uint32(); v != uint32(math.MaxUint32) || err != nil { 46 | t.Fatalf("unexpected %v, %v", v, err) 47 | } 48 | if v, err := reader.Byte(); v != 'B' || err != nil { 49 | t.Fatalf("unexpected %v, %v", v, err) 50 | } 51 | reader.Skip(1) 52 | if v, err := reader.Bytes32(); !bytes.Equal(v, variable) || err != nil { 53 | t.Fatalf("unexpected %v, %v", v, err) 54 | } 55 | if v, err := reader.String8(); v != string(variable[:len(variable)-1]) || err != nil { 56 | t.Fatalf("unexpected %v, %v", v, err) 57 | } 58 | if v, err := reader.String16(); v != string(variable[:len(variable)-1]) || err != nil { 59 | t.Fatalf("unexpected %v, %v", v, err) 60 | } 61 | // ended 62 | if _, err := reader.Byte(); err != io.EOF { 63 | t.Fatalf("unexpected %v", err) 64 | } 65 | if _, err := reader.Uint32(); err != io.EOF { 66 | t.Fatalf("unexpected %v", err) 67 | } 68 | if _, err := reader.Uint16(); err != io.EOF { 69 | t.Fatalf("unexpected %v", err) 70 | } 71 | if _, err := reader.String8(); err != io.EOF { 72 | t.Fatalf("unexpected %v", err) 73 | } 74 | if _, err := reader.String16(); err != io.EOF { 75 | t.Fatalf("unexpected %v", err) 76 | } 77 | if _, err := reader.Bytes32(); err != io.EOF { 78 | t.Fatalf("unexpected %v", err) 79 | } 80 | } 81 | 82 | func TestBytesReader_IncompleteString8(t *testing.T) { 83 | buf := &bytes.Buffer{} 84 | binary.Write(buf, binary.BigEndian, int8(1)) 85 | if _, err := 
NewBytesReader(buf.Bytes()).String8(); err != io.EOF { 86 | t.Fatalf("unexpected %v", err) 87 | } 88 | } 89 | 90 | func TestBytesReader_IncompleteString16(t *testing.T) { 91 | buf := &bytes.Buffer{} 92 | binary.Write(buf, binary.BigEndian, int16(1)) 93 | if _, err := NewBytesReader(buf.Bytes()).String16(); err != io.EOF { 94 | t.Fatalf("unexpected %v", err) 95 | } 96 | } 97 | 98 | func TestBytesReader_IncompleteBytes32(t *testing.T) { 99 | buf := &bytes.Buffer{} 100 | binary.Write(buf, binary.BigEndian, int32(1)) 101 | if _, err := NewBytesReader(buf.Bytes()).Bytes32(); err != io.EOF { 102 | t.Fatalf("unexpected %v", err) 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /pkg/decode/decoder.go: -------------------------------------------------------------------------------- 1 | package decode 2 | 3 | import ( 4 | "github.com/replicase/pgcapture/pkg/pb" 5 | ) 6 | 7 | const ( 8 | ExtensionSchema = "pgcapture" 9 | ExtensionDDLLogs = "ddl_logs" 10 | ExtensionSources = "sources" 11 | ) 12 | 13 | const ( 14 | PGLogicalOutputPlugin = "pglogical_output" 15 | PGOutputPlugin = "pgoutput" 16 | ) 17 | 18 | var OpMap = map[byte]pb.Change_Operation{ 19 | 'I': pb.Change_INSERT, 20 | 'U': pb.Change_UPDATE, 21 | 'D': pb.Change_DELETE, 22 | } 23 | 24 | type Relation struct { 25 | Rel uint32 26 | NspName string 27 | RelName string 28 | Fields []string 29 | } 30 | 31 | type RowChange struct { 32 | Op byte 33 | Rel uint32 34 | Old []Field 35 | New []Field 36 | } 37 | 38 | type Field struct { 39 | Format byte 40 | Datum []byte 41 | } 42 | 43 | type Decoder interface { 44 | Decode(in []byte) (*pb.Message, error) 45 | GetPluginArgs() []string 46 | } 47 | 48 | func IsDDL(m *pb.Change) bool { 49 | return m.Schema == ExtensionSchema && m.Table == ExtensionDDLLogs 50 | } 51 | 52 | func Ignore(m *pb.Change) bool { 53 | return m.Schema == ExtensionSchema && m.Table == ExtensionSources 54 | } 55 | 56 | func makeOldPBTuple(schema *PGXSchemaLoader, rel Relation, src []Field, noNull bool) (fields []*pb.Field) { 57 | if src == nil { 58 | return nil 59 | } 60 | fields = make([]*pb.Field, 0, len(src)) 61 | for i, s := range src { 62 | if noNull && s.Datum == nil { 63 | continue 64 | } 65 | typeInfo, err := schema.GetTypeInfo(rel.NspName, rel.RelName, rel.Fields[i]) 66 | if err != nil { 67 | // TODO: add optional logging, because it will generate a lot of logs when refreshing materialized view 68 | continue 69 | } 70 | switch s.Format { 71 | case 'b': 72 | fields = append(fields, &pb.Field{Name: rel.Fields[i], Oid: typeInfo.OID, Value: &pb.Field_Binary{Binary: s.Datum}}) 73 | case 'n': 74 | fields = append(fields, &pb.Field{Name: rel.Fields[i], Oid: typeInfo.OID, Value: nil}) 75 | case 't': 76 | fields = append(fields, &pb.Field{Name: rel.Fields[i], Oid: typeInfo.OID, Value: &pb.Field_Text{Text: string(s.Datum)}}) 77 | case 'u': 78 | continue // unchanged toast field should be excluded 79 | } 80 | } 81 | return fields 82 | } 83 | 84 | func makeNewPBTuple(schema *PGXSchemaLoader, rel Relation, old, new []Field, noNull bool) (fields []*pb.Field) { 85 | if new == nil { 86 | return nil 87 | } 88 | fields = make([]*pb.Field, 0, len(new)) 89 | for i, s := range new { 90 | if noNull && s.Datum == nil { 91 | continue 92 | } 93 | typeInfo, err := schema.GetTypeInfo(rel.NspName, rel.RelName, rel.Fields[i]) 94 | if err != nil { 95 | // TODO: add optional logging, because it will generate a lot of logs when refreshing materialized view 96 | continue 97 | } 98 | ReAppend: 99 | switch 
s.Format { 100 | case 'b': 101 | fields = append(fields, &pb.Field{Name: rel.Fields[i], Oid: typeInfo.OID, Value: &pb.Field_Binary{Binary: s.Datum}}) 102 | case 'n': 103 | fields = append(fields, &pb.Field{Name: rel.Fields[i], Oid: typeInfo.OID, Value: nil}) 104 | case 't': 105 | fields = append(fields, &pb.Field{Name: rel.Fields[i], Oid: typeInfo.OID, Value: &pb.Field_Text{Text: string(s.Datum)}}) 106 | case 'u': 107 | // fill the unchanged field with old value when ReplicaIdentity is full 108 | // otherwise, skip the unchanged field 109 | if typeInfo.ReplicaIdentity == ReplicaIdentityFull && old[i].Format != 'u' { 110 | s.Format = old[i].Format 111 | s.Datum = old[i].Datum 112 | goto ReAppend 113 | } 114 | continue 115 | } 116 | } 117 | return fields 118 | } 119 | -------------------------------------------------------------------------------- /pkg/decode/decoder_test.go: -------------------------------------------------------------------------------- 1 | package decode 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/replicase/pgcapture/pkg/pb" 7 | ) 8 | 9 | func TestIsDDL(t *testing.T) { 10 | if !IsDDL(&pb.Change{Schema: ExtensionSchema, Table: ExtensionDDLLogs}) { 11 | t.Error("unexpected") 12 | } 13 | if IsDDL(&pb.Change{Schema: ExtensionSchema, Table: "other"}) { 14 | t.Error("unexpected") 15 | } 16 | if IsDDL(&pb.Change{Schema: "other", Table: ExtensionDDLLogs}) { 17 | t.Error("unexpected") 18 | } 19 | } 20 | 21 | func TestIgnore(t *testing.T) { 22 | if !Ignore(&pb.Change{Schema: ExtensionSchema, Table: ExtensionSources}) { 23 | t.Error("unexpected") 24 | } 25 | if Ignore(&pb.Change{Schema: ExtensionSchema, Table: "other"}) { 26 | t.Error("unexpected") 27 | } 28 | if Ignore(&pb.Change{Schema: "other", Table: ExtensionSources}) { 29 | t.Error("unexpected") 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /pkg/decode/main_test.go: -------------------------------------------------------------------------------- 1 | package decode 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "fmt" 7 | "strings" 8 | 9 | "github.com/jackc/pgx/v5" 10 | "github.com/jackc/pgx/v5/pgtype" 11 | "github.com/replicase/pgcapture/pkg/pb" 12 | ) 13 | 14 | const TestSlot = "test_slot" 15 | 16 | type change struct { 17 | Expect *pb.Change 18 | } 19 | 20 | func (c *change) Apply(ctx context.Context, conn *pgx.Conn) (err error) { 21 | vals := make([][]byte, 6) 22 | fmts := make([]int16, 6) 23 | oids := make([]uint32, 6) 24 | 25 | for i, t := range c.Expect.New { 26 | vals[i] = t.GetBinary() 27 | oids[i] = t.Oid 28 | fmts[i] = 1 29 | } 30 | 31 | table := c.Expect.Table 32 | switch c.Expect.Op { 33 | case pb.Change_INSERT: 34 | _, err = conn.PgConn().ExecParams(ctx, fmt.Sprintf("insert into %s values ($1,$2,$3,$4,$5,$6)", table), vals, oids, fmts, fmts).Close() 35 | case pb.Change_UPDATE: 36 | if c.Expect.Old != nil { 37 | vals[5] = c.Expect.Old[0].GetBinary() 38 | oids[5] = c.Expect.Old[0].Oid 39 | } else { 40 | vals[5] = c.Expect.New[0].GetBinary() 41 | oids[5] = c.Expect.New[0].Oid 42 | } 43 | fmts[5] = 1 44 | _, err = conn.PgConn().ExecParams(ctx, fmt.Sprintf("update %s set id=$1,uid=$2,txt=$3,js=$4,ts=$5 where id=$6", table), vals, oids, fmts, fmts).Close() 45 | case pb.Change_DELETE: 46 | vals[0] = c.Expect.Old[0].GetBinary() 47 | oids[0] = c.Expect.Old[0].Oid 48 | fmts[0] = 1 49 | _, err = conn.PgConn().ExecParams(ctx, fmt.Sprintf("delete from %s where id=$1", table), vals[:1], oids[:1], fmts[:1], fmts[:1]).Close() 50 | } 51 | return err 52 | } 53 | 54 | func 
nB(n int) []byte { 55 | builder := bytes.Buffer{} 56 | for i := 0; i < n; i++ { 57 | builder.WriteByte('A') 58 | } 59 | return builder.Bytes() 60 | } 61 | 62 | func nT(n int) string { 63 | builder := strings.Builder{} 64 | for i := 0; i < n; i++ { 65 | builder.WriteString("A") 66 | } 67 | return builder.String() 68 | } 69 | 70 | func b(in any, oid int) []byte { 71 | bs, _ := pgtype.NewMap().Encode(uint32(oid), pgtype.BinaryFormatCode, in, nil) 72 | return bs 73 | } 74 | -------------------------------------------------------------------------------- /pkg/decode/pglogical.go: -------------------------------------------------------------------------------- 1 | package decode 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "errors" 7 | "fmt" 8 | 9 | "github.com/replicase/pgcapture/pkg/pb" 10 | "github.com/sirupsen/logrus" 11 | ) 12 | 13 | var StringEnd = []byte{0} 14 | 15 | func NewPGLogicalDecoder(schema *PGXSchemaLoader) (Decoder, error) { 16 | svn, err := schema.GetVersion() 17 | if err != nil { 18 | return nil, err 19 | } 20 | 21 | return &PGLogicalDecoder{ 22 | schema: schema, 23 | relations: make(map[uint32]Relation), 24 | pluginArgs: []string{ 25 | "min_proto_version '1'", 26 | "max_proto_version '1'", 27 | "startup_params_format '1'", 28 | "\"binary.want_binary_basetypes\" '1'", 29 | fmt.Sprintf("\"binary.basetypes_major_version\" '%d'", svn/100), 30 | "\"binary.bigendian\" '1'", 31 | }, 32 | log: logrus.WithFields(logrus.Fields{"From": "PGLogicalDecoder"}), 33 | }, nil 34 | } 35 | 36 | type PGLogicalDecoder struct { 37 | schema *PGXSchemaLoader 38 | relations map[uint32]Relation 39 | pluginArgs []string 40 | log *logrus.Entry 41 | } 42 | 43 | func (p *PGLogicalDecoder) Decode(in []byte) (m *pb.Message, err error) { 44 | switch in[0] { 45 | case 'B': 46 | return p.ReadBegin(in) 47 | case 'C': 48 | return p.ReadCommit(in) 49 | case 'R': 50 | r := Relation{} 51 | err = p.ReadRelation(in, &r) 52 | p.relations[r.Rel] = r 53 | case 'I', 'U', 'D': 54 | r := RowChange{} 55 | if err = p.ReadRowChange(in, &r); err != nil { 56 | return nil, err 57 | } 58 | 59 | rel, ok := p.relations[r.Rel] 60 | if !ok { 61 | return nil, errors.New("relation not found") 62 | } 63 | 64 | c := &pb.Change{Schema: rel.NspName, Table: rel.RelName, Op: OpMap[in[0]]} 65 | c.Old = makeOldPBTuple(p.schema, rel, r.Old, true) 66 | c.New = makeNewPBTuple(p.schema, rel, r.Old, r.New, false) 67 | 68 | if len(c.Old) != 0 || len(c.New) != 0 { 69 | return &pb.Message{Type: &pb.Message_Change{Change: c}}, nil 70 | } 71 | default: 72 | // TODO log unmatched message 73 | } 74 | return nil, err 75 | } 76 | 77 | func (p *PGLogicalDecoder) GetPluginArgs() []string { 78 | return p.pluginArgs 79 | } 80 | 81 | func (p *PGLogicalDecoder) ReadBegin(in []byte) (*pb.Message, error) { 82 | if len(in) != 1+1+8+8+4 { 83 | return nil, errors.New("begin wrong length") 84 | } 85 | return &pb.Message{Type: &pb.Message_Begin{Begin: &pb.Begin{ 86 | FinalLsn: binary.BigEndian.Uint64(in[2:10]), 87 | CommitTime: binary.BigEndian.Uint64(in[10:18]), 88 | RemoteXid: binary.BigEndian.Uint32(in[18:]), 89 | }}}, nil 90 | } 91 | 92 | func (p *PGLogicalDecoder) ReadCommit(in []byte) (*pb.Message, error) { 93 | if len(in) != 1+1+8+8+8 { 94 | return nil, errors.New("commit wrong length") 95 | } 96 | return &pb.Message{Type: &pb.Message_Commit{Commit: &pb.Commit{ 97 | CommitLsn: binary.BigEndian.Uint64(in[2:10]), 98 | EndLsn: binary.BigEndian.Uint64(in[10:18]), 99 | CommitTime: binary.BigEndian.Uint64(in[18:]), 100 | }}}, nil 101 | } 102 | 103 | func (p 
*PGLogicalDecoder) ReadRelation(in []byte, m *Relation) (err error) { 104 | reader := NewBytesReader(in) 105 | reader.Skip(2) // skip op and flags 106 | 107 | m.Rel, err = reader.Uint32() 108 | m.NspName, err = reader.String8() 109 | m.RelName, err = reader.String8() 110 | 111 | if t, err := reader.Byte(); err != nil || t != 'A' { 112 | return errors.New("relation expected A, got " + string(t)) 113 | } 114 | 115 | n, err := reader.Int16() 116 | m.Fields = make([]string, n) 117 | for i := 0; i < n; i++ { 118 | if t, err := reader.Byte(); err != nil || t != 'C' { 119 | return errors.New("relation expected C, got " + string(t)) 120 | } 121 | reader.Skip(1) // skip flags 122 | if t, err := reader.Byte(); err != nil || t != 'N' { 123 | return errors.New("relation expected N, got " + string(t)) 124 | } 125 | m.Fields[i], err = reader.String16() 126 | } 127 | return err 128 | } 129 | 130 | func (p *PGLogicalDecoder) ReadRowChange(in []byte, m *RowChange) (err error) { 131 | reader := NewBytesReader(in) 132 | m.Op, err = reader.Byte() 133 | reader.Skip(1) // skip flags 134 | m.Rel, err = reader.Uint32() 135 | 136 | kind, err := reader.Byte() 137 | if kind != 'N' { 138 | m.Old, err = p.readTuple(reader) 139 | if m.Op == 'U' { 140 | kind, err = reader.Byte() 141 | } 142 | } 143 | if kind == 'N' { 144 | m.New, err = p.readTuple(reader) 145 | } 146 | return err 147 | } 148 | 149 | func (p *PGLogicalDecoder) readTuple(reader *BytesReader) (fields []Field, err error) { 150 | if t, err := reader.Byte(); err != nil || t != 'T' { 151 | return nil, errors.New("expect T for tuple message, got " + string(t)) 152 | } 153 | 154 | if n, err := reader.Int16(); err == nil { 155 | fields = make([]Field, n) 156 | } 157 | 158 | for i := range fields { 159 | if fields[i].Format, err = reader.Byte(); err != nil { 160 | return nil, err 161 | } 162 | switch fields[i].Format { 163 | case 'b': 164 | fields[i].Datum, err = reader.Bytes32() 165 | case 'n', 'u': 166 | continue 167 | case 't': 168 | fields[i].Datum, err = reader.Bytes32() 169 | fields[i].Datum = bytes.TrimSuffix(fields[i].Datum, StringEnd) 170 | default: 171 | return nil, errors.New("unsupported data format: " + string(fields[i].Format)) 172 | } 173 | } 174 | return 175 | } 176 | -------------------------------------------------------------------------------- /pkg/decode/pgoutput.go: -------------------------------------------------------------------------------- 1 | package decode 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "errors" 7 | "fmt" 8 | 9 | "github.com/replicase/pgcapture/pkg/pb" 10 | "github.com/sirupsen/logrus" 11 | ) 12 | 13 | func NewPGOutputDecoder(schema *PGXSchemaLoader, slotName string) *PGOutputDecoder { 14 | return &PGOutputDecoder{ 15 | schema: schema, 16 | relations: make(map[uint32]Relation), 17 | pluginArgs: []string{ 18 | "proto_version '1'", 19 | fmt.Sprintf("publication_names '%s'", slotName), 20 | "binary 'true'", 21 | }, 22 | log: logrus.WithFields(logrus.Fields{"From": "PGOutputDecoder"}), 23 | } 24 | } 25 | 26 | type PGOutputDecoder struct { 27 | schema *PGXSchemaLoader 28 | relations map[uint32]Relation 29 | pluginArgs []string 30 | log *logrus.Entry 31 | } 32 | 33 | func (p *PGOutputDecoder) Decode(in []byte) (m *pb.Message, err error) { 34 | switch in[0] { 35 | case 'B': 36 | return p.ReadBegin(in) 37 | case 'C': 38 | return p.ReadCommit(in) 39 | case 'R': 40 | r := Relation{} 41 | err = p.ReadRelation(in, &r) 42 | p.relations[r.Rel] = r 43 | case 'I', 'U', 'D': 44 | r := RowChange{} 45 | if err = 
p.ReadRowChange(in, &r); err != nil { 46 | return nil, err 47 | } 48 | 49 | rel, ok := p.relations[r.Rel] 50 | if !ok { 51 | return nil, errors.New("relation not found") 52 | } 53 | 54 | c := &pb.Change{Schema: rel.NspName, Table: rel.RelName, Op: OpMap[in[0]]} 55 | c.Old = makeOldPBTuple(p.schema, rel, r.Old, true) 56 | c.New = makeNewPBTuple(p.schema, rel, r.Old, r.New, false) 57 | 58 | if len(c.Old) != 0 || len(c.New) != 0 { 59 | return &pb.Message{Type: &pb.Message_Change{Change: c}}, nil 60 | } 61 | default: 62 | // TODO log unmatched message 63 | } 64 | return nil, err 65 | } 66 | 67 | func (p *PGOutputDecoder) GetPluginArgs() []string { 68 | return p.pluginArgs 69 | } 70 | 71 | func (p *PGOutputDecoder) ReadBegin(in []byte) (*pb.Message, error) { 72 | if len(in) != 1+8+8+4 { 73 | return nil, errors.New("begin wrong length") 74 | } 75 | return &pb.Message{Type: &pb.Message_Begin{Begin: &pb.Begin{ 76 | FinalLsn: binary.BigEndian.Uint64(in[1:9]), 77 | CommitTime: binary.BigEndian.Uint64(in[9:17]), 78 | RemoteXid: binary.BigEndian.Uint32(in[17:]), 79 | }}}, nil 80 | } 81 | 82 | func (p *PGOutputDecoder) ReadCommit(in []byte) (*pb.Message, error) { 83 | if len(in) != 1+1+8+8+8 { 84 | return nil, errors.New("commit wrong length") 85 | } 86 | return &pb.Message{Type: &pb.Message_Commit{Commit: &pb.Commit{ 87 | CommitLsn: binary.BigEndian.Uint64(in[2:10]), 88 | EndLsn: binary.BigEndian.Uint64(in[10:18]), 89 | CommitTime: binary.BigEndian.Uint64(in[18:]), 90 | }}}, nil 91 | } 92 | 93 | func (p *PGOutputDecoder) ReadRelation(in []byte, m *Relation) (err error) { 94 | reader := NewBytesReader(in) 95 | reader.Skip(1) // skip op 96 | 97 | m.Rel, err = reader.Uint32() 98 | m.NspName, err = reader.StringEnd() 99 | m.RelName, err = reader.StringEnd() 100 | 101 | // d = default, n = nothing, f = full, i = index 102 | if replicaIdentity, err := reader.Byte(); err != nil || (replicaIdentity != 'd' && replicaIdentity != 'n' && replicaIdentity != 'f' && replicaIdentity != 'i') { 103 | return errors.New("relation expected replicaIdentity equal d or n or f or i, got " + string(replicaIdentity)) 104 | } 105 | 106 | n, err := reader.Int16() 107 | m.Fields = make([]string, n) 108 | for i := 0; i < n; i++ { 109 | reader.Skip(1) // skip flag 110 | m.Fields[i], err = reader.StringEnd() 111 | if err != nil { 112 | return err 113 | } 114 | reader.Skip(8) // skip data type and type modifier 115 | } 116 | return err 117 | } 118 | 119 | func (p *PGOutputDecoder) ReadRowChange(in []byte, m *RowChange) (err error) { 120 | reader := NewBytesReader(in) 121 | m.Op, err = reader.Byte() 122 | m.Rel, err = reader.Uint32() 123 | 124 | kind, err := reader.Byte() 125 | if kind != 'N' { 126 | m.Old, err = p.readTuple(reader) 127 | if m.Op == 'U' { 128 | kind, err = reader.Byte() 129 | } 130 | } 131 | if kind == 'N' { 132 | m.New, err = p.readTuple(reader) 133 | } 134 | return err 135 | } 136 | 137 | func (p *PGOutputDecoder) readTuple(reader *BytesReader) (fields []Field, err error) { 138 | if n, err := reader.Int16(); err == nil { 139 | fields = make([]Field, n) 140 | } 141 | 142 | for i := range fields { 143 | if fields[i].Format, err = reader.Byte(); err != nil { 144 | return nil, err 145 | } 146 | switch fields[i].Format { 147 | case 'b': 148 | fields[i].Datum, err = reader.Bytes32() 149 | case 'n', 'u': 150 | continue 151 | case 't': 152 | fields[i].Datum, err = reader.Bytes32() 153 | fields[i].Datum = bytes.TrimSuffix(fields[i].Datum, StringEnd) 154 | default: 155 | return nil, errors.New("unsupported data
format: " + string(fields[i].Format)) 156 | } 157 | } 158 | return 159 | } 160 | -------------------------------------------------------------------------------- /pkg/pgcapture/consumer.go: -------------------------------------------------------------------------------- 1 | package pgcapture 2 | 3 | import ( 4 | "context" 5 | "reflect" 6 | "time" 7 | 8 | pgtypeV4 "github.com/jackc/pgtype" 9 | "github.com/jackc/pgx/v5/pgtype" 10 | "github.com/replicase/pgcapture/pkg/cursor" 11 | "github.com/replicase/pgcapture/pkg/pb" 12 | "github.com/replicase/pgcapture/pkg/source" 13 | "google.golang.org/grpc" 14 | "google.golang.org/protobuf/types/known/structpb" 15 | ) 16 | 17 | const TableRegexOption = "TableRegex" 18 | 19 | var DefaultErrorFn = func(source source.Change, err error) {} 20 | 21 | func NewSimpleConsumer(ctx context.Context, src source.RequeueSource, option ConsumerOption) *Consumer { 22 | return newConsumer(ctx, src, option) 23 | } 24 | 25 | func newDBogGatewaySource(ctx context.Context, client pb.DBLogGatewayClient, option ConsumerOption) *DBLogGatewayConsumer { 26 | parameters, _ := structpb.NewStruct(map[string]interface{}{}) 27 | if option.TableRegex != "" { 28 | parameters.Fields[TableRegexOption] = structpb.NewStringValue(option.TableRegex) 29 | } 30 | c := &DBLogGatewayConsumer{client: client, init: &pb.CaptureInit{ 31 | Uri: option.URI, 32 | Parameters: parameters, 33 | }} 34 | c.ctx, c.cancel = context.WithCancel(ctx) 35 | return c 36 | } 37 | 38 | func NewDBLogConsumer(ctx context.Context, conn *grpc.ClientConn, option ConsumerOption) *Consumer { 39 | s := newDBogGatewaySource(ctx, pb.NewDBLogGatewayClient(conn), option) 40 | return newConsumer(s.ctx, s, option) 41 | } 42 | 43 | /* 44 | NewConsumer 45 | Deprecated: please use NewDBLogConsumer instead 46 | */ 47 | func NewConsumer(ctx context.Context, conn *grpc.ClientConn, option ConsumerOption) *Consumer { 48 | return NewDBLogConsumer(ctx, conn, option) 49 | } 50 | 51 | func newConsumer(ctx context.Context, src source.RequeueSource, option ConsumerOption) *Consumer { 52 | errFn := DefaultErrorFn 53 | if option.OnDecodeError != nil { 54 | errFn = option.OnDecodeError 55 | } 56 | 57 | consumer := &Consumer{ctx: ctx, Source: src, errFn: errFn} 58 | if option.DebounceInterval > 0 { 59 | consumer.Bouncer = &DebounceHandler{ 60 | Interval: option.DebounceInterval, 61 | source: src, 62 | } 63 | } else { 64 | consumer.Bouncer = &NoBounceHandler{source: src} 65 | } 66 | 67 | return consumer 68 | } 69 | 70 | type OnDecodeError func(source source.Change, err error) 71 | 72 | type ConsumerOption struct { 73 | URI string 74 | TableRegex string 75 | DebounceInterval time.Duration 76 | OnDecodeError OnDecodeError 77 | } 78 | 79 | type Consumer struct { 80 | Source source.RequeueSource 81 | Bouncer BounceHandler 82 | ctx context.Context 83 | errFn OnDecodeError 84 | } 85 | 86 | func (c *Consumer) ConsumeAsync(mh ModelAsyncHandlers) error { 87 | if err := c.Bouncer.Initialize(c.ctx, mh); err != nil { 88 | return err 89 | } 90 | 91 | refs := make(map[string]reflection, len(mh)) 92 | for m, h := range mh { 93 | ref, err := reflectModel(m) 94 | if err != nil { 95 | return err 96 | } 97 | ref.hdl = h 98 | refs[ModelName(m.TableName())] = ref 99 | } 100 | 101 | changes, err := c.Source.Capture(cursor.Checkpoint{}) 102 | if err != nil { 103 | return err 104 | } 105 | 106 | for change := range changes { 107 | switch m := change.Message.Type.(type) { 108 | case *pb.Message_Change: 109 | ref, ok := refs[ModelName(m.Change.Schema, m.Change.Table)] 110 | 
if !ok { 111 | break 112 | } 113 | n, err := makeModel(ref, m.Change.New) 114 | if err != nil { 115 | c.errFn(change, err) 116 | break 117 | } 118 | o, err := makeModel(ref, m.Change.Old) 119 | if err != nil { 120 | c.errFn(change, err) 121 | break 122 | } 123 | c.Bouncer.Handle(ref.hdl, change.Checkpoint, Change{ 124 | Op: m.Change.Op, 125 | Checkpoint: change.Checkpoint, 126 | New: n, 127 | Old: o, 128 | }) 129 | continue 130 | } 131 | c.Source.Commit(change.Checkpoint) 132 | } 133 | return c.Source.Error() 134 | } 135 | 136 | func (c *Consumer) Consume(mh ModelHandlers) error { 137 | mah := make(ModelAsyncHandlers, len(mh)) 138 | for m, fn := range mh { 139 | mah[m] = toAsyncHandlerFunc(fn) 140 | } 141 | return c.ConsumeAsync(mah) 142 | } 143 | 144 | func makeModel(ref reflection, fields []*pb.Field) (interface{}, error) { 145 | ptr := reflect.New(ref.typ) 146 | val := ptr.Elem() 147 | interfaces := make(map[string]interface{}, len(ref.idx)) 148 | for name, i := range ref.idx { 149 | if f := val.Field(i).Addr(); f.CanInterface() { 150 | interfaces[name] = f.Interface() 151 | } 152 | } 153 | var err error 154 | for _, f := range fields { 155 | field, ok := interfaces[f.Name] 156 | if !ok { 157 | continue 158 | } 159 | if f.Value == nil { 160 | if decoder, ok := field.(pgtypeV4.BinaryDecoder); ok { 161 | err = decoder.DecodeBinary(ci, nil) 162 | } else { 163 | err = typeMap.Scan(f.Oid, pgtype.BinaryFormatCode, nil, field) 164 | } 165 | } else { 166 | if value, ok := f.Value.(*pb.Field_Binary); ok { 167 | if decoder, ok := field.(pgtypeV4.BinaryDecoder); ok { 168 | err = decoder.DecodeBinary(ci, value.Binary) 169 | } else { 170 | err = typeMap.Scan(f.Oid, pgtype.BinaryFormatCode, f.GetBinary(), field) 171 | } 172 | } else { 173 | if decoder, ok := field.(pgtypeV4.TextDecoder); ok { 174 | err = decoder.DecodeText(ci, []byte(f.GetText())) 175 | } else { 176 | err = typeMap.Scan(f.Oid, pgtype.TextFormatCode, []byte(f.GetText()), field) 177 | } 178 | } 179 | } 180 | if err != nil { 181 | return nil, err 182 | } 183 | } 184 | return ptr.Interface(), nil 185 | } 186 | 187 | func (c *Consumer) Stop() { 188 | c.Source.Stop() 189 | } 190 | 191 | var ( 192 | ci = pgtypeV4.NewConnInfo() 193 | typeMap = pgtype.NewMap() 194 | ) 195 | -------------------------------------------------------------------------------- /pkg/pgcapture/dblog.go: -------------------------------------------------------------------------------- 1 | package pgcapture 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "sync/atomic" 7 | 8 | "github.com/replicase/pgcapture/pkg/cursor" 9 | "github.com/replicase/pgcapture/pkg/pb" 10 | "github.com/replicase/pgcapture/pkg/source" 11 | ) 12 | 13 | type DBLogGatewayConsumer struct { 14 | client pb.DBLogGatewayClient 15 | init *pb.CaptureInit 16 | state int64 17 | stream pb.DBLogGateway_CaptureClient 18 | ctx context.Context 19 | cancel context.CancelFunc 20 | err atomic.Value 21 | } 22 | 23 | func (c *DBLogGatewayConsumer) Capture(cp cursor.Checkpoint) (changes chan source.Change, err error) { 24 | stream, err := c.client.Capture(c.ctx) 25 | if err != nil { 26 | c.cancel() 27 | return nil, err 28 | } 29 | 30 | if err = stream.Send(&pb.CaptureRequest{Type: &pb.CaptureRequest_Init{Init: c.init}}); err != nil { 31 | c.cancel() 32 | return nil, err 33 | } 34 | 35 | c.stream = stream 36 | changes = make(chan source.Change, 1000) 37 | 38 | atomic.StoreInt64(&c.state, 1) 39 | 40 | go func() { 41 | defer close(changes) 42 | for { 43 | msg, err := stream.Recv() 44 | if err != nil { 45 | 
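// wrap the error so the atomic.Value always stores a single concrete type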
c.err.Store(fmt.Errorf("%w", err)) 46 | return 47 | } 48 | changes <- source.Change{ 49 | Checkpoint: cursor.Checkpoint{ 50 | LSN: msg.Checkpoint.Lsn, 51 | Seq: msg.Checkpoint.Seq, 52 | Data: msg.Checkpoint.Data, 53 | }, 54 | Message: &pb.Message{Type: &pb.Message_Change{Change: msg.Change}}, 55 | } 56 | } 57 | }() 58 | 59 | return changes, nil 60 | } 61 | 62 | func (c *DBLogGatewayConsumer) Commit(cp cursor.Checkpoint) { 63 | if atomic.LoadInt64(&c.state) == 1 { 64 | if err := c.stream.Send(&pb.CaptureRequest{Type: &pb.CaptureRequest_Ack{Ack: &pb.CaptureAck{Checkpoint: &pb.Checkpoint{ 65 | Lsn: cp.LSN, 66 | Seq: cp.Seq, 67 | Data: cp.Data, 68 | }}}}); err != nil { 69 | c.err.Store(fmt.Errorf("%w", err)) 70 | c.Stop() 71 | } 72 | } 73 | } 74 | 75 | func (c *DBLogGatewayConsumer) Requeue(cp cursor.Checkpoint, reason string) { 76 | if atomic.LoadInt64(&c.state) == 1 { 77 | if err := c.stream.Send(&pb.CaptureRequest{Type: &pb.CaptureRequest_Ack{Ack: &pb.CaptureAck{Checkpoint: &pb.Checkpoint{ 78 | Lsn: cp.LSN, 79 | Seq: cp.Seq, 80 | Data: cp.Data, 81 | }, RequeueReason: reason}}}); err != nil { 82 | c.err.Store(fmt.Errorf("%w", err)) 83 | c.Stop() 84 | } 85 | } 86 | } 87 | 88 | func (c *DBLogGatewayConsumer) Error() error { 89 | if err, ok := c.err.Load().(error); ok { 90 | return err 91 | } 92 | return nil 93 | } 94 | 95 | func (c *DBLogGatewayConsumer) Stop() error { 96 | c.cancel() 97 | return c.Error() 98 | } 99 | -------------------------------------------------------------------------------- /pkg/pgcapture/debounce.go: -------------------------------------------------------------------------------- 1 | package pgcapture 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "sync" 7 | "time" 8 | 9 | "github.com/replicase/pgcapture/pkg/cursor" 10 | "github.com/replicase/pgcapture/pkg/pb" 11 | "github.com/replicase/pgcapture/pkg/source" 12 | ) 13 | 14 | type BounceHandler interface { 15 | Initialize(ctx context.Context, mh ModelAsyncHandlers) error 16 | Handle(fn ModelAsyncHandlerFunc, checkpoint cursor.Checkpoint, change Change) 17 | } 18 | 19 | type NoBounceHandler struct { 20 | source source.RequeueSource 21 | } 22 | 23 | func (b *NoBounceHandler) Initialize(ctx context.Context, mh ModelAsyncHandlers) error { 24 | return nil 25 | } 26 | 27 | func (b *NoBounceHandler) Handle(fn ModelAsyncHandlerFunc, checkpoint cursor.Checkpoint, change Change) { 28 | fn(change, func(err error) { 29 | if err != nil { 30 | b.source.Requeue(checkpoint, err.Error()) 31 | } else { 32 | b.source.Commit(checkpoint) 33 | } 34 | }) 35 | } 36 | 37 | type DebounceModel interface { 38 | Model 39 | DebounceKey() string 40 | } 41 | 42 | type event struct { 43 | Checkpoint cursor.Checkpoint 44 | Change Change 45 | Handler ModelAsyncHandlerFunc 46 | } 47 | 48 | type DebounceHandler struct { 49 | Interval time.Duration 50 | source source.RequeueSource 51 | store map[string]event 52 | ctx context.Context 53 | mu sync.Mutex 54 | } 55 | 56 | func (b *DebounceHandler) Initialize(ctx context.Context, mh ModelAsyncHandlers) error { 57 | for model := range mh { 58 | if _, ok := model.(DebounceModel); !ok { 59 | schema, table := model.TableName() 60 | return fmt.Errorf("%s.%s model should be implemented with DebounceModel interface", schema, table) 61 | } 62 | } 63 | b.ctx = ctx 64 | b.store = make(map[string]event) 65 | 66 | go func() { 67 | var err error 68 | for err == nil { 69 | time.Sleep(b.Interval) 70 | select { 71 | case <-b.ctx.Done(): 72 | err = b.ctx.Err() 73 | default: 74 | } 75 | b.mu.Lock() 76 | for k, v := range b.store { 
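// flush every event debounced during the last interval, then clear the store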
77 | b.handle(v) 78 | delete(b.store, k) 79 | } 80 | b.mu.Unlock() 81 | } 82 | }() 83 | 84 | return nil 85 | } 86 | 87 | func (b *DebounceHandler) Handle(fn ModelAsyncHandlerFunc, checkpoint cursor.Checkpoint, change Change) { 88 | b.mu.Lock() 89 | defer b.mu.Unlock() 90 | 91 | e := event{ 92 | Checkpoint: checkpoint, 93 | Change: change, 94 | Handler: fn, 95 | } 96 | 97 | switch change.Op { 98 | case pb.Change_INSERT: 99 | key := debounceKey(change.New) 100 | if prev, ok := b.store[key]; ok { 101 | b.handle(prev) 102 | delete(b.store, key) 103 | } 104 | b.handle(e) 105 | case pb.Change_DELETE: 106 | key := debounceKey(change.Old) 107 | if prev, ok := b.store[key]; ok { 108 | b.handle(prev) 109 | delete(b.store, key) 110 | } 111 | b.handle(e) 112 | case pb.Change_UPDATE: 113 | if change.Old != nil { 114 | key := debounceKey(change.Old) 115 | if prev, ok := b.store[key]; ok { 116 | b.handle(prev) 117 | delete(b.store, key) 118 | } 119 | } 120 | key := debounceKey(change.New) 121 | if prev, ok := b.store[key]; ok { 122 | // requeue order is not guaranteed, so check whether the new event is newer than the buffered one; 123 | // if so, commit the buffered event and replace it with the new one. 124 | // LSN == 0 is a special case: a scheduled dump carries LSN 0 and must always be treated as the latest event. 125 | // when the checkpoints are equal, the buffered event might be the same event, so it is handled rather than committed. 126 | if change.Checkpoint.LSN == 0 || change.Checkpoint.After(prev.Checkpoint) { 127 | b.source.Commit(prev.Checkpoint) 128 | b.store[key] = e 129 | } else if change.Checkpoint.Equal(prev.Checkpoint) { 130 | b.handle(prev) 131 | b.store[key] = e 132 | } else { 133 | b.source.Commit(change.Checkpoint) 134 | } 135 | } else { 136 | b.store[key] = e 137 | } 138 | } 139 | } 140 | 141 | func (b *DebounceHandler) handle(e event) { 142 | e.Handler(e.Change, func(err error) { 143 | if err != nil { 144 | b.source.Requeue(e.Checkpoint, err.Error()) 145 | } else { 146 | b.source.Commit(e.Checkpoint) 147 | } 148 | }) 149 | } 150 | 151 | func debounceKey(m interface{}) string { 152 | model := m.(DebounceModel) 153 | schema, table := model.TableName() 154 | return schema + table + model.DebounceKey() 155 | } 156 | -------------------------------------------------------------------------------- /pkg/pgcapture/json.go: -------------------------------------------------------------------------------- 1 | package pgcapture 2 | 3 | import ( 4 | "bytes" 5 | "database/sql/driver" 6 | "encoding/json" 7 | "reflect" 8 | "strings" 9 | "sync" 10 | 11 | pgtypeV4 "github.com/jackc/pgtype" 12 | ) 13 | 14 | var bufPool sync.Pool 15 | var fieldsCache sync.Map 16 | 17 | type fieldOption struct { 18 | name string 19 | omitempty bool 20 | } 21 | 22 | func MarshalJSON(m Model) ([]byte, error) { 23 | var buf *bytes.Buffer 24 | if v := bufPool.Get(); v != nil { 25 | buf = v.(*bytes.Buffer) 26 | } else { 27 | buf = bytes.NewBuffer(nil) 28 | } 29 | defer func() { 30 | buf.Reset() 31 | bufPool.Put(buf) 32 | }() 33 | 34 | val := reflect.ValueOf(m) 35 | if val.IsNil() { // must run before val.Elem(): Elem of a nil pointer is the zero Value and Type() on it panics 36 | buf.WriteString("null") 37 | return buf.Bytes(), nil 38 | } 39 | 40 | ele := val.Elem() 41 | typ := ele.Type() 42 | 43 | var names []fieldOption 44 | if v, ok := fieldsCache.Load(typ); ok { 45 | names = v.([]fieldOption) 46 | } else { 47 | names = make([]fieldOption, typ.NumField()) 48 | for i := 0; i < typ.NumField(); i++ { 49 | field := typ.Field(i) 50 | if tag, ok := field.Tag.Lookup("json"); ok { 51 | parts := strings.Split(tag, ",") 52 | opt =
fieldOption{name: parts[0]} 53 | if len(parts) != 1 { 54 | opt.omitempty = strings.Contains(parts[1], "omitempty") 55 | } 56 | if opt.omitempty && opt.name == "" { 57 | opt.name = field.Name 58 | } 59 | names[i] = opt 60 | } else { 61 | names[i] = fieldOption{name: field.Name} 62 | } 63 | } 64 | fieldsCache.Store(typ, names) 65 | } 66 | 67 | buf.WriteString("{") 68 | 69 | count := 0 70 | for i, opt := range names { 71 | f := ele.Field(i) 72 | if !f.CanInterface() || opt.name == "-" { 73 | continue 74 | } 75 | face := f.Addr().Interface() 76 | if opt.omitempty { 77 | if valuer, ok := face.(pgtypeV4.Value); ok { 78 | if v := valuer.Get(); v == nil || isEmptyValue(reflect.ValueOf(v)) { 79 | continue 80 | } 81 | } else if valuer, ok := face.(driver.Valuer); ok { 82 | if v, err := valuer.Value(); err != nil { 83 | return nil, err 84 | } else if v == nil || isEmptyValue(reflect.ValueOf(v)) { 85 | continue 86 | } 87 | } else { 88 | if (f.Kind() == reflect.Ptr && f.IsNil()) || isEmptyValue(f) { 89 | continue 90 | } 91 | } 92 | } 93 | bs, err := json.Marshal(face) 94 | if err != nil { 95 | if strings.Contains(err.Error(), "cannot encode status undefined") { 96 | continue 97 | } 98 | return nil, err 99 | } 100 | if count > 0 { 101 | buf.WriteString(",") 102 | } 103 | buf.WriteString("\"") 104 | buf.WriteString(opt.name) 105 | buf.WriteString("\":") 106 | buf.Write(bs) 107 | count++ 108 | } 109 | buf.WriteString("}") 110 | return buf.Bytes(), nil 111 | } 112 | 113 | func isEmptyValue(v reflect.Value) bool { 114 | switch v.Kind() { 115 | case reflect.Array, reflect.Map, reflect.Slice, reflect.String: 116 | return v.Len() == 0 117 | case reflect.Bool: 118 | return !v.Bool() 119 | case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: 120 | return v.Int() == 0 121 | case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: 122 | return v.Uint() == 0 123 | case reflect.Float32, reflect.Float64: 124 | return v.Float() == 0 125 | case reflect.Interface, reflect.Ptr: 126 | return v.IsNil() 127 | } 128 | return false 129 | } 130 | -------------------------------------------------------------------------------- /pkg/pgcapture/json_test.go: -------------------------------------------------------------------------------- 1 | package pgcapture 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | 7 | pgtypeV4 "github.com/jackc/pgtype" 8 | "github.com/jackc/pgx/v5/pgtype" 9 | ) 10 | 11 | func TestMarshalJSON(t *testing.T) { 12 | testCases := []struct { 13 | input Model 14 | expect []byte 15 | }{ 16 | { 17 | input: &m1{ 18 | F1: pgtypeV4.Text{String: "f1", Status: pgtypeV4.Present}, 19 | F2: pgtypeV4.Text{String: "", Status: pgtypeV4.Present}, 20 | F3: pgtypeV4.Text{String: "", Status: pgtypeV4.Null}, 21 | F4: pgtypeV4.Text{String: "", Status: pgtypeV4.Null}, 22 | F5: pgtypeV4.Text{String: "", Status: pgtypeV4.Present}, 23 | F9: make([]string, 0), 24 | F10: []string{"1"}, 25 | }, 26 | expect: []byte(`{"f1":"f1","f4":null,"F5":"","F10":["1"]}`), 27 | }, 28 | { 29 | input: &m2{ 30 | F1: pgtype.Text{String: "f1", Valid: true}, 31 | F2: pgtype.Text{String: "", Valid: true}, 32 | F3: pgtype.Text{String: "", Valid: false}, 33 | F4: pgtype.Text{String: "", Valid: false}, 34 | F5: pgtype.Text{String: "", Valid: true}, 35 | F9: make([]string, 0), 36 | F10: []string{"1"}, 37 | }, 38 | expect: []byte(`{"f1":"f1","f4":null,"F5":"","F6":null,"F10":["1"]}`), 39 | }, 40 | } 41 | 42 | for _, tc := range testCases { 43 | bs, err := MarshalJSON(tc.input) 44 | if err != nil { 
45 | t.Fatalf("unexpected err %v", err) 46 | } 47 | if !bytes.Equal(bs, tc.expect) { 48 | t.Fatalf("unexpected json %v", string(bs)) 49 | } 50 | } 51 | } 52 | 53 | type m1 struct { 54 | F1 pgtypeV4.Text `json:"f1"` 55 | F2 pgtypeV4.Text `json:"f2,omitempty"` 56 | F3 pgtypeV4.Text `json:",omitempty"` 57 | F4 pgtypeV4.Text `json:"f4"` 58 | F5 pgtypeV4.Text 59 | F6 pgtypeV4.Text 60 | 61 | F7 string `json:",omitempty"` 62 | F8 []string `json:",omitempty"` 63 | F9 []string `json:"f9,omitempty"` 64 | F10 []string 65 | } 66 | 67 | func (m *m1) TableName() (schema, table string) { 68 | return "", "" 69 | } 70 | 71 | type m2 struct { 72 | F1 pgtype.Text `json:"f1"` 73 | F2 pgtype.Text `json:"f2,omitempty"` 74 | F3 pgtype.Text `json:",omitempty"` 75 | F4 pgtype.Text `json:"f4"` 76 | F5 pgtype.Text 77 | F6 pgtype.Text 78 | 79 | F7 string `json:",omitempty"` 80 | F8 []string `json:",omitempty"` 81 | F9 []string `json:"f9,omitempty"` 82 | F10 []string 83 | } 84 | 85 | func (m *m2) TableName() (schema, table string) { 86 | return "", "" 87 | } 88 | -------------------------------------------------------------------------------- /pkg/pgcapture/reflect.go: -------------------------------------------------------------------------------- 1 | package pgcapture 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "reflect" 7 | "strings" 8 | 9 | "github.com/replicase/pgcapture/pkg/cursor" 10 | "github.com/replicase/pgcapture/pkg/pb" 11 | ) 12 | 13 | type Model interface { 14 | TableName() (schema, table string) 15 | } 16 | 17 | type Change struct { 18 | Op pb.Change_Operation 19 | Checkpoint cursor.Checkpoint 20 | New interface{} 21 | Old interface{} 22 | } 23 | 24 | type ModelHandlerFunc func(change Change) error 25 | type ModelAsyncHandlerFunc func(change Change, done func(err error)) 26 | type ModelHandlers map[Model]ModelHandlerFunc 27 | type ModelAsyncHandlers map[Model]ModelAsyncHandlerFunc 28 | 29 | func toAsyncHandlerFunc(fn ModelHandlerFunc) ModelAsyncHandlerFunc { 30 | return func(change Change, done func(err error)) { 31 | done(fn(change)) 32 | } 33 | } 34 | 35 | func reflectModel(model Model) (ref reflection, err error) { 36 | typ := reflect.TypeOf(model) 37 | if typ.Kind() != reflect.Ptr || typ.Elem().Kind() != reflect.Struct { 38 | return ref, errors.New("the field Model of SwitchHandler should be a pointer to a struct") 39 | } 40 | typ = typ.Elem() 41 | ref = reflection{idx: make(map[string]int, typ.NumField()), typ: typ} 42 | for i := 0; i < typ.NumField(); i++ { 43 | f := typ.Field(i) 44 | if tag, ok := f.Tag.Lookup("pg"); ok { 45 | if n := strings.Split(tag, ","); len(n) > 0 && n[0] != "" { 46 | ref.idx[n[0]] = i 47 | } 48 | } 49 | } 50 | for k := range ref.idx { 51 | if k != "" { 52 | return ref, nil 53 | } 54 | } 55 | return ref, fmt.Errorf("at least one field of %s should have a valid pg tag", typ) // typ is already the struct type; calling typ.Elem() here would panic 56 | } 57 | 58 | func ModelName(namespace, table string) string { 59 | if namespace == "" { 60 | return "public." + table 61 | } 62 | return namespace + "."
+ table 63 | } 64 | 65 | type reflection struct { 66 | idx map[string]int 67 | typ reflect.Type 68 | hdl ModelAsyncHandlerFunc 69 | } 70 | -------------------------------------------------------------------------------- /pkg/sink/main.go: -------------------------------------------------------------------------------- 1 | package sink 2 | 3 | import ( 4 | "fmt" 5 | "runtime" 6 | "sync" 7 | "sync/atomic" 8 | "time" 9 | 10 | "github.com/replicase/pgcapture/pkg/cursor" 11 | "github.com/replicase/pgcapture/pkg/source" 12 | ) 13 | 14 | type CleanFn func() 15 | type ApplyFn func(sourceRemaining int, message source.Change, committed chan cursor.Checkpoint) error 16 | 17 | type Sink interface { 18 | Setup() (cp cursor.Checkpoint, err error) 19 | Apply(changes chan source.Change) (committed chan cursor.Checkpoint) 20 | Error() error 21 | Stop() error 22 | } 23 | 24 | type BaseSink struct { 25 | CleanFn CleanFn 26 | cleanOnce sync.Once 27 | 28 | committed chan cursor.Checkpoint 29 | state int64 30 | err atomic.Value 31 | } 32 | 33 | func (b *BaseSink) Setup() (cp cursor.Checkpoint, err error) { 34 | panic("implement me") 35 | } 36 | 37 | func (b *BaseSink) Apply(changes chan source.Change) (committed chan cursor.Checkpoint) { 38 | panic("implement me") 39 | } 40 | 41 | func (b *BaseSink) apply(changes chan source.Change, applyFn ApplyFn) (committed chan cursor.Checkpoint) { 42 | if !atomic.CompareAndSwapInt64(&b.state, 0, 1) { 43 | return nil 44 | } 45 | b.committed = make(chan cursor.Checkpoint, 1000) 46 | atomic.StoreInt64(&b.state, 2) 47 | 48 | go func() { 49 | ticker := time.NewTicker(time.Second) 50 | for atomic.LoadInt64(&b.state) == 2 { 51 | select { 52 | case change, more := <-changes: 53 | if !more { 54 | goto cleanup 55 | } 56 | if err := applyFn(len(changes), change, b.committed); err != nil { 57 | b.err.Store(fmt.Errorf("%w", err)) 58 | goto cleanup 59 | } 60 | case <-ticker.C: 61 | } 62 | } 63 | cleanup: 64 | ticker.Stop() 65 | atomic.StoreInt64(&b.state, 4) 66 | go b.Stop() 67 | for range changes { 68 | // this loop should do nothing and only exit when the input channel is closed 69 | } 70 | }() 71 | return b.committed 72 | } 73 | 74 | func (b *BaseSink) Error() error { 75 | if err, ok := b.err.Load().(error); ok { 76 | return err 77 | } 78 | return nil 79 | } 80 | 81 | func (b *BaseSink) Stop() error { 82 | switch atomic.LoadInt64(&b.state) { 83 | case 0: 84 | b.cleanOnce.Do(b.CleanFn) 85 | case 1, 2: 86 | for !atomic.CompareAndSwapInt64(&b.state, 2, 3) { 87 | runtime.Gosched() 88 | } 89 | fallthrough 90 | case 3: 91 | for atomic.LoadInt64(&b.state) != 4 { 92 | time.Sleep(time.Millisecond * 100) 93 | } 94 | fallthrough 95 | case 4: 96 | b.cleanOnce.Do(func() { 97 | b.CleanFn() 98 | close(b.committed) 99 | }) 100 | } 101 | return b.Error() 102 | } 103 | -------------------------------------------------------------------------------- /pkg/sink/main_test.go: -------------------------------------------------------------------------------- 1 | package sink 2 | 3 | import ( 4 | "errors" 5 | "testing" 6 | "time" 7 | 8 | "github.com/replicase/pgcapture/pkg/cursor" 9 | "github.com/replicase/pgcapture/pkg/pb" 10 | "github.com/replicase/pgcapture/pkg/source" 11 | ) 12 | 13 | type sink struct { 14 | BaseSink 15 | Cleaned chan struct{} 16 | } 17 | 18 | func (s *sink) Setup() (cp cursor.Checkpoint, err error) { 19 | s.Cleaned = make(chan struct{}) 20 | s.BaseSink.CleanFn = func() { 21 | close(s.Cleaned) 22 | } 23 | return 24 | } 25 | 26 | func (s *sink) Apply(changes chan source.Change) (committed 
chan cursor.Checkpoint) { 27 | return s.BaseSink.apply(changes, func(sourceBufferSize int, message source.Change, committed chan cursor.Checkpoint) error { 28 | if message.Message != nil { 29 | return ErrAny 30 | } 31 | committed <- message.Checkpoint 32 | return nil 33 | }) 34 | } 35 | 36 | var ErrAny = errors.New("error") 37 | 38 | func TestBaseSink_Stop(t *testing.T) { 39 | // the sink should still consume changes after stopped, but do nothing 40 | sink := sink{} 41 | sink.Setup() 42 | changes := make(chan source.Change) 43 | committed := sink.Apply(changes) 44 | 45 | if err := sink.Stop(); err != nil { 46 | t.Fatalf("unexpected %v", err) 47 | } 48 | 49 | if _, more := <-committed; more { 50 | t.Fatal("committed channel should be closed") 51 | } 52 | if _, more := <-sink.Cleaned; more { 53 | t.Fatal("clean func should be called once") 54 | } 55 | 56 | for i := 0; i < 1000; i++ { 57 | select { 58 | case changes <- source.Change{Checkpoint: cursor.Checkpoint{}}: 59 | case <-time.NewTimer(time.Second).C: 60 | t.Fatal("push to changes should be still successful") 61 | } 62 | } 63 | close(changes) 64 | } 65 | 66 | func TestBaseSink_StopImmediate(t *testing.T) { 67 | sink := sink{} 68 | sink.Setup() 69 | changes := make(chan source.Change) 70 | go sink.Apply(changes) 71 | if err := sink.Stop(); err != nil { 72 | t.Fatalf("unexpected %v", err) 73 | } 74 | if _, more := <-sink.Cleaned; more { 75 | t.Fatal("clean func should be called after Stop()") 76 | } 77 | } 78 | 79 | func TestBaseSink_Clean(t *testing.T) { 80 | // the clean func should be called when the changes channel is closed 81 | sink := sink{} 82 | sink.Setup() 83 | changes := make(chan source.Change) 84 | committed := sink.Apply(changes) 85 | 86 | count := 1000 87 | 88 | go func() { 89 | for i := 0; i < count; i++ { 90 | changes <- source.Change{Checkpoint: cursor.Checkpoint{}} 91 | } 92 | close(changes) 93 | }() 94 | 95 | for i := 0; i < count; i++ { 96 | <-committed 97 | } 98 | 99 | if _, more := <-committed; more { 100 | t.Fatal("committed channel should be closed") 101 | } 102 | 103 | if err := sink.Stop(); err != nil { 104 | t.Fatalf("unexpected %v", err) 105 | } 106 | 107 | if _, more := <-sink.Cleaned; more { 108 | t.Fatal("clean func should be called after Stop()") 109 | } 110 | } 111 | 112 | func TestBaseSink_Error(t *testing.T) { 113 | sink := sink{} 114 | sink.Setup() 115 | changes := make(chan source.Change) 116 | committed := sink.Apply(changes) 117 | 118 | // trigger error 119 | changes <- source.Change{Message: &pb.Message{}} 120 | 121 | if _, more := <-committed; more { 122 | t.Fatal("committed channel should be closed") 123 | } 124 | 125 | if err := sink.Stop(); !errors.Is(err, ErrAny) { 126 | t.Fatalf("unexpected %v", err) 127 | } 128 | 129 | if _, more := <-sink.Cleaned; more { 130 | t.Fatal("clean func should be called after Stop()") 131 | } 132 | 133 | close(changes) 134 | } 135 | 136 | func TestBaseSink_SecondApply(t *testing.T) { 137 | sink := sink{} 138 | sink.Setup() 139 | changes := make(chan source.Change) 140 | if first := sink.Apply(changes); first == nil { 141 | t.Fatal("should not be nil on first Apply") 142 | } 143 | if second := sink.Apply(changes); second != nil { 144 | t.Fatal("should be nil on second Apply") 145 | } 146 | sink.Stop() 147 | } 148 | 149 | func TestBaseSink_SetupPanic(t *testing.T) { 150 | defer func() { recover() }() 151 | s := BaseSink{} 152 | s.Setup() 153 | t.Fatal("should panic") 154 | } 155 | 156 | func TestBaseSink_ApplyPanic(t *testing.T) { 157 | defer func() { recover() }() 158 | s := BaseSink{}
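For orientation, here is a hedged sketch (the countingSink type is hypothetical, written as if inside package sink like the test type above) of how a concrete sink wires up BaseSink: Setup assigns CleanFn, and Apply delegates to the unexported apply helper, whose ApplyFn either returns an error to stop the loop or pushes the checkpoint to committed to acknowledge it:

```go
// countingSink is illustrative only and not part of the repository.
type countingSink struct {
	BaseSink
	applied int
}

func (s *countingSink) Setup() (cursor.Checkpoint, error) {
	// CleanFn runs exactly once, whether the sink stops on error, on Stop(),
	// or because the changes channel was closed upstream.
	s.CleanFn = func() {}
	return cursor.Checkpoint{}, nil
}

func (s *countingSink) Apply(changes chan source.Change) chan cursor.Checkpoint {
	return s.apply(changes, func(remaining int, change source.Change, committed chan cursor.Checkpoint) error {
		s.applied++
		committed <- change.Checkpoint // acknowledge back to the source
		return nil
	})
}
```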
159 | s.Apply(make(chan source.Change)) 160 | t.Fatal("should panic") 161 | } 162 | -------------------------------------------------------------------------------- /pkg/sink/pulsar.go: -------------------------------------------------------------------------------- 1 | package sink 2 | 3 | import ( 4 | "context" 5 | "encoding/hex" 6 | "fmt" 7 | "os" 8 | 9 | "github.com/apache/pulsar-client-go/pulsar" 10 | "github.com/replicase/pgcapture/pkg/cursor" 11 | "github.com/replicase/pgcapture/pkg/source" 12 | "github.com/sirupsen/logrus" 13 | "google.golang.org/protobuf/proto" 14 | ) 15 | 16 | type SetupTracker func(client pulsar.Client, topic string) (cursor.Tracker, error) 17 | 18 | func setupDefaultTracker(client pulsar.Client, topic string) (cursor.Tracker, error) { 19 | return cursor.NewPulsarTracker(client, topic) 20 | } 21 | 22 | type PulsarSink struct { 23 | BaseSink 24 | 25 | PulsarOption pulsar.ClientOptions 26 | PulsarTopic string 27 | // For overriding the cluster list to be replicated to 28 | ReplicatedClusters []string 29 | 30 | SetupTracker SetupTracker 31 | 32 | client pulsar.Client 33 | tracker cursor.Tracker 34 | producer pulsar.Producer 35 | log *logrus.Entry 36 | prev cursor.Checkpoint 37 | consistent bool 38 | } 39 | 40 | func (p *PulsarSink) Setup() (cp cursor.Checkpoint, err error) { 41 | p.client, err = pulsar.NewClient(p.PulsarOption) 42 | if err != nil { 43 | return cp, err 44 | } 45 | 46 | host, err := os.Hostname() 47 | if err != nil { 48 | return cp, err 49 | } 50 | 51 | if p.SetupTracker == nil { 52 | p.SetupTracker = setupDefaultTracker 53 | } 54 | 55 | p.tracker, err = p.SetupTracker(p.client, p.PulsarTopic) 56 | if err != nil { 57 | return cp, err 58 | } 59 | 60 | // Set up the producer first to avoid the existence of another producer when trying to read the latest message 61 | p.producer, err = p.client.CreateProducer(pulsar.ProducerOptions{ 62 | Topic: p.PulsarTopic, 63 | Name: p.PulsarTopic + "-producer", // fixed for exclusive producer 64 | Properties: map[string]string{"host": host}, 65 | MaxPendingMessages: 2000, 66 | CompressionType: pulsar.ZSTD, 67 | BatchingMaxMessages: 1000, 68 | BatchingMaxSize: 1024 * 1024, 69 | }) 70 | if err != nil { 71 | return cp, err 72 | } 73 | 74 | cp, err = p.tracker.Last() 75 | if err != nil { 76 | return cp, err 77 | } 78 | p.prev = cp 79 | 80 | p.log = logrus.WithFields(logrus.Fields{ 81 | "From": "PulsarSink", 82 | "Topic": p.PulsarTopic, 83 | }) 84 | p.log.WithFields(logrus.Fields{ 85 | "SinkLastLSN": p.prev.LSN, 86 | "SinkLastSeq": p.prev.Seq, 87 | }).Info("start sending changes to pulsar") 88 | 89 | p.BaseSink.CleanFn = func() { 90 | p.producer.Flush() 91 | p.producer.Close() 92 | p.client.Close() 93 | p.tracker.Close() 94 | } 95 | 96 | return cp, nil 97 | } 98 | 99 | func (p *PulsarSink) Apply(changes chan source.Change) chan cursor.Checkpoint { 100 | p.tracker.Start() 101 | 102 | var first bool 103 | return p.BaseSink.apply(changes, func(_ int, change source.Change, committed chan cursor.Checkpoint) error { 104 | if !first { 105 | p.log.WithFields(logrus.Fields{ 106 | "MessageLSN": change.Checkpoint.LSN, 107 | "MessageSeq": change.Checkpoint.Seq, 108 | "SinkLastLSN": p.prev.LSN, 109 | "SinkLastSeq": p.prev.Seq, 110 | "Message": change.Message.String(), 111 | "ReplicatedClusters": p.ReplicatedClusters, 112 | }).Info("applying the first message from source") 113 | first = true 114 | } 115 | 116 | p.consistent = p.consistent || change.Checkpoint.After(p.prev) 117 | if !p.consistent { 118 | p.log.WithFields(logrus.Fields{ 
119 | "MessageLSN": change.Checkpoint.LSN, 120 | "MessageSeq": change.Checkpoint.Seq, 121 | "SinkLastLSN": p.prev.LSN, 122 | "SinkLastSeq": p.prev.Seq, 123 | "Message": change.Message.String(), 124 | "ReplicatedClusters": p.ReplicatedClusters, 125 | }).Warn("message dropped due to its lsn smaller than the last lsn of sink") 126 | return nil 127 | } 128 | 129 | if change.Message.GetKeepAlive() != nil { 130 | if err := p.producer.Flush(); err != nil { 131 | return err 132 | } 133 | committed <- change.Checkpoint 134 | return nil 135 | } 136 | 137 | bs, err := proto.Marshal(change.Message) 138 | if err != nil { 139 | return err 140 | } 141 | 142 | p.producer.SendAsync(context.Background(), &pulsar.ProducerMessage{ 143 | Key: change.Checkpoint.ToKey(), // for topic compaction, not routing policy 144 | Payload: bs, 145 | ReplicationClusters: p.ReplicatedClusters, 146 | }, func(id pulsar.MessageID, message *pulsar.ProducerMessage, err error) { 147 | var idHex string 148 | if id != nil { 149 | idHex = hex.EncodeToString(id.Serialize()) 150 | } 151 | 152 | if err != nil { 153 | p.log.WithFields(logrus.Fields{ 154 | "MessageLSN": change.Checkpoint.LSN, 155 | "MessageIDHex": idHex, 156 | "ReplicatedClusters": p.ReplicatedClusters, 157 | }).Errorf("fail to send message to pulsar: %v", err) 158 | p.BaseSink.err.Store(fmt.Errorf("%w", err)) 159 | p.BaseSink.Stop() 160 | return 161 | } 162 | 163 | cp := change.Checkpoint 164 | if err := p.tracker.Commit(cp, id); err != nil { 165 | p.log.WithFields(logrus.Fields{ 166 | "MessageLSN": change.Checkpoint.LSN, 167 | "MessageSeq": change.Checkpoint.Seq, 168 | "MessageIDHex": idHex, 169 | }).Errorf("fail to commit message to tracker: %v", err) 170 | } 171 | committed <- cp 172 | }) 173 | return nil 174 | }) 175 | } 176 | -------------------------------------------------------------------------------- /pkg/sink/pulsar_test.go: -------------------------------------------------------------------------------- 1 | package sink 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | "time" 7 | 8 | "github.com/apache/pulsar-client-go/pulsar" 9 | "github.com/golang/mock/gomock" 10 | "github.com/replicase/pgcapture/internal/cursormock" 11 | "github.com/replicase/pgcapture/internal/test" 12 | "github.com/replicase/pgcapture/pkg/cursor" 13 | "github.com/replicase/pgcapture/pkg/pb" 14 | "github.com/replicase/pgcapture/pkg/source" 15 | ) 16 | 17 | func newPulsarSink(topic string, tracker *cursormock.MockTracker) *PulsarSink { 18 | return &PulsarSink{ 19 | PulsarOption: pulsar.ClientOptions{URL: test.GetPulsarURL()}, 20 | PulsarTopic: topic, 21 | SetupTracker: func(_ pulsar.Client, _ string) (cursor.Tracker, error) { 22 | return tracker, nil 23 | }, 24 | } 25 | } 26 | 27 | func newMockTracker(ctrl *gomock.Controller) *cursormock.MockTracker { 28 | return cursormock.NewMockTracker(ctrl) 29 | } 30 | 31 | func TestPulsarSink(t *testing.T) { 32 | topic := time.Now().Format("20060102150405") 33 | ctrl := gomock.NewController(t) 34 | tracker := cursormock.NewMockTracker(ctrl) 35 | 36 | sink := newPulsarSink(topic, tracker) 37 | 38 | // test empty checkpoint 39 | tracker.EXPECT().Last().Return(cursor.Checkpoint{}, nil) 40 | tracker.EXPECT().Start() 41 | 42 | cp, err := sink.Setup() 43 | if err != nil { 44 | t.Fatal(err) 45 | } 46 | 47 | if cp.LSN != 0 || len(cp.Data) != 0 { 48 | t.Fatalf("checkpoint of empty topic should be zero") 49 | } 50 | 51 | changes := make(chan source.Change) 52 | committed := sink.Apply(changes) 53 | 54 | for i := uint64(1); i < 4; i++ { 55 | for j := uint32(1); j 
< 4; j++ { 56 | change := source.Change{ 57 | Checkpoint: cursor.Checkpoint{LSN: i, Seq: j}, 58 | Message: &pb.Message{Type: &pb.Message_Commit{Commit: &pb.Commit{EndLsn: i}}}, 59 | } 60 | tracker.EXPECT().Commit(change.Checkpoint, gomock.Any()).Return(nil) 61 | changes <- change 62 | if recv := <-committed; recv.LSN != i || recv.Seq != j { 63 | t.Fatalf("unexpected %v", i) 64 | } 65 | } 66 | } 67 | 68 | // should send keep alive message to the committed 69 | // but tracker should not be committed 70 | change := source.Change{Checkpoint: cursor.Checkpoint{LSN: 4, Seq: 0}, Message: &pb.Message{Type: &pb.Message_KeepAlive{ 71 | KeepAlive: &pb.KeepAlive{}, 72 | }}} 73 | changes <- change 74 | if recv := <-committed; recv.LSN != change.Checkpoint.LSN || recv.Seq != change.Checkpoint.Seq { 75 | t.Fatalf("unexpected %v", change.Checkpoint.LSN) 76 | } 77 | 78 | close(changes) 79 | 80 | tracker.EXPECT().Close() 81 | if err := sink.Stop(); err != nil { 82 | t.Fatal("unexpected", err) 83 | } 84 | 85 | if _, more := <-changes; more { 86 | t.Fatal("unexpected") 87 | } 88 | 89 | // test restart from last message 90 | sink = newPulsarSink(topic, tracker) 91 | tracker.EXPECT().Last().Return(cursor.Checkpoint{LSN: 3, Seq: 3}, nil) 92 | tracker.EXPECT().Start() 93 | 94 | cp, err = sink.Setup() 95 | if err != nil { 96 | t.Fatal(err) 97 | } 98 | if cp.LSN != 3 || cp.Seq != 3 { 99 | t.Fatalf("checkpoint of non empty topic should be last message") 100 | } 101 | 102 | changes = make(chan source.Change) 103 | committed = sink.Apply(changes) 104 | 105 | // test avoid duplicate publish 106 | for i := uint64(1); i < 4; i++ { 107 | for j := uint32(1); j < 4; j++ { 108 | // all these changes should be ignored 109 | changes <- source.Change{Checkpoint: cursor.Checkpoint{LSN: i, Seq: j}, Message: &pb.Message{Type: &pb.Message_Commit{Commit: &pb.Commit{EndLsn: i}}}} 110 | } 111 | } 112 | 113 | // these new changes should be accepted 114 | for i := uint64(3); i < 5; i++ { 115 | for j := uint32(4); j < 5; j++ { 116 | change := source.Change{ 117 | Checkpoint: cursor.Checkpoint{LSN: i, Seq: j}, 118 | Message: &pb.Message{Type: &pb.Message_Commit{Commit: &pb.Commit{EndLsn: i}}}, 119 | } 120 | tracker.EXPECT().Commit(change.Checkpoint, gomock.Any()).Return(nil) 121 | changes <- change 122 | if recv := <-committed; recv.LSN != i || recv.Seq != j { 123 | t.Fatalf("unexpected %v", i) 124 | } 125 | } 126 | } 127 | close(changes) 128 | 129 | tracker.EXPECT().Close() 130 | if err := sink.Stop(); err != nil { 131 | t.Fatal("unexpected", err) 132 | } 133 | if _, more := <-changes; more { 134 | t.Fatal("unexpected") 135 | } 136 | } 137 | 138 | func TestPulsarSink_DuplicatedSink(t *testing.T) { 139 | topic := time.Now().Format("20060102150405") 140 | tracker := cursormock.NewMockTracker(gomock.NewController(t)) 141 | 142 | sink1 := newPulsarSink(topic, tracker) 143 | tracker.EXPECT().Last().Return(cursor.Checkpoint{}, nil) 144 | if _, err := sink1.Setup(); err != nil { 145 | t.Fatal(err) 146 | } 147 | 148 | sink2 := newPulsarSink(topic, tracker) 149 | if _, err := sink2.Setup(); err == nil || !strings.Contains(err.Error(), "is already connected to topic") { 150 | t.Fatal("should be failed with duplicated sink") 151 | } 152 | 153 | tracker.EXPECT().Close() 154 | if err := sink1.Stop(); err != nil { 155 | t.Fatal("unexpected", err) 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /pkg/source/main.go: -------------------------------------------------------------------------------- 1 | 
package source 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "net" 8 | "runtime" 9 | "sync/atomic" 10 | "time" 11 | 12 | "github.com/replicase/pgcapture/pkg/cursor" 13 | "github.com/replicase/pgcapture/pkg/pb" 14 | ) 15 | 16 | type Change struct { 17 | Checkpoint cursor.Checkpoint 18 | Message *pb.Message 19 | } 20 | 21 | type Source interface { 22 | Capture(cp cursor.Checkpoint) (changes chan Change, err error) 23 | Commit(cp cursor.Checkpoint) 24 | Error() error 25 | Stop() error 26 | } 27 | 28 | type RequeueSource interface { 29 | Source 30 | Requeue(cp cursor.Checkpoint, reason string) 31 | } 32 | 33 | type BaseSource struct { 34 | ReadTimeout time.Duration 35 | 36 | state int64 37 | stopped chan struct{} 38 | 39 | err atomic.Value 40 | } 41 | 42 | func (b *BaseSource) Capture(cp cursor.Checkpoint) (changes chan Change, err error) { 43 | panic("implement me") 44 | } 45 | 46 | func (b *BaseSource) Commit(cp cursor.Checkpoint) { 47 | panic("implement me") 48 | } 49 | 50 | func (b *BaseSource) Stop() error { 51 | switch atomic.LoadInt64(&b.state) { 52 | case 1, 2: 53 | for !atomic.CompareAndSwapInt64(&b.state, 2, 3) { 54 | runtime.Gosched() 55 | } 56 | fallthrough 57 | case 3: 58 | <-b.stopped 59 | } 60 | return b.Error() 61 | } 62 | 63 | func (b *BaseSource) Error() error { 64 | if err, ok := b.err.Load().(error); ok { 65 | return err 66 | } 67 | return nil 68 | } 69 | 70 | func (b *BaseSource) capture(readFn ReadFn, flushFn FlushFn) (chan Change, error) { 71 | if !atomic.CompareAndSwapInt64(&b.state, 0, 1) { 72 | return nil, nil 73 | } 74 | 75 | b.stopped = make(chan struct{}) 76 | changes := make(chan Change, 1000) 77 | 78 | atomic.StoreInt64(&b.state, 2) 79 | 80 | timeout := b.ReadTimeout 81 | if timeout == 0 { 82 | timeout = 5 * time.Second 83 | } 84 | 85 | go func() { 86 | defer close(b.stopped) 87 | defer close(changes) 88 | defer flushFn() 89 | for { 90 | ctx, cancel := context.WithTimeout(context.Background(), timeout) 91 | change, err := readFn(ctx) 92 | cancel() 93 | if atomic.LoadInt64(&b.state) != 2 { 94 | return 95 | } 96 | if isTimeout(err) { 97 | continue 98 | } 99 | if err != nil { 100 | b.err.Store(fmt.Errorf("%w", err)) 101 | return 102 | } 103 | if change.Message != nil { 104 | changes <- change 105 | } 106 | } 107 | }() 108 | return changes, nil 109 | } 110 | 111 | type CaptureFn func(changes chan Change) error 112 | type FlushFn func() 113 | type ReadFn func(ctx context.Context) (Change, error) 114 | 115 | func isTimeout(err error) bool { 116 | if errors.Is(err, context.DeadlineExceeded) { 117 | return true 118 | } 119 | 120 | var netErr net.Error 121 | return errors.As(err, &netErr) && netErr.Timeout() 122 | } 123 | -------------------------------------------------------------------------------- /pkg/source/main_test.go: -------------------------------------------------------------------------------- 1 | package source 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "testing" 7 | "time" 8 | 9 | "github.com/replicase/pgcapture/pkg/cursor" 10 | "github.com/replicase/pgcapture/pkg/pb" 11 | ) 12 | 13 | type source struct { 14 | BaseSource 15 | ReadFn ReadFn 16 | Flushed chan struct{} 17 | } 18 | 19 | func (s *source) Capture(cp cursor.Checkpoint) (changes chan Change, err error) { 20 | s.Flushed = make(chan struct{}) 21 | return s.BaseSource.capture(s.ReadFn, func() { 22 | close(s.Flushed) 23 | }) 24 | } 25 | 26 | func (s *source) Commit(cp cursor.Checkpoint) { 27 | 28 | } 29 | 30 | var ErrAny = errors.New("error") 31 | 32 | func TestBaseSource_Stop(t 
*testing.T) { 33 | source := source{ 34 | BaseSource: BaseSource{ReadTimeout: time.Second}, 35 | ReadFn: func(ctx context.Context) (Change, error) { 36 | return Change{Message: &pb.Message{}}, ctx.Err() 37 | }, 38 | } 39 | changes, _ := source.Capture(cursor.Checkpoint{}) 40 | 41 | go func() { 42 | time.Sleep(time.Second / 2) 43 | source.Stop() 44 | }() 45 | 46 | for range changes { 47 | } 48 | 49 | if _, more := <-changes; more { 50 | t.Fatal("changes channel should be closed after stop") 51 | } 52 | 53 | if _, more := <-source.Flushed; more { 54 | t.Fatal("clean func should be called once") 55 | } 56 | 57 | if source.Error() != nil { 58 | t.Fatalf("unexpected %v", source.Error()) 59 | } 60 | } 61 | 62 | func TestBaseSource_SecondCapture(t *testing.T) { 63 | source := source{ 64 | BaseSource: BaseSource{ReadTimeout: time.Second}, 65 | ReadFn: func(ctx context.Context) (Change, error) { 66 | return Change{Message: &pb.Message{}}, ctx.Err() 67 | }, 68 | } 69 | changes, _ := source.Capture(cursor.Checkpoint{}) 70 | 71 | if second, err := source.Capture(cursor.Checkpoint{}); second != nil || err != nil { 72 | t.Fatal("second capture should be ignored") 73 | } 74 | 75 | source.Stop() 76 | 77 | for range changes { 78 | } 79 | 80 | if _, more := <-changes; more { 81 | t.Fatal("changes channel should be closed after stop") 82 | } 83 | 84 | if _, more := <-source.Flushed; more { 85 | t.Fatal("clean func should be called once") 86 | } 87 | 88 | if source.Error() != nil { 89 | t.Fatalf("unexpected %v", source.Error()) 90 | } 91 | } 92 | 93 | func TestBaseSource_Timeout(t *testing.T) { 94 | count := 0 95 | source := source{ 96 | BaseSource: BaseSource{ReadTimeout: time.Second / 5}, 97 | ReadFn: func(ctx context.Context) (Change, error) { 98 | if count == 0 { 99 | time.Sleep(time.Second / 3) 100 | } 101 | count++ 102 | return Change{Message: &pb.Message{}}, ctx.Err() 103 | }, 104 | } 105 | changes, _ := source.Capture(cursor.Checkpoint{}) 106 | 107 | go func() { 108 | time.Sleep(time.Second / 2) 109 | source.Stop() 110 | }() 111 | 112 | for range changes { 113 | } 114 | 115 | if _, more := <-changes; more { 116 | t.Fatal("changes channel should be closed") 117 | } 118 | if _, more := <-source.Flushed; more { 119 | t.Fatal("clean func should be called once") 120 | } 121 | 122 | if source.Error() != nil { 123 | t.Fatalf("unexpected %v", source.Error()) 124 | } 125 | } 126 | 127 | func TestBaseSource_Error(t *testing.T) { 128 | source := source{ 129 | BaseSource: BaseSource{ReadTimeout: time.Second}, 130 | ReadFn: func(ctx context.Context) (Change, error) { 131 | return Change{}, ErrAny 132 | }, 133 | } 134 | changes, _ := source.Capture(cursor.Checkpoint{}) 135 | 136 | if _, more := <-changes; more { 137 | t.Fatal("changes channel should be closed") 138 | } 139 | if _, more := <-source.Flushed; more { 140 | t.Fatal("clean func should be called once") 141 | } 142 | 143 | if !errors.Is(source.Error(), ErrAny) { 144 | t.Fatalf("unexpected %v", source.Error()) 145 | } 146 | } 147 | 148 | func TestBaseSource_CapturePanic(t *testing.T) { 149 | defer func() { recover() }() 150 | s := BaseSource{} 151 | s.Capture(cursor.Checkpoint{}) 152 | t.Fatal("should panic") 153 | } 154 | 155 | func TestBaseSource_CommitPanic(t *testing.T) { 156 | defer func() { recover() }() 157 | s := BaseSource{} 158 | s.Commit(cursor.Checkpoint{}) 159 | t.Fatal("should panic") 160 | } 161 | -------------------------------------------------------------------------------- /pkg/sql/builder.go:
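Before moving on to the SQL builder, a hedged sketch of a minimal source built on BaseSource (the tickSource type is hypothetical, written as if inside package source): the capture loop polls ReadFn under ReadTimeout, silently retries on timeout, skips changes whose Message is nil, and stops on any other error:

```go
// tickSource is illustrative only and not part of the repository.
type tickSource struct {
	BaseSource
	lsn uint64
}

func (s *tickSource) Capture(cp cursor.Checkpoint) (chan Change, error) {
	return s.capture(
		func(ctx context.Context) (Change, error) {
			s.lsn++
			return Change{
				Checkpoint: cursor.Checkpoint{LSN: s.lsn},
				Message:    &pb.Message{Type: &pb.Message_KeepAlive{KeepAlive: &pb.KeepAlive{}}},
			}, ctx.Err()
		},
		func() { /* flush: release connections or replication slots here */ },
	)
}

func (s *tickSource) Commit(cp cursor.Checkpoint) {}
```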
-------------------------------------------------------------------------------- 1 | package sql 2 | 3 | import ( 4 | "strconv" 5 | "strings" 6 | 7 | "github.com/replicase/pgcapture/pkg/pb" 8 | ) 9 | 10 | func DeleteQuery(namespace, table string, fields []*pb.Field) string { 11 | var query strings.Builder 12 | query.WriteString("delete from \"") 13 | query.WriteString(namespace) 14 | query.WriteString("\".\"") 15 | query.WriteString(table) 16 | query.WriteString("\" where \"") 17 | 18 | for i, field := range fields { 19 | query.WriteString(field.Name) 20 | query.WriteString("\"=$" + strconv.Itoa(i+1)) 21 | if i != len(fields)-1 { 22 | query.WriteString(" and \"") 23 | } 24 | } 25 | return query.String() 26 | } 27 | 28 | func UpdateQuery(namespace, table string, sets, keys []*pb.Field) string { 29 | var query strings.Builder 30 | query.WriteString("update \"") 31 | query.WriteString(namespace) 32 | query.WriteString("\".\"") 33 | query.WriteString(table) 34 | query.WriteString("\" set \"") 35 | 36 | var j int 37 | for ; j < len(sets); j++ { 38 | field := sets[j] 39 | query.WriteString(field.Name) 40 | query.WriteString("\"=$" + strconv.Itoa(j+1)) 41 | if j != len(sets)-1 { 42 | query.WriteString(",\"") 43 | } 44 | } 45 | 46 | query.WriteString(" where \"") 47 | 48 | for i := 0; i < len(keys); i++ { 49 | k := i + j 50 | field := keys[i] 51 | 52 | query.WriteString(field.Name) 53 | query.WriteString("\"=$" + strconv.Itoa(k+1)) 54 | if i != len(keys)-1 { 55 | query.WriteString(" and \"") 56 | } 57 | } 58 | 59 | return query.String() 60 | } 61 | 62 | type InsertOption struct { 63 | Namespace string 64 | Table string 65 | Count int 66 | Keys []string 67 | Fields []*pb.Field 68 | PGVersion int64 69 | } 70 | 71 | func InsertQuery(opt InsertOption) string { 72 | var query strings.Builder 73 | query.WriteString("insert into \"") 74 | query.WriteString(opt.Namespace) 75 | query.WriteString("\".\"") 76 | query.WriteString(opt.Table) 77 | query.WriteString("\"(\"") 78 | 79 | fields := opt.Fields 80 | for i, field := range fields { 81 | query.WriteString(field.Name) 82 | if i == len(fields)-1 { 83 | query.WriteString("\")") 84 | } else { 85 | query.WriteString("\",\"") 86 | } 87 | } 88 | 89 | if opt.PGVersion >= 100000 { 90 | // to handle the case where the target table contains the GENERATED ALWAYS constraint; 91 | // according to the SQL standard, OVERRIDING SYSTEM VALUE can only be specified if an identity column that is generated always exists, 92 | // but PG will allow the clause to be specified even if the target table does not contain such a column.
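// For example (hypothetical table): given create table "t" ("id" int generated always as identity, "v" text),
// an explicit id can only be inserted via: insert into "t"("id","v") OVERRIDING SYSTEM VALUE values ($1,$2);
// PG accepts the same clause even when "t" declares no identity column at all.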
93 | // ref: https://www.postgresql.org/docs/10/sql-insert.html 94 | query.WriteString(" OVERRIDING SYSTEM VALUE") 95 | } 96 | query.WriteString(" values (") 97 | 98 | i := 1 99 | for j := 0; j < opt.Count; j++ { 100 | for range fields { 101 | query.WriteString("$" + strconv.Itoa(i)) 102 | if i%len(fields) == 0 { 103 | query.WriteString(")") 104 | } else { 105 | query.WriteString(",") 106 | } 107 | i++ 108 | } 109 | if j < opt.Count-1 { 110 | query.WriteString(",(") 111 | } 112 | } 113 | 114 | keys := opt.Keys 115 | if len(keys) != 0 { 116 | query.WriteString(" ON CONFLICT (") 117 | query.WriteString(strings.Join(keys, ",")) 118 | query.WriteString(") DO NOTHING") 119 | } 120 | 121 | return query.String() 122 | } 123 | -------------------------------------------------------------------------------- /pkg/sql/builder_test.go: -------------------------------------------------------------------------------- 1 | package sql 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/replicase/pgcapture/pkg/pb" 7 | ) 8 | 9 | func TestInsertQuery(t *testing.T) { 10 | opt := InsertOption{ 11 | Namespace: "public", 12 | Table: "my_table", 13 | Fields: []*pb.Field{{Name: "f1"}, {Name: "f2"}}, 14 | Count: 4, 15 | } 16 | 17 | q := InsertQuery(opt) 18 | if q != `insert into "public"."my_table"("f1","f2") values ($1,$2),($3,$4),($5,$6),($7,$8)` { 19 | t.Fatalf("not expected %q", q) 20 | } 21 | } 22 | 23 | func TestInsertQueryConflict(t *testing.T) { 24 | opt := InsertOption{ 25 | Namespace: "public", 26 | Table: "my_table", 27 | Keys: []string{"id", "name"}, 28 | Fields: []*pb.Field{{Name: "f1"}, {Name: "f2"}}, 29 | Count: 4, 30 | } 31 | 32 | q := InsertQuery(opt) 33 | if q != `insert into "public"."my_table"("f1","f2") values ($1,$2),($3,$4),($5,$6),($7,$8) ON CONFLICT (id,name) DO NOTHING` { 34 | t.Fatalf("not expected %q", q) 35 | } 36 | } 37 | 38 | func TestInsertQueryOverridingSystemValue(t *testing.T) { 39 | opt := InsertOption{ 40 | Namespace: "public", 41 | Table: "my_table", 42 | Fields: []*pb.Field{{Name: "f1"}, {Name: "f2"}}, 43 | Count: 4, 44 | PGVersion: 100000, 45 | } 46 | 47 | q := InsertQuery(opt) 48 | if q != `insert into "public"."my_table"("f1","f2") OVERRIDING SYSTEM VALUE values ($1,$2),($3,$4),($5,$6),($7,$8)` { 49 | t.Fatalf("not expected %q", q) 50 | } 51 | 52 | } 53 | 54 | func TestDeleteQuery(t *testing.T) { 55 | q := DeleteQuery("public", "my_table", []*pb.Field{{Name: "f1"}, {Name: "f2"}, {Name: "f3"}}) 56 | if q != `delete from "public"."my_table" where "f1"=$1 and "f2"=$2 and "f3"=$3` { 57 | t.Fatalf("not expected %q", q) 58 | } 59 | } 60 | 61 | func TestUpdateQuery(t *testing.T) { 62 | q := UpdateQuery("public", "my_table", []*pb.Field{{Name: "f1"}, {Name: "f2"}}, []*pb.Field{{Name: "f3"}, {Name: "f4"}}) 63 | if q != `update "public"."my_table" set "f1"=$1,"f2"=$2 where "f3"=$3 and "f4"=$4` { 64 | t.Fatalf("not expected %q", q) 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /pkg/sql/source.go: -------------------------------------------------------------------------------- 1 | package sql 2 | 3 | var QueryAttrTypeOID = `SELECT nspname, relname, attname, atttypid, relreplident 4 | FROM pg_catalog.pg_namespace n 5 | JOIN pg_catalog.pg_class c ON c.relnamespace = n.oid AND c.relkind = 'r' 6 | JOIN pg_catalog.pg_attribute a ON a.attrelid = c.oid AND a.attnum > 0 and a.attisdropped = false 7 | WHERE n.nspname NOT IN ('pg_catalog', 'information_schema', 'pglogical') AND n.nspname !~ '^pg_toast';` 8 | 9 | var QueryIdentityKeys = `SELECT 
10 | nspname, 11 | relname, 12 | array(select attname from pg_catalog.pg_attribute where attrelid = i.indrelid AND attnum > 0 AND attnum = ANY(i.indkey)) as keys, 13 | array(select column_name::text from information_schema.columns where table_schema = n.nspname AND table_name = c.relname AND identity_generation IS NOT NULL) as identity_generation_columns, 14 | array(select column_name::text from information_schema.columns where table_schema = n.nspname AND table_name = c.relname AND is_generated = 'ALWAYS') as generated_columns 15 | FROM pg_catalog.pg_index i 16 | JOIN pg_catalog.pg_class c ON c.oid = i.indrelid AND c.relkind = 'r' 17 | JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace AND n.nspname NOT IN ('pg_catalog', 'information_schema', 'pglogical') AND n.nspname !~ '^pg_toast' 18 | WHERE (i.indisprimary OR i.indisunique) AND i.indisvalid AND i.indpred IS NULL ORDER BY indisprimary;` 19 | 20 | var CreateLogicalSlot = `SELECT pg_create_logical_replication_slot($1, $2);` 21 | 22 | var CreatePublication = `CREATE PUBLICATION %s FOR ALL TABLES;` 23 | 24 | var InstallExtension = `CREATE EXTENSION IF NOT EXISTS pgcapture;` 25 | 26 | var ServerVersionNum = `SHOW server_version_num;` 27 | 28 | var QueryReplicationSlot = `SELECT confirmed_flush_lsn FROM pg_replication_slots WHERE slot_name = $1;` 29 | -------------------------------------------------------------------------------- /python/README.md: -------------------------------------------------------------------------------- 1 | # PGCapture Python Client 2 | 3 | ## Prepare dependencies 4 | 5 | ```sh 6 | python3 -m pip install grpcio 7 | python3 -m pip install git+https://github.com/rueian/pgcapture.git#subdirectory=python 8 | 9 | ``` 10 | 11 | ## Check out `example.py` 12 | 13 | ```sh 14 | python3 example.py 15 | ``` 16 | -------------------------------------------------------------------------------- /python/example.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pgcapture import PGCaptureClient 3 | 4 | def dump(msg): 5 | print(str(msg['change'])) 6 | 7 | if __name__ == '__main__': 8 | client = PGCaptureClient('127.0.0.1:10000', 'database|subscription', TableRegex='.*') 9 | asyncio.run(client.capture(dump)) 10 | -------------------------------------------------------------------------------- /python/pgcapture/__init__.py: -------------------------------------------------------------------------------- 1 | from .client import PGCaptureClient -------------------------------------------------------------------------------- /python/pgcapture/client.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import grpc 3 | import asyncio 4 | 5 | from .
decoders import OIDRegistery 6 | from pb.pgcapture_pb2 import CaptureInit, CaptureAck, CaptureRequest 7 | from pb.pgcapture_pb2_grpc import DBLogGatewayStub 8 | from google.protobuf.struct_pb2 import Struct 9 | 10 | class PGCaptureClient: 11 | 12 | def __init__(self, grpc_addr, uri, **kwargs): 13 | params = Struct() 14 | params.update(kwargs) 15 | 16 | self._grpc_addr = grpc_addr 17 | self._init = CaptureRequest(init=CaptureInit(uri=uri, parameters=params)) 18 | 19 | async def capture(self, handler): 20 | async with grpc.aio.insecure_channel(self._grpc_addr) as channel: 21 | stub = DBLogGatewayStub(channel) 22 | stream = stub.Capture() 23 | 24 | await stream.write(self._init) 25 | async for msg in stream: 26 | ack = CaptureAck(checkpoint=msg.checkpoint) 27 | try: 28 | handler({ 29 | 'checkpoint': msg.checkpoint, 30 | 'change': { 31 | 'op': msg.change.op, 32 | 'schema': msg.change.schema, 33 | 'table': msg.change.table, 34 | 'new': decode(msg.change.new), 35 | 'old': decode(msg.change.old), 36 | } 37 | }) 38 | except Exception as e: 39 | ack.requeue_reason=str(e) 40 | print("requeue failed {}.{} record with error={}".format(msg.change.schema, msg.change.table, str(e)), file=sys.stderr) 41 | 42 | await stream.write(CaptureRequest(ack=ack)) 43 | 44 | 45 | def decode(fields): 46 | decoded = [] 47 | for field in fields: 48 | if field.HasField('binary'): 49 | decode = OIDRegistery[field.oid] 50 | if not decode: 51 | print("unsupported oid '{}' on field '{}'".format(field.oid, field.name), file=sys.stderr) 52 | continue 53 | decoded.append({'name': field.name, 'oid': field.oid, 'value': decode(field.binary)}) 54 | elif field.HasField("text"): 55 | decoded.append({'name': field.name, 'oid': field.oid, 'value': field.text}) 56 | else: 57 | decoded.append({'name': field.name, 'oid': field.oid, 'value': None}) 58 | 59 | return decoded 60 | -------------------------------------------------------------------------------- /python/pgcapture/decoders.py: -------------------------------------------------------------------------------- 1 | import json 2 | import struct 3 | from datetime import date, datetime, time, timedelta 4 | from decimal import Decimal 5 | from typing import Any, Callable, Dict, Tuple, cast 6 | from uuid import UUID 7 | 8 | _UnpackInt = Callable[[bytes], Tuple[int]] 9 | _UnpackFloat = Callable[[bytes], Tuple[float]] 10 | 11 | _unpack_int2 = cast(_UnpackInt, struct.Struct("!h").unpack) 12 | _unpack_int4 = cast(_UnpackInt, struct.Struct("!i").unpack) 13 | _unpack_uint4 = cast(_UnpackInt, struct.Struct("!I").unpack) 14 | _unpack_int8 = cast(_UnpackInt, struct.Struct("!q").unpack) 15 | _unpack_float4 = cast(_UnpackFloat, struct.Struct("!f").unpack) 16 | _unpack_float8 = cast(_UnpackFloat, struct.Struct("!d").unpack) 17 | microsecFromUnixEpochToY2K = 946684800 * 1000000 18 | 19 | _struct_head = struct.Struct("!III") 20 | _struct_dim = struct.Struct("!II") 21 | _struct_len = struct.Struct("!i") 22 | 23 | def BoolDecoder(data): 24 | return data != b"\x00" 25 | 26 | def ByteaDecoder(data): 27 | return data 28 | 29 | def Int8Decoder(data): 30 | return _unpack_int8(data)[0] 31 | 32 | def Int2Decoder(data): 33 | return _unpack_int2(data)[0] 34 | 35 | def Int4Decoder(data): 36 | return _unpack_int4(data)[0] 37 | 38 | def TextDecoder(data): 39 | return str(data, 'utf8') 40 | 41 | def JSONDecoder(data): 42 | return json.loads(str(data, 'utf8')) 43 | 44 | def Float4Decoder(data): 45 | return _unpack_float4(data)[0] 46 | 47 | def Float8Decoder(data): 48 | return _unpack_float8(data)[0] 49 | 50 | def 
UnknownDecoder(data): 51 | return str(data, 'utf8') 52 | 53 | def BoolArrayDecoder(data): 54 | return decodeArray(data, BoolDecoder) 55 | 56 | def Int2ArrayDecoder(data): 57 | return decodeArray(data, Int2Decoder) 58 | 59 | def Int4ArrayDecoder(data): 60 | return decodeArray(data, Int4Decoder) 61 | 62 | def TextArrayDecoder(data): 63 | return decodeArray(data, TextDecoder) 64 | 65 | def ByteaArrayDecoder(data): 66 | return decodeArray(data, ByteaDecoder) 67 | 68 | def BPCharArrayDecoder(data): 69 | return decodeArray(data, BPCharDecoder) 70 | 71 | def VarcharArrayDecoder(data): 72 | return decodeArray(data, VarcharDecoder) 73 | 74 | def Int8ArrayDecoder(data): 75 | return decodeArray(data, Int8Decoder) 76 | 77 | def Float4ArrayDecoder(data): 78 | return decodeArray(data, Float4Decoder) 79 | 80 | def Float8ArrayDecoder(data): 81 | return decodeArray(data, Float8Decoder) 82 | 83 | def BPCharDecoder(data): 84 | return str(data, 'utf8') 85 | 86 | def VarcharDecoder(data): 87 | return str(data, 'utf8') 88 | 89 | def DateDecoder(data): 90 | offset = _unpack_int4(data)[0] 91 | return date(2000, 1, 1) + timedelta(offset) 92 | 93 | def TimeDecoder(data): 94 | micros = _unpack_int8(data)[0] 95 | return time.min + timedelta(0, 0, micros) 96 | 97 | def TimestampDecoder(data): 98 | microsecSinceY2K = _unpack_int8(data)[0] 99 | microsecSinceUnixEpoch = microsecFromUnixEpochToY2K + microsecSinceY2K 100 | return datetime.utcfromtimestamp(microsecSinceUnixEpoch/1000000) + timedelta(microseconds=microsecSinceUnixEpoch%1000000) 101 | 102 | def TimestampArrayDecoder(data): 103 | return decodeArray(data, TimestampDecoder) 104 | 105 | def DateArrayDecoder(data): 106 | return decodeArray(data, DateDecoder) 107 | 108 | def TimestamptzDecoder(data): 109 | return TimestampDecoder(data) 110 | 111 | def TimestamptzArrayDecoder(data): 112 | return decodeArray(data, TimestamptzDecoder) 113 | 114 | def UUIDDecoder(data): 115 | return UUID(bytes=data) 116 | 117 | def UUIDArrayDecoder(data): 118 | return decodeArray(data, UUIDDecoder) 119 | 120 | def JSONBDecoder(data): 121 | return json.loads(str(data[1:], 'utf8')) 122 | 123 | def JSONBArrayDecoder(data): 124 | return decodeArray(data, JSONBDecoder) 125 | 126 | def decodeArray(data, callback): 127 | ndims, hasnull, oid = _struct_head.unpack_from(data[:12]) 128 | if not ndims: 129 | return [] 130 | 131 | p = 12 + 8 * ndims 132 | dims = [ 133 | _struct_dim.unpack_from(data, i)[0] for i in list(range(12, p, 8)) 134 | ] 135 | 136 | def consume(p): 137 | while 1: 138 | size = _struct_len.unpack_from(data, p)[0] 139 | p += 4 140 | if size != -1: 141 | yield callback(data[p : p + size]) 142 | p += size 143 | else: 144 | yield None 145 | 146 | items = consume(p) 147 | 148 | def agg(dims): 149 | if not dims: 150 | return next(items) 151 | else: 152 | dim, dims = dims[0], dims[1:] 153 | return [agg(dims) for _ in range(dim)] 154 | 155 | return agg(dims) 156 | 157 | OIDRegistery = { 158 | 16: BoolDecoder, 159 | 17: ByteaDecoder, 160 | # 18: QCharDecoder, 161 | # 19: NameDecoder, 162 | 20: Int8Decoder, 163 | 21: Int2Decoder, 164 | 23: Int4Decoder, 165 | 25: TextDecoder, 166 | # 26: OIDDecoder, 167 | # 27: TIDDecoder, 168 | # 28: XIDDecoder, 169 | # 29: CIDDecoder, 170 | 114: JSONDecoder, 171 | # 600: PointDecoder, 172 | # 601: LsegDecoder, 173 | # 602: PathDecoder, 174 | # 603: BoxDecoder, 175 | # 604: PolygonDecoder, 176 | # 628: LineDecoder, 177 | # 650: CIDRDecoder, 178 | # 651: CIDRArrayDecoder, 179 | 700: Float4Decoder, 180 | 701: Float8Decoder, 181 | # 718: CircleDecoder, 182 
| 705: UnknownDecoder, 183 | # 829: MacaddrDecoder, 184 | # 869: InetDecoder, 185 | 1000: BoolArrayDecoder, 186 | 1005: Int2ArrayDecoder, 187 | 1007: Int4ArrayDecoder, 188 | 1009: TextArrayDecoder, 189 | 1001: ByteaArrayDecoder, 190 | 1014: BPCharArrayDecoder, 191 | 1015: VarcharArrayDecoder, 192 | 1016: Int8ArrayDecoder, 193 | 1021: Float4ArrayDecoder, 194 | 1022: Float8ArrayDecoder, 195 | # 1033: ACLItemDecoder, 196 | # 1034: ACLItemArrayDecoder, 197 | # 1041: InetArrayDecoder, 198 | 1042: BPCharDecoder, 199 | 1043: VarcharDecoder, 200 | 1082: DateDecoder, 201 | 1083: TimeDecoder, 202 | 1114: TimestampDecoder, 203 | 1115: TimestampArrayDecoder, 204 | 1182: DateArrayDecoder, 205 | 1184: TimestamptzDecoder, 206 | 1185: TimestamptzArrayDecoder, 207 | # 1186: IntervalDecoder, 208 | # 1231: NumericArrayDecoder, 209 | # 1560: BitDecoder, 210 | # 1562: VarbitDecoder, 211 | # 1700: NumericDecoder, 212 | # 2249: RecordDecoder, 213 | 2950: UUIDDecoder, 214 | 2951: UUIDArrayDecoder, 215 | 3802: JSONBDecoder, 216 | 3807: JSONBArrayDecoder, 217 | # 3912: DaterangeDecoder, 218 | # 3904: Int4rangeDecoder, 219 | # 3906: NumrangeDecoder, 220 | # 3908: TsrangeDecoder, 221 | # 3909: TsrangeArrayDecoder, 222 | # 3910: TstzrangeDecoder, 223 | # 3911: TstzrangeArrayDecoder, 224 | # 3926: Int8rangeDecoder, 225 | } 226 | -------------------------------------------------------------------------------- /python/setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | setuptools.setup( 4 | name="pgcapture", 5 | version="0.1", 6 | author="rueian", 7 | author_github="http://github.com/rueian", 8 | description="Python client library for pgcapture, a scalable Netflix DBLog implementation for PostgreSQL", 9 | packages=["pgcapture", "pb"] 10 | ) --------------------------------------------------------------------------------
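For readers coming from the Go side, a hedged sketch of the same capture handshake that example.py performs above, using the generated pb package. The generated identifiers below (NewDBLogGatewayClient, CaptureRequest_Init, CaptureInit, CaptureAck, and the message field names) follow standard protoc-gen-go naming but have not been verified against pgcapture_grpc.pb.go:

```go
package main

import (
	"context"
	"log"

	"github.com/replicase/pgcapture/pkg/pb"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
	"google.golang.org/protobuf/types/known/structpb"
)

func main() {
	conn, err := grpc.Dial("127.0.0.1:10000", grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	stream, err := pb.NewDBLogGatewayClient(conn).Capture(context.Background())
	if err != nil {
		log.Fatal(err)
	}

	// send the init frame first, mirroring CaptureRequest(init=CaptureInit(...)) in example.py
	params, _ := structpb.NewStruct(map[string]interface{}{"TableRegex": ".*"})
	if err := stream.Send(&pb.CaptureRequest{Type: &pb.CaptureRequest_Init{
		Init: &pb.CaptureInit{Uri: "database|subscription", Parameters: params},
	}}); err != nil {
		log.Fatal(err)
	}

	// then ack every received message, as the python client does
	for {
		msg, err := stream.Recv()
		if err != nil {
			log.Fatal(err)
		}
		log.Println(msg.Change)
		if err := stream.Send(&pb.CaptureRequest{Type: &pb.CaptureRequest_Ack{
			Ack: &pb.CaptureAck{Checkpoint: msg.Checkpoint},
		}}); err != nil {
			log.Fatal(err)
		}
	}
}
```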