├── .github └── dependabot.yml ├── .gitignore ├── Dockerfile ├── Gopkg.lock ├── Gopkg.toml ├── LICENSE ├── README.md ├── TODO.md ├── eventqueue ├── event_queue.go └── event_queue_test.go ├── main.go ├── main_test.go ├── script ├── bootstrap ├── build ├── deploy ├── setup ├── test └── tests │ ├── go-test │ └── gometalinter └── sql ├── migrations.sql ├── sql_test.go └── triggers.sql /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | registries: 3 | docker-registry-eu-gcr-io: 4 | type: docker-registry 5 | url: https://eu.gcr.io 6 | username: _json_key 7 | password: "${{secrets.DOCKER_REGISTRY_EU_GCR_IO_PASSWORD}}" 8 | 9 | updates: 10 | - package-ecosystem: docker 11 | directory: "/" 12 | schedule: 13 | interval: weekly 14 | day: sunday 15 | time: "04:00" 16 | open-pull-requests-limit: 15 17 | registries: 18 | - docker-registry-eu-gcr-io 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /vendor 2 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:alpine as builder 2 | 3 | WORKDIR /go/src/github.com/blendle/pg2kafka 4 | ADD . ./ 5 | 6 | RUN apk --update --no-cache add git alpine-sdk bash 7 | RUN wget -qO- https://github.com/edenhill/librdkafka/archive/v0.11.4-RC1.tar.gz | tar xz 8 | RUN cd librdkafka-* && ./configure && make && make install 9 | RUN go get github.com/golang/dep/cmd/dep && dep ensure -vendor-only 10 | RUN go build -ldflags "-X main.version=$(git rev-parse --short @) -s -extldflags -static" -a -installsuffix cgo . 11 | 12 | FROM scratch 13 | LABEL maintainer="Jurre Stender " 14 | COPY sql ./sql 15 | COPY --from=builder /go/src/github.com/blendle/pg2kafka/pg2kafka / 16 | COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt 17 | ENTRYPOINT ["/pg2kafka"] 18 | -------------------------------------------------------------------------------- /Gopkg.lock: -------------------------------------------------------------------------------- 1 | # This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'. 
2 | 3 | 4 | [[projects]] 5 | branch = "master" 6 | digest = "1:518723f68a6013939a5f71851da62cc3033e27be18944d7b70f9a9228cf464b5" 7 | name = "github.com/blendle/go-logger" 8 | packages = ["."] 9 | pruneopts = "" 10 | revision = "2399adc0cd390782c11de7e2dd0bbcd5d4e7a110" 11 | 12 | [[projects]] 13 | branch = "master" 14 | digest = "1:6dfb0a0d2ffb4aed50b1101fab4a559cc690e85a6bb7bc865340d80571cbb908" 15 | name = "github.com/buger/jsonparser" 16 | packages = ["."] 17 | pruneopts = "" 18 | revision = "1a29609e0929ccd5666069e2e7213c2c69fa4ac2" 19 | 20 | [[projects]] 21 | digest = "1:9ebdaf26c9ef6a4ddaf0815a8da7048e8cc2b4fdf76cb72737245e7230c44015" 22 | name = "github.com/confluentinc/confluent-kafka-go" 23 | packages = ["kafka"] 24 | pruneopts = "" 25 | revision = "f696f02dcbb04d95b103c5b848bb15f749a996f7" 26 | version = "v0.11.0" 27 | 28 | [[projects]] 29 | branch = "master" 30 | digest = "1:de80b2e189ad1c264c7fec7c49bc62a724a6118814cf950f7b73f6e883b72c0e" 31 | name = "github.com/lib/pq" 32 | packages = [ 33 | ".", 34 | "oid", 35 | ] 36 | pruneopts = "" 37 | revision = "017e4c14d80628353eaee677a1aeffe47a10c0dc" 38 | 39 | [[projects]] 40 | digest = "1:7365acd48986e205ccb8652cc746f09c8b7876030d53710ea6ef7d0bd0dcd7ca" 41 | name = "github.com/pkg/errors" 42 | packages = ["."] 43 | pruneopts = "" 44 | revision = "645ef00459ed84a119197bfb8d8205042c6df63d" 45 | version = "v0.8.0" 46 | 47 | [[projects]] 48 | digest = "1:53a6fbacf8dce8fc9cbd4fab6a56eb169be7cb79b9fe601a152d6d9af68312bb" 49 | name = "go.uber.org/atomic" 50 | packages = ["."] 51 | pruneopts = "" 52 | revision = "4e336646b2ef9fc6e47be8e21594178f98e5ebcf" 53 | version = "v1.2.0" 54 | 55 | [[projects]] 56 | digest = "1:22c7effcb4da0eacb2bb1940ee173fac010e9ef3c691f5de4b524d538bd980f5" 57 | name = "go.uber.org/multierr" 58 | packages = ["."] 59 | pruneopts = "" 60 | revision = "3c4937480c32f4c13a875a1829af76c98ca3d40a" 61 | version = "v1.1.0" 62 | 63 | [[projects]] 64 | digest = "1:cfeddca8479edac1039a52dac1ff6e7a76252ebaecb86680383b0522423b7565" 65 | name = "go.uber.org/zap" 66 | packages = [ 67 | ".", 68 | "buffer", 69 | "internal/bufferpool", 70 | "internal/color", 71 | "internal/exit", 72 | "zapcore", 73 | ] 74 | pruneopts = "" 75 | revision = "35aad584952c3e7020db7b839f6b102de6271f89" 76 | version = "v1.7.1" 77 | 78 | [solve-meta] 79 | analyzer-name = "dep" 80 | analyzer-version = 1 81 | input-imports = [ 82 | "github.com/blendle/go-logger", 83 | "github.com/buger/jsonparser", 84 | "github.com/confluentinc/confluent-kafka-go/kafka", 85 | "github.com/lib/pq", 86 | "github.com/lib/pq/oid", 87 | "github.com/pkg/errors", 88 | "go.uber.org/atomic", 89 | "go.uber.org/multierr", 90 | "go.uber.org/zap", 91 | "go.uber.org/zap/buffer", 92 | "go.uber.org/zap/zapcore", 93 | ] 94 | solver-name = "gps-cdcl" 95 | solver-version = 1 96 | -------------------------------------------------------------------------------- /Gopkg.toml: -------------------------------------------------------------------------------- 1 | [[constraint]] 2 | branch = "master" 3 | name = "github.com/buger/jsonparser" 4 | 5 | [[constraint]] 6 | branch = "master" 7 | name = "github.com/blendle/go-logger" 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | ISC License 2 | 3 | Copyright (c) 2017 Blendle 4 | 5 | Permission to use, copy, modify, and/or distribute this software for any 6 | purpose with or without fee is hereby granted, provided that the above 7 | 
copyright notice and this permission notice appear in all copies. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pg2kafka 2 | -------- 3 | 4 | This service adds triggers to a given table in your Postgres database after 5 | taking a snapshot of its initial representation, and tracks changes to that 6 | table to deliver them to a Kafka topic. 7 | 8 | It consists of two parts: 9 | 10 | - A schema in your DB containing an `outbound_event_queue` table and all the 11 | necessary functions and triggers to take snapshots and track changes. 12 | - A small executable that reads events from said table and ships them to Kafka. 13 | 14 | *pg2kafka is still in early development*; it is not advised to use it in 15 | production. If you run into issues, please open an issue. 16 | 17 | We use this as a way to reliably get data out of our hosted PostgreSQL databases 18 | where we cannot use systems like [debezium](http://debezium.io) or 19 | [bottled water](https://github.com/confluentinc/bottledwater-pg), since we do not 20 | have access to the WAL logs and cannot install native extensions or run binaries 21 | on the database host machine. 22 | 23 | Changes made by the following SQL statements are sent to the relevant topic: 24 | 25 | * `INSERT` 26 | * `UPDATE` 27 | * `DELETE` 28 | 29 | ## Usage 30 | 31 | Connect pg2kafka to the database you want to stream changes from, and set the 32 | `PERFORM_MIGRATIONS` env var to `true`. This will create a schema `pg2kafka` in 33 | said DB and will set up an `outbound_event_queue` table there, together with the 34 | necessary functions and triggers to start exporting data. 35 | 36 | In order to start tracking changes for a table, you need to execute the 37 | `pg2kafka.setup` function with the table name and a column to use as external 38 | ID. The external ID is used as the partitioning key in Kafka; this 39 | ensures that messages for a given entity will always end up in order, on the 40 | same partition. The example below will add the trigger to the `products` table 41 | and use its `sku` column as the external ID.
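pg2kafka itself is configured entirely through environment variables. As a rough reference for the example below (the connection string and broker address are placeholders for your own setup; the same variables are described in the Development section), a minimal configuration might look like this:

```bash
export DATABASE_URL="postgres://user:password@localhost/shop_test?sslmode=disable"
export KAFKA_BROKER="localhost:9092"
export PERFORM_MIGRATIONS=true
```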
42 | 43 | Let's say we have a database called `shop_test`: 44 | 45 | ```bash 46 | $ createdb shop_test 47 | ``` 48 | 49 | It contains a table `products`: 50 | 51 | ```sql 52 | CREATE TABLE products ( 53 | id BIGSERIAL, 54 | sku TEXT, 55 | name TEXT 56 | ); 57 | ``` 58 | 59 | And it already has some data in it: 60 | 61 | ```sql 62 | INSERT INTO products (sku, name) VALUES ('CM01-R', 'Red Coffee Mug'); 63 | INSERT INTO products (sku, name) VALUES ('CM01-B', 'Blue Coffee Mug'); 64 | ``` 65 | 66 | Given that we've already connected pg2kafka to it, and it has run its 67 | migrations, we can start tracking changes to the `products` table: 68 | 69 | ```sql 70 | SELECT pg2kafka.setup('products', 'sku'); 71 | ``` 72 | 73 | This will create snapshots of the current data in that table: 74 | 75 | ```json 76 | { 77 | "uuid": "ea76e080-6acd-413a-96b3-131a42ab1002", 78 | "external_id": "CM01-B", 79 | "statement": "SNAPSHOT", 80 | "data": { 81 | "id": 2, 82 | "sku": "CM01-B", 83 | "name": "Blue Coffee Mug" 84 | }, 85 | "created_at": "2017-11-02T16:14:36.709116Z" 86 | } 87 | { 88 | "uuid": "e1c0008d-6b7a-455a-afa6-c1c2eebd65d3", 89 | "external_id": "CM01-R", 90 | "statement": "SNAPSHOT", 91 | "data": { 92 | "id": 1, 93 | "sku": "CM01-R", 94 | "name": "Red Coffee Mug" 95 | }, 96 | "created_at": "2017-11-02T16:14:36.709116Z" 97 | } 98 | ``` 99 | 100 | Once you start making changes to your table, you should see events 101 | come in on the `pg2kafka.shop_test.products` topic: 102 | 103 | ```sql 104 | UPDATE products SET name = 'Big Red Coffee Mug' WHERE sku = 'CM01-R'; 105 | ``` 106 | 107 | ```json 108 | { 109 | "uuid": "d6521ce5-4068-45e4-a9ad-c0949033a55b", 110 | "external_id": "CM01-R", 111 | "statement": "UPDATE", 112 | "data": { 113 | "name": "Big Red Coffee Mug" 114 | }, 115 | "created_at": "2017-11-02T16:15:13.94077Z" 116 | } 117 | ``` 118 | 119 | The producer topics are all of the form 120 | `pg2kafka.$database_name.$table_name`. You need to make sure that this topic 121 | exists, or pg2kafka will crash. 122 | 123 | You can optionally prepend a namespace to the Kafka topic by setting the 124 | `TOPIC_NAMESPACE` environment variable. When doing this, the final topic name 125 | would be `pg2kafka.$namespace.$database_name.$table_name`. 126 | 127 | ### Cleanup 128 | 129 | If you decide not to use pg2kafka anymore, you can clean up the database triggers 130 | using the following command: 131 | 132 | ```sql 133 | DROP SCHEMA pg2kafka CASCADE; 134 | ``` 135 | 136 | ## Development 137 | 138 | ### Setup 139 | 140 | #### Golang 141 | 142 | You will need Go 1.9. 143 | 144 | #### PostgreSQL 145 | 146 | Set up a database and expose a connection string to it as an env variable; for 147 | local development we also specify `sslmode=disable`. 148 | 149 | ```bash 150 | $ createdb pg2kafka_test 151 | $ export DATABASE_URL="postgres://user:password@localhost/pg2kafka_test?sslmode=disable" 152 | ``` 153 | 154 | #### Kafka 155 | 156 | Install [Kafka](http://kafka.apache.org/) if you don't already have it running. 157 | This is not required to run the tests, but it is required if you want to run 158 | pg2kafka locally against a real Kafka.
159 | 160 | Create a topic for the table you want to track in your database: 161 | 162 | ```bash 163 | kafka-topics \ 164 | --zookeeper localhost:2181 \ 165 | --create \ 166 | --topic pg2kafka.pg2kafka_test.users \ 167 | --replication-factor 1 \ 168 | --partitions 3 169 | ``` 170 | 171 | Then export the Kafka broker address so pg2kafka can use it: 172 | 173 | ```bash 174 | $ export KAFKA_BROKER="localhost:9092" 175 | ``` 176 | 177 | ### Running the service locally 178 | 179 | Make sure you export the `DATABASE_URL` and `KAFKA_BROKER`, and also 180 | `export PERFORM_MIGRATIONS=true`. 181 | 182 | ```bash 183 | $ go run main.go 184 | ``` 185 | 186 | To run the service without using Kafka, you can set the `DRY_RUN=true` environment variable, which 187 | will produce the messages to stdout instead. 188 | 189 | ### Running tests 190 | 191 | The only thing required for the tests to run is that you've set up a database 192 | and exposed a connection string to it as `DATABASE_URL`. All the necessary 193 | schemas, tables and triggers will be created by the tests. 194 | 195 | ```bash 196 | $ ./script/test 197 | ``` 198 | 199 | ## License 200 | pg2kafka is released under the ISC license. See [LICENSE](https://github.com/blendle/pg2kafka/blob/master/LICENSE) for details. 201 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | - Do we get events about updates to null? 2 | - Make kafka topic configurable 3 | -------------------------------------------------------------------------------- /eventqueue/event_queue.go: -------------------------------------------------------------------------------- 1 | package eventqueue 2 | 3 | import ( 4 | "database/sql" 5 | "database/sql/driver" 6 | "encoding/json" 7 | "io/ioutil" 8 | "math" 9 | "time" 10 | 11 | "github.com/pkg/errors" 12 | ) 13 | 14 | const ( 15 | selectUnprocessedEventsQuery = ` 16 | SELECT id, uuid, external_id, table_name, statement, data, created_at 17 | FROM pg2kafka.outbound_event_queue 18 | WHERE processed = false 19 | ORDER BY id ASC 20 | LIMIT 1000 21 | ` 22 | 23 | markEventAsProcessedQuery = ` 24 | UPDATE pg2kafka.outbound_event_queue 25 | SET processed = true 26 | WHERE id = $1 AND processed = false 27 | ` 28 | 29 | countUnprocessedEventsQuery = ` 30 | SELECT count(*) AS count 31 | FROM pg2kafka.outbound_event_queue 32 | WHERE processed IS FALSE 33 | ` 34 | ) 35 | 36 | // ByteString is a special type of byte array with implemented interfaces to 37 | // convert from and to JSON and SQL values. 38 | type ByteString []byte 39 | 40 | // Event represents the queued event in the database. 41 | type Event struct { 42 | ID int `json:"-"` 43 | UUID string `json:"uuid"` 44 | ExternalID ByteString `json:"external_id"` 45 | TableName string `json:"-"` 46 | Statement string `json:"statement"` 47 | Data json.RawMessage `json:"data"` 48 | CreatedAt time.Time `json:"created_at"` 49 | Processed bool `json:"-"` 50 | } 51 | 52 | // Queue represents the queue of snapshot/create/update/delete events stored in 53 | // the database. 54 | type Queue struct { 55 | db *sql.DB 56 | } 57 | 58 | // New creates a new Queue, connected to the given database URL. 59 | func New(conninfo string) (*Queue, error) { 60 | db, err := sql.Open("postgres", conninfo) 61 | if err != nil { 62 | return nil, err 63 | } 64 | 65 | return &Queue{db: db}, nil 66 | } 67 | 68 | // NewWithDB creates a new Queue with the given database.
69 | func NewWithDB(db *sql.DB) *Queue { 70 | return &Queue{db: db} 71 | } 72 | 73 | // FetchUnprocessedRecords fetches a page (up to 1000) of events that have not 74 | // been marked as processed yet. 75 | func (eq *Queue) FetchUnprocessedRecords() ([]*Event, error) { 76 | rows, err := eq.db.Query(selectUnprocessedEventsQuery) 77 | if err != nil { 78 | return nil, err 79 | } 80 | 81 | messages := []*Event{} 82 | for rows.Next() { 83 | msg := &Event{} 84 | err = rows.Scan( 85 | &msg.ID, 86 | &msg.UUID, 87 | &msg.ExternalID, 88 | &msg.TableName, 89 | &msg.Statement, 90 | &msg.Data, 91 | &msg.CreatedAt, 92 | ) 93 | if err != nil { 94 | return nil, err 95 | } 96 | messages = append(messages, msg) 97 | } 98 | 99 | if cerr := rows.Close(); cerr != nil { 100 | return nil, cerr 101 | } 102 | return messages, nil 103 | } 104 | 105 | // UnprocessedEventPagesCount returns how many "pages" of events there are 106 | // queued in the database. Currently page-size is hard-coded to 1000 events per 107 | // page. 108 | func (eq *Queue) UnprocessedEventPagesCount() (int, error) { 109 | count := 0 110 | err := eq.db.QueryRow(countUnprocessedEventsQuery).Scan(&count) 111 | if err != nil { 112 | return 0, err 113 | } 114 | 115 | limit := 1000 116 | return int(math.Ceil(float64(count) / float64(limit))), nil 117 | } 118 | 119 | // MarkEventAsProcessed marks an event as processed. 120 | func (eq *Queue) MarkEventAsProcessed(eventID int) error { 121 | _, err := eq.db.Exec(markEventAsProcessedQuery, eventID) 122 | return err 123 | } 124 | 125 | // Close closes the Queue's database connection. 126 | func (eq *Queue) Close() error { 127 | return eq.db.Close() 128 | } 129 | 130 | // ConfigureOutboundEventQueueAndTriggers will set up a new schema 'pg2kafka', with 131 | // an 'outbound_event_queue' table that is used to store events, and all the 132 | // triggers necessary to snapshot and start tracking changes for a given table. 133 | func (eq *Queue) ConfigureOutboundEventQueueAndTriggers(path string) error { 134 | migration, err := ioutil.ReadFile(path + "/migrations.sql") 135 | if err != nil { 136 | return errors.Wrap(err, "error reading migration") 137 | } 138 | 139 | _, err = eq.db.Exec(string(migration)) 140 | if err != nil { 141 | return errors.Wrap(err, "failed to create table") 142 | } 143 | 144 | functions, err := ioutil.ReadFile(path + "/triggers.sql") 145 | if err != nil { 146 | return errors.Wrap(err, "error loading functions") 147 | } 148 | 149 | _, err = eq.db.Exec(string(functions)) 150 | if err != nil { 151 | return errors.Wrap(err, "error creating triggers") 152 | } 153 | 154 | return nil 155 | } 156 | 157 | // MarshalJSON implements the json.Marshaler interface. 158 | func (b *ByteString) MarshalJSON() ([]byte, error) { 159 | if *b == nil { 160 | return []byte("null"), nil 161 | } 162 | 163 | return append(append([]byte(`"`), *b...), byte('"')), nil 164 | } 165 | 166 | // UnmarshalJSON implements the json.Unmarshaler interface. 167 | func (b *ByteString) UnmarshalJSON(d []byte) error { 168 | var s string 169 | err := json.Unmarshal(d, &s) 170 | *b = ByteString(s) 171 | return err 172 | } 173 | 174 | // Value implements the driver.Valuer interface. 175 | func (b *ByteString) Value() (driver.Value, error) { 176 | return string(*b), nil 177 | } 178 | 179 | // Scan implements the sql.Scanner interface.
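// It accepts nil, string and []byte values from the database driver; any
// other type results in an error.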
180 | func (b *ByteString) Scan(val interface{}) error { 181 | switch v := val.(type) { 182 | case nil: 183 | *b = nil 184 | case string: 185 | *b = []byte(v) 186 | case []byte: 187 | *b = v 188 | default: 189 | return errors.New("unable to convert value to ByteString") 190 | } 191 | 192 | return nil 193 | } 194 | -------------------------------------------------------------------------------- /eventqueue/event_queue_test.go: -------------------------------------------------------------------------------- 1 | package eventqueue 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | ) 7 | 8 | var byteStringMarshalJSONtests = []struct { 9 | in ByteString 10 | out []byte 11 | }{ 12 | {ByteString(nil), []byte(`null`)}, 13 | {ByteString(""), []byte(`""`)}, 14 | {ByteString("hello"), []byte(`"hello"`)}, 15 | } 16 | 17 | func TestByteString_MarshalJSON(t *testing.T) { 18 | for _, tt := range byteStringMarshalJSONtests { 19 | t.Run(string(tt.out), func(t *testing.T) { 20 | actual, err := tt.in.MarshalJSON() 21 | if err != nil { 22 | t.Fatalf("Unexpected error: %v", err) 23 | } 24 | 25 | if !bytes.Equal(actual, tt.out) { 26 | t.Errorf("%q => MarshalJSON() => %q, want: %q", tt.in, actual, tt.out) 27 | } 28 | }) 29 | } 30 | } 31 | 32 | var byteStringUnmarshalJSON = []struct { 33 | in []byte 34 | out ByteString 35 | err bool 36 | }{ 37 | {[]byte(`null`), ByteString(nil), false}, 38 | {[]byte(`""`), ByteString(""), false}, 39 | {[]byte(`"hello"`), ByteString("hello"), false}, 40 | {[]byte(`1`), ByteString(nil), true}, 41 | {[]byte(`[]`), ByteString(nil), true}, 42 | {[]byte(`{}`), ByteString(nil), true}, 43 | } 44 | 45 | func TestByteString_UnmarshalJSON(t *testing.T) { 46 | for _, tt := range byteStringUnmarshalJSON { 47 | t.Run(string(tt.in), func(t *testing.T) { 48 | var actual ByteString 49 | 50 | err := actual.UnmarshalJSON(tt.in) 51 | if (err == nil) == tt.err { 52 | t.Fatalf("Unexpected error: %v", err) 53 | } 54 | 55 | if !bytes.Equal(actual, tt.out) { 56 | t.Errorf("UnmarshalJSON(%q) => %q, want: %q", tt.in, actual, tt.out) 57 | } 58 | }) 59 | } 60 | } 61 | 62 | var byteStringValue = []struct { 63 | in ByteString 64 | out string 65 | }{ 66 | {ByteString(nil), ""}, 67 | {ByteString(""), ""}, 68 | {ByteString("hello"), "hello"}, 69 | {ByteString("1"), "1"}, 70 | } 71 | 72 | func TestByteString_Value(t *testing.T) { 73 | for _, tt := range byteStringValue { 74 | t.Run(tt.out, func(t *testing.T) { 75 | actual, err := tt.in.Value() 76 | if err != nil { 77 | t.Fatalf("Unexpected error: %v", err) 78 | } 79 | 80 | if actual != tt.out { 81 | t.Errorf("%q => Value() => %q, want: %q", tt.in, actual, tt.out) 82 | } 83 | }) 84 | } 85 | } 86 | 87 | var byteStringScan = []struct { 88 | in interface{} 89 | out ByteString 90 | err bool 91 | }{ 92 | {nil, ByteString(nil), false}, 93 | {"", ByteString(""), false}, 94 | {"hello", ByteString("hello"), false}, 95 | {[]byte("hi"), ByteString("hi"), false}, 96 | {1, ByteString(nil), true}, 97 | } 98 | 99 | func TestByteString_Scan(t *testing.T) { 100 | for _, tt := range byteStringScan { 101 | t.Run(string(tt.out), func(t *testing.T) { 102 | var actual ByteString 103 | 104 | err := actual.Scan(tt.in) 105 | if (err == nil) == tt.err { 106 | t.Fatalf("Unexpected error: %v", err) 107 | } 108 | 109 | if !bytes.Equal(actual, tt.out) { 110 | t.Errorf("Scan(%q) => %q, want: %q", tt.in, actual, tt.out) 111 | } 112 | }) 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /main.go: 
-------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "net/url" 7 | "os" 8 | "os/signal" 9 | "strings" 10 | "time" 11 | 12 | logger "github.com/blendle/go-logger" 13 | "github.com/blendle/pg2kafka/eventqueue" 14 | "github.com/confluentinc/confluent-kafka-go/kafka" 15 | "github.com/lib/pq" 16 | "github.com/pkg/errors" 17 | "go.uber.org/zap" 18 | ) 19 | 20 | var ( 21 | topicNamespace string 22 | version string 23 | ) 24 | 25 | // Producer is the minimal required interface pg2kafka requires to produce 26 | // events to a kafka topic. 27 | type Producer interface { 28 | Close() 29 | Flush(int) int 30 | 31 | Produce(*kafka.Message, chan kafka.Event) error 32 | } 33 | 34 | func main() { 35 | conf := &logger.Config{ 36 | App: "pg2kafka", 37 | Tier: "stream-processor", 38 | Version: version, 39 | Production: os.Getenv("ENV") == "production", 40 | Environment: os.Getenv("ENV"), 41 | } 42 | 43 | logger.Init(conf) 44 | 45 | conninfo := os.Getenv("DATABASE_URL") 46 | topicNamespace = parseTopicNamespace(os.Getenv("TOPIC_NAMESPACE"), parseDatabaseName(conninfo)) 47 | 48 | eq, err := eventqueue.New(conninfo) 49 | if err != nil { 50 | logger.L.Fatal("Error opening db connection", zap.Error(err)) 51 | } 52 | defer func() { 53 | if cerr := eq.Close(); cerr != nil { 54 | logger.L.Fatal("Error closing db connection", zap.Error(cerr)) 55 | } 56 | }() 57 | 58 | if os.Getenv("PERFORM_MIGRATIONS") == "true" { 59 | if cerr := eq.ConfigureOutboundEventQueueAndTriggers("./sql"); cerr != nil { 60 | logger.L.Fatal("Error configuring outbound_event_queue and triggers", zap.Error(cerr)) 61 | } 62 | } else { 63 | logger.L.Info("Not performing database migrations due to missing `PERFORM_MIGRATIONS`.") 64 | } 65 | 66 | producer := setupProducer() 67 | defer producer.Close() 68 | defer producer.Flush(1000) 69 | 70 | reportProblem := func(ev pq.ListenerEventType, err error) { 71 | if err != nil { 72 | logger.L.Error("Error handling postgres notify", zap.Error(err)) 73 | } 74 | } 75 | listener := pq.NewListener(conninfo, 10*time.Second, time.Minute, reportProblem) 76 | if err := listener.Listen("outbound_event_queue"); err != nil { 77 | logger.L.Error("Error listening to pg", zap.Error(err)) 78 | } 79 | defer func() { 80 | if cerr := listener.Close(); cerr != nil { 81 | logger.L.Error("Error closing listener", zap.Error(cerr)) 82 | } 83 | }() 84 | 85 | // Process any events left in the queue 86 | processQueue(producer, eq) 87 | 88 | signals := make(chan os.Signal, 1) 89 | signal.Notify(signals, os.Interrupt) 90 | 91 | logger.L.Info("pg2kafka is now listening to notifications") 92 | waitForNotification(listener, producer, eq, signals) 93 | } 94 | 95 | // ProcessEvents queries the database for unprocessed events and produces them 96 | // to kafka. 
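// Successfully produced events are marked as processed in the queue; produce
// or delivery failures are fatal and terminate the process.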
97 | func ProcessEvents(p Producer, eq *eventqueue.Queue) { 98 | events, err := eq.FetchUnprocessedRecords() 99 | if err != nil { 100 | logger.L.Error("Error listening to pg", zap.Error(err)) 101 | } 102 | 103 | produceMessages(p, events, eq) 104 | } 105 | 106 | func processQueue(p Producer, eq *eventqueue.Queue) { 107 | pageCount, err := eq.UnprocessedEventPagesCount() 108 | if err != nil { 109 | logger.L.Fatal("Error selecting count", zap.Error(err)) 110 | } 111 | 112 | for i := 0; i <= pageCount; i++ { 113 | ProcessEvents(p, eq) 114 | } 115 | } 116 | 117 | func waitForNotification( 118 | l *pq.Listener, 119 | p Producer, 120 | eq *eventqueue.Queue, 121 | signals chan os.Signal, 122 | ) { 123 | for { 124 | select { 125 | case <-l.Notify: 126 | processQueue(p, eq) 127 | case <-time.After(90 * time.Second): 128 | go func() { 129 | err := l.Ping() 130 | if err != nil { 131 | logger.L.Fatal("Error pinging listener", zap.Error(err)) 132 | } 133 | }() 134 | case <-signals: 135 | return 136 | } 137 | } 138 | } 139 | 140 | func produceMessages(p Producer, events []*eventqueue.Event, eq *eventqueue.Queue) { 141 | deliveryChan := make(chan kafka.Event) 142 | for _, event := range events { 143 | msg, err := json.Marshal(event) 144 | if err != nil { 145 | logger.L.Fatal("Error parsing event", zap.Error(err)) 146 | } 147 | 148 | topic := topicName(event.TableName) 149 | message := &kafka.Message{ 150 | TopicPartition: kafka.TopicPartition{ 151 | Topic: &topic, 152 | Partition: kafka.PartitionAny, // nolint: gotype 153 | }, 154 | Value: msg, 155 | Key: event.ExternalID, 156 | Timestamp: event.CreatedAt, 157 | } 158 | if os.Getenv("DRY_RUN") != "" { 159 | logger.L.Info("Would produce message", zap.Any("message", message)) 160 | } else { 161 | err = p.Produce(message, deliveryChan) 162 | if err != nil { 163 | logger.L.Fatal("Failed to produce", zap.Error(err)) 164 | } 165 | e := <-deliveryChan 166 | 167 | result := e.(*kafka.Message) 168 | if result.TopicPartition.Error != nil { 169 | logger.L.Fatal("Delivery failed", zap.Error(result.TopicPartition.Error)) 170 | } 171 | } 172 | err = eq.MarkEventAsProcessed(event.ID) 173 | if err != nil { 174 | logger.L.Fatal("Error marking record as processed", zap.Error(err)) 175 | } 176 | } 177 | } 178 | 179 | func setupProducer() Producer { 180 | broker := os.Getenv("KAFKA_BROKER") 181 | if broker == "" { 182 | panic("missing KAFKA_BROKER environment") 183 | } 184 | 185 | hostname, err := os.Hostname() 186 | if err != nil { 187 | hostname = os.Getenv("HOSTNAME") 188 | } 189 | 190 | p, err := kafka.NewProducer(&kafka.ConfigMap{ 191 | "client.id": hostname, 192 | "bootstrap.servers": broker, 193 | "partitioner": "murmur2", 194 | "compression.codec": "snappy", 195 | }) 196 | if err != nil { 197 | panic(errors.Wrap(err, "failed to setup producer")) 198 | } 199 | 200 | return p 201 | } 202 | 203 | func topicName(tableName string) string { 204 | return fmt.Sprintf("pg2kafka.%v.%v", topicNamespace, tableName) 205 | } 206 | 207 | func parseDatabaseName(conninfo string) string { 208 | dbURL, err := url.Parse(conninfo) 209 | if err != nil { 210 | logger.L.Fatal("Error parsing db connection string", zap.Error(err)) 211 | } 212 | return strings.TrimPrefix(dbURL.Path, "/") 213 | } 214 | 215 | func parseTopicNamespace(topicNamespace string, databaseName string) string { 216 | s := databaseName 217 | if topicNamespace != "" { 218 | s = topicNamespace + "." 
+ s 219 | } 220 | 221 | return s 222 | } 223 | -------------------------------------------------------------------------------- /main_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "database/sql" 5 | "os" 6 | "testing" 7 | 8 | "github.com/buger/jsonparser" 9 | "github.com/confluentinc/confluent-kafka-go/kafka" 10 | 11 | "github.com/blendle/pg2kafka/eventqueue" 12 | _ "github.com/lib/pq" 13 | ) 14 | 15 | func TestFetchUnprocessedRecords(t *testing.T) { 16 | db, eq, cleanup := setup(t) 17 | defer cleanup() 18 | 19 | // TODO: Use actual trigger to generate this? 20 | events := []*eventqueue.Event{ 21 | { 22 | ExternalID: []byte("fefc72b4-d8df-4039-9fb9-bfcb18066a2b"), 23 | TableName: "users", 24 | Statement: "UPDATE", 25 | Data: []byte(`{ "email": "j@blendle.com" }`), 26 | Processed: true, 27 | }, 28 | { 29 | ExternalID: []byte("fefc72b4-d8df-4039-9fb9-bfcb18066a2b"), 30 | TableName: "users", 31 | Statement: "UPDATE", 32 | Data: []byte(`{ "email": "jurre@blendle.com" }`), 33 | }, 34 | { 35 | ExternalID: []byte("fefc72b4-d8df-4039-9fb9-bfcb18066a2b"), 36 | TableName: "users", 37 | Statement: "UPDATE", 38 | Data: []byte(`{ "email": "jurres@blendle.com" }`), 39 | }, 40 | { 41 | ExternalID: nil, 42 | TableName: "users", 43 | Statement: "CREATE", 44 | Data: []byte(`{ "email": "bart@simpsons.com" }`), 45 | }, 46 | { 47 | ExternalID: nil, 48 | TableName: "users", 49 | Statement: "UPDATE", 50 | Data: []byte(`{ "email": "bartman@simpsons.com" }`), 51 | }, 52 | } 53 | if err := insert(db, events); err != nil { 54 | t.Fatalf("Error inserting events: %v", err) 55 | } 56 | 57 | p := &mockProducer{ 58 | messages: make([]*kafka.Message, 0), 59 | } 60 | 61 | ProcessEvents(p, eq) 62 | 63 | expected := 4 64 | actual := len(p.messages) 65 | if actual != expected { 66 | t.Fatalf("Unexpected number of messages produced. Expected %d, got %d", expected, actual) 67 | } 68 | 69 | msg := p.messages[0] 70 | email, err := jsonparser.GetString(msg.Value, "data", "email") 71 | if err != nil { 72 | t.Fatal(err) 73 | } 74 | 75 | if email != "jurre@blendle.com" { 76 | t.Errorf("Data did not match. Expected %v, got %v", "jurre@blendle.com", email) 77 | } 78 | 79 | externalID, err := jsonparser.GetString(msg.Value, "external_id") 80 | if err != nil { 81 | t.Fatal(err) 82 | } 83 | 84 | if externalID != "fefc72b4-d8df-4039-9fb9-bfcb18066a2b" { 85 | t.Errorf("Expected %v, got %v", "fefc72b4-d8df-4039-9fb9-bfcb18066a2b", externalID) 86 | } 87 | 88 | msg = p.messages[3] 89 | email, err = jsonparser.GetString(msg.Value, "data", "email") 90 | if err != nil { 91 | t.Fatal(err) 92 | } 93 | 94 | if email != "bartman@simpsons.com" { 95 | t.Errorf("Data did not match. 
Expected %v, got %v", "bartman@simpsons.com", email) 96 | } 97 | 98 | if len(msg.Key) != 0 { 99 | t.Errorf("Expected empty key, got %v", msg.Key) 100 | } 101 | } 102 | 103 | // Helpers 104 | 105 | func setup(t *testing.T) (*sql.DB, *eventqueue.Queue, func()) { 106 | t.Helper() 107 | topicNamespace = "users" 108 | db, err := sql.Open("postgres", os.Getenv("DATABASE_URL")) 109 | if err != nil { 110 | t.Fatalf("failed to open database: %v", err) 111 | } 112 | 113 | eq := eventqueue.NewWithDB(db) 114 | if err := eq.ConfigureOutboundEventQueueAndTriggers("./sql"); err != nil { 115 | t.Fatal(err) 116 | } 117 | 118 | return db, eq, func() { 119 | _, err := db.Exec("DELETE FROM pg2kafka.outbound_event_queue") 120 | if err != nil { 121 | t.Fatalf("failed to clear table: %v", err) 122 | } 123 | if err := db.Close(); err != nil { 124 | t.Fatalf("Error closing db: %v", err) 125 | } 126 | } 127 | } 128 | 129 | func insert(db *sql.DB, events []*eventqueue.Event) error { 130 | tx, err := db.Begin() 131 | if err != nil { 132 | return err 133 | } 134 | statement, err := tx.Prepare(` 135 | INSERT INTO pg2kafka.outbound_event_queue (external_id, table_name, statement, data, processed) 136 | VALUES ($1, $2, $3, $4, $5) 137 | `) 138 | if err != nil { 139 | if txerr := tx.Rollback(); txerr != nil { 140 | return txerr 141 | } 142 | return err 143 | } 144 | 145 | for _, e := range events { 146 | _, serr := statement.Exec(e.ExternalID, e.TableName, e.Statement, e.Data, e.Processed) 147 | if serr != nil { 148 | if txerr := tx.Rollback(); err != nil { 149 | return txerr 150 | } 151 | return serr 152 | } 153 | } 154 | return tx.Commit() 155 | } 156 | 157 | var parseTopicNamespacetests = []struct { 158 | in1, in2, out string 159 | }{ 160 | {"", "", ""}, 161 | {"", "world", "world"}, 162 | {"hello", "", "hello."}, 163 | {"hello", "world", "hello.world"}, 164 | } 165 | 166 | func TestParseTopicNamespace(t *testing.T) { 167 | for _, tt := range parseTopicNamespacetests { 168 | t.Run(tt.out, func(t *testing.T) { 169 | actual := parseTopicNamespace(tt.in1, tt.in2) 170 | 171 | if actual != tt.out { 172 | t.Errorf("parseTopicNamespace(%q, %q) => %v, want: %v", tt.in1, tt.in2, actual, tt.out) 173 | } 174 | }) 175 | } 176 | } 177 | 178 | type mockProducer struct { 179 | messages []*kafka.Message 180 | } 181 | 182 | func (p *mockProducer) Close() { 183 | } 184 | func (p *mockProducer) Flush(timeout int) int { 185 | return 0 186 | } 187 | func (p *mockProducer) Produce(msg *kafka.Message, deliveryChan chan kafka.Event) error { 188 | p.messages = append(p.messages, msg) 189 | go func() { 190 | deliveryChan <- msg 191 | }() 192 | return nil 193 | } 194 | -------------------------------------------------------------------------------- /script/bootstrap: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # script/bootstrap: Install project dependencies. 4 | 5 | set -e 6 | 7 | cd "$(dirname "$0")/.." 
8 | 9 | command -v dep >/dev/null 2>&1 || { 10 | if [ -z "$CI" ]; then 11 | echo 'You need to install dep as a dependency, run this command on macOS:' 12 | echo '$ brew install dep && brew upgrade dep' 13 | exit 1 14 | else 15 | os=$(uname -s | awk '{print tolower($0)}') 16 | curl -L "https://github.com/golang/dep/releases/download/v0.3.1/dep-$os-amd64" > /usr/local/bin/dep 17 | chmod +x /usr/local/bin/dep 18 | fi 19 | } 20 | 21 | command -v gometalinter >/dev/null 2>&1 || { 22 | if [ -z "$CI" ]; then 23 | echo 'You need to install gometalinter as a dependency, run this command:' 24 | echo '$ go get -u github.com/alecthomas/gometalinter' 25 | exit 1 26 | else 27 | go get -u github.com/alecthomas/gometalinter 28 | fi 29 | } 30 | gometalinter --install --update 31 | 32 | command -v godog >/dev/null 2>&1 || { 33 | if [ -z "$CI" ]; then 34 | echo 'You need to install godog as a dependency, run this command:' 35 | echo '$ go get -u github.com/DATA-DOG/godog/cmd/godog' 36 | exit 1 37 | else 38 | go get -u github.com/DATA-DOG/godog/cmd/godog 39 | fi 40 | } 41 | 42 | if [ -z "$CI" ]; then 43 | echo 'You need to set up a database to test against and export it as "DATABASE_URL"' 44 | fi 45 | -------------------------------------------------------------------------------- /script/build: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eu 3 | 4 | GIT_COMMIT=$(git rev-parse --short @) 5 | docker build -t "eu.gcr.io/bnl-blendle/pg2kafka:$GIT_COMMIT" . 6 | -------------------------------------------------------------------------------- /script/deploy: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eu 3 | 4 | GIT_COMMIT=$(git rev-parse --short @) 5 | script/build 6 | gcloud docker -- push "eu.gcr.io/bnl-blendle/pg2kafka:$GIT_COMMIT" 7 | -------------------------------------------------------------------------------- /script/setup: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # script/setup: Prepare project for development and local testing. 4 | 5 | set -e 6 | 7 | cd "$(dirname "$0")/.." 8 | 9 | script/bootstrap 10 | dep ensure -vendor-only 11 | -------------------------------------------------------------------------------- /script/test: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # script/test: Run the test suite and linters. 4 | 5 | set -e 6 | 7 | cd "$(dirname "$0")/.." 8 | 9 | script/tests/gometalinter 10 | script/tests/go-test 11 | -------------------------------------------------------------------------------- /script/tests/go-test: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # script/tests/go-race: Run go tests 4 | 5 | set -e 6 | 7 | cd "$(dirname "$0")/../.." 8 | 9 | export DATABASE_URL="postgres://postgres@localhost/postgres?sslmode=disable" 10 | 11 | [ -z "$CI" ] || { 12 | cd "$GOPATH/src/github.com/blendle/pg2kafka" 13 | export DATABASE_URL="postgres://postgres@localhost/test1?sslmode=disable" 14 | } 15 | 16 | go test ./... 17 | -------------------------------------------------------------------------------- /script/tests/gometalinter: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # script/tests/gometalinter: Run linters and static analysis 4 | 5 | set -e 6 | 7 | cd "$(dirname "$0")/../.." 
8 | 9 | export DATABASE_URL="postgres://postgres@localhost/postgres?sslmode=disable" 10 | 11 | [ -z "$CI" ] || { 12 | cd "$GOPATH/src/github.com/blendle/pg2kafka" 13 | export DATABASE_URL="postgres://postgres@localhost/test2?sslmode=disable" 14 | } 15 | 16 | matches=$(grep \ 17 | --ignore-case \ 18 | --recursive \ 19 | --exclude-dir vendor \ 20 | '^// [a-z]* \.\.\.$' . || true) 21 | 22 | if [ -n "$matches" ]; then 23 | >&2 echo "Invalid code comments detected:" 24 | >&2 echo 25 | >&2 echo "$matches" 26 | >&2 echo 27 | exit 1 28 | fi 29 | 30 | # concurrency is set to 1 due to conrrent linters manipulating the DB, 31 | # which won't work, yet. 32 | gometalinter \ 33 | --vendor \ 34 | --tests \ 35 | --aggregate \ 36 | --line-length=100 \ 37 | --deadline=300s \ 38 | --concurrency=1 \ 39 | --exclude='.*_test\.go:.*is unused \(U1000\)' \ 40 | --exclude='.*_test\.go:.*duplicate.+\(dupl\)' \ 41 | --enable-all \ 42 | --disable=safesql \ 43 | ./... 44 | -------------------------------------------------------------------------------- /sql/migrations.sql: -------------------------------------------------------------------------------- 1 | CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; 2 | 3 | CREATE SCHEMA IF NOT EXISTS pg2kafka; 4 | 5 | CREATE SEQUENCE IF NOT EXISTS pg2kafka.outbound_event_queue_id; 6 | CREATE TABLE IF NOT EXISTS pg2kafka.outbound_event_queue ( 7 | id integer NOT NULL DEFAULT nextval('pg2kafka.outbound_event_queue_id'::regclass), 8 | uuid uuid NOT NULL DEFAULT uuid_generate_v4(), 9 | external_id varchar(255), 10 | table_name varchar(255) NOT NULL, 11 | statement varchar(20) NOT NULL, 12 | data jsonb NOT NULL, 13 | created_at timestamp NOT NULL DEFAULT current_timestamp, 14 | processed boolean DEFAULT false 15 | ); 16 | 17 | CREATE INDEX IF NOT EXISTS outbound_event_queue_id_index 18 | ON pg2kafka.outbound_event_queue (id); 19 | 20 | CREATE SEQUENCE IF NOT EXISTS pg2kafka.external_id_relations_id; 21 | CREATE TABLE IF NOT EXISTS pg2kafka.external_id_relations ( 22 | id integer NOT NULL DEFAULT nextval('pg2kafka.external_id_relations_id'::regclass), 23 | external_id varchar(255) NOT NULL, 24 | table_name varchar(255) NOT NULL 25 | ); 26 | 27 | CREATE UNIQUE INDEX IF NOT EXISTS external_id_relations_unique_table_name_index 28 | ON pg2kafka.external_id_relations(table_name); 29 | -------------------------------------------------------------------------------- /sql/sql_test.go: -------------------------------------------------------------------------------- 1 | package sql_test 2 | 3 | import ( 4 | "bytes" 5 | "database/sql" 6 | "os" 7 | "testing" 8 | 9 | "github.com/blendle/pg2kafka/eventqueue" 10 | "github.com/buger/jsonparser" 11 | _ "github.com/lib/pq" 12 | ) 13 | 14 | const selectTriggerNamesQuery = ` 15 | SELECT tgname 16 | FROM pg_trigger 17 | WHERE tgisinternal = false 18 | AND tgrelid = 'users'::regclass; 19 | ` 20 | 21 | func TestSQL_SetupPG2Kafka(t *testing.T) { 22 | db, _, cleanup := setupTriggers(t) 23 | defer cleanup() 24 | 25 | triggerName := "" 26 | err := db.QueryRow(selectTriggerNamesQuery).Scan(&triggerName) 27 | if err != nil { 28 | t.Fatalf("Error fetching triggers: %v", err) 29 | } 30 | 31 | if triggerName != "users_enqueue_event" { 32 | t.Fatalf("Expected trigger 'users_enqueue_event', got: '%v'", triggerName) 33 | } 34 | } 35 | 36 | func TestSQL_SetupPG2Kafka_Idempotency(t *testing.T) { 37 | db, _, cleanup := setupTriggers(t) 38 | defer cleanup() 39 | 40 | _, err := db.Exec(`SELECT pg2kafka.setup('users', 'uuid');`) 41 | if err != nil { 42 | t.Fatal(err) 43 | } 44 | 
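// The second setup call should only raise a warning and leave the existing
// trigger in place, so the query below still returns users_enqueue_event.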
45 | triggerName := "" 46 | err = db.QueryRow(selectTriggerNamesQuery).Scan(&triggerName) 47 | if err != nil { 48 | t.Fatalf("Error fetching triggers: %v", err) 49 | } 50 | 51 | if triggerName != "users_enqueue_event" { 52 | t.Fatalf("Expected trigger 'users_enqueue_event', got: '%v'", triggerName) 53 | } 54 | } 55 | 56 | func TestSQL_Trigger_Insert(t *testing.T) { 57 | db, eq, cleanup := setupTriggers(t) 58 | defer cleanup() 59 | 60 | _, err := db.Exec(`INSERT INTO users (name, email) VALUES ('jurre', 'jurre@blendle.com')`) 61 | if err != nil { 62 | t.Fatal(err) 63 | } 64 | 65 | events, err := eq.FetchUnprocessedRecords() 66 | if err != nil { 67 | t.Fatal(err) 68 | } 69 | if len(events) != 1 { 70 | t.Fatalf("Expected 1 event, got %d", len(events)) 71 | } 72 | 73 | if events[0].Statement != "INSERT" { 74 | t.Errorf("Expected 'INSERT', got %s", events[0].Statement) 75 | } 76 | 77 | if events[0].TableName != "users" { 78 | t.Errorf("Expected 'users', got %s", events[0].TableName) 79 | } 80 | 81 | email, _ := jsonparser.GetString(events[0].Data, "email") 82 | if email != "jurre@blendle.com" { 83 | t.Errorf("Expected 'jurre@blendle.com', got %s", email) 84 | } 85 | 86 | name, _ := jsonparser.GetString(events[0].Data, "name") 87 | if name != "jurre" { 88 | t.Errorf("Expected 'jurre', got %s", name) 89 | } 90 | } 91 | 92 | func TestSQL_Trigger_CreateWithNull(t *testing.T) { 93 | db, eq, cleanup := setupTriggers(t) 94 | defer cleanup() 95 | 96 | _, err := db.Exec(`INSERT INTO users (name, email) VALUES ('niels', null)`) 97 | if err != nil { 98 | t.Fatal(err) 99 | } 100 | 101 | events, err := eq.FetchUnprocessedRecords() 102 | if err != nil { 103 | t.Fatal(err) 104 | } 105 | 106 | _, valueType, _, _ := jsonparser.Get(events[0].Data, "email") 107 | if valueType != jsonparser.Null { 108 | t.Errorf("Expected null, got %v", valueType) 109 | } 110 | } 111 | 112 | func TestSQL_Trigger_UpdateToNull(t *testing.T) { 113 | db, eq, cleanup := setupTriggers(t) 114 | defer cleanup() 115 | 116 | _, err := db.Exec(`INSERT INTO users (name, email) VALUES ('jurre', 'jurre@blendle.com')`) 117 | if err != nil { 118 | t.Fatal(err) 119 | } 120 | 121 | _, err = db.Exec(`UPDATE users SET email = null WHERE name = 'jurre'`) 122 | if err != nil { 123 | t.Fatal(err) 124 | } 125 | 126 | events, err := eq.FetchUnprocessedRecords() 127 | if err != nil { 128 | t.Fatal(err) 129 | } 130 | 131 | if len(events) != 2 { 132 | t.Fatalf("Expected 2 events, got %d", len(events)) 133 | } 134 | 135 | _, valueType, _, _ := jsonparser.Get(events[1].Data, "email") 136 | if valueType != jsonparser.Null { 137 | t.Errorf("Expected null, got %v", valueType) 138 | } 139 | 140 | _, valueType, _, _ = jsonparser.Get(events[1].Data, "name") 141 | if valueType != jsonparser.NotExist { 142 | t.Error("Expected data not to contain key 'name'") 143 | } 144 | } 145 | 146 | func TestSQL_Trigger_UpdateExtensionColumn(t *testing.T) { 147 | db, eq, cleanup := setupTriggers(t) 148 | defer cleanup() 149 | 150 | _, err := db.Exec(`INSERT INTO users (name, email, properties, data) VALUES ('jurre', 'jurre@blendle.com', 'a=>1'::hstore, '{ "foo": "bar" }'::jsonb)`) // nolint: lll 151 | if err != nil { 152 | t.Fatal(err) 153 | } 154 | 155 | _, err = db.Exec(`UPDATE users SET properties = 'a=>2,b=>2'::hstore WHERE name = 'jurre'`) 156 | if err != nil { 157 | t.Fatal(err) 158 | } 159 | 160 | _, err = db.Exec(`UPDATE users SET data = jsonb_set(data, '{foo}', '"baz"') WHERE name = 'jurre'`) 161 | if err != nil { 162 | t.Fatal(err) 163 | } 164 | 165 | events, err := 
eq.FetchUnprocessedRecords() 166 | if err != nil { 167 | t.Fatal(err) 168 | } 169 | 170 | if len(events) != 3 { 171 | t.Fatalf("Expected 2 events, got %d", len(events)) 172 | } 173 | 174 | if string(events[1].Data) != `{"properties": {"a": "2", "b": "2"}}` { 175 | t.Errorf("Data did not match: %q", events[1].Data) 176 | } 177 | 178 | if string(events[2].Data) != `{"data": {"foo": "baz"}}` { 179 | t.Errorf("Data did not match: %q", events[2].Data) 180 | } 181 | } 182 | 183 | func TestSQL_Snapshot(t *testing.T) { 184 | db, eq, cleanup := setupTriggers(t) 185 | defer cleanup() 186 | 187 | _, err := db.Exec(` 188 | DROP TABLE IF EXISTS products; 189 | CREATE TABLE products ( 190 | uid varchar, 191 | name varchar 192 | ); 193 | INSERT INTO products (uid, name) VALUES ('duff-1', 'Duffs Beer'); 194 | INSERT INTO products (uid, name) VALUES ('duff-2', null); 195 | INSERT INTO products (uid, name) VALUES (null, 'Duff Dry'); 196 | SELECT pg2kafka.setup('products', 'uid') 197 | `) 198 | if err != nil { 199 | t.Fatalf("Error creating products table: %v", err) 200 | } 201 | 202 | events, err := eq.FetchUnprocessedRecords() 203 | if err != nil { 204 | t.Fatal(err) 205 | } 206 | 207 | if len(events) != 3 { 208 | t.Fatalf("Expected 3 events, got %d", len(events)) 209 | } 210 | 211 | if !bytes.Equal(events[0].ExternalID, []byte("duff-1")) { 212 | t.Fatalf("Incorrect external id, expected 'duff-1', got '%v'", events[0].ExternalID) 213 | } 214 | 215 | _, valueType, _, _ := jsonparser.Get(events[1].Data, "name") 216 | if valueType != jsonparser.Null { 217 | t.Errorf("Expected null, got %v", valueType) 218 | } 219 | 220 | if events[2].ExternalID != nil { 221 | t.Fatalf("Incorrect external id, expected NULL, got %q", events[2].ExternalID) 222 | } 223 | } 224 | 225 | func setupTriggers(t *testing.T) (*sql.DB, *eventqueue.Queue, func()) { 226 | t.Helper() 227 | db, err := sql.Open("postgres", os.Getenv("DATABASE_URL")) 228 | if err != nil { 229 | t.Fatalf("failed to open database: %v", err) 230 | } 231 | 232 | eq := eventqueue.NewWithDB(db) 233 | 234 | err = eq.ConfigureOutboundEventQueueAndTriggers("./") 235 | if err != nil { 236 | t.Fatal(err) 237 | } 238 | 239 | _, err = db.Exec(` 240 | CREATE EXTENSION IF NOT EXISTS hstore; 241 | DROP TABLE IF EXISTS users cascade; 242 | CREATE TABLE users ( 243 | uuid uuid NOT NULL DEFAULT uuid_generate_v4(), 244 | name varchar, 245 | email text, 246 | properties hstore, 247 | data jsonb 248 | ); 249 | SELECT pg2kafka.setup('users', 'uuid'); 250 | `) 251 | if err != nil { 252 | t.Fatalf("Error creating users table: %v", err) 253 | } 254 | 255 | return db, eq, func() { 256 | _, err := db.Exec("DROP SCHEMA pg2kafka CASCADE") 257 | if err != nil { 258 | t.Fatalf("failed to drop pg2kafka schema: %v", err) 259 | } 260 | 261 | if cerr := eq.Close(); cerr != nil { 262 | t.Fatalf("failed to close eventqueue %v", err) 263 | } 264 | } 265 | } 266 | -------------------------------------------------------------------------------- /sql/triggers.sql: -------------------------------------------------------------------------------- 1 | CREATE OR REPLACE FUNCTION pg2kafka.enqueue_event() RETURNS trigger 2 | LANGUAGE plpgsql 3 | AS $_$ 4 | DECLARE 5 | external_id varchar; 6 | changes jsonb; 7 | col record; 8 | outbound_event record; 9 | BEGIN 10 | SELECT pg2kafka.external_id_relations.external_id INTO external_id 11 | FROM pg2kafka.external_id_relations 12 | WHERE table_name = TG_TABLE_NAME; 13 | 14 | IF TG_OP = 'INSERT' THEN 15 | EXECUTE format('SELECT ($1).%s::text', external_id) USING NEW 
INTO external_id; 16 | ELSE 17 | EXECUTE format('SELECT ($1).%s::text', external_id) USING OLD INTO external_id; 18 | END IF; 19 | 20 | IF TG_OP = 'INSERT' THEN 21 | changes := row_to_json(NEW); 22 | ELSIF TG_OP = 'UPDATE' THEN 23 | changes := row_to_json(NEW); 24 | -- Remove keys whose values did not change 25 | FOR col IN SELECT * FROM jsonb_each(row_to_json(OLD)::jsonb) LOOP 26 | IF changes->col.key = col.value THEN 27 | changes = changes - col.key; 28 | END IF; 29 | END LOOP; 30 | ELSIF TG_OP = 'DELETE' THEN 31 | changes := '{}'::jsonb; 32 | END IF; 33 | 34 | -- Don't enqueue an event for updates that did not change anything 35 | IF TG_OP = 'UPDATE' AND changes = '{}'::jsonb THEN 36 | RETURN NULL; 37 | END IF; 38 | 39 | INSERT INTO pg2kafka.outbound_event_queue(external_id, table_name, statement, data) 40 | VALUES (external_id, TG_TABLE_NAME, TG_OP, changes) 41 | RETURNING * INTO outbound_event; 42 | 43 | PERFORM pg_notify('outbound_event_queue', TG_OP); 44 | 45 | RETURN NULL; 46 | END 47 | $_$; 48 | 49 | CREATE OR REPLACE FUNCTION pg2kafka.create_snapshot_events(table_name_ref regclass) RETURNS void 50 | LANGUAGE plpgsql 51 | AS $_$ 52 | DECLARE 53 | query text; 54 | rec record; 55 | changes jsonb; 56 | external_id_ref varchar; 57 | external_id varchar; 58 | BEGIN 59 | SELECT pg2kafka.external_id_relations.external_id INTO external_id_ref 60 | FROM pg2kafka.external_id_relations 61 | WHERE pg2kafka.external_id_relations.table_name = table_name_ref::varchar; 62 | 63 | query := 'SELECT * FROM ' || table_name_ref; 64 | 65 | FOR rec IN EXECUTE query LOOP 66 | changes := row_to_json(rec); 67 | external_id := changes->>external_id_ref; 68 | 69 | INSERT INTO pg2kafka.outbound_event_queue(external_id, table_name, statement, data) 70 | VALUES (external_id, table_name_ref, 'SNAPSHOT', changes); 71 | END LOOP; 72 | 73 | PERFORM pg_notify('outbound_event_queue', 'SNAPSHOT'); 74 | END 75 | $_$; 76 | 77 | CREATE OR REPLACE FUNCTION pg2kafka.setup(table_name_ref regclass, external_id_name text) RETURNS void 78 | LANGUAGE plpgsql 79 | AS $_$ 80 | DECLARE 81 | existing_id varchar; 82 | trigger_name varchar; 83 | lock_query varchar; 84 | trigger_query varchar; 85 | BEGIN 86 | SELECT pg2kafka.external_id_relations.external_id INTO existing_id 87 | FROM pg2kafka.external_id_relations 88 | WHERE pg2kafka.external_id_relations.table_name = table_name_ref::varchar; 89 | 90 | IF existing_id != '' THEN 91 | RAISE WARNING 'table/external_id relation already exists for %/%. Skipping setup.', table_name_ref, external_id_name; 92 | 93 | RETURN; 94 | END IF; 95 | 96 | INSERT INTO pg2kafka.external_id_relations(external_id, table_name) 97 | VALUES (external_id_name, table_name_ref); 98 | 99 | trigger_name := table_name_ref || '_enqueue_event'; 100 | lock_query := 'LOCK TABLE ' || table_name_ref || ' IN ACCESS EXCLUSIVE MODE'; 101 | trigger_query := 'CREATE TRIGGER ' || trigger_name 102 | || ' AFTER INSERT OR DELETE OR UPDATE ON ' || table_name_ref 103 | || ' FOR EACH ROW EXECUTE PROCEDURE pg2kafka.enqueue_event()'; 104 | 105 | -- We acquire an exclusive lock on the table to ensure that we do not miss any 106 | -- events between taking the snapshot and adding the trigger. 107 | EXECUTE lock_query; 108 | 109 | PERFORM pg2kafka.create_snapshot_events(table_name_ref); 110 | 111 | EXECUTE trigger_query; 112 | END 113 | $_$; 114 | --------------------------------------------------------------------------------