├── .env.sample ├── .github └── workflows │ └── build.yml ├── .gitignore ├── CHANGELOG.md ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── benchmark ├── README.md ├── data │ ├── create-indexes.ddl │ └── create-tables.ddl ├── queries.sql ├── query-templates │ ├── 1.sql │ ├── 10.sql │ ├── 11.sql │ ├── 12.sql │ ├── 13.sql │ ├── 14.sql │ ├── 15.sql │ ├── 16.sql │ ├── 17.sql │ ├── 18.sql │ ├── 19.sql │ ├── 2.sql │ ├── 20.sql │ ├── 21.sql │ ├── 22.sql │ ├── 3.sql │ ├── 4.sql │ ├── 5.sql │ ├── 6.sql │ ├── 7.sql │ ├── 8.sql │ └── 9.sql └── scripts │ ├── generate-data.sh │ ├── load-pg-data.sh │ └── measure-memory.sh ├── build └── .gitkeep ├── devbox.json ├── devbox.lock ├── img ├── BemiDB.gif ├── architecture.png └── tpc-h_database_structure.png ├── scripts ├── build-darwin.sh ├── build-linux.sh ├── install.sh ├── publish-docker.sh ├── test-data-types.sql ├── test-partitioned-tables.sql └── test-schemas.sql └── src ├── capped_buffer.go ├── capped_buffer_test.go ├── config.go ├── config_test.go ├── custom_types.go ├── duckdb.go ├── duckdb_test.go ├── error_utils.go ├── go.mod ├── go.sum ├── iceberg_reader.go ├── iceberg_writer.go ├── iceberg_writer_table.go ├── iceberg_writer_table_test.go ├── init_test.go ├── logger.go ├── main.go ├── parser_a_expr.go ├── parser_column_ref.go ├── parser_function.go ├── parser_select.go ├── parser_show.go ├── parser_table.go ├── parser_type_cast.go ├── parser_utils.go ├── pg_constants.go ├── pg_schema_column.go ├── postgres.go ├── query_handler.go ├── query_handler_test.go ├── query_remapper.go ├── query_remapper_expression.go ├── query_remapper_function.go ├── query_remapper_select.go ├── query_remapper_show.go ├── query_remapper_table.go ├── storage_interface.go ├── storage_local.go ├── storage_local_test.go ├── storage_s3.go ├── storage_utils.go ├── syncer.go ├── syncer_table.go ├── syncer_table_test.go ├── syncer_test.go └── utils.go /.env.sample: -------------------------------------------------------------------------------- 1 | BEMIDB_PORT=54321 2 | BEMIDB_DATABASE=bemidb 3 | BEMIDB_USER= 4 | BEMIDB_PASSWORD= 5 | BEMIDB_HOST=127.0.0.1 6 | BEMIDB_INIT_SQL=./init.sql 7 | BEMIDB_LOG_LEVEL=INFO 8 | 9 | # Local storage 10 | BEMIDB_STORAGE_TYPE=LOCAL 11 | BEMIDB_STORAGE_PATH=./iceberg 12 | 13 | # S3 storage 14 | # BEMIDB_STORAGE_TYPE=S3 15 | # BEMIDB_STORAGE_PATH=iceberg 16 | # AWS_REGION=us-west-1 17 | # AWS_ENDPOINT=s3.amazonaws.com 18 | # AWS_S3_BUCKET=[REPLACE_ME] 19 | # AWS_ACCESS_KEY_ID=[REPLACE_ME] 20 | # AWS_SECRET_ACCESS_KEY=[REPLACE_ME] 21 | 22 | # BEMIDB_DISABLE_ANONYMOUS_ANALYTICS=true 23 | 24 | # Postgres syncing 25 | PG_DATABASE_URL=postgres://[USER]:[PASSWORD]@localhost:5432/[DATABASE] 26 | # PG_SYNC_INTERVAL=1h 27 | # PG_SCHEMA_PREFIX=mydb_ 28 | # PG_INCLUDE_TABLES=public.users,public.posts 29 | # PG_EXCLUDE_TABLES=public.logs 30 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | push: 5 | branches: ['**'] 6 | pull_request: 7 | branches: ['**'] 8 | 9 | jobs: 10 | test: 11 | name: Test 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout Code 15 | uses: actions/checkout@v4 16 | 17 | - name: Set Up Go 18 | uses: actions/setup-go@v5 19 | with: 20 | go-version: '1.24.3' 21 | 22 | - name: Install Dependencies 23 | run: go get . 24 | working-directory: ./src 25 | 26 | - name: Run Tests 27 | run: go test -v ./... 
28 | working-directory: ./src 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /iceberg 2 | /iceberg-test 3 | /src/iceberg 4 | /src/iceberg-test 5 | .DS_Store 6 | .vscode 7 | .env 8 | /benchmark/tpch-kit 9 | /benchmark/data/*.tbl 10 | /build 11 | bemidb 12 | /src/__debug* 13 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | Please see [Releases](https://github.com/BemiHQ/BemiDB/releases). 4 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PLATFORM 2 | ARG GOOS 3 | ARG GOARCH 4 | 5 | FROM --platform=$PLATFORM golang:1.24.3 AS builder 6 | 7 | WORKDIR /app 8 | 9 | COPY src/go.mod src/go.sum ./ 10 | RUN go mod download 11 | 12 | COPY src/ . 13 | RUN CGO_ENABLED=1 GOOS=$GOOS GOARCH=$GOARCH go build -o /app/bemidb 14 | 15 | ################################################################################ 16 | 17 | FROM --platform=$PLATFORM debian:bookworm-slim 18 | 19 | WORKDIR /app 20 | 21 | COPY --from=builder /app/bemidb /app/bemidb 22 | 23 | ENTRYPOINT ["/app/bemidb"] 24 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | sh: 2 | devbox --env-file .env shell 3 | 4 | install: 5 | devbox run "cd src && go mod tidy" 6 | 7 | up: 8 | devbox run --env-file .env "cd src && go run ." 9 | 10 | .PHONY: build 11 | build: 12 | rm -rf build/bemidb-* && \ 13 | devbox run "./scripts/build-darwin.sh" && \ 14 | ./scripts/build-linux.sh 15 | 16 | build-local: 17 | rm -rf build/bemidb-* && \ 18 | cd src && go build -o ../build/bemidb-darwin-arm64 19 | 20 | publish: 21 | ./scripts/publish-docker.sh 22 | 23 | sync: 24 | devbox run --env-file .env "cd src && go run . sync" 25 | 26 | test: 27 | devbox run "cd src && go test ./..." 28 | 29 | test-function: 30 | devbox run "cd src && go test ./... -run $(FUNC)" 31 | 32 | debug: 33 | devbox run "cd src && dlv test github.com/BemiHQ/BemiDB" 34 | 35 | lint: 36 | devbox run "cd src && go fmt && deadcode . && staticcheck ." 
37 | 38 | console: 39 | devbox run "cd src && gore" 40 | 41 | outdated: 42 | devbox run "cd src && go list -u -m -f '{{if and .Update (not .Indirect)}}{{.}}{{end}}' all" 43 | 44 | .PHONY: benchmark 45 | benchmark: 46 | devbox run "time psql postgres://127.0.0.1:54321/bemidb < ./benchmark/queries.sql" 47 | 48 | pg-init: 49 | devbox run initdb && \ 50 | sed -i "s/#log_statement = 'none'/log_statement = 'all'/g" ./.devbox/virtenv/postgresql/data/postgresql.conf && \ 51 | sed -i "s/#logging_collector = off/logging_collector = on/g" ./.devbox/virtenv/postgresql/data/postgresql.conf && \ 52 | sed -i "s/#log_directory = 'log'/log_directory = 'log'/g" ./.devbox/virtenv/postgresql/data/postgresql.conf 53 | 54 | pg-up: 55 | devbox services start postgresql 56 | 57 | pg-create: 58 | devbox run "(dropdb tpch || true) && \ 59 | createdb tpch && \ 60 | ./benchmark/scripts/load-pg-data.sh" 61 | 62 | pg-index: 63 | devbox run "psql postgres://127.0.0.1:5432/tpch -f ./benchmark/data/create-indexes.ddl" 64 | 65 | pg-benchmark: 66 | devbox run "psql postgres://127.0.0.1:5432/tpch -c 'ANALYZE VERBOSE' && \ 67 | time psql postgres://127.0.0.1:5432/tpch < ./benchmark/queries.sql" 68 | 69 | pg-down: 70 | devbox services stop postgresql 71 | 72 | pg-logs: 73 | tail -f .devbox/virtenv/postgresql/data/log/postgresql-*.log 74 | 75 | pg-sniff: 76 | sudo tshark -i lo0 -f 'tcp port 5432' -d tcp.port==5432,pgsql -O pgsql 77 | 78 | tpch-install: 79 | devbox run "cd benchmark && \ 80 | rm -rf tpch-kit && \ 81 | git clone https://github.com/gregrahn/tpch-kit.git && \ 82 | cd tpch-kit/dbgen && \ 83 | make MACHINE=$$MACHINE DATABASE=POSTGRESQL" 84 | 85 | tpch-generate: 86 | devbox run "./benchmark/scripts/generate-data.sh" 87 | 88 | sniff: 89 | sudo tshark -i lo0 -f 'tcp port 54321' -d tcp.port==54321,pgsql -O pgsql 90 | 91 | measure-mem: 92 | devbox run "./benchmark/scripts/measure-memory.sh" 93 | 94 | profile-mem: 95 | devbox run "watch -n 1 go tool pprof -top http://localhost:6060/debug/pprof/heap" 96 | -------------------------------------------------------------------------------- /benchmark/README.md: -------------------------------------------------------------------------------- 1 | # BemiDB Benchmark 2 | 3 | We use the standardized TPC-H benchmark to compare PostgreSQL with BemiDB. 4 | This benchmark measures the performance of databases that handle large volumes of data and perform business-oriented ad-hoc queries (OLAP).
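For a concrete sense of the kind of ad-hoc query being measured, here is a minimal sketch adapted from the Q1 template in `benchmark/query-templates/1.sql`, with the `:1` parameter substituted by an example value (`90 days`) and the connection string taken from the defaults in `.env.sample` (adjust both to your setup):

```sh
# Run a single TPC-H-style aggregation against a locally running BemiDB,
# instead of the full ./benchmark/queries.sql suite used by `make benchmark`.
psql postgres://127.0.0.1:54321/bemidb -c "
  select
    l_returnflag,
    l_linestatus,
    sum(l_quantity) as sum_qty,
    count(*) as count_order
  from lineitem
  where l_shipdate <= date '1998-12-01' - interval '90 days'
  group by l_returnflag, l_linestatus
  order by l_returnflag, l_linestatus;"
```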
5 | 6 | ![TPC-H database structure](/img/tpc-h_database_structure.png) 7 | 8 | ## Running the TPC-H Benchmark 9 | 10 | ### PostgreSQL 11 | 12 | Download and unzip `TPC-H_generated_data_s*.zip` from the latest release into the "benchmark/data" directory and then set up a local PostgreSQL database: 13 | 14 | ```sh 15 | make pg-init 16 | make pg-up 17 | make pg-create 18 | ``` 19 | 20 | Run the benchmark queries with PostgreSQL: 21 | 22 | ```sh 23 | make pg-benchmark 24 | ``` 25 | 26 | Run the benchmark queries with indexed PostgreSQL: 27 | 28 | ```sh 29 | make pg-index 30 | make pg-benchmark 31 | ``` 32 | 33 | ### BemiDB 34 | 35 | Set up a local BemiDB database: 36 | 37 | ```sh 38 | make sync 39 | make up 40 | ``` 41 | 42 | Run the benchmark queries with BemiDB: 43 | 44 | ```sh 45 | make benchmark 46 | ``` 47 | 48 | ## Generating the TPC-H Data 49 | 50 | Install the TPC-H benchmark kit: 51 | 52 | ```sh 53 | make tpch-install MACHINE=MACOS # MACHINE=LINUX for Linux 54 | make tpch-generate SCALE_FACTOR=1 55 | ``` 56 | -------------------------------------------------------------------------------- /benchmark/data/create-indexes.ddl: -------------------------------------------------------------------------------- 1 | CREATE INDEX IF NOT EXISTS idx_part_name ON part (p_name varchar_pattern_ops); 2 | CREATE INDEX IF NOT EXISTS idx_part_brand_container ON part (p_brand, p_container, p_partkey); 3 | CREATE INDEX IF NOT EXISTS idx_part_partkey ON part (p_partkey); 4 | 5 | CREATE INDEX IF NOT EXISTS idx_partsupp_part_supp ON partsupp (ps_partkey, ps_suppkey); 6 | CREATE INDEX IF NOT EXISTS idx_partsupp_suppkey ON partsupp (ps_suppkey); 7 | 8 | CREATE INDEX IF NOT EXISTS idx_lineitem_dates ON lineitem (l_shipdate); 9 | CREATE INDEX IF NOT EXISTS idx_lineitem_part_supp ON lineitem (l_partkey, l_suppkey); 10 | CREATE INDEX IF NOT EXISTS idx_lineitem_part_qty ON lineitem (l_partkey, l_quantity, l_extendedprice); 11 | 12 | CREATE INDEX IF NOT EXISTS idx_nation_name ON nation (n_name); 13 | CREATE INDEX IF NOT EXISTS idx_nation_nationkey ON nation (n_nationkey); 14 | 15 | CREATE INDEX IF NOT EXISTS idx_supplier_nationkey ON supplier (s_nationkey); 16 | -------------------------------------------------------------------------------- /benchmark/data/create-tables.ddl: -------------------------------------------------------------------------------- 1 | -- Sccsid: @(#)dss.ddl 2.1.8.1 2 | CREATE TABLE NATION ( N_NATIONKEY INTEGER NOT NULL, 3 | N_NAME CHAR(25) NOT NULL, 4 | N_REGIONKEY INTEGER NOT NULL, 5 | N_COMMENT VARCHAR(152)); 6 | 7 | CREATE TABLE REGION ( R_REGIONKEY INTEGER NOT NULL, 8 | R_NAME CHAR(25) NOT NULL, 9 | R_COMMENT VARCHAR(152)); 10 | 11 | CREATE TABLE PART ( P_PARTKEY INTEGER NOT NULL, 12 | P_NAME VARCHAR(55) NOT NULL, 13 | P_MFGR CHAR(25) NOT NULL, 14 | P_BRAND CHAR(10) NOT NULL, 15 | P_TYPE VARCHAR(25) NOT NULL, 16 | P_SIZE INTEGER NOT NULL, 17 | P_CONTAINER CHAR(10) NOT NULL, 18 | P_RETAILPRICE DECIMAL(15,2) NOT NULL, 19 | P_COMMENT VARCHAR(23) NOT NULL ); 20 | 21 | CREATE TABLE SUPPLIER ( S_SUPPKEY INTEGER NOT NULL, 22 | S_NAME CHAR(25) NOT NULL, 23 | S_ADDRESS VARCHAR(40) NOT NULL, 24 | S_NATIONKEY INTEGER NOT NULL, 25 | S_PHONE CHAR(15) NOT NULL, 26 | S_ACCTBAL DECIMAL(15,2) NOT NULL, 27 | S_COMMENT VARCHAR(101) NOT NULL); 28 | 29 | CREATE TABLE PARTSUPP ( PS_PARTKEY INTEGER NOT NULL, 30 | PS_SUPPKEY INTEGER NOT NULL, 31 | PS_AVAILQTY INTEGER NOT NULL, 32 | PS_SUPPLYCOST DECIMAL(15,2) NOT NULL, 33 | PS_COMMENT VARCHAR(199) NOT NULL ); 34 | 35 | CREATE TABLE CUSTOMER ( C_CUSTKEY INTEGER 
NOT NULL, 36 | C_NAME VARCHAR(25) NOT NULL, 37 | C_ADDRESS VARCHAR(40) NOT NULL, 38 | C_NATIONKEY INTEGER NOT NULL, 39 | C_PHONE CHAR(15) NOT NULL, 40 | C_ACCTBAL DECIMAL(15,2) NOT NULL, 41 | C_MKTSEGMENT CHAR(10) NOT NULL, 42 | C_COMMENT VARCHAR(117) NOT NULL); 43 | 44 | CREATE TABLE ORDERS ( O_ORDERKEY INTEGER NOT NULL, 45 | O_CUSTKEY INTEGER NOT NULL, 46 | O_ORDERSTATUS CHAR(1) NOT NULL, 47 | O_TOTALPRICE DECIMAL(15,2) NOT NULL, 48 | O_ORDERDATE DATE NOT NULL, 49 | O_ORDERPRIORITY CHAR(15) NOT NULL, 50 | O_CLERK CHAR(15) NOT NULL, 51 | O_SHIPPRIORITY INTEGER NOT NULL, 52 | O_COMMENT VARCHAR(79) NOT NULL); 53 | 54 | CREATE TABLE LINEITEM ( L_ORDERKEY INTEGER NOT NULL, 55 | L_PARTKEY INTEGER NOT NULL, 56 | L_SUPPKEY INTEGER NOT NULL, 57 | L_LINENUMBER INTEGER NOT NULL, 58 | L_QUANTITY DECIMAL(15,2) NOT NULL, 59 | L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, 60 | L_DISCOUNT DECIMAL(15,2) NOT NULL, 61 | L_TAX DECIMAL(15,2) NOT NULL, 62 | L_RETURNFLAG CHAR(1) NOT NULL, 63 | L_LINESTATUS CHAR(1) NOT NULL, 64 | L_SHIPDATE DATE NOT NULL, 65 | L_COMMITDATE DATE NOT NULL, 66 | L_RECEIPTDATE DATE NOT NULL, 67 | L_SHIPINSTRUCT CHAR(25) NOT NULL, 68 | L_SHIPMODE CHAR(10) NOT NULL, 69 | L_COMMENT VARCHAR(44) NOT NULL); 70 | 71 | -------------------------------------------------------------------------------- /benchmark/query-templates/1.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Pricing Summary Report Query (Q1) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | l_returnflag, 9 | l_linestatus, 10 | sum(l_quantity) as sum_qty, 11 | sum(l_extendedprice) as sum_base_price, 12 | sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, 13 | sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, 14 | avg(l_quantity) as avg_qty, 15 | avg(l_extendedprice) as avg_price, 16 | avg(l_discount) as avg_disc, 17 | count(*) as count_order 18 | from 19 | lineitem 20 | where 21 | l_shipdate <= date '1998-12-01' - interval ':1 day' 22 | group by 23 | l_returnflag, 24 | l_linestatus 25 | order by 26 | l_returnflag, 27 | l_linestatus; 28 | :n -1 29 | -------------------------------------------------------------------------------- /benchmark/query-templates/10.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Returned Item Reporting Query (Q10) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | c_custkey, 9 | c_name, 10 | sum(l_extendedprice * (1 - l_discount)) as revenue, 11 | c_acctbal, 12 | n_name, 13 | c_address, 14 | c_phone, 15 | c_comment 16 | from 17 | customer, 18 | orders, 19 | lineitem, 20 | nation 21 | where 22 | c_custkey = o_custkey 23 | and l_orderkey = o_orderkey 24 | and o_orderdate >= date ':1' 25 | and o_orderdate < date ':1' + interval '3 months' 26 | and l_returnflag = 'R' 27 | and c_nationkey = n_nationkey 28 | group by 29 | c_custkey, 30 | c_name, 31 | c_acctbal, 32 | c_phone, 33 | n_name, 34 | c_address, 35 | c_comment 36 | order by 37 | revenue desc; 38 | :n 20 39 | -------------------------------------------------------------------------------- /benchmark/query-templates/11.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Important Stock Identification Query (Q11) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | ps_partkey, 9 | sum(ps_supplycost * 
ps_availqty) as value 10 | from 11 | partsupp, 12 | supplier, 13 | nation 14 | where 15 | ps_suppkey = s_suppkey 16 | and s_nationkey = n_nationkey 17 | and n_name = ':1' 18 | group by 19 | ps_partkey having 20 | sum(ps_supplycost * ps_availqty) > ( 21 | select 22 | sum(ps_supplycost * ps_availqty) * :2 23 | from 24 | partsupp, 25 | supplier, 26 | nation 27 | where 28 | ps_suppkey = s_suppkey 29 | and s_nationkey = n_nationkey 30 | and n_name = ':1' 31 | ) 32 | order by 33 | value desc; 34 | :n -1 35 | -------------------------------------------------------------------------------- /benchmark/query-templates/12.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Shipping Modes and Order Priority Query (Q12) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | l_shipmode, 9 | sum(case 10 | when o_orderpriority = '1-URGENT' 11 | or o_orderpriority = '2-HIGH' 12 | then 1 13 | else 0 14 | end) as high_line_count, 15 | sum(case 16 | when o_orderpriority <> '1-URGENT' 17 | and o_orderpriority <> '2-HIGH' 18 | then 1 19 | else 0 20 | end) as low_line_count 21 | from 22 | orders, 23 | lineitem 24 | where 25 | o_orderkey = l_orderkey 26 | and l_shipmode in (':1', ':2') 27 | and l_commitdate < l_receiptdate 28 | and l_shipdate < l_commitdate 29 | and l_receiptdate >= date ':3' 30 | and l_receiptdate < date ':3' + interval '1 year' 31 | group by 32 | l_shipmode 33 | order by 34 | l_shipmode; 35 | :n -1 36 | -------------------------------------------------------------------------------- /benchmark/query-templates/13.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Customer Distribution Query (Q13) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | c_count, 9 | count(*) as custdist 10 | from 11 | ( 12 | select 13 | c_custkey, 14 | count(o_orderkey) 15 | from 16 | customer left outer join orders on 17 | c_custkey = o_custkey 18 | and o_comment not like '%:1%:2%' 19 | group by 20 | c_custkey 21 | ) as c_orders (c_custkey, c_count) 22 | group by 23 | c_count 24 | order by 25 | custdist desc, 26 | c_count desc; 27 | :n -1 28 | -------------------------------------------------------------------------------- /benchmark/query-templates/14.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Promotion Effect Query (Q14) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | 100.00 * sum(case 9 | when p_type like 'PROMO%' 10 | then l_extendedprice * (1 - l_discount) 11 | else 0 12 | end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue 13 | from 14 | lineitem, 15 | part 16 | where 17 | l_partkey = p_partkey 18 | and l_shipdate >= date ':1' 19 | and l_shipdate < date ':1' + interval '1 month'; 20 | :n -1 21 | -------------------------------------------------------------------------------- /benchmark/query-templates/15.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Top Supplier Query (Q15) 3 | -- Variant A 4 | -- Approved February 1998 5 | -- 6 | -- BemiDB: Variant without "view" 7 | :x 8 | with revenue (supplier_no, total_revenue) as ( 9 | select 10 | l_suppkey, 11 | sum(l_extendedprice * (1-l_discount)) 12 | from 13 | lineitem 14 | where 15 | l_shipdate >= date ':1' 16 | and l_shipdate < date ':1' + interval '3 
months' 17 | group by 18 | l_suppkey 19 | ) 20 | 21 | :o 22 | select 23 | s_suppkey, 24 | s_name, 25 | s_address, 26 | s_phone, 27 | total_revenue 28 | from 29 | supplier, 30 | revenue 31 | where 32 | s_suppkey = supplier_no 33 | and total_revenue = ( 34 | select 35 | max(total_revenue) 36 | from 37 | revenue 38 | ) 39 | order by 40 | s_suppkey; 41 | :n -1 42 | -------------------------------------------------------------------------------- /benchmark/query-templates/16.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Parts/Supplier Relationship Query (Q16) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | p_brand, 9 | p_type, 10 | p_size, 11 | count(distinct ps_suppkey) as supplier_cnt 12 | from 13 | partsupp, 14 | part 15 | where 16 | p_partkey = ps_partkey 17 | and p_brand <> ':1' 18 | and p_type not like ':2%' 19 | and p_size in (:3, :4, :5, :6, :7, :8, :9, :10) 20 | and ps_suppkey not in ( 21 | select 22 | s_suppkey 23 | from 24 | supplier 25 | where 26 | s_comment like '%Customer%Complaints%' 27 | ) 28 | group by 29 | p_brand, 30 | p_type, 31 | p_size 32 | order by 33 | supplier_cnt desc, 34 | p_brand, 35 | p_type, 36 | p_size; 37 | :n -1 38 | -------------------------------------------------------------------------------- /benchmark/query-templates/17.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Small-Quantity-Order Revenue Query (Q17) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | -- 6 | -- BemiDB: 20s with unindexed PostgreSQL 7 | :x 8 | :o 9 | select 10 | sum(l_extendedprice) / 7.0 as avg_yearly 11 | from 12 | lineitem, 13 | part 14 | where 15 | p_partkey = l_partkey 16 | and p_brand = ':1' 17 | and p_container = ':2' 18 | and l_quantity < ( 19 | select 20 | 0.2 * avg(l_quantity) 21 | from 22 | lineitem 23 | where 24 | l_partkey = p_partkey 25 | ); 26 | :n -1 27 | -------------------------------------------------------------------------------- /benchmark/query-templates/18.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Large Volume Customer Query (Q18) 3 | -- Function Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | c_name, 9 | c_custkey, 10 | o_orderkey, 11 | o_orderdate, 12 | o_totalprice, 13 | sum(l_quantity) 14 | from 15 | customer, 16 | orders, 17 | lineitem 18 | where 19 | o_orderkey in ( 20 | select 21 | l_orderkey 22 | from 23 | lineitem 24 | group by 25 | l_orderkey having 26 | sum(l_quantity) > :1 27 | ) 28 | and c_custkey = o_custkey 29 | and o_orderkey = l_orderkey 30 | group by 31 | c_name, 32 | c_custkey, 33 | o_orderkey, 34 | o_orderdate, 35 | o_totalprice 36 | order by 37 | o_totalprice desc, 38 | o_orderdate; 39 | :n 100 40 | -------------------------------------------------------------------------------- /benchmark/query-templates/19.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Discounted Revenue Query (Q19) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | sum(l_extendedprice* (1 - l_discount)) as revenue 9 | from 10 | lineitem, 11 | part 12 | where 13 | ( 14 | p_partkey = l_partkey 15 | and p_brand = ':1' 16 | and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') 17 | and l_quantity >= :4 and l_quantity <= :4 + 10 18 | and p_size between 1 
and 5 19 | and l_shipmode in ('AIR', 'AIR REG') 20 | and l_shipinstruct = 'DELIVER IN PERSON' 21 | ) 22 | or 23 | ( 24 | p_partkey = l_partkey 25 | and p_brand = ':2' 26 | and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') 27 | and l_quantity >= :5 and l_quantity <= :5 + 10 28 | and p_size between 1 and 10 29 | and l_shipmode in ('AIR', 'AIR REG') 30 | and l_shipinstruct = 'DELIVER IN PERSON' 31 | ) 32 | or 33 | ( 34 | p_partkey = l_partkey 35 | and p_brand = ':3' 36 | and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') 37 | and l_quantity >= :6 and l_quantity <= :6 + 10 38 | and p_size between 1 and 15 39 | and l_shipmode in ('AIR', 'AIR REG') 40 | and l_shipinstruct = 'DELIVER IN PERSON' 41 | ); 42 | :n -1 43 | -------------------------------------------------------------------------------- /benchmark/query-templates/2.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Minimum Cost Supplier Query (Q2) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | s_acctbal, 9 | s_name, 10 | n_name, 11 | p_partkey, 12 | p_mfgr, 13 | s_address, 14 | s_phone, 15 | s_comment 16 | from 17 | part, 18 | supplier, 19 | partsupp, 20 | nation, 21 | region 22 | where 23 | p_partkey = ps_partkey 24 | and s_suppkey = ps_suppkey 25 | and p_size = :1 26 | and p_type like '%:2' 27 | and s_nationkey = n_nationkey 28 | and n_regionkey = r_regionkey 29 | and r_name = ':3' 30 | and ps_supplycost = ( 31 | select 32 | min(ps_supplycost) 33 | from 34 | partsupp, 35 | supplier, 36 | nation, 37 | region 38 | where 39 | p_partkey = ps_partkey 40 | and s_suppkey = ps_suppkey 41 | and s_nationkey = n_nationkey 42 | and n_regionkey = r_regionkey 43 | and r_name = ':3' 44 | ) 45 | order by 46 | s_acctbal desc, 47 | n_name, 48 | s_name, 49 | p_partkey; 50 | :n 100 51 | -------------------------------------------------------------------------------- /benchmark/query-templates/20.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Potential Part Promotion Query (Q20) 3 | -- Function Query Definition 4 | -- Approved February 1998 5 | -- 6 | -- BemiDB: 1h 23m with unindexed PostgreSQL 7 | :x 8 | :o 9 | select 10 | s_name, 11 | s_address 12 | from 13 | supplier, 14 | nation 15 | where 16 | s_suppkey in ( 17 | select 18 | ps_suppkey 19 | from 20 | partsupp 21 | where 22 | ps_partkey in ( 23 | select 24 | p_partkey 25 | from 26 | part 27 | where 28 | p_name like ':1%' 29 | ) 30 | and ps_availqty > ( 31 | select 32 | 0.5 * sum(l_quantity) 33 | from 34 | lineitem 35 | where 36 | l_partkey = ps_partkey 37 | and l_suppkey = ps_suppkey 38 | and l_shipdate >= date ':2' 39 | and l_shipdate < date ':2' + interval '1 year' 40 | ) 41 | ) 42 | and s_nationkey = n_nationkey 43 | and n_name = ':3' 44 | order by 45 | s_name; 46 | :n -1 47 | -------------------------------------------------------------------------------- /benchmark/query-templates/21.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Suppliers Who Kept Orders Waiting Query (Q21) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | s_name, 9 | count(*) as numwait 10 | from 11 | supplier, 12 | lineitem l1, 13 | orders, 14 | nation 15 | where 16 | s_suppkey = l1.l_suppkey 17 | and o_orderkey = l1.l_orderkey 18 | and o_orderstatus = 'F' 19 | and l1.l_receiptdate > l1.l_commitdate 20 | and 
exists ( 21 | select 22 | * 23 | from 24 | lineitem l2 25 | where 26 | l2.l_orderkey = l1.l_orderkey 27 | and l2.l_suppkey <> l1.l_suppkey 28 | ) 29 | and not exists ( 30 | select 31 | * 32 | from 33 | lineitem l3 34 | where 35 | l3.l_orderkey = l1.l_orderkey 36 | and l3.l_suppkey <> l1.l_suppkey 37 | and l3.l_receiptdate > l3.l_commitdate 38 | ) 39 | and s_nationkey = n_nationkey 40 | and n_name = ':1' 41 | group by 42 | s_name 43 | order by 44 | numwait desc, 45 | s_name; 46 | :n 100 47 | -------------------------------------------------------------------------------- /benchmark/query-templates/22.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Global Sales Opportunity Query (Q22) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | cntrycode, 9 | count(*) as numcust, 10 | sum(c_acctbal) as totacctbal 11 | from 12 | ( 13 | select 14 | substring(c_phone from 1 for 2) as cntrycode, 15 | c_acctbal 16 | from 17 | customer 18 | where 19 | substring(c_phone from 1 for 2) in 20 | (':1', ':2', ':3', ':4', ':5', ':6', ':7') 21 | and c_acctbal > ( 22 | select 23 | avg(c_acctbal) 24 | from 25 | customer 26 | where 27 | c_acctbal > 0.00 28 | and substring(c_phone from 1 for 2) in 29 | (':1', ':2', ':3', ':4', ':5', ':6', ':7') 30 | ) 31 | and not exists ( 32 | select 33 | * 34 | from 35 | orders 36 | where 37 | o_custkey = c_custkey 38 | ) 39 | ) as custsale 40 | group by 41 | cntrycode 42 | order by 43 | cntrycode; 44 | :n -1 45 | -------------------------------------------------------------------------------- /benchmark/query-templates/3.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Shipping Priority Query (Q3) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | l_orderkey, 9 | sum(l_extendedprice * (1 - l_discount)) as revenue, 10 | o_orderdate, 11 | o_shippriority 12 | from 13 | customer, 14 | orders, 15 | lineitem 16 | where 17 | c_mktsegment = ':1' 18 | and c_custkey = o_custkey 19 | and l_orderkey = o_orderkey 20 | and o_orderdate < date ':2' 21 | and l_shipdate > date ':2' 22 | group by 23 | l_orderkey, 24 | o_orderdate, 25 | o_shippriority 26 | order by 27 | revenue desc, 28 | o_orderdate; 29 | :n 10 30 | -------------------------------------------------------------------------------- /benchmark/query-templates/4.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Order Priority Checking Query (Q4) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | o_orderpriority, 9 | count(*) as order_count 10 | from 11 | orders 12 | where 13 | o_orderdate >= date ':1' 14 | and o_orderdate < date ':1' + interval '3 months' 15 | and exists ( 16 | select 17 | * 18 | from 19 | lineitem 20 | where 21 | l_orderkey = o_orderkey 22 | and l_commitdate < l_receiptdate 23 | ) 24 | group by 25 | o_orderpriority 26 | order by 27 | o_orderpriority; 28 | :n -1 29 | -------------------------------------------------------------------------------- /benchmark/query-templates/5.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Local Supplier Volume Query (Q5) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | n_name, 9 | sum(l_extendedprice * (1 - l_discount)) as revenue 10 | from 11 | 
customer, 12 | orders, 13 | lineitem, 14 | supplier, 15 | nation, 16 | region 17 | where 18 | c_custkey = o_custkey 19 | and l_orderkey = o_orderkey 20 | and l_suppkey = s_suppkey 21 | and c_nationkey = s_nationkey 22 | and s_nationkey = n_nationkey 23 | and n_regionkey = r_regionkey 24 | and r_name = ':1' 25 | and o_orderdate >= date ':2' 26 | and o_orderdate < date ':2' + interval '1 year' 27 | group by 28 | n_name 29 | order by 30 | revenue desc; 31 | :n -1 32 | -------------------------------------------------------------------------------- /benchmark/query-templates/6.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Forecasting Revenue Change Query (Q6) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | sum(l_extendedprice * l_discount) as revenue 9 | from 10 | lineitem 11 | where 12 | l_shipdate >= date ':1' 13 | and l_shipdate < date ':1' + interval '1 year' 14 | and l_discount between :2 - 0.01 and :2 + 0.01 15 | and l_quantity < :3; 16 | :n -1 17 | -------------------------------------------------------------------------------- /benchmark/query-templates/7.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Volume Shipping Query (Q7) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | supp_nation, 9 | cust_nation, 10 | l_year, 11 | sum(volume) as revenue 12 | from 13 | ( 14 | select 15 | n1.n_name as supp_nation, 16 | n2.n_name as cust_nation, 17 | extract(year from l_shipdate) as l_year, 18 | l_extendedprice * (1 - l_discount) as volume 19 | from 20 | supplier, 21 | lineitem, 22 | orders, 23 | customer, 24 | nation n1, 25 | nation n2 26 | where 27 | s_suppkey = l_suppkey 28 | and o_orderkey = l_orderkey 29 | and c_custkey = o_custkey 30 | and s_nationkey = n1.n_nationkey 31 | and c_nationkey = n2.n_nationkey 32 | and ( 33 | (n1.n_name = ':1' and n2.n_name = ':2') 34 | or (n1.n_name = ':2' and n2.n_name = ':1') 35 | ) 36 | and l_shipdate between date '1995-01-01' and date '1996-12-31' 37 | ) as shipping 38 | group by 39 | supp_nation, 40 | cust_nation, 41 | l_year 42 | order by 43 | supp_nation, 44 | cust_nation, 45 | l_year; 46 | :n -1 47 | -------------------------------------------------------------------------------- /benchmark/query-templates/8.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R National Market Share Query (Q8) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | o_year, 9 | sum(case 10 | when nation = ':1' then volume 11 | else 0 12 | end) / sum(volume) as mkt_share 13 | from 14 | ( 15 | select 16 | extract(year from o_orderdate) as o_year, 17 | l_extendedprice * (1 - l_discount) as volume, 18 | n2.n_name as nation 19 | from 20 | part, 21 | supplier, 22 | lineitem, 23 | orders, 24 | customer, 25 | nation n1, 26 | nation n2, 27 | region 28 | where 29 | p_partkey = l_partkey 30 | and s_suppkey = l_suppkey 31 | and l_orderkey = o_orderkey 32 | and o_custkey = c_custkey 33 | and c_nationkey = n1.n_nationkey 34 | and n1.n_regionkey = r_regionkey 35 | and r_name = ':2' 36 | and s_nationkey = n2.n_nationkey 37 | and o_orderdate between date '1995-01-01' and date '1996-12-31' 38 | and p_type = ':3' 39 | ) as all_nations 40 | group by 41 | o_year 42 | order by 43 | o_year; 44 | :n -1 45 | 
-------------------------------------------------------------------------------- /benchmark/query-templates/9.sql: -------------------------------------------------------------------------------- 1 | -- $ID$ 2 | -- TPC-H/TPC-R Product Type Profit Measure Query (Q9) 3 | -- Functional Query Definition 4 | -- Approved February 1998 5 | :x 6 | :o 7 | select 8 | nation, 9 | o_year, 10 | sum(amount) as sum_profit 11 | from 12 | ( 13 | select 14 | n_name as nation, 15 | extract(year from o_orderdate) as o_year, 16 | l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount 17 | from 18 | part, 19 | supplier, 20 | lineitem, 21 | partsupp, 22 | orders, 23 | nation 24 | where 25 | s_suppkey = l_suppkey 26 | and ps_suppkey = l_suppkey 27 | and ps_partkey = l_partkey 28 | and p_partkey = l_partkey 29 | and o_orderkey = l_orderkey 30 | and s_nationkey = n_nationkey 31 | and p_name like '%:1%' 32 | ) as profit 33 | group by 34 | nation, 35 | o_year 36 | order by 37 | nation, 38 | o_year desc; 39 | :n -1 40 | -------------------------------------------------------------------------------- /benchmark/scripts/generate-data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd benchmark 4 | 5 | # Structure 6 | cp ./tpch-kit/dbgen/dss.ddl ./data/create-tables.ddl 7 | 8 | # Data 9 | cd ./tpch-kit/dbgen 10 | export DSS_PATH=../../data 11 | export DSS_CONFIG=./ 12 | ./dbgen -vf -s $SCALE_FACTOR # 1 = 1GB 13 | 14 | # Queries 15 | cd - 16 | rm -rf /tmp/query-templates 17 | mkdir /tmp/query-templates 18 | for i in `ls query-templates/*.sql`; do 19 | tac $i | sed '2s/;//' | tac > /tmp/$i # Remove ";" 20 | done 21 | cd ./tpch-kit/dbgen 22 | export DSS_QUERY=/tmp/query-templates 23 | ./qgen -v -s 0.1 | sed 's/limit -1//' > ../../queries.sql 24 | -------------------------------------------------------------------------------- /benchmark/scripts/load-pg-data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | curl -L -o ./tpch.zip https://github.com/BemiHQ/BemiDB/releases/download/v0.1.0/TPC-H_generated_data_s0.1.zip 4 | unzip ./tpch.zip -d ./benchmark/data 5 | rm ./tpch.zip 6 | 7 | cd ./benchmark/data 8 | 9 | mv ./TPC-H_generated_data/* ./ 10 | rm -rf ./TPC-H_generated_data 11 | rm -rf ./__MACOSX 12 | 13 | psql postgres://127.0.0.1:5432/tpch -f ./create-tables.ddl 14 | 15 | for i in `ls *.tbl`; do 16 | table=${i/.tbl/} 17 | echo "Loading $table..." 18 | sed 's/|$//' $i > /tmp/$i 19 | psql postgres://127.0.0.1:5432/tpch -q -c "TRUNCATE $table" 20 | psql postgres://127.0.0.1:5432/tpch -c "\\copy $table FROM '/tmp/$i' CSV DELIMITER '|'" 21 | done 22 | -------------------------------------------------------------------------------- /benchmark/scripts/measure-memory.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PID=$(ps | grep "/exe/BemiDB sync" | grep -v grep | awk '{print $1}') 4 | 5 | if ! 
ps -p "$PID" > /dev/null 2>&1; then 6 | echo "Error: Process with PID $PID does not exist" 7 | exit 1 8 | fi 9 | 10 | echo "PID Time Mem" 11 | 12 | max_memory=0 13 | 14 | while true; do 15 | current_time=$(date "+%Y-%m-%d %H:%M:%S") 16 | current_memory=$(top -pid $PID -stats mem -l 1 | tail -n 1 | sed 's/[^0-9]*//g') 17 | 18 | if [ "$current_memory" -gt "$max_memory" ]; then 19 | max_memory=$current_memory 20 | printf "%s %s %sMB (%sMB new max)\n" "$PID" "$current_time" "$current_memory" "$max_memory" 21 | else 22 | printf "%s %s %sMB\n" "$PID" "$current_time" "$current_memory" 23 | fi 24 | 25 | sleep 1 26 | done 27 | -------------------------------------------------------------------------------- /build/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BemiHQ/BemiDB/989c88ec373493fce3e76694a07a3cae0c429323/build/.gitkeep -------------------------------------------------------------------------------- /devbox.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://raw.githubusercontent.com/jetify-com/devbox/0.13.1/.schema/devbox.schema.json", 3 | "packages": [ 4 | "go@latest", 5 | "postgresql@latest", 6 | "gcc@latest" 7 | ], 8 | "shell": { 9 | "init_hook": [], 10 | "scripts": { 11 | "test": [ 12 | "echo \"Error: no test specified\" && exit 1" 13 | ] 14 | } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /devbox.lock: -------------------------------------------------------------------------------- 1 | { 2 | "lockfile_version": "1", 3 | "packages": { 4 | "gcc@latest": { 5 | "last_modified": "2025-02-07T11:26:36Z", 6 | "resolved": "github:NixOS/nixpkgs/d98abf5cf5914e5e4e9d57205e3af55ca90ffc1d#gcc", 7 | "source": "devbox-search", 8 | "version": "14-20241116", 9 | "systems": { 10 | "aarch64-darwin": { 11 | "outputs": [ 12 | { 13 | "name": "out", 14 | "path": "/nix/store/3k1dfk03xkmaf6cksgpk492k3m8brvmp-gcc-wrapper-14-20241116", 15 | "default": true 16 | }, 17 | { 18 | "name": "man", 19 | "path": "/nix/store/cf67mihrjf3a1w4sw8jkgw49kfi54wpq-gcc-wrapper-14-20241116-man", 20 | "default": true 21 | }, 22 | { 23 | "name": "info", 24 | "path": "/nix/store/jgsywypzhdim8s6x25cnr31ygm28lhin-gcc-wrapper-14-20241116-info" 25 | } 26 | ], 27 | "store_path": "/nix/store/3k1dfk03xkmaf6cksgpk492k3m8brvmp-gcc-wrapper-14-20241116" 28 | }, 29 | "aarch64-linux": { 30 | "outputs": [ 31 | { 32 | "name": "out", 33 | "path": "/nix/store/pavcqvq7ycdbpal1mfjsscvgngzsg9sp-gcc-wrapper-14-20241116", 34 | "default": true 35 | }, 36 | { 37 | "name": "man", 38 | "path": "/nix/store/srwbca27wpylwzaqz9ssbhlkx910ryv1-gcc-wrapper-14-20241116-man", 39 | "default": true 40 | }, 41 | { 42 | "name": "info", 43 | "path": "/nix/store/yrdz1bra65cc9i2n2ghsz98g4fx2jra3-gcc-wrapper-14-20241116-info" 44 | } 45 | ], 46 | "store_path": "/nix/store/pavcqvq7ycdbpal1mfjsscvgngzsg9sp-gcc-wrapper-14-20241116" 47 | }, 48 | "x86_64-darwin": { 49 | "outputs": [ 50 | { 51 | "name": "out", 52 | "path": "/nix/store/g2462k2svl4zn5q7yypqirp6xxq0s9aq-gcc-wrapper-14-20241116", 53 | "default": true 54 | }, 55 | { 56 | "name": "man", 57 | "path": "/nix/store/s0vndfpg8gsz40wmf3v11csk3n497kqm-gcc-wrapper-14-20241116-man", 58 | "default": true 59 | }, 60 | { 61 | "name": "info", 62 | "path": "/nix/store/3x75ibh7rj95vm09q38iqj8l5jwbrarl-gcc-wrapper-14-20241116-info" 63 | } 64 | ], 65 | "store_path": "/nix/store/g2462k2svl4zn5q7yypqirp6xxq0s9aq-gcc-wrapper-14-20241116" 66 | }, 67 
| "x86_64-linux": { 68 | "outputs": [ 69 | { 70 | "name": "out", 71 | "path": "/nix/store/4ijy8jbsiqmj37avrk83gn2m903486mr-gcc-wrapper-14-20241116", 72 | "default": true 73 | }, 74 | { 75 | "name": "man", 76 | "path": "/nix/store/vyax7fpbw688qwx32c1i8n1f5jbjkcii-gcc-wrapper-14-20241116-man", 77 | "default": true 78 | }, 79 | { 80 | "name": "info", 81 | "path": "/nix/store/m4641rrm7dw80bn23ab0812pm6aj6402-gcc-wrapper-14-20241116-info" 82 | } 83 | ], 84 | "store_path": "/nix/store/4ijy8jbsiqmj37avrk83gn2m903486mr-gcc-wrapper-14-20241116" 85 | } 86 | } 87 | }, 88 | "github:NixOS/nixpkgs/nixpkgs-unstable": { 89 | "resolved": "github:NixOS/nixpkgs/d9b69c3ec2a2e2e971c534065bdd53374bd68b97?lastModified=1740396192&narHash=sha256-ATMHHrg3sG1KgpQA5x8I%2BzcYpp5Sf17FaFj%2FfN%2B8OoQ%3D" 90 | }, 91 | "go@latest": { 92 | "last_modified": "2025-05-16T20:19:48Z", 93 | "resolved": "github:NixOS/nixpkgs/12a55407652e04dcf2309436eb06fef0d3713ef3#go", 94 | "source": "devbox-search", 95 | "version": "1.24.3", 96 | "systems": { 97 | "aarch64-darwin": { 98 | "outputs": [ 99 | { 100 | "name": "out", 101 | "path": "/nix/store/ps3admpzmc1ryvn9q7sw5xfd94dkrb3f-go-1.24.3", 102 | "default": true 103 | } 104 | ], 105 | "store_path": "/nix/store/ps3admpzmc1ryvn9q7sw5xfd94dkrb3f-go-1.24.3" 106 | }, 107 | "aarch64-linux": { 108 | "outputs": [ 109 | { 110 | "name": "out", 111 | "path": "/nix/store/45bnqhyyq40p91k3cjw0farx3hn1swx6-go-1.24.3", 112 | "default": true 113 | } 114 | ], 115 | "store_path": "/nix/store/45bnqhyyq40p91k3cjw0farx3hn1swx6-go-1.24.3" 116 | }, 117 | "x86_64-darwin": { 118 | "outputs": [ 119 | { 120 | "name": "out", 121 | "path": "/nix/store/9z2kb6hxij7pqi0fgcn9ijhpb7ajpazs-go-1.24.3", 122 | "default": true 123 | } 124 | ], 125 | "store_path": "/nix/store/9z2kb6hxij7pqi0fgcn9ijhpb7ajpazs-go-1.24.3" 126 | }, 127 | "x86_64-linux": { 128 | "outputs": [ 129 | { 130 | "name": "out", 131 | "path": "/nix/store/5xvi25nqmbrg58aixp4zgczilfnp7pwg-go-1.24.3", 132 | "default": true 133 | } 134 | ], 135 | "store_path": "/nix/store/5xvi25nqmbrg58aixp4zgczilfnp7pwg-go-1.24.3" 136 | } 137 | } 138 | }, 139 | "postgresql@latest": { 140 | "last_modified": "2025-03-25T17:32:05Z", 141 | "plugin_version": "0.0.2", 142 | "resolved": "github:NixOS/nixpkgs/25d1b84f5c90632a623c48d83a2faf156451e6b1#postgresql", 143 | "source": "devbox-search", 144 | "version": "17.4", 145 | "systems": { 146 | "aarch64-darwin": { 147 | "outputs": [ 148 | { 149 | "name": "out", 150 | "path": "/nix/store/prh52g9iwjdddxbv4n0b52gbnlxnnk6w-postgresql-17.4", 151 | "default": true 152 | }, 153 | { 154 | "name": "man", 155 | "path": "/nix/store/il144892arv36x68b5y95bkvrq32ym91-postgresql-17.4-man", 156 | "default": true 157 | }, 158 | { 159 | "name": "dev", 160 | "path": "/nix/store/7gf8hy13r7li2balcini7004aml54l5n-postgresql-17.4-dev" 161 | }, 162 | { 163 | "name": "doc", 164 | "path": "/nix/store/6x00505hxzfwjfpk15v6p4qqnbpk5dza-postgresql-17.4-doc" 165 | }, 166 | { 167 | "name": "lib", 168 | "path": "/nix/store/c9g6v34cjsf308m9xzcs7figc1vgbbw3-postgresql-17.4-lib" 169 | } 170 | ], 171 | "store_path": "/nix/store/prh52g9iwjdddxbv4n0b52gbnlxnnk6w-postgresql-17.4" 172 | }, 173 | "aarch64-linux": { 174 | "outputs": [ 175 | { 176 | "name": "out", 177 | "path": "/nix/store/1lgjdy1nm8l68y2jw6m1lhas4j5jcmk1-postgresql-17.4", 178 | "default": true 179 | }, 180 | { 181 | "name": "man", 182 | "path": "/nix/store/1v352rrzfv5p105jfaizxhd29nk41hgp-postgresql-17.4-man", 183 | "default": true 184 | }, 185 | { 186 | "name": "debug", 187 | "path": 
"/nix/store/5bywayb6ywgznzh9cck9wpya1bzg4v0a-postgresql-17.4-debug" 188 | }, 189 | { 190 | "name": "dev", 191 | "path": "/nix/store/zs35b02p7cay6jp7zr1xihwx8vzab17c-postgresql-17.4-dev" 192 | }, 193 | { 194 | "name": "doc", 195 | "path": "/nix/store/1qccl3dm5wcja6h2kjkhvs5r9l1bx4hz-postgresql-17.4-doc" 196 | }, 197 | { 198 | "name": "lib", 199 | "path": "/nix/store/kyh4l6wsjgwghvjw9810p7nn1ap106mj-postgresql-17.4-lib" 200 | } 201 | ], 202 | "store_path": "/nix/store/1lgjdy1nm8l68y2jw6m1lhas4j5jcmk1-postgresql-17.4" 203 | }, 204 | "x86_64-darwin": { 205 | "outputs": [ 206 | { 207 | "name": "out", 208 | "path": "/nix/store/0mzgv54qxafr66f4d7prz42fhs833mhk-postgresql-17.4", 209 | "default": true 210 | }, 211 | { 212 | "name": "man", 213 | "path": "/nix/store/akb98lb29c1x3mflzcwqy4a0gqfk331r-postgresql-17.4-man", 214 | "default": true 215 | }, 216 | { 217 | "name": "dev", 218 | "path": "/nix/store/gpkbg9yhx7jji2hr3jp89q06hi6v7qrk-postgresql-17.4-dev" 219 | }, 220 | { 221 | "name": "doc", 222 | "path": "/nix/store/9hxw6pf1qnlz1ygx5ximyvc48swb54n0-postgresql-17.4-doc" 223 | }, 224 | { 225 | "name": "lib", 226 | "path": "/nix/store/kyml5v1q498ympq67jvcnhgmsn8384zk-postgresql-17.4-lib" 227 | } 228 | ], 229 | "store_path": "/nix/store/0mzgv54qxafr66f4d7prz42fhs833mhk-postgresql-17.4" 230 | }, 231 | "x86_64-linux": { 232 | "outputs": [ 233 | { 234 | "name": "out", 235 | "path": "/nix/store/snfxmriwav4i0k1fxp78xk5w12hbv4q9-postgresql-17.4", 236 | "default": true 237 | }, 238 | { 239 | "name": "man", 240 | "path": "/nix/store/pcx190vq4awjcgpmj2flrbp9awhdc74q-postgresql-17.4-man", 241 | "default": true 242 | }, 243 | { 244 | "name": "lib", 245 | "path": "/nix/store/yja4rgfrwyxckwqf10rbr4armbn0p2y5-postgresql-17.4-lib" 246 | }, 247 | { 248 | "name": "debug", 249 | "path": "/nix/store/zrlrz84kzfvnxcx5mis53scr205p29hx-postgresql-17.4-debug" 250 | }, 251 | { 252 | "name": "dev", 253 | "path": "/nix/store/piqzr58swwmbsngl3jp98xgrf17a960n-postgresql-17.4-dev" 254 | }, 255 | { 256 | "name": "doc", 257 | "path": "/nix/store/ilc5sycwvqjjfa33978nb1p14x358l1c-postgresql-17.4-doc" 258 | } 259 | ], 260 | "store_path": "/nix/store/snfxmriwav4i0k1fxp78xk5w12hbv4q9-postgresql-17.4" 261 | } 262 | } 263 | } 264 | } 265 | } 266 | -------------------------------------------------------------------------------- /img/BemiDB.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BemiHQ/BemiDB/989c88ec373493fce3e76694a07a3cae0c429323/img/BemiDB.gif -------------------------------------------------------------------------------- /img/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BemiHQ/BemiDB/989c88ec373493fce3e76694a07a3cae0c429323/img/architecture.png -------------------------------------------------------------------------------- /img/tpc-h_database_structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BemiHQ/BemiDB/989c88ec373493fce3e76694a07a3cae0c429323/img/tpc-h_database_structure.png -------------------------------------------------------------------------------- /scripts/build-darwin.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | go build -o ../build/bemidb-darwin-arm64 3 | 4 | create_dir_if_needed() { 5 | local dir=$1 6 | if [ ! 
-d "$dir" ]; then 7 | echo "Creating directory: $dir" 8 | sudo mkdir -p "$dir" 9 | sudo chmod 755 "$dir" 10 | fi 11 | } 12 | 13 | create_dir_if_needed "/usr/local/lib" 14 | cd ../build 15 | LIBCPP_OLD_PATH=$(otool -L ./bemidb-darwin-arm64 | grep -o '/.*/libc++\.1\.0\.dylib') 16 | if [ -z "$LIBCPP_OLD_PATH" ]; then 17 | echo "Error: Could not find libc++ dependency in binary" 18 | exit 1 19 | fi 20 | LIBCPP_NEW_PATH=/usr/local/lib/libc++.1.0.dylib 21 | sudo cp $LIBCPP_OLD_PATH $LIBCPP_NEW_PATH 22 | 23 | LIBCPPABI_OLD_PATH=$(otool -L $LIBCPP_NEW_PATH | grep -o '/.*/libc++abi\.1\.dylib') 24 | if [ -z "$LIBCPPABI_OLD_PATH" ]; then 25 | echo "Error: Could not find libc++abi dependency" 26 | exit 1 27 | fi 28 | LIBCPPABI_NEW_PATH=/usr/local/lib/libc++abi.1.dylib 29 | sudo cp $LIBCPPABI_OLD_PATH $LIBCPPABI_NEW_PATH 30 | 31 | sudo install_name_tool -change $LIBCPPABI_OLD_PATH $LIBCPPABI_NEW_PATH $LIBCPP_NEW_PATH 32 | sudo install_name_tool -change $LIBCPP_OLD_PATH $LIBCPP_NEW_PATH ./bemidb-darwin-arm64 33 | 34 | sudo cp $LIBCPP_NEW_PATH ./libc++.1.0.dylib 35 | sudo cp $LIBCPPABI_NEW_PATH ./libc++abi.1.dylib 36 | otool -L ./bemidb-darwin-arm64 37 | -------------------------------------------------------------------------------- /scripts/build-linux.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | platforms=("linux/amd64" "linux/arm64") 4 | 5 | version=$(grep -E 'VERSION = "[^"]+"' src/config.go | sed -E 's/.*VERSION = "([^"]+)".*/\1/') 6 | if [ -z "$version" ]; then 7 | echo "Error: Could not extract version from config.go" 8 | exit 1 9 | fi 10 | 11 | for platform in "${platforms[@]}" 12 | do 13 | os="${platform%/*}" 14 | arch="${platform#*/}" 15 | tag="ghcr.io/bemihq/bemidb:$version-$arch" 16 | 17 | echo "Building bemidb version $version for $os/$arch" 18 | 19 | docker buildx build \ 20 | --build-arg PLATFORM=$platform \ 21 | --build-arg GOOS=$os \ 22 | --build-arg GOARCH="$arch" \ 23 | -t $tag . 24 | 25 | docker create --name temp-container $tag 26 | docker cp temp-container:/app/bemidb ./build/bemidb-$os-$arch 27 | docker rm temp-container 28 | done 29 | -------------------------------------------------------------------------------- /scripts/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Detect OS and architecture 4 | OS=$(uname -s | tr '[:upper:]' '[:lower:]') 5 | ARCH=$(uname -m) 6 | 7 | # Map architecture to Go naming convention 8 | case $ARCH in 9 | x86_64|amd64) 10 | ARCH="amd64" 11 | ;; 12 | aarch64|arm64) 13 | ARCH="arm64" 14 | ;; 15 | *) 16 | echo "Unsupported architecture: $ARCH" 17 | exit 1 18 | ;; 19 | esac 20 | 21 | # Set the download URL and binary name 22 | BINARY_NAME="bemidb-${OS}-${ARCH}" 23 | DOWNLOAD_URL="https://github.com/BemiHQ/BemiDB/releases/latest/download/$BINARY_NAME" 24 | 25 | # Download the binary 26 | echo "Downloading $DOWNLOAD_URL..." 27 | curl -L "$DOWNLOAD_URL" -o ./bemidb 28 | 29 | if [ "$ARCH" = "arm64" ] && [ "$OS" = "darwin" ]; then 30 | # Ensure /usr/local/lib exists 31 | if [ ! 
-d "/usr/local/lib" ]; then 32 | sudo mkdir -p /usr/local/lib 33 | fi 34 | 35 | # Download the libc++ dynamic libraries for macOS (can't be statically linked) 36 | curl -sL "https://github.com/BemiHQ/BemiDB/releases/latest/download/libc++.1.0.dylib" -o ./libc++.1.0.dylib 37 | sudo mv ./libc++.1.0.dylib /usr/local/lib/libc++.1.0.dylib 38 | curl -sL "https://github.com/BemiHQ/BemiDB/releases/latest/download/libc++abi.1.dylib" -o ./libc++abi.1.dylib 39 | sudo mv ./libc++abi.1.dylib /usr/local/lib/libc++abi.1.dylib 40 | fi 41 | 42 | # Make the binary executable 43 | chmod +x ./bemidb 44 | -------------------------------------------------------------------------------- /scripts/publish-docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | version=$(grep -E 'VERSION = "[^"]+"' src/config.go | sed -E 's/.*VERSION = "([^"]+)".*/\1/') 4 | if [ -z "$version" ]; then 5 | echo "Error: Could not extract version from config.go" 6 | exit 1 7 | fi 8 | 9 | echo "Pushing bemidb version $version to ghcr.io" 10 | 11 | docker push ghcr.io/bemihq/bemidb:$version-amd64 12 | docker push ghcr.io/bemihq/bemidb:$version-arm64 13 | 14 | docker manifest inspect ghcr.io/bemihq/bemidb:$version &> /dev/null && docker manifest rm ghcr.io/bemihq/bemidb:$version 15 | docker manifest create ghcr.io/bemihq/bemidb:$version ghcr.io/bemihq/bemidb:$version-amd64 ghcr.io/bemihq/bemidb:$version-arm64 16 | docker manifest annotate ghcr.io/bemihq/bemidb:$version ghcr.io/bemihq/bemidb:$version-amd64 --arch amd64 17 | docker manifest annotate ghcr.io/bemihq/bemidb:$version ghcr.io/bemihq/bemidb:$version-arm64 --arch arm64 18 | docker manifest push ghcr.io/bemihq/bemidb:$version 19 | 20 | docker manifest inspect ghcr.io/bemihq/bemidb:latest &> /dev/null && docker manifest rm ghcr.io/bemihq/bemidb:latest 21 | docker manifest create ghcr.io/bemihq/bemidb:latest ghcr.io/bemihq/bemidb:$version-amd64 ghcr.io/bemihq/bemidb:$version-arm64 22 | docker manifest annotate ghcr.io/bemihq/bemidb:latest ghcr.io/bemihq/bemidb:$version-amd64 --arch amd64 23 | docker manifest annotate ghcr.io/bemihq/bemidb:latest ghcr.io/bemihq/bemidb:$version-arm64 --arch arm64 24 | docker manifest push ghcr.io/bemihq/bemidb:latest 25 | 26 | echo 27 | echo "See https://github.com/orgs/BemiHQ/packages/container/package/bemidb" 28 | -------------------------------------------------------------------------------- /scripts/test-data-types.sql: -------------------------------------------------------------------------------- 1 | -- Usage: psql postgres://127.0.0.1:5432/dbname -P pager=off -v ON_ERROR_STOP=on -f ./scripts/test-data-types.sql 2 | 3 | DROP TABLE IF EXISTS test_table; 4 | DROP TYPE IF EXISTS address; 5 | 6 | CREATE EXTENSION IF NOT EXISTS ltree; 7 | 8 | CREATE TYPE address AS ( 9 | city VARCHAR(50) 10 | ); 11 | 12 | CREATE TABLE test_table ( 13 | id SERIAL PRIMARY KEY, 14 | bit_column BIT, 15 | bool_column BOOLEAN, 16 | bpchar_column BPCHAR(10), 17 | varchar_column VARCHAR(255), 18 | text_column TEXT, 19 | int2_column INT2, 20 | int4_column INT4, 21 | int8_column INT8, 22 | hugeint_column NUMERIC(20, 0), 23 | xid_column XID, 24 | xid8_column XID8, 25 | float4_column FLOAT4, 26 | float8_column FLOAT8, 27 | numeric_column NUMERIC(40, 2), 28 | numeric_column_without_precision NUMERIC, 29 | date_column DATE, 30 | time_column TIME, 31 | timeMsColumn TIME(3), 32 | timetz_column TIMETZ, 33 | timetz_ms_column TIMETZ(3), 34 | timestamp_column TIMESTAMP, 35 | timestamp_ms_column TIMESTAMP(3), 36 | 
timestamptz_column TIMESTAMPTZ, 37 | timestamptz_ms_column TIMESTAMPTZ(3), 38 | uuid_column UUID, 39 | bytea_column BYTEA, 40 | interval_column INTERVAL, 41 | point_column POINT, 42 | inet_column INET, 43 | json_column JSON, 44 | jsonb_column JSONB, 45 | tsvector_column TSVECTOR, 46 | xml_column XML, 47 | pg_snapshot_column PG_SNAPSHOT, 48 | array_text_column TEXT[], 49 | array_int_column INT[], 50 | array_jsonb_column JSONB[], 51 | array_ltree_column LTREE[], 52 | user_defined_column address 53 | ); 54 | 55 | INSERT INTO test_table ( 56 | bit_column, 57 | bool_column, 58 | bpchar_column, 59 | varchar_column, 60 | text_column, 61 | int2_column, 62 | int4_column, 63 | int8_column, 64 | hugeint_column, 65 | xid_column, 66 | xid8_column, 67 | float4_column, 68 | float8_column, 69 | numeric_column, 70 | numeric_column_without_precision, 71 | date_column, 72 | time_column, 73 | timeMsColumn, 74 | timetz_column, 75 | timetz_ms_column, 76 | timestamp_column, 77 | timestamp_ms_column, 78 | timestamptz_column, 79 | timestamptz_ms_column, 80 | uuid_column, 81 | bytea_column, 82 | interval_column, 83 | point_column, 84 | inet_column, 85 | json_column, 86 | jsonb_column, 87 | tsvector_column, 88 | xml_column, 89 | pg_snapshot_column, 90 | array_text_column, 91 | array_int_column, 92 | array_jsonb_column, 93 | array_ltree_column, 94 | user_defined_column 95 | ) VALUES ( 96 | B'1', -- bit_column 97 | TRUE, -- bool_column 98 | 'bpchar', -- bpchar_column 99 | 'varchar', -- varchar_column 100 | 'text', -- text_column 101 | 32767::INT2, -- int2_column 102 | 2147483647::INT4, -- int4_column 103 | 9223372036854775807::INT8, -- int8_column 104 | 10000000000000000000, -- hugeint_column 105 | '4294967295'::XID, -- xid_column 106 | '18446744073709551615'::XID8, -- xid8_column 107 | 3.14::FLOAT4, -- float4_column 108 | 3.141592653589793::FLOAT8, -- float8_column 109 | 12345.67::NUMERIC(10, 2), -- numeric_column 110 | 12345.67, -- numeric_column_without_precision 111 | '2024-01-01', -- date_column 112 | '12:00:00.123456', -- time_column 113 | '12:00:00.123', -- timeMsColumn 114 | '12:00:00.123456-05', -- timetz_column 115 | '12:00:00.123-05', -- timetz_ms_column 116 | '2024-01-01 12:00:00.123456', -- timestamp_column 117 | '2024-01-01 12:00:00.123', -- timestamp_ms_column 118 | '2024-01-01 12:00:00.123456-05', -- timestamptz_column 119 | '2024-01-01 12:00:00.123-05', -- timestamptz_ms_column 120 | '58a7c845-af77-44b2-8664-7ca613d92f04', -- uuid_column 121 | decode('48656c6c6f', 'hex'), -- bytea_column 122 | '1 mon 2 days 01:00:01.000001'::INTERVAL, -- interval_column 123 | '(1, 2)'::POINT, -- point_column 124 | '192.168.0.1', -- inet_column 125 | '{"key": "value"}'::JSON, -- json_column 126 | '{"key": "value"}'::JSONB, -- jsonb_column 127 | to_tsvector('Sample text for tsvector'), -- tsvector_column 128 | 'text', -- xml_column 129 | pg_current_snapshot(), -- pg_snapshot_column 130 | '{"one", "two", "three"}', -- array_text_column 131 | '{1, 2, 3}', -- array_int_column 132 | '{"{\"key\": \"value1\"}", "{\"key\": \"value2\"}"}'::JSONB[], -- array_jsonb_column 133 | '{"a.b", "c.d"}'::LTREE[], -- array_ltree_column 134 | ROW('Toronto') -- user_defined_column 135 | ), ( 136 | NULL, -- bit_column 137 | FALSE, -- bool_column 138 | '', -- bpchar_column 139 | NULL, -- varchar_column 140 | '', -- text_column 141 | -32767::INT2, -- int2_column 142 | NULL, -- int4_column 143 | -9223372036854775807::INT8, -- int8_column 144 | NULL, -- hugeint_column 145 | NULL, -- xid_column 146 | NULL, -- xid8_column 147 | 'NaN', -- 
float4_column 148 | -3.141592653589793::FLOAT8, -- float8_column 149 | -12345.00::NUMERIC(10, 2), -- numeric_column 150 | NULL, -- numeric_column_without_precision 151 | '20025-11-12', -- date_column 152 | '12:00:00.123', -- time_column 153 | NULL, -- timeMsColumn 154 | '12:00:00.12300+05', -- timetz_column 155 | '12:00:00.1+05', -- timetz_ms_column 156 | '2024-01-01 12:00:00', -- timestamp_column 157 | NULL, -- timestamp_ms_column 158 | '2024-01-01 12:00:00.000123+05', -- timestamptz_column 159 | '2024-01-01 12:00:00.12+05', -- timestamptz_ms_column 160 | NULL, -- uuid_column 161 | NULL, -- bytea_column 162 | NULL, -- interval_column 163 | NULL, -- point_column 164 | NULL, -- inet_column 165 | NULL, -- json_column 166 | '{}'::JSONB, -- jsonb_column 167 | NULL, -- tsvector_column 168 | NULL, -- xml_column 169 | NULL, -- pg_snapshot_column 170 | NULL, -- array_text_column 171 | '{}', -- array_int_column 172 | NULL, -- array_jsonb_column 173 | NULL, -- array_ltree_column 174 | NULL -- user_defined_column 175 | ); 176 | 177 | SELECT 178 | table_schema, 179 | table_name, 180 | column_name, 181 | data_type, 182 | udt_name, 183 | is_nullable, 184 | character_maximum_length, 185 | numeric_precision, 186 | numeric_scale, 187 | datetime_precision 188 | FROM information_schema.columns 189 | WHERE table_schema = 'public' 190 | ORDER BY table_schema, table_name, ordinal_position; 191 | -------------------------------------------------------------------------------- /scripts/test-partitioned-tables.sql: -------------------------------------------------------------------------------- 1 | -- Usage: psql postgres://127.0.0.1:5432/dbname -P pager=off -v ON_ERROR_STOP=on -f ./scripts/test-partitioned-tables.sql 2 | 3 | DROP TABLE IF EXISTS test_table; 4 | 5 | CREATE TABLE test_table ( 6 | id SERIAL, 7 | created_at TIMESTAMP NOT NULL 8 | ) PARTITION BY RANGE (created_at); 9 | 10 | CREATE TABLE test_table_q1 PARTITION OF test_table FOR VALUES FROM ('2024-01-01') TO ('2024-04-01'); 11 | CREATE TABLE test_table_q2 PARTITION OF test_table FOR VALUES FROM ('2024-04-01') TO ('2024-07-01'); 12 | CREATE TABLE test_table_q3 PARTITION OF test_table FOR VALUES FROM ('2024-07-01') TO ('2024-10-01'); 13 | CREATE TABLE test_table_q4 PARTITION OF test_table FOR VALUES FROM ('2024-10-01') TO ('2025-01-01'); 14 | 15 | INSERT INTO test_table (created_at) VALUES 16 | ('2024-02-15 10:00:00'), 17 | ('2024-09-01 12:00:30'), 18 | ('2024-10-12 08:00:00'), 19 | ('2024-05-20 14:30:00'); 20 | -------------------------------------------------------------------------------- /scripts/test-schemas.sql: -------------------------------------------------------------------------------- 1 | -- Usage: psql postgres://127.0.0.1:5432/dbname -P pager=off -v ON_ERROR_STOP=on -f ./scripts/test-schemas.sql 2 | 3 | DROP SCHEMA IF EXISTS test_schema CASCADE; 4 | 5 | CREATE SCHEMA test_schema; 6 | 7 | CREATE TABLE test_schema.test_table ( 8 | id SERIAL PRIMARY KEY 9 | ); 10 | 11 | CREATE TABLE test_schema.simple_table ( 12 | id SERIAL PRIMARY KEY 13 | ); 14 | 15 | INSERT INTO test_schema.simple_table DEFAULT VALUES; 16 | 17 | SELECT 18 | table_schema, 19 | table_name, 20 | column_name, 21 | data_type, 22 | udt_name, 23 | is_nullable, 24 | character_maximum_length, 25 | numeric_precision, 26 | numeric_scale, 27 | datetime_precision 28 | FROM information_schema.columns 29 | WHERE table_schema = 'test_schema' 30 | ORDER BY table_schema, table_name, ordinal_position; 31 | -------------------------------------------------------------------------------- 
/src/capped_buffer.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "errors" 5 | "io" 6 | "sync" 7 | ) 8 | 9 | type CappedBuffer struct { 10 | config *Config 11 | maxSizeBytes int 12 | 13 | buffer []byte 14 | mutex sync.Mutex 15 | conditionalSync *sync.Cond 16 | 17 | closeOnceSync sync.Once 18 | closed bool 19 | } 20 | 21 | func NewCappedBuffer(maxSizeBytes int, config *Config) *CappedBuffer { 22 | sizedBuffer := &CappedBuffer{ 23 | config: config, 24 | buffer: make([]byte, 0, maxSizeBytes), 25 | maxSizeBytes: maxSizeBytes, 26 | } 27 | sizedBuffer.conditionalSync = sync.NewCond(&sizedBuffer.mutex) 28 | return sizedBuffer 29 | } 30 | 31 | // Implements io.Writer 32 | func (buf *CappedBuffer) Write(payload []byte) (writtenBytes int, err error) { 33 | if len(payload) == 0 { 34 | return 0, nil 35 | } 36 | 37 | buf.mutex.Lock() 38 | defer buf.mutex.Unlock() 39 | 40 | if buf.closed { 41 | return 0, errors.New("buffer is closed") 42 | } 43 | 44 | for len(buf.buffer)+len(payload) > buf.maxSizeBytes && !buf.closed { 45 | LogTrace(buf.config, ">> Waiting for more space in capped buffer...") 46 | buf.conditionalSync.Wait() // Wait for the reader 47 | } 48 | 49 | // Check again if buffer was closed while waiting 50 | if buf.closed { 51 | return 0, errors.New("buffer is closed") 52 | } 53 | 54 | writtenBytes = len(payload) 55 | buf.buffer = append(buf.buffer, payload...) 56 | LogTrace(buf.config, ">> Writing", writtenBytes, "bytes to capped buffer...") 57 | 58 | buf.conditionalSync.Broadcast() // Notify the reader that new data is available 59 | 60 | return writtenBytes, nil 61 | } 62 | 63 | // Implements io.Reader 64 | func (buf *CappedBuffer) Read(payload []byte) (readBytes int, err error) { 65 | if len(payload) == 0 { 66 | return 0, nil 67 | } 68 | 69 | buf.mutex.Lock() 70 | defer buf.mutex.Unlock() 71 | 72 | for len(buf.buffer) == 0 && !buf.closed { 73 | LogTrace(buf.config, "<< Waiting for more data in capped buffer...") 74 | buf.conditionalSync.Wait() // Wait for the writer 75 | } 76 | 77 | if len(buf.buffer) == 0 && buf.closed { 78 | return 0, io.EOF 79 | } 80 | 81 | maxReadBytes := len(payload) 82 | readBytes = copy(payload, buf.buffer) 83 | buf.buffer = buf.buffer[readBytes:] 84 | LogTrace(buf.config, "<< Reading "+IntToString(readBytes)+"/"+IntToString(maxReadBytes)+" bytes from capped buffer...") 85 | 86 | buf.conditionalSync.Broadcast() // Notify the writer that space is now available 87 | 88 | return readBytes, nil 89 | } 90 | 91 | func (buf *CappedBuffer) Close() error { 92 | buf.closeOnceSync.Do(func() { 93 | buf.mutex.Lock() 94 | 95 | LogTrace(buf.config, "== Closing capped buffer...") 96 | buf.closed = true 97 | 98 | buf.conditionalSync.Broadcast() // Wake up any waiting writers/readers 99 | 100 | buf.mutex.Unlock() 101 | }) 102 | return nil 103 | } 104 | -------------------------------------------------------------------------------- /src/capped_buffer_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "sync" 7 | "testing" 8 | "time" 9 | ) 10 | 11 | func initTestConfig() *Config { 12 | return &Config{ 13 | LogLevel: LOG_LEVEL_INFO, // Use INFO to avoid excessive logging during tests 14 | } 15 | } 16 | 17 | func TestCappedBufferWrite(t *testing.T) { 18 | t.Run("Writes data to buffer", func(t *testing.T) { 19 | config := initTestConfig() 20 | bufferSize := 100 // 100 bytes 21 | buffer := NewCappedBuffer(bufferSize, 
config) 22 | writeData := []byte("hello world") 23 | 24 | writtenBytes, err := buffer.Write(writeData) 25 | 26 | if err != nil { 27 | t.Fatalf("Failed to write to buffer: %v", err) 28 | } 29 | if writtenBytes != len(writeData) { 30 | t.Errorf("Expected to write %d bytes, but wrote %d", len(writeData), writtenBytes) 31 | } 32 | }) 33 | 34 | t.Run("Waits to write data to a full buffer", func(t *testing.T) { 35 | config := initTestConfig() 36 | bufferSize := 11 37 | buffer := NewCappedBuffer(bufferSize, config) 38 | writeDataFull := []byte("hello world") 39 | buffer.Write(writeDataFull) 40 | writeDataOverflow := []byte("overflow") 41 | done := make(chan struct{}) 42 | 43 | go func() { 44 | buffer.Write(writeDataOverflow) 45 | close(done) 46 | }() 47 | 48 | select { 49 | case <-done: 50 | t.Error("Write to full buffer should block, but it returned immediately") 51 | case <-time.After(100 * time.Millisecond): 52 | // This is expected - Write should block 53 | } 54 | }) 55 | 56 | t.Run("Writes data to a buffer after it was read (more space available)", func(t *testing.T) { 57 | config := initTestConfig() 58 | bufferSize := 11 59 | buffer := NewCappedBuffer(bufferSize, config) 60 | writeDataFull := []byte("hello world") 61 | buffer.Write(writeDataFull) 62 | writeDataOverflow := []byte("over") 63 | done := make(chan struct{}) 64 | 65 | go func() { 66 | writtenBytes, err := buffer.Write(writeDataOverflow) 67 | if err != nil { 68 | t.Errorf("Failed to write to buffer: %v", err) 69 | } 70 | if writtenBytes != len(writeDataOverflow) { 71 | t.Errorf("Expected to write %d bytes, but wrote %d", len(writeDataOverflow), writtenBytes) 72 | } 73 | 74 | close(done) 75 | }() 76 | 77 | readData := make([]byte, 5) 78 | readBytes, err := buffer.Read(readData) 79 | if err != nil { 80 | t.Fatalf("Failed to read from buffer: %v", err) 81 | } 82 | if readBytes != 5 { 83 | t.Errorf("Expected to read 5 bytes, but read %d", readBytes) 84 | } 85 | 86 | select { 87 | case <-done: 88 | // This is expected - Write should proceed 89 | case <-time.After(100 * time.Millisecond): 90 | t.Error("Write should have proceeded after Read") 91 | } 92 | }) 93 | 94 | t.Run("Receive error when writing to a closed buffer", func(t *testing.T) { 95 | config := initTestConfig() 96 | bufferSize := 100 97 | buffer := NewCappedBuffer(bufferSize, config) 98 | writeData := []byte("hello world") 99 | 100 | buffer.Close() 101 | 102 | writtenBytes, err := buffer.Write(writeData) 103 | 104 | if err == nil { 105 | t.Error("Write to closed buffer should return an error") 106 | } 107 | if err.Error() != "buffer is closed" { 108 | t.Errorf("Expected error message 'buffer is closed', but got: %v", err) 109 | } 110 | if writtenBytes != 0 { 111 | t.Errorf("Expected to write 0 bytes, but wrote %d", writtenBytes) 112 | } 113 | }) 114 | } 115 | 116 | func TestCappedBufferRead(t *testing.T) { 117 | t.Run("Reads data from buffer", func(t *testing.T) { 118 | config := initTestConfig() 119 | bufferSize := 100 // 100 bytes 120 | buffer := NewCappedBuffer(bufferSize, config) 121 | writeData := []byte("hello world") 122 | buffer.Write(writeData) 123 | readData := make([]byte, len(writeData)) 124 | 125 | readBytes, err := buffer.Read(readData) 126 | 127 | if err != nil { 128 | t.Fatalf("Failed to read from buffer: %v", err) 129 | } 130 | if readBytes != len(writeData) { 131 | t.Errorf("Expected to read %d bytes, but read %d", len(writeData), readBytes) 132 | } 133 | if !bytes.Equal(readData, writeData) { 134 | t.Errorf("Read data does not match written data. 
Got %q, want %q", readData, writeData) 135 | } 136 | }) 137 | 138 | t.Run("Waits to read data from an empty buffer", func(t *testing.T) { 139 | config := initTestConfig() 140 | bufferSize := 100 // 100 bytes 141 | buffer := NewCappedBuffer(bufferSize, config) 142 | done := make(chan struct{}) // Create a channel to signal when read is done 143 | readData := make([]byte, 10) 144 | 145 | // Start a goroutine to read from the buffer 146 | go func() { 147 | buffer.Read(readData) 148 | close(done) 149 | }() 150 | 151 | // Wait for a short time to see if Read blocks 152 | select { 153 | case <-done: 154 | t.Error("Read from empty buffer should block, but it returned immediately") 155 | case <-time.After(100 * time.Millisecond): 156 | // This is expected - Read should block 157 | } 158 | }) 159 | 160 | t.Run("Waits and reads data from a buffer after it was closed", func(t *testing.T) { 161 | config := initTestConfig() 162 | bufferSize := 100 // 100 bytes 163 | buffer := NewCappedBuffer(bufferSize, config) 164 | writeData := []byte("hello world") 165 | buffer.Write(writeData) 166 | done := make(chan struct{}) // Create a channel to signal when read is done 167 | readData := make([]byte, 11) 168 | 169 | // Start a goroutine to read from the buffer 170 | go func() { 171 | readBytes, err := buffer.Read(readData) 172 | if err != nil { 173 | t.Errorf("Failed to read from buffer: %v", err) 174 | } 175 | if readBytes != len(writeData) { 176 | t.Errorf("Expected to read %d bytes, but read %d", len(writeData), readBytes) 177 | } 178 | close(done) 179 | }() 180 | 181 | buffer.Close() // Close the buffer to unblock the read 182 | 183 | // Wait for the read to complete 184 | select { 185 | case <-done: 186 | if !bytes.Equal(readData, writeData) { 187 | t.Errorf("Read data does not match written data. Got %q, want %q", readData, writeData) 188 | } 189 | case <-time.After(100 * time.Millisecond): 190 | t.Error("Read should have returned after Close") 191 | } 192 | }) 193 | 194 | t.Run("Reads data from a buffer after it was written to (more data available)", func(t *testing.T) { 195 | config := initTestConfig() 196 | bufferSize := 11 197 | buffer := NewCappedBuffer(bufferSize, config) 198 | readData := make([]byte, 5) 199 | done := make(chan struct{}) 200 | writeData := []byte("hello world") 201 | 202 | go func() { 203 | readBytes, err := buffer.Read(readData) 204 | if err != nil { 205 | t.Errorf("Failed to read from buffer: %v", err) 206 | } 207 | if readBytes != len(readData) { 208 | t.Errorf("Expected to read %d bytes, but read %d", len(readData), readBytes) 209 | } 210 | if !bytes.Equal(readData, writeData[:len(readData)]) { 211 | t.Errorf("Read data does not match written data. 
Got %q, want %q", readData, writeData[:len(readData)]) 212 | } 213 | close(done) 214 | }() 215 | 216 | buffer.Write(writeData) 217 | 218 | select { 219 | case <-done: 220 | // This is expected - Read should proceed 221 | case <-time.After(100 * time.Millisecond): 222 | t.Error("Read should have proceeded after Write") 223 | } 224 | }) 225 | 226 | t.Run("Receive EOF when reading from a closed and empty buffer", func(t *testing.T) { 227 | config := initTestConfig() 228 | bufferSize := 100 229 | buffer := NewCappedBuffer(bufferSize, config) 230 | readData := make([]byte, 10) 231 | 232 | buffer.Close() 233 | 234 | readBytes, err := buffer.Read(readData) 235 | 236 | if err != io.EOF { 237 | t.Errorf("Read from closed and empty buffer should return EOF, but got: %v", err) 238 | } 239 | if readBytes != 0 { 240 | t.Errorf("Expected to read 0 bytes, but read %d", readBytes) 241 | } 242 | }) 243 | } 244 | 245 | func TestCappedBufferConcurrentReadWrite(t *testing.T) { 246 | t.Run("Concurrent read and write operations", func(t *testing.T) { 247 | config := initTestConfig() 248 | bufferSize := 100 // 100 bytes 249 | buffer := NewCappedBuffer(bufferSize, config) 250 | iterations := 100 251 | writeData := []byte("test data") 252 | 253 | // WaitGroup to wait for all goroutines to complete 254 | var wg sync.WaitGroup 255 | wg.Add(2) // One for reader, one for writer 256 | 257 | // Start writer goroutine 258 | go func() { 259 | defer wg.Done() 260 | for i := 0; i < iterations; i++ { 261 | _, err := buffer.Write(writeData) 262 | if err != nil { 263 | t.Errorf("Write error at iteration %d: %v", i, err) 264 | return 265 | } 266 | } 267 | }() 268 | 269 | // Start reader goroutine 270 | go func() { 271 | defer wg.Done() 272 | readData := make([]byte, len(writeData)) 273 | for i := 0; i < iterations; i++ { 274 | _, err := buffer.Read(readData) 275 | if err != nil { 276 | t.Errorf("Read error at iteration %d: %v", i, err) 277 | return 278 | } 279 | if !bytes.Equal(readData, writeData) { 280 | t.Errorf("Read data does not match at iteration %d. 
Got %q, want %q", i, readData, writeData) 281 | return 282 | } 283 | } 284 | }() 285 | 286 | // Wait for both goroutines to complete 287 | wg.Wait() 288 | }) 289 | 290 | t.Run("Multiple sequential read and write operations", func(t *testing.T) { 291 | config := initTestConfig() 292 | bufferSize := 20 293 | buffer := NewCappedBuffer(bufferSize, config) 294 | data1 := []byte("first") 295 | data2 := []byte("second") 296 | data3 := []byte("third") 297 | readData1 := make([]byte, 5) // "first" 298 | readData2 := make([]byte, 6) // "second" 299 | readData3 := make([]byte, 5) // "third" 300 | 301 | buffer.Write(data1) 302 | buffer.Write(data2) 303 | buffer.Write(data3) 304 | 305 | readBytes1, _ := buffer.Read(readData1) 306 | if readBytes1 != 5 || string(readData1) != "first" { 307 | t.Errorf("First read failed: got %q, want %q", readData1, "first") 308 | } 309 | 310 | readBytes2, _ := buffer.Read(readData2) 311 | if readBytes2 != 6 || string(readData2) != "second" { 312 | t.Errorf("Second read failed: got %q, want %q", readData2, "second") 313 | } 314 | 315 | readBytes3, _ := buffer.Read(readData3) 316 | if readBytes3 != 5 || string(readData3) != "third" { 317 | t.Errorf("Third read failed: got %q, want %q", readData3, "third") 318 | } 319 | }) 320 | } 321 | -------------------------------------------------------------------------------- /src/config.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "os" 6 | "slices" 7 | "strings" 8 | ) 9 | 10 | const ( 11 | VERSION = "0.51.1" 12 | 13 | ENV_PORT = "BEMIDB_PORT" 14 | ENV_DATABASE = "BEMIDB_DATABASE" 15 | ENV_USER = "BEMIDB_USER" 16 | ENV_PASSWORD = "BEMIDB_PASSWORD" 17 | ENV_HOST = "BEMIDB_HOST" 18 | ENV_INIT_SQL_FILEPATH = "BEMIDB_INIT_SQL" 19 | 20 | ENV_STORAGE_PATH = "BEMIDB_STORAGE_PATH" 21 | ENV_STORAGE_TYPE = "BEMIDB_STORAGE_TYPE" 22 | ENV_AWS_REGION = "AWS_REGION" 23 | ENV_AWS_S3_ENDPOINT = "AWS_S3_ENDPOINT" 24 | ENV_AWS_S3_BUCKET = "AWS_S3_BUCKET" 25 | ENV_AWS_ACCESS_KEY_ID = "AWS_ACCESS_KEY_ID" 26 | ENV_AWS_SECRET_ACCESS_KEY = "AWS_SECRET_ACCESS_KEY" 27 | 28 | ENV_PG_DATABASE_URL = "PG_DATABASE_URL" 29 | ENV_PG_SYNC_INTERVAL = "PG_SYNC_INTERVAL" 30 | ENV_PG_SCHEMA_PREFIX = "PG_SCHEMA_PREFIX" 31 | ENV_PG_INCLUDE_TABLES = "PG_INCLUDE_TABLES" 32 | ENV_PG_EXCLUDE_TABLES = "PG_EXCLUDE_TABLES" 33 | ENV_PG_INCREMENTALLY_REFRESHED_TABLES = "PG_INCREMENTALLY_REFRESHED_TABLES" 34 | ENV_PG_PRESERVE_UNSYNCED = "PG_PRESERVE_UNSYNCED" 35 | 36 | ENV_LOG_LEVEL = "BEMIDB_LOG_LEVEL" 37 | ENV_DISABLE_ANONYMOUS_ANALYTICS = "BEMIDB_DISABLE_ANONYMOUS_ANALYTICS" 38 | 39 | DEFAULT_PORT = "54321" 40 | DEFAULT_DATABASE = "bemidb" 41 | DEFAULT_USER = "" 42 | DEFAULT_PASSWORD = "" 43 | DEFAULT_HOST = "127.0.0.1" 44 | DEFAULT_STORAGE_PATH = "iceberg" 45 | DEFAULT_LOG_LEVEL = "INFO" 46 | DEFAULT_DB_STORAGE_TYPE = "LOCAL" 47 | 48 | DEFAULT_AWS_S3_ENDPOINT = "s3.amazonaws.com" 49 | 50 | STORAGE_TYPE_LOCAL = "LOCAL" 51 | STORAGE_TYPE_S3 = "S3" 52 | ) 53 | 54 | var STORAGE_TYPES = []string{STORAGE_TYPE_LOCAL, STORAGE_TYPE_S3} 55 | 56 | type AwsConfig struct { 57 | Region string 58 | S3Endpoint string // optional 59 | S3Bucket string 60 | AccessKeyId string 61 | SecretAccessKey string 62 | } 63 | 64 | type PgConfig struct { 65 | DatabaseUrl string 66 | SyncInterval string // optional 67 | SchemaPrefix string // optional 68 | IncludeTables []string // optional 69 | ExcludeTables []string // optional 70 | IncrementallyRefreshedTables []string // optional 71 | PreserveUnsynced bool // optional 
72 | } 73 | 74 | type Config struct { 75 | Host string 76 | Port string 77 | Database string 78 | User string 79 | EncryptedPassword string 80 | LogLevel string 81 | StorageType string 82 | StoragePath string 83 | Aws AwsConfig 84 | Pg PgConfig 85 | DisableAnonymousAnalytics bool 86 | } 87 | 88 | type configParseValues struct { 89 | password string 90 | pgIncludeTables string 91 | pgExcludeTables string 92 | pgIncrementallyRefreshedTables string 93 | } 94 | 95 | var _config Config 96 | var _configParseValues configParseValues 97 | 98 | func init() { 99 | registerFlags() 100 | } 101 | 102 | func registerFlags() { 103 | flag.StringVar(&_config.Host, "host", os.Getenv(ENV_HOST), "Database host. Default: \""+DEFAULT_HOST+"\"") 104 | flag.StringVar(&_config.Port, "port", os.Getenv(ENV_PORT), "Port for BemiDB to listen on. Default: \""+DEFAULT_PORT+"\"") 105 | flag.StringVar(&_config.Database, "database", os.Getenv(ENV_DATABASE), "Database name. Default: \""+DEFAULT_DATABASE+"\"") 106 | flag.StringVar(&_config.User, "user", os.Getenv(ENV_USER), "Database user. Default: \""+DEFAULT_USER+"\"") 107 | flag.StringVar(&_configParseValues.password, "password", os.Getenv(ENV_PASSWORD), "Database password. Default: \""+DEFAULT_PASSWORD+"\"") 108 | flag.StringVar(&_config.StoragePath, "storage-path", os.Getenv(ENV_STORAGE_PATH), "Path to the storage folder. Default: \""+DEFAULT_STORAGE_PATH+"\"") 109 | flag.StringVar(&_config.LogLevel, "log-level", os.Getenv(ENV_LOG_LEVEL), "Log level: \"ERROR\", \"WARN\", \"INFO\", \"DEBUG\", \"TRACE\". Default: \""+DEFAULT_LOG_LEVEL+"\"") 110 | flag.StringVar(&_config.StorageType, "storage-type", os.Getenv(ENV_STORAGE_TYPE), "Storage type: \"LOCAL\", \"S3\". Default: \""+DEFAULT_DB_STORAGE_TYPE+"\"") 111 | flag.StringVar(&_config.Pg.SchemaPrefix, "pg-schema-prefix", os.Getenv(ENV_PG_SCHEMA_PREFIX), "(Optional) Prefix for PostgreSQL schema names") 112 | flag.StringVar(&_config.Pg.SyncInterval, "pg-sync-interval", os.Getenv(ENV_PG_SYNC_INTERVAL), "(Optional) Interval between syncs. Valid units: \"ns\", \"us\" (or \"µs\"), \"ms\", \"s\", \"m\", \"h\"") 113 | flag.StringVar(&_configParseValues.pgIncludeTables, "pg-include-tables", os.Getenv(ENV_PG_INCLUDE_TABLES), "(Optional) Comma-separated list of tables to include in sync (format: schema.table)") 114 | flag.StringVar(&_configParseValues.pgExcludeTables, "pg-exclude-tables", os.Getenv(ENV_PG_EXCLUDE_TABLES), "(Optional) Comma-separated list of tables to exclude from sync (format: schema.table)") 115 | flag.StringVar(&_configParseValues.pgIncrementallyRefreshedTables, "pg-incrementally-refreshed-tables", os.Getenv(ENV_PG_INCREMENTALLY_REFRESHED_TABLES), "(Optional) Comma-separated list of tables to refresh incrementally (format: schema.table)") 116 | flag.BoolVar(&_config.Pg.PreserveUnsynced, "pg-preserve-unsynced", os.Getenv(ENV_PG_PRESERVE_UNSYNCED) == "true", "(Optional) Do not delete the existing tables in BemiDB that are not part of the sync") 117 | flag.StringVar(&_config.Pg.DatabaseUrl, "pg-database-url", os.Getenv(ENV_PG_DATABASE_URL), "PostgreSQL database URL to sync") 118 | flag.StringVar(&_config.Aws.Region, "aws-region", os.Getenv(ENV_AWS_REGION), "AWS region") 119 | flag.StringVar(&_config.Aws.S3Endpoint, "aws-s3-endpoint", os.Getenv(ENV_AWS_S3_ENDPOINT), "AWS S3 endpoint. 
Default: \""+DEFAULT_AWS_S3_ENDPOINT+"\"") 120 | flag.StringVar(&_config.Aws.S3Bucket, "aws-s3-bucket", os.Getenv(ENV_AWS_S3_BUCKET), "AWS S3 bucket name") 121 | flag.StringVar(&_config.Aws.AccessKeyId, "aws-access-key-id", os.Getenv(ENV_AWS_ACCESS_KEY_ID), "AWS access key ID") 122 | flag.StringVar(&_config.Aws.SecretAccessKey, "aws-secret-access-key", os.Getenv(ENV_AWS_SECRET_ACCESS_KEY), "AWS secret access key") 123 | flag.BoolVar(&_config.DisableAnonymousAnalytics, "disable-anonymous-analytics", os.Getenv(ENV_DISABLE_ANONYMOUS_ANALYTICS) == "true", "Disable anonymous analytics collection") 124 | } 125 | 126 | func parseFlags() { 127 | flag.Parse() 128 | 129 | if _config.Host == "" { 130 | _config.Host = DEFAULT_HOST 131 | } 132 | if _config.Port == "" { 133 | _config.Port = DEFAULT_PORT 134 | } 135 | if _config.Database == "" { 136 | _config.Database = DEFAULT_DATABASE 137 | } 138 | if _config.User == "" { 139 | _config.User = DEFAULT_USER 140 | } 141 | if _configParseValues.password == "" { 142 | _configParseValues.password = DEFAULT_PASSWORD 143 | } 144 | if _configParseValues.password != "" { 145 | if _config.User == "" { 146 | panic("Password is set without a user") 147 | } 148 | _config.EncryptedPassword = StringToScramSha256(_configParseValues.password) 149 | } 150 | if _config.StoragePath == "" { 151 | _config.StoragePath = DEFAULT_STORAGE_PATH 152 | } 153 | if _config.LogLevel == "" { 154 | _config.LogLevel = DEFAULT_LOG_LEVEL 155 | } else if !slices.Contains(LOG_LEVELS, _config.LogLevel) { 156 | panic("Invalid log level " + _config.LogLevel + ". Must be one of " + strings.Join(LOG_LEVELS, ", ")) 157 | } 158 | if _config.StorageType == "" { 159 | _config.StorageType = DEFAULT_DB_STORAGE_TYPE 160 | } else if !slices.Contains(STORAGE_TYPES, _config.StorageType) { 161 | panic("Invalid storage type " + _config.StorageType + ". 
Must be one of " + strings.Join(STORAGE_TYPES, ", ")) 162 | } 163 | 164 | if _config.StorageType == STORAGE_TYPE_S3 { 165 | if _config.Aws.Region == "" { 166 | panic("AWS region is required") 167 | } 168 | if _config.Aws.S3Endpoint == "" { 169 | _config.Aws.S3Endpoint = DEFAULT_AWS_S3_ENDPOINT 170 | } 171 | if _config.Aws.S3Bucket == "" { 172 | panic("AWS S3 bucket name is required") 173 | } 174 | if _config.Aws.AccessKeyId != "" && _config.Aws.SecretAccessKey == "" { 175 | panic("AWS secret access key is required") 176 | } 177 | if _config.Aws.AccessKeyId == "" && _config.Aws.SecretAccessKey != "" { 178 | panic("AWS access key ID is required") 179 | } 180 | } 181 | if _configParseValues.pgIncludeTables != "" { 182 | _config.Pg.IncludeTables = strings.Split(_configParseValues.pgIncludeTables, ",") 183 | } 184 | if _configParseValues.pgIncrementallyRefreshedTables != "" { 185 | _config.Pg.IncrementallyRefreshedTables = strings.Split(_configParseValues.pgIncrementallyRefreshedTables, ",") 186 | } 187 | if _configParseValues.pgExcludeTables != "" { 188 | _config.Pg.ExcludeTables = strings.Split(_configParseValues.pgExcludeTables, ",") 189 | } 190 | 191 | _configParseValues = configParseValues{} 192 | } 193 | 194 | func LoadConfig(reRegisterFlags ...bool) *Config { 195 | if reRegisterFlags != nil && reRegisterFlags[0] { 196 | flag.CommandLine = flag.NewFlagSet(os.Args[0], flag.ExitOnError) 197 | registerFlags() 198 | } 199 | parseFlags() 200 | return &_config 201 | } 202 | -------------------------------------------------------------------------------- /src/config_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestLoadConfig(t *testing.T) { 8 | t.Run("Uses default config values with local storage", func(t *testing.T) { 9 | config := LoadConfig(true) 10 | 11 | if config.Port != "54321" { 12 | t.Errorf("Expected port to be 54321, got %s", config.Port) 13 | } 14 | if config.Database != "bemidb" { 15 | t.Errorf("Expected database to be bemidb, got %s", config.Database) 16 | } 17 | if config.StoragePath != "iceberg" { 18 | t.Errorf("Expected StoragePath to be iceberg, got %s", config.StoragePath) 19 | } 20 | if config.LogLevel != "INFO" { 21 | t.Errorf("Expected logLevel to be INFO, got %s", config.LogLevel) 22 | } 23 | if config.StorageType != "LOCAL" { 24 | t.Errorf("Expected storageType to be LOCAL, got %s", config.StorageType) 25 | } 26 | if config.Pg.DatabaseUrl != "" { 27 | t.Errorf("Expected pgDatabaseUrl to be empty, got %s", config.Pg.DatabaseUrl) 28 | } 29 | if config.Pg.SyncInterval != "" { 30 | t.Errorf("Expected interval to be empty, got %s", config.Pg.SyncInterval) 31 | } 32 | if config.Pg.SchemaPrefix != "" { 33 | t.Errorf("Expected schemaPrefix to be empty, got %s", config.Pg.SchemaPrefix) 34 | } 35 | if config.Pg.IncludeTables != nil { 36 | t.Errorf("Expected includeTables to be empty, got %v", config.Pg.IncludeTables) 37 | } 38 | if config.Pg.ExcludeTables != nil { 39 | t.Errorf("Expected excludeTables to be empty, got %v", config.Pg.ExcludeTables) 40 | } 41 | }) 42 | 43 | t.Run("Uses config values from environment variables with LOCAL storage", func(t *testing.T) { 44 | t.Setenv("BEMIDB_PORT", "12345") 45 | t.Setenv("BEMIDB_DATABASE", "mydb") 46 | t.Setenv("BEMIDB_INIT_SQL", "./init/duckdb.sql") 47 | t.Setenv("BEMIDB_STORAGE_PATH", "storage-path") 48 | t.Setenv("BEMIDB_LOG_LEVEL", "ERROR") 49 | t.Setenv("BEMIDB_STORAGE_TYPE", "LOCAL") 50 | 51 | config := LoadConfig(true)
52 | 53 | if config.Port != "12345" { 54 | t.Errorf("Expected port to be 12345, got %s", config.Port) 55 | } 56 | if config.Database != "mydb" { 57 | t.Errorf("Expected database to be mydb, got %s", config.Database) 58 | } 59 | if config.StoragePath != "storage-path" { 60 | t.Errorf("Expected StoragePath to be storage-path, got %s", config.StoragePath) 61 | } 62 | if config.LogLevel != "ERROR" { 63 | t.Errorf("Expected logLevel to be ERROR, got %s", config.LogLevel) 64 | } 65 | if config.StorageType != "LOCAL" { 66 | t.Errorf("Expected storageType to be LOCAL, got %s", config.StorageType) 67 | } 68 | }) 69 | 70 | t.Run("Uses config values from environment variables with AWS S3 storage", func(t *testing.T) { 71 | t.Setenv("BEMIDB_PORT", "12345") 72 | t.Setenv("BEMIDB_DATABASE", "mydb") 73 | t.Setenv("BEMIDB_INIT_SQL", "./init/duckdb.sql") 74 | t.Setenv("BEMIDB_STORAGE_PATH", "storage-path") 75 | t.Setenv("BEMIDB_LOG_LEVEL", "ERROR") 76 | t.Setenv("BEMIDB_STORAGE_TYPE", "S3") 77 | t.Setenv("AWS_REGION", "us-west-1") 78 | t.Setenv("AWS_S3_ENDPOINT", "s3-us-west-1.amazonaws.com") 79 | t.Setenv("AWS_S3_BUCKET", "my_bucket") 80 | t.Setenv("AWS_ACCESS_KEY_ID", "my_access_key_id") 81 | t.Setenv("AWS_SECRET_ACCESS_KEY", "my_secret_access_key") 82 | 83 | config := LoadConfig(true) 84 | 85 | if config.Port != "12345" { 86 | t.Errorf("Expected port to be 12345, got %s", config.Port) 87 | } 88 | if config.Database != "mydb" { 89 | t.Errorf("Expected database to be mydb, got %s", config.Database) 90 | } 91 | if config.StoragePath != "storage-path" { 92 | t.Errorf("Expected StoragePath to be storage-path, got %s", config.StoragePath) 93 | } 94 | if config.LogLevel != "ERROR" { 95 | t.Errorf("Expected logLevel to be ERROR, got %s", config.LogLevel) 96 | } 97 | if config.StorageType != "S3" { 98 | t.Errorf("Expected storageType to be S3, got %s", config.StorageType) 99 | } 100 | if config.Aws.Region != "us-west-1" { 101 | t.Errorf("Expected awsRegion to be us-west-1, got %s", config.Aws.Region) 102 | } 103 | if config.Aws.S3Endpoint != "s3-us-west-1.amazonaws.com" { 104 | t.Errorf("Expected awsS3Endpoint to be s3-us-west-1.amazonaws.com, got %s", config.Aws.S3Endpoint) 105 | } 106 | if config.Aws.S3Bucket != "my_bucket" { 107 | t.Errorf("Expected awsS3Bucket to be my_bucket, got %s", config.Aws.S3Bucket) 108 | } 109 | if config.Aws.AccessKeyId != "my_access_key_id" { 110 | t.Errorf("Expected awsAccessKeyId to be my_access_key_id, got %s", config.Aws.AccessKeyId) 111 | } 112 | if config.Aws.SecretAccessKey != "my_secret_access_key" { 113 | t.Errorf("Expected awsSecretAccessKey to be my_secret_access_key, got %s", config.Aws.SecretAccessKey) 114 | } 115 | }) 116 | 117 | t.Run("Uses config values from environment variables for PG", func(t *testing.T) { 118 | t.Setenv("PG_DATABASE_URL", "postgres://user:password@localhost:5432/template1") 119 | t.Setenv("PG_SYNC_INTERVAL", "1h") 120 | t.Setenv("PG_SCHEMA_PREFIX", "mydb_") 121 | t.Setenv("PG_EXCLUDE_TABLES", "public.users,public.secrets") 122 | 123 | config := LoadConfig(true) 124 | 125 | if config.Pg.DatabaseUrl != "postgres://user:password@localhost:5432/template1" { 126 | t.Errorf("Expected pgDatabaseUrl to be postgres://user:password@localhost:5432/template1, got %s", config.Pg.DatabaseUrl) 127 | } 128 | if config.Pg.SyncInterval != "1h" { 129 | t.Errorf("Expected interval to be 1h, got %s", config.Pg.SyncInterval) 130 | } 131 | if config.Pg.SchemaPrefix != "mydb_" { 132 | t.Errorf("Expected schemaPrefix to be mydb_, got %s", config.Pg.SchemaPrefix) 133 | }
134 | if !HasExactOrWildcardMatch(config.Pg.ExcludeTables, "public.users") { 135 | t.Errorf("Expected ExcludeTables to contain public.users, got %v", config.Pg.ExcludeTables) 136 | } 137 | if !HasExactOrWildcardMatch(config.Pg.ExcludeTables, "public.secrets") { 138 | t.Errorf("Expected ExcludeTables to contain public.secrets, got %v", config.Pg.ExcludeTables) 139 | } 140 | }) 141 | 142 | t.Run("Panics when only AWS_ACCESS_KEY_ID is set without AWS_SECRET_ACCESS_KEY", func(t *testing.T) { 143 | t.Setenv("BEMIDB_STORAGE_TYPE", "S3") 144 | t.Setenv("AWS_ACCESS_KEY_ID", "my_access_key_id") 145 | 146 | defer func() { 147 | if r := recover(); r == nil { 148 | t.Error("Expected panic when only AWS_ACCESS_KEY_ID is set") 149 | } 150 | }() 151 | 152 | LoadConfig(true) 153 | }) 154 | 155 | t.Run("Panics when only AWS_SECRET_ACCESS_KEY is set without AWS_ACCESS_KEY_ID", func(t *testing.T) { 156 | t.Setenv("BEMIDB_STORAGE_TYPE", "S3") 157 | t.Setenv("AWS_SECRET_ACCESS_KEY", "my_secret_access_key") 158 | 159 | defer func() { 160 | if r := recover(); r == nil { 161 | t.Error("Expected panic when only AWS_SECRET_ACCESS_KEY is set") 162 | } 163 | }() 164 | 165 | LoadConfig(true) 166 | }) 167 | 168 | t.Run("Uses command line arguments", func(t *testing.T) { 169 | setTestArgs([]string{ 170 | "--port", "12345", 171 | "--database", "mydb", 172 | "--storage-path", "storage-path", 173 | "--log-level", "ERROR", 174 | "--storage-type", "LOCAL", 175 | "--pg-database-url", "postgres://user:password@localhost:5432/db", 176 | "--pg-sync-interval", "2h30m", 177 | "--pg-schema-prefix", "mydb_", 178 | "--pg-exclude-tables", "public.users,public.secrets", 179 | }) 180 | 181 | config := LoadConfig(true) 182 | 183 | if config.Port != "12345" { 184 | t.Errorf("Expected port to be 12345, got %s", config.Port) 185 | } 186 | if config.Database != "mydb" { 187 | t.Errorf("Expected database to be mydb, got %s", config.Database) 188 | } 189 | if config.StoragePath != "storage-path" { 190 | t.Errorf("Expected StoragePath to be storage-path, got %s", config.StoragePath) 191 | } 192 | if config.LogLevel != "ERROR" { 193 | t.Errorf("Expected logLevel to be ERROR, got %s", config.LogLevel) 194 | } 195 | if config.StorageType != "LOCAL" { 196 | t.Errorf("Expected storageType to be LOCAL, got %s", config.StorageType) 197 | } 198 | if config.Pg.DatabaseUrl != "postgres://user:password@localhost:5432/db" { 199 | t.Errorf("Expected pgDatabaseUrl to be postgres://user:password@localhost:5432/db, got %s", config.Pg.DatabaseUrl) 200 | } 201 | if config.Pg.SyncInterval != "2h30m" { 202 | t.Errorf("Expected interval to be 2h30m, got %s", config.Pg.SyncInterval) 203 | } 204 | if config.Pg.SchemaPrefix != "mydb_" { 205 | t.Errorf("Expected schemaPrefix to be mydb_, got %s", config.Pg.SchemaPrefix) 206 | } 207 | if !HasExactOrWildcardMatch(config.Pg.ExcludeTables, "public.users") { 208 | t.Errorf("Expected ExcludeTables to have public.users, got %v", config.Pg.ExcludeTables) 209 | } 210 | if !HasExactOrWildcardMatch(config.Pg.ExcludeTables, "public.secrets") { 211 | t.Errorf("Expected ExcludeTables to have public.secrets, got %v", config.Pg.ExcludeTables) 212 | } 213 | }) 214 | } 215 | -------------------------------------------------------------------------------- /src/custom_types.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | ) 7 | 8 | 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 9 | 10 | type Set[T comparable] map[T]struct{} 11 | 12 | func NewSet[T comparable](items []T) Set[T] { 13 | set := make(Set[T]) 14 | 15 | for _, item := range items { 16 | set.Add(item) 17 | } 18 | 19 | return set 20 | } 21 | 22 | func (set Set[T]) Add(item T) { 23 | set[item] = struct{}{} 24 | } 25 | 26 | func (set Set[T]) Contains(item T) bool { 27 | _, ok := set[item] 28 | return ok 29 | } 30 | 31 | func (set Set[T]) Values() []T { 32 | values := make([]T, 0, len(set)) 33 | for val := range set { 34 | values = append(values, val) 35 | } 36 | 37 | return values 38 | } 39 | 40 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 41 | 42 | type IcebergSchemaTable struct { 43 | Schema string 44 | Table string 45 | } 46 | 47 | func (schemaTable IcebergSchemaTable) String() string { 48 | return fmt.Sprintf(`"%s"."%s"`, schemaTable.Schema, schemaTable.Table) 49 | } 50 | 51 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 52 | 53 | type IcebergTableField struct { 54 | Name string 55 | Type string 56 | Required bool 57 | IsList bool 58 | } 59 | 60 | func (tableField IcebergTableField) ToSql() string { 61 | sql := fmt.Sprintf(`"%s" %s`, tableField.Name, tableField.Type) 62 | 63 | if tableField.IsList { 64 | sql += "[]" 65 | } 66 | 67 | if tableField.Required { 68 | sql += " NOT NULL" 69 | } 70 | 71 | return sql 72 | } 73 | 74 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 75 | 76 | type QuerySchemaTable struct { 77 | Schema string 78 | Table string 79 | Alias string 80 | } 81 | 82 | func NewQuerySchemaTableFromString(schemaTable string) QuerySchemaTable { 83 | parts := strings.Split(schemaTable, ".") 84 | 85 | qSchemaTable := QuerySchemaTable{ 86 | Table: parts[len(parts)-1], 87 | } 88 | if len(parts) > 1 { 89 | qSchemaTable.Schema = parts[0] 90 | } 91 | 92 | if !StringContainsUpper(qSchemaTable.Schema) { 93 | qSchemaTable.Schema = strings.ReplaceAll(qSchemaTable.Schema, "\"", "") 94 | } 95 | if !StringContainsUpper(qSchemaTable.Table) { 96 | qSchemaTable.Table = strings.ReplaceAll(qSchemaTable.Table, "\"", "") 97 | } 98 | 99 | return qSchemaTable 100 | } 101 | 102 | func (qSchemaTable QuerySchemaTable) ToIcebergSchemaTable() IcebergSchemaTable { 103 | if qSchemaTable.Schema == "" { 104 | return IcebergSchemaTable{ 105 | Schema: PG_SCHEMA_PUBLIC, 106 | Table: qSchemaTable.Table, 107 | } 108 | } 109 | 110 | return IcebergSchemaTable{ 111 | Schema: qSchemaTable.Schema, 112 | Table: qSchemaTable.Table, 113 | } 114 | } 115 | 116 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 117 | 118 | type QuerySchemaFunction struct { 119 | Schema string 120 | Function string 121 | } 122 | 123 | //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 124 | 125 | type PgSchemaTable struct { 126 | Schema string 127 | Table string 128 | ParentPartitionedTable string 129 | } 130 | 131 | func (pgSchemaTable PgSchemaTable) String() string { 132 | return fmt.Sprintf(`"%s"."%s"`, pgSchemaTable.Schema, pgSchemaTable.Table) 133 | } 134 | 135 | func (pgSchemaTable PgSchemaTable) ParentPartitionedTableString() string { 136 | if 
pgSchemaTable.ParentPartitionedTable == "" { 137 | return "" 138 | } 139 | 140 | return fmt.Sprintf(`"%s"."%s"`, pgSchemaTable.Schema, pgSchemaTable.ParentPartitionedTable) 141 | } 142 | 143 | func (pgSchemaTable PgSchemaTable) ToConfigArg() string { 144 | return fmt.Sprintf(`%s.%s`, pgSchemaTable.Schema, pgSchemaTable.Table) 145 | } 146 | 147 | func (pgSchemaTable PgSchemaTable) ToIcebergSchemaTable() IcebergSchemaTable { 148 | return IcebergSchemaTable{ 149 | Schema: pgSchemaTable.Schema, 150 | Table: pgSchemaTable.Table, 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /src/duckdb.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "context" 6 | "database/sql" 7 | "io" 8 | "regexp" 9 | "slices" 10 | "strings" 11 | "time" 12 | 13 | _ "github.com/marcboeker/go-duckdb" 14 | ) 15 | 16 | const ( 17 | DUCKDB_SCHEMA_MAIN = "main" 18 | REFRESH_IMPLICIT_AWS_CREDENTIALS_INTERVAL = 10 * time.Minute 19 | ) 20 | 21 | var DUCKDB_INIT_BOOT_QUERIES = []string{ 22 | // Set up Iceberg 23 | "INSTALL iceberg", 24 | "LOAD iceberg", 25 | 26 | // Set up schemas 27 | "SELECT oid FROM pg_catalog.pg_namespace", 28 | "CREATE SCHEMA public", 29 | 30 | // Configure DuckDB 31 | "SET scalar_subquery_error_on_multiple_rows=false", 32 | "SET timezone='UTC'", 33 | } 34 | 35 | type Duckdb struct { 36 | db *sql.DB 37 | config *Config 38 | stopImplicitAwsCredentialsRefreshChan chan struct{} 39 | } 40 | 41 | func NewDuckdb(config *Config, withPgCompatibility bool) *Duckdb { 42 | ctx := context.Background() 43 | db, err := sql.Open("duckdb", "") 44 | PanicIfError(config, err) 45 | 46 | duckdb := &Duckdb{ 47 | db: db, 48 | config: config, 49 | stopImplicitAwsCredentialsRefreshChan: make(chan struct{}), 50 | } 51 | 52 | bootQueries := []string{} 53 | if withPgCompatibility { 54 | bootQueries = slices.Concat( 55 | // Set up DuckDB 56 | DUCKDB_INIT_BOOT_QUERIES, 57 | 58 | // Create pg-compatible functions 59 | CreatePgCatalogMacroQueries(config), 60 | CreateInformationSchemaMacroQueries(config), 61 | 62 | // Create pg-compatible tables and views 63 | CreatePgCatalogTableQueries(config), 64 | CreateInformationSchemaTableQueries(config), 65 | 66 | // Use the public schema 67 | []string{"USE public"}, 68 | ) 69 | } 70 | 71 | for _, query := range bootQueries { 72 | _, err := duckdb.ExecContext(ctx, query, nil) 73 | PanicIfError(config, err) 74 | } 75 | 76 | switch config.StorageType { 77 | case STORAGE_TYPE_S3: 78 | if duckdb.config.Aws.AccessKeyId != "" && duckdb.config.Aws.SecretAccessKey != "" { 79 | duckdb.setExplicitAwsCredentials(ctx) 80 | } else { 81 | duckdb.setImplicitAwsCredentials(ctx) 82 | duckdb.autoRefreshImplicitAwsCredentials(ctx) 83 | } 84 | 85 | if IsLocalHost(config.Aws.S3Endpoint) { 86 | _, err = duckdb.ExecContext(ctx, "SET s3_use_ssl=false", nil) 87 | PanicIfError(config, err) 88 | } 89 | 90 | if config.Aws.S3Endpoint != DEFAULT_AWS_S3_ENDPOINT { 91 | // Use endpoint/bucket/key (path, deprecated on AWS) instead of bucket.endpoint/key (vhost) 92 | _, err = duckdb.ExecContext(ctx, "SET s3_url_style='path'", nil) 93 | PanicIfError(config, err) 94 | } 95 | 96 | if config.LogLevel == LOG_LEVEL_TRACE { 97 | _, err = duckdb.ExecContext(ctx, "SET enable_http_logging=true", nil) 98 | PanicIfError(config, err) 99 | } 100 | } 101 | 102 | return duckdb 103 | } 104 | 105 | func (duckdb *Duckdb) ExecContext(ctx context.Context, query string, args map[string]string) (sql.Result, error) { 
106 | LogDebug(duckdb.config, "Querying DuckDB:", query) 107 | return duckdb.db.ExecContext(ctx, replaceNamedStringArgs(query, args)) 108 | } 109 | 110 | func (duckdb *Duckdb) QueryContext(ctx context.Context, query string) (*sql.Rows, error) { 111 | LogDebug(duckdb.config, "Querying DuckDB:", query) 112 | return duckdb.db.QueryContext(ctx, query) 113 | } 114 | 115 | func (duckdb *Duckdb) PrepareContext(ctx context.Context, query string) (*sql.Stmt, error) { 116 | LogDebug(duckdb.config, "Preparing DuckDB statement:", query) 117 | return duckdb.db.PrepareContext(ctx, query) 118 | } 119 | 120 | func (duckdb *Duckdb) Close() { 121 | close(duckdb.stopImplicitAwsCredentialsRefreshChan) 122 | duckdb.db.Close() 123 | } 124 | 125 | func (duckdb *Duckdb) ExecTransactionContext(ctx context.Context, queries []string) error { 126 | tx, err := duckdb.db.Begin() 127 | LogDebug(duckdb.config, "Querying DuckDB: BEGIN") 128 | if err != nil { 129 | return err 130 | } 131 | 132 | for _, query := range queries { 133 | LogDebug(duckdb.config, "Querying DuckDB:", query) 134 | _, err := tx.ExecContext(ctx, query) 135 | if err != nil { 136 | tx.Rollback() 137 | return err 138 | } 139 | } 140 | 141 | LogDebug(duckdb.config, "Querying DuckDB: COMMIT") 142 | return tx.Commit() 143 | } 144 | 145 | func (duckdb *Duckdb) ExecFile(reader io.ReadCloser) { 146 | defer reader.Close() 147 | 148 | lines := []string{} 149 | scanner := bufio.NewScanner(reader) 150 | for scanner.Scan() { 151 | lines = append(lines, scanner.Text()) 152 | } 153 | PanicIfError(duckdb.config, scanner.Err()) 154 | 155 | ctx := context.Background() 156 | for _, sql := range lines { 157 | _, err := duckdb.ExecContext(ctx, sql, nil) 158 | PanicIfError(duckdb.config, err) 159 | } 160 | } 161 | 162 | func (duckdb *Duckdb) setExplicitAwsCredentials(ctx context.Context) { 163 | config := duckdb.config 164 | query := "CREATE OR REPLACE SECRET aws_s3_secret (TYPE S3, KEY_ID '$accessKeyId', SECRET '$secretAccessKey', REGION '$region', ENDPOINT '$endpoint', SCOPE '$s3Bucket')" 165 | _, err := duckdb.ExecContext(ctx, query, map[string]string{ 166 | "accessKeyId": config.Aws.AccessKeyId, 167 | "secretAccessKey": config.Aws.SecretAccessKey, 168 | "region": config.Aws.Region, 169 | "endpoint": config.Aws.S3Endpoint, 170 | "s3Bucket": "s3://" + config.Aws.S3Bucket, 171 | }) 172 | PanicIfError(config, err) 173 | } 174 | 175 | func (duckdb *Duckdb) setImplicitAwsCredentials(ctx context.Context) { 176 | config := duckdb.config 177 | query := "CREATE OR REPLACE SECRET aws_s3_secret (TYPE S3, PROVIDER CREDENTIAL_CHAIN, REGION '$region', ENDPOINT '$endpoint', SCOPE '$s3Bucket')" 178 | _, err := duckdb.ExecContext(ctx, query, map[string]string{ 179 | "region": config.Aws.Region, 180 | "endpoint": config.Aws.S3Endpoint, 181 | "s3Bucket": "s3://" + config.Aws.S3Bucket, 182 | }) 183 | PanicIfError(config, err) 184 | } 185 | 186 | func (duckdb *Duckdb) autoRefreshImplicitAwsCredentials(ctx context.Context) { 187 | ticker := time.NewTicker(REFRESH_IMPLICIT_AWS_CREDENTIALS_INTERVAL) 188 | go func() { 189 | for { 190 | select { 191 | case <-ticker.C: 192 | duckdb.setImplicitAwsCredentials(ctx) 193 | case <-duckdb.stopImplicitAwsCredentialsRefreshChan: 194 | ticker.Stop() 195 | return 196 | } 197 | } 198 | }() 199 | } 200 | 201 | func replaceNamedStringArgs(query string, args map[string]string) string { 202 | re := regexp.MustCompile(`['";]`) // Escape single quotes, double quotes, and semicolons from args 203 | 204 | for key, value := range args { 205 | query = 
strings.ReplaceAll(query, "$"+key, re.ReplaceAllString(value, "")) 206 | } 207 | return query 208 | } 209 | -------------------------------------------------------------------------------- /src/duckdb_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "io" 6 | "strings" 7 | "testing" 8 | ) 9 | 10 | func TestNewDuckdb(t *testing.T) { 11 | t.Run("Creates a new DuckDB instance", func(t *testing.T) { 12 | config := loadTestConfig() 13 | 14 | duckdb := NewDuckdb(config, false) 15 | 16 | defer duckdb.Close() 17 | rows, err := duckdb.QueryContext(context.Background(), "SELECT 1") 18 | if err != nil { 19 | t.Errorf("Expected query to succeed") 20 | } 21 | defer rows.Close() 22 | 23 | for rows.Next() { 24 | var result int 25 | err = rows.Scan(&result) 26 | if err != nil { 27 | t.Errorf("Expected query to return a result") 28 | } 29 | if result != 1 { 30 | t.Errorf("Expected query result to be 1, got %d", result) 31 | } 32 | } 33 | }) 34 | } 35 | 36 | func TestExecFile(t *testing.T) { 37 | t.Run("Executes SQL file", func(t *testing.T) { 38 | config := loadTestConfig() 39 | duckdb := NewDuckdb(config, false) 40 | defer duckdb.Close() 41 | fileContent := strings.Join([]string{ 42 | "CREATE TABLE test (id INTEGER);", 43 | "INSERT INTO test VALUES (1);", 44 | }, "\n") 45 | file := io.NopCloser(strings.NewReader(fileContent)) 46 | 47 | duckdb.ExecFile(file) 48 | 49 | rows, err := duckdb.QueryContext(context.Background(), "SELECT COUNT(*) FROM test") 50 | if err != nil { 51 | t.Errorf("Expected query to succeed") 52 | } 53 | defer rows.Close() 54 | var count int 55 | rows.Next() 56 | err = rows.Scan(&count) 57 | if err != nil { 58 | t.Errorf("Expected query to return a result") 59 | } 60 | if count != 1 { 61 | t.Errorf("Expected query result to be 1, got %d", count) 62 | } 63 | }) 64 | } 65 | -------------------------------------------------------------------------------- /src/error_utils.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "errors" 7 | "flag" 8 | "fmt" 9 | "net/http" 10 | "net/url" 11 | "os" 12 | "runtime" 13 | "runtime/debug" 14 | "strings" 15 | "time" 16 | ) 17 | 18 | func PanicIfError(config *Config, err error) { 19 | if err != nil { 20 | sendAnonymousErrorReport(config, err) 21 | printUnexpectedError(config, err) 22 | os.Exit(1) 23 | } 24 | } 25 | 26 | func Panic(config *Config, message string) { 27 | err := errors.New(message) 28 | PanicIfError(config, err) 29 | } 30 | 31 | func PrintErrorAndExit(config *Config, message string) { 32 | LogError(config, message+"\n") 33 | os.Exit(1) 34 | } 35 | 36 | func HandleUnexpectedError(config *Config, err error) { 37 | sendAnonymousErrorReport(config, err) 38 | printUnexpectedError(config, err) 39 | os.Exit(1) 40 | } 41 | 42 | func printUnexpectedError(config *Config, err error) { 43 | errorMessage := err.Error() 44 | stackTrace := string(debug.Stack()) 45 | 46 | title := "Unexpected error: " + strings.Split(errorMessage, "\n")[0] 47 | body := "* Version: " + VERSION + 48 | "\n* OS: " + runtime.GOOS + "-" + runtime.GOARCH + 49 | "\n\n```\n" + errorMessage + "\n\n" + stackTrace + "\n```" 50 | 51 | fmt.Println("Unexpected error:", errorMessage) 52 | fmt.Println(stackTrace) 53 | fmt.Println("________________________________________________________________________________") 54 | fmt.Println("\nPlease submit a new issue by simply visiting the following link:") 
55 | fmt.Println( 56 | "https://github.com/BemiHQ/BemiDB/issues/new?title=" + 57 | url.QueryEscape(title) + 58 | "&body=" + 59 | url.QueryEscape(body), 60 | ) 61 | fmt.Println("\nAlternatively, send us an email at hi@bemidb.com") 62 | } 63 | 64 | type AnonymousErrorData struct { 65 | Command string `json:"command"` 66 | OsName string `json:"osName"` 67 | Version string `json:"version"` 68 | Error string `json:"error"` 69 | StackTrace string `json:"stackTrace"` 70 | PgHost string `json:"pgHost"` 71 | } 72 | 73 | func sendAnonymousErrorReport(config *Config, err error) { 74 | if config.DisableAnonymousAnalytics { 75 | return 76 | } 77 | 78 | data := AnonymousErrorData{ 79 | Command: flag.Arg(0), 80 | OsName: runtime.GOOS + "-" + runtime.GOARCH, 81 | Version: VERSION, 82 | Error: err.Error(), 83 | StackTrace: string(debug.Stack()), 84 | PgHost: ParseDatabaseHost(config.Pg.DatabaseUrl), 85 | } 86 | 87 | jsonData, err := json.Marshal(data) 88 | if err != nil { 89 | return 90 | } 91 | 92 | client := http.Client{Timeout: 5 * time.Second} 93 | _, _ = client.Post("https://api.bemidb.com/api/errors", "application/json", bytes.NewBuffer(jsonData)) 94 | } 95 | -------------------------------------------------------------------------------- /src/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/BemiHQ/BemiDB 2 | 3 | go 1.24.3 4 | 5 | require ( 6 | github.com/aws/aws-sdk-go-v2 v1.32.3 7 | github.com/aws/aws-sdk-go-v2/config v1.28.1 8 | github.com/aws/aws-sdk-go-v2/credentials v1.17.42 9 | github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.35 10 | github.com/aws/aws-sdk-go-v2/service/s3 v1.66.2 11 | github.com/google/uuid v1.6.0 12 | github.com/jackc/pgx/v5 v5.7.2 13 | github.com/linkedin/goavro v2.1.0+incompatible 14 | github.com/marcboeker/go-duckdb v1.8.3 15 | github.com/pganalyze/pg_query_go/v5 v5.1.0 16 | github.com/xitongsys/parquet-go v1.6.3-0.20240813051905-693d3323dee0 17 | ) 18 | 19 | require ( 20 | github.com/xitongsys/parquet-go-source v0.0.0-20241021075129-b732d2ac9c9b 21 | golang.org/x/crypto v0.35.0 22 | golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 23 | ) 24 | 25 | require ( 26 | github.com/andybalholm/brotli v1.1.1 // indirect 27 | github.com/apache/arrow-go/v18 v18.0.0 // indirect 28 | github.com/apache/arrow/go/v12 v12.0.1 // indirect 29 | github.com/apache/thrift v0.21.0 // indirect 30 | github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.6 // indirect 31 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.18 // indirect 32 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.22 // indirect 33 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.22 // indirect 34 | github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1 // indirect 35 | github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.22 // indirect 36 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.0 // indirect 37 | github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.4.3 // indirect 38 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.3 // indirect 39 | github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.3 // indirect 40 | github.com/aws/aws-sdk-go-v2/service/sso v1.24.3 // indirect 41 | github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.3 // indirect 42 | github.com/aws/aws-sdk-go-v2/service/sts v1.32.3 // indirect 43 | github.com/aws/smithy-go v1.22.0 // indirect 44 | github.com/goccy/go-json v0.10.3 // indirect 45 | github.com/goccy/go-reflect v1.2.0 // indirect 46 | github.com/golang/snappy 
v0.0.4 // indirect 47 | github.com/google/flatbuffers v24.3.25+incompatible // indirect 48 | github.com/jackc/pgpassfile v1.0.0 // indirect 49 | github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect 50 | github.com/klauspost/asmfmt v1.3.2 // indirect 51 | github.com/klauspost/compress v1.17.11 // indirect 52 | github.com/klauspost/cpuid/v2 v2.2.8 // indirect 53 | github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect 54 | github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect 55 | github.com/mitchellh/mapstructure v1.5.0 // indirect 56 | github.com/pierrec/lz4/v4 v4.1.21 // indirect 57 | github.com/zeebo/xxh3 v1.0.2 // indirect 58 | golang.org/x/mod v0.21.0 // indirect 59 | golang.org/x/sync v0.11.0 // indirect 60 | golang.org/x/sys v0.30.0 // indirect 61 | golang.org/x/text v0.22.0 // indirect 62 | golang.org/x/tools v0.26.0 // indirect 63 | golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect 64 | google.golang.org/protobuf v1.35.1 // indirect 65 | gopkg.in/linkedin/goavro.v1 v1.0.5 // indirect 66 | ) 67 | -------------------------------------------------------------------------------- /src/iceberg_reader.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "io" 5 | ) 6 | 7 | type IcebergReader struct { 8 | config *Config 9 | storage StorageInterface 10 | } 11 | 12 | func NewIcebergReader(config *Config) *IcebergReader { 13 | storage := NewStorage(config) 14 | return &IcebergReader{config: config, storage: storage} 15 | } 16 | 17 | func (reader *IcebergReader) Schemas() (icebergSchemas []string, err error) { 18 | LogDebug(reader.config, "Reading Iceberg schemas...") 19 | return reader.storage.IcebergSchemas() 20 | } 21 | 22 | func (reader *IcebergReader) SchemaTables() (icebergSchemaTables Set[IcebergSchemaTable], err error) { 23 | LogDebug(reader.config, "Reading Iceberg tables...") 24 | return reader.storage.IcebergSchemaTables() 25 | } 26 | 27 | func (reader *IcebergReader) TableFields(icebergSchemaTable IcebergSchemaTable) (icebergTableFields []IcebergTableField, err error) { 28 | LogDebug(reader.config, "Reading Iceberg table "+icebergSchemaTable.String()+" fields...") 29 | return reader.storage.IcebergTableFields(icebergSchemaTable) 30 | } 31 | 32 | func (reader *IcebergReader) MetadataFilePath(icebergSchemaTable IcebergSchemaTable) string { 33 | return reader.storage.IcebergMetadataFilePath(icebergSchemaTable) 34 | } 35 | 36 | func (reader *IcebergReader) InternalTableMetadata(pgSchemaTable PgSchemaTable) (internalTableMetadata InternalTableMetadata, err error) { 37 | LogDebug(reader.config, "Reading internal table metadata for "+pgSchemaTable.String()+"...") 38 | return reader.storage.InternalTableMetadata(pgSchemaTable) 39 | } 40 | 41 | func (reader *IcebergReader) InternalStartSqlFile() io.ReadCloser { 42 | return reader.storage.InternalStartSqlFile() 43 | } 44 | -------------------------------------------------------------------------------- /src/iceberg_writer.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | type IcebergWriter struct { 4 | config *Config 5 | storage StorageInterface 6 | } 7 | 8 | func NewIcebergWriter(config *Config) *IcebergWriter { 9 | storage := NewStorage(config) 10 | return &IcebergWriter{config: config, storage: storage} 11 | } 12 | 13 | func (writer *IcebergWriter) DeleteSchema(icebergSchema string) (err error) { 14 | return 
writer.storage.DeleteSchema(icebergSchema) 15 | } 16 | 17 | func (writer *IcebergWriter) DeleteSchemaTable(icebergSchemaTable IcebergSchemaTable) (err error) { 18 | return writer.storage.DeleteSchemaTable(icebergSchemaTable) 19 | } 20 | 21 | func (writer *IcebergWriter) WriteInternalStartSqlFile(queries []string) (err error) { 22 | return writer.storage.WriteInternalStartSqlFile(queries) 23 | } 24 | -------------------------------------------------------------------------------- /src/iceberg_writer_table.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | type IcebergWriterTable struct { 4 | config *Config 5 | schemaTable IcebergSchemaTable 6 | storage StorageInterface 7 | pgSchemaColumns []PgSchemaColumn 8 | dynamicRowCountPerBatch int 9 | maxParquetPayloadThreshold int 10 | continuedRefresh bool 11 | } 12 | 13 | func NewIcebergWriterTable( 14 | config *Config, 15 | schemaTable IcebergSchemaTable, 16 | pgSchemaColumns []PgSchemaColumn, 17 | dynamicRowCountPerBatch int, 18 | maxParquetPayloadThreshold int, 19 | continuedRefresh bool, 20 | ) *IcebergWriterTable { 21 | return &IcebergWriterTable{ 22 | config: config, 23 | schemaTable: schemaTable, 24 | pgSchemaColumns: pgSchemaColumns, 25 | dynamicRowCountPerBatch: dynamicRowCountPerBatch, 26 | maxParquetPayloadThreshold: maxParquetPayloadThreshold, 27 | continuedRefresh: continuedRefresh, 28 | storage: NewStorage(config), 29 | } 30 | } 31 | 32 | func (writer *IcebergWriterTable) Write(loadRows func() ([][]string, InternalTableMetadata)) { 33 | dataDirPath := writer.storage.CreateDataDir(writer.schemaTable) 34 | metadataDirPath := writer.storage.CreateMetadataDir(writer.schemaTable) 35 | 36 | var lastSequenceNumber int 37 | newManifestListItemsSortedDesc := []ManifestListItem{} 38 | existingManifestListItemsSortedDesc := []ManifestListItem{} 39 | finalManifestListFilesSortedAsc := []ManifestListFile{} 40 | 41 | if writer.continuedRefresh { 42 | existingManifestListFilesSortedAsc, err := writer.storage.ExistingManifestListFiles(metadataDirPath) 43 | PanicIfError(writer.config, err) 44 | 45 | existingManifestListItemsSortedDesc, err = writer.storage.ExistingManifestListItems(existingManifestListFilesSortedAsc[len(existingManifestListFilesSortedAsc)-1]) 46 | PanicIfError(writer.config, err) 47 | 48 | lastSequenceNumber = existingManifestListItemsSortedDesc[0].SequenceNumber 49 | finalManifestListFilesSortedAsc = existingManifestListFilesSortedAsc 50 | } 51 | 52 | var firstNewParquetFile ParquetFile 53 | var newParquetCount int 54 | loadMoreRows := true 55 | 56 | for loadMoreRows { 57 | newParquetFile, newInternalTableMetadata, err := writer.storage.CreateParquet( 58 | dataDirPath, 59 | writer.pgSchemaColumns, 60 | writer.maxParquetPayloadThreshold, 61 | loadRows, 62 | ) 63 | PanicIfError(writer.config, err) 64 | 65 | // If Parquet is empty and we are continuing to refresh / process subsequent chunks, delete it, mark the sync as completed and exit (no trailing Parquet files) 66 | if newParquetFile.RecordCount == 0 && (writer.continuedRefresh || newParquetCount > 0) { 67 | err = writer.storage.DeleteParquet(newParquetFile) 68 | PanicIfError(writer.config, err) 69 | 70 | err = writer.storage.WriteInternalTableMetadata(metadataDirPath, newInternalTableMetadata) 71 | PanicIfError(writer.config, err) 72 | 73 | return 74 | } 75 | 76 | newParquetCount++ 77 | if firstNewParquetFile.Path == "" { 78 | firstNewParquetFile = newParquetFile 79 | } 80 | 81 | if writer.continuedRefresh { 82 | var 
overwrittenManifestListFilesSortedAsc []ManifestListFile 83 | 84 | existingManifestListItemsSortedDesc, overwrittenManifestListFilesSortedAsc, lastSequenceNumber = writer.overwriteExistingFiles( 85 | dataDirPath, 86 | metadataDirPath, 87 | existingManifestListItemsSortedDesc, 88 | newParquetFile, 89 | firstNewParquetFile, 90 | lastSequenceNumber, 91 | ) 92 | 93 | finalManifestListFilesSortedAsc = append(finalManifestListFilesSortedAsc, overwrittenManifestListFilesSortedAsc...) 94 | } 95 | 96 | newManifestFile, err := writer.storage.CreateManifest(metadataDirPath, newParquetFile) 97 | PanicIfError(writer.config, err) 98 | 99 | lastSequenceNumber++ 100 | newManifestListItem := ManifestListItem{SequenceNumber: lastSequenceNumber, ManifestFile: newManifestFile} 101 | newManifestListItemsSortedDesc = append([]ManifestListItem{newManifestListItem}, newManifestListItemsSortedDesc...) 102 | 103 | finalManifestListItemsSortedDesc := append(newManifestListItemsSortedDesc, existingManifestListItemsSortedDesc...) 104 | newManifestListFile, err := writer.storage.CreateManifestList(metadataDirPath, firstNewParquetFile.Uuid, finalManifestListItemsSortedDesc) 105 | PanicIfError(writer.config, err) 106 | 107 | finalManifestListFilesSortedAsc = append(finalManifestListFilesSortedAsc, newManifestListFile) 108 | _, err = writer.storage.CreateMetadata(metadataDirPath, writer.pgSchemaColumns, finalManifestListFilesSortedAsc) 109 | PanicIfError(writer.config, err) 110 | 111 | err = writer.storage.WriteInternalTableMetadata(metadataDirPath, newInternalTableMetadata) 112 | PanicIfError(writer.config, err) 113 | 114 | loadMoreRows = newInternalTableMetadata.IsInProgress() 115 | LogDebug(writer.config, "Written", newParquetCount, "Parquet file(s). Load more rows:", loadMoreRows) 116 | } 117 | } 118 | 119 | func (writer *IcebergWriterTable) overwriteExistingFiles( 120 | dataDirPath string, 121 | metadataDirPath string, 122 | originalExistingManifestListItemsSortedDesc []ManifestListItem, 123 | newParquetFile ParquetFile, 124 | firstNewParquetFile ParquetFile, 125 | originalLastSequenceNumber int, 126 | ) (existingManifestListItemsSortedDesc []ManifestListItem, overwrittenManifestListFilesSortedAsc []ManifestListFile, lastSequenceNumber int) { 127 | originalExistingManifestListItemsSortedAsc := Reverse(originalExistingManifestListItemsSortedDesc) 128 | lastSequenceNumber = originalLastSequenceNumber 129 | 130 | for i, existingManifestListItem := range originalExistingManifestListItemsSortedAsc { 131 | existingManifestFile := existingManifestListItem.ManifestFile 132 | existingParquetFilePath, err := writer.storage.ExistingParquetFilePath(existingManifestFile) 133 | PanicIfError(writer.config, err) 134 | 135 | overwrittenParquetFile, err := writer.storage.CreateOverwrittenParquet(dataDirPath, existingParquetFilePath, newParquetFile.Path, writer.pgSchemaColumns, writer.dynamicRowCountPerBatch) 136 | PanicIfError(writer.config, err) 137 | 138 | // Keep as is if no overlapping records found 139 | if overwrittenParquetFile.Path == "" { 140 | LogDebug(writer.config, "No overlapping records found") 141 | existingManifestListItemsSortedDesc = append([]ManifestListItem{existingManifestListItem}, existingManifestListItemsSortedDesc...) 
142 | continue 143 | } 144 | 145 | if overwrittenParquetFile.RecordCount == 0 { 146 | // DELETE 147 | LogDebug(writer.config, "Deleting", existingManifestFile.RecordCount, "record(s)...") 148 | 149 | deletedRecsManifestFile, err := writer.storage.CreateDeletedRecordsManifest(metadataDirPath, overwrittenParquetFile.Uuid, existingManifestFile) 150 | PanicIfError(writer.config, err) 151 | 152 | // Constructing a new manifest list without the previous manifest file and with the new "deleted" manifest file 153 | finalManifestListItemsSortedAsc := []ManifestListItem{} 154 | for j, existingItem := range originalExistingManifestListItemsSortedAsc { 155 | if i != j { 156 | finalManifestListItemsSortedAsc = append(finalManifestListItemsSortedAsc, existingItem) 157 | } 158 | } 159 | lastSequenceNumber++ 160 | overwrittenManifestListItem := ManifestListItem{SequenceNumber: lastSequenceNumber, ManifestFile: deletedRecsManifestFile} 161 | finalManifestListItemsSortedAsc = append(finalManifestListItemsSortedAsc, overwrittenManifestListItem) 162 | 163 | overwrittenManifestList, err := writer.storage.CreateManifestList(metadataDirPath, firstNewParquetFile.Uuid, Reverse(finalManifestListItemsSortedAsc)) 164 | PanicIfError(writer.config, err) 165 | overwrittenManifestListFilesSortedAsc = append(overwrittenManifestListFilesSortedAsc, overwrittenManifestList) 166 | continue 167 | } else { 168 | // UPDATE (overwrite) 169 | LogDebug(writer.config, "Overwriting", existingManifestFile.RecordCount, "record(s) with", overwrittenParquetFile.RecordCount, "record(s)...") 170 | 171 | deletedRecsManifestFile, err := writer.storage.CreateDeletedRecordsManifest(metadataDirPath, overwrittenParquetFile.Uuid, existingManifestFile) 172 | PanicIfError(writer.config, err) 173 | 174 | overwrittenManifestFile, err := writer.storage.CreateManifest(metadataDirPath, overwrittenParquetFile) 175 | PanicIfError(writer.config, err) 176 | 177 | lastSequenceNumber++ 178 | overwrittenManifestListItem := ManifestListItem{SequenceNumber: lastSequenceNumber, ManifestFile: overwrittenManifestFile} 179 | deletedRecsManifestListItem := ManifestListItem{SequenceNumber: lastSequenceNumber, ManifestFile: deletedRecsManifestFile} 180 | overwrittenManifestList, err := writer.storage.CreateManifestList(metadataDirPath, firstNewParquetFile.Uuid, []ManifestListItem{overwrittenManifestListItem, deletedRecsManifestListItem}) 181 | PanicIfError(writer.config, err) 182 | 183 | existingManifestListItemsSortedDesc = append([]ManifestListItem{overwrittenManifestListItem}, existingManifestListItemsSortedDesc...)
184 | overwrittenManifestListFilesSortedAsc = append(overwrittenManifestListFilesSortedAsc, overwrittenManifestList) 185 | } 186 | } 187 | 188 | return existingManifestListItemsSortedDesc, overwrittenManifestListFilesSortedAsc, lastSequenceNumber 189 | } 190 | -------------------------------------------------------------------------------- /src/logger.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | ) 6 | 7 | type LogLevel string 8 | 9 | const ( 10 | LOG_LEVEL_TRACE = "TRACE" 11 | LOG_LEVEL_DEBUG = "DEBUG" 12 | LOG_LEVEL_WARN = "WARN" 13 | LOG_LEVEL_INFO = "INFO" 14 | LOG_LEVEL_ERROR = "ERROR" 15 | ) 16 | 17 | var LOG_LEVELS = []string{ 18 | LOG_LEVEL_TRACE, 19 | LOG_LEVEL_DEBUG, 20 | LOG_LEVEL_WARN, 21 | LOG_LEVEL_INFO, 22 | LOG_LEVEL_ERROR, 23 | } 24 | 25 | func LogError(config *Config, message ...interface{}) { 26 | log.Println(append([]interface{}{"[ERROR]"}, message...)...) 27 | } 28 | 29 | func LogWarn(config *Config, message ...interface{}) { 30 | if config.LogLevel != LOG_LEVEL_ERROR { 31 | log.Println(append([]interface{}{"[WARN]"}, message...)...) 32 | } 33 | } 34 | 35 | func LogInfo(config *Config, message ...interface{}) { 36 | if config.LogLevel != LOG_LEVEL_ERROR && config.LogLevel != LOG_LEVEL_WARN { 37 | log.Println(append([]interface{}{"[INFO]"}, message...)...) 38 | } 39 | } 40 | 41 | func LogDebug(config *Config, message ...interface{}) { 42 | if config.LogLevel == LOG_LEVEL_DEBUG || config.LogLevel == LOG_LEVEL_TRACE { 43 | log.Println(append([]interface{}{"[DEBUG]"}, message...)...) 44 | } 45 | } 46 | 47 | func LogTrace(config *Config, message ...interface{}) { 48 | if config.LogLevel == LOG_LEVEL_TRACE { 49 | log.Println(append([]interface{}{"[TRACE]"}, message...)...) 
50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "log" 7 | "net/http" 8 | _ "net/http/pprof" 9 | "time" 10 | ) 11 | 12 | const ( 13 | COMMAND_START = "start" 14 | COMMAND_SYNC = "sync" 15 | COMMAND_VERSION = "version" 16 | ) 17 | 18 | func main() { 19 | config := LoadConfig() 20 | defer handlePanic(config) 21 | 22 | if config.LogLevel == LOG_LEVEL_TRACE { 23 | go enableProfiling() 24 | } 25 | 26 | command := flag.Arg(0) 27 | if len(flag.Args()) == 0 { 28 | command = COMMAND_START 29 | } 30 | 31 | switch command { 32 | case COMMAND_START: 33 | LogDebug(config, "Starting BemiDB v"+VERSION) 34 | start(config) 35 | case COMMAND_SYNC: 36 | LogDebug(config, "Syncing with BemiDB v"+VERSION) 37 | if config.Pg.SyncInterval != "" { 38 | duration, err := time.ParseDuration(config.Pg.SyncInterval) 39 | if err != nil { 40 | PrintErrorAndExit(config, "Invalid interval format: "+config.Pg.SyncInterval+".\n\n"+ 41 | "Supported formats: 1h, 20m, 30s.\n"+ 42 | "See https://github.com/BemiHQ/BemiDB#sync-command-options for more information.", 43 | ) 44 | 45 | } 46 | LogInfo(config, "Starting sync loop with interval:", config.Pg.SyncInterval) 47 | for { 48 | syncFromPg(config) 49 | LogInfo(config, "Sleeping for", config.Pg.SyncInterval) 50 | time.Sleep(duration) 51 | } 52 | } else { 53 | syncFromPg(config) 54 | } 55 | case COMMAND_VERSION: 56 | fmt.Println("BemiDB version:", VERSION) 57 | default: 58 | PrintErrorAndExit(config, "Unknown command: "+command+".\n\n"+ 59 | "Supported commands: "+COMMAND_START+", "+COMMAND_SYNC+", "+COMMAND_VERSION+".\n"+ 60 | "See https://github.com/BemiHQ/BemiDB#quickstart for more information.", 61 | ) 62 | } 63 | } 64 | 65 | func start(config *Config) { 66 | tcpListener := NewTcpListener(config) 67 | LogInfo(config, "BemiDB: Listening on", tcpListener.Addr()) 68 | 69 | duckdb := NewDuckdb(config, true) 70 | LogInfo(config, "DuckDB: Connected") 71 | defer duckdb.Close() 72 | 73 | icebergReader := NewIcebergReader(config) 74 | duckdb.ExecFile(icebergReader.InternalStartSqlFile()) 75 | 76 | queryHandler := NewQueryHandler(config, duckdb, icebergReader) 77 | 78 | for { 79 | conn := AcceptConnection(config, tcpListener) 80 | LogInfo(config, "BemiDB: Accepted connection from", conn.RemoteAddr()) 81 | postgres := NewPostgres(config, &conn) 82 | 83 | go func() { 84 | postgres.Run(queryHandler) 85 | defer postgres.Close() 86 | LogInfo(config, "BemiDB: Closed connection from", conn.RemoteAddr()) 87 | }() 88 | } 89 | } 90 | 91 | func syncFromPg(config *Config) { 92 | syncer := NewSyncer(config) 93 | syncer.SyncFromPostgres() 94 | LogInfo(config, "Sync from PostgreSQL completed successfully.") 95 | } 96 | 97 | func enableProfiling() { 98 | func() { log.Println(http.ListenAndServe(":6060", nil)) }() 99 | } 100 | 101 | func handlePanic(config *Config) { 102 | func() { 103 | if r := recover(); r != nil { 104 | err, _ := r.(error) 105 | HandleUnexpectedError(config, err) 106 | } 107 | }() 108 | } 109 | -------------------------------------------------------------------------------- /src/parser_a_expr.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "strings" 5 | 6 | pgQuery "github.com/pganalyze/pg_query_go/v5" 7 | ) 8 | 9 | type ParserAExpr struct { 10 | config *Config 11 | utils *ParserUtils 12 | } 13 | 14 | func 
NewParserAExpr(config *Config) *ParserAExpr { 15 | return &ParserAExpr{ 16 | config: config, 17 | utils: NewParserUtils(config), 18 | } 19 | } 20 | 21 | func (parser *ParserAExpr) AExpr(node *pgQuery.Node) *pgQuery.A_Expr { 22 | return node.GetAExpr() 23 | } 24 | 25 | // = ANY({schema_information}) -> IN (schema_information) 26 | func (parser *ParserAExpr) ConvertedRightAnyToIn(node *pgQuery.Node) *pgQuery.Node { 27 | aExpr := parser.AExpr(node) 28 | 29 | if aExpr.Kind != pgQuery.A_Expr_Kind_AEXPR_OP_ANY { 30 | return node 31 | } 32 | 33 | if aExpr.Rexpr.GetAConst() == nil { 34 | // NOTE: ... = ANY() on non-constants is not fully supported yet 35 | return parser.utils.MakeNullNode() 36 | } 37 | 38 | arrayStr := aExpr.Rexpr.GetAConst().GetSval().Sval 39 | arrayStr = strings.Trim(arrayStr, "{}") 40 | values := strings.Split(arrayStr, ",") 41 | 42 | items := make([]*pgQuery.Node, len(values)) 43 | for i, value := range values { 44 | value = strings.Trim(value, " ") 45 | items[i] = &pgQuery.Node{ 46 | Node: &pgQuery.Node_AConst{ 47 | AConst: &pgQuery.A_Const{ 48 | Val: &pgQuery.A_Const_Sval{ 49 | Sval: &pgQuery.String{ 50 | Sval: value, 51 | }, 52 | }, 53 | Location: 0, 54 | }, 55 | }, 56 | } 57 | } 58 | 59 | return &pgQuery.Node{ 60 | Node: &pgQuery.Node_AExpr{ 61 | AExpr: &pgQuery.A_Expr{ 62 | Kind: pgQuery.A_Expr_Kind_AEXPR_IN, 63 | Name: []*pgQuery.Node{{Node: &pgQuery.Node_String_{String_: &pgQuery.String{Sval: "="}}}}, 64 | Lexpr: aExpr.Lexpr, 65 | Rexpr: &pgQuery.Node{ 66 | Node: &pgQuery.Node_List{ 67 | List: &pgQuery.List{ 68 | Items: items, 69 | }, 70 | }, 71 | }, 72 | Location: aExpr.Location, 73 | }, 74 | }, 75 | } 76 | } 77 | 78 | // pg_catalog.[operator] -> [operator] 79 | func (parser *ParserAExpr) RemovePgCatalog(node *pgQuery.Node) { 80 | aExpr := parser.AExpr(node) 81 | 82 | if aExpr == nil || aExpr.Kind != pgQuery.A_Expr_Kind_AEXPR_OP { 83 | return 84 | } 85 | 86 | if len(aExpr.Name) == 2 && aExpr.Name[0].GetString_().Sval == PG_SCHEMA_PG_CATALOG { 87 | aExpr.Name = aExpr.Name[1:] // Remove the first element 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/parser_column_ref.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | pgQuery "github.com/pganalyze/pg_query_go/v5" 5 | ) 6 | 7 | type ParserColumnRef struct { 8 | config *Config 9 | } 10 | 11 | func NewParserColumnRef(config *Config) *ParserColumnRef { 12 | return &ParserColumnRef{config: config} 13 | } 14 | 15 | func (parser *ParserColumnRef) FieldNames(node *pgQuery.Node) []string { 16 | columnRef := node.GetColumnRef() 17 | if columnRef == nil { 18 | return nil 19 | } 20 | 21 | fieldNames := make([]string, 0) 22 | for _, field := range columnRef.Fields { 23 | if field.GetString_() == nil { 24 | return nil 25 | } 26 | fieldNames = append(fieldNames, field.GetString_().Sval) 27 | } 28 | return fieldNames 29 | } 30 | 31 | func (parser *ParserColumnRef) SetFields(node *pgQuery.Node, fields []string) { 32 | columnRef := node.GetColumnRef() 33 | 34 | columnRef.Fields = make([]*pgQuery.Node, len(fields)) 35 | for i, field := range fields { 36 | columnRef.Fields[i] = pgQuery.MakeStrNode(field) 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/parser_function.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "strings" 5 | 6 | pgQuery "github.com/pganalyze/pg_query_go/v5" 7 | ) 
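// ParserFunction (defined below) groups helpers for inspecting and rewriting function-call (FuncCall) nodes in the parsed Postgres query tree, e.g. remapping schemas, format()/printf(), encode(), and to_timestamp() rewrites.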
8 | 9 | type ParserFunction struct { 10 | config *Config 11 | utils *ParserUtils 12 | } 13 | 14 | func NewParserFunction(config *Config) *ParserFunction { 15 | return &ParserFunction{config: config, utils: NewParserUtils(config)} 16 | } 17 | 18 | func (parser *ParserFunction) FunctionCall(targetNode *pgQuery.Node) *pgQuery.FuncCall { 19 | return targetNode.GetResTarget().Val.GetFuncCall() 20 | } 21 | 22 | func (parser *ParserFunction) FirstArgumentToString(functionCall *pgQuery.FuncCall) string { 23 | if len(functionCall.Args) < 1 { 24 | return "" 25 | } 26 | return functionCall.Args[0].GetAConst().GetSval().Sval 27 | } 28 | 29 | // n from (FUNCTION()).n 30 | func (parser *ParserFunction) IndirectionName(targetNode *pgQuery.Node) string { 31 | indirection := targetNode.GetResTarget().Val.GetAIndirection() 32 | if indirection != nil { 33 | return indirection.Indirection[0].GetString_().Sval 34 | } 35 | 36 | return "" 37 | } 38 | 39 | func (parser *ParserFunction) NestedFunctionCalls(functionCall *pgQuery.FuncCall) []*pgQuery.FuncCall { 40 | nestedFunctionCalls := []*pgQuery.FuncCall{} 41 | 42 | for _, arg := range functionCall.Args { 43 | nestedFunctionCalls = append(nestedFunctionCalls, arg.GetFuncCall()) 44 | } 45 | 46 | return nestedFunctionCalls 47 | } 48 | 49 | func (parser *ParserFunction) SchemaFunction(functionCall *pgQuery.FuncCall) *QuerySchemaFunction { 50 | return parser.utils.SchemaFunction(functionCall) 51 | } 52 | 53 | // pg_catalog.func() -> main.func() 54 | func (parser *ParserFunction) RemapSchemaToMain(functionCall *pgQuery.FuncCall) *pgQuery.FuncCall { 55 | switch len(functionCall.Funcname) { 56 | case 1: 57 | functionCall.Funcname = append([]*pgQuery.Node{pgQuery.MakeStrNode(DUCKDB_SCHEMA_MAIN)}, functionCall.Funcname...) 58 | case 2: 59 | functionCall.Funcname[0] = pgQuery.MakeStrNode(DUCKDB_SCHEMA_MAIN) 60 | } 61 | 62 | return functionCall 63 | } 64 | 65 | // format('%s %1$s', str) -> printf('%1$s %1$s', str) 66 | func (parser *ParserFunction) RemapFormatToPrintf(functionCall *pgQuery.FuncCall) *pgQuery.FuncCall { 67 | format := parser.FirstArgumentToString(functionCall) 68 | for i := range functionCall.Args[1:] { 69 | format = strings.Replace(format, "%s", "%"+IntToString(i+1)+"$s", 1) 70 | } 71 | 72 | functionCall.Funcname = []*pgQuery.Node{pgQuery.MakeStrNode("printf")} 73 | functionCall.Args[0] = pgQuery.MakeAConstStrNode(format, 0) 74 | return functionCall 75 | } 76 | 77 | // encode(sha256(...), 'hex') -> sha256(...) 78 | func (parser *ParserFunction) RemoveEncode(functionCall *pgQuery.FuncCall) { 79 | if len(functionCall.Args) != 2 { 80 | return 81 | } 82 | 83 | firstArg := functionCall.Args[0] 84 | nestedFunctionCall := firstArg.GetFuncCall() 85 | schemaFunction := parser.utils.SchemaFunction(nestedFunctionCall) 86 | if schemaFunction.Function != "sha256" { 87 | return 88 | } 89 | 90 | secondArg := functionCall.Args[1] 91 | var format string 92 | if secondArg.GetAConst() != nil { 93 | format = secondArg.GetAConst().GetSval().Sval 94 | } else if secondArg.GetTypeCast() != nil { 95 | format = secondArg.GetTypeCast().Arg.GetAConst().GetSval().Sval 96 | } 97 | if format != "hex" { 98 | return 99 | } 100 | 101 | functionCall.Funcname = nestedFunctionCall.Funcname 102 | functionCall.Args = nestedFunctionCall.Args 103 | } 104 | 105 | // to_timestamp(...) 
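// Used by the bemidb_last_synced_at() remapping in query_remapper_function.go: the call is rewritten to to_timestamp(<last synced Unix seconds>), or to to_timestamp(NULL) when the timestamp is 0 (e.g., when the internal table metadata could not be read).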
106 | func (parser *ParserFunction) RemapToTimestamp(functionCall *pgQuery.FuncCall, timestamp int64) { 107 | functionCall.Funcname = []*pgQuery.Node{pgQuery.MakeStrNode("to_timestamp")} 108 | 109 | if timestamp == 0 { 110 | functionCall.Args[0] = parser.utils.MakeNullNode() 111 | } else { 112 | functionCall.Args[0] = pgQuery.MakeAConstIntNode(timestamp, 0) 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /src/parser_select.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | pgQuery "github.com/pganalyze/pg_query_go/v5" 5 | ) 6 | 7 | type ParserSelect struct { 8 | config *Config 9 | utils *ParserUtils 10 | } 11 | 12 | func NewParserSelect(config *Config) *ParserSelect { 13 | return &ParserSelect{config: config, utils: NewParserUtils(config)} 14 | } 15 | 16 | func (parser *ParserSelect) SetDefaultTargetName(targetNode *pgQuery.Node, name string) { 17 | target := targetNode.GetResTarget() 18 | 19 | if target.Name == "" { 20 | target.Name = name 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/parser_show.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | pgQuery "github.com/pganalyze/pg_query_go/v5" 5 | ) 6 | 7 | type ParserShow struct { 8 | config *Config 9 | } 10 | 11 | func NewParserShow(config *Config) *ParserShow { 12 | return &ParserShow{config: config} 13 | } 14 | 15 | func (parser *ParserShow) VariableName(stmt *pgQuery.RawStmt) string { 16 | return stmt.Stmt.GetVariableShowStmt().Name 17 | } 18 | 19 | // SHOW var -> SELECT value AS var FROM duckdb_settings() WHERE LOWER(name) = 'var'; 20 | func (parser *ParserShow) MakeSelectFromDuckdbSettings(variableName string) *pgQuery.RawStmt { 21 | return &pgQuery.RawStmt{ 22 | Stmt: &pgQuery.Node{ 23 | Node: &pgQuery.Node_SelectStmt{ 24 | SelectStmt: &pgQuery.SelectStmt{ 25 | TargetList: []*pgQuery.Node{ 26 | pgQuery.MakeResTargetNodeWithNameAndVal( 27 | variableName, 28 | pgQuery.MakeColumnRefNode( 29 | []*pgQuery.Node{pgQuery.MakeStrNode("value")}, 30 | 0, 31 | ), 32 | 0, 33 | ), 34 | }, 35 | FromClause: []*pgQuery.Node{ 36 | pgQuery.MakeSimpleRangeFunctionNode( 37 | []*pgQuery.Node{ 38 | pgQuery.MakeListNode( 39 | []*pgQuery.Node{ 40 | pgQuery.MakeFuncCallNode( 41 | []*pgQuery.Node{pgQuery.MakeStrNode("duckdb_settings")}, 42 | nil, 43 | 0, 44 | ), 45 | }, 46 | ), 47 | }, 48 | ), 49 | }, 50 | WhereClause: pgQuery.MakeAExprNode( 51 | pgQuery.A_Expr_Kind_AEXPR_OP, 52 | []*pgQuery.Node{pgQuery.MakeStrNode("=")}, 53 | pgQuery.MakeFuncCallNode( 54 | []*pgQuery.Node{pgQuery.MakeStrNode("lower")}, 55 | []*pgQuery.Node{ 56 | pgQuery.MakeColumnRefNode( 57 | []*pgQuery.Node{pgQuery.MakeStrNode("name")}, 58 | 0, 59 | ), 60 | }, 61 | 0, 62 | ), 63 | pgQuery.MakeAConstStrNode(variableName, 0), 64 | 0, 65 | ), 66 | }, 67 | }, 68 | }, 69 | } 70 | } 71 | 72 | // SELECT value AS search_path -> SELECT CONCAT('"$user", ', value) AS search_path 73 | func (parser *ParserShow) SetTargetListForSearchPath(stmt *pgQuery.RawStmt) { 74 | stmt.Stmt.GetSelectStmt().TargetList = []*pgQuery.Node{ 75 | pgQuery.MakeResTargetNodeWithNameAndVal( 76 | PG_VAR_SEARCH_PATH, 77 | pgQuery.MakeFuncCallNode( 78 | []*pgQuery.Node{pgQuery.MakeStrNode("concat")}, 79 | []*pgQuery.Node{ 80 | pgQuery.MakeAConstStrNode(`"$user", `, 0), 81 | pgQuery.MakeColumnRefNode( 82 | []*pgQuery.Node{pgQuery.MakeStrNode("value")}, 83 | 0, 84 | ), 85 | }, 
86 | 0, 87 | ), 88 | 0, 89 | ), 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/parser_table.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | pgQuery "github.com/pganalyze/pg_query_go/v5" 5 | ) 6 | 7 | type QueryToIcebergTable struct { 8 | QuerySchemaTable QuerySchemaTable 9 | IcebergTablePath string 10 | } 11 | 12 | type ParserTable struct { 13 | config *Config 14 | utils *ParserUtils 15 | } 16 | 17 | func NewParserTable(config *Config) *ParserTable { 18 | return &ParserTable{config: config, utils: NewParserUtils(config)} 19 | } 20 | 21 | func (parser *ParserTable) NodeToQuerySchemaTable(node *pgQuery.Node) QuerySchemaTable { 22 | rangeVar := node.GetRangeVar() 23 | var alias string 24 | 25 | if rangeVar.Alias != nil { 26 | alias = rangeVar.Alias.Aliasname 27 | } 28 | 29 | return QuerySchemaTable{ 30 | Schema: rangeVar.Schemaname, 31 | Table: rangeVar.Relname, 32 | Alias: alias, 33 | } 34 | } 35 | 36 | func (parser *ParserTable) RemapSchemaToMain(node *pgQuery.Node) { 37 | node.GetRangeVar().Schemaname = DUCKDB_SCHEMA_MAIN 38 | } 39 | 40 | // Other information_schema.* tables 41 | func (parser *ParserTable) IsTableFromInformationSchema(qSchemaTable QuerySchemaTable) bool { 42 | return qSchemaTable.Schema == PG_SCHEMA_INFORMATION_SCHEMA 43 | } 44 | 45 | // public.table -> FROM iceberg_scan('path', skip_schema_inference = true) table 46 | // schema.table -> FROM iceberg_scan('path', skip_schema_inference = true) schema_table 47 | func (parser *ParserTable) MakeIcebergTableNode(queryToIcebergTable QueryToIcebergTable) *pgQuery.Node { 48 | node := pgQuery.MakeSimpleRangeFunctionNode([]*pgQuery.Node{ 49 | pgQuery.MakeListNode([]*pgQuery.Node{ 50 | pgQuery.MakeFuncCallNode( 51 | []*pgQuery.Node{ 52 | pgQuery.MakeStrNode("iceberg_scan"), 53 | }, 54 | []*pgQuery.Node{ 55 | pgQuery.MakeAConstStrNode( 56 | queryToIcebergTable.IcebergTablePath, 57 | 0, 58 | ), 59 | pgQuery.MakeAExprNode( 60 | pgQuery.A_Expr_Kind_AEXPR_OP, 61 | []*pgQuery.Node{pgQuery.MakeStrNode("=")}, 62 | pgQuery.MakeColumnRefNode([]*pgQuery.Node{pgQuery.MakeStrNode("skip_schema_inference")}, 0), 63 | parser.utils.MakeAConstBoolNode(true), 64 | 0, 65 | ), 66 | }, 67 | 0, 68 | ), 69 | }), 70 | }) 71 | 72 | // DuckDB doesn't support aliases on iceberg_scan() functions, so we need to wrap it in a nested select that can have an alias 73 | selectStarNode := pgQuery.MakeResTargetNodeWithVal( 74 | pgQuery.MakeColumnRefNode( 75 | []*pgQuery.Node{pgQuery.MakeAStarNode()}, 76 | 0, 77 | ), 78 | 0, 79 | ) 80 | return parser.utils.MakeSubselectFromNode(queryToIcebergTable.QuerySchemaTable, []*pgQuery.Node{selectStarNode}, node) 81 | } 82 | 83 | func (parser *ParserTable) TopLevelSchemaFunction(rangeFunction *pgQuery.RangeFunction) *QuerySchemaFunction { 84 | if len(rangeFunction.Functions) == 0 || len(rangeFunction.Functions[0].GetList().Items) == 0 { 85 | return nil 86 | } 87 | 88 | functionNode := rangeFunction.Functions[0].GetList().Items[0] 89 | if functionNode.GetFuncCall() == nil { 90 | return nil // E.g., system PG calls like "... 
FROM user" => sqlvalue_function:{op:SVFOP_USER} 91 | } 92 | 93 | return parser.utils.SchemaFunction(functionNode.GetFuncCall()) 94 | } 95 | 96 | func (parser *ParserTable) TableFunctionCalls(rangeFunction *pgQuery.RangeFunction) []*pgQuery.FuncCall { 97 | functionCalls := []*pgQuery.FuncCall{} 98 | 99 | for _, funcNode := range rangeFunction.Functions { 100 | for _, funcItemNode := range funcNode.GetList().Items { 101 | functionCall := funcItemNode.GetFuncCall() 102 | if functionCall != nil { 103 | functionCalls = append(functionCalls, functionCall) 104 | } 105 | } 106 | } 107 | 108 | return functionCalls 109 | } 110 | 111 | func (parser *ParserTable) SetAliasIfNotExists(rangeFunction *pgQuery.RangeFunction, alias string) { 112 | if rangeFunction.GetAlias() != nil { 113 | return 114 | } 115 | 116 | rangeFunction.Alias = &pgQuery.Alias{Aliasname: alias} 117 | } 118 | -------------------------------------------------------------------------------- /src/parser_type_cast.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "strings" 5 | 6 | pgQuery "github.com/pganalyze/pg_query_go/v5" 7 | ) 8 | 9 | type ParserTypeCast struct { 10 | utils *ParserUtils 11 | config *Config 12 | } 13 | 14 | func NewParserTypeCast(config *Config) *ParserTypeCast { 15 | return &ParserTypeCast{utils: NewParserUtils(config), config: config} 16 | } 17 | 18 | func (parser *ParserTypeCast) TypeCast(node *pgQuery.Node) *pgQuery.TypeCast { 19 | if node.GetTypeCast() == nil { 20 | return nil 21 | } 22 | 23 | typeCast := node.GetTypeCast() 24 | if len(typeCast.TypeName.Names) == 0 { 25 | return nil 26 | } 27 | 28 | return typeCast 29 | } 30 | 31 | func (parser *ParserTypeCast) TypeName(typeCast *pgQuery.TypeCast) string { 32 | if typeCast == nil { 33 | return "" 34 | } 35 | 36 | typeNameNode := typeCast.TypeName 37 | var typeNames []string 38 | 39 | for _, name := range typeNameNode.Names { 40 | typeNames = append(typeNames, name.GetString_().Sval) 41 | } 42 | 43 | typeName := strings.Join(typeNames, ".") 44 | 45 | if typeNameNode.ArrayBounds != nil { 46 | typeName += "[]" 47 | } 48 | 49 | return typeName 50 | } 51 | 52 | func (parser *ParserTypeCast) NestedTypeCast(typeCast *pgQuery.TypeCast) *pgQuery.TypeCast { 53 | return parser.TypeCast(typeCast.Arg) 54 | } 55 | 56 | // "value" COLLATE pg_catalog.default -> "value" 57 | func (parser *ParserTypeCast) RemovedDefaultCollateClause(node *pgQuery.Node) *pgQuery.Node { 58 | collname := node.GetCollateClause().Collname 59 | 60 | if len(collname) == 2 && collname[0].GetString_().Sval == "pg_catalog" && collname[1].GetString_().Sval == "default" { 61 | return node.GetCollateClause().Arg 62 | } 63 | 64 | return node 65 | } 66 | 67 | func (parser *ParserTypeCast) ArgStringValue(typeCast *pgQuery.TypeCast) string { 68 | return typeCast.Arg.GetAConst().GetSval().Sval 69 | } 70 | 71 | // pg_catalog.[type] -> [type] 72 | func (parser *ParserTypeCast) RemovePgCatalog(typeCast *pgQuery.TypeCast) { 73 | if typeCast != nil && len(typeCast.TypeName.Names) == 2 && typeCast.TypeName.Names[0].GetString_().Sval == PG_SCHEMA_PG_CATALOG { 74 | typeCast.TypeName.Names = typeCast.TypeName.Names[1:] 75 | } 76 | } 77 | 78 | func (parser *ParserTypeCast) SetTypeCastArg(typeCast *pgQuery.TypeCast, arg *pgQuery.Node) { 79 | typeCast.Arg = arg 80 | } 81 | 82 | func (parser *ParserTypeCast) MakeListValueFromArray(node *pgQuery.Node) *pgQuery.Node { 83 | arrayStr := node.GetAConst().GetSval().Sval 84 | arrayStr = strings.Trim(arrayStr, 
"{}") 85 | elements := strings.Split(arrayStr, ",") 86 | 87 | funcCall := &pgQuery.FuncCall{ 88 | Funcname: []*pgQuery.Node{ 89 | pgQuery.MakeStrNode("list_value"), 90 | }, 91 | } 92 | 93 | for _, elem := range elements { 94 | funcCall.Args = append(funcCall.Args, 95 | pgQuery.MakeAConstStrNode(elem, 0)) 96 | } 97 | 98 | return &pgQuery.Node{ 99 | Node: &pgQuery.Node_FuncCall{ 100 | FuncCall: funcCall, 101 | }, 102 | } 103 | } 104 | 105 | // SELECT c.oid 106 | // FROM pg_class c 107 | // JOIN pg_namespace n ON n.oid = c.relnamespace 108 | // WHERE n.nspname = 'schema' AND c.relname = 'table' 109 | func (parser *ParserTypeCast) MakeSubselectOidBySchemaTableArg(argumentNode *pgQuery.Node) *pgQuery.Node { 110 | targetNode := pgQuery.MakeResTargetNodeWithVal( 111 | pgQuery.MakeColumnRefNode([]*pgQuery.Node{ 112 | pgQuery.MakeStrNode("c"), 113 | pgQuery.MakeStrNode("oid"), 114 | }, 0), 115 | 0, 116 | ) 117 | 118 | joinNode := pgQuery.MakeJoinExprNode( 119 | pgQuery.JoinType_JOIN_INNER, 120 | pgQuery.MakeFullRangeVarNode("", "pg_class", "c", 0), 121 | pgQuery.MakeFullRangeVarNode("", "pg_namespace", "n", 0), 122 | pgQuery.MakeAExprNode( 123 | pgQuery.A_Expr_Kind_AEXPR_OP, 124 | []*pgQuery.Node{ 125 | pgQuery.MakeStrNode("="), 126 | }, 127 | pgQuery.MakeColumnRefNode([]*pgQuery.Node{ 128 | pgQuery.MakeStrNode("n"), 129 | pgQuery.MakeStrNode("oid"), 130 | }, 0), 131 | pgQuery.MakeColumnRefNode([]*pgQuery.Node{ 132 | pgQuery.MakeStrNode("c"), 133 | pgQuery.MakeStrNode("relnamespace"), 134 | }, 0), 135 | 0, 136 | ), 137 | ) 138 | 139 | if argumentNode.GetAConst() == nil { 140 | // NOTE: ::regclass::oid on non-constants is not fully supported yet 141 | return parser.utils.MakeNullNode() 142 | } 143 | 144 | value := argumentNode.GetAConst().GetSval().Sval 145 | qSchemaTable := NewQuerySchemaTableFromString(value) 146 | if qSchemaTable.Schema == "" { 147 | qSchemaTable.Schema = PG_SCHEMA_PUBLIC 148 | } 149 | 150 | whereNode := pgQuery.MakeBoolExprNode( 151 | pgQuery.BoolExprType_AND_EXPR, 152 | []*pgQuery.Node{ 153 | pgQuery.MakeAExprNode( 154 | pgQuery.A_Expr_Kind_AEXPR_OP, 155 | []*pgQuery.Node{ 156 | pgQuery.MakeStrNode("="), 157 | }, 158 | pgQuery.MakeColumnRefNode([]*pgQuery.Node{ 159 | pgQuery.MakeStrNode("n"), 160 | pgQuery.MakeStrNode("nspname"), 161 | }, 0), 162 | pgQuery.MakeAConstStrNode(qSchemaTable.Schema, 0), 163 | 0, 164 | ), 165 | pgQuery.MakeAExprNode( 166 | pgQuery.A_Expr_Kind_AEXPR_OP, 167 | []*pgQuery.Node{ 168 | pgQuery.MakeStrNode("="), 169 | }, 170 | pgQuery.MakeColumnRefNode([]*pgQuery.Node{ 171 | pgQuery.MakeStrNode("c"), 172 | pgQuery.MakeStrNode("relname"), 173 | }, 0), 174 | pgQuery.MakeAConstStrNode(qSchemaTable.Table, 0), 175 | 0, 176 | ), 177 | }, 178 | 0, 179 | ) 180 | 181 | return &pgQuery.Node{ 182 | Node: &pgQuery.Node_SubLink{ 183 | SubLink: &pgQuery.SubLink{ 184 | SubLinkType: pgQuery.SubLinkType_EXPR_SUBLINK, 185 | Subselect: &pgQuery.Node{ 186 | Node: &pgQuery.Node_SelectStmt{ 187 | SelectStmt: &pgQuery.SelectStmt{ 188 | TargetList: []*pgQuery.Node{targetNode}, 189 | FromClause: []*pgQuery.Node{joinNode}, 190 | WhereClause: whereNode, 191 | }, 192 | }, 193 | }, 194 | }, 195 | }, 196 | } 197 | 198 | } 199 | -------------------------------------------------------------------------------- /src/parser_utils.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | pgQuery "github.com/pganalyze/pg_query_go/v5" 5 | ) 6 | 7 | type ParserUtils struct { 8 | config *Config 9 | } 10 | 11 | func 
NewParserUtils(config *Config) *ParserUtils { 12 | return &ParserUtils{config: config} 13 | } 14 | 15 | func (utils *ParserUtils) SchemaFunction(functionCall *pgQuery.FuncCall) *QuerySchemaFunction { 16 | switch len(functionCall.Funcname) { 17 | case 1: 18 | return &QuerySchemaFunction{ 19 | Schema: "", 20 | Function: functionCall.Funcname[0].GetString_().Sval, 21 | } 22 | case 2: 23 | return &QuerySchemaFunction{ 24 | Schema: functionCall.Funcname[0].GetString_().Sval, 25 | Function: functionCall.Funcname[1].GetString_().Sval, 26 | } 27 | default: 28 | Panic(utils.config, "Invalid function call") 29 | return nil 30 | } 31 | } 32 | 33 | func (utils *ParserUtils) MakeSubselectFromNode(qSchemaTable QuerySchemaTable, targetList []*pgQuery.Node, fromNode *pgQuery.Node) *pgQuery.Node { 34 | alias := qSchemaTable.Alias 35 | if alias == "" { 36 | if qSchemaTable.Schema == PG_SCHEMA_PUBLIC || qSchemaTable.Schema == "" { 37 | alias = qSchemaTable.Table 38 | } else { 39 | alias = qSchemaTable.Schema + "_" + qSchemaTable.Table 40 | } 41 | } 42 | 43 | return &pgQuery.Node{ 44 | Node: &pgQuery.Node_RangeSubselect{ 45 | RangeSubselect: &pgQuery.RangeSubselect{ 46 | Subquery: &pgQuery.Node{ 47 | Node: &pgQuery.Node_SelectStmt{ 48 | SelectStmt: &pgQuery.SelectStmt{ 49 | TargetList: targetList, 50 | FromClause: []*pgQuery.Node{fromNode}, 51 | }, 52 | }, 53 | }, 54 | Alias: &pgQuery.Alias{ 55 | Aliasname: alias, 56 | }, 57 | }, 58 | }, 59 | } 60 | } 61 | 62 | func (utils *ParserUtils) MakeAConstBoolNode(val bool) *pgQuery.Node { 63 | return &pgQuery.Node{ 64 | Node: &pgQuery.Node_AConst{ 65 | AConst: &pgQuery.A_Const{ 66 | Val: &pgQuery.A_Const_Boolval{ 67 | Boolval: &pgQuery.Boolean{ 68 | Boolval: val, 69 | }, 70 | }, 71 | Isnull: false, 72 | Location: 0, 73 | }, 74 | }, 75 | } 76 | } 77 | 78 | func (utils *ParserUtils) MakeNullNode() *pgQuery.Node { 79 | return &pgQuery.Node{ 80 | Node: &pgQuery.Node_AConst{ 81 | AConst: &pgQuery.A_Const{ 82 | Isnull: true, 83 | }, 84 | }, 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/pg_constants.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | const ( 4 | PG_SCHEMA_INFORMATION_SCHEMA = "information_schema" 5 | PG_SCHEMA_PG_CATALOG = "pg_catalog" 6 | PG_SCHEMA_PUBLIC = "public" 7 | 8 | PG_FUNCTION_FORMAT = "format" 9 | PG_FUNCTION_ENCODE = "encode" 10 | 11 | PG_TABLE_PG_CLASS = "pg_class" 12 | PG_TABLE_PG_STAT_USER_TABLES = "pg_stat_user_tables" 13 | PG_TABLE_TABLES = "tables" 14 | 15 | PG_VAR_SEARCH_PATH = "search_path" 16 | ) 17 | 18 | var PG_SYSTEM_TABLES = NewSet([]string{ 19 | "pg_aggregate", 20 | "pg_am", 21 | "pg_amop", 22 | "pg_amproc", 23 | "pg_attrdef", 24 | "pg_attribute", 25 | "pg_auth_members", 26 | "pg_authid", 27 | "pg_cast", 28 | "pg_class", 29 | "pg_collation", 30 | "pg_constraint", 31 | "pg_conversion", 32 | "pg_database", 33 | "pg_db_role_setting", 34 | "pg_default_acl", 35 | "pg_depend", 36 | "pg_description", 37 | "pg_enum", 38 | "pg_event_trigger", 39 | "pg_extension", 40 | "pg_foreign_data_wrapper", 41 | "pg_foreign_server", 42 | "pg_foreign_table", 43 | "pg_index", 44 | "pg_inherits", 45 | "pg_init_privs", 46 | "pg_language", 47 | "pg_largeobject", 48 | "pg_largeobject_metadata", 49 | "pg_matviews", 50 | "pg_namespace", 51 | "pg_opclass", 52 | "pg_operator", 53 | "pg_opfamily", 54 | "pg_parameter_acl", 55 | "pg_partitioned_table", 56 | "pg_policy", 57 | "pg_proc", 58 | "pg_publication", 59 | "pg_publication_namespace", 60 | 
"pg_publication_rel", 61 | "pg_user", 62 | "pg_range", 63 | "pg_replication_origin", 64 | "pg_replication_slots", 65 | "pg_rewrite", 66 | "pg_roles", 67 | "pg_seclabel", 68 | "pg_sequence", 69 | "pg_shadow", 70 | "pg_shdepend", 71 | "pg_shdescription", 72 | "pg_shseclabel", 73 | "pg_statistic", 74 | "pg_statistic_ext", 75 | "pg_statistic_ext_data", 76 | "pg_subscription", 77 | "pg_subscription_rel", 78 | "pg_tablespace", 79 | "pg_transform", 80 | "pg_trigger", 81 | "pg_ts_config", 82 | "pg_ts_config_map", 83 | "pg_ts_dict", 84 | "pg_ts_parser", 85 | "pg_ts_template", 86 | "pg_type", 87 | "pg_user_mapping", 88 | "pg_views", 89 | }) 90 | 91 | var PG_SYSTEM_VIEWS = NewSet([]string{ 92 | "pg_stat_activity", 93 | "pg_stat_replication", 94 | "pg_stat_wal_receiver", 95 | "pg_stat_recovery_prefetch", 96 | "pg_stat_subscription", 97 | "pg_stat_ssl", 98 | "pg_stat_gssapi", 99 | "pg_stat_progress_analyze", 100 | "pg_stat_progress_create_index", 101 | "pg_stat_progress_vacuum", 102 | "pg_stat_progress_cluster", 103 | "pg_stat_progress_basebackup", 104 | "pg_stat_progress_copy", 105 | "pg_stat_archiver", 106 | "pg_stat_bgwriter", 107 | "pg_stat_checkpointer", 108 | "pg_stat_database", 109 | "pg_stat_database_conflicts", 110 | "pg_stat_io", 111 | "pg_stat_replication_slots", 112 | "pg_stat_slru", 113 | "pg_stat_subscription_stats", 114 | "pg_stat_wal", 115 | "pg_stat_all_tables", 116 | "pg_stat_sys_tables", 117 | "pg_stat_user_tables", 118 | "pg_stat_xact_all_tables", 119 | "pg_stat_xact_sys_tables", 120 | "pg_stat_xact_user_tables", 121 | "pg_stat_all_indexes", 122 | "pg_stat_sys_indexes", 123 | "pg_stat_user_indexes", 124 | "pg_stat_user_functions", 125 | "pg_stat_xact_user_functions", 126 | "pg_statio_all_tables", 127 | "pg_statio_sys_tables", 128 | "pg_statio_user_tables", 129 | "pg_statio_all_indexes", 130 | "pg_statio_sys_indexes", 131 | "pg_statio_user_indexes", 132 | "pg_statio_all_sequences", 133 | "pg_statio_sys_sequences", 134 | "pg_statio_user_sequences", 135 | }) 136 | -------------------------------------------------------------------------------- /src/postgres.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "errors" 5 | "net" 6 | 7 | "github.com/jackc/pgx/v5/pgproto3" 8 | ) 9 | 10 | const ( 11 | PG_VERSION = "17.0" 12 | PG_ENCODING = "UTF8" 13 | PG_TX_STATUS_IDLE = 'I' 14 | 15 | SYSTEM_AUTH_USER = "bemidb" 16 | ) 17 | 18 | type Postgres struct { 19 | backend *pgproto3.Backend 20 | conn *net.Conn 21 | config *Config 22 | } 23 | 24 | func NewPostgres(config *Config, conn *net.Conn) *Postgres { 25 | return &Postgres{ 26 | conn: conn, 27 | backend: pgproto3.NewBackend(*conn, *conn), 28 | config: config, 29 | } 30 | } 31 | 32 | func NewTcpListener(config *Config) net.Listener { 33 | parsedIp := net.ParseIP(config.Host) 34 | if parsedIp == nil { 35 | PrintErrorAndExit(config, "Invalid host: "+config.Host+".") 36 | } 37 | 38 | var network, host string 39 | if parsedIp.To4() == nil { 40 | network = "tcp6" 41 | host = "[" + config.Host + "]" 42 | } else { 43 | network = "tcp4" 44 | host = config.Host 45 | } 46 | 47 | tcpListener, err := net.Listen(network, host+":"+config.Port) 48 | PanicIfError(config, err) 49 | return tcpListener 50 | } 51 | 52 | func AcceptConnection(config *Config, listener net.Listener) net.Conn { 53 | conn, err := listener.Accept() 54 | PanicIfError(config, err) 55 | return conn 56 | } 57 | 58 | func (postgres *Postgres) Run(queryHandler *QueryHandler) { 59 | err := postgres.handleStartup() 60 | if 
err != nil { 61 | LogError(postgres.config, "Error handling startup:", err) 62 | return // Terminate connection 63 | } 64 | 65 | for { 66 | message, err := postgres.backend.Receive() 67 | if err != nil { 68 | return // Terminate connection 69 | } 70 | 71 | switch message := message.(type) { 72 | case *pgproto3.Query: 73 | postgres.handleSimpleQuery(queryHandler, message) 74 | case *pgproto3.Parse: 75 | err = postgres.handleExtendedQuery(queryHandler, message) 76 | if err != nil { 77 | return // Terminate connection 78 | } 79 | case *pgproto3.Terminate: 80 | LogDebug(postgres.config, "Client terminated connection") 81 | return 82 | default: 83 | LogError(postgres.config, "Received message other than Query from client:", message) 84 | return // Terminate connection 85 | } 86 | } 87 | } 88 | 89 | func (postgres *Postgres) Close() error { 90 | return (*postgres.conn).Close() 91 | } 92 | 93 | func (postgres *Postgres) handleSimpleQuery(queryHandler *QueryHandler, queryMessage *pgproto3.Query) { 94 | LogDebug(postgres.config, "Received query:", queryMessage.String) 95 | messages, err := queryHandler.HandleSimpleQuery(queryMessage.String) 96 | if err != nil { 97 | postgres.writeError(err) 98 | return 99 | } 100 | messages = append(messages, &pgproto3.ReadyForQuery{TxStatus: PG_TX_STATUS_IDLE}) 101 | postgres.writeMessages(messages...) 102 | } 103 | 104 | func (postgres *Postgres) handleExtendedQuery(queryHandler *QueryHandler, parseMessage *pgproto3.Parse) error { 105 | LogDebug(postgres.config, "Parsing query", parseMessage.Query) 106 | messages, preparedStatement, err := queryHandler.HandleParseQuery(parseMessage) 107 | if err != nil { 108 | postgres.writeError(err) 109 | return nil 110 | } 111 | postgres.writeMessages(messages...) 112 | 113 | var previousErr error 114 | for { 115 | message, err := postgres.backend.Receive() 116 | if err != nil { 117 | return err 118 | } 119 | 120 | switch message := message.(type) { 121 | case *pgproto3.Bind: 122 | if previousErr != nil { // Skip processing the next message if there was an error in the previous message 123 | continue 124 | } 125 | 126 | LogDebug(postgres.config, "Binding query", message.PreparedStatement) 127 | messages, preparedStatement, err = queryHandler.HandleBindQuery(message, preparedStatement) 128 | if err != nil { 129 | postgres.writeError(err) 130 | previousErr = err 131 | } 132 | postgres.writeMessages(messages...) 133 | case *pgproto3.Describe: 134 | if previousErr != nil { // Skip processing the next message if there was an error in the previous message 135 | continue 136 | } 137 | 138 | LogDebug(postgres.config, "Describing query", message.Name, "("+string(message.ObjectType)+")") 139 | var messages []pgproto3.Message 140 | messages, preparedStatement, err = queryHandler.HandleDescribeQuery(message, preparedStatement) 141 | if err != nil { 142 | postgres.writeError(err) 143 | previousErr = err 144 | } 145 | postgres.writeMessages(messages...) 146 | case *pgproto3.Execute: 147 | if previousErr != nil { // Skip processing the next message if there was an error in the previous message 148 | continue 149 | } 150 | 151 | LogDebug(postgres.config, "Executing query", message.Portal) 152 | messages, err := queryHandler.HandleExecuteQuery(message, preparedStatement) 153 | if err != nil { 154 | postgres.writeError(err) 155 | previousErr = err 156 | } 157 | postgres.writeMessages(messages...) 
158 | case *pgproto3.Sync: 159 | LogDebug(postgres.config, "Syncing query") 160 | postgres.writeMessages( 161 | &pgproto3.ReadyForQuery{TxStatus: PG_TX_STATUS_IDLE}, 162 | ) 163 | 164 | // If there was an error or Parse->Bind->Sync (...) or Parse->Describe->Sync (e.g., Metabase) 165 | // it means that sync is the last message in the extended query protocol, we can exit handleExtendedQuery 166 | if previousErr != nil || preparedStatement.Bound || preparedStatement.Described { 167 | return nil 168 | } 169 | // Otherwise, wait for Bind/Describe/Execute/Sync. 170 | // For example, psycopg sends Parse->[extra Sync]->Bind->Describe->Execute->Sync 171 | } 172 | } 173 | } 174 | 175 | func (postgres *Postgres) writeMessages(messages ...pgproto3.Message) { 176 | var buf []byte 177 | for _, message := range messages { 178 | buf, _ = message.Encode(buf) 179 | } 180 | (*postgres.conn).Write(buf) 181 | } 182 | 183 | func (postgres *Postgres) writeError(err error) { 184 | LogError(postgres.config, err.Error()) 185 | 186 | postgres.writeMessages( 187 | &pgproto3.ErrorResponse{ 188 | Severity: "ERROR", 189 | Message: err.Error(), 190 | }, 191 | &pgproto3.ReadyForQuery{TxStatus: PG_TX_STATUS_IDLE}, 192 | ) 193 | } 194 | 195 | func (postgres *Postgres) handleStartup() error { 196 | startupMessage, err := postgres.backend.ReceiveStartupMessage() 197 | if err != nil { 198 | return err 199 | } 200 | 201 | switch startupMessage := startupMessage.(type) { 202 | case *pgproto3.StartupMessage: 203 | params := startupMessage.Parameters 204 | LogDebug(postgres.config, "BemiDB: startup message", params) 205 | 206 | if params["database"] != postgres.config.Database { 207 | postgres.writeError(errors.New("database " + params["database"] + " does not exist")) 208 | return errors.New("database does not exist") 209 | } 210 | 211 | if postgres.config.User != "" && params["user"] != postgres.config.User && params["user"] != SYSTEM_AUTH_USER { 212 | postgres.writeError(errors.New("role \"" + params["user"] + "\" does not exist")) 213 | return errors.New("role does not exist") 214 | } 215 | 216 | postgres.writeMessages( 217 | &pgproto3.AuthenticationOk{}, 218 | &pgproto3.ParameterStatus{Name: "client_encoding", Value: PG_ENCODING}, 219 | &pgproto3.ParameterStatus{Name: "server_version", Value: PG_VERSION}, 220 | &pgproto3.ReadyForQuery{TxStatus: PG_TX_STATUS_IDLE}, 221 | ) 222 | return nil 223 | case *pgproto3.SSLRequest: 224 | _, err = (*postgres.conn).Write([]byte("N")) 225 | if err != nil { 226 | return err 227 | } 228 | postgres.handleStartup() 229 | return nil 230 | default: 231 | return errors.New("unknown startup message") 232 | } 233 | } 234 | -------------------------------------------------------------------------------- /src/query_remapper_expression.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "strings" 5 | 6 | pgQuery "github.com/pganalyze/pg_query_go/v5" 7 | ) 8 | 9 | type QueryRemapperExpression struct { 10 | parserTypeCast *ParserTypeCast 11 | parserColumnRef *ParserColumnRef 12 | parserAExpr *ParserAExpr 13 | config *Config 14 | } 15 | 16 | func NewQueryRemapperExpression(config *Config) *QueryRemapperExpression { 17 | remapper := &QueryRemapperExpression{ 18 | parserTypeCast: NewParserTypeCast(config), 19 | parserColumnRef: NewParserColumnRef(config), 20 | parserAExpr: NewParserAExpr(config), 21 | config: config, 22 | } 23 | return remapper 24 | } 25 | 26 | func (remapper *QueryRemapperExpression) RemappedExpression(node 
*pgQuery.Node) *pgQuery.Node { 27 | node = remapper.remappedTypeCast(node) 28 | node = remapper.remappedArithmeticExpression(node) 29 | node = remapper.remappedCollateClause(node) 30 | remapper.remapColumnReference(node) 31 | 32 | return node 33 | } 34 | 35 | // value::type or CAST(value AS type) 36 | func (remapper *QueryRemapperExpression) remappedTypeCast(node *pgQuery.Node) *pgQuery.Node { 37 | typeCast := remapper.parserTypeCast.TypeCast(node) 38 | if typeCast == nil { 39 | return node 40 | } 41 | 42 | remapper.parserTypeCast.RemovePgCatalog(typeCast) 43 | typeName := remapper.parserTypeCast.TypeName(typeCast) 44 | 45 | switch typeName { 46 | case "text[]": 47 | // '{a,b,c}'::text[] -> ARRAY['a', 'b', 'c'] 48 | return remapper.parserTypeCast.MakeListValueFromArray(typeCast.Arg) 49 | case "regproc": 50 | // 'schema.function_name'::regproc -> 'function_name' 51 | nameParts := strings.Split(remapper.parserTypeCast.ArgStringValue(typeCast), ".") 52 | return pgQuery.MakeAConstStrNode(nameParts[len(nameParts)-1], 0) 53 | case "regclass": 54 | // 'schema.table'::regclass -> SELECT c.oid FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'schema' AND c.relname = 'table' 55 | return remapper.parserTypeCast.MakeSubselectOidBySchemaTableArg(typeCast.Arg) 56 | case "oid": 57 | // 'schema.table'::regclass::oid -> SELECT c.oid FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'schema' AND c.relname = 'table' 58 | nestedTypeCast := remapper.parserTypeCast.NestedTypeCast(typeCast) 59 | remapper.parserTypeCast.RemovePgCatalog(nestedTypeCast) 60 | nestedTypeName := remapper.parserTypeCast.TypeName(nestedTypeCast) 61 | if nestedTypeName != "regclass" { 62 | return node 63 | } 64 | return remapper.parserTypeCast.MakeSubselectOidBySchemaTableArg(nestedTypeCast.Arg) 65 | case "text": 66 | // value::(regtype|regnamespace|regclass)::text -> value::text 67 | nestedTypeCast := remapper.parserTypeCast.NestedTypeCast(typeCast) 68 | remapper.parserTypeCast.RemovePgCatalog(nestedTypeCast) 69 | nestedTypeName := remapper.parserTypeCast.TypeName(nestedTypeCast) 70 | if nestedTypeName != "regtype" && nestedTypeName != "regnamespace" && nestedTypeName != "regclass" { 71 | return node 72 | } 73 | remapper.parserTypeCast.SetTypeCastArg(typeCast, nestedTypeCast.Arg) 74 | } 75 | 76 | return node 77 | } 78 | 79 | func (remapper *QueryRemapperExpression) remappedArithmeticExpression(node *pgQuery.Node) *pgQuery.Node { 80 | aExpr := remapper.parserAExpr.AExpr(node) 81 | if aExpr == nil { 82 | return node 83 | } 84 | 85 | // = ANY({schema_information}) -> IN (schema_information) 86 | node = remapper.parserAExpr.ConvertedRightAnyToIn(node) 87 | 88 | // pg_catalog.[operator] -> [operator] 89 | remapper.parserAExpr.RemovePgCatalog(node) 90 | 91 | return node 92 | } 93 | 94 | // public.table.column -> table.column 95 | // schema.table.column -> schema_table.column 96 | func (remapper *QueryRemapperExpression) remapColumnReference(node *pgQuery.Node) { 97 | fieldNames := remapper.parserColumnRef.FieldNames(node) 98 | if fieldNames == nil || len(fieldNames) != 3 { 99 | return 100 | } 101 | 102 | schema := fieldNames[0] 103 | if schema == PG_SCHEMA_PG_CATALOG || schema == PG_SCHEMA_INFORMATION_SCHEMA { 104 | return 105 | } 106 | 107 | table := fieldNames[1] 108 | column := fieldNames[2] 109 | if schema == PG_SCHEMA_PUBLIC { 110 | remapper.parserColumnRef.SetFields(node, []string{table, column}) 111 | return 112 | } 113 | 114 | remapper.parserColumnRef.SetFields(node, 
[]string{schema + "_" + table, column}) 115 | } 116 | 117 | // "value" COLLATE pg_catalog.default -> "value" 118 | func (remapper *QueryRemapperExpression) remappedCollateClause(node *pgQuery.Node) *pgQuery.Node { 119 | if node.GetCollateClause() == nil { 120 | return node 121 | } 122 | 123 | return remapper.parserTypeCast.RemovedDefaultCollateClause(node) 124 | } 125 | -------------------------------------------------------------------------------- /src/query_remapper_function.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "regexp" 5 | "strings" 6 | 7 | pgQuery "github.com/pganalyze/pg_query_go/v5" 8 | ) 9 | 10 | const ( 11 | BEMIDB_FUNCTION_LAST_SYNCED_AT = "bemidb_last_synced_at" 12 | ) 13 | 14 | var PG_CATALOG_MACRO_FUNCTION_NAMES = Set[string]{} 15 | var PG_INFORMATION_SCHEMA_MACRO_FUNCTION_NAMES = Set[string]{} 16 | 17 | func CreatePgCatalogMacroQueries(config *Config) []string { 18 | result := []string{ 19 | // Functions 20 | "CREATE MACRO aclexplode(aclitem_array) AS json(aclitem_array)", 21 | "CREATE MACRO current_setting(setting_name) AS '', (setting_name, missing_ok) AS ''", 22 | "CREATE MACRO pg_backend_pid() AS 0", 23 | "CREATE MACRO pg_encoding_to_char(encoding_int) AS 'UTF8'", 24 | "CREATE MACRO pg_get_expr(pg_node_tree, relation_oid) AS pg_catalog.pg_get_expr(pg_node_tree, relation_oid), (pg_node_tree, relation_oid, pretty_bool) AS pg_catalog.pg_get_expr(pg_node_tree, relation_oid)", 25 | "CREATE MACRO pg_get_function_identity_arguments(func_oid) AS ''", 26 | "CREATE MACRO pg_get_indexdef(index_oid) AS '', (index_oid, column_int) AS '', (index_oid, column_int, pretty_bool) AS ''", 27 | "CREATE MACRO pg_get_partkeydef(table_oid) AS ''", 28 | "CREATE MACRO pg_get_userbyid(role_id) AS 'bemidb'", 29 | "CREATE MACRO pg_get_viewdef(view_oid) AS pg_catalog.pg_get_viewdef(view_oid), (view_oid, pretty_bool) AS pg_catalog.pg_get_viewdef(view_oid)", 30 | "CREATE MACRO pg_indexes_size(regclass) AS 0", 31 | "CREATE MACRO pg_is_in_recovery() AS false", 32 | "CREATE MACRO pg_table_size(regclass) AS 0", 33 | "CREATE MACRO pg_tablespace_location(tablespace_oid) AS ''", 34 | "CREATE MACRO pg_total_relation_size(regclass) AS 0", 35 | "CREATE MACRO quote_ident(text) AS '\"' || text || '\"'", 36 | "CREATE MACRO row_to_json(record) AS to_json(record), (record, pretty_bool) AS to_json(record)", 37 | "CREATE MACRO set_config(setting_name, new_value, is_local) AS new_value", 38 | "CREATE MACRO version() AS 'PostgreSQL " + PG_VERSION + ", compiled by BemiDB'", 39 | "CREATE MACRO pg_get_statisticsobjdef_columns(oid) AS NULL", 40 | "CREATE MACRO pg_relation_is_publishable(val) AS NULL", 41 | `CREATE MACRO jsonb_extract_path_text(from_json, path_elems) AS 42 | CASE typeof(path_elems) LIKE '%[]' 43 | WHEN true THEN json_extract_path_text(from_json, path_elems)[1]::varchar 44 | ELSE json_extract_path_text(from_json, path_elems)::varchar 45 | END`, 46 | `CREATE MACRO json_build_object(k1, v1) AS json_object(k1, v1), 47 | (k1, v1, k2, v2) AS json_object(k1, v1, k2, v2), 48 | (k1, v1, k2, v2, k3, v3) AS json_object(k1, v1, k2, v2, k3, v3), 49 | (k1, v1, k2, v2, k3, v3, k4, v4) AS json_object(k1, v1, k2, v2, k3, v3, k4, v4)`, 50 | `CREATE MACRO array_upper(arr, dimension) AS 51 | CASE dimension 52 | WHEN 1 THEN len(arr) 53 | ELSE NULL 54 | END`, 55 | 56 | // Table functions 57 | "CREATE MACRO pg_is_in_recovery() AS TABLE SELECT false AS pg_is_in_recovery", 58 | `CREATE MACRO pg_show_all_settings() AS TABLE SELECT 59 | name, 60 | 
value AS setting, 61 | NULL::text AS unit, 62 | 'Settings' AS category, 63 | description AS short_desc, 64 | NULL::text AS extra_desc, 65 | 'user' AS context, 66 | input_type AS vartype, 67 | 'default' AS source, 68 | NULL::int4 AS min_val, 69 | NULL::int4 AS max_val, 70 | NULL::text[] AS enumvals, 71 | value AS boot_val, 72 | value AS reset_val, 73 | NULL::text AS sourcefile, 74 | NULL::int4 AS sourceline, 75 | FALSE AS pending_restart 76 | FROM duckdb_settings()`, 77 | `CREATE MACRO pg_get_keywords() AS TABLE SELECT 78 | keyword_name AS word, 79 | 'U' AS catcode, 80 | TRUE AS barelabel, 81 | keyword_category AS catdesc, 82 | 'can be bare label' AS baredesc 83 | FROM duckdb_keywords()`, 84 | } 85 | PG_CATALOG_MACRO_FUNCTION_NAMES = extractMacroNames(result) 86 | return result 87 | } 88 | 89 | func CreateInformationSchemaMacroQueries(config *Config) []string { 90 | result := []string{ 91 | "CREATE MACRO _pg_expandarray(arr) AS STRUCT_PACK(x := unnest(arr), n := unnest(generate_series(1, array_length(arr))))", 92 | } 93 | PG_INFORMATION_SCHEMA_MACRO_FUNCTION_NAMES = extractMacroNames(result) 94 | return result 95 | } 96 | 97 | var BUILTIN_DUCKDB_PG_FUNCTION_NAMES = NewSet([]string{ 98 | "array_to_string", 99 | "generate_series", 100 | }) 101 | 102 | type QueryRemapperFunction struct { 103 | parserFunction *ParserFunction 104 | icebergReader *IcebergReader 105 | config *Config 106 | } 107 | 108 | func NewQueryRemapperFunction(config *Config, icebergReader *IcebergReader) *QueryRemapperFunction { 109 | return &QueryRemapperFunction{ 110 | parserFunction: NewParserFunction(config), 111 | icebergReader: icebergReader, 112 | config: config, 113 | } 114 | } 115 | 116 | func (remapper *QueryRemapperFunction) SchemaFunction(functionCall *pgQuery.FuncCall) *QuerySchemaFunction { 117 | return remapper.parserFunction.SchemaFunction(functionCall) 118 | } 119 | 120 | // FUNCTION(...) -> ANOTHER_FUNCTION(...) 121 | func (remapper *QueryRemapperFunction) RemapFunctionCall(functionCall *pgQuery.FuncCall) *QuerySchemaFunction { 122 | schemaFunction := remapper.SchemaFunction(functionCall) 123 | 124 | // Pre-defined macro functions 125 | switch schemaFunction.Schema { 126 | 127 | // pg_catalog.func() -> main.func() 128 | case PG_SCHEMA_PG_CATALOG, "": 129 | if PG_CATALOG_MACRO_FUNCTION_NAMES.Contains(schemaFunction.Function) || BUILTIN_DUCKDB_PG_FUNCTION_NAMES.Contains(schemaFunction.Function) { 130 | remapper.parserFunction.RemapSchemaToMain(functionCall) 131 | return schemaFunction 132 | } 133 | 134 | // information_schema.func() -> main.func() 135 | case PG_SCHEMA_INFORMATION_SCHEMA: 136 | if PG_INFORMATION_SCHEMA_MACRO_FUNCTION_NAMES.Contains(schemaFunction.Function) { 137 | remapper.parserFunction.RemapSchemaToMain(functionCall) 138 | return schemaFunction 139 | } 140 | } 141 | 142 | switch { 143 | 144 | // format('%s %1$s', str) -> printf('%1$s %1$s', str) 145 | case schemaFunction.Function == PG_FUNCTION_FORMAT: 146 | remapper.parserFunction.RemapFormatToPrintf(functionCall) 147 | return schemaFunction 148 | 149 | // encode(sha256(...), 'hex') -> sha256(...) 
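// e.g., SELECT encode(sha256(col), 'hex') becomes SELECT sha256(col); DuckDB's sha256() already returns a hex-encoded string, so the encode() wrapper can be dropped.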
150 | case schemaFunction.Function == PG_FUNCTION_ENCODE: 151 | remapper.parserFunction.RemoveEncode(functionCall) 152 | return schemaFunction 153 | 154 | // bemidb_last_synced_at('schema.table') -> to_timestamp(internalTableMetadata.LastSyncedAt) 155 | case schemaFunction.Function == BEMIDB_FUNCTION_LAST_SYNCED_AT: 156 | schemaTableName := remapper.parserFunction.FirstArgumentToString(functionCall) 157 | schemaTableParts := strings.Split(schemaTableName, ".") 158 | var pgSchemaTable PgSchemaTable 159 | if len(schemaTableParts) == 2 { 160 | pgSchemaTable.Schema = schemaTableParts[0] 161 | pgSchemaTable.Table = schemaTableParts[1] 162 | } else { 163 | pgSchemaTable.Schema = PG_SCHEMA_PUBLIC 164 | pgSchemaTable.Table = schemaTableParts[0] 165 | } 166 | 167 | internalTableMetadata, err := remapper.icebergReader.InternalTableMetadata(pgSchemaTable) 168 | 169 | if err != nil { 170 | LogError(remapper.config, "Failed to get internal table metadata for %s: %v", pgSchemaTable, err) 171 | remapper.parserFunction.RemapToTimestamp(functionCall, 0) 172 | } else { 173 | remapper.parserFunction.RemapToTimestamp(functionCall, internalTableMetadata.LastSyncedAt) 174 | } 175 | 176 | return schemaFunction 177 | } 178 | 179 | return nil 180 | } 181 | 182 | func (remapper *QueryRemapperFunction) RemapNestedFunctionCalls(functionCall *pgQuery.FuncCall) { 183 | nestedFunctionCalls := remapper.parserFunction.NestedFunctionCalls(functionCall) 184 | if len(nestedFunctionCalls) == 0 { 185 | return 186 | } 187 | 188 | for _, nestedFunctionCall := range nestedFunctionCalls { 189 | if nestedFunctionCall == nil { 190 | continue 191 | } 192 | 193 | schemaFunction := remapper.RemapFunctionCall(nestedFunctionCall) 194 | if schemaFunction != nil { 195 | continue 196 | } 197 | 198 | remapper.RemapNestedFunctionCalls(nestedFunctionCall) // self-recursion 199 | } 200 | } 201 | 202 | func extractMacroNames(macros []string) Set[string] { 203 | names := make(Set[string]) 204 | re := regexp.MustCompile(`CREATE MACRO (\w+)\(`) 205 | 206 | for _, macro := range macros { 207 | matches := re.FindStringSubmatch(macro) 208 | if len(matches) > 1 { 209 | names.Add(matches[1]) 210 | } 211 | } 212 | 213 | return names 214 | } 215 | -------------------------------------------------------------------------------- /src/query_remapper_select.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | pgQuery "github.com/pganalyze/pg_query_go/v5" 5 | ) 6 | 7 | type QueryRemapperSelect struct { 8 | parserSelect *ParserSelect 9 | parserFunction *ParserFunction 10 | config *Config 11 | } 12 | 13 | func NewQueryRemapperSelect(config *Config) *QueryRemapperSelect { 14 | return &QueryRemapperSelect{ 15 | parserSelect: NewParserSelect(config), 16 | parserFunction: NewParserFunction(config), 17 | config: config, 18 | } 19 | } 20 | 21 | // SELECT FUNCTION(...) -> SELECT FUNCTION(...) AS FUNCTION 22 | func (remapper *QueryRemapperSelect) SetDefaultTargetNameToFunctionName(targetNode *pgQuery.Node) *pgQuery.Node { 23 | functionCall := remapper.parserFunction.FunctionCall(targetNode) 24 | if functionCall != nil { 25 | schemaFunction := remapper.parserFunction.SchemaFunction(functionCall) 26 | // FUNCTION(...) -> FUNCTION(...) 
AS FUNCTION 27 | remapper.parserSelect.SetDefaultTargetName(targetNode, schemaFunction.Function) 28 | return targetNode 29 | } 30 | 31 | indirectionName := remapper.parserFunction.IndirectionName(targetNode) 32 | if indirectionName != "" { 33 | // (FUNCTION()).n -> (FUNCTION()).n AS n 34 | remapper.parserSelect.SetDefaultTargetName(targetNode, indirectionName) 35 | return targetNode 36 | } 37 | 38 | return targetNode 39 | } 40 | -------------------------------------------------------------------------------- /src/query_remapper_show.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | pgQuery "github.com/pganalyze/pg_query_go/v5" 5 | ) 6 | 7 | type QueryRemapperShow struct { 8 | config *Config 9 | parserShow *ParserShow 10 | } 11 | 12 | func NewQueryRemapperShow(config *Config) *QueryRemapperShow { 13 | return &QueryRemapperShow{ 14 | config: config, 15 | parserShow: NewParserShow(config), 16 | } 17 | } 18 | 19 | func (remapper *QueryRemapperShow) RemapShowStatement(stmt *pgQuery.RawStmt) *pgQuery.RawStmt { 20 | parser := remapper.parserShow 21 | variableName := parser.VariableName(stmt) 22 | 23 | // SHOW var -> SELECT value AS var FROM duckdb_settings() WHERE LOWER(name) = 'var'; 24 | newStmt := parser.MakeSelectFromDuckdbSettings(variableName) 25 | 26 | // SELECT value AS search_path -> SELECT CONCAT('"$user", ', value) AS search_path 27 | if variableName == PG_VAR_SEARCH_PATH { 28 | parser.SetTargetListForSearchPath(newStmt) 29 | } 30 | 31 | return newStmt 32 | } 33 | -------------------------------------------------------------------------------- /src/storage_interface.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | ) 7 | 8 | type RefreshMode string 9 | 10 | const ( 11 | RefreshModeFull RefreshMode = "FULL" 12 | RefreshModeFullInProgress RefreshMode = "FULL_IN_PROGRESS" 13 | RefreshModeIncremental RefreshMode = "INCREMENTAL" 14 | RefreshModeIncrementalInProgress RefreshMode = "INCREMENTAL_IN_PROGRESS" 15 | 16 | INTERNAL_START_SQL_FILE_NAME = "bemidb-start.sql" 17 | ) 18 | 19 | type ParquetFileStats struct { 20 | ColumnSizes map[int]int64 21 | ValueCounts map[int]int64 22 | NullValueCounts map[int]int64 23 | LowerBounds map[int][]byte 24 | UpperBounds map[int][]byte 25 | SplitOffsets []int64 26 | } 27 | 28 | type ParquetFile struct { 29 | Uuid string 30 | Path string 31 | Size int64 32 | RecordCount int64 33 | Stats ParquetFileStats 34 | } 35 | 36 | type ManifestFile struct { 37 | RecordsDeleted bool 38 | SnapshotId int64 39 | Path string 40 | Size int64 41 | RecordCount int64 42 | DataFileSize int64 43 | } 44 | 45 | type ManifestListItem struct { 46 | SequenceNumber int 47 | ManifestFile ManifestFile 48 | } 49 | 50 | type ManifestListFile struct { 51 | SequenceNumber int 52 | SnapshotId int64 53 | TimestampMs int64 54 | Path string 55 | Operation string 56 | AddedFilesSize int64 57 | AddedDataFiles int64 58 | AddedRecords int64 59 | RemovedFilesSize int64 60 | DeletedDataFiles int64 61 | DeletedRecords int64 62 | } 63 | 64 | type MetadataFile struct { 65 | Version int64 66 | Path string 67 | } 68 | 69 | type InternalTableMetadata struct { 70 | LastRefreshMode RefreshMode `json:"last-refresh-mode"` 71 | LastSyncedAt int64 `json:"last-synced-at"` 72 | LastTxid int64 `json:"last-txid"` 73 | MaxXmin *uint32 `json:"max-xmin"` 74 | } 75 | 76 | func (internalTableMetadata InternalTableMetadata) IsInProgress() bool { 77 | return 
internalTableMetadata.LastRefreshMode == RefreshModeIncrementalInProgress || internalTableMetadata.LastRefreshMode == RefreshModeFullInProgress 78 | } 79 | 80 | func (internalTableMetadata InternalTableMetadata) MaxXminString() string { 81 | if internalTableMetadata.MaxXmin == nil { 82 | panic("MaxXmin is unexpectedly null. " + internalTableMetadata.String()) 83 | } 84 | return Uint32ToString(*internalTableMetadata.MaxXmin) 85 | } 86 | 87 | func (internalTableMetadata InternalTableMetadata) LastWrappedAroundTxidString() string { 88 | return Int64ToString(PgWraparoundTxid(internalTableMetadata.LastTxid)) 89 | } 90 | 91 | func (internalTableMetadata InternalTableMetadata) String() string { 92 | maxXmin := "null" 93 | if internalTableMetadata.MaxXmin != nil { 94 | maxXmin = Uint32ToString(*internalTableMetadata.MaxXmin) 95 | } 96 | 97 | return fmt.Sprintf( 98 | "LastRefreshMode: %s, LastSyncedAt: %d, MaxXmin: %s", 99 | internalTableMetadata.LastRefreshMode, 100 | internalTableMetadata.LastSyncedAt, 101 | maxXmin, 102 | ) 103 | } 104 | 105 | type StorageInterface interface { 106 | // Read 107 | IcebergSchemas() (icebergSchemas []string, err error) 108 | IcebergSchemaTables() (icebersSchemaTables Set[IcebergSchemaTable], err error) 109 | IcebergMetadataFilePath(icebergSchemaTable IcebergSchemaTable) (path string) 110 | IcebergTableFields(icebergSchemaTable IcebergSchemaTable) (icebergTableFields []IcebergTableField, err error) 111 | ExistingManifestListFiles(metadataDirPath string) (manifestListFilesSortedAsc []ManifestListFile, err error) 112 | ExistingManifestListItems(manifestListFile ManifestListFile) (manifestListItemsSortedDesc []ManifestListItem, err error) 113 | ExistingParquetFilePath(manifestFile ManifestFile) (parquetFilePath string, err error) 114 | 115 | // Write 116 | DeleteSchema(schema string) (err error) 117 | DeleteSchemaTable(schemaTable IcebergSchemaTable) (err error) 118 | CreateDataDir(schemaTable IcebergSchemaTable) (dataDirPath string) 119 | CreateMetadataDir(schemaTable IcebergSchemaTable) (metadataDirPath string) 120 | CreateParquet(dataDirPath string, pgSchemaColumns []PgSchemaColumn, maxPayloadThreshold int, loadRows func() ([][]string, InternalTableMetadata)) (parquetFile ParquetFile, internalTableMetadata InternalTableMetadata, err error) 121 | CreateOverwrittenParquet(dataDirPath string, existingParquetFilePath string, newParquetFilePath string, pgSchemaColumns []PgSchemaColumn, dynamicRowCountPerBatch int) (overwrittenParquetFile ParquetFile, err error) 122 | DeleteParquet(parquetFile ParquetFile) (err error) 123 | CreateManifest(metadataDirPath string, parquetFile ParquetFile) (manifestFile ManifestFile, err error) 124 | CreateDeletedRecordsManifest(metadataDirPath string, uuid string, existingManifestFile ManifestFile) (deletedRecsManifestFile ManifestFile, err error) 125 | CreateManifestList(metadataDirPath string, parquetFileUuid string, manifestListItemsSortedDesc []ManifestListItem) (manifestListFile ManifestListFile, err error) 126 | CreateMetadata(metadataDirPath string, pgSchemaColumns []PgSchemaColumn, manifestListFilesSortedAsc []ManifestListFile) (metadataFile MetadataFile, err error) 127 | 128 | // Read (internal) 129 | InternalStartSqlFile() (sqlFile io.ReadCloser) 130 | InternalTableMetadata(pgSchemaTable PgSchemaTable) (internalTableMetadata InternalTableMetadata, err error) 131 | // Write (internal) 132 | WriteInternalStartSqlFile(queries []string) (err error) 133 | WriteInternalTableMetadata(metadataDirPath string, internalTableMetadata 
InternalTableMetadata) (err error) 134 | } 135 | 136 | func NewStorage(config *Config) StorageInterface { 137 | switch config.StorageType { 138 | case STORAGE_TYPE_LOCAL: 139 | return NewLocalStorage(config) 140 | case STORAGE_TYPE_S3: 141 | return NewS3Storage(config) 142 | } 143 | 144 | return nil 145 | } 146 | -------------------------------------------------------------------------------- /src/storage_local_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/binary" 5 | "os" 6 | "testing" 7 | ) 8 | 9 | var TEST_STORAGE_PG_SCHEMA_COLUMNS = []PgSchemaColumn{ 10 | {ColumnName: "id", DataType: "integer", UdtName: "int4", IsNullable: "NO", NumericPrecision: "32", OrdinalPosition: "1", Namespace: "pg_catalog"}, 11 | {ColumnName: "name", DataType: "character varying", UdtName: "varchar", IsNullable: "YES", CharacterMaximumLength: "255", OrdinalPosition: "2", Namespace: "pg_catalog"}, 12 | } 13 | var TEST_STORAGE_ROWS = [][]string{ 14 | {"1", "John"}, 15 | {"2", PG_NULL_STRING}, 16 | } 17 | 18 | func TestCreateParquet(t *testing.T) { 19 | t.Run("Creates a parquet file", func(t *testing.T) { 20 | tempDir := os.TempDir() 21 | config := loadTestConfig() 22 | storage := NewLocalStorage(config) 23 | loadedRows := false 24 | loadRows := func() ([][]string, InternalTableMetadata) { 25 | if loadedRows { 26 | return [][]string{}, InternalTableMetadata{} 27 | } 28 | loadedRows = true 29 | return TEST_STORAGE_ROWS, InternalTableMetadata{} 30 | } 31 | 32 | parquetFile, _, err := storage.CreateParquet(tempDir, TEST_STORAGE_PG_SCHEMA_COLUMNS, 0, loadRows) 33 | 34 | if err != nil { 35 | t.Errorf("Expected no error, got %v", err) 36 | } 37 | if parquetFile.Uuid == "" { 38 | t.Errorf("Expected a non-empty UUID, got %v", parquetFile.Uuid) 39 | } 40 | if parquetFile.Path == "" { 41 | t.Errorf("Expected a non-empty path, got %v", parquetFile.Path) 42 | } 43 | if parquetFile.Size == 0 { 44 | t.Errorf("Expected a non-zero size, got %v", parquetFile.Size) 45 | } 46 | if parquetFile.RecordCount != 2 { 47 | t.Errorf("Expected a record count of 2, got %v", parquetFile.RecordCount) 48 | } 49 | if len(parquetFile.Stats.ColumnSizes) != 2 { 50 | t.Errorf("Expected 2 column sizes, got %v", len(parquetFile.Stats.ColumnSizes)) 51 | } 52 | if parquetFile.Stats.ColumnSizes[1] == 0 { 53 | t.Errorf("Expected a non-zero column size, got %v", parquetFile.Stats.ColumnSizes[1]) 54 | } 55 | if parquetFile.Stats.ColumnSizes[2] == 0 { 56 | t.Errorf("Expected a non-zero column size, got %v", parquetFile.Stats.ColumnSizes[2]) 57 | } 58 | if parquetFile.Stats.ValueCounts[1] != 2 { 59 | t.Errorf("Expected a value count of 2, got %v", parquetFile.Stats.ValueCounts[1]) 60 | } 61 | if parquetFile.Stats.ValueCounts[2] != 2 { 62 | t.Errorf("Expected a value count of 2, got %v", parquetFile.Stats.ValueCounts[2]) 63 | } 64 | if parquetFile.Stats.NullValueCounts[1] != 0 { 65 | t.Errorf("Expected a null value count of 0, got %v", parquetFile.Stats.NullValueCounts[1]) 66 | } 67 | if parquetFile.Stats.NullValueCounts[2] != 1 { 68 | t.Errorf("Expected a null value count of 1, got %v", parquetFile.Stats.NullValueCounts[2]) 69 | } 70 | if binary.LittleEndian.Uint32(parquetFile.Stats.LowerBounds[1]) != 1 { 71 | t.Errorf("Expected a lower bound of 1, got %v", binary.LittleEndian.Uint32(parquetFile.Stats.LowerBounds[1])) 72 | } 73 | if string(parquetFile.Stats.LowerBounds[2]) != "John" { 74 | t.Errorf("Expected a lower bound of John, got %v",
parquetFile.Stats.LowerBounds[2]) 75 | } 76 | if binary.LittleEndian.Uint32(parquetFile.Stats.UpperBounds[1]) != 2 { 77 | t.Errorf("Expected an upper bound of 2, got %v", binary.LittleEndian.Uint32(parquetFile.Stats.UpperBounds[1])) 78 | } 79 | if string(parquetFile.Stats.UpperBounds[2]) != "John" { 80 | t.Errorf("Expected an upper bound of John, got %v", parquetFile.Stats.UpperBounds[2]) 81 | } 82 | if len(parquetFile.Stats.SplitOffsets) != 0 { 83 | t.Errorf("Expected 0 split offsets, got %v", len(parquetFile.Stats.SplitOffsets)) 84 | } 85 | }) 86 | } 87 | 88 | func TestCreateManifest(t *testing.T) { 89 | t.Run("Creates a manifest file", func(t *testing.T) { 90 | tempDir := os.TempDir() 91 | config := loadTestConfig() 92 | storage := NewLocalStorage(config) 93 | parquetFile := createTestParquetFile(storage, tempDir) 94 | 95 | manifestFile, err := storage.CreateManifest(tempDir, parquetFile) 96 | 97 | if err != nil { 98 | t.Errorf("Expected no error, got %v", err) 99 | } 100 | if manifestFile.SnapshotId == 0 { 101 | t.Errorf("Expected a non-zero snapshot ID, got %v", manifestFile.SnapshotId) 102 | } 103 | if manifestFile.Path == "" { 104 | t.Errorf("Expected a non-empty path, got %v", manifestFile.Path) 105 | } 106 | if manifestFile.Size == 0 { 107 | t.Errorf("Expected a non-zero size, got %v", manifestFile.Size) 108 | } 109 | if manifestFile.RecordCount != parquetFile.RecordCount { 110 | t.Errorf("Expected a record count of %v, got %v", parquetFile.RecordCount, manifestFile.RecordCount) 111 | } 112 | if manifestFile.DataFileSize != parquetFile.Size { 113 | t.Errorf("Expected a data file size of %v, got %v", parquetFile.Size, manifestFile.DataFileSize) 114 | } 115 | }) 116 | } 117 | 118 | func TestCreateManifestList(t *testing.T) { 119 | t.Run("Creates a manifest list file", func(t *testing.T) { 120 | tempDir := os.TempDir() 121 | config := loadTestConfig() 122 | storage := NewLocalStorage(config) 123 | parquetFile := createTestParquetFile(storage, tempDir) 124 | manifestFile, err := storage.CreateManifest(tempDir, parquetFile) 125 | PanicIfError(config, err) 126 | manifestListItem := ManifestListItem{SequenceNumber: 1, ManifestFile: manifestFile} 127 | 128 | manifestListFile, err := storage.CreateManifestList(tempDir, parquetFile.Uuid, []ManifestListItem{manifestListItem}) 129 | 130 | if err != nil { 131 | t.Errorf("Expected no error, got %v", err) 132 | } 133 | if manifestListFile.SnapshotId != manifestFile.SnapshotId { 134 | t.Errorf("Expected a snapshot ID of %v, got %v", manifestFile.SnapshotId, manifestListFile.SnapshotId) 135 | } 136 | if manifestListFile.TimestampMs == 0 { 137 | t.Errorf("Expected a non-zero timestamp, got %v", manifestListFile.TimestampMs) 138 | } 139 | if manifestListFile.Path == "" { 140 | t.Errorf("Expected a non-empty path, got %v", manifestListFile.Path) 141 | } 142 | if manifestListFile.Operation != "append" { 143 | t.Errorf("Expected an operation of append, got %v", manifestListFile.Operation) 144 | } 145 | if manifestListFile.AddedFilesSize != parquetFile.Size { 146 | t.Errorf("Expected an added files size of %v, got %v", parquetFile.Size, manifestListFile.AddedFilesSize) 147 | } 148 | if manifestListFile.AddedDataFiles != 1 { 149 | t.Errorf("Expected an added data files count of 1, got %v", manifestListFile.AddedDataFiles) 150 | } 151 | if manifestListFile.AddedRecords != parquetFile.RecordCount { 152 | t.Errorf("Expected an added records count of %v, got %v", parquetFile.RecordCount, manifestListFile.AddedRecords) 153 | } 154 | }) 155 | } 156 | 157 | 
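// The storage tests follow the Iceberg metadata hierarchy bottom-up: a Parquet data file
// is referenced by a manifest, manifests are grouped into a manifest list (one per snapshot),
// and manifest lists are recorded in the table metadata file created below.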
func TestCreateMetadata(t *testing.T) { 158 | t.Run("Creates a metadata file", func(t *testing.T) { 159 | tempDir := os.TempDir() 160 | config := loadTestConfig() 161 | storage := NewLocalStorage(config) 162 | parquetFile := createTestParquetFile(storage, tempDir) 163 | manifestFile, err := storage.CreateManifest(tempDir, parquetFile) 164 | PanicIfError(config, err) 165 | manifestListItem := ManifestListItem{SequenceNumber: 1, ManifestFile: manifestFile} 166 | manifestListFile, err := storage.CreateManifestList(tempDir, parquetFile.Uuid, []ManifestListItem{manifestListItem}) 167 | PanicIfError(config, err) 168 | 169 | metadataFile, err := storage.CreateMetadata(tempDir, TEST_STORAGE_PG_SCHEMA_COLUMNS, []ManifestListFile{manifestListFile}) 170 | 171 | if err != nil { 172 | t.Errorf("Expected no error, got %v", err) 173 | } 174 | if metadataFile.Version != 1 { 175 | t.Errorf("Expected a version of 1, got %v", metadataFile.Version) 176 | } 177 | if metadataFile.Path == "" { 178 | t.Errorf("Expected a non-empty path, got %v", metadataFile.Path) 179 | } 180 | }) 181 | } 182 | 183 | func TestExistingManifestListFiles(t *testing.T) { 184 | t.Run("Returns existing manifest list files", func(t *testing.T) { 185 | tempDir := os.TempDir() 186 | config := loadTestConfig() 187 | storage := NewLocalStorage(config) 188 | parquetFile := createTestParquetFile(storage, tempDir) 189 | manifestFile, err := storage.CreateManifest(tempDir, parquetFile) 190 | PanicIfError(config, err) 191 | manifestListItem := ManifestListItem{SequenceNumber: 1, ManifestFile: manifestFile} 192 | manifestListFile, err := storage.CreateManifestList(tempDir, parquetFile.Uuid, []ManifestListItem{manifestListItem}) 193 | PanicIfError(config, err) 194 | _, err = storage.CreateMetadata(tempDir, TEST_STORAGE_PG_SCHEMA_COLUMNS, []ManifestListFile{manifestListFile}) 195 | PanicIfError(config, err) 196 | 197 | existingManifestListFiles, err := storage.ExistingManifestListFiles(tempDir) 198 | 199 | if err != nil { 200 | t.Errorf("Expected no error, got %v", err) 201 | } 202 | if len(existingManifestListFiles) != 1 { 203 | t.Errorf("Expected 1 existing manifest list file, got %v", len(existingManifestListFiles)) 204 | } 205 | if existingManifestListFiles[0].SnapshotId != manifestListFile.SnapshotId { 206 | t.Errorf("Expected a snapshot ID of %v, got %v", manifestListFile.SnapshotId, existingManifestListFiles[0].SnapshotId) 207 | } 208 | if existingManifestListFiles[0].TimestampMs != manifestListFile.TimestampMs { 209 | t.Errorf("Expected a timestamp of %v, got %v", manifestListFile.TimestampMs, existingManifestListFiles[0].TimestampMs) 210 | } 211 | if existingManifestListFiles[0].Path != manifestListFile.Path { 212 | t.Errorf("Expected a path of %v, got %v", manifestListFile.Path, existingManifestListFiles[0].Path) 213 | } 214 | if existingManifestListFiles[0].Operation != manifestListFile.Operation { 215 | t.Errorf("Expected an operation of %v, got %v", manifestListFile.Operation, existingManifestListFiles[0].Operation) 216 | } 217 | if existingManifestListFiles[0].AddedFilesSize != manifestListFile.AddedFilesSize { 218 | t.Errorf("Expected an added files size of %v, got %v", manifestListFile.AddedFilesSize, existingManifestListFiles[0].AddedFilesSize) 219 | } 220 | if existingManifestListFiles[0].AddedDataFiles != manifestListFile.AddedDataFiles { 221 | t.Errorf("Expected an added data files count of %v, got %v", manifestListFile.AddedDataFiles, existingManifestListFiles[0].AddedDataFiles) 222 | } 223 | if 
existingManifestListFiles[0].AddedRecords != manifestListFile.AddedRecords { 224 | t.Errorf("Expected an added records count of %v, got %v", manifestListFile.AddedRecords, existingManifestListFiles[0].AddedRecords) 225 | } 226 | }) 227 | } 228 | 229 | func TestExistingManifestFiles(t *testing.T) { 230 | t.Run("Returns existing manifest files", func(t *testing.T) { 231 | tempDir := os.TempDir() 232 | config := loadTestConfig() 233 | storage := NewLocalStorage(config) 234 | parquetFile := createTestParquetFile(storage, tempDir) 235 | manifestFile, err := storage.CreateManifest(tempDir, parquetFile) 236 | PanicIfError(config, err) 237 | manifestListItem := ManifestListItem{SequenceNumber: 1, ManifestFile: manifestFile} 238 | manifestListFile, err := storage.CreateManifestList(tempDir, parquetFile.Uuid, []ManifestListItem{manifestListItem}) 239 | PanicIfError(config, err) 240 | 241 | existingManifestListItems, err := storage.ExistingManifestListItems(manifestListFile) 242 | 243 | if err != nil { 244 | t.Errorf("Expected no error, got %v", err) 245 | } 246 | if len(existingManifestListItems) != 1 { 247 | t.Errorf("Expected 1 existing manifest file, got %v", len(existingManifestListItems)) 248 | } 249 | if existingManifestListItems[0].SequenceNumber != 1 { 250 | t.Errorf("Expected a sequence number of 1, got %v", existingManifestListItems[0].SequenceNumber) 251 | } 252 | if existingManifestListItems[0].ManifestFile.SnapshotId != manifestFile.SnapshotId { 253 | t.Errorf("Expected a snapshot ID of %v, got %v", manifestFile.SnapshotId, existingManifestListItems[0].ManifestFile.SnapshotId) 254 | } 255 | if existingManifestListItems[0].ManifestFile.Path != manifestFile.Path { 256 | t.Errorf("Expected a path of %v, got %v", manifestFile.Path, existingManifestListItems[0].ManifestFile.Path) 257 | } 258 | if existingManifestListItems[0].ManifestFile.Size != manifestFile.Size { 259 | t.Errorf("Expected a size of %v, got %v", manifestFile.Size, existingManifestListItems[0].ManifestFile.Size) 260 | } 261 | if existingManifestListItems[0].ManifestFile.RecordCount != manifestFile.RecordCount { 262 | t.Errorf("Expected a record count of %v, got %v", manifestFile.RecordCount, existingManifestListItems[0].ManifestFile.RecordCount) 263 | } 264 | }) 265 | } 266 | 267 | func createTestParquetFile(storage *StorageLocal, dir string) ParquetFile { 268 | loadedRows := false 269 | loadRows := func() ([][]string, InternalTableMetadata) { 270 | if loadedRows { 271 | return [][]string{}, InternalTableMetadata{} 272 | } 273 | loadedRows = true 274 | return TEST_STORAGE_ROWS, InternalTableMetadata{} 275 | } 276 | 277 | parquetFile, _, err := storage.CreateParquet(dir, TEST_STORAGE_PG_SCHEMA_COLUMNS, 0, loadRows) 278 | if err != nil { 279 | panic(err) 280 | } 281 | 282 | return parquetFile 283 | } 284 | -------------------------------------------------------------------------------- /src/syncer.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "encoding/json" 7 | "fmt" 8 | "net/http" 9 | "net/url" 10 | "runtime" 11 | "strings" 12 | "time" 13 | 14 | "github.com/jackc/pgx/v5" 15 | ) 16 | 17 | const ( 18 | MAX_IN_MEMORY_BUFFER_SIZE = 128 * 1024 * 1024 // 128 MB (expands to ~160 MB memory usage) 19 | MAX_PG_ROWS_BATCH_SIZE = 1 * 1024 * 1024 // 1 MB 20 | PING_PG_INTERVAL_SECONDS = 24 21 | 22 | MAX_PARQUET_PAYLOAD_THRESHOLD = 2 * 1024 * 1024 * 1024 // 2 GB (compressed to ~256 MB Parquet) 23 | ) 24 | 25 | type Syncer struct { 26 | 
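// Collaborators used by SyncFromPostgres below: icebergReader inspects the current Iceberg
// state, syncerTable copies one Postgres table at a time, and icebergWriter persists the
// start-up SQL file and removes stale schemas/tables.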
config *Config 27 | icebergWriter *IcebergWriter 28 | icebergReader *IcebergReader 29 | syncerTable *SyncerTable 30 | } 31 | 32 | func NewSyncer(config *Config) *Syncer { 33 | if config.Pg.DatabaseUrl == "" { 34 | PrintErrorAndExit(config, "Missing PostgreSQL database URL.\n\n"+ 35 | "See https://github.com/BemiHQ/BemiDB#sync-command-options for more information.", 36 | ) 37 | } 38 | 39 | icebergWriter := NewIcebergWriter(config) 40 | icebergReader := NewIcebergReader(config) 41 | return &Syncer{ 42 | config: config, 43 | icebergWriter: icebergWriter, 44 | icebergReader: icebergReader, 45 | syncerTable: NewSyncerTable(config), 46 | } 47 | } 48 | 49 | func (syncer *Syncer) SyncFromPostgres() { 50 | ctx := context.Background() 51 | if syncer.config.Pg.IncrementallyRefreshedTables == nil { 52 | syncer.sendAnonymousAnalytics("sync-start") 53 | } else { 54 | syncer.sendAnonymousAnalytics("sync-start-incremental") 55 | } 56 | 57 | databaseUrl := syncer.urlEncodePassword(syncer.config.Pg.DatabaseUrl) 58 | icebergSchemaTables, icebergSchemaTablesErr := syncer.icebergReader.SchemaTables() 59 | 60 | structureConn := syncer.newConnection(ctx, databaseUrl) 61 | defer structureConn.Close(ctx) 62 | 63 | copyConn := syncer.newConnection(ctx, databaseUrl) 64 | defer copyConn.Close(ctx) 65 | 66 | syncedPgSchemaTables := []PgSchemaTable{} 67 | 68 | for _, schema := range syncer.listPgSchemas(structureConn) { 69 | for _, pgSchemaTable := range syncer.listPgSchemaTables(structureConn, schema) { 70 | if syncer.shouldSyncTable(pgSchemaTable) { 71 | var internalTableMetadata InternalTableMetadata 72 | syncedPreviously := icebergSchemaTablesErr == nil && icebergSchemaTables.Contains(pgSchemaTable.ToIcebergSchemaTable()) 73 | if syncedPreviously { 74 | internalTableMetadata = syncer.readInternalTableMetadata(pgSchemaTable) 75 | } 76 | 77 | incrementalRefresh := syncer.config.Pg.IncrementallyRefreshedTables != nil && HasExactOrWildcardMatch(syncer.config.Pg.IncrementallyRefreshedTables, pgSchemaTable.ToConfigArg()) 78 | 79 | syncer.syncerTable.SyncPgTable(pgSchemaTable, structureConn, copyConn, internalTableMetadata, incrementalRefresh) 80 | LogInfo(syncer.config, "Finished writing to Iceberg\n") 81 | 82 | syncedPgSchemaTables = append(syncedPgSchemaTables, pgSchemaTable) 83 | } 84 | } 85 | } 86 | 87 | syncer.WriteInternalStartSqlFile(syncedPgSchemaTables) 88 | 89 | if !syncer.config.Pg.PreserveUnsynced { 90 | syncer.deleteOldIcebergSchemaTables(syncedPgSchemaTables) 91 | } 92 | 93 | if syncer.config.Pg.IncrementallyRefreshedTables == nil { 94 | syncer.sendAnonymousAnalytics("sync-finish") 95 | } else { 96 | syncer.sendAnonymousAnalytics("sync-finish-incremental") 97 | } 98 | } 99 | 100 | func (syncer *Syncer) WriteInternalStartSqlFile(pgSchemaTables []PgSchemaTable) { 101 | childTablesByParentTable := make(map[string][]string) 102 | for _, pgSchemaTable := range pgSchemaTables { 103 | if pgSchemaTable.ParentPartitionedTable != "" { 104 | parent := pgSchemaTable.ParentPartitionedTableString() 105 | childTablesByParentTable[parent] = append(childTablesByParentTable[parent], pgSchemaTable.String()) 106 | } 107 | } 108 | 109 | queryRemapper := NewQueryRemapper(syncer.config, syncer.icebergReader, nil) 110 | queries := []string{} 111 | 112 | for parent, children := range childTablesByParentTable { 113 | // CREATE OR REPLACE TABLE test_table AS 114 | // SELECT * FROM iceberg_scan('/iceberg/public/test_table_q1/metadata/v1.metadata.json', skip_schema_inference = true) 115 | // UNION ALL 116 | // SELECT * FROM 
iceberg_scan('/iceberg/public/test_table_q2/metadata/v1.metadata.json', skip_schema_inference = true) 117 | 118 | subqueries := []string{} 119 | for _, child := range children { 120 | originalSubquery := fmt.Sprintf("SELECT * FROM %s", child) 121 | queryStatements, _, err := queryRemapper.ParseAndRemapQuery(originalSubquery) 122 | PanicIfError(syncer.config, err) 123 | subqueries = append(subqueries, queryStatements[0]) 124 | } 125 | queries = append(queries, fmt.Sprintf("CREATE OR REPLACE TABLE %s AS %s", parent, strings.Join(subqueries, " UNION ALL "))) 126 | } 127 | 128 | syncer.icebergWriter.WriteInternalStartSqlFile(queries) 129 | } 130 | 131 | // Example: 132 | // - From postgres://username:pas$:wor^d@host:port/database 133 | // - To postgres://username:pas%24%3Awor%5Ed@host:port/database 134 | func (syncer *Syncer) urlEncodePassword(databaseUrl string) string { 135 | // No credentials 136 | if !strings.Contains(databaseUrl, "@") { 137 | return databaseUrl 138 | } 139 | 140 | password := strings.TrimPrefix(databaseUrl, "postgresql://") 141 | password = strings.TrimPrefix(password, "postgres://") 142 | passwordEndIndex := strings.LastIndex(password, "@") 143 | password = password[:passwordEndIndex] 144 | 145 | // Credentials without password 146 | if !strings.Contains(password, ":") { 147 | return databaseUrl 148 | } 149 | 150 | _, password, _ = strings.Cut(password, ":") 151 | decodedPassword, err := url.QueryUnescape(password) 152 | if err != nil { 153 | return databaseUrl 154 | } 155 | 156 | // Password is already encoded 157 | if decodedPassword != password { 158 | return databaseUrl 159 | } 160 | 161 | return strings.Replace(databaseUrl, ":"+password+"@", ":"+url.QueryEscape(password)+"@", 1) 162 | } 163 | 164 | func (syncer *Syncer) shouldSyncTable(pgSchemaTable PgSchemaTable) bool { 165 | if syncer.config.Pg.ExcludeTables != nil && HasExactOrWildcardMatch(syncer.config.Pg.ExcludeTables, pgSchemaTable.ToConfigArg()) { 166 | return false 167 | } 168 | 169 | if syncer.config.Pg.IncludeTables != nil { 170 | return HasExactOrWildcardMatch(syncer.config.Pg.IncludeTables, pgSchemaTable.ToConfigArg()) 171 | } 172 | 173 | return true 174 | } 175 | 176 | func (syncer *Syncer) listPgSchemas(conn *pgx.Conn) []string { 177 | var schemas []string 178 | 179 | schemasRows, err := conn.Query( 180 | context.Background(), 181 | "SELECT schema_name FROM information_schema.schemata WHERE schema_name NOT IN ('pg_catalog', 'pg_toast', 'information_schema')", 182 | ) 183 | PanicIfError(syncer.config, err) 184 | defer schemasRows.Close() 185 | 186 | for schemasRows.Next() { 187 | var schema string 188 | err = schemasRows.Scan(&schema) 189 | PanicIfError(syncer.config, err) 190 | schemas = append(schemas, schema) 191 | } 192 | 193 | return schemas 194 | } 195 | 196 | func (syncer *Syncer) listPgSchemaTables(conn *pgx.Conn, schema string) []PgSchemaTable { 197 | var pgSchemaTables []PgSchemaTable 198 | 199 | tablesRows, err := conn.Query( 200 | context.Background(), 201 | ` 202 | SELECT pg_class.relname AS table, COALESCE(parent.relname, '') AS parent_partitioned_table 203 | FROM pg_class 204 | JOIN pg_namespace ON pg_namespace.oid = pg_class.relnamespace 205 | LEFT JOIN pg_inherits ON pg_inherits.inhrelid = pg_class.oid 206 | LEFT JOIN pg_class AS parent ON pg_inherits.inhparent = parent.oid 207 | WHERE pg_namespace.nspname = $1 AND pg_class.relkind = 'r'; 208 | `, 209 | schema, 210 | ) 211 | PanicIfError(syncer.config, err) 212 | defer tablesRows.Close() 213 | 214 | for tablesRows.Next() { 215 | 
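// Each row carries the table name plus, for declarative partitions, the parent partitioned
// table name (or an empty string, per the COALESCE in the query above).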
pgSchemaTable := PgSchemaTable{Schema: schema} 216 | err = tablesRows.Scan(&pgSchemaTable.Table, &pgSchemaTable.ParentPartitionedTable) 217 | PanicIfError(syncer.config, err) 218 | pgSchemaTables = append(pgSchemaTables, pgSchemaTable) 219 | } 220 | 221 | return pgSchemaTables 222 | } 223 | 224 | func (syncer *Syncer) newConnection(ctx context.Context, databaseUrl string) *pgx.Conn { 225 | conn, err := pgx.Connect(ctx, databaseUrl) 226 | PanicIfError(syncer.config, err) 227 | 228 | _, err = conn.Exec(ctx, "BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY DEFERRABLE") 229 | PanicIfError(syncer.config, err) 230 | 231 | return conn 232 | } 233 | 234 | func (syncer *Syncer) readInternalTableMetadata(pgSchemaTable PgSchemaTable) InternalTableMetadata { 235 | internalTableMetadata, err := syncer.icebergReader.InternalTableMetadata(pgSchemaTable) 236 | PanicIfError(syncer.config, err) 237 | return internalTableMetadata 238 | } 239 | 240 | func (syncer *Syncer) deleteOldIcebergSchemaTables(pgSchemaTables []PgSchemaTable) { 241 | var prefixedPgSchemaTables []PgSchemaTable 242 | for _, pgSchemaTable := range pgSchemaTables { 243 | prefixedPgSchemaTables = append( 244 | prefixedPgSchemaTables, 245 | PgSchemaTable{Schema: syncer.config.Pg.SchemaPrefix + pgSchemaTable.Schema, Table: pgSchemaTable.Table}, 246 | ) 247 | } 248 | 249 | icebergSchemas, err := syncer.icebergReader.Schemas() 250 | PanicIfError(syncer.config, err) 251 | 252 | for _, icebergSchema := range icebergSchemas { 253 | found := false 254 | for _, pgSchemaTable := range prefixedPgSchemaTables { 255 | if icebergSchema == pgSchemaTable.Schema { 256 | found = true 257 | break 258 | } 259 | } 260 | 261 | if !found { 262 | LogInfo(syncer.config, "Deleting", icebergSchema, "...") 263 | err := syncer.icebergWriter.DeleteSchema(icebergSchema) 264 | PanicIfError(syncer.config, err) 265 | } 266 | } 267 | 268 | icebergSchemaTables, err := syncer.icebergReader.SchemaTables() 269 | PanicIfError(syncer.config, err) 270 | 271 | for _, icebergSchemaTable := range icebergSchemaTables.Values() { 272 | found := false 273 | for _, pgSchemaTable := range prefixedPgSchemaTables { 274 | if icebergSchemaTable.String() == pgSchemaTable.String() { 275 | found = true 276 | break 277 | } 278 | } 279 | 280 | if !found { 281 | LogInfo(syncer.config, "Deleting", icebergSchemaTable.String(), "...") 282 | err := syncer.icebergWriter.DeleteSchemaTable(icebergSchemaTable) 283 | PanicIfError(syncer.config, err) 284 | } 285 | } 286 | } 287 | 288 | type AnonymousAnalyticsData struct { 289 | Command string `json:"command"` 290 | OsName string `json:"osName"` 291 | Version string `json:"version"` 292 | PgHost string `json:"pgHost"` 293 | } 294 | 295 | func (syncer *Syncer) sendAnonymousAnalytics(command string) { 296 | if syncer.config.DisableAnonymousAnalytics { 297 | return 298 | } 299 | 300 | data := AnonymousAnalyticsData{ 301 | Command: command, 302 | OsName: runtime.GOOS + "-" + runtime.GOARCH, 303 | Version: VERSION, 304 | PgHost: ParseDatabaseHost(syncer.config.Pg.DatabaseUrl), 305 | } 306 | if data.PgHost == "" || IsLocalHost(data.PgHost) { 307 | return 308 | } 309 | 310 | jsonData, err := json.Marshal(data) 311 | if err != nil { 312 | return 313 | } 314 | 315 | client := http.Client{Timeout: 5 * time.Second} 316 | _, _ = client.Post("https://api.bemidb.com/api/analytics", "application/json", bytes.NewBuffer(jsonData)) 317 | } 318 | -------------------------------------------------------------------------------- /src/syncer_table_test.go: 
-------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestCopyFromPgTableSql(t *testing.T) { 8 | config := &Config{} 9 | syncer := NewSyncerTable(config) 10 | pgSchemaTable := PgSchemaTable{Schema: "public", Table: "users"} 11 | 12 | t.Run("Full refresh", func(t *testing.T) { 13 | // [**************************************************************************************************] 14 | // 0 curr max xmin 15 | t.Run("Runs a full refresh if there is no previous internalTableMetadata", func(t *testing.T) { 16 | internalTableMetadata := InternalTableMetadata{} 17 | currentTxid := int64(100) 18 | 19 | sql := syncer.CopyFromPgTableSql(pgSchemaTable, internalTableMetadata, currentTxid, false) 20 | 21 | expected := "COPY (SELECT *, xmin::text::bigint AS xmin FROM \"public\".\"users\" ORDER BY xmin::text::bigint ASC) TO STDOUT WITH CSV HEADER NULL 'BEMIDB_NULL'" 22 | if sql != expected { 23 | t.Errorf("Expected SQL:\n%s\nGot:\n%s", expected, sql) 24 | } 25 | }) 26 | 27 | // [**************************************************************************************************] 28 | // 0 curr max xmin 29 | t.Run("Runs a full refresh after successful full sync", func(t *testing.T) { 30 | previousMaxXmin := uint32(500) 31 | initialTxid := int64(800) 32 | currentTxid := int64(1000) 33 | internalTableMetadata := InternalTableMetadata{LastRefreshMode: RefreshModeFull, LastTxid: initialTxid, MaxXmin: &previousMaxXmin} 34 | 35 | sql := syncer.CopyFromPgTableSql(pgSchemaTable, internalTableMetadata, currentTxid, false) 36 | 37 | expected := "COPY (SELECT *, xmin::text::bigint AS xmin FROM \"public\".\"users\" ORDER BY xmin::text::bigint ASC) TO STDOUT WITH CSV HEADER NULL 'BEMIDB_NULL'" 38 | if sql != expected { 39 | t.Errorf("Expected SQL:\n%s\nGot:\n%s", expected, sql) 40 | } 41 | }) 42 | }) 43 | 44 | t.Run("Continued in-progress refresh without a wraparound", func(t *testing.T) { 45 | // Full refresh in progress 46 | // [-----------------------|************************|************************|------------------------] 47 | // 0 prev max xmin init (wraparound) txid curr (wraparound) txid 32^2 48 | t.Run("Continues a full refresh before reaching the initial txid", func(t *testing.T) { 49 | previousMaxXmin := uint32(1_000_000_000) 50 | initialTxid := int64(2_000_000_000) + (int64(1) << 32) 51 | currentTxid := int64(3_000_000_000) + (int64(1) << 32) 52 | internalTableMetadata := InternalTableMetadata{LastRefreshMode: RefreshModeFullInProgress, LastTxid: initialTxid, MaxXmin: &previousMaxXmin} 53 | 54 | sql := syncer.CopyFromPgTableSql(pgSchemaTable, internalTableMetadata, currentTxid, true) 55 | 56 | expected := "COPY (SELECT *, xmin::text::bigint AS xmin FROM \"public\".\"users\" WHERE xmin::text::bigint >= 1000000000 AND xmin::text::bigint <= 3000000000 ORDER BY xmin::text::bigint ASC) TO STDOUT WITH CSV HEADER NULL 'BEMIDB_NULL'" 57 | if sql != expected { 58 | t.Errorf("Expected SQL:\n%s\nGot:\n%s", expected, sql) 59 | } 60 | }) 61 | 62 | // Incremental refresh 63 | // [-----------------------|************************|************************|------------------------] 64 | // 0 prev max xmin init (wraparound) txid curr (wraparound) txid 32^2 65 | t.Run("Starts an incremental refresh before reaching the initial txid equal to the current txid", func(t *testing.T) { 66 | previousMaxXmin := uint32(1_000_000_000) 67 | initialTxid := int64(2_000_000_000) + (int64(1) << 32) 68 | currentTxid := int64(3_000_000_000) + 
(int64(1) << 32) 69 | internalTableMetadata := InternalTableMetadata{LastRefreshMode: RefreshModeIncremental, LastTxid: initialTxid, MaxXmin: &previousMaxXmin} 70 | 71 | sql := syncer.CopyFromPgTableSql(pgSchemaTable, internalTableMetadata, currentTxid, true) 72 | 73 | expected := "COPY (SELECT *, xmin::text::bigint AS xmin FROM \"public\".\"users\" WHERE xmin::text::bigint > 1000000000 AND xmin::text::bigint <= 3000000000 ORDER BY xmin::text::bigint ASC) TO STDOUT WITH CSV HEADER NULL 'BEMIDB_NULL'" 74 | if sql != expected { 75 | t.Errorf("Expected SQL:\n%s\nGot:\n%s", expected, sql) 76 | } 77 | }) 78 | 79 | // [-----------------------|************************|-------------------------------------------------] 80 | // 0 prev max xmin init (wraparound) txid 32^2 81 | // curr (wraparound) txid 82 | t.Run("Continues a full refresh before reaching the initial txid equal to the current txid", func(t *testing.T) { 83 | previousMaxXmin := uint32(1_000_000_000) 84 | initialTxid := int64(2_000_000_000) + (int64(1) << 32) 85 | currentTxid := int64(2_000_000_000) + (int64(1) << 32) 86 | internalTableMetadata := InternalTableMetadata{LastRefreshMode: RefreshModeFullInProgress, LastTxid: initialTxid, MaxXmin: &previousMaxXmin} 87 | 88 | sql := syncer.CopyFromPgTableSql(pgSchemaTable, internalTableMetadata, currentTxid, true) 89 | 90 | expected := "COPY (SELECT *, xmin::text::bigint AS xmin FROM \"public\".\"users\" WHERE xmin::text::bigint >= 1000000000 AND xmin::text::bigint <= 2000000000 ORDER BY xmin::text::bigint ASC) TO STDOUT WITH CSV HEADER NULL 'BEMIDB_NULL'" 91 | if sql != expected { 92 | t.Errorf("Expected SQL:\n%s\nGot:\n%s", expected, sql) 93 | } 94 | }) 95 | 96 | // [-----------------------|------------------------|************************|------------------------] 97 | // 0 init (wraparound) txid prev max xmin curr (wraparound) txid 32^2 98 | t.Run("Continues a full refresh after reaching the initial txid", func(t *testing.T) { 99 | initialTxid := int64(1_000_000_000) 100 | previousMaxXmin := uint32(2_000_000_000) 101 | currentTxid := int64(3_000_000_000) 102 | internalTableMetadata := InternalTableMetadata{LastRefreshMode: RefreshModeFullInProgress, LastTxid: initialTxid, MaxXmin: &previousMaxXmin} 103 | 104 | sql := syncer.CopyFromPgTableSql(pgSchemaTable, internalTableMetadata, currentTxid, true) 105 | 106 | expected := "COPY (SELECT *, xmin::text::bigint AS xmin FROM \"public\".\"users\" WHERE xmin::text::bigint >= 2000000000 AND xmin::text::bigint <= 3000000000 ORDER BY xmin::text::bigint ASC) TO STDOUT WITH CSV HEADER NULL 'BEMIDB_NULL'" 107 | if sql != expected { 108 | t.Errorf("Expected SQL:\n%s\nGot:\n%s", expected, sql) 109 | } 110 | }) 111 | }) 112 | 113 | t.Run("Continued in-progress refresh with a wraparound", func(t *testing.T) { 114 | // [***********************|------------------------|************************|************************] 115 | // 0 curr wraparound txid prev max xmin init (wraparound) txid 32^2 116 | t.Run("Continues a full refresh before reaching the initial txid", func(t *testing.T) { 117 | currentTxid := int64(1_000_000_000) + (int64(1) << 32) 118 | previousMaxXmin := uint32(2_000_000_000) 119 | initialTxid := int64(3_000_000_000) 120 | internalTableMetadata := InternalTableMetadata{LastRefreshMode: RefreshModeFullInProgress, LastTxid: initialTxid, MaxXmin: &previousMaxXmin} 121 | 122 | sql := syncer.CopyFromPgTableSql(pgSchemaTable, internalTableMetadata, currentTxid, true) 123 | 124 | expected := "COPY (SELECT *, xmin::text::bigint AS xmin FROM 
\"public\".\"users\" WHERE xmin::text::bigint >= 2000000000 OR xmin::text::bigint <= 1000000000 ORDER BY xmin::text::bigint <= 1000000000 ASC, xmin::text::bigint ASC) TO STDOUT WITH CSV HEADER NULL 'BEMIDB_NULL'" 125 | if sql != expected { 126 | t.Errorf("Expected SQL:\n%s\nGot:\n%s", expected, sql) 127 | } 128 | }) 129 | 130 | // [***********************|------------------------|------------------------|************************] 131 | // 0 curr wraparound txid init (wraparound) txid prev max xmin 32^2 132 | t.Run("Continues a full refresh after reaching the initial txid", func(t *testing.T) { 133 | currentTxid := int64(1_000_000_000) + (int64(1) << 32) 134 | initialTxid := int64(2_000_000_000) 135 | previousMaxXmin := uint32(3_000_000_000) 136 | internalTableMetadata := InternalTableMetadata{LastRefreshMode: RefreshModeFullInProgress, LastTxid: initialTxid, MaxXmin: &previousMaxXmin} 137 | 138 | sql := syncer.CopyFromPgTableSql(pgSchemaTable, internalTableMetadata, currentTxid, true) 139 | 140 | expected := "COPY (SELECT *, xmin::text::bigint AS xmin FROM \"public\".\"users\" WHERE xmin::text::bigint >= 3000000000 OR xmin::text::bigint <= 1000000000 ORDER BY xmin::text::bigint <= 1000000000 ASC, xmin::text::bigint ASC) TO STDOUT WITH CSV HEADER NULL 'BEMIDB_NULL'" 141 | if sql != expected { 142 | t.Errorf("Expected SQL:\n%s\nGot:\n%s", expected, sql) 143 | } 144 | }) 145 | 146 | // [-----------------------|************************|------------------------|------------------------] 147 | // 0 prev max xmin curr wraparound txid init (wraparound) txid 32^2 148 | t.Run("Continues a full refresh if a wraparound occurred during a full sync and max xmin was reset", func(t *testing.T) { 149 | previousMaxXmin := uint32(1_000_000_000) 150 | currentTxid := int64(2_000_000_000) + (int64(1) << 32) 151 | initialTxid := int64(3_000_000_000) 152 | internalTableMetadata := InternalTableMetadata{LastRefreshMode: RefreshModeFullInProgress, LastTxid: initialTxid, MaxXmin: &previousMaxXmin} 153 | 154 | sql := syncer.CopyFromPgTableSql(pgSchemaTable, internalTableMetadata, currentTxid, true) 155 | 156 | expected := "COPY (SELECT *, xmin::text::bigint AS xmin FROM \"public\".\"users\" WHERE xmin::text::bigint >= 1000000000 AND xmin::text::bigint <= 2000000000 ORDER BY xmin::text::bigint ASC) TO STDOUT WITH CSV HEADER NULL 'BEMIDB_NULL'" 157 | if sql != expected { 158 | t.Errorf("Expected SQL:\n%s\nGot:\n%s", expected, sql) 159 | } 160 | }) 161 | 162 | // [***********************|************************|------------------------|************************] 163 | // 0 init (wraparound) txid curr wraparound txid prev max xmin 32^2 164 | t.Run("Continues a full refresh after the current wrapparound txid exceeds the initial txid", func(t *testing.T) { 165 | initialTxid := int64(1_000_000_000) 166 | currentTxid := int64(2_000_000_000) + (int64(1) << 32) 167 | previousMaxXmin := uint32(3_000_000_000) 168 | internalTableMetadata := InternalTableMetadata{LastRefreshMode: RefreshModeFullInProgress, LastTxid: initialTxid, MaxXmin: &previousMaxXmin} 169 | 170 | sql := syncer.CopyFromPgTableSql(pgSchemaTable, internalTableMetadata, currentTxid, true) 171 | 172 | expected := "COPY (SELECT *, xmin::text::bigint AS xmin FROM \"public\".\"users\" WHERE xmin::text::bigint >= 3000000000 OR xmin::text::bigint <= 2000000000 ORDER BY xmin::text::bigint <= 2000000000 ASC, xmin::text::bigint ASC) TO STDOUT WITH CSV HEADER NULL 'BEMIDB_NULL'" 173 | if sql != expected { 174 | t.Errorf("Expected SQL:\n%s\nGot:\n%s", expected, sql) 175 | } 
176 | }) 177 | }) 178 | } 179 | -------------------------------------------------------------------------------- /src/syncer_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "testing" 4 | 5 | func TestShouldSyncTable(t *testing.T) { 6 | t.Run("returns true when no filters are set", func(t *testing.T) { 7 | config := &Config{ 8 | Pg: PgConfig{ 9 | DatabaseUrl: "postgres://user:pass@localhost:5432/db", 10 | }, 11 | } 12 | syncer := NewSyncer(config) 13 | pgSchemaTable := PgSchemaTable{Schema: "public", Table: "users"} 14 | 15 | if !syncer.shouldSyncTable(pgSchemaTable) { 16 | t.Error("Expected shouldSyncTable to return true when no filters are set") 17 | } 18 | }) 19 | 20 | t.Run("respects include filter", func(t *testing.T) { 21 | config := &Config{ 22 | Pg: PgConfig{ 23 | DatabaseUrl: "postgres://user:pass@localhost:5432/db", 24 | IncludeTables: []string{"public.users", "public.orders"}, 25 | }, 26 | } 27 | syncer := NewSyncer(config) 28 | 29 | pgSchemaTableIncluded := PgSchemaTable{Schema: "public", Table: "users"} 30 | if !syncer.shouldSyncTable(pgSchemaTableIncluded) { 31 | t.Error("Expected shouldSyncTable to return true for included table") 32 | } 33 | 34 | pgSchemaTableExcluded := PgSchemaTable{Schema: "public", Table: "secrets"} 35 | if syncer.shouldSyncTable(pgSchemaTableExcluded) { 36 | t.Error("Expected shouldSyncTable to return false for non-included table") 37 | } 38 | }) 39 | 40 | t.Run("respects exclude filter", func(t *testing.T) { 41 | config := &Config{ 42 | Pg: PgConfig{ 43 | DatabaseUrl: "postgres://user:pass@localhost:5432/db", 44 | ExcludeTables: []string{"public.secrets", "public.cache"}, 45 | }, 46 | } 47 | syncer := NewSyncer(config) 48 | 49 | pgSchemaTableIncluded := PgSchemaTable{Schema: "public", Table: "users"} 50 | if !syncer.shouldSyncTable(pgSchemaTableIncluded) { 51 | t.Error("Expected shouldSyncTable to return true for non-excluded table") 52 | } 53 | 54 | pgSchemaTableExcluded := PgSchemaTable{Schema: "public", Table: "secrets"} 55 | if syncer.shouldSyncTable(pgSchemaTableExcluded) { 56 | t.Error("Expected shouldSyncTable to return false for excluded table") 57 | } 58 | }) 59 | } 60 | -------------------------------------------------------------------------------- /src/utils.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "crypto/hmac" 5 | "crypto/rand" 6 | "crypto/sha256" 7 | "encoding/base64" 8 | "errors" 9 | "fmt" 10 | "net/url" 11 | "regexp" 12 | "strconv" 13 | "strings" 14 | "time" 15 | "unicode" 16 | 17 | "golang.org/x/crypto/pbkdf2" 18 | ) 19 | 20 | func IntToString(i int) string { 21 | return strconv.Itoa(i) 22 | } 23 | 24 | func Int64ToString(i int64) string { 25 | return strconv.FormatInt(i, 10) 26 | } 27 | 28 | func Uint32ToString(i uint32) string { 29 | return strconv.FormatUint(uint64(i), 10) 30 | } 31 | 32 | func StringToInt(s string) (int, error) { 33 | return strconv.Atoi(s) 34 | } 35 | 36 | func StringToInt64(s string) (int64, error) { 37 | return strconv.ParseInt(s, 10, 64) 38 | } 39 | 40 | func StringToUint32(s string) (uint32, error) { 41 | i, err := strconv.ParseUint(s, 10, 32) 42 | return uint32(i), err 43 | } 44 | 45 | func StringToScramSha256(password string) string { 46 | saltLength := 16 47 | digestLength := 32 48 | iterations := 4096 49 | clientKey := []byte("Client Key") 50 | serverKey := []byte("Server Key") 51 | 52 | salt := make([]byte, saltLength) 53 | _, err := rand.Read(salt) 
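// A fresh random salt makes each generated SCRAM-SHA-256 verifier unique even for identical
// passwords; the salt is base64-encoded into the returned verifier string below.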
54 | if err != nil { 55 | return "" 56 | } 57 | 58 | digestKey := pbkdf2.Key([]byte(password), salt, iterations, digestLength, sha256.New) 59 | clientKeyHash := hmacSha256Hash(digestKey, clientKey) 60 | serverKeyHash := hmacSha256Hash(digestKey, serverKey) 61 | storedKeyHash := sha256Hash(clientKeyHash) 62 | 63 | return fmt.Sprintf( 64 | "SCRAM-SHA-256$%d:%s$%s:%s", 65 | iterations, 66 | base64.StdEncoding.EncodeToString(salt), 67 | base64.StdEncoding.EncodeToString(storedKeyHash), 68 | base64.StdEncoding.EncodeToString(serverKeyHash), 69 | ) 70 | } 71 | 72 | func StringDateToTime(str string) (time.Time, error) { 73 | // Golang's time.Parse() function does not support parsing dates with 5+ digit years 74 | // So we need to handle this case manually by parsing the year separately 75 | var nonStandardYear int 76 | var err error 77 | parts := strings.Split(str, "-") 78 | if len(parts) == 3 && len(parts[0]) > 4 { 79 | nonStandardYear, err = StringToInt(parts[0]) 80 | if err != nil { 81 | return time.Time{}, errors.New("Invalid year: " + parts[0]) 82 | } 83 | 84 | str = str[len(parts[0])-4:] // Remove the prefix from str leaving only the standard 10 characters (YYYY-MM-DD) 85 | } 86 | 87 | parsedTime, err := time.Parse("2006-01-02", str) 88 | 89 | // If the year is non-standard, add the year difference to the parsed time after parsing 90 | if err == nil && nonStandardYear != 0 { 91 | parsedTime = parsedTime.AddDate(nonStandardYear-parsedTime.Year(), 0, 0) 92 | return parsedTime, nil 93 | } 94 | 95 | return parsedTime, err 96 | } 97 | 98 | func StringContainsUpper(str string) bool { 99 | for _, char := range str { 100 | if unicode.IsUpper(char) { 101 | return true 102 | } 103 | } 104 | return false 105 | } 106 | 107 | func Reverse[T any](originalSlice []T) []T { 108 | length := len(originalSlice) 109 | reversedSlice := make([]T, length) 110 | 111 | for i, elem := range originalSlice { 112 | reversedSlice[length-1-i] = elem 113 | } 114 | 115 | return reversedSlice 116 | } 117 | 118 | func HasExactOrWildcardMatch(strs []string, value string) bool { 119 | for _, str := range strs { 120 | if str == value { 121 | return true 122 | } 123 | 124 | if strings.Contains(str, "*") { 125 | pattern := strings.ReplaceAll(regexp.QuoteMeta(str), "\\*", ".*") 126 | matched, _ := regexp.MatchString("\\A"+pattern+"\\z", value) 127 | if matched { 128 | return true 129 | } 130 | } 131 | } 132 | 133 | return false 134 | } 135 | 136 | func ParseDatabaseHost(dbUrl string) string { 137 | if dbUrl == "" { 138 | return "" 139 | } 140 | 141 | url, err := url.Parse(dbUrl) 142 | if err != nil { 143 | return "" 144 | } 145 | 146 | return url.Hostname() 147 | } 148 | 149 | func IsLocalHost(host string) bool { 150 | return strings.HasPrefix(host, "127.0.0.1") || strings.HasPrefix(host, "localhost") 151 | } 152 | 153 | func PgWraparoundTxid(txid int64) int64 { 154 | return txid % (int64(1) << 32) 155 | } 156 | 157 | func IsPgWraparoundTxid(txid int64) bool { 158 | return txid > (int64(1) << 32) 159 | } 160 | 161 | func hmacSha256Hash(key []byte, message []byte) []byte { 162 | hash := hmac.New(sha256.New, key) 163 | hash.Write(message) 164 | return hash.Sum(nil) 165 | } 166 | 167 | func sha256Hash(data []byte) []byte { 168 | hash := sha256.New() 169 | hash.Write(data) 170 | return hash.Sum(nil) 171 | } 172 | --------------------------------------------------------------------------------