├── .dockerignore ├── .github ├── CODEOWNERS └── workflows │ ├── audit.yml │ └── general.yml ├── .gitignore ├── .vscode └── settings.json ├── Cargo.toml ├── LICENSE ├── README.md ├── api ├── .sqlx │ ├── query-046b6b7500c21d7236a146287f58e793388e5a22ba165e0018cfb513a12059dd.json │ ├── query-04a6be1adccd3ee3f1e5e52a372d9924c22de5d3dbe0dfb2b2459ac6b89d3801.json │ ├── query-0924fd49251cef93c0729c6ce5c803c4d6e340dc6ef9f1a948ff7a60d8e7022f.json │ ├── query-0bd3a776b8c3a795eb4a7df13da9f7a10a9fa258c223a41c9008b80da60ade63.json │ ├── query-0cd20ffc83a735c61f55743eb7821611a34a926404d528619eb9897e42416652.json │ ├── query-0d96b66a879595ee2ad34b8ff531431737daaba7e0dd73641c1669a9fe8858f3.json │ ├── query-13b095006e9d49b7f9ce6f0177f7f0a1cec433d9c05092d47850bca1d1348fac.json │ ├── query-16fb6d2a6c717b2f552ded179440d88f88e3f0283bebcc9e7f34a3894f10074b.json │ ├── query-2249847047d736d83932f6efcd5a7279520c4f7f9a3d58541d0bb5b30493b5d1.json │ ├── query-2f67b6c19e969b20581cdef7bf92b0b595bac3aa7e65eb156c72bc181e38b1d8.json │ ├── query-344b7bac3092c85f7a337e6c120fa263b19141f3059f8f62b00bdb8f8a656fcb.json │ ├── query-3740ae4bf53167d8113e43d71d85b7552d2122596820f849a7b366a3bed8703f.json │ ├── query-389faf1c8ea8aac1487e734040e8bb7a304719a2f1188be4a62c386399a647a7.json │ ├── query-40438470eae2af511cd51eba4075e5f14de7fa77ca0e6843aeeed497f66149ef.json │ ├── query-44c74dd07b704e7fe90745914976a792e07f1cd63e9307d7b111c0f85b4a2829.json │ ├── query-51a192c02465ee1b3ef3ce40a730223a28deeaed1b6bb70a5df2e48d92d8e197.json │ ├── query-5491e32f69e73ad62e54ac56228bdfbc4fc899103508a4a6e4eefc0f8aede791.json │ ├── query-665e42a3a8948a66cb687d7327b9930792f613f20dfb2511de0edbd8f3abc7e1.json │ ├── query-7455471d7c982b9456c6e051a64636e79308fa46f7ad60062a62a0f8d245af6b.json │ ├── query-75da20a892b377e62bb27a051e5712f129df1d466ac49a63eb2898912f05bccb.json │ ├── query-77a1de78ec6fc1f6382e7594ede907470ac3808a5aeb7e458fab380fe78d823e.json │ ├── query-7864a7c199c4c65647e620f5c7e77c9f4f8cda589e565fa2d2d5a2f29df9c60e.json │ ├── query-808c0febda267eabf1ac487a5d8e9b6b8e67e7582dd9d3c4d23c409258506645.json │ ├── query-96a26fc4caf7b072bd3d3ff4705cdd3a5de0034143b9a53c7859d72fe35efff7.json │ ├── query-ad02b73edec5b9ee54244aa340d450d20cac7447e473629abf5cac0ab0266978.json │ ├── query-b235f8fb03837d21191916b974fe2166db455218f1e1308d3a74b1bc41cd8008.json │ ├── query-c12b3f5b0c165b08628eb2c4c9eb08c56afa0513f24eb147c431ce229b1efc24.json │ ├── query-c6214e61e82efa1d305f8dd85ce47f71bca40b81d0e4c54352e0676767de9bc3.json │ ├── query-c76d3e7cbd7f1d47acef03ffcdcd3e505581e1b595ffc84d1ef1e824d49d7bb5.json │ ├── query-d4a7110a5dac90f9f60d89ffa131b968191a0a5e7b9eaaa2c79982f2d8064702.json │ ├── query-defb1d999e12327ca2840bdd3d524d1deeaa0fde8ed918157deab043cfbe911c.json │ ├── query-e0135e45bbf859ec1bbf27654d84b49fbe41f4d41fd4b8cc9d88f8762884f157.json │ └── query-e930b526695677d24b02ada5778b01d312df0d47b420f215ba9f9d8d7876c70b.json ├── Cargo.toml ├── Dockerfile ├── README.md ├── configuration │ ├── base.yaml │ └── dev.yaml ├── migrations │ ├── 20240819111151_create_tenants.sql │ ├── 20240821104756_create_sources.sql │ ├── 20240822044837_create_sinks.sql │ ├── 20240828090309_create_images.sql │ ├── 20240902113739_create_replicators.sql │ ├── 20240903105045_create_pipelines.sql │ ├── 20250122125912_cascade_delete_tenants.sql │ ├── 20250506035802_cascade_delete_pipelines.sql │ ├── 20250526083530_rename_sinks_to_destinations.sql │ └── 20250526084758_rename_sink_id_to_destination_id.sql ├── src │ ├── authentication.rs │ ├── configuration.rs │ ├── db │ │ ├── destinations.rs │ 
│ ├── destinations_pipelines.rs │ │ ├── images.rs │ │ ├── mod.rs │ │ ├── pipelines.rs │ │ ├── publications.rs │ │ ├── replicators.rs │ │ ├── sources.rs │ │ ├── tables.rs │ │ ├── tenants.rs │ │ └── tenants_sources.rs │ ├── encryption.rs │ ├── k8s_client.rs │ ├── lib.rs │ ├── main.rs │ ├── replicator_config.rs │ ├── routes │ │ ├── destinations.rs │ │ ├── destinations_pipelines.rs │ │ ├── health_check.rs │ │ ├── images.rs │ │ ├── mod.rs │ │ ├── pipelines.rs │ │ ├── sources.rs │ │ ├── sources │ │ │ ├── publications.rs │ │ │ └── tables.rs │ │ ├── tenants.rs │ │ └── tenants_sources.rs │ ├── span_builder.rs │ ├── startup.rs │ └── utils.rs └── tests │ ├── common │ ├── database.rs │ ├── mod.rs │ └── test_app.rs │ ├── integration │ ├── destination_test.rs │ ├── destinations_pipelines.rs │ ├── health_check_test.rs │ ├── images_test.rs │ ├── mod.rs │ ├── pipelines_test.rs │ ├── sources_test.rs │ ├── tenants_sources_test.rs │ └── tenants_test.rs │ └── mod.rs ├── deny.toml ├── etl ├── Cargo.toml ├── docs │ ├── Replication in Postgres.md │ └── replication_trace.txt ├── examples │ ├── bigquery.rs │ ├── duckdb.rs │ └── stdout.rs ├── src │ ├── clients │ │ ├── bigquery.rs │ │ ├── duckdb.rs │ │ ├── mod.rs │ │ └── postgres.rs │ ├── conversions │ │ ├── bool.rs │ │ ├── cdc_event.rs │ │ ├── hex.rs │ │ ├── mod.rs │ │ ├── numeric.rs │ │ ├── table_row.rs │ │ └── text.rs │ ├── lib.rs │ ├── main.rs │ ├── pipeline │ │ ├── batching │ │ │ ├── data_pipeline.rs │ │ │ ├── mod.rs │ │ │ └── stream.rs │ │ ├── destinations │ │ │ ├── bigquery.rs │ │ │ ├── duckdb │ │ │ │ ├── destination.rs │ │ │ │ ├── executor.rs │ │ │ │ └── mod.rs │ │ │ ├── mod.rs │ │ │ └── stdout.rs │ │ ├── mod.rs │ │ └── sources │ │ │ ├── mod.rs │ │ │ └── postgres.rs │ └── v2 │ │ ├── concurrency │ │ ├── future.rs │ │ └── mod.rs │ │ ├── destination │ │ ├── base.rs │ │ ├── memory.rs │ │ └── mod.rs │ │ ├── mod.rs │ │ ├── pipeline.rs │ │ ├── replication │ │ ├── apply.rs │ │ ├── client.rs │ │ ├── mod.rs │ │ └── table_sync.rs │ │ ├── state │ │ ├── mod.rs │ │ ├── pipeline.rs │ │ ├── store │ │ │ ├── base.rs │ │ │ ├── memory.rs │ │ │ └── mod.rs │ │ └── table.rs │ │ └── workers │ │ ├── apply.rs │ │ ├── base.rs │ │ ├── mod.rs │ │ ├── pool.rs │ │ └── table_sync.rs └── tests │ ├── common │ ├── database.rs │ ├── destination.rs │ ├── mod.rs │ ├── pipeline.rs │ └── table.rs │ ├── integration │ ├── mod.rs │ └── pipeline_test.rs │ └── mod.rs ├── postgres ├── Cargo.toml └── src │ ├── lib.rs │ ├── schema.rs │ ├── sqlx │ ├── mod.rs │ ├── options.rs │ └── test_utils.rs │ └── tokio │ ├── mod.rs │ ├── options.rs │ └── test_utils.rs ├── replicator ├── Cargo.toml ├── Dockerfile ├── configuration │ ├── base.yaml │ └── dev.yaml └── src │ ├── configuration.rs │ └── main.rs ├── scripts └── init_db.sh └── telemetry ├── Cargo.toml └── src └── lib.rs /.dockerignore: -------------------------------------------------------------------------------- 1 | target/ -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @supabase/postgres -------------------------------------------------------------------------------- /.github/workflows/audit.yml: -------------------------------------------------------------------------------- 1 | # copied from Luca Palmieri's gist here: https://gist.github.com/LukeMathWalker/5ae1107432ce283310c3e601fac915f3 2 | name: Security audit 3 | on: 4 | schedule: 5 | - cron: "0 0 * * *" 6 | push: 7 | paths: 8 | - "**/Cargo.toml" 9 | - 
"**/Cargo.lock" 10 | 11 | permissions: 12 | contents: read 13 | 14 | jobs: 15 | security_audit: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v3 19 | - uses: taiki-e/install-action@cargo-deny 20 | - name: Scan for vulnerabilities 21 | run: cargo deny check advisories 22 | -------------------------------------------------------------------------------- /.github/workflows/general.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: [push, pull_request] 4 | 5 | env: 6 | CARGO_TERM_COLOR: always 7 | 8 | permissions: 9 | contents: read 10 | 11 | jobs: 12 | fmt: 13 | name: Rustfmt 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v3 17 | - uses: dtolnay/rust-toolchain@stable 18 | with: 19 | components: rustfmt 20 | - name: Enforce formatting 21 | run: cargo fmt --check 22 | 23 | clippy: 24 | name: Clippy 25 | runs-on: ubuntu-latest 26 | steps: 27 | - uses: actions/checkout@v3 28 | - uses: dtolnay/rust-toolchain@stable 29 | with: 30 | components: clippy 31 | - name: Linting 32 | run: cargo clippy --all-targets --all-features -- -D warnings 33 | 34 | coverage: 35 | name: Tests 36 | runs-on: ubuntu-latest 37 | permissions: 38 | contents: read 39 | id-token: write 40 | services: 41 | postgres: 42 | image: postgres:15 43 | env: 44 | POSTGRES_USER: postgres 45 | POSTGRES_PASSWORD: postgres 46 | POSTGRES_DB: postgres 47 | ports: 48 | - 5430:5432 49 | steps: 50 | - name: Checkout repository 51 | uses: actions/checkout@v3 52 | 53 | - name: Wait for Postgres to be ready 54 | run: | 55 | until pg_isready -h localhost -p 5430; do 56 | echo "Waiting for Postgres..." 57 | sleep 1 58 | done 59 | 60 | - name: Enable logical WAL 61 | run: | 62 | PGPASSWORD=postgres psql -h localhost -p 5430 -U postgres -c "ALTER SYSTEM SET wal_level = 'logical';" 63 | 64 | - name: Restart Postgres service container 65 | run: | 66 | docker restart ${{ job.services.postgres.id }} 67 | 68 | - name: Install sqlx-cli 69 | run: | 70 | cargo install sqlx-cli \ 71 | --features native-tls,postgres \ 72 | --no-default-features \ 73 | --locked 74 | 75 | - name: Migrate database 76 | run: | 77 | sudo apt-get install libpq-dev -y 78 | SKIP_DOCKER=true ./scripts/init_db.sh 79 | 80 | - name: Install cargo-llvm-cov 81 | uses: taiki-e/install-action@cargo-llvm-cov 82 | 83 | - name: Generate code coverage 84 | id: coverage 85 | run: | 86 | cargo llvm-cov test \ 87 | --workspace --no-fail-fast \ 88 | --lcov --output-path lcov.info 89 | 90 | - name: Coveralls upload 91 | uses: coverallsapp/github-action@v2 92 | with: 93 | github-token: ${{ secrets.GITHUB_TOKEN }} 94 | path-to-lcov: lcov.info 95 | debug: true 96 | 97 | docker: 98 | name: Docker 99 | runs-on: ubuntu-latest 100 | if: github.ref == 'refs/heads/main' 101 | steps: 102 | - name: Set up Docker Buildx 103 | uses: docker/setup-buildx-action@v3 104 | - name: Login to Docker Hub 105 | uses: docker/login-action@v3 106 | with: 107 | username: ${{ vars.DOCKERHUB_USERNAME }} 108 | password: ${{ secrets.DOCKERHUB_TOKEN }} 109 | - name: Build and push api image 110 | uses: docker/build-push-action@v6 111 | with: 112 | file: ./api/Dockerfile 113 | push: true 114 | tags: ${{ vars.DOCKERHUB_USERNAME }}/api:${{ github.head_ref || github.ref_name }}.${{ github.sha }} 115 | - name: Build and push replicator image 116 | uses: docker/build-push-action@v6 117 | with: 118 | file: ./replicator/Dockerfile 119 | push: true 120 | tags: ${{ vars.DOCKERHUB_USERNAME }}/replicator:${{ github.head_ref || 
github.ref_name }}.${{ github.sha }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | debug/ 4 | target/ 5 | 6 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 7 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 8 | Cargo.lock 9 | 10 | # These are backup files generated by rustfmt 11 | **/*.rs.bk 12 | 13 | # MSVC Windows builds of rustc generate these, which store debugging information 14 | *.pdb 15 | 16 | # JetBrains IDEs 17 | .idea 18 | 19 | # Added by cargo 20 | /target 21 | .env 22 | 23 | # macOS system files 24 | .DS_Store 25 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "rust-analyzer.cargo.features": "all" 3 | } 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | resolver = "2" 3 | members = ["api", "etl", "postgres", "replicator", "telemetry"] 4 | 5 | [workspace.dependencies] 6 | api = { path = "api" } 7 | etl = { path = "etl" } 8 | postgres = { path = "postgres" } 9 | replicator = { path = "replicator" } 10 | telemetry = { path = "telemetry" } 11 | 12 | actix-web = { version = "4", default-features = false } 13 | actix-web-httpauth = { version = "0.8.2", default-features = false } 14 | anyhow = { version = "1.0", default-features = false } 15 | async-trait = { version = "0.1" } 16 | aws-lc-rs = { version = "1.8.1", default-features = false } 17 | base64 = { version = "0.22.1", default-features = false } 18 | bigdecimal = { version = "0.4.6", default-features = false } 19 | bytes = { version = "1.0" } 20 | byteorder = { version = "1.5.0", default-features = false } 21 | chrono = { version = "0.4", default-features = false } 22 | clap = { version = "4.5", default-features = false } 23 | config = { version = "0.14", default-features = false } 24 | constant_time_eq = { version = "0.3.1" } 25 | duckdb = { version = "1.0", default-features = false, features = ["bundled"] } 26 | futures = { version = "0.3.31", default-features = false } 27 | gcp-bigquery-client = { version = "0.25.0", default-features = false } 28 | # gcp-bigquery-client = { git = "https://github.com/imor/gcp-bigquery-client", default-features = false, rev = "d9fe29a33f9e4dc12c4adf061035ee1628da5e39" } 29 | k8s-openapi = { version = "0.23.0", default-features = false } 30 | kube = { version = "0.96.0", default-features = false } 31 | pg_escape = { version = "0.1.1", default-features = false } 32 | pin-project-lite = { version = "0.2", default-features = false } 33 | postgres-protocol = { git = "https://github.com/imor/rust-postgres", rev = "20265ef38e32a06f76b6f9b678e2077fc2211f6b" } 34 | postgres-replication = { git = "https://github.com/imor/rust-postgres", default-features = false, rev = "20265ef38e32a06f76b6f9b678e2077fc2211f6b" } 35 | prost = { version = "0.13.1", default-features = false } 36 | rand = { version = "0.8.5", default-features = false } 37 | reqwest = { version = "0.12", default-features = false } 38 | rustls = { version = "0.23.12", default-features = false } 39 | rustls-pemfile = { version = "2.2.0", default-features = false } 40 
| rustyline = { version = "14.0.0", default-features = false } 41 | secrecy = { version = "0.8.0", default-features = false } 42 | serde = { version = "1.0", default-features = false } 43 | serde_json = { version = "1.0", default-features = false } 44 | sqlx = { version = "0.8.2", default-features = false } 45 | thiserror = "1.0" 46 | tokio = { version = "1.38", default-features = false } 47 | tokio-postgres = { git = "https://github.com/imor/rust-postgres", default-features = false, rev = "20265ef38e32a06f76b6f9b678e2077fc2211f6b" } 48 | tokio-postgres-rustls = { git = "https://github.com/imor/tokio-postgres-rustls", default-features = false } 49 | tracing = { version = "0.1", default-features = false } 50 | tracing-actix-web = { version = "0.7", default-features = false } 51 | tracing-appender = { version = "0.2.3", default-features = false } 52 | tracing-log = { version = "0.2.0", default-features = false } 53 | tracing-subscriber = { version = "0.3", default-features = false } 54 | utoipa = { version = "4.2.3", default-features = false } 55 | utoipa-swagger-ui = { version = "7.1.0", default-features = false } 56 | uuid = { version = "1.10.0", default-features = false } 57 | 58 | # [patch."https://github.com/imor/gcp-bigquery-client"] 59 | # gcp-bigquery-client = { path = "../gcp-bigquery-client" } 60 | -------------------------------------------------------------------------------- /api/.sqlx/query-046b6b7500c21d7236a146287f58e793388e5a22ba165e0018cfb513a12059dd.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n delete from app.tenants\n where id = $1\n returning id\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Text" 10 | } 11 | ], 12 | "parameters": { 13 | "Left": [ 14 | "Text" 15 | ] 16 | }, 17 | "nullable": [ 18 | false 19 | ] 20 | }, 21 | "hash": "046b6b7500c21d7236a146287f58e793388e5a22ba165e0018cfb513a12059dd" 22 | } 23 | -------------------------------------------------------------------------------- /api/.sqlx/query-04a6be1adccd3ee3f1e5e52a372d9924c22de5d3dbe0dfb2b2459ac6b89d3801.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n select r.id, r.tenant_id, r.image_id\n from app.replicators r\n join app.pipelines p on r.id = p.replicator_id\n where r.tenant_id = $1 and p.tenant_id = $1 and p.id = $2\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | }, 11 | { 12 | "ordinal": 1, 13 | "name": "tenant_id", 14 | "type_info": "Text" 15 | }, 16 | { 17 | "ordinal": 2, 18 | "name": "image_id", 19 | "type_info": "Int8" 20 | } 21 | ], 22 | "parameters": { 23 | "Left": [ 24 | "Text", 25 | "Int8" 26 | ] 27 | }, 28 | "nullable": [ 29 | false, 30 | false, 31 | false 32 | ] 33 | }, 34 | "hash": "04a6be1adccd3ee3f1e5e52a372d9924c22de5d3dbe0dfb2b2459ac6b89d3801" 35 | } 36 | -------------------------------------------------------------------------------- /api/.sqlx/query-0924fd49251cef93c0729c6ce5c803c4d6e340dc6ef9f1a948ff7a60d8e7022f.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n insert into app.sources (tenant_id, name, config)\n values ($1, $2, $3)\n returning id\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | } 11 | ], 12 | "parameters": { 13 | "Left": [ 14 
| "Text", 15 | "Text", 16 | "Jsonb" 17 | ] 18 | }, 19 | "nullable": [ 20 | false 21 | ] 22 | }, 23 | "hash": "0924fd49251cef93c0729c6ce5c803c4d6e340dc6ef9f1a948ff7a60d8e7022f" 24 | } 25 | -------------------------------------------------------------------------------- /api/.sqlx/query-0bd3a776b8c3a795eb4a7df13da9f7a10a9fa258c223a41c9008b80da60ade63.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n update app.tenants\n set name = $1\n where id = $2\n returning id\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Text" 10 | } 11 | ], 12 | "parameters": { 13 | "Left": [ 14 | "Text", 15 | "Text" 16 | ] 17 | }, 18 | "nullable": [ 19 | false 20 | ] 21 | }, 22 | "hash": "0bd3a776b8c3a795eb4a7df13da9f7a10a9fa258c223a41c9008b80da60ade63" 23 | } 24 | -------------------------------------------------------------------------------- /api/.sqlx/query-0cd20ffc83a735c61f55743eb7821611a34a926404d528619eb9897e42416652.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n select id, tenant_id, name, config\n from app.sources\n where tenant_id = $1\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | }, 11 | { 12 | "ordinal": 1, 13 | "name": "tenant_id", 14 | "type_info": "Text" 15 | }, 16 | { 17 | "ordinal": 2, 18 | "name": "name", 19 | "type_info": "Text" 20 | }, 21 | { 22 | "ordinal": 3, 23 | "name": "config", 24 | "type_info": "Jsonb" 25 | } 26 | ], 27 | "parameters": { 28 | "Left": [ 29 | "Text" 30 | ] 31 | }, 32 | "nullable": [ 33 | false, 34 | false, 35 | false, 36 | false 37 | ] 38 | }, 39 | "hash": "0cd20ffc83a735c61f55743eb7821611a34a926404d528619eb9897e42416652" 40 | } 41 | -------------------------------------------------------------------------------- /api/.sqlx/query-0d96b66a879595ee2ad34b8ff531431737daaba7e0dd73641c1669a9fe8858f3.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n select i.id, i.name, i.is_default\n from app.images i\n join app.replicators r on i.id = r.image_id\n where r.id = $1\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | }, 11 | { 12 | "ordinal": 1, 13 | "name": "name", 14 | "type_info": "Text" 15 | }, 16 | { 17 | "ordinal": 2, 18 | "name": "is_default", 19 | "type_info": "Bool" 20 | } 21 | ], 22 | "parameters": { 23 | "Left": [ 24 | "Int8" 25 | ] 26 | }, 27 | "nullable": [ 28 | false, 29 | false, 30 | false 31 | ] 32 | }, 33 | "hash": "0d96b66a879595ee2ad34b8ff531431737daaba7e0dd73641c1669a9fe8858f3" 34 | } 35 | -------------------------------------------------------------------------------- /api/.sqlx/query-13b095006e9d49b7f9ce6f0177f7f0a1cec433d9c05092d47850bca1d1348fac.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n select exists (select id\n from app.destinations\n where tenant_id = $1 and id = $2) as \"exists!\"\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "exists!", 9 | "type_info": "Bool" 10 | } 11 | ], 12 | "parameters": { 13 | "Left": [ 14 | "Text", 15 | "Int8" 16 | ] 17 | }, 18 | "nullable": [ 19 | null 20 | ] 21 | }, 22 | "hash": "13b095006e9d49b7f9ce6f0177f7f0a1cec433d9c05092d47850bca1d1348fac" 23 | } 24 | 
-------------------------------------------------------------------------------- /api/.sqlx/query-16fb6d2a6c717b2f552ded179440d88f88e3f0283bebcc9e7f34a3894f10074b.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n select id, tenant_id, name, config\n from app.sources\n where tenant_id = $1 and id = $2\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | }, 11 | { 12 | "ordinal": 1, 13 | "name": "tenant_id", 14 | "type_info": "Text" 15 | }, 16 | { 17 | "ordinal": 2, 18 | "name": "name", 19 | "type_info": "Text" 20 | }, 21 | { 22 | "ordinal": 3, 23 | "name": "config", 24 | "type_info": "Jsonb" 25 | } 26 | ], 27 | "parameters": { 28 | "Left": [ 29 | "Text", 30 | "Int8" 31 | ] 32 | }, 33 | "nullable": [ 34 | false, 35 | false, 36 | false, 37 | false 38 | ] 39 | }, 40 | "hash": "16fb6d2a6c717b2f552ded179440d88f88e3f0283bebcc9e7f34a3894f10074b" 41 | } 42 | -------------------------------------------------------------------------------- /api/.sqlx/query-2249847047d736d83932f6efcd5a7279520c4f7f9a3d58541d0bb5b30493b5d1.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n update app.sources\n set config = $1, name = $2\n where tenant_id = $3 and id = $4\n returning id\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | } 11 | ], 12 | "parameters": { 13 | "Left": [ 14 | "Jsonb", 15 | "Text", 16 | "Text", 17 | "Int8" 18 | ] 19 | }, 20 | "nullable": [ 21 | false 22 | ] 23 | }, 24 | "hash": "2249847047d736d83932f6efcd5a7279520c4f7f9a3d58541d0bb5b30493b5d1" 25 | } 26 | -------------------------------------------------------------------------------- /api/.sqlx/query-2f67b6c19e969b20581cdef7bf92b0b595bac3aa7e65eb156c72bc181e38b1d8.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n insert into app.pipelines (tenant_id, source_id, destination_id, replicator_id, publication_name, config)\n values ($1, $2, $3, $4, $5, $6)\n returning id\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | } 11 | ], 12 | "parameters": { 13 | "Left": [ 14 | "Text", 15 | "Int8", 16 | "Int8", 17 | "Int8", 18 | "Text", 19 | "Jsonb" 20 | ] 21 | }, 22 | "nullable": [ 23 | false 24 | ] 25 | }, 26 | "hash": "2f67b6c19e969b20581cdef7bf92b0b595bac3aa7e65eb156c72bc181e38b1d8" 27 | } 28 | -------------------------------------------------------------------------------- /api/.sqlx/query-344b7bac3092c85f7a337e6c120fa263b19141f3059f8f62b00bdb8f8a656fcb.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n update app.images\n set name = $1, is_default = $2\n where id = $3\n returning id\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | } 11 | ], 12 | "parameters": { 13 | "Left": [ 14 | "Text", 15 | "Bool", 16 | "Int8" 17 | ] 18 | }, 19 | "nullable": [ 20 | false 21 | ] 22 | }, 23 | "hash": "344b7bac3092c85f7a337e6c120fa263b19141f3059f8f62b00bdb8f8a656fcb" 24 | } 25 | -------------------------------------------------------------------------------- /api/.sqlx/query-3740ae4bf53167d8113e43d71d85b7552d2122596820f849a7b366a3bed8703f.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n insert into app.tenants (id, name)\n values ($1, $2)\n on conflict (id) do update set name = $2\n returning id\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Text" 10 | } 11 | ], 12 | "parameters": { 13 | "Left": [ 14 | "Text", 15 | "Text" 16 | ] 17 | }, 18 | "nullable": [ 19 | false 20 | ] 21 | }, 22 | "hash": "3740ae4bf53167d8113e43d71d85b7552d2122596820f849a7b366a3bed8703f" 23 | } 24 | -------------------------------------------------------------------------------- /api/.sqlx/query-389faf1c8ea8aac1487e734040e8bb7a304719a2f1188be4a62c386399a647a7.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n update app.pipelines\n set source_id = $1, destination_id = $2, publication_name = $3, config = $4\n where tenant_id = $5 and id = $6\n returning id\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | } 11 | ], 12 | "parameters": { 13 | "Left": [ 14 | "Int8", 15 | "Int8", 16 | "Text", 17 | "Jsonb", 18 | "Text", 19 | "Int8" 20 | ] 21 | }, 22 | "nullable": [ 23 | false 24 | ] 25 | }, 26 | "hash": "389faf1c8ea8aac1487e734040e8bb7a304719a2f1188be4a62c386399a647a7" 27 | } 28 | -------------------------------------------------------------------------------- /api/.sqlx/query-40438470eae2af511cd51eba4075e5f14de7fa77ca0e6843aeeed497f66149ef.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n insert into app.destinations (tenant_id, name, config)\n values ($1, $2, $3)\n returning id\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | } 11 | ], 12 | "parameters": { 13 | "Left": [ 14 | "Text", 15 | "Text", 16 | "Jsonb" 17 | ] 18 | }, 19 | "nullable": [ 20 | false 21 | ] 22 | }, 23 | "hash": "40438470eae2af511cd51eba4075e5f14de7fa77ca0e6843aeeed497f66149ef" 24 | } 25 | -------------------------------------------------------------------------------- /api/.sqlx/query-44c74dd07b704e7fe90745914976a792e07f1cd63e9307d7b111c0f85b4a2829.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n select exists (select id\n from app.sources\n where tenant_id = $1 and id = $2) as \"exists!\"\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "exists!", 9 | "type_info": "Bool" 10 | } 11 | ], 12 | "parameters": { 13 | "Left": [ 14 | "Text", 15 | "Int8" 16 | ] 17 | }, 18 | "nullable": [ 19 | null 20 | ] 21 | }, 22 | "hash": "44c74dd07b704e7fe90745914976a792e07f1cd63e9307d7b111c0f85b4a2829" 23 | } 24 | -------------------------------------------------------------------------------- /api/.sqlx/query-51a192c02465ee1b3ef3ce40a730223a28deeaed1b6bb70a5df2e48d92d8e197.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n select id, name, is_default\n from app.images\n where id = $1\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | }, 11 | { 12 | "ordinal": 1, 13 | "name": "name", 14 | "type_info": "Text" 15 | }, 16 | { 17 | "ordinal": 2, 18 | "name": "is_default", 19 | "type_info": "Bool" 20 | } 21 | ], 22 | 
"parameters": { 23 | "Left": [ 24 | "Int8" 25 | ] 26 | }, 27 | "nullable": [ 28 | false, 29 | false, 30 | false 31 | ] 32 | }, 33 | "hash": "51a192c02465ee1b3ef3ce40a730223a28deeaed1b6bb70a5df2e48d92d8e197" 34 | } 35 | -------------------------------------------------------------------------------- /api/.sqlx/query-5491e32f69e73ad62e54ac56228bdfbc4fc899103508a4a6e4eefc0f8aede791.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n select id, name, is_default\n from app.images\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | }, 11 | { 12 | "ordinal": 1, 13 | "name": "name", 14 | "type_info": "Text" 15 | }, 16 | { 17 | "ordinal": 2, 18 | "name": "is_default", 19 | "type_info": "Bool" 20 | } 21 | ], 22 | "parameters": { 23 | "Left": [] 24 | }, 25 | "nullable": [ 26 | false, 27 | false, 28 | false 29 | ] 30 | }, 31 | "hash": "5491e32f69e73ad62e54ac56228bdfbc4fc899103508a4a6e4eefc0f8aede791" 32 | } 33 | -------------------------------------------------------------------------------- /api/.sqlx/query-665e42a3a8948a66cb687d7327b9930792f613f20dfb2511de0edbd8f3abc7e1.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n select id, tenant_id, name, config\n from app.destinations\n where tenant_id = $1 and id = $2\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | }, 11 | { 12 | "ordinal": 1, 13 | "name": "tenant_id", 14 | "type_info": "Text" 15 | }, 16 | { 17 | "ordinal": 2, 18 | "name": "name", 19 | "type_info": "Text" 20 | }, 21 | { 22 | "ordinal": 3, 23 | "name": "config", 24 | "type_info": "Jsonb" 25 | } 26 | ], 27 | "parameters": { 28 | "Left": [ 29 | "Text", 30 | "Int8" 31 | ] 32 | }, 33 | "nullable": [ 34 | false, 35 | false, 36 | false, 37 | false 38 | ] 39 | }, 40 | "hash": "665e42a3a8948a66cb687d7327b9930792f613f20dfb2511de0edbd8f3abc7e1" 41 | } 42 | -------------------------------------------------------------------------------- /api/.sqlx/query-7455471d7c982b9456c6e051a64636e79308fa46f7ad60062a62a0f8d245af6b.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n select id, tenant_id, name, config\n from app.destinations\n where tenant_id = $1\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | }, 11 | { 12 | "ordinal": 1, 13 | "name": "tenant_id", 14 | "type_info": "Text" 15 | }, 16 | { 17 | "ordinal": 2, 18 | "name": "name", 19 | "type_info": "Text" 20 | }, 21 | { 22 | "ordinal": 3, 23 | "name": "config", 24 | "type_info": "Jsonb" 25 | } 26 | ], 27 | "parameters": { 28 | "Left": [ 29 | "Text" 30 | ] 31 | }, 32 | "nullable": [ 33 | false, 34 | false, 35 | false, 36 | false 37 | ] 38 | }, 39 | "hash": "7455471d7c982b9456c6e051a64636e79308fa46f7ad60062a62a0f8d245af6b" 40 | } 41 | -------------------------------------------------------------------------------- /api/.sqlx/query-75da20a892b377e62bb27a051e5712f129df1d466ac49a63eb2898912f05bccb.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n select id, name, is_default\n from app.images\n where is_default = true\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | }, 11 
| { 12 | "ordinal": 1, 13 | "name": "name", 14 | "type_info": "Text" 15 | }, 16 | { 17 | "ordinal": 2, 18 | "name": "is_default", 19 | "type_info": "Bool" 20 | } 21 | ], 22 | "parameters": { 23 | "Left": [] 24 | }, 25 | "nullable": [ 26 | false, 27 | false, 28 | false 29 | ] 30 | }, 31 | "hash": "75da20a892b377e62bb27a051e5712f129df1d466ac49a63eb2898912f05bccb" 32 | } 33 | -------------------------------------------------------------------------------- /api/.sqlx/query-77a1de78ec6fc1f6382e7594ede907470ac3808a5aeb7e458fab380fe78d823e.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n insert into app.images (name, is_default)\n values ($1, $2)\n returning id\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | } 11 | ], 12 | "parameters": { 13 | "Left": [ 14 | "Text", 15 | "Bool" 16 | ] 17 | }, 18 | "nullable": [ 19 | false 20 | ] 21 | }, 22 | "hash": "77a1de78ec6fc1f6382e7594ede907470ac3808a5aeb7e458fab380fe78d823e" 23 | } 24 | -------------------------------------------------------------------------------- /api/.sqlx/query-7864a7c199c4c65647e620f5c7e77c9f4f8cda589e565fa2d2d5a2f29df9c60e.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n insert into app.tenants (id, name)\n values ($1, $2)\n returning id\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Text" 10 | } 11 | ], 12 | "parameters": { 13 | "Left": [ 14 | "Text", 15 | "Text" 16 | ] 17 | }, 18 | "nullable": [ 19 | false 20 | ] 21 | }, 22 | "hash": "7864a7c199c4c65647e620f5c7e77c9f4f8cda589e565fa2d2d5a2f29df9c60e" 23 | } 24 | -------------------------------------------------------------------------------- /api/.sqlx/query-808c0febda267eabf1ac487a5d8e9b6b8e67e7582dd9d3c4d23c409258506645.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n delete from app.images\n where id = $1\n returning id\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | } 11 | ], 12 | "parameters": { 13 | "Left": [ 14 | "Int8" 15 | ] 16 | }, 17 | "nullable": [ 18 | false 19 | ] 20 | }, 21 | "hash": "808c0febda267eabf1ac487a5d8e9b6b8e67e7582dd9d3c4d23c409258506645" 22 | } 23 | -------------------------------------------------------------------------------- /api/.sqlx/query-96a26fc4caf7b072bd3d3ff4705cdd3a5de0034143b9a53c7859d72fe35efff7.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n update app.destinations\n set config = $1, name = $2\n where tenant_id = $3 and id = $4\n returning id\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | } 11 | ], 12 | "parameters": { 13 | "Left": [ 14 | "Jsonb", 15 | "Text", 16 | "Text", 17 | "Int8" 18 | ] 19 | }, 20 | "nullable": [ 21 | false 22 | ] 23 | }, 24 | "hash": "96a26fc4caf7b072bd3d3ff4705cdd3a5de0034143b9a53c7859d72fe35efff7" 25 | } 26 | -------------------------------------------------------------------------------- /api/.sqlx/query-ad02b73edec5b9ee54244aa340d450d20cac7447e473629abf5cac0ab0266978.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n delete from 
app.pipelines\n where tenant_id = $1 and id = $2\n returning id\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | } 11 | ], 12 | "parameters": { 13 | "Left": [ 14 | "Text", 15 | "Int8" 16 | ] 17 | }, 18 | "nullable": [ 19 | false 20 | ] 21 | }, 22 | "hash": "ad02b73edec5b9ee54244aa340d450d20cac7447e473629abf5cac0ab0266978" 23 | } 24 | -------------------------------------------------------------------------------- /api/.sqlx/query-b235f8fb03837d21191916b974fe2166db455218f1e1308d3a74b1bc41cd8008.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n delete from app.sources\n where tenant_id = $1 and id = $2\n returning id\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | } 11 | ], 12 | "parameters": { 13 | "Left": [ 14 | "Text", 15 | "Int8" 16 | ] 17 | }, 18 | "nullable": [ 19 | false 20 | ] 21 | }, 22 | "hash": "b235f8fb03837d21191916b974fe2166db455218f1e1308d3a74b1bc41cd8008" 23 | } 24 | -------------------------------------------------------------------------------- /api/.sqlx/query-c12b3f5b0c165b08628eb2c4c9eb08c56afa0513f24eb147c431ce229b1efc24.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n select id, name\n from app.tenants\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Text" 10 | }, 11 | { 12 | "ordinal": 1, 13 | "name": "name", 14 | "type_info": "Text" 15 | } 16 | ], 17 | "parameters": { 18 | "Left": [] 19 | }, 20 | "nullable": [ 21 | false, 22 | false 23 | ] 24 | }, 25 | "hash": "c12b3f5b0c165b08628eb2c4c9eb08c56afa0513f24eb147c431ce229b1efc24" 26 | } 27 | -------------------------------------------------------------------------------- /api/.sqlx/query-c6214e61e82efa1d305f8dd85ce47f71bca40b81d0e4c54352e0676767de9bc3.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n select r.id, r.tenant_id, r.image_id\n from app.replicators r\n join app.pipelines p on r.id = p.replicator_id\n where r.tenant_id = $1 and p.tenant_id = $1\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | }, 11 | { 12 | "ordinal": 1, 13 | "name": "tenant_id", 14 | "type_info": "Text" 15 | }, 16 | { 17 | "ordinal": 2, 18 | "name": "image_id", 19 | "type_info": "Int8" 20 | } 21 | ], 22 | "parameters": { 23 | "Left": [ 24 | "Text" 25 | ] 26 | }, 27 | "nullable": [ 28 | false, 29 | false, 30 | false 31 | ] 32 | }, 33 | "hash": "c6214e61e82efa1d305f8dd85ce47f71bca40b81d0e4c54352e0676767de9bc3" 34 | } 35 | -------------------------------------------------------------------------------- /api/.sqlx/query-c76d3e7cbd7f1d47acef03ffcdcd3e505581e1b595ffc84d1ef1e824d49d7bb5.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n insert into app.replicators (tenant_id, image_id)\n values ($1, $2)\n returning id\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | } 11 | ], 12 | "parameters": { 13 | "Left": [ 14 | "Text", 15 | "Int8" 16 | ] 17 | }, 18 | "nullable": [ 19 | false 20 | ] 21 | }, 22 | "hash": "c76d3e7cbd7f1d47acef03ffcdcd3e505581e1b595ffc84d1ef1e824d49d7bb5" 23 | } 24 | 
-------------------------------------------------------------------------------- /api/.sqlx/query-d4a7110a5dac90f9f60d89ffa131b968191a0a5e7b9eaaa2c79982f2d8064702.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n delete from app.destinations\n where tenant_id = $1 and id = $2\n returning id\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | } 11 | ], 12 | "parameters": { 13 | "Left": [ 14 | "Text", 15 | "Int8" 16 | ] 17 | }, 18 | "nullable": [ 19 | false 20 | ] 21 | }, 22 | "hash": "d4a7110a5dac90f9f60d89ffa131b968191a0a5e7b9eaaa2c79982f2d8064702" 23 | } 24 | -------------------------------------------------------------------------------- /api/.sqlx/query-defb1d999e12327ca2840bdd3d524d1deeaa0fde8ed918157deab043cfbe911c.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n select id, name\n from app.tenants\n where id = $1\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Text" 10 | }, 11 | { 12 | "ordinal": 1, 13 | "name": "name", 14 | "type_info": "Text" 15 | } 16 | ], 17 | "parameters": { 18 | "Left": [ 19 | "Text" 20 | ] 21 | }, 22 | "nullable": [ 23 | false, 24 | false 25 | ] 26 | }, 27 | "hash": "defb1d999e12327ca2840bdd3d524d1deeaa0fde8ed918157deab043cfbe911c" 28 | } 29 | -------------------------------------------------------------------------------- /api/.sqlx/query-e0135e45bbf859ec1bbf27654d84b49fbe41f4d41fd4b8cc9d88f8762884f157.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n select p.id,\n p.tenant_id,\n source_id,\n s.name as source_name,\n destination_id,\n d.name as destination_name,\n replicator_id,\n publication_name,\n p.config\n from app.pipelines p\n join app.sources s on p.source_id = s.id\n join app.destinations d on p.destination_id = d.id\n where p.tenant_id = $1 and p.id = $2\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | }, 11 | { 12 | "ordinal": 1, 13 | "name": "tenant_id", 14 | "type_info": "Text" 15 | }, 16 | { 17 | "ordinal": 2, 18 | "name": "source_id", 19 | "type_info": "Int8" 20 | }, 21 | { 22 | "ordinal": 3, 23 | "name": "source_name", 24 | "type_info": "Text" 25 | }, 26 | { 27 | "ordinal": 4, 28 | "name": "destination_id", 29 | "type_info": "Int8" 30 | }, 31 | { 32 | "ordinal": 5, 33 | "name": "destination_name", 34 | "type_info": "Text" 35 | }, 36 | { 37 | "ordinal": 6, 38 | "name": "replicator_id", 39 | "type_info": "Int8" 40 | }, 41 | { 42 | "ordinal": 7, 43 | "name": "publication_name", 44 | "type_info": "Text" 45 | }, 46 | { 47 | "ordinal": 8, 48 | "name": "config", 49 | "type_info": "Jsonb" 50 | } 51 | ], 52 | "parameters": { 53 | "Left": [ 54 | "Text", 55 | "Int8" 56 | ] 57 | }, 58 | "nullable": [ 59 | false, 60 | false, 61 | false, 62 | false, 63 | false, 64 | false, 65 | false, 66 | false, 67 | false 68 | ] 69 | }, 70 | "hash": "e0135e45bbf859ec1bbf27654d84b49fbe41f4d41fd4b8cc9d88f8762884f157" 71 | } 72 | -------------------------------------------------------------------------------- /api/.sqlx/query-e930b526695677d24b02ada5778b01d312df0d47b420f215ba9f9d8d7876c70b.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n select p.id,\n 
p.tenant_id,\n source_id,\n s.name as source_name,\n destination_id,\n d.name as destination_name,\n replicator_id,\n publication_name,\n p.config\n from app.pipelines p\n join app.sources s on p.source_id = s.id\n join app.destinations d on p.destination_id = d.id\n where p.tenant_id = $1\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Int8" 10 | }, 11 | { 12 | "ordinal": 1, 13 | "name": "tenant_id", 14 | "type_info": "Text" 15 | }, 16 | { 17 | "ordinal": 2, 18 | "name": "source_id", 19 | "type_info": "Int8" 20 | }, 21 | { 22 | "ordinal": 3, 23 | "name": "source_name", 24 | "type_info": "Text" 25 | }, 26 | { 27 | "ordinal": 4, 28 | "name": "destination_id", 29 | "type_info": "Int8" 30 | }, 31 | { 32 | "ordinal": 5, 33 | "name": "destination_name", 34 | "type_info": "Text" 35 | }, 36 | { 37 | "ordinal": 6, 38 | "name": "replicator_id", 39 | "type_info": "Int8" 40 | }, 41 | { 42 | "ordinal": 7, 43 | "name": "publication_name", 44 | "type_info": "Text" 45 | }, 46 | { 47 | "ordinal": 8, 48 | "name": "config", 49 | "type_info": "Jsonb" 50 | } 51 | ], 52 | "parameters": { 53 | "Left": [ 54 | "Text" 55 | ] 56 | }, 57 | "nullable": [ 58 | false, 59 | false, 60 | false, 61 | false, 62 | false, 63 | false, 64 | false, 65 | false, 66 | false 67 | ] 68 | }, 69 | "hash": "e930b526695677d24b02ada5778b01d312df0d47b420f215ba9f9d8d7876c70b" 70 | } 71 | -------------------------------------------------------------------------------- /api/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "api" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [lib] 7 | path = "src/lib.rs" 8 | 9 | [[bin]] 10 | path = "src/main.rs" 11 | name = "api" 12 | 13 | [dependencies] 14 | postgres = { workspace = true, features = ["sqlx"] } 15 | telemetry = { workspace = true } 16 | 17 | actix-web = { workspace = true, features = ["macros", "http2"] } 18 | actix-web-httpauth = { workspace = true } 19 | anyhow = { workspace = true, features = ["std"] } 20 | async-trait = { workspace = true } 21 | aws-lc-rs = { workspace = true, features = ["alloc", "aws-lc-sys"] } 22 | base64 = { workspace = true, features = ["std"] } 23 | config = { workspace = true, features = ["yaml"] } 24 | constant_time_eq = { workspace = true } 25 | k8s-openapi = { workspace = true, features = ["latest"] } 26 | kube = { workspace = true, features = [ 27 | "runtime", 28 | "derive", 29 | "client", 30 | "rustls-tls", 31 | ] } 32 | pg_escape = { workspace = true } 33 | rand = { workspace = true, features = ["std"] } 34 | reqwest = { workspace = true, features = ["json"] } 35 | serde = { workspace = true, features = ["derive"] } 36 | serde_json = { workspace = true, features = ["std"] } 37 | sqlx = { workspace = true, features = [ 38 | "runtime-tokio-rustls", 39 | "macros", 40 | "postgres", 41 | "json", 42 | "migrate", 43 | ] } 44 | thiserror = { workspace = true } 45 | tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } 46 | tracing = { workspace = true, default-features = false } 47 | tracing-actix-web = { workspace = true, features = ["emit_event_on_error"] } 48 | utoipa = { workspace = true, features = ["actix_extras"] } 49 | utoipa-swagger-ui = { workspace = true, features = ["actix-web", "reqwest"] } 50 | uuid = { version = "1.10.0", features = ["v4"] } 51 | 52 | [dev-dependencies] 53 | postgres = { workspace = true, features = ["test-utils", "sqlx"] } 
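
The `api/.sqlx/query-*.json` files above are sqlx's offline query metadata: each entry stores the SQL text plus the column and parameter types that the `sqlx::query!` macros check at compile time, and the README later in this dump describes regenerating them with `cargo sqlx prepare` (the `api` crate above enables sqlx's `macros` feature, and the Dockerfile that follows builds with `SQLX_OFFLINE=true`). As a hedged sketch only — not code from this repository — this is roughly the call shape that corresponds to a cached entry such as `query-13b095006e9d49b7...` (the destinations existence check); the function name and placement are assumptions.

```rust
// Hedged sketch, not from this repository: a compile-time checked sqlx call
// matching the cached "exists" query metadata. Query text and parameter types
// come from the .sqlx JSON entry above; the function name is hypothetical.
use sqlx::PgPool;

pub async fn destination_exists(
    pool: &PgPool,
    tenant_id: &str,
    destination_id: i64,
) -> Result<bool, sqlx::Error> {
    // With SQLX_OFFLINE=true (as in api/Dockerfile below), query! validates
    // this statement against the api/.sqlx cache instead of a live database;
    // the `as "exists!"` alias tells sqlx to treat the column as non-nullable.
    let record = sqlx::query!(
        r#"
        select exists (select id
               from app.destinations
               where tenant_id = $1 and id = $2) as "exists!"
        "#,
        tenant_id,
        destination_id
    )
    .fetch_one(pool)
    .await?;

    Ok(record.exists)
}
```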
-------------------------------------------------------------------------------- /api/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rust:1.85.0-slim-bookworm AS builder 2 | WORKDIR /app 3 | COPY . . 4 | ENV SQLX_OFFLINE=true 5 | RUN cargo build --release -p api 6 | 7 | FROM debian:bookworm-slim 8 | WORKDIR /app 9 | COPY --from=builder /app/target/release/api api 10 | COPY api/configuration configuration 11 | ENV APP_ENVIRONMENT=prod 12 | ENTRYPOINT ["./api"] 13 | -------------------------------------------------------------------------------- /api/README.md: -------------------------------------------------------------------------------- 1 | # `etl` API 2 | 3 | This API service provides a RESTful interface for managing PostgreSQL replication pipelines. It enables you to: 4 | 5 | - Create and manage replication pipelines between PostgreSQL sources and destinations 6 | - Handle multi-tenant replication configurations 7 | - Manage publications and tables for replication 8 | - Control pipeline lifecycle (start/stop/status) 9 | - Secure configuration with encryption 10 | - Deploy and manage replicators in Kubernetes 11 | 12 | ## Table of Contents 13 | 14 | - [Local Setup](#local-setup) 15 | - [Database Management](#database-management) 16 | - [Development](#development) 17 | - [Environment Variables](#environment-variables) 18 | 19 | ## Local Setup 20 | 21 | ### Prerequisites 22 | 23 | Before you begin, ensure you have the following installed: 24 | 25 | - PostgreSQL client (`psql`) 26 | - SQLx CLI (`cargo install --version='~0.7' sqlx-cli --no-default-features --features rustls,postgres`) 27 | - Rust toolchain 28 | 29 | ## Database Management 30 | 31 | ### Initial Setup 32 | 33 | To set up and initialize the database, run the following command from the main directory: 34 | 35 | ```bash 36 | ./scripts/init_db.sh 37 | ``` 38 | 39 | This script will: 40 | 41 | 1. Check for required dependencies (psql and sqlx) 42 | 2. Start a PostgreSQL container if one isn't already running 43 | 3. Create the database if it doesn't exist 44 | 4. 
Run all migrations 45 | 46 | ### Environment Variables 47 | 48 | You can customize the database setup using these environment variables: 49 | 50 | | Variable | Description | Default | 51 | | ---------------------- | ------------------------------ | --------------- | 52 | | `POSTGRES_DATA_VOLUME` | Data volume path | ./postgres_data | 53 | | `SKIP_DOCKER` | Skip Docker container creation | false | 54 | 55 | Example usage: 56 | 57 | ```bash 58 | POSTGRES_DATA_VOLUME="~/postgres_data" ./scripts/init_db.sh 59 | ``` 60 | 61 | ## Development 62 | 63 | ### Database Migrations 64 | 65 | #### Adding a New Migration 66 | 67 | To create a new migration file: 68 | 69 | ```bash 70 | sqlx migrate add 71 | ``` 72 | 73 | #### Running Migrations 74 | 75 | To apply all pending migrations: 76 | 77 | ```bash 78 | sqlx migrate run 79 | ``` 80 | 81 | #### Resetting Database 82 | 83 | To reset the database to its initial state: 84 | 85 | ```bash 86 | sqlx migrate reset 87 | ``` 88 | 89 | #### Updating SQLx Metadata 90 | 91 | After making changes to the database schema, update the SQLx metadata: 92 | 93 | ```bash 94 | cargo sqlx prepare 95 | ``` 96 | -------------------------------------------------------------------------------- /api/configuration/base.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/supabase/etl/ef11abefadf43863c8d0a5d979357d11b7ec6451/api/configuration/base.yaml -------------------------------------------------------------------------------- /api/configuration/dev.yaml: -------------------------------------------------------------------------------- 1 | database: 2 | host: "localhost" 3 | port: 5430 4 | name: "postgres" 5 | username: "postgres" 6 | password: "postgres" 7 | require_ssl: false 8 | application: 9 | host: "127.0.0.1" 10 | port: 8000 11 | encryption_key: 12 | id: 0 13 | key: BlK9AlrzqRnCZy53j42uE1p2qGBiF7HYZjZYFaZObqg= 14 | api_key: XOUbHmWbt9h7nWl15wWwyWQnctmFGNjpawMc3lT5CFs= 15 | -------------------------------------------------------------------------------- /api/migrations/20240819111151_create_tenants.sql: -------------------------------------------------------------------------------- 1 | create schema app; 2 | 3 | create table 4 | app.tenants (id text primary key, name text not null); -------------------------------------------------------------------------------- /api/migrations/20240821104756_create_sources.sql: -------------------------------------------------------------------------------- 1 | create table 2 | app.sources ( 3 | id bigint generated always as identity primary key, 4 | tenant_id text references app.tenants (id) not null, 5 | name text not null, 6 | config jsonb not null 7 | ); -------------------------------------------------------------------------------- /api/migrations/20240822044837_create_sinks.sql: -------------------------------------------------------------------------------- 1 | create table 2 | app.sinks ( 3 | id bigint generated always as identity primary key, 4 | tenant_id text references app.tenants (id) not null, 5 | name text not null, 6 | config jsonb not null 7 | ); -------------------------------------------------------------------------------- /api/migrations/20240828090309_create_images.sql: -------------------------------------------------------------------------------- 1 | create table 2 | app.images ( 3 | id bigint generated always as identity primary key, 4 | name text not null, 5 | is_default boolean not null 6 | ); 7 | 
-------------------------------------------------------------------------------- /api/migrations/20240902113739_create_replicators.sql: -------------------------------------------------------------------------------- 1 | create type app.replicator_status as enum ('stopped', 'starting', 'started', 'stopping'); 2 | 3 | create table 4 | app.replicators ( 5 | id bigint generated always as identity primary key, 6 | tenant_id text references app.tenants (id) not null, 7 | image_id bigint references app.images (id) not null 8 | ); -------------------------------------------------------------------------------- /api/migrations/20240903105045_create_pipelines.sql: -------------------------------------------------------------------------------- 1 | create table 2 | app.pipelines ( 3 | id bigint generated always as identity primary key, 4 | tenant_id text references app.tenants (id) not null, 5 | source_id bigint references app.sources (id) not null, 6 | sink_id bigint references app.sinks (id) not null, 7 | replicator_id bigint references app.replicators (id) not null, 8 | publication_name text not null, 9 | config jsonb not null 10 | ); -------------------------------------------------------------------------------- /api/migrations/20250122125912_cascade_delete_tenants.sql: -------------------------------------------------------------------------------- 1 | alter table app.sources 2 | drop constraint sources_tenant_id_fkey, 3 | add constraint sources_tenant_id_fkey 4 | foreign key (tenant_id) 5 | references app.tenants (id) 6 | on delete cascade; 7 | 8 | alter table app.sinks 9 | drop constraint sinks_tenant_id_fkey, 10 | add constraint sinks_tenant_id_fkey 11 | foreign key (tenant_id) 12 | references app.tenants (id) 13 | on delete cascade; 14 | 15 | alter table app.replicators 16 | drop constraint replicators_tenant_id_fkey, 17 | add constraint replicators_tenant_id_fkey 18 | foreign key (tenant_id) 19 | references app.tenants (id) 20 | on delete cascade; 21 | 22 | alter table app.pipelines 23 | drop constraint pipelines_tenant_id_fkey, 24 | add constraint pipelines_tenant_id_fkey 25 | foreign key (tenant_id) 26 | references app.tenants (id) 27 | on delete cascade; -------------------------------------------------------------------------------- /api/migrations/20250506035802_cascade_delete_pipelines.sql: -------------------------------------------------------------------------------- 1 | alter table app.pipelines 2 | drop constraint pipelines_source_id_fkey, 3 | add constraint pipelines_source_id_fkey 4 | foreign key (source_id) 5 | references app.sources (id) 6 | on delete cascade; 7 | 8 | alter table app.pipelines 9 | drop constraint pipelines_sink_id_fkey, 10 | add constraint pipelines_sink_id_fkey 11 | foreign key (sink_id) 12 | references app.sinks (id) 13 | on delete cascade; -------------------------------------------------------------------------------- /api/migrations/20250526083530_rename_sinks_to_destinations.sql: -------------------------------------------------------------------------------- 1 | -- alter table doesn't allow schema qualification in the new name 2 | -- so we use destinations here instead of app.destinations 3 | alter table app.sinks rename to destinations; -------------------------------------------------------------------------------- /api/migrations/20250526084758_rename_sink_id_to_destination_id.sql: -------------------------------------------------------------------------------- 1 | alter table app.pipelines rename column sink_id to destination_id; 
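
The migrations above define the `app` schema (tenants, sources, sinks later renamed to destinations, images, replicators, pipelines) and add the cascade-delete constraints. The workflow documented in the README is the sqlx CLI (`sqlx migrate run` via `scripts/init_db.sh`); since `api/Cargo.toml` also enables sqlx's `migrate` feature, the same directory could in principle be embedded and applied from Rust. The sketch below is purely illustrative — the dump does not show whether the api binary actually runs migrations itself.

```rust
// Hedged sketch, not taken from this repository: applying the ./migrations
// directory programmatically via sqlx's `migrate` feature.
use sqlx::PgPool;

pub async fn run_migrations(pool: &PgPool) -> Result<(), sqlx::migrate::MigrateError> {
    // `migrate!` embeds ./migrations (relative to the crate root) at compile
    // time and applies any migrations not yet recorded in sqlx's
    // _sqlx_migrations bookkeeping table.
    sqlx::migrate!("./migrations").run(pool).await
}
```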
-------------------------------------------------------------------------------- /api/src/authentication.rs: -------------------------------------------------------------------------------- 1 | use actix_web::{dev::ServiceRequest, web::Data, Error}; 2 | use actix_web_httpauth::extractors::{ 3 | bearer::{BearerAuth, Config}, 4 | AuthenticationError, 5 | }; 6 | use constant_time_eq::constant_time_eq_n; 7 | 8 | use crate::configuration::ApiKey; 9 | 10 | pub async fn auth_validator( 11 | req: ServiceRequest, 12 | credentials: BearerAuth, 13 | ) -> Result { 14 | let config = req 15 | .app_data::() 16 | .cloned() 17 | .unwrap_or_default() 18 | .scope("v1"); 19 | 20 | let api_key: &str = req.app_data::>().expect("missing api_key"); 21 | let token = credentials.token(); 22 | 23 | let api_key: ApiKey = match api_key.try_into() { 24 | Ok(api_key) => api_key, 25 | Err(_) => { 26 | return Err((AuthenticationError::from(config).into(), req)); 27 | } 28 | }; 29 | 30 | let token: ApiKey = match token.try_into() { 31 | Ok(token) => token, 32 | Err(_) => { 33 | return Err((AuthenticationError::from(config).into(), req)); 34 | } 35 | }; 36 | 37 | if !constant_time_eq_n(&api_key.key, &token.key) { 38 | return Err((AuthenticationError::from(config).into(), req)); 39 | } 40 | 41 | Ok(req) 42 | } 43 | -------------------------------------------------------------------------------- /api/src/configuration.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::{self, Display}; 2 | 3 | use base64::{prelude::BASE64_STANDARD, Engine}; 4 | use postgres::sqlx::options::PgDatabaseOptions; 5 | use serde::{ 6 | de::{self, MapAccess, Unexpected, Visitor}, 7 | Deserialize, Deserializer, 8 | }; 9 | use thiserror::Error; 10 | 11 | #[derive(serde::Deserialize, Clone)] 12 | pub struct EncryptionKey { 13 | pub id: u32, 14 | pub key: String, 15 | } 16 | 17 | const API_KEY_LENGTH_IN_BYTES: usize = 32; 18 | 19 | pub struct ApiKey { 20 | pub key: [u8; API_KEY_LENGTH_IN_BYTES], 21 | } 22 | 23 | #[derive(Debug, Error)] 24 | pub enum ApiKeyConversionError { 25 | #[error("api key is not base64 encoded")] 26 | NotBase64Encoded, 27 | 28 | #[error("expected length of api key is 32, but actual length is {0}")] 29 | LengthNot32IBytes(usize), 30 | } 31 | 32 | impl TryFrom<&str> for ApiKey { 33 | type Error = ApiKeyConversionError; 34 | 35 | fn try_from(value: &str) -> Result { 36 | let key = BASE64_STANDARD 37 | .decode(value) 38 | .map_err(|_| ApiKeyConversionError::NotBase64Encoded)?; 39 | 40 | if key.len() != API_KEY_LENGTH_IN_BYTES { 41 | return Err(ApiKeyConversionError::LengthNot32IBytes(key.len())); 42 | } 43 | 44 | Ok(ApiKey { 45 | key: key 46 | .try_into() 47 | .expect("failed to convert api key into array"), 48 | }) 49 | } 50 | } 51 | 52 | impl<'de> Deserialize<'de> for ApiKey { 53 | fn deserialize(deserializer: D) -> Result 54 | where 55 | D: Deserializer<'de>, 56 | { 57 | #[derive(Deserialize)] 58 | #[serde(field_identifier, rename_all = "lowercase")] 59 | enum Field { 60 | Key, 61 | } 62 | 63 | struct ApiKeyVisitor; 64 | 65 | impl<'de> Visitor<'de> for ApiKeyVisitor { 66 | type Value = ApiKey; 67 | 68 | fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 69 | formatter.write_str("struct ApiKey") 70 | } 71 | 72 | fn visit_map(self, mut map: V) -> Result 73 | where 74 | V: MapAccess<'de>, 75 | { 76 | let mut key: Option<&str> = None; 77 | while let Some(map_key) = map.next_key()? 
{ 78 | match map_key { 79 | Field::Key => { 80 | if key.is_some() { 81 | return Err(de::Error::duplicate_field("key")); 82 | } 83 | key = Some(map.next_value()?); 84 | } 85 | } 86 | } 87 | let key_str = key.ok_or_else(|| de::Error::missing_field("key"))?; 88 | let key = key_str.try_into().map_err(|_| { 89 | de::Error::invalid_value(Unexpected::Str(key_str), &"base64 encoded 32 bytes") 90 | })?; 91 | Ok(key) 92 | } 93 | } 94 | 95 | const FIELDS: &[&str] = &["key"]; 96 | deserializer.deserialize_struct("ApiKey", FIELDS, ApiKeyVisitor) 97 | } 98 | } 99 | 100 | #[derive(serde::Deserialize, Clone)] 101 | pub struct Settings { 102 | pub database: PgDatabaseOptions, 103 | pub application: ApplicationSettings, 104 | pub encryption_key: EncryptionKey, 105 | pub api_key: String, 106 | } 107 | 108 | #[derive(serde::Deserialize, Clone)] 109 | pub struct ApplicationSettings { 110 | /// host the api listens on 111 | pub host: String, 112 | 113 | /// port the api listens on 114 | pub port: u16, 115 | } 116 | 117 | impl Display for ApplicationSettings { 118 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 119 | writeln!(f, " host: {}", self.host)?; 120 | writeln!(f, " port: {}", self.port) 121 | } 122 | } 123 | 124 | pub fn get_settings<'a, T: serde::Deserialize<'a>>() -> Result { 125 | let base_path = std::env::current_dir().expect("Failed to determine the current directory"); 126 | let configuration_directory = base_path.join("configuration"); 127 | 128 | // Detect the running environment. 129 | // Default to `dev` if unspecified. 130 | let environment: Environment = std::env::var("APP_ENVIRONMENT") 131 | .unwrap_or_else(|_| DEV_ENV_NAME.into()) 132 | .try_into() 133 | .expect("Failed to parse APP_ENVIRONMENT."); 134 | 135 | let environment_filename = format!("{}.yaml", environment.as_str()); 136 | let settings = config::Config::builder() 137 | .add_source(config::File::from( 138 | configuration_directory.join("base.yaml"), 139 | )) 140 | .add_source(config::File::from( 141 | configuration_directory.join(environment_filename), 142 | )) 143 | // Add in settings from environment variables (with a prefix of APP and '__' as separator) 144 | // E.g. `APP_DESTINATION__BIG_QUERY__PROJECT_ID=my-project-id would set `Settings { destination: BigQuery { project_id }}` to my-project-id 145 | .add_source( 146 | config::Environment::with_prefix("APP") 147 | .prefix_separator("_") 148 | .separator("__"), 149 | ) 150 | .build()?; 151 | 152 | settings.try_deserialize::() 153 | } 154 | 155 | pub const DEV_ENV_NAME: &str = "dev"; 156 | pub const PROD_ENV_NAME: &str = "prod"; 157 | 158 | /// The possible runtime environment for our application. 159 | pub enum Environment { 160 | Dev, 161 | Prod, 162 | } 163 | 164 | impl Environment { 165 | pub fn as_str(&self) -> &'static str { 166 | match self { 167 | Environment::Dev => DEV_ENV_NAME, 168 | Environment::Prod => PROD_ENV_NAME, 169 | } 170 | } 171 | } 172 | 173 | impl TryFrom for Environment { 174 | type Error = String; 175 | 176 | fn try_from(s: String) -> Result { 177 | match s.to_lowercase().as_str() { 178 | "dev" => Ok(Self::Dev), 179 | "prod" => Ok(Self::Prod), 180 | other => Err(format!( 181 | "{other} is not a supported environment. 
Use either `{DEV_ENV_NAME}` or `{PROD_ENV_NAME}`.", 182 | )), 183 | } 184 | } 185 | } 186 | -------------------------------------------------------------------------------- /api/src/db/destinations_pipelines.rs: -------------------------------------------------------------------------------- 1 | use aws_lc_rs::error::Unspecified; 2 | use sqlx::PgPool; 3 | use thiserror::Error; 4 | 5 | use crate::encryption::EncryptionKey; 6 | 7 | use super::{ 8 | destinations::{ 9 | create_destination_txn, update_destination_txn, DestinationConfig, DestinationsDbError, 10 | }, 11 | pipelines::{create_pipeline_txn, update_pipeline_txn, PipelineConfig}, 12 | }; 13 | 14 | #[derive(Debug, Error)] 15 | pub enum DestinationPipelineDbError { 16 | #[error("sqlx error: {0}")] 17 | Sqlx(#[from] sqlx::Error), 18 | 19 | #[error("encryption error: {0}")] 20 | Encryption(#[from] Unspecified), 21 | 22 | #[error("sources error: {0}")] 23 | Destinations(#[from] DestinationsDbError), 24 | 25 | #[error("destination with id {0} not found")] 26 | DestinationNotFound(i64), 27 | 28 | #[error("pipeline with id {0} not found")] 29 | PipelineNotFound(i64), 30 | } 31 | 32 | #[expect(clippy::too_many_arguments)] 33 | pub async fn create_destination_and_pipeline( 34 | pool: &PgPool, 35 | tenant_id: &str, 36 | source_id: i64, 37 | destination_name: &str, 38 | destination_config: DestinationConfig, 39 | image_id: i64, 40 | publication_name: &str, 41 | pipeline_config: PipelineConfig, 42 | encryption_key: &EncryptionKey, 43 | ) -> Result<(i64, i64), DestinationPipelineDbError> { 44 | let destination_config = destination_config.into_db_config(encryption_key)?; 45 | let destination_config = 46 | serde_json::to_value(destination_config).expect("failed to serialize config"); 47 | let pipeline_config = 48 | serde_json::to_value(pipeline_config).expect("failed to serialize config"); 49 | let mut txn = pool.begin().await?; 50 | let destination_id = 51 | create_destination_txn(&mut txn, tenant_id, destination_name, destination_config).await?; 52 | let pipeline_id = create_pipeline_txn( 53 | &mut txn, 54 | tenant_id, 55 | source_id, 56 | destination_id, 57 | image_id, 58 | publication_name, 59 | pipeline_config, 60 | ) 61 | .await?; 62 | txn.commit().await?; 63 | Ok((destination_id, pipeline_id)) 64 | } 65 | 66 | #[expect(clippy::too_many_arguments)] 67 | pub async fn update_destination_and_pipeline( 68 | pool: &PgPool, 69 | tenant_id: &str, 70 | destination_id: i64, 71 | pipeline_id: i64, 72 | source_id: i64, 73 | destination_name: &str, 74 | destination_config: DestinationConfig, 75 | publication_name: &str, 76 | pipeline_config: PipelineConfig, 77 | encryption_key: &EncryptionKey, 78 | ) -> Result<(), DestinationPipelineDbError> { 79 | let destination_config = destination_config.into_db_config(encryption_key)?; 80 | let destination_config = 81 | serde_json::to_value(destination_config).expect("failed to serialize config"); 82 | let pipeline_config = 83 | serde_json::to_value(pipeline_config).expect("failed to serialize config"); 84 | let mut txn = pool.begin().await?; 85 | let destination_id_res = update_destination_txn( 86 | &mut txn, 87 | tenant_id, 88 | destination_name, 89 | destination_id, 90 | destination_config, 91 | ) 92 | .await?; 93 | if destination_id_res.is_none() { 94 | txn.rollback().await?; 95 | return Err(DestinationPipelineDbError::DestinationNotFound( 96 | destination_id, 97 | )); 98 | }; 99 | let pipeline_id_res = update_pipeline_txn( 100 | &mut txn, 101 | tenant_id, 102 | pipeline_id, 103 | source_id, 104 | destination_id, 
105 |         publication_name,
106 |         pipeline_config,
107 |     )
108 |     .await?;
109 | 
110 |     if pipeline_id_res.is_none() {
111 |         txn.rollback().await?;
112 |         return Err(DestinationPipelineDbError::PipelineNotFound(pipeline_id));
113 |     };
114 | 
115 |     txn.commit().await?;
116 | 
117 |     Ok(())
118 | }
--------------------------------------------------------------------------------
/api/src/db/images.rs:
--------------------------------------------------------------------------------
  1 | use sqlx::{PgPool, Postgres, Transaction};
  2 | 
  3 | pub struct Image {
  4 |     pub id: i64,
  5 |     pub name: String,
  6 |     pub is_default: bool,
  7 | }
  8 | 
  9 | pub async fn create_image(pool: &PgPool, name: &str, is_default: bool) -> Result<i64, sqlx::Error> {
 10 |     let mut txn = pool.begin().await?;
 11 |     let res = create_image_txn(&mut txn, name, is_default).await;
 12 |     txn.commit().await?;
 13 |     res
 14 | }
 15 | 
 16 | pub async fn create_image_txn(
 17 |     txn: &mut Transaction<'_, Postgres>,
 18 |     name: &str,
 19 |     is_default: bool,
 20 | ) -> Result<i64, sqlx::Error> {
 21 |     let record = sqlx::query!(
 22 |         r#"
 23 |         insert into app.images (name, is_default)
 24 |         values ($1, $2)
 25 |         returning id
 26 |         "#,
 27 |         name,
 28 |         is_default
 29 |     )
 30 |     .fetch_one(&mut **txn)
 31 |     .await?;
 32 | 
 33 |     Ok(record.id)
 34 | }
 35 | 
 36 | pub async fn read_default_image(pool: &PgPool) -> Result<Option<Image>, sqlx::Error> {
 37 |     let record = sqlx::query!(
 38 |         r#"
 39 |         select id, name, is_default
 40 |         from app.images
 41 |         where is_default = true
 42 |         "#,
 43 |     )
 44 |     .fetch_optional(pool)
 45 |     .await?;
 46 | 
 47 |     Ok(record.map(|r| Image {
 48 |         id: r.id,
 49 |         name: r.name,
 50 |         is_default: r.is_default,
 51 |     }))
 52 | }
 53 | 
 54 | pub async fn read_image(pool: &PgPool, image_id: i64) -> Result<Option<Image>, sqlx::Error> {
 55 |     let record = sqlx::query!(
 56 |         r#"
 57 |         select id, name, is_default
 58 |         from app.images
 59 |         where id = $1
 60 |         "#,
 61 |         image_id,
 62 |     )
 63 |     .fetch_optional(pool)
 64 |     .await?;
 65 | 
 66 |     Ok(record.map(|r| Image {
 67 |         id: r.id,
 68 |         name: r.name,
 69 |         is_default: r.is_default,
 70 |     }))
 71 | }
 72 | 
 73 | pub async fn update_image(
 74 |     pool: &PgPool,
 75 |     image_id: i64,
 76 |     name: &str,
 77 |     is_default: bool,
 78 | ) -> Result<Option<i64>, sqlx::Error> {
 79 |     let record = sqlx::query!(
 80 |         r#"
 81 |         update app.images
 82 |         set name = $1, is_default = $2
 83 |         where id = $3
 84 |         returning id
 85 |         "#,
 86 |         name,
 87 |         is_default,
 88 |         image_id
 89 |     )
 90 |     .fetch_optional(pool)
 91 |     .await?;
 92 | 
 93 |     Ok(record.map(|r| r.id))
 94 | }
 95 | 
 96 | pub async fn delete_image(pool: &PgPool, image_id: i64) -> Result<Option<i64>, sqlx::Error> {
 97 |     let record = sqlx::query!(
 98 |         r#"
 99 |         delete from app.images
100 |         where id = $1
101 |         returning id
102 |         "#,
103 |         image_id
104 |     )
105 |     .fetch_optional(pool)
106 |     .await?;
107 | 
108 |     Ok(record.map(|r| r.id))
109 | }
110 | 
111 | pub async fn read_all_images(pool: &PgPool) -> Result<Vec<Image>, sqlx::Error> {
112 |     let mut record = sqlx::query!(
113 |         r#"
114 |         select id, name, is_default
115 |         from app.images
116 |         "#,
117 |     )
118 |     .fetch_all(pool)
119 |     .await?;
120 | 
121 |     Ok(record
122 |         .drain(..)
123 |         .map(|r| Image {
124 |             id: r.id,
125 |             name: r.name,
126 |             is_default: r.is_default,
127 |         })
128 |         .collect())
129 | }
130 | 
131 | pub async fn read_image_by_replicator_id(
132 |     pool: &PgPool,
133 |     replicator_id: i64,
134 | ) -> Result<Option<Image>, sqlx::Error> {
135 |     let record = sqlx::query!(
136 |         r#"
137 |         select i.id, i.name, i.is_default
138 |         from app.images i
139 |         join app.replicators r on i.id = r.image_id
140 |         where r.id = $1
141 |         "#,
142 |         replicator_id,
143 |     )
144 |     .fetch_optional(pool)
145 |     .await?;
146 | 
147 |     Ok(record.map(|r| Image {
148 |         id: r.id,
149 |         name: r.name,
150 |         is_default: r.is_default,
151 |     }))
152 | }
--------------------------------------------------------------------------------
/api/src/db/mod.rs:
--------------------------------------------------------------------------------
 1 | pub mod destinations;
 2 | pub mod destinations_pipelines;
 3 | pub mod images;
 4 | pub mod pipelines;
 5 | pub mod publications;
 6 | pub mod replicators;
 7 | pub mod sources;
 8 | pub mod tables;
 9 | pub mod tenants;
10 | pub mod tenants_sources;
--------------------------------------------------------------------------------
/api/src/db/publications.rs:
--------------------------------------------------------------------------------
 1 | use std::collections::HashMap;
 2 | 
 3 | use pg_escape::{quote_identifier, quote_literal};
 4 | use serde::Serialize;
 5 | use sqlx::{postgres::PgConnectOptions, Connection, Executor, PgConnection, Row};
 6 | use utoipa::ToSchema;
 7 | 
 8 | use super::tables::Table;
 9 | 
10 | #[derive(Serialize, ToSchema)]
11 | pub struct Publication {
12 |     pub name: String,
13 |     pub tables: Vec<Table>,
14 | }
15 | 
16 | pub async fn create_publication(
17 |     publication: &Publication,
18 |     options: &PgConnectOptions,
19 | ) -> Result<(), sqlx::Error> {
20 |     let mut query = String::new();
21 |     let quoted_publication_name = quote_identifier(&publication.name);
22 |     query.push_str("create publication ");
23 |     query.push_str(&quoted_publication_name);
24 |     query.push_str(" for table only ");
25 | 
26 |     for (i, table) in publication.tables.iter().enumerate() {
27 |         let quoted_schema = quote_identifier(&table.schema);
28 |         let quoted_name = quote_identifier(&table.name);
29 |         query.push_str(&quoted_schema);
30 |         query.push('.');
31 |         query.push_str(&quoted_name);
32 | 
33 |         if i < publication.tables.len() - 1 {
34 |             query.push(',')
35 |         }
36 |     }
37 | 
38 |     let mut connection = PgConnection::connect_with(options).await?;
39 |     connection.execute(query.as_str()).await?;
40 | 
41 |     Ok(())
42 | }
43 | 
44 | pub async fn update_publication(
45 |     publication: &Publication,
46 |     options: &PgConnectOptions,
47 | ) -> Result<(), sqlx::Error> {
48 |     let mut query = String::new();
49 |     let quoted_publication_name = quote_identifier(&publication.name);
50 |     query.push_str("alter publication ");
51 |     query.push_str(&quoted_publication_name);
52 |     query.push_str(" set table only ");
53 | 
54 |     for (i, table) in publication.tables.iter().enumerate() {
55 |         let quoted_schema = quote_identifier(&table.schema);
56 |         let quoted_name = quote_identifier(&table.name);
57 |         query.push_str(&quoted_schema);
58 |         query.push('.');
59 |         query.push_str(&quoted_name);
60 | 
61 |         if i < publication.tables.len() - 1 {
62 |             query.push(',')
63 |         }
64 |     }
65 | 
66 |     let mut connection = PgConnection::connect_with(options).await?;
67 |     connection.execute(query.as_str()).await?;
68 | 
69 |     Ok(())
70 | }
71 | 
72 | pub async fn drop_publication(
73 |     publication_name: &str,
74 |     options: &PgConnectOptions,
75 | ) -> Result<(), sqlx::Error> {
 76 |     let mut query = String::new();
 77 |     query.push_str("drop publication if exists ");
 78 |     let quoted_publication_name = quote_identifier(publication_name);
 79 |     query.push_str(&quoted_publication_name);
 80 | 
 81 |     let mut connection = PgConnection::connect_with(options).await?;
 82 |     connection.execute(query.as_str()).await?;
 83 | 
 84 |     Ok(())
 85 | }
 86 | 
 87 | pub async fn read_publication(
 88 |     publication_name: &str,
 89 |     options: &PgConnectOptions,
 90 | ) -> Result<Option<Publication>, sqlx::Error> {
 91 |     let mut query = String::new();
 92 |     query.push_str(
 93 |         r#"
 94 |         select p.pubname,
 95 |             pt.schemaname as "schemaname?",
 96 |             pt.tablename as "tablename?"
 97 |         from pg_publication p
 98 |         left join pg_publication_tables pt on p.pubname = pt.pubname
 99 |         where
100 |             p.puballtables = false
101 |             and p.pubinsert = true
102 |             and p.pubupdate = true
103 |             and p.pubdelete = true
104 |             and p.pubtruncate = true
105 |             and p.pubname =
106 |         "#,
107 |     );
108 | 
109 |     let quoted_publication_name = quote_literal(publication_name);
110 |     query.push_str(&quoted_publication_name);
111 | 
112 |     let mut connection = PgConnection::connect_with(options).await?;
113 | 
114 |     let mut tables = vec![];
115 |     let mut name: Option<String> = None;
116 | 
117 |     for row in connection.fetch_all(query.as_str()).await? {
118 |         let pub_name: String = row.get("pubname");
119 |         if let Some(ref name) = name {
120 |             assert_eq!(name.as_str(), pub_name);
121 |         } else {
122 |             name = Some(pub_name);
123 |         }
124 |         let schema: Option<String> = row.get("schemaname?");
125 |         let name: Option<String> = row.get("tablename?");
126 |         if let (Some(schema), Some(name)) = (schema, name) {
127 |             tables.push(Table { schema, name });
128 |         }
129 |     }
130 | 
131 |     let publication = name.map(|name| Publication { name, tables });
132 | 
133 |     Ok(publication)
134 | }
135 | 
136 | pub async fn read_all_publications(
137 |     options: &PgConnectOptions,
138 | ) -> Result<Vec<Publication>, sqlx::Error> {
139 |     let query = r#"
140 |         select p.pubname,
141 |             pt.schemaname as "schemaname?",
142 |             pt.tablename as "tablename?"
143 |         from pg_publication p
144 |         left join pg_publication_tables pt on p.pubname = pt.pubname
145 |         where
146 |             p.puballtables = false
147 |             and p.pubinsert = true
148 |             and p.pubupdate = true
149 |             and p.pubdelete = true
150 |             and p.pubtruncate = true;
151 |         "#;
152 | 
153 |     let mut connection = PgConnection::connect_with(options).await?;
154 | 
155 |     let mut pub_name_to_tables: HashMap<String, Vec<Table>> = HashMap::new();
156 | 
157 |     for row in connection.fetch_all(query).await? {
158 |         let pub_name: String = row.get("pubname");
159 |         let schema: Option<String> = row.get("schemaname?");
160 |         let name: Option<String> = row.get("tablename?");
161 |         let tables = pub_name_to_tables.entry(pub_name).or_default();
162 | 
163 |         if let (Some(schema), Some(name)) = (schema, name) {
164 |             tables.push(Table { schema, name });
165 |         }
166 |     }
167 | 
168 |     let publications = pub_name_to_tables
169 |         .into_iter()
170 |         .map(|(name, tables)| Publication { name, tables })
171 |         .collect();
172 | 
173 |     Ok(publications)
174 | }
--------------------------------------------------------------------------------
/api/src/db/replicators.rs:
--------------------------------------------------------------------------------
 1 | use sqlx::{PgPool, Postgres, Transaction};
 2 | 
 3 | pub struct Replicator {
 4 |     pub id: i64,
 5 |     pub tenant_id: String,
 6 |     pub image_id: i64,
 7 | }
 8 | 
 9 | pub async fn create_replicator(
10 |     pool: &PgPool,
11 |     tenant_id: &str,
12 |     image_id: i64,
13 | ) -> Result<i64, sqlx::Error> {
14 |     let mut txn = pool.begin().await?;
15 |     let res = create_replicator_txn(&mut txn, tenant_id, image_id).await;
16 |     txn.commit().await?;
17 |     res
18 | }
19 | 
20 | pub async fn create_replicator_txn(
21 |     txn: &mut Transaction<'_, Postgres>,
22 |     tenant_id: &str,
23 |     image_id: i64,
24 | ) -> Result<i64, sqlx::Error> {
25 |     let record = sqlx::query!(
26 |         r#"
27 |         insert into app.replicators (tenant_id, image_id)
28 |         values ($1, $2)
29 |         returning id
30 |         "#,
31 |         tenant_id,
32 |         image_id
33 |     )
34 |     .fetch_one(&mut **txn)
35 |     .await?;
36 | 
37 |     Ok(record.id)
38 | }
39 | 
40 | pub async fn read_replicator_by_pipeline_id(
41 |     pool: &PgPool,
42 |     tenant_id: &str,
43 |     pipeline_id: i64,
44 | ) -> Result<Option<Replicator>, sqlx::Error> {
45 |     let record = sqlx::query!(
46 |         r#"
47 |         select r.id, r.tenant_id, r.image_id
48 |         from app.replicators r
49 |         join app.pipelines p on r.id = p.replicator_id
50 |         where r.tenant_id = $1 and p.tenant_id = $1 and p.id = $2
51 |         "#,
52 |         tenant_id,
53 |         pipeline_id,
54 |     )
55 |     .fetch_optional(pool)
56 |     .await?;
57 | 
58 |     Ok(record.map(|r| Replicator {
59 |         id: r.id,
60 |         tenant_id: r.tenant_id,
61 |         image_id: r.image_id,
62 |     }))
63 | }
64 | 
65 | pub async fn read_replicators(
66 |     pool: &PgPool,
67 |     tenant_id: &str,
68 | ) -> Result<Vec<Replicator>, sqlx::Error> {
69 |     let mut records = sqlx::query!(
70 |         r#"
71 |         select r.id, r.tenant_id, r.image_id
72 |         from app.replicators r
73 |         join app.pipelines p on r.id = p.replicator_id
74 |         where r.tenant_id = $1 and p.tenant_id = $1
75 |         "#,
76 |         tenant_id,
77 |     )
78 |     .fetch_all(pool)
79 |     .await?;
80 | 
81 |     Ok(records
82 |         .drain(..)
 83 |         .map(|r| Replicator {
 84 |             id: r.id,
 85 |             tenant_id: r.tenant_id,
 86 |             image_id: r.image_id,
 87 |         })
 88 |         .collect())
 89 | }
--------------------------------------------------------------------------------
/api/src/db/tables.rs:
--------------------------------------------------------------------------------
 1 | use serde::{Deserialize, Serialize};
 2 | use sqlx::{postgres::PgConnectOptions, Connection, Executor, PgConnection, Row};
 3 | 
 4 | #[derive(Serialize, Deserialize)]
 5 | pub struct Table {
 6 |     pub schema: String,
 7 |     pub name: String,
 8 | }
 9 | 
10 | pub async fn get_tables(options: &PgConnectOptions) -> Result<Vec<Table>, sqlx::Error> {
11 |     let mut connection = PgConnection::connect_with(options).await?;
12 |     let query = r#"
13 |         select
14 |             n.nspname as schema,
15 |             c.relname as name
16 |         from pg_catalog.pg_class c
17 |         left join pg_catalog.pg_namespace n on n.oid = c.relnamespace
18 |         left join pg_catalog.pg_am am on am.oid = c.relam
19 |         where
20 |             c.relkind = 'r'
21 |             and n.nspname <> 'pg_catalog'
22 |             and n.nspname !~ '^pg_toast'
23 |             and n.nspname <> 'information_schema'
24 |             and pg_catalog.pg_table_is_visible(c.oid)
25 |         order by schema, name;
26 |     "#;
27 |     let tables = connection
28 |         .fetch_all(query)
29 |         .await?
30 |         .iter()
31 |         .map(|r| Table {
32 |             schema: r.get("schema"),
33 |             name: r.get("name"),
34 |         })
35 |         .collect();
36 |     Ok(tables)
37 | }
--------------------------------------------------------------------------------
/api/src/db/tenants.rs:
--------------------------------------------------------------------------------
 1 | use sqlx::{PgPool, Postgres, Transaction};
 2 | 
 3 | pub struct Tenant {
 4 |     pub id: String,
 5 |     pub name: String,
 6 | }
 7 | 
 8 | pub async fn create_tenant(
 9 |     pool: &PgPool,
10 |     tenant_id: &str,
11 |     tenant_name: &str,
12 | ) -> Result<String, sqlx::Error> {
13 |     let mut txn = pool.begin().await?;
14 |     let res = create_tenant_txn(&mut txn, tenant_id, tenant_name).await;
15 |     txn.commit().await?;
16 |     res
17 | }
18 | 
19 | pub async fn create_tenant_txn(
20 |     txn: &mut Transaction<'_, Postgres>,
21 |     tenant_id: &str,
22 |     tenant_name: &str,
23 | ) -> Result<String, sqlx::Error> {
24 |     let record = sqlx::query!(
25 |         r#"
26 |         insert into app.tenants (id, name)
27 |         values ($1, $2)
28 |         returning id
29 |         "#,
30 |         tenant_id,
31 |         tenant_name,
32 |     )
33 |     .fetch_one(&mut **txn)
34 |     .await?;
35 | 
36 |     Ok(record.id)
37 | }
38 | 
39 | pub async fn create_or_update_tenant(
40 |     pool: &PgPool,
41 |     tenant_id: &str,
42 |     tenant_name: &str,
43 | ) -> Result<String, sqlx::Error> {
44 |     let record = sqlx::query!(
45 |         r#"
46 |         insert into app.tenants (id, name)
47 |         values ($1, $2)
48 |         on conflict (id) do update set name = $2
49 |         returning id
50 |         "#,
51 |         tenant_id,
52 |         tenant_name,
53 |     )
54 |     .fetch_one(pool)
55 |     .await?;
56 | 
57 |     Ok(record.id)
58 | }
59 | 
60 | pub async fn read_tenant(pool: &PgPool, tenant_id: &str) -> Result<Option<Tenant>, sqlx::Error> {
61 |     let record = sqlx::query!(
62 |         r#"
63 |         select id, name
64 |         from app.tenants
65 |         where id = $1
66 |         "#,
67 |         tenant_id
68 |     )
69 |     .fetch_optional(pool)
70 |     .await?;
71 | 
72 |     Ok(record.map(|r| Tenant {
73 |         id: r.id,
74 |         name: r.name,
75 |     }))
76 | }
77 | 
78 | pub async fn update_tenant(
79 |     pool: &PgPool,
80 |     tenant_id: &str,
81 |     tenant_name: &str,
82 | ) -> Result<Option<String>, sqlx::Error> {
83 |     let record = sqlx::query!(
84 |         r#"
85 |         update app.tenants
86 |         set name = $1
87 |         where id = $2
88 |         returning id
89 |         "#,
90 |         tenant_name,
91 |         tenant_id
92 |     )
93 |     .fetch_optional(pool)
94 |     .await?;
95 | 
96 |     Ok(record.map(|r| r.id))
97 | }
98 | 
99 | pub async fn delete_tenant(pool: &PgPool, tenant_id: &str) -> Result<Option<String>, sqlx::Error> {
100 |     let record = sqlx::query!(
101 |         r#"
102 |         delete from app.tenants
103 |         where id = $1
104 |         returning id
105 |         "#,
106 |         tenant_id
107 |     )
108 |     .fetch_optional(pool)
109 |     .await?;
110 | 
111 |     Ok(record.map(|r| r.id))
112 | }
113 | 
114 | pub async fn read_all_tenants(pool: &PgPool) -> Result<Vec<Tenant>, sqlx::Error> {
115 |     let mut record = sqlx::query!(
116 |         r#"
117 |         select id, name
118 |         from app.tenants
119 |         "#,
120 |     )
121 |     .fetch_all(pool)
122 |     .await?;
123 | 
124 |     Ok(record
125 |         .drain(..)
126 |         .map(|r| Tenant {
127 |             id: r.id,
128 |             name: r.name,
129 |         })
130 |         .collect())
131 | }
--------------------------------------------------------------------------------
/api/src/db/tenants_sources.rs:
--------------------------------------------------------------------------------
 1 | use aws_lc_rs::error::Unspecified;
 2 | use sqlx::PgPool;
 3 | use thiserror::Error;
 4 | 
 5 | use crate::encryption::EncryptionKey;
 6 | 
 7 | use super::{
 8 |     sources::{create_source_txn, SourceConfig, SourcesDbError},
 9 |     tenants::create_tenant_txn,
10 | };
11 | 
12 | #[derive(Debug, Error)]
13 | pub enum TenantSourceDbError {
14 |     #[error("sqlx error: {0}")]
15 |     Sqlx(#[from] sqlx::Error),
16 | 
17 |     #[error("encryption error: {0}")]
18 |     Encryption(#[from] Unspecified),
19 | 
20 |     #[error("sources error: {0}")]
21 |     Sources(#[from] SourcesDbError),
22 | }
23 | 
24 | pub async fn create_tenant_and_source(
25 |     pool: &PgPool,
26 |     tenant_id: &str,
27 |     tenant_name: &str,
28 |     source_name: &str,
29 |     source_config: SourceConfig,
30 |     encryption_key: &EncryptionKey,
31 | ) -> Result<(String, i64), TenantSourceDbError> {
32 |     let db_config = source_config.into_db_config(encryption_key)?;
33 |     let db_config = serde_json::to_value(db_config).expect("failed to serialize config");
34 |     let mut txn = pool.begin().await?;
35 |     let tenant_id = create_tenant_txn(&mut txn, tenant_id, tenant_name).await?;
36 |     let source_id = create_source_txn(&mut txn, &tenant_id, source_name, db_config).await?;
37 |     txn.commit().await?;
38 |     Ok((tenant_id, source_id))
39 | }
--------------------------------------------------------------------------------
/api/src/encryption.rs:
--------------------------------------------------------------------------------
 1 | use aws_lc_rs::{
 2 |     aead::{Aad, Nonce, RandomizedNonceKey, AES_256_GCM},
 3 |     error::Unspecified,
 4 |     rand::fill,
 5 | };
 6 | 
 7 | pub struct EncryptionKey {
 8 |     pub id: u32,
 9 |     pub key: RandomizedNonceKey,
10 | }
11 | 
12 | #[derive(serde::Serialize, serde::Deserialize, PartialEq, Eq)]
13 | pub struct EncryptedValue {
14 |     pub id: u32,
15 |     pub nonce: String,
16 |     pub value: String,
17 | }
18 | 
19 | pub fn encrypt(
20 |     plaintext: &[u8],
21 |     key: &RandomizedNonceKey,
22 | ) -> Result<(Vec<u8>, Nonce), Unspecified> {
23 |     let mut in_out = plaintext.to_vec();
24 |     let nonce = key.seal_in_place_append_tag(Aad::empty(), &mut in_out)?;
25 |     Ok((in_out, nonce))
26 | }
27 | 
28 | pub fn decrypt(
29 |     mut ciphertext: Vec<u8>,
30 |     nonce: Nonce,
31 |     key: &RandomizedNonceKey,
32 | ) -> Result<Vec<u8>, Unspecified> {
33 |     let plaintext = key.open_in_place(nonce, Aad::empty(), &mut ciphertext)?;
34 |     Ok(plaintext.to_vec())
35 | }
36 | 
37 | /// Generates a `T` byte long random key and wraps it in an AES-256-GCM [`RandomizedNonceKey`]
38 | pub fn generate_random_key<const T: usize>() -> Result<RandomizedNonceKey, Unspecified> {
39 |     let mut key_bytes = [0u8; T];
40 |     fill(&mut key_bytes)?;
41 |     let key = RandomizedNonceKey::new(&AES_256_GCM, &key_bytes)?;
42 |     Ok(key)
43 | }
44 | 
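The helpers above compose into a simple seal/open round trip. The snippet below is a minimal usage sketch, assuming the crate is consumed as the `api` library (as in api/src/main.rs) and that a 32-byte key is requested to match AES-256-GCM; the function name `roundtrip_example` and the sample plaintext are illustrative, not part of the source.

use aws_lc_rs::error::Unspecified;

// Illustrative only: exercises encrypt/decrypt from api/src/encryption.rs.
fn roundtrip_example() -> Result<(), Unspecified> {
    // AES-256-GCM keys are 32 bytes, so request exactly that length.
    let key = api::encryption::generate_random_key::<32>()?;

    let plaintext = b"postgres password";

    // encrypt copies the plaintext, appends the auth tag, and returns the random nonce.
    let (ciphertext, nonce) = api::encryption::encrypt(plaintext, &key)?;

    // decrypt needs the same key plus the nonce produced during encryption.
    let decrypted = api::encryption::decrypt(ciphertext, nonce, &key)?;
    assert_eq!(decrypted, plaintext.to_vec());

    Ok(())
}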
-------------------------------------------------------------------------------- /api/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod authentication; 2 | pub mod configuration; 3 | pub mod db; 4 | pub mod encryption; 5 | pub mod k8s_client; 6 | pub mod replicator_config; 7 | pub mod routes; 8 | pub mod span_builder; 9 | pub mod startup; 10 | pub mod utils; 11 | -------------------------------------------------------------------------------- /api/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::env; 2 | 3 | use anyhow::anyhow; 4 | use api::{ 5 | configuration::{get_settings, Settings}, 6 | startup::Application, 7 | }; 8 | use postgres::sqlx::options::PgDatabaseOptions; 9 | use telemetry::init_tracing; 10 | use tracing::{error, info}; 11 | 12 | #[actix_web::main] 13 | pub async fn main() -> anyhow::Result<()> { 14 | let app_name = env!("CARGO_BIN_NAME"); 15 | // We pass emit_on_span_close = true to emit logs on span close 16 | // for the api because it is a web server and we need to emit logs 17 | // for every closing request. This is a bit of a hack, but it works 18 | // for now. Ideally the tracing middleware should emit a log on 19 | // request end, but it doesn't do that yet. 20 | let _log_flusher = init_tracing(app_name, true)?; 21 | let mut args = env::args(); 22 | 23 | match args.len() { 24 | // Run the application server 25 | 1 => { 26 | let configuration = get_settings::<'_, Settings>()?; 27 | log_pg_database_options(&configuration.database); 28 | let application = Application::build(configuration.clone()).await?; 29 | application.run_until_stopped().await?; 30 | } 31 | // Handle single command commands 32 | 2 => { 33 | let command = args.nth(1).unwrap(); 34 | match command.as_str() { 35 | "migrate" => { 36 | let options = get_settings::<'_, PgDatabaseOptions>()?; 37 | log_pg_database_options(&options); 38 | Application::migrate_database(options).await?; 39 | info!("database migrated successfully"); 40 | } 41 | _ => { 42 | let message = format!("invalid command: {command}"); 43 | error!("{message}"); 44 | return Err(anyhow!(message)); 45 | } 46 | } 47 | } 48 | _ => { 49 | let message = "invalid number of command line arguments"; 50 | error!("{message}"); 51 | return Err(anyhow!(message)); 52 | } 53 | } 54 | 55 | Ok(()) 56 | } 57 | 58 | fn log_pg_database_options(options: &PgDatabaseOptions) { 59 | info!( 60 | host = options.host, 61 | port = options.port, 62 | dbname = options.name, 63 | username = options.username, 64 | require_ssl = options.require_ssl, 65 | "pg database options", 66 | ); 67 | } 68 | -------------------------------------------------------------------------------- /api/src/routes/health_check.rs: -------------------------------------------------------------------------------- 1 | use actix_web::{get, HttpResponse, Responder}; 2 | 3 | #[utoipa::path( 4 | responses( 5 | (status = 200, description = "Api is healthy"), 6 | ) 7 | )] 8 | #[get("/health_check")] 9 | pub async fn health_check() -> impl Responder { 10 | HttpResponse::Ok().body("ok") 11 | } 12 | -------------------------------------------------------------------------------- /api/src/routes/images.rs: -------------------------------------------------------------------------------- 1 | use actix_web::{ 2 | delete, get, 3 | http::{header::ContentType, StatusCode}, 4 | post, 5 | web::{Data, Json, Path}, 6 | HttpResponse, Responder, ResponseError, 7 | }; 8 | use serde::{Deserialize, Serialize}; 9 | 
use sqlx::PgPool; 10 | use thiserror::Error; 11 | use utoipa::ToSchema; 12 | 13 | use crate::db; 14 | 15 | use super::ErrorMessage; 16 | 17 | #[derive(Debug, Error)] 18 | enum ImageError { 19 | #[error("database error: {0}")] 20 | DatabaseError(#[from] sqlx::Error), 21 | 22 | #[error("image with id {0} not found")] 23 | ImageNotFound(i64), 24 | } 25 | 26 | impl ImageError { 27 | fn to_message(&self) -> String { 28 | match self { 29 | // Do not expose internal database details in error messages 30 | ImageError::DatabaseError(_) => "internal server error".to_string(), 31 | // Every other message is ok, as they do not divulge sensitive information 32 | e => e.to_string(), 33 | } 34 | } 35 | } 36 | 37 | impl ResponseError for ImageError { 38 | fn status_code(&self) -> StatusCode { 39 | match self { 40 | ImageError::DatabaseError(_) => StatusCode::INTERNAL_SERVER_ERROR, 41 | ImageError::ImageNotFound(_) => StatusCode::NOT_FOUND, 42 | } 43 | } 44 | 45 | fn error_response(&self) -> HttpResponse { 46 | let error_message = ErrorMessage { 47 | error: self.to_message(), 48 | }; 49 | let body = 50 | serde_json::to_string(&error_message).expect("failed to serialize error message"); 51 | HttpResponse::build(self.status_code()) 52 | .insert_header(ContentType::json()) 53 | .body(body) 54 | } 55 | } 56 | 57 | #[derive(Deserialize, ToSchema)] 58 | pub struct PostImageRequest { 59 | #[schema(example = "supabase/replicator:1.2.3")] 60 | pub name: String, 61 | #[schema(example = true)] 62 | pub is_default: bool, 63 | } 64 | 65 | #[derive(Serialize, ToSchema)] 66 | pub struct PostImageResponse { 67 | id: i64, 68 | } 69 | 70 | #[derive(Serialize, ToSchema)] 71 | pub struct GetImageResponse { 72 | id: i64, 73 | name: String, 74 | is_default: bool, 75 | } 76 | 77 | #[derive(Serialize, ToSchema)] 78 | pub struct GetImagesResponse { 79 | images: Vec, 80 | } 81 | 82 | #[utoipa::path( 83 | context_path = "/v1", 84 | request_body = PostImageRequest, 85 | responses( 86 | (status = 200, description = "Create new image", body = PostImageResponse), 87 | (status = 500, description = "Internal server error") 88 | ) 89 | )] 90 | #[post("/images")] 91 | pub async fn create_image( 92 | pool: Data, 93 | image: Json, 94 | ) -> Result { 95 | let image = image.0; 96 | let id = db::images::create_image(&pool, &image.name, image.is_default).await?; 97 | let response = PostImageResponse { id }; 98 | Ok(Json(response)) 99 | } 100 | 101 | #[utoipa::path( 102 | context_path = "/v1", 103 | params( 104 | ("image_id" = i64, Path, description = "Id of the image"), 105 | ), 106 | responses( 107 | (status = 200, description = "Return image with id = image_id", body = GetImageResponse), 108 | (status = 404, description = "Image not found"), 109 | (status = 500, description = "Internal server error") 110 | ) 111 | )] 112 | #[get("/images/{image_id}")] 113 | pub async fn read_image( 114 | pool: Data, 115 | image_id: Path, 116 | ) -> Result { 117 | let image_id = image_id.into_inner(); 118 | let response = db::images::read_image(&pool, image_id) 119 | .await? 
120 | .map(|s| GetImageResponse { 121 | id: s.id, 122 | name: s.name, 123 | is_default: s.is_default, 124 | }) 125 | .ok_or(ImageError::ImageNotFound(image_id))?; 126 | Ok(Json(response)) 127 | } 128 | 129 | #[utoipa::path( 130 | context_path = "/v1", 131 | request_body = PostImageRequest, 132 | params( 133 | ("image_id" = i64, Path, description = "Id of the image"), 134 | ), 135 | responses( 136 | (status = 200, description = "Update image with id = image_id"), 137 | (status = 404, description = "Image not found"), 138 | (status = 500, description = "Internal server error") 139 | ) 140 | )] 141 | #[post("/images/{image_id}")] 142 | pub async fn update_image( 143 | pool: Data, 144 | image_id: Path, 145 | image: Json, 146 | ) -> Result { 147 | let image_id = image_id.into_inner(); 148 | db::images::update_image(&pool, image_id, &image.name, image.is_default) 149 | .await? 150 | .ok_or(ImageError::ImageNotFound(image_id))?; 151 | Ok(HttpResponse::Ok().finish()) 152 | } 153 | 154 | #[utoipa::path( 155 | context_path = "/v1", 156 | params( 157 | ("image_id" = i64, Path, description = "Id of the image"), 158 | ), 159 | responses( 160 | (status = 200, description = "Delete image with id = image_id"), 161 | (status = 404, description = "Image not found"), 162 | (status = 500, description = "Internal server error") 163 | ) 164 | )] 165 | #[delete("/images/{image_id}")] 166 | pub async fn delete_image( 167 | pool: Data, 168 | image_id: Path, 169 | ) -> Result { 170 | let image_id = image_id.into_inner(); 171 | db::images::delete_image(&pool, image_id) 172 | .await? 173 | .ok_or(ImageError::ImageNotFound(image_id))?; 174 | Ok(HttpResponse::Ok().finish()) 175 | } 176 | 177 | #[utoipa::path( 178 | context_path = "/v1", 179 | responses( 180 | (status = 200, description = "Return all images"), 181 | (status = 500, description = "Internal server error") 182 | ) 183 | )] 184 | #[get("/images")] 185 | pub async fn read_all_images(pool: Data) -> Result { 186 | let mut images = vec![]; 187 | for image in db::images::read_all_images(&pool).await? 
{ 188 | let image = GetImageResponse { 189 | id: image.id, 190 | name: image.name, 191 | is_default: image.is_default, 192 | }; 193 | images.push(image); 194 | } 195 | let response = GetImagesResponse { images }; 196 | Ok(Json(response)) 197 | } 198 | -------------------------------------------------------------------------------- /api/src/routes/mod.rs: -------------------------------------------------------------------------------- 1 | use actix_web::HttpRequest; 2 | use serde::Serialize; 3 | use thiserror::Error; 4 | 5 | pub mod destinations; 6 | pub mod destinations_pipelines; 7 | pub mod health_check; 8 | pub mod images; 9 | pub mod pipelines; 10 | pub mod sources; 11 | pub mod tenants; 12 | pub mod tenants_sources; 13 | 14 | #[derive(Serialize)] 15 | pub struct ErrorMessage { 16 | pub error: String, 17 | } 18 | 19 | #[derive(Debug, Error)] 20 | pub enum TenantIdError { 21 | #[error("tenant id missing in request")] 22 | TenantIdMissing, 23 | 24 | #[error("tenant id ill formed in request")] 25 | TenantIdIllFormed, 26 | } 27 | 28 | fn extract_tenant_id(req: &HttpRequest) -> Result<&str, TenantIdError> { 29 | let headers = req.headers(); 30 | let tenant_id = headers 31 | .get("tenant_id") 32 | .ok_or(TenantIdError::TenantIdMissing)?; 33 | let tenant_id = tenant_id 34 | .to_str() 35 | .map_err(|_| TenantIdError::TenantIdIllFormed)?; 36 | Ok(tenant_id) 37 | } 38 | -------------------------------------------------------------------------------- /api/src/routes/sources/tables.rs: -------------------------------------------------------------------------------- 1 | use actix_web::{ 2 | get, 3 | http::{header::ContentType, StatusCode}, 4 | web::{Data, Json, Path}, 5 | HttpRequest, HttpResponse, Responder, ResponseError, 6 | }; 7 | use serde::Serialize; 8 | use sqlx::PgPool; 9 | use thiserror::Error; 10 | use utoipa::ToSchema; 11 | 12 | use crate::{ 13 | db::{self, sources::SourcesDbError, tables::Table}, 14 | encryption::EncryptionKey, 15 | routes::{extract_tenant_id, ErrorMessage, TenantIdError}, 16 | }; 17 | 18 | #[derive(Debug, Error)] 19 | enum TableError { 20 | #[error("database error: {0}")] 21 | DatabaseError(#[from] sqlx::Error), 22 | 23 | #[error("source with id {0} not found")] 24 | SourceNotFound(i64), 25 | 26 | #[error("tenant id error: {0}")] 27 | TenantId(#[from] TenantIdError), 28 | 29 | #[error("sources db error: {0}")] 30 | SourcesDb(#[from] SourcesDbError), 31 | } 32 | 33 | impl TableError { 34 | fn to_message(&self) -> String { 35 | match self { 36 | // Do not expose internal database details in error messages 37 | TableError::DatabaseError(_) => "internal server error".to_string(), 38 | // Every other message is ok, as they do not divulge sensitive information 39 | e => e.to_string(), 40 | } 41 | } 42 | } 43 | 44 | #[derive(Serialize, ToSchema)] 45 | pub struct GetTablesReponse { 46 | pub tables: Vec
, 47 | } 48 | 49 | impl ResponseError for TableError { 50 | fn status_code(&self) -> StatusCode { 51 | match self { 52 | TableError::DatabaseError(_) | TableError::SourcesDb(_) => { 53 | StatusCode::INTERNAL_SERVER_ERROR 54 | } 55 | TableError::SourceNotFound(_) => StatusCode::NOT_FOUND, 56 | TableError::TenantId(_) => StatusCode::BAD_REQUEST, 57 | } 58 | } 59 | 60 | fn error_response(&self) -> HttpResponse { 61 | let error_message = ErrorMessage { 62 | error: self.to_message(), 63 | }; 64 | let body = 65 | serde_json::to_string(&error_message).expect("failed to serialize error message"); 66 | HttpResponse::build(self.status_code()) 67 | .insert_header(ContentType::json()) 68 | .body(body) 69 | } 70 | } 71 | 72 | #[utoipa::path( 73 | context_path = "/v1", 74 | params( 75 | ("source_id" = i64, Path, description = "Id of the source"), 76 | ), 77 | responses( 78 | (status = 200, description = "Return all tables from source with id = source_id", body = Vec
), 79 | (status = 500, description = "Internal server error") 80 | ) 81 | )] 82 | #[get("/sources/{source_id}/tables")] 83 | pub async fn read_table_names( 84 | req: HttpRequest, 85 | pool: Data, 86 | encryption_key: Data, 87 | source_id: Path, 88 | ) -> Result { 89 | let tenant_id = extract_tenant_id(&req)?; 90 | let source_id = source_id.into_inner(); 91 | 92 | let config = db::sources::read_source(&pool, tenant_id, source_id, &encryption_key) 93 | .await? 94 | .map(|s| s.config) 95 | .ok_or(TableError::SourceNotFound(source_id))?; 96 | 97 | let options = config.connect_options(); 98 | let tables = db::tables::get_tables(&options).await?; 99 | let response = GetTablesReponse { tables }; 100 | Ok(Json(response)) 101 | } 102 | -------------------------------------------------------------------------------- /api/src/routes/tenants_sources.rs: -------------------------------------------------------------------------------- 1 | use actix_web::{ 2 | http::{header::ContentType, StatusCode}, 3 | post, 4 | web::{Data, Json}, 5 | HttpResponse, Responder, ResponseError, 6 | }; 7 | use serde::{Deserialize, Serialize}; 8 | use sqlx::PgPool; 9 | use thiserror::Error; 10 | use tracing_actix_web::RootSpan; 11 | use utoipa::ToSchema; 12 | 13 | use crate::{ 14 | db::{self, sources::SourceConfig, tenants_sources::TenantSourceDbError}, 15 | encryption::EncryptionKey, 16 | }; 17 | 18 | use super::ErrorMessage; 19 | 20 | #[derive(Deserialize, ToSchema)] 21 | pub struct CreateTenantSourceRequest { 22 | #[schema(example = "abcdefghijklmnopqrst", required = true)] 23 | tenant_id: String, 24 | 25 | #[schema(example = "Tenant Name", required = true)] 26 | tenant_name: String, 27 | 28 | #[schema(example = "Source Name", required = true)] 29 | source_name: String, 30 | 31 | #[schema(required = true)] 32 | source_config: SourceConfig, 33 | } 34 | 35 | #[derive(Debug, Error)] 36 | enum TenantSourceError { 37 | #[error("tenants and sources db error: {0}")] 38 | TenantSourceDb(#[from] TenantSourceDbError), 39 | } 40 | 41 | impl TenantSourceError { 42 | fn to_message(&self) -> String { 43 | match self { 44 | TenantSourceError::TenantSourceDb(_) => "internal server error".to_string(), 45 | } 46 | } 47 | } 48 | 49 | impl ResponseError for TenantSourceError { 50 | fn status_code(&self) -> StatusCode { 51 | match self { 52 | TenantSourceError::TenantSourceDb(e) => match e { 53 | TenantSourceDbError::Sqlx(_) 54 | | TenantSourceDbError::Sources(_) 55 | | TenantSourceDbError::Encryption(_) => StatusCode::INTERNAL_SERVER_ERROR, 56 | }, 57 | } 58 | } 59 | 60 | fn error_response(&self) -> HttpResponse { 61 | let error_message = ErrorMessage { 62 | error: self.to_message(), 63 | }; 64 | let body = 65 | serde_json::to_string(&error_message).expect("failed to serialize error message"); 66 | HttpResponse::build(self.status_code()) 67 | .insert_header(ContentType::json()) 68 | .body(body) 69 | } 70 | } 71 | 72 | #[derive(Serialize, ToSchema)] 73 | pub struct PostTenantSourceResponse { 74 | tenant_id: String, 75 | source_id: i64, 76 | } 77 | 78 | #[utoipa::path( 79 | context_path = "/v1", 80 | request_body = CreateTenantSourceRequest, 81 | responses( 82 | (status = 200, description = "Create a new tenant and a source", body = PostTenantSourceResponse), 83 | (status = 500, description = "Internal server error") 84 | ) 85 | )] 86 | #[post("/tenants-sources")] 87 | pub async fn create_tenant_and_source( 88 | pool: Data, 89 | tenant_and_source: Json, 90 | encryption_key: Data, 91 | root_span: RootSpan, 92 | ) -> Result { 93 | let 
tenant_and_source = tenant_and_source.0; 94 | let CreateTenantSourceRequest { 95 | tenant_id, 96 | tenant_name, 97 | source_name, 98 | source_config, 99 | } = tenant_and_source; 100 | root_span.record("project", &tenant_id); 101 | let (tenant_id, source_id) = db::tenants_sources::create_tenant_and_source( 102 | &pool, 103 | &tenant_id, 104 | &tenant_name, 105 | &source_name, 106 | source_config, 107 | &encryption_key, 108 | ) 109 | .await?; 110 | let response = PostTenantSourceResponse { 111 | tenant_id, 112 | source_id, 113 | }; 114 | Ok(Json(response)) 115 | } 116 | -------------------------------------------------------------------------------- /api/src/span_builder.rs: -------------------------------------------------------------------------------- 1 | use actix_web::{ 2 | body::MessageBody, 3 | dev::{ServiceRequest, ServiceResponse}, 4 | Error, 5 | }; 6 | use tracing::Span; 7 | use tracing_actix_web::{DefaultRootSpanBuilder, RootSpanBuilder}; 8 | 9 | /// The `RootSpanBuilder` implementation for the API service. 10 | /// It extracts the project ref from the `tenant_id` header 11 | /// and sets it as a field in the root span. If the header is not 12 | /// present, it sets the field to `Empty` to allow handlers 13 | /// to set it later. 14 | pub struct ApiRootSpanBuilder; 15 | 16 | impl RootSpanBuilder for ApiRootSpanBuilder { 17 | fn on_request_start(request: &ServiceRequest) -> Span { 18 | let project = request.headers().get("tenant_id"); 19 | match project { 20 | Some(project) => { 21 | // We convert lossily to a string to be able to read at least part of the 22 | // project ref in case of invalid UTF-8. This is useful for debugging. 23 | // This is anyways an extreme edge case, as the project ref is 24 | // generated by the system and should be valid UTF-8. 25 | let project = String::from_utf8_lossy(project.as_bytes()); 26 | let project = project.as_ref(); 27 | tracing_actix_web::root_span!(request, project = project) 28 | } 29 | None => tracing_actix_web::root_span!(request, project = tracing::field::Empty), 30 | } 31 | } 32 | 33 | fn on_request_end(span: Span, outcome: &Result, Error>) { 34 | DefaultRootSpanBuilder::on_request_end(span, outcome); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /api/src/utils.rs: -------------------------------------------------------------------------------- 1 | use rand::{distributions::Slice, Rng}; 2 | 3 | /// Generates a random alphabetic string of length `len` 4 | pub fn generate_random_alpha_str(len: usize) -> String { 5 | let chars = [ 6 | 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 7 | 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 8 | ]; 9 | let chars_dist = Slice::new(&chars).expect("passed slice was empty"); 10 | let rng = rand::thread_rng(); 11 | rng.sample_iter(&chars_dist).take(len).collect() 12 | } 13 | -------------------------------------------------------------------------------- /api/tests/common/database.rs: -------------------------------------------------------------------------------- 1 | use postgres::sqlx::options::PgDatabaseOptions; 2 | use postgres::sqlx::test_utils::create_pg_database; 3 | use sqlx::PgPool; 4 | 5 | /// Creates and configures a new PostgreSQL database for the API. 6 | /// 7 | /// Similar to [`create_pg_database`], but additionally runs all database migrations 8 | /// from the "./migrations" directory after creation. Returns a [`PgPool`] 9 | /// connected to the newly created and migrated database. 
Panics if database 10 | /// creation or migration fails. 11 | pub async fn create_etl_api_database(options: &PgDatabaseOptions) -> PgPool { 12 | let connection_pool = create_pg_database(options).await; 13 | 14 | sqlx::migrate!("./migrations") 15 | .run(&connection_pool) 16 | .await 17 | .expect("Failed to migrate the database"); 18 | 19 | connection_pool 20 | } 21 | -------------------------------------------------------------------------------- /api/tests/common/mod.rs: -------------------------------------------------------------------------------- 1 | //! Common test utilities for etl API tests. 2 | //! 3 | //! This module provides shared functionality used across integration tests: 4 | //! 5 | //! - `test_app`: A test application wrapper that provides: 6 | //! - A running instance of the API server for testing 7 | //! - Helper methods for making authenticated HTTP requests 8 | //! - Request/response type definitions for all API endpoints 9 | //! - Methods to create, read, update, and delete resources 10 | //! 11 | //! - `database`: Database configuration utilities that: 12 | //! - Set up test databases with proper configuration 13 | //! - Handle database migrations 14 | //! - Provide connection pools for tests 15 | //! 16 | //! These utilities help maintain consistency across tests and reduce code duplication. 17 | pub mod database; 18 | pub mod test_app; 19 | -------------------------------------------------------------------------------- /api/tests/integration/health_check_test.rs: -------------------------------------------------------------------------------- 1 | use crate::common::test_app::spawn_test_app; 2 | 3 | #[tokio::test(flavor = "multi_thread")] 4 | async fn health_check_works() { 5 | // Arrange 6 | let app = spawn_test_app().await; 7 | 8 | let client = reqwest::Client::new(); 9 | 10 | // Act 11 | let response = client 12 | .get(format!("{}/health_check", app.address)) 13 | .send() 14 | .await 15 | .expect("Failed to execute request."); 16 | 17 | // Assert 18 | assert!(response.status().is_success()); 19 | assert_eq!(Some(2), response.content_length()); 20 | } 21 | -------------------------------------------------------------------------------- /api/tests/integration/mod.rs: -------------------------------------------------------------------------------- 1 | mod destination_test; 2 | mod destinations_pipelines; 3 | mod health_check_test; 4 | mod images_test; 5 | mod pipelines_test; 6 | mod sources_test; 7 | mod tenants_sources_test; 8 | mod tenants_test; 9 | -------------------------------------------------------------------------------- /api/tests/integration/tenants_sources_test.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | common::test_app::{ 3 | spawn_test_app, CreateTenantSourceRequest, CreateTenantSourceResponse, SourceResponse, 4 | TenantResponse, 5 | }, 6 | integration::sources_test::{new_name, new_source_config}, 7 | }; 8 | 9 | #[tokio::test(flavor = "multi_thread")] 10 | async fn tenant_and_source_can_be_created() { 11 | // Arrange 12 | let app = spawn_test_app().await; 13 | 14 | // Act 15 | let tenant_source = CreateTenantSourceRequest { 16 | tenant_id: "abcdefghijklmnopqrst".to_string(), 17 | tenant_name: "NewTenant".to_string(), 18 | source_name: new_name(), 19 | source_config: new_source_config(), 20 | }; 21 | let response = app.create_tenant_source(&tenant_source).await; 22 | 23 | // Assert 24 | assert!(response.status().is_success()); 25 | let response: CreateTenantSourceResponse = response 26 | 
.json() 27 | .await 28 | .expect("failed to deserialize response"); 29 | assert_eq!(response.tenant_id, "abcdefghijklmnopqrst"); 30 | assert_eq!(response.source_id, 1); 31 | 32 | let tenant_id = &response.tenant_id; 33 | let source_id = response.source_id; 34 | 35 | let response = app.read_tenant(tenant_id).await; 36 | let response: TenantResponse = response 37 | .json() 38 | .await 39 | .expect("failed to deserialize response"); 40 | assert_eq!(&response.id, tenant_id); 41 | assert_eq!(response.name, tenant_source.tenant_name); 42 | 43 | let response = app.read_source(tenant_id, source_id).await; 44 | let response: SourceResponse = response 45 | .json() 46 | .await 47 | .expect("failed to deserialize response"); 48 | assert_eq!(response.id, source_id); 49 | assert_eq!(&response.tenant_id, tenant_id); 50 | assert_eq!(response.name, tenant_source.source_name); 51 | assert_eq!(response.config, tenant_source.source_config); 52 | } 53 | -------------------------------------------------------------------------------- /api/tests/mod.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | mod integration; 3 | -------------------------------------------------------------------------------- /deny.toml: -------------------------------------------------------------------------------- 1 | [advisories] 2 | version = 2 3 | ignore = [ 4 | { id = "RUSTSEC-2024-0370", reason = "Disabling because it is not a real vulnerability. The crate proc-macro-error is just unmaintained." }, 5 | { id = "RUSTSEC-2024-0384", reason = "Disabling because it is not a real vulnerability. The crate instant is just unmaintained." }, 6 | { id = "RUSTSEC-2025-0012", reason = "Disabling because it is not a real vulnerability. The crate backoff is just unmaintained." }, 7 | { id = "RUSTSEC-2024-0436", reason = "Disabling because it is not a real vulnerability. The crate paste is just unmaintained." 
}, 8 | ] 9 | -------------------------------------------------------------------------------- /etl/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "etl" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [[example]] 9 | name = "bigquery" 10 | required-features = ["bigquery"] 11 | 12 | [[example]] 13 | name = "duckdb" 14 | required-features = ["duckdb"] 15 | 16 | [[example]] 17 | name = "stdout" 18 | required-features = ["stdout"] 19 | 20 | [dependencies] 21 | postgres = { workspace = true, features = ["tokio"] } 22 | 23 | async-trait = { workspace = true } 24 | bigdecimal = { workspace = true, features = ["std"] } 25 | bytes = { workspace = true } 26 | byteorder = { workspace = true } 27 | chrono = { workspace = true, features = ["serde"] } 28 | duckdb = { workspace = true, optional = true } 29 | futures = { workspace = true } 30 | gcp-bigquery-client = { workspace = true, optional = true, features = [ 31 | "rust-tls", 32 | "aws-lc-rs", 33 | ] } 34 | pg_escape = { workspace = true } 35 | pin-project-lite = { workspace = true } 36 | postgres-protocol = { workspace = true } 37 | postgres-replication = { workspace = true } 38 | prost = { workspace = true, optional = true } 39 | rustls = { workspace = true, features = ["aws-lc-rs", "logging"] } 40 | serde = { workspace = true, features = ["derive"] } 41 | serde_json = { workspace = true, features = ["std"] } 42 | thiserror = { workspace = true } 43 | tokio = { workspace = true, features = ["rt-multi-thread", "macros", "sync"] } 44 | tokio-postgres = { workspace = true, features = [ 45 | "runtime", 46 | "with-chrono-0_4", 47 | "with-uuid-1", 48 | "with-serde_json-1", 49 | ] } 50 | tokio-postgres-rustls = { workspace = true } 51 | tracing = { workspace = true, default-features = true } 52 | tracing-subscriber = { workspace = true, default-features = true, features = [ 53 | "env-filter", 54 | ] } 55 | uuid = { workspace = true, features = ["v4"] } 56 | 57 | [dev-dependencies] 58 | postgres = { workspace = true, features = ["test-utils", "tokio"] } 59 | 60 | clap = { workspace = true, default-features = true, features = [ 61 | "std", 62 | "derive", 63 | ] } 64 | 65 | 66 | [features] 67 | bigquery = ["dep:gcp-bigquery-client", "dep:prost"] 68 | duckdb = ["dep:duckdb"] 69 | stdout = [] 70 | # When enabled converts unknown types to bytes 71 | unknown_types_to_bytes = [] 72 | default = ["unknown_types_to_bytes"] 73 | -------------------------------------------------------------------------------- /etl/docs/Replication in Postgres.md: -------------------------------------------------------------------------------- 1 | The following explains in low level detail how Postgres replicates changes from a primary and a replica server. These changes were reverse engineering by sniffing Postgres wire protocol replication traffic using [pgroxy](https://github.com/imor/pgroxy). The full trace from which this information has been collected is in the replication_trace.txt file in this folder. 2 | 3 | Connection 1 4 | ============ 5 | 6 | 1. Connect with following params: 7 | Parameter: database = pub 8 | Parameter: replication = database 9 | Parameter: application_name = mysub 10 | 11 | 2. Run query: SELECT t.pubname FROM 12 | pg_catalog.pg_publication t WHERE 13 | t.pubname IN ('mypub') 14 | Result: 15 | pubname = "mypub" 16 | 17 | 3. 
Run query: SELECT DISTINCT t.schemaname, t.tablename 18 | , t.attnames 19 | FROM pg_catalog.pg_publication_tables t 20 | WHERE t.pubname IN ('mypub') 21 | Result: 22 | schemaname = "public", 23 | tablename = "table_1", 24 | attnames = "{id,name}" 25 | 26 | 4. Run query: CREATE_REPLICATION_SLOT "mysub" LOGICAL pgoutput (SNAPSHOT 'nothing') 27 | Result: 28 | slotname = "mysub", 29 | consistent_point = "0/19BD9E8", 30 | snapshot_name = "", 31 | output_plugin = "pgoutput" 32 | 33 | 5. Close connection 34 | 35 | 36 | Connection 2 (CDC connection) 37 | ============================= 38 | 39 | 1. Connect with following params: 40 | Parameter: database = pub 41 | Parameter: replication = database 42 | Parameter: application_name = mysub 43 | 44 | 2. Run query: IDENTIFY_SYSTEM 45 | Result: 46 | systemid = "7329763972895242536" 47 | timelin = "1" 48 | xlogpos = "0/19BD9E8" 49 | dbname = "pub" 50 | 51 | 3. Run query: START_REPLICATION SLOT "mysub" LOGICAL 0/0 (proto_version '3', publication_names '"mypub"') 52 | Result: 53 | server sends two keep alive messages with "client reply" bit set to 0 54 | client sends standby status update: Data: 55 | received_lsn: 0, 0, 0, 0, 1, 155, 217, 232, 56 | flushed_lsn: 0, 0, 0, 0, 1, 155, 217, 232, 57 | applied_lsn: 0, 0, 0, 0, 1, 155, 217, 232, 58 | client_time: 0, 2, 179, 42, 70, 55, 57, 124 59 | ping: 0 60 | 61 | 62 | Connection 3 (snapshot connection) 63 | ================================== 64 | 65 | 1. Connect with following params: 66 | Parameter: database = pub 67 | Parameter: replication = database 68 | Parameter: application_name = pg_16406_sync_16399_7329764006882527550 69 | 70 | 2. Run query: BEGIN READ ONLY ISOLATION LEVEL REPEATABLE READ 71 | Result: 72 | 73 | 3. Run query: CREATE_REPLICATION_SLOT "pg_16406_sync_16399_7329764006882527550" LOGICAL pgoutput (SNAPSHOT 'use') 74 | Result: 75 | Column: slot_name = "pg_16406_sync_16399_7329764006882527550" 76 | Column: consistent_point = "0/19BDA20" 77 | Column: snapshot_name = "" 78 | Column: output_plugin = "pgoutput" 79 | 80 | 4. Run query: SELECT c.oid, c.relreplident, c.relkind FROM pg_catalog.pg_class c INNER JOIN pg_catalog.pg_namespace n ON (c.relnamespace = n.oid) WHERE n.nspname = 'public' AND c.relname = 'table_1' 81 | Result: 82 | Column: oid = "16389" 83 | Column: relreplident = "d" 84 | Column: relkind = "r" 85 | 86 | 5. Run query: SELECT DISTINCT (CASE WHEN (array_length(gpt.attrs, 1) = c.relnatts) THEN NULL ELSE gpt.attrs END) FROM pg_publication p, LATERAL pg_get_publication_tables(p.pubname) gpt, pg_class c WHERE gpt.relid = 16389 AND c.oid = gpt.relid AND p.pubname IN ( 'mypub' ) 87 | Result: 88 | Column: attrs = "" 89 | 90 | 6. Run query: SELECT a.attnum, a.attname, a.atttypid, a.attnum = ANY(i.indkey) FROM pg_catalog.pg_attribute a LEFT JOIN pg_catalog.pg_index i ON (i.indexrelid = pg_get_replica_identity_index(16389)) WHERE a.attnum > 0::pg_catalog.int2 AND NOT a.attisdropped AND a.attgenerated = '' AND a.attrelid = 16389 ORDER BY a.attnum 91 | Result: 92 | Column: attnum = "1" 93 | Column: attname = "id" 94 | Column: atttypid = "23" 95 | Column: ?column? = "t" 96 | 97 | Column: attnum = "2" 98 | Column: attname = "name" 99 | Column: atttypid = "1043" 100 | Column: ?column? = "f" 101 | 102 | 6. Run query: SELECT DISTINCT pg_get_expr(gpt.qual, gpt.relid) FROM pg_publication p, LATERAL pg_get_publication_tables(p.pubname) gpt WHERE gpt.relid = 16389 AND p.pubname IN ( 'mypub' ) 103 | Result: 104 | Column: pg_get_expr = "" 105 | 106 | 7. 
Run query: COPY public.table_1 (id, name) TO STDOUT 107 | Result: 108 | Data: [49, 9, 100, 97, 116, 97, 49, 10] 109 | Data: [50, 9, 100, 97, 116, 97, 50, 10] 110 | Data: [51, 9, 100, 97, 116, 97, 51, 10] 111 | Data: [52, 9, 100, 97, 116, 97, 52, 10] 112 | Data: [53, 9, 100, 97, 116, 97, 53, 10] 113 | Data: [54, 9, 100, 97, 116, 97, 54, 10] 114 | Data: [55, 9, 100, 97, 116, 97, 55, 10] 115 | Data: [56, 9, 100, 97, 116, 97, 56, 10] 116 | Data: [57, 9, 100, 97, 116, 97, 57, 10] 117 | Data: [49, 48, 9, 100, 97, 116, 97, 49, 48, 10] 118 | 119 | 8. Run query: COMMIT 120 | 121 | 9. Run query: START_REPLICATION SLOT "pg_16406_sync_16399_7329764006882527550" LOGICAL 0/19BDA20 (proto_version '3', publication_names '"mypub"') 122 | Result: 123 | ← Data: [107, 0, 0, 0, 0, 1, 155, 218, 32, 0, 2, 179, 42, 70, 55, 146, 232, 0] 124 | → Data: [114, 0, 0, 0, 0, 1, 155, 218, 32, 0, 0, 0, 0, 1, 155, 218, 32, 0, 0, 0, 0, 1, 155, 218, 32, 0, 2, 179, 42, 70, 55, 146, 255, 0] 125 | 126 | 10. Run query: DROP_REPLICATION_SLOT pg_16406_sync_16399_7329764006882527550 WAIT 127 | Result: 128 | 129 | 11. Terminate 130 | -------------------------------------------------------------------------------- /etl/examples/bigquery.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, time::Duration}; 2 | 3 | use clap::{Args, Parser, Subcommand}; 4 | use etl::{ 5 | pipeline::{ 6 | batching::{data_pipeline::BatchDataPipeline, BatchConfig}, 7 | destinations::bigquery::BigQueryBatchDestination, 8 | sources::postgres::{PostgresSource, TableNamesFrom}, 9 | PipelineAction, 10 | }, 11 | SslMode, 12 | }; 13 | use postgres::schema::TableName; 14 | use postgres::tokio::options::PgDatabaseOptions; 15 | use tracing::error; 16 | use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; 17 | 18 | #[derive(Debug, Parser)] 19 | #[command(name = "bigquery", version, about, arg_required_else_help = true)] 20 | struct AppArgs { 21 | #[clap(flatten)] 22 | db_args: DbArgs, 23 | 24 | #[clap(flatten)] 25 | bq_args: BqArgs, 26 | 27 | #[clap(subcommand)] 28 | command: Command, 29 | } 30 | 31 | #[derive(Debug, Args)] 32 | struct DbArgs { 33 | /// Host on which Postgres is running 34 | #[arg(long)] 35 | db_host: String, 36 | 37 | /// Port on which Postgres is running 38 | #[arg(long)] 39 | db_port: u16, 40 | 41 | /// Postgres database name 42 | #[arg(long)] 43 | db_name: String, 44 | 45 | /// Postgres database user name 46 | #[arg(long)] 47 | db_username: String, 48 | 49 | /// Postgres database user password 50 | #[arg(long)] 51 | db_password: Option, 52 | } 53 | 54 | #[derive(Debug, Args)] 55 | struct BqArgs { 56 | /// Path to GCP's service account key to access BigQuery 57 | #[arg(long)] 58 | bq_sa_key_file: String, 59 | 60 | /// BigQuery project id 61 | #[arg(long)] 62 | bq_project_id: String, 63 | 64 | /// BigQuery dataset id 65 | #[arg(long)] 66 | bq_dataset_id: String, 67 | 68 | #[arg(long)] 69 | max_batch_size: usize, 70 | 71 | #[arg(long)] 72 | max_batch_fill_duration_secs: u64, 73 | } 74 | 75 | #[derive(Debug, Subcommand)] 76 | enum Command { 77 | /// Copy a table 78 | CopyTable { schema: String, name: String }, 79 | 80 | /// Start a change data capture 81 | Cdc { 82 | publication: String, 83 | slot_name: String, 84 | }, 85 | } 86 | 87 | #[tokio::main] 88 | async fn main() -> Result<(), Box> { 89 | if let Err(e) = main_impl().await { 90 | error!("{e}"); 91 | } 92 | 93 | Ok(()) 94 | } 95 | 96 | fn init_tracing() { 97 | tracing_subscriber::registry() 98 | .with( 99 | 
tracing_subscriber::EnvFilter::try_from_default_env() 100 | .unwrap_or_else(|_| "bigquery=info".into()), 101 | ) 102 | .with(tracing_subscriber::fmt::layer()) 103 | .init(); 104 | } 105 | 106 | fn set_log_level() { 107 | if std::env::var("RUST_LOG").is_err() { 108 | std::env::set_var("RUST_LOG", "info"); 109 | } 110 | } 111 | 112 | async fn main_impl() -> Result<(), Box> { 113 | set_log_level(); 114 | init_tracing(); 115 | 116 | rustls::crypto::aws_lc_rs::default_provider() 117 | .install_default() 118 | .expect("failed to install default crypto provider"); 119 | 120 | let args = AppArgs::parse(); 121 | let db_args = args.db_args; 122 | let bq_args = args.bq_args; 123 | 124 | let options = PgDatabaseOptions { 125 | host: db_args.db_host, 126 | port: db_args.db_port, 127 | name: db_args.db_name, 128 | username: db_args.db_username, 129 | password: db_args.db_password, 130 | ssl_mode: SslMode::Disable, 131 | }; 132 | 133 | let (postgres_source, action) = match args.command { 134 | Command::CopyTable { schema, name } => { 135 | let table_names = vec![TableName { schema, name }]; 136 | 137 | let postgres_source = 138 | PostgresSource::new(options, vec![], None, TableNamesFrom::Vec(table_names)) 139 | .await?; 140 | (postgres_source, PipelineAction::TableCopiesOnly) 141 | } 142 | Command::Cdc { 143 | publication, 144 | slot_name, 145 | } => { 146 | let postgres_source = PostgresSource::new( 147 | options, 148 | vec![], 149 | Some(slot_name), 150 | TableNamesFrom::Publication(publication), 151 | ) 152 | .await?; 153 | 154 | (postgres_source, PipelineAction::Both) 155 | } 156 | }; 157 | 158 | let bigquery_destination = BigQueryBatchDestination::new_with_key_path( 159 | bq_args.bq_project_id, 160 | bq_args.bq_dataset_id, 161 | &bq_args.bq_sa_key_file, 162 | 5, 163 | ) 164 | .await?; 165 | 166 | let batch_config = BatchConfig::new( 167 | bq_args.max_batch_size, 168 | Duration::from_secs(bq_args.max_batch_fill_duration_secs), 169 | ); 170 | let mut pipeline = 171 | BatchDataPipeline::new(postgres_source, bigquery_destination, action, batch_config); 172 | 173 | pipeline.start().await?; 174 | 175 | Ok(()) 176 | } 177 | -------------------------------------------------------------------------------- /etl/examples/duckdb.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, time::Duration}; 2 | 3 | use clap::{Args, Parser, Subcommand}; 4 | use etl::{ 5 | pipeline::{ 6 | batching::{data_pipeline::BatchDataPipeline, BatchConfig}, 7 | destinations::duckdb::DuckDbDestination, 8 | sources::postgres::{PostgresSource, TableNamesFrom}, 9 | PipelineAction, 10 | }, 11 | SslMode, 12 | }; 13 | use postgres::schema::TableName; 14 | use postgres::tokio::options::PgDatabaseOptions; 15 | use tracing::error; 16 | use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; 17 | 18 | #[derive(Debug, Parser)] 19 | #[command(name = "duckdb", version, about, arg_required_else_help = true)] 20 | struct AppArgs { 21 | #[clap(flatten)] 22 | db_args: DbArgs, 23 | 24 | #[clap(subcommand)] 25 | command: Command, 26 | } 27 | 28 | #[derive(Debug, Args)] 29 | struct DbArgs { 30 | /// Host on which Postgres is running 31 | #[arg(long)] 32 | db_host: String, 33 | 34 | /// Port on which Postgres is running 35 | #[arg(long)] 36 | db_port: u16, 37 | 38 | /// Postgres database name 39 | #[arg(long)] 40 | db_name: String, 41 | 42 | /// Postgres database user name 43 | #[arg(long)] 44 | db_username: String, 45 | 46 | /// Postgres database user password 47 | #[arg(long)] 
48 | db_password: Option, 49 | 50 | #[clap(flatten)] 51 | duckdb: DuckDbOptions, 52 | } 53 | 54 | #[derive(Debug, clap::Args)] 55 | #[group(required = true, multiple = true)] 56 | pub struct DuckDbOptions { 57 | /// DuckDb file name 58 | #[clap(long)] 59 | duckdb_file: Option, 60 | 61 | /// MotherDuck access token 62 | #[clap(long, conflicts_with = "duckdb_file", requires = "motherduck_db_name")] 63 | motherduck_access_token: Option, 64 | 65 | /// MotherDuck database name 66 | #[clap( 67 | long, 68 | conflicts_with = "duckdb_file", 69 | requires = "motherduck_access_token" 70 | )] 71 | motherduck_db_name: Option, 72 | } 73 | 74 | #[derive(Debug, Subcommand)] 75 | enum Command { 76 | /// Copy a table 77 | CopyTable { schema: String, name: String }, 78 | 79 | /// Start a change data capture 80 | Cdc { 81 | publication: String, 82 | slot_name: String, 83 | }, 84 | } 85 | 86 | #[tokio::main] 87 | async fn main() -> Result<(), Box> { 88 | if let Err(e) = main_impl().await { 89 | error!("{e}"); 90 | } 91 | 92 | Ok(()) 93 | } 94 | 95 | fn init_tracing() { 96 | tracing_subscriber::registry() 97 | .with( 98 | tracing_subscriber::EnvFilter::try_from_default_env() 99 | .unwrap_or_else(|_| "duckdb=info".into()), 100 | ) 101 | .with(tracing_subscriber::fmt::layer()) 102 | .init(); 103 | } 104 | 105 | fn set_log_level() { 106 | if std::env::var("RUST_LOG").is_err() { 107 | std::env::set_var("RUST_LOG", "info"); 108 | } 109 | } 110 | 111 | async fn main_impl() -> Result<(), Box> { 112 | set_log_level(); 113 | init_tracing(); 114 | 115 | let args = AppArgs::parse(); 116 | let db_args = args.db_args; 117 | 118 | let options = PgDatabaseOptions { 119 | host: db_args.db_host, 120 | port: db_args.db_port, 121 | name: db_args.db_name, 122 | username: db_args.db_username, 123 | password: db_args.db_password, 124 | ssl_mode: SslMode::Disable, 125 | }; 126 | 127 | let (postgres_source, action) = match args.command { 128 | Command::CopyTable { schema, name } => { 129 | let table_names = vec![TableName { schema, name }]; 130 | 131 | let postgres_source = 132 | PostgresSource::new(options, vec![], None, TableNamesFrom::Vec(table_names)) 133 | .await?; 134 | (postgres_source, PipelineAction::TableCopiesOnly) 135 | } 136 | Command::Cdc { 137 | publication, 138 | slot_name, 139 | } => { 140 | let postgres_source = PostgresSource::new( 141 | options, 142 | vec![], 143 | Some(slot_name), 144 | TableNamesFrom::Publication(publication), 145 | ) 146 | .await?; 147 | 148 | (postgres_source, PipelineAction::Both) 149 | } 150 | }; 151 | 152 | let duckdb_destination = match ( 153 | db_args.duckdb.duckdb_file, 154 | db_args.duckdb.motherduck_access_token, 155 | db_args.duckdb.motherduck_db_name, 156 | ) { 157 | (Some(duckdb_file), None, None) => DuckDbDestination::file(duckdb_file).await?, 158 | (None, Some(access_token), Some(db_name)) => { 159 | DuckDbDestination::mother_duck(&access_token, &db_name).await? 
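// Any other combination of flags (e.g. a MotherDuck access token without a database name) is rejected by clap before this point, because the `DuckDbOptions` group marks the two MotherDuck flags as requiring each other and conflicting with `duckdb_file`, which is why the catch-all arm below is unreachable.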
160 | } 161 | _ => { 162 | unreachable!() 163 | } 164 | }; 165 | 166 | let batch_config = BatchConfig::new(1000, Duration::from_secs(10)); 167 | let mut pipeline = 168 | BatchDataPipeline::new(postgres_source, duckdb_destination, action, batch_config); 169 | 170 | pipeline.start().await?; 171 | 172 | Ok(()) 173 | } 174 | -------------------------------------------------------------------------------- /etl/examples/stdout.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, time::Duration}; 2 | 3 | use clap::{Args, Parser, Subcommand}; 4 | use etl::{ 5 | pipeline::{ 6 | batching::{data_pipeline::BatchDataPipeline, BatchConfig}, 7 | destinations::stdout::StdoutDestination, 8 | sources::postgres::{PostgresSource, TableNamesFrom}, 9 | PipelineAction, 10 | }, 11 | SslMode, 12 | }; 13 | use postgres::schema::TableName; 14 | use postgres::tokio::options::PgDatabaseOptions; 15 | use tracing::error; 16 | use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; 17 | 18 | #[derive(Debug, Parser)] 19 | #[command(name = "stdout", version, about, arg_required_else_help = true)] 20 | struct AppArgs { 21 | #[clap(flatten)] 22 | db_args: DbArgs, 23 | 24 | #[clap(subcommand)] 25 | command: Command, 26 | } 27 | 28 | #[derive(Debug, Args)] 29 | struct DbArgs { 30 | /// Host on which Postgres is running 31 | #[arg(long)] 32 | db_host: String, 33 | 34 | /// Port on which Postgres is running 35 | #[arg(long)] 36 | db_port: u16, 37 | 38 | /// Postgres database name 39 | #[arg(long)] 40 | db_name: String, 41 | 42 | /// Postgres database user name 43 | #[arg(long)] 44 | db_username: String, 45 | 46 | /// Postgres database user password 47 | #[arg(long)] 48 | db_password: Option, 49 | } 50 | 51 | #[derive(Debug, Subcommand)] 52 | enum Command { 53 | /// Copy a table 54 | CopyTable { schema: String, name: String }, 55 | 56 | /// Start a change data capture 57 | Cdc { 58 | publication: String, 59 | slot_name: String, 60 | }, 61 | } 62 | 63 | #[tokio::main] 64 | async fn main() -> Result<(), Box> { 65 | if let Err(e) = main_impl().await { 66 | error!("{e}"); 67 | } 68 | 69 | Ok(()) 70 | } 71 | 72 | fn init_tracing() { 73 | tracing_subscriber::registry() 74 | .with( 75 | tracing_subscriber::EnvFilter::try_from_default_env() 76 | .unwrap_or_else(|_| "stdout=info".into()), 77 | ) 78 | .with(tracing_subscriber::fmt::layer()) 79 | .init(); 80 | } 81 | 82 | fn set_log_level() { 83 | if std::env::var("RUST_LOG").is_err() { 84 | std::env::set_var("RUST_LOG", "info"); 85 | } 86 | } 87 | 88 | async fn main_impl() -> Result<(), Box> { 89 | set_log_level(); 90 | init_tracing(); 91 | let args = AppArgs::parse(); 92 | let db_args = args.db_args; 93 | 94 | let options = PgDatabaseOptions { 95 | host: db_args.db_host, 96 | port: db_args.db_port, 97 | name: db_args.db_name, 98 | username: db_args.db_username, 99 | password: db_args.db_password, 100 | ssl_mode: SslMode::Disable, 101 | }; 102 | 103 | let (postgres_source, action) = match args.command { 104 | Command::CopyTable { schema, name } => { 105 | let table_names = vec![TableName { schema, name }]; 106 | 107 | let postgres_source = 108 | PostgresSource::new(options, vec![], None, TableNamesFrom::Vec(table_names)) 109 | .await?; 110 | (postgres_source, PipelineAction::TableCopiesOnly) 111 | } 112 | Command::Cdc { 113 | publication, 114 | slot_name, 115 | } => { 116 | let postgres_source = PostgresSource::new( 117 | options, 118 | vec![], 119 | Some(slot_name), 120 | TableNamesFrom::Publication(publication), 
121 | ) 122 | .await?; 123 | 124 | (postgres_source, PipelineAction::Both) 125 | } 126 | }; 127 | 128 | let stdout_destination = StdoutDestination; 129 | 130 | let batch_config = BatchConfig::new(1000, Duration::from_secs(10)); 131 | let mut pipeline = 132 | BatchDataPipeline::new(postgres_source, stdout_destination, action, batch_config); 133 | 134 | pipeline.start().await?; 135 | 136 | Ok(()) 137 | } 138 | -------------------------------------------------------------------------------- /etl/src/clients/mod.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "bigquery")] 2 | pub mod bigquery; 3 | #[cfg(feature = "duckdb")] 4 | pub mod duckdb; 5 | pub mod postgres; 6 | -------------------------------------------------------------------------------- /etl/src/conversions/bool.rs: -------------------------------------------------------------------------------- 1 | use thiserror::Error; 2 | 3 | #[derive(Debug, Error)] 4 | pub enum ParseBoolError { 5 | #[error("invalid input value: {0}")] 6 | InvalidInput(String), 7 | } 8 | 9 | pub fn parse_bool(s: &str) -> Result { 10 | if s == "t" { 11 | Ok(true) 12 | } else if s == "f" { 13 | Ok(false) 14 | } else { 15 | Err(ParseBoolError::InvalidInput(s.to_string())) 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /etl/src/conversions/hex.rs: -------------------------------------------------------------------------------- 1 | use std::num::ParseIntError; 2 | 3 | use thiserror::Error; 4 | 5 | #[derive(Debug, Error)] 6 | pub enum ByteaHexParseError { 7 | #[error("missing prefix '\\x'")] 8 | InvalidPrefix, 9 | 10 | #[error("invalid byte")] 11 | OddNumerOfDigits, 12 | 13 | #[error("parse int result: {0}")] 14 | ParseInt(#[from] ParseIntError), 15 | } 16 | 17 | pub fn from_bytea_hex(s: &str) -> Result, ByteaHexParseError> { 18 | if s.len() < 2 || &s[..2] != "\\x" { 19 | return Err(ByteaHexParseError::InvalidPrefix); 20 | } 21 | 22 | let mut result = Vec::with_capacity((s.len() - 2) / 2); 23 | let s = &s[2..]; 24 | 25 | if s.len() % 2 != 0 { 26 | return Err(ByteaHexParseError::OddNumerOfDigits); 27 | } 28 | 29 | for i in (0..s.len()).step_by(2) { 30 | let val = u8::from_str_radix(&s[i..i + 2], 16)?; 31 | result.push(val); 32 | } 33 | 34 | Ok(result) 35 | } 36 | -------------------------------------------------------------------------------- /etl/src/conversions/mod.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 | use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Utc}; 4 | use numeric::PgNumeric; 5 | use uuid::Uuid; 6 | 7 | pub mod bool; 8 | pub mod cdc_event; 9 | pub mod hex; 10 | pub mod numeric; 11 | pub mod table_row; 12 | pub mod text; 13 | 14 | #[derive(Debug, Clone)] 15 | pub enum Cell { 16 | Null, 17 | Bool(bool), 18 | String(String), 19 | I16(i16), 20 | I32(i32), 21 | U32(u32), 22 | I64(i64), 23 | F32(f32), 24 | F64(f64), 25 | Numeric(PgNumeric), 26 | Date(NaiveDate), 27 | Time(NaiveTime), 28 | TimeStamp(NaiveDateTime), 29 | TimeStampTz(DateTime), 30 | Uuid(Uuid), 31 | Json(serde_json::Value), 32 | Bytes(Vec), 33 | Array(ArrayCell), 34 | } 35 | 36 | #[derive(Debug, Clone)] 37 | pub enum ArrayCell { 38 | Null, 39 | Bool(Vec>), 40 | String(Vec>), 41 | I16(Vec>), 42 | I32(Vec>), 43 | U32(Vec>), 44 | I64(Vec>), 45 | F32(Vec>), 46 | F64(Vec>), 47 | Numeric(Vec>), 48 | Date(Vec>), 49 | Time(Vec>), 50 | TimeStamp(Vec>), 51 | TimeStampTz(Vec>>), 52 | Uuid(Vec>), 53 | Json(Vec>), 54 
| Bytes(Vec>>), 55 | } 56 | -------------------------------------------------------------------------------- /etl/src/conversions/numeric.rs: -------------------------------------------------------------------------------- 1 | // adapted from the bigdecimal crate 2 | use bigdecimal::{ 3 | num_bigint::{BigInt, BigUint, Sign}, 4 | BigDecimal, ParseBigDecimalError, 5 | }; 6 | use byteorder::{BigEndian, ReadBytesExt}; 7 | use std::{fmt::Display, io::Cursor, str::FromStr}; 8 | use tokio_postgres::types::{FromSql, Type}; 9 | 10 | /// A rust variant of the Postgres Numeric type. The full spectrum of Postgres' 11 | /// Numeric value range is supported. 12 | /// 13 | /// Represented as an Optional BigDecimal. None for 'NaN', Some(bigdecimal) for 14 | /// all other values. 15 | #[derive(Debug, Ord, PartialOrd, Eq, PartialEq, Clone)] 16 | pub enum PgNumeric { 17 | NaN, 18 | PositiveInf, 19 | NegativeInf, 20 | Value(BigDecimal), 21 | } 22 | 23 | impl FromStr for PgNumeric { 24 | type Err = ParseBigDecimalError; 25 | 26 | fn from_str(s: &str) -> Result { 27 | match BigDecimal::from_str(s) { 28 | Ok(n) => Ok(PgNumeric::Value(n)), 29 | Err(e) => { 30 | if s.to_lowercase() == "infinity" { 31 | Ok(PgNumeric::PositiveInf) 32 | } else if s.to_lowercase() == "-infinity" { 33 | Ok(PgNumeric::NegativeInf) 34 | } else if s.to_lowercase() == "nan" { 35 | Ok(PgNumeric::NaN) 36 | } else { 37 | Err(e) 38 | } 39 | } 40 | } 41 | } 42 | } 43 | 44 | impl<'a> FromSql<'a> for PgNumeric { 45 | fn from_sql( 46 | _: &Type, 47 | raw: &'a [u8], 48 | ) -> Result> { 49 | let mut rdr = Cursor::new(raw); 50 | 51 | let n_digits = rdr.read_u16::()?; 52 | let weight = rdr.read_i16::()?; 53 | let sign = match rdr.read_u16::()? { 54 | 0x4000 => Sign::Minus, 55 | 0x0000 => Sign::Plus, 56 | 0xC000 => return Ok(PgNumeric::NaN), 57 | 0xD000 => return Ok(PgNumeric::PositiveInf), 58 | 0xF000 => return Ok(PgNumeric::NegativeInf), 59 | v => { 60 | return Err(std::io::Error::new( 61 | std::io::ErrorKind::InvalidData, 62 | format!("invalid sign {v:#04x}"), 63 | ) 64 | .into()) 65 | } 66 | }; 67 | let scale = rdr.read_u16::()?; 68 | 69 | let mut biguint = BigUint::from(0u32); 70 | for n in (0..n_digits).rev() { 71 | let digit = rdr.read_u16::()?; 72 | biguint += BigUint::from(digit) * BigUint::from(10_000u32).pow(n as u32); 73 | } 74 | 75 | // First digit in unsigned now has factor 10_000^(digits.len() - 1), 76 | // but should have 10_000^weight 77 | // 78 | // Credits: this logic has been copied from rust Diesel's related code 79 | // that provides the same translation from Postgres numeric into their 80 | // related rust type. 
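// Worked example (illustrative values): the numeric 12345.678 arrives as base-10000 digits [1, 2345, 6780] with weight = 1 and scale = 3; the loop above accumulates biguint = 1 * 10000^2 + 2345 * 10000 + 6780 = 123_456_780, correction_exp below is 4 * (1 - 3 + 1) = -4, and the result is 123_456_780 * 10^-4 = 12345.678.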
81 | let correction_exp = 4 * (i64::from(weight) - i64::from(n_digits) + 1); 82 | let res = BigDecimal::new(BigInt::from_biguint(sign, biguint), -correction_exp) 83 | .with_scale(i64::from(scale)); 84 | 85 | Ok(PgNumeric::Value(res)) 86 | } 87 | 88 | fn accepts(ty: &Type) -> bool { 89 | matches!(*ty, Type::NUMERIC) 90 | } 91 | } 92 | 93 | impl Display for PgNumeric { 94 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 95 | match self { 96 | PgNumeric::NaN => write!(f, "NaN"), 97 | PgNumeric::PositiveInf => write!(f, "Infinity"), 98 | PgNumeric::NegativeInf => write!(f, "-Infinity"), 99 | PgNumeric::Value(n) => write!(f, "{n}"), 100 | } 101 | } 102 | } 103 | 104 | impl Default for PgNumeric { 105 | fn default() -> Self { 106 | PgNumeric::Value(BigDecimal::default()) 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /etl/src/conversions/table_row.rs: -------------------------------------------------------------------------------- 1 | use core::str; 2 | use std::str::Utf8Error; 3 | 4 | use postgres::schema::ColumnSchema; 5 | use thiserror::Error; 6 | use tokio_postgres::types::Type; 7 | use tracing::error; 8 | 9 | use crate::{conversions::text::TextFormatConverter, pipeline::batching::BatchBoundary}; 10 | 11 | use super::{text::FromTextError, Cell}; 12 | 13 | #[derive(Debug, Clone)] 14 | pub struct TableRow { 15 | pub values: Vec, 16 | } 17 | 18 | impl BatchBoundary for TableRow { 19 | fn is_last_in_batch(&self) -> bool { 20 | true 21 | } 22 | } 23 | 24 | #[derive(Debug, Error)] 25 | pub enum TableRowConversionError { 26 | #[error("unsupported type {0}")] 27 | UnsupportedType(Type), 28 | 29 | #[error("invalid string: {0}")] 30 | InvalidString(#[from] Utf8Error), 31 | 32 | #[error("mismatch in num of columns in schema and row")] 33 | NumColsMismatch, 34 | 35 | #[error("unterminated row")] 36 | UnterminatedRow, 37 | 38 | #[error("invalid value: {0}")] 39 | InvalidValue(#[from] FromTextError), 40 | } 41 | 42 | pub struct TableRowConverter; 43 | 44 | impl TableRowConverter { 45 | // parses text produced by this code in Postgres: https://github.com/postgres/postgres/blob/263a3f5f7f508167dbeafc2aefd5835b41d77481/src/backend/commands/copyto.c#L988-L1134 46 | pub fn try_from( 47 | row: &[u8], 48 | column_schemas: &[ColumnSchema], 49 | ) -> Result { 50 | let mut values = Vec::with_capacity(column_schemas.len()); 51 | 52 | let row_str = str::from_utf8(row)?; 53 | let mut column_schemas_iter = column_schemas.iter(); 54 | let mut chars = row_str.chars(); 55 | let mut val_str = String::with_capacity(10); 56 | let mut in_escape = false; 57 | let mut row_terminated = false; 58 | let mut done = false; 59 | 60 | while !done { 61 | loop { 62 | match chars.next() { 63 | Some(c) => match c { 64 | c if in_escape => { 65 | if c == 'N' { 66 | val_str.push('\\'); 67 | val_str.push(c); 68 | } else if c == 'b' { 69 | val_str.push(8 as char); 70 | } else if c == 'f' { 71 | val_str.push(12 as char); 72 | } else if c == 'n' { 73 | val_str.push('\n'); 74 | } else if c == 'r' { 75 | val_str.push('\r'); 76 | } else if c == 't' { 77 | val_str.push('\t'); 78 | } else if c == 'v' { 79 | val_str.push(11 as char) 80 | } else { 81 | val_str.push(c); 82 | } 83 | in_escape = false; 84 | } 85 | '\t' => { 86 | break; 87 | } 88 | '\n' => { 89 | row_terminated = true; 90 | break; 91 | } 92 | '\\' => in_escape = true, 93 | c => { 94 | val_str.push(c); 95 | } 96 | }, 97 | None => { 98 | if !row_terminated { 99 | return 
Err(TableRowConversionError::UnterminatedRow); 100 | } 101 | done = true; 102 | break; 103 | } 104 | } 105 | } 106 | 107 | if !done { 108 | let Some(column_schema) = column_schemas_iter.next() else { 109 | return Err(TableRowConversionError::NumColsMismatch); 110 | }; 111 | 112 | let value = if val_str == "\\N" { 113 | Cell::Null 114 | } else { 115 | match TextFormatConverter::try_from_str(&column_schema.typ, &val_str) { 116 | Ok(value) => value, 117 | Err(e) => { 118 | error!( 119 | "error parsing column `{}` of type `{}` from text `{val_str}`", 120 | column_schema.name, column_schema.typ 121 | ); 122 | return Err(e.into()); 123 | } 124 | } 125 | }; 126 | 127 | values.push(value); 128 | val_str.clear(); 129 | } 130 | } 131 | 132 | Ok(TableRow { values }) 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /etl/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod clients; 2 | pub mod conversions; 3 | pub mod pipeline; 4 | pub mod v2; 5 | 6 | pub use tokio_postgres::config::SslMode; 7 | -------------------------------------------------------------------------------- /etl/src/main.rs: -------------------------------------------------------------------------------- 1 | use etl::v2::destination::memory::MemoryDestination; 2 | use etl::v2::pipeline::Pipeline; 3 | use etl::v2::state::store::base::PipelineStateStore; 4 | use etl::v2::state::store::memory::MemoryPipelineStateStore; 5 | use etl::v2::state::table::{TableReplicationPhase, TableReplicationState}; 6 | use tracing_subscriber::EnvFilter; 7 | 8 | // Temporary main method to quickly validate the inner workings of the new v2 architecture. 9 | #[tokio::main] 10 | async fn main() { 11 | let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")); 12 | 13 | // 2. Install a pretty-printing subscriber that writes to stdout 14 | tracing_subscriber::fmt().with_env_filter(filter).init(); 15 | 16 | // We create a store with mock table ids. 17 | let state_store = MemoryPipelineStateStore::new(); 18 | for i in 0..1 { 19 | state_store 20 | .store_table_replication_state(TableReplicationState::new( 21 | i, 22 | TableReplicationPhase::Init, 23 | )) 24 | .await; 25 | } 26 | 27 | let destination = MemoryDestination::new(); 28 | 29 | let mut pipeline = 30 | Pipeline::new(1, "my_publication".to_owned(), state_store, destination).await; 31 | 32 | pipeline.start().await.unwrap(); 33 | pipeline.wait().await; 34 | } 35 | -------------------------------------------------------------------------------- /etl/src/pipeline/batching/mod.rs: -------------------------------------------------------------------------------- 1 | use std::time::Duration; 2 | 3 | pub mod data_pipeline; 4 | pub mod stream; 5 | 6 | /// A trait to indicate which items in a stream can be the last in a batch. 7 | pub trait BatchBoundary: Sized { 8 | fn is_last_in_batch(&self) -> bool; 9 | } 10 | 11 | // For an item wrapped in a result we fall back to the item 12 | // for the Ok variant and always return true for an Err variant 13 | // to fail fast as this batch is anyway going to fail. 
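// In other words, an Err item is always a valid batch boundary, so a batch that contains a failure can be closed and emitted at the next opportunity instead of waiting for a later Ok item to end it.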
14 | impl BatchBoundary for Result { 15 | fn is_last_in_batch(&self) -> bool { 16 | match self { 17 | Ok(v) => v.is_last_in_batch(), 18 | Err(_) => true, 19 | } 20 | } 21 | } 22 | 23 | #[derive(Debug, Clone)] 24 | pub struct BatchConfig { 25 | max_batch_size: usize, 26 | max_batch_fill_time: Duration, 27 | } 28 | 29 | impl BatchConfig { 30 | pub fn new(max_batch_size: usize, max_batch_fill_time: Duration) -> BatchConfig { 31 | BatchConfig { 32 | max_batch_size, 33 | max_batch_fill_time, 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /etl/src/pipeline/batching/stream.rs: -------------------------------------------------------------------------------- 1 | use futures::{ready, Future, Stream}; 2 | use pin_project_lite::pin_project; 3 | use tokio::time::{sleep, Sleep}; 4 | 5 | use super::{BatchBoundary, BatchConfig}; 6 | use core::pin::Pin; 7 | use core::task::{Context, Poll}; 8 | use tokio::sync::futures::Notified; 9 | use tracing::info; 10 | 11 | // Implementation adapted from https://github.com/tokio-rs/tokio/blob/master/tokio-stream/src/stream_ext/chunks_timeout.rs 12 | pin_project! { 13 | /// Adapter stream which batches the items of the underlying stream when it 14 | /// reaches max_size or when a timeout expires. The underlying streams items 15 | /// must implement [`BatchBoundary`]. A batch is guaranteed to end on an 16 | /// item which returns true from [`BatchBoundary::is_last_in_batch`] unless the 17 | /// stream is forcefully stopped. 18 | #[must_use = "streams do nothing unless polled"] 19 | #[derive(Debug)] 20 | pub struct BatchTimeoutStream<'a, B: BatchBoundary, S: Stream> { 21 | #[pin] 22 | stream: S, 23 | #[pin] 24 | deadline: Option, 25 | #[pin] 26 | stream_stop: Notified<'a>, 27 | items: Vec, 28 | batch_config: BatchConfig, 29 | reset_timer: bool, 30 | inner_stream_ended: bool, 31 | stream_stopped: bool 32 | } 33 | } 34 | 35 | impl<'a, B: BatchBoundary, S: Stream> BatchTimeoutStream<'a, B, S> { 36 | pub fn new(stream: S, batch_config: BatchConfig, stream_stop: Notified<'a>) -> Self { 37 | BatchTimeoutStream { 38 | stream, 39 | deadline: None, 40 | stream_stop, 41 | items: Vec::with_capacity(batch_config.max_batch_size), 42 | batch_config, 43 | reset_timer: true, 44 | inner_stream_ended: false, 45 | stream_stopped: false, 46 | } 47 | } 48 | 49 | pub fn get_inner_mut(&mut self) -> &mut S { 50 | &mut self.stream 51 | } 52 | } 53 | 54 | impl<'a, B: BatchBoundary, S: Stream> Stream for BatchTimeoutStream<'a, B, S> { 55 | type Item = Vec; 56 | 57 | fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { 58 | let mut this = self.as_mut().project(); 59 | 60 | if *this.inner_stream_ended { 61 | return Poll::Ready(None); 62 | } 63 | 64 | loop { 65 | if *this.stream_stopped { 66 | return Poll::Ready(None); 67 | } 68 | 69 | // If the stream has been asked to stop, we mark the stream as stopped and return the 70 | // remaining elements, irrespectively of boundaries. 
71 | if this.stream_stop.as_mut().poll(cx).is_ready() { 72 | info!("the stream has been forcefully stopped"); 73 | *this.stream_stopped = true; 74 | return if !this.items.is_empty() { 75 | Poll::Ready(Some(std::mem::take(this.items))) 76 | } else { 77 | Poll::Ready(None) 78 | }; 79 | } 80 | 81 | if *this.reset_timer { 82 | this.deadline 83 | .set(Some(sleep(this.batch_config.max_batch_fill_time))); 84 | *this.reset_timer = false; 85 | } 86 | if this.items.is_empty() { 87 | this.items.reserve_exact(this.batch_config.max_batch_size); 88 | } 89 | match this.stream.as_mut().poll_next(cx) { 90 | Poll::Pending => break, 91 | Poll::Ready(Some(item)) => { 92 | let is_last_in_batch = item.is_last_in_batch(); 93 | this.items.push(item); 94 | if this.items.len() >= this.batch_config.max_batch_size && is_last_in_batch { 95 | *this.reset_timer = true; 96 | return Poll::Ready(Some(std::mem::take(this.items))); 97 | } 98 | } 99 | Poll::Ready(None) => { 100 | let last = if this.items.is_empty() { 101 | None 102 | } else { 103 | *this.reset_timer = true; 104 | Some(std::mem::take(this.items)) 105 | }; 106 | 107 | *this.inner_stream_ended = true; 108 | 109 | return Poll::Ready(last); 110 | } 111 | } 112 | } 113 | 114 | if !this.items.is_empty() { 115 | if let Some(deadline) = this.deadline.as_pin_mut() { 116 | ready!(deadline.poll(cx)); 117 | } 118 | 119 | let last_item = this.items.last().expect("missing last item"); 120 | if last_item.is_last_in_batch() { 121 | *this.reset_timer = true; 122 | return Poll::Ready(Some(std::mem::take(this.items))); 123 | } 124 | } 125 | 126 | Poll::Pending 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /etl/src/pipeline/destinations/duckdb/mod.rs: -------------------------------------------------------------------------------- 1 | pub use destination::DuckDbDestination; 2 | pub use executor::{DuckDbExecutorError, DuckDbRequest}; 3 | 4 | mod destination; 5 | mod executor; 6 | -------------------------------------------------------------------------------- /etl/src/pipeline/destinations/mod.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use async_trait::async_trait; 4 | use postgres::schema::{TableId, TableSchema}; 5 | use thiserror::Error; 6 | use tokio_postgres::types::PgLsn; 7 | 8 | use crate::conversions::{cdc_event::CdcEvent, table_row::TableRow}; 9 | 10 | use super::PipelineResumptionState; 11 | 12 | #[cfg(feature = "bigquery")] 13 | pub mod bigquery; 14 | #[cfg(feature = "duckdb")] 15 | pub mod duckdb; 16 | #[cfg(feature = "stdout")] 17 | pub mod stdout; 18 | 19 | pub trait DestinationError: std::error::Error + Send + Sync + 'static {} 20 | 21 | #[derive(Debug, Error)] 22 | #[error("unreachable")] 23 | pub enum InfallibleDestinationError {} 24 | impl DestinationError for InfallibleDestinationError {} 25 | 26 | #[async_trait] 27 | pub trait BatchDestination { 28 | type Error: DestinationError; 29 | async fn get_resumption_state(&mut self) -> Result; 30 | async fn write_table_schemas( 31 | &mut self, 32 | table_schemas: HashMap, 33 | ) -> Result<(), Self::Error>; 34 | async fn write_table_rows( 35 | &mut self, 36 | rows: Vec, 37 | table_id: TableId, 38 | ) -> Result<(), Self::Error>; 39 | async fn write_cdc_events(&mut self, events: Vec) -> Result; 40 | async fn table_copied(&mut self, table_id: TableId) -> Result<(), Self::Error>; 41 | async fn truncate_table(&mut self, table_id: TableId) -> Result<(), Self::Error>; 42 | } 43 | 
-------------------------------------------------------------------------------- /etl/src/pipeline/destinations/stdout.rs: -------------------------------------------------------------------------------- 1 | use std::collections::{HashMap, HashSet}; 2 | 3 | use async_trait::async_trait; 4 | use postgres::schema::{TableId, TableSchema}; 5 | use tokio_postgres::types::PgLsn; 6 | use tracing::info; 7 | 8 | use crate::{ 9 | conversions::{cdc_event::CdcEvent, table_row::TableRow}, 10 | pipeline::PipelineResumptionState, 11 | }; 12 | 13 | use super::{BatchDestination, InfallibleDestinationError}; 14 | 15 | pub struct StdoutDestination; 16 | 17 | #[async_trait] 18 | impl BatchDestination for StdoutDestination { 19 | type Error = InfallibleDestinationError; 20 | async fn get_resumption_state(&mut self) -> Result { 21 | Ok(PipelineResumptionState { 22 | copied_tables: HashSet::new(), 23 | last_lsn: PgLsn::from(0), 24 | }) 25 | } 26 | 27 | async fn write_table_schemas( 28 | &mut self, 29 | table_schemas: HashMap, 30 | ) -> Result<(), Self::Error> { 31 | info!("{table_schemas:?}"); 32 | Ok(()) 33 | } 34 | 35 | async fn write_table_rows( 36 | &mut self, 37 | rows: Vec, 38 | _table_id: TableId, 39 | ) -> Result<(), Self::Error> { 40 | for row in rows { 41 | info!("{row:?}"); 42 | } 43 | Ok(()) 44 | } 45 | 46 | async fn write_cdc_events(&mut self, events: Vec) -> Result { 47 | for event in events { 48 | info!("{event:?}"); 49 | } 50 | Ok(PgLsn::from(0)) 51 | } 52 | 53 | async fn table_copied(&mut self, table_id: TableId) -> Result<(), Self::Error> { 54 | info!("table {table_id} copied"); 55 | Ok(()) 56 | } 57 | 58 | async fn truncate_table(&mut self, table_id: TableId) -> Result<(), Self::Error> { 59 | info!("table {table_id} truncated"); 60 | Ok(()) 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /etl/src/pipeline/mod.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashSet; 2 | 3 | use destinations::DestinationError; 4 | use postgres::schema::TableId; 5 | use sources::SourceError; 6 | use thiserror::Error; 7 | use tokio_postgres::types::PgLsn; 8 | 9 | pub mod batching; 10 | pub mod destinations; 11 | pub mod sources; 12 | 13 | #[derive(Debug)] 14 | pub enum PipelineAction { 15 | TableCopiesOnly, 16 | CdcOnly, 17 | Both, 18 | } 19 | 20 | #[derive(Debug)] 21 | pub struct PipelineResumptionState { 22 | pub copied_tables: HashSet, 23 | pub last_lsn: PgLsn, 24 | } 25 | 26 | #[derive(Debug, Error)] 27 | pub enum PipelineError { 28 | #[error("source error: {0}")] 29 | Source(#[source] SrcErr), 30 | 31 | #[error("destination error: {0}")] 32 | Destination(#[source] DstErr), 33 | 34 | #[error("source error: {0}")] 35 | CommonSource(#[from] sources::CommonSourceError), 36 | } 37 | -------------------------------------------------------------------------------- /etl/src/pipeline/sources/mod.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use ::postgres::schema::{ColumnSchema, TableId, TableName, TableSchema}; 4 | use async_trait::async_trait; 5 | use thiserror::Error; 6 | use tokio_postgres::types::PgLsn; 7 | 8 | use postgres::{ 9 | CdcStream, CdcStreamError, PostgresSourceError, StatusUpdateError, TableCopyStream, 10 | TableCopyStreamError, 11 | }; 12 | 13 | pub mod postgres; 14 | 15 | pub trait SourceError: std::error::Error + Send + Sync + 'static {} 16 | 17 | #[derive(Debug, Error)] 18 | #[error("unreachable")] 19 | 
pub enum InfallibleSourceError {} 20 | impl SourceError for InfallibleSourceError {} 21 | 22 | #[derive(Debug, Error)] 23 | pub enum CommonSourceError { 24 | #[error("source error: {0}")] 25 | Postgres(#[from] PostgresSourceError), 26 | 27 | #[error("table copy stream error: {0}")] 28 | TableCopyStream(#[from] TableCopyStreamError), 29 | 30 | #[error("cdc stream error: {0}")] 31 | CdcStream(#[from] CdcStreamError), 32 | 33 | #[error("status update error: {0}")] 34 | StatusUpdate(#[from] StatusUpdateError), 35 | } 36 | 37 | impl SourceError for CommonSourceError {} 38 | 39 | #[async_trait] 40 | pub trait Source { 41 | type Error: SourceError; 42 | 43 | fn get_table_schemas(&self) -> &HashMap; 44 | 45 | async fn get_table_copy_stream( 46 | &self, 47 | table_name: &TableName, 48 | column_schemas: &[ColumnSchema], 49 | ) -> Result; 50 | 51 | async fn commit_transaction(&mut self) -> Result<(), Self::Error>; 52 | 53 | async fn get_cdc_stream(&self, start_lsn: PgLsn) -> Result; 54 | } 55 | -------------------------------------------------------------------------------- /etl/src/v2/concurrency/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod future; 2 | -------------------------------------------------------------------------------- /etl/src/v2/destination/base.rs: -------------------------------------------------------------------------------- 1 | use crate::conversions::cdc_event::CdcEvent; 2 | use std::future::Future; 3 | 4 | pub trait Destination { 5 | fn apply_events(&self, events: Vec) -> impl Future + Send; 6 | } 7 | -------------------------------------------------------------------------------- /etl/src/v2/destination/memory.rs: -------------------------------------------------------------------------------- 1 | use crate::conversions::cdc_event::CdcEvent; 2 | use crate::v2::destination::base::Destination; 3 | use std::sync::Arc; 4 | use tokio::sync::RwLock; 5 | 6 | #[derive(Debug)] 7 | struct Inner { 8 | _events: Vec, 9 | } 10 | 11 | #[derive(Debug, Clone)] 12 | pub struct MemoryDestination { 13 | _inner: Arc>, 14 | } 15 | 16 | impl MemoryDestination { 17 | pub fn new() -> Self { 18 | let inner = Inner { 19 | _events: Vec::new(), 20 | }; 21 | 22 | Self { 23 | _inner: Arc::new(RwLock::new(inner)), 24 | } 25 | } 26 | } 27 | 28 | impl Default for MemoryDestination { 29 | fn default() -> Self { 30 | Self::new() 31 | } 32 | } 33 | 34 | impl Destination for MemoryDestination { 35 | async fn apply_events(&self, _events: Vec) {} 36 | } 37 | -------------------------------------------------------------------------------- /etl/src/v2/destination/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod base; 2 | pub mod memory; 3 | -------------------------------------------------------------------------------- /etl/src/v2/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod concurrency; 2 | pub mod destination; 3 | pub mod pipeline; 4 | pub mod replication; 5 | pub mod state; 6 | pub mod workers; 7 | -------------------------------------------------------------------------------- /etl/src/v2/pipeline.rs: -------------------------------------------------------------------------------- 1 | use thiserror::Error; 2 | use tracing::{error, info}; 3 | 4 | use crate::v2::destination::base::Destination; 5 | use crate::v2::state::store::base::PipelineStateStore; 6 | use crate::v2::workers::apply::{ApplyWorker, ApplyWorkerHandle}; 7 | use 
crate::v2::workers::base::{Worker, WorkerHandle}; 8 | use crate::v2::workers::pool::TableSyncWorkerPool; 9 | 10 | #[derive(Debug, Error)] 11 | pub enum PipelineError { 12 | #[error("An error occurred in a worker")] 13 | WorkerError, 14 | } 15 | 16 | #[derive(Debug)] 17 | enum PipelineWorkers { 18 | NotStarted, 19 | Started { 20 | // TODO: investigate whether we could benefit from a central launcher that deals at a high-level 21 | // with workers management, which should not be done in the pipeline. 22 | apply_worker: ApplyWorkerHandle, 23 | table_sync_workers: TableSyncWorkerPool, 24 | }, 25 | } 26 | 27 | #[derive(Debug)] 28 | pub struct Pipeline { 29 | _id: u64, 30 | publication_name: String, 31 | state_store: S, 32 | destination: D, 33 | workers: PipelineWorkers, 34 | } 35 | 36 | impl Pipeline 37 | where 38 | S: PipelineStateStore + Clone + Send + 'static, 39 | D: Destination + Clone + Send + 'static, 40 | { 41 | pub async fn new(_id: u64, publication_name: String, state_store: S, destination: D) -> Self { 42 | Self { 43 | _id, 44 | publication_name, 45 | state_store, 46 | destination, 47 | workers: PipelineWorkers::NotStarted, 48 | } 49 | } 50 | 51 | pub async fn start(&mut self) -> Result<(), PipelineError> { 52 | info!( 53 | "Starting pipeline for publication {}", 54 | self.publication_name 55 | ); 56 | 57 | // We synchronize the relation subscription states with the publication, to make sure we 58 | // always know which tables to work with. Maybe in the future we also want to react in real 59 | // time to new relation ids being sent over by the cdc event stream. 60 | self.sync_relation_subscription_states().await; 61 | 62 | // We create the table sync workers shared memory area. 63 | let table_sync_workers = TableSyncWorkerPool::new(); 64 | 65 | // We create and start the apply worker. 66 | let apply_worker = ApplyWorker::new( 67 | self.state_store.clone(), 68 | self.destination.clone(), 69 | table_sync_workers.clone(), 70 | ) 71 | .start() 72 | .await 73 | .ok_or_else(|| { 74 | error!("Failed to start apply worker"); 75 | PipelineError::WorkerError 76 | })?; 77 | 78 | self.workers = PipelineWorkers::Started { 79 | apply_worker, 80 | table_sync_workers, 81 | }; 82 | 83 | Ok(()) 84 | } 85 | 86 | async fn sync_relation_subscription_states(&self) { 87 | info!("Synchronizing relation subscription states"); 88 | // TODO: in this function we want to: 89 | // 1. Load all tables for the publication 90 | // 2. For each table, we check if it already exists in the store 91 | // 3. If the table is not there, add it with `Init` state 92 | // 4. If it's there do not do anything 93 | } 94 | 95 | pub async fn wait(self) { 96 | let PipelineWorkers::Started { 97 | apply_worker, 98 | table_sync_workers, 99 | } = self.workers 100 | else { 101 | info!("Pipeline was not started, nothing to wait for"); 102 | return; 103 | }; 104 | 105 | // TODO: handle failure of errors on wait. 106 | info!("Waiting for pipeline workers to complete"); 107 | // We first wait for the apply worker to finish, since that must be done before waiting for 108 | // the table sync workers to finish, otherwise if we wait for sync workers first, we might 109 | // be having the apply worker that spawns new sync workers after we waited for the current 110 | // ones to finish. 
111 | apply_worker 112 | .wait() 113 | .await 114 | .expect("Failed to wait for apply worker"); 115 | info!("Apply worker completed"); 116 | 117 | let mut table_sync_workers = table_sync_workers.write().await; 118 | table_sync_workers.wait_all().await; 119 | info!("All table sync workers completed"); 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /etl/src/v2/replication/apply.rs: -------------------------------------------------------------------------------- 1 | use postgres::schema::Oid; 2 | use std::future::Future; 3 | use tokio_postgres::types::PgLsn; 4 | 5 | use crate::v2::destination::base::Destination; 6 | use crate::v2::state::store::base::PipelineStateStore; 7 | 8 | pub trait ApplyLoopHook 9 | where 10 | S: PipelineStateStore + Clone + Send + 'static, 11 | D: Destination + Clone + Send + 'static, 12 | { 13 | fn process_syncing_tables( 14 | &self, 15 | state_store: S, 16 | destination: D, 17 | current_lsn: PgLsn, 18 | ) -> impl Future + Send; 19 | 20 | fn should_apply_changes( 21 | &self, 22 | table_id: Oid, 23 | remote_final_lsn: PgLsn, 24 | ) -> impl Future + Send; 25 | } 26 | 27 | pub async fn start_apply_loop(_state_store: S, _destination: D, _hook: T, _last_lsn: PgLsn) 28 | where 29 | S: PipelineStateStore + Clone + Send + 'static, 30 | D: Destination + Clone + Send + 'static, 31 | T: ApplyLoopHook, 32 | { 33 | // Create a select between: 34 | // - Shutdown signal -> when called we stop the apply operation. 35 | // - Logical replication stream socket -> when an event is received, we handle it in a special 36 | // processing component. 37 | // - Else we do the table syncing if we are not at a boundary -> we perform table syncing to make 38 | // sure progress is happening in table sync workers. 39 | 40 | // TODO: implement. 41 | } 42 | -------------------------------------------------------------------------------- /etl/src/v2/replication/client.rs: -------------------------------------------------------------------------------- 1 | // TODO: implement client. 2 | -------------------------------------------------------------------------------- /etl/src/v2/replication/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod apply; 2 | pub mod client; 3 | pub mod table_sync; 4 | -------------------------------------------------------------------------------- /etl/src/v2/replication/table_sync.rs: -------------------------------------------------------------------------------- 1 | use crate::v2::destination::base::Destination; 2 | use crate::v2::state::store::base::PipelineStateStore; 3 | use crate::v2::workers::table_sync::TableSyncWorkerState; 4 | 5 | pub async fn start_table_sync( 6 | _state_store: S, 7 | _destination: D, 8 | _table_sync_worker_state: TableSyncWorkerState, 9 | ) where 10 | S: PipelineStateStore + Clone + Send + 'static, 11 | D: Destination + Clone + Send + 'static, 12 | { 13 | // 1. Load the state from the TableSyncWorkerState (must be done there since we know that the 14 | // in-memory state is consistent from this point, if we re-read from the store, we already 15 | // gave out the state which could have been changed already). 16 | // 2. Check the state and exit if SyncDone, Ready, Unknown 17 | // 3. Compute the slot name 18 | // 4. Make sure the state is either Init, DataSync, FinishedCopy 19 | // 5. Handle slot deletion in case it's in DataSync 20 | // 6. Start copy table with transaction 21 | // 7. 
Mark the table as FinishedCopy 22 | 23 | // Mark the table as SyncWait in memory only 24 | 25 | // Wait until the catchup is reached 26 | 27 | // TODO: implement. 28 | } 29 | -------------------------------------------------------------------------------- /etl/src/v2/state/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod pipeline; 2 | pub mod store; 3 | pub mod table; 4 | -------------------------------------------------------------------------------- /etl/src/v2/state/pipeline.rs: -------------------------------------------------------------------------------- 1 | use tokio_postgres::types::PgLsn; 2 | 3 | #[derive(Debug, Clone)] 4 | pub struct PipelineState { 5 | /// The last LSN which was applied by a pipeline. 6 | pub last_lsn: PgLsn, 7 | } 8 | 9 | impl Default for PipelineState { 10 | fn default() -> Self { 11 | Self { 12 | last_lsn: PgLsn::from(0), 13 | } 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /etl/src/v2/state/store/base.rs: -------------------------------------------------------------------------------- 1 | use crate::v2::state::pipeline::PipelineState; 2 | use crate::v2::state::table::TableReplicationState; 3 | use postgres::schema::Oid; 4 | use std::future::Future; 5 | 6 | pub trait PipelineStateStore { 7 | fn load_pipeline_state(&self) -> impl Future + Send; 8 | 9 | fn store_pipeline_state(&self, state: PipelineState) -> impl Future + Send; 10 | 11 | fn load_table_replication_states( 12 | &self, 13 | ) -> impl Future> + Send; 14 | 15 | fn load_table_replication_state( 16 | &self, 17 | table_id: &Oid, 18 | ) -> impl Future> + Send; 19 | 20 | fn store_table_replication_state( 21 | &self, 22 | state: TableReplicationState, 23 | ) -> impl Future + Send; 24 | } 25 | -------------------------------------------------------------------------------- /etl/src/v2/state/store/memory.rs: -------------------------------------------------------------------------------- 1 | use postgres::schema::Oid; 2 | use std::collections::HashMap; 3 | use std::sync::Arc; 4 | use tokio::sync::RwLock; 5 | 6 | use crate::v2::state::pipeline::PipelineState; 7 | use crate::v2::state::store::base::PipelineStateStore; 8 | use crate::v2::state::table::TableReplicationState; 9 | 10 | #[derive(Debug)] 11 | struct Inner { 12 | pipeline_state: PipelineState, 13 | table_replication_states: HashMap, 14 | } 15 | 16 | #[derive(Debug, Clone)] 17 | pub struct MemoryPipelineStateStore { 18 | inner: Arc>, 19 | } 20 | 21 | impl MemoryPipelineStateStore { 22 | pub fn new() -> Self { 23 | let inner = Inner { 24 | pipeline_state: PipelineState::default(), 25 | table_replication_states: HashMap::new(), 26 | }; 27 | 28 | Self { 29 | inner: Arc::new(RwLock::new(inner)), 30 | } 31 | } 32 | } 33 | 34 | impl Default for MemoryPipelineStateStore { 35 | fn default() -> Self { 36 | Self::new() 37 | } 38 | } 39 | 40 | impl PipelineStateStore for MemoryPipelineStateStore { 41 | async fn load_pipeline_state(&self) -> PipelineState { 42 | self.inner.read().await.pipeline_state.clone() 43 | } 44 | 45 | async fn store_pipeline_state(&self, state: PipelineState) { 46 | self.inner.write().await.pipeline_state = state; 47 | } 48 | 49 | async fn load_table_replication_states(&self) -> Vec { 50 | self.inner 51 | .read() 52 | .await 53 | .table_replication_states 54 | .values() 55 | .cloned() 56 | .collect() 57 | } 58 | 59 | async fn load_table_replication_state(&self, table_id: &Oid) -> Option { 60 | self.inner 61 | .read() 62 | .await 63 | 
.table_replication_states 64 | .get(table_id) 65 | .cloned() 66 | } 67 | 68 | async fn store_table_replication_state(&self, state: TableReplicationState) { 69 | self.inner 70 | .write() 71 | .await 72 | .table_replication_states 73 | .insert(state.id, state); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /etl/src/v2/state/store/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod base; 2 | pub mod memory; 3 | -------------------------------------------------------------------------------- /etl/src/v2/state/table.rs: -------------------------------------------------------------------------------- 1 | use postgres::schema::Oid; 2 | use std::borrow::Borrow; 3 | use tokio_postgres::types::PgLsn; 4 | 5 | #[derive(Debug, Clone)] 6 | pub struct TableReplicationState { 7 | /// The table (relation) OID to which this subscription refers. 8 | pub id: Oid, 9 | /// The phase of replication of the table. 10 | pub phase: TableReplicationPhase, 11 | } 12 | 13 | impl TableReplicationState { 14 | pub fn new(id: Oid, phase: TableReplicationPhase) -> Self { 15 | Self { id, phase } 16 | } 17 | 18 | pub fn with_phase(self, phase: TableReplicationPhase) -> TableReplicationState { 19 | TableReplicationState { phase, ..self } 20 | } 21 | } 22 | 23 | impl PartialEq for TableReplicationState { 24 | fn eq(&self, other: &Self) -> bool { 25 | self.id == other.id 26 | } 27 | } 28 | 29 | impl Borrow for TableReplicationState { 30 | fn borrow(&self) -> &Oid { 31 | &self.id 32 | } 33 | } 34 | 35 | impl Eq for TableReplicationState {} 36 | 37 | #[derive(Debug, Copy, Clone, Eq, PartialEq)] 38 | pub enum TableReplicationPhase { 39 | Init, 40 | DataSync, 41 | FinishedCopy, 42 | SyncWait, 43 | Catchup { 44 | /// The LSN to catch up to. 45 | lsn: PgLsn, 46 | }, 47 | SyncDone { 48 | /// The LSN up to which the table sync arrived. 49 | lsn: PgLsn, 50 | }, 51 | Ready { 52 | /// The LSN of the apply worker which set this state to ready. 53 | lsn: PgLsn, 54 | }, 55 | Unknown, 56 | } 57 | 58 | impl TableReplicationPhase { 59 | pub fn as_type(&self) -> TableReplicationPhaseType { 60 | self.into() 61 | } 62 | } 63 | 64 | #[derive(Debug, Copy, Clone, Eq, PartialEq)] 65 | pub enum TableReplicationPhaseType { 66 | Init, 67 | DataSync, 68 | FinishedCopy, 69 | SyncWait, 70 | Catchup, 71 | SyncDone, 72 | Ready, 73 | Unknown, 74 | } 75 | 76 | impl TableReplicationPhaseType { 77 | pub fn should_store(&self) -> bool { 78 | match self { 79 | Self::Init => true, 80 | Self::DataSync => true, 81 | Self::FinishedCopy => true, 82 | Self::SyncDone => true, 83 | Self::Ready => true, 84 | // We set `false` to the statuses which are exclusively used for cross-task synchronization 85 | // and do not need to be stored. 86 | Self::SyncWait => false, 87 | Self::Catchup => false, 88 | Self::Unknown => false, 89 | } 90 | } 91 | } 92 | 93 | impl<'a> From<&'a TableReplicationPhase> for TableReplicationPhaseType { 94 | fn from(phase: &'a TableReplicationPhase) -> Self { 95 | match phase { 96 | TableReplicationPhase::Init => Self::Init, 97 | TableReplicationPhase::DataSync => Self::DataSync, 98 | TableReplicationPhase::FinishedCopy => Self::FinishedCopy, 99 | TableReplicationPhase::SyncWait => Self::SyncWait, 100 | TableReplicationPhase::Catchup { .. } => Self::Catchup, 101 | TableReplicationPhase::SyncDone { .. } => Self::SyncDone, 102 | TableReplicationPhase::Ready { .. 
} => Self::Ready, 103 | TableReplicationPhase::Unknown => Self::Unknown, 104 | } 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /etl/src/v2/workers/base.rs: -------------------------------------------------------------------------------- 1 | use std::future::Future; 2 | use thiserror::Error; 3 | 4 | /// Errors that can occur during worker execution. 5 | #[derive(Debug, Error)] 6 | pub enum WorkerError { 7 | /// The worker task failed to join, typically due to a panic or cancellation. 8 | #[error("The worker experienced an uncaught error: {0}")] 9 | Join(#[from] tokio::task::JoinError), 10 | 11 | /// The worker encountered a caught error during execution. 12 | #[error("The worker experienced a caught error: {0}")] 13 | Caught(String), 14 | } 15 | 16 | /// A trait for types that can be started as workers. 17 | /// 18 | /// The generic parameter `H` represents the handle type that will be returned when the worker starts, 19 | /// and `S` represents the state type that can be accessed through the handle. 20 | pub trait Worker 21 | where 22 | H: WorkerHandle, 23 | { 24 | /// Starts the worker and returns a future that resolves to an optional handle. 25 | /// 26 | /// The handle can be used to monitor and control the worker's execution. 27 | fn start(self) -> impl Future> + Send; 28 | } 29 | 30 | /// A handle to a running worker that provides access to its state and completion status. 31 | /// 32 | /// The generic parameter `S` represents the type of state that can be accessed through this handle. 33 | pub trait WorkerHandle { 34 | /// Returns the current state of the worker. 35 | /// 36 | /// Note that the state of the worker is expected to NOT be tied to its lifetime, so if you 37 | /// hold a reference to the state, it says nothing about the worker's status; however, the state 38 | /// could be used to encode that status, but this is based on the semantics of the concrete type 39 | /// and not on this abstraction. 40 | fn state(&self) -> S; 41 | 42 | /// Returns a future that resolves when the worker completes. 43 | /// 44 | /// The future resolves to a [`Result`] indicating whether the worker completed successfully 45 | /// or encountered an error.
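/// Note that waiting consumes the handle (`wait` takes `self` by value), so a worker can be awaited at most once through it.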
46 | fn wait(self) -> impl Future> + Send; 47 | } 48 | -------------------------------------------------------------------------------- /etl/src/v2/workers/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod apply; 2 | pub mod base; 3 | pub mod pool; 4 | pub mod table_sync; 5 | -------------------------------------------------------------------------------- /etl/src/v2/workers/pool.rs: -------------------------------------------------------------------------------- 1 | use crate::v2::concurrency::future::ReactiveFutureCallback; 2 | use crate::v2::destination::base::Destination; 3 | use crate::v2::state::store::base::PipelineStateStore; 4 | use crate::v2::workers::base::{Worker, WorkerError, WorkerHandle}; 5 | use crate::v2::workers::table_sync::{ 6 | TableSyncWorker, TableSyncWorkerHandle, TableSyncWorkerState, 7 | }; 8 | use postgres::schema::Oid; 9 | use std::collections::HashMap; 10 | use std::mem; 11 | use std::ops::Deref; 12 | use std::sync::Arc; 13 | use tokio::sync::RwLock; 14 | use tracing::{info, warn}; 15 | 16 | #[derive(Debug)] 17 | pub enum TableSyncWorkerInactiveReason { 18 | Success, 19 | Error(String), 20 | } 21 | 22 | #[derive(Debug)] 23 | pub struct TableSyncWorkerPoolInner { 24 | /// The table sync workers that are currently active. 25 | active: HashMap, 26 | /// The table sync workers that are inactive, meaning that they are completed or errored. 27 | /// 28 | /// Having the state of finished workers gives us the power to reschedule failed table sync 29 | /// workers very cheaply since the state can be fed into a new table worker future as if it was 30 | /// read initially from the state store. 31 | inactive: HashMap>, 32 | } 33 | 34 | impl TableSyncWorkerPoolInner { 35 | fn new() -> Self { 36 | Self { 37 | active: HashMap::new(), 38 | inactive: HashMap::new(), 39 | } 40 | } 41 | 42 | pub async fn start_worker(&mut self, worker: TableSyncWorker) -> bool 43 | where 44 | S: PipelineStateStore + Clone + Send + 'static, 45 | D: Destination + Clone + Send + 'static, 46 | { 47 | let table_id = worker.table_id(); 48 | if self.active.contains_key(&table_id) { 49 | warn!("Worker for table {} already exists in pool", table_id); 50 | return false; 51 | } 52 | 53 | let Some(handle) = worker.start().await else { 54 | warn!("Failed to start worker for table {}", table_id); 55 | return false; 56 | }; 57 | 58 | self.active.insert(table_id, handle); 59 | info!("Successfully added worker for table {} to pool", table_id); 60 | 61 | true 62 | } 63 | 64 | pub fn get_worker_state(&self, table_id: Oid) -> Option { 65 | let state = self.active.get(&table_id)?.state().clone(); 66 | info!("Retrieved worker state for table {}", table_id); 67 | 68 | Some(state) 69 | } 70 | 71 | pub fn set_worker_finished(&mut self, table_id: Oid, reason: TableSyncWorkerInactiveReason) { 72 | let removed_worker = self.active.remove(&table_id); 73 | if let Some(removed_worker) = removed_worker { 74 | info!( 75 | "Marked worker for table {} as inactive with reason {:?}", 76 | table_id, reason 77 | ); 78 | 79 | self.inactive 80 | .entry(table_id) 81 | .or_default() 82 | .push((reason, removed_worker)); 83 | } 84 | } 85 | 86 | pub async fn wait_all(&mut self) -> Vec { 87 | let worker_count = self.active.len(); 88 | info!("Waiting for {} workers to complete", worker_count); 89 | 90 | let mut errors = Vec::new(); 91 | 92 | let active = mem::take(&mut self.active); 93 | for (_, worker) in active { 94 | if let Err(err) = worker.wait().await { 95 | errors.push(err); 96 | } 97 
--------------------------------------------------------------------------------
/etl/tests/common/database.rs:
--------------------------------------------------------------------------------
1 | use postgres::schema::TableName;
2 | use postgres::tokio::options::PgDatabaseOptions;
3 | use postgres::tokio::test_utils::PgDatabase;
4 | use tokio_postgres::config::SslMode;
5 | use uuid::Uuid;
6 | 
7 | /// The schema name used for organizing test tables.
8 | ///
9 | /// This constant defines the default schema where test tables are created,
10 | /// providing isolation from other database objects.
11 | const TEST_DATABASE_SCHEMA: &str = "test";
12 | 
13 | /// Creates a [`TableName`] in the test schema.
14 | ///
15 | /// This helper function constructs a [`TableName`] with the schema set to [`TEST_DATABASE_SCHEMA`]
16 | /// and the provided name as the table name. It's used to ensure consistent table naming
17 | /// across test scenarios.
18 | pub fn test_table_name(name: &str) -> TableName {
19 |     TableName {
20 |         schema: TEST_DATABASE_SCHEMA.to_owned(),
21 |         name: name.to_owned(),
22 |     }
23 | }
24 | 
25 | /// Creates a new test database instance with a unique name.
26 | ///
27 | /// This function spawns a new PostgreSQL database with a random UUID as its name,
28 | /// using default credentials and disabled SSL. It automatically creates the test schema
29 | /// for organizing test tables.
30 | ///
31 | /// # Panics
32 | ///
33 | /// Panics if the test schema cannot be created.
34 | pub async fn spawn_database() -> PgDatabase {
35 |     let options = PgDatabaseOptions {
36 |         host: "localhost".to_owned(),
37 |         port: 5430,
38 |         // We create a random database name to avoid conflicts with existing databases.
39 |         name: Uuid::new_v4().to_string(),
40 |         username: "postgres".to_owned(),
41 |         password: Some("postgres".to_owned()),
42 |         ssl_mode: SslMode::Disable,
43 |     };
44 | 
45 |     let database = PgDatabase::new(options).await;
46 | 
47 |     // Create the test schema.
48 |     database
49 |         .client
50 |         .execute(&format!("CREATE SCHEMA {}", TEST_DATABASE_SCHEMA), &[])
51 |         .await
52 |         .expect("Failed to create test schema");
53 | 
54 |     database
55 | }
56 | 
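// Editorial sketch (not part of the repository): a typical test pairs `spawn_database` with
// `test_table_name` so that every table it creates lives in the isolated `test` schema. Only
// the `client` field used above is assumed of `PgDatabase`.
async fn create_example_table() {
    let database = spawn_database().await;
    let orders = test_table_name("orders");

    database
        .client
        .execute(
            &format!(
                "CREATE TABLE {} (id BIGINT PRIMARY KEY, amount BIGINT NOT NULL)",
                orders.as_quoted_identifier()
            ),
            &[],
        )
        .await
        .expect("Failed to create example table");
}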
--------------------------------------------------------------------------------
/etl/tests/common/mod.rs:
--------------------------------------------------------------------------------
1 | /// Common utilities and helpers for testing PostgreSQL replication functionality.
2 | ///
3 | /// This module provides shared testing infrastructure including database management,
4 | /// pipeline testing utilities, destination testing helpers, and table manipulation utilities.
5 | /// It also includes common testing patterns like waiting for conditions to be met.
6 | use std::time::{Duration, Instant};
7 | use tokio::time::sleep;
8 | 
9 | pub mod database;
10 | pub mod destination;
11 | pub mod pipeline;
12 | pub mod table;
13 | 
14 | /// The maximum duration to wait for test conditions to be met.
15 | ///
16 | /// This constant defines the timeout period for asynchronous test assertions,
17 | /// ensuring tests don't hang indefinitely while waiting for expected states.
18 | const MAX_ASSERTION_DURATION: Duration = Duration::from_secs(20);
19 | 
20 | /// The interval between condition checks during test assertions.
21 | ///
22 | /// This constant defines how frequently we poll for condition changes while
23 | /// waiting for test assertions to complete.
24 | const ASSERTION_FREQUENCY_DURATION: Duration = Duration::from_millis(10);
25 | 
26 | /// Waits asynchronously for a condition to be met within the maximum timeout period.
27 | ///
28 | /// This function repeatedly evaluates the provided condition until it returns true
29 | /// or the maximum duration is exceeded. It's useful for testing asynchronous
30 | /// operations where the exact completion time is not known.
31 | ///
32 | /// # Panics
33 | ///
34 | /// Panics if the condition is not met within [`MAX_ASSERTION_DURATION`].
35 | pub async fn wait_for_condition<F>(condition: F)
36 | where
37 |     F: Fn() -> bool,
38 | {
39 |     let start = Instant::now();
40 |     while start.elapsed() < MAX_ASSERTION_DURATION {
41 |         if condition() {
42 |             return;
43 |         }
44 | 
45 |         sleep(ASSERTION_FREQUENCY_DURATION).await;
46 |     }
47 | 
48 |     panic!("Failed to process all events within timeout")
49 | }
50 | 
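// Editorial sketch (not part of the repository): `wait_for_condition` is the building block
// for asynchronous assertions; tests usually poll shared state that a destination or worker
// updates in the background.
async fn wait_for_ten_events(event_count: std::sync::Arc<std::sync::atomic::AtomicUsize>) {
    use std::sync::atomic::Ordering;

    // Panics after MAX_ASSERTION_DURATION (20 seconds) if the count never reaches 10,
    // polling every ASSERTION_FREQUENCY_DURATION (10 milliseconds).
    wait_for_condition(|| event_count.load(Ordering::Relaxed) >= 10).await;
}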
--------------------------------------------------------------------------------
/etl/tests/common/pipeline.rs:
--------------------------------------------------------------------------------
1 | use etl::pipeline::batching::data_pipeline::{BatchDataPipeline, BatchDataPipelineHandle};
2 | use etl::pipeline::batching::BatchConfig;
3 | use etl::pipeline::destinations::BatchDestination;
4 | use etl::pipeline::sources::postgres::{PostgresSource, TableNamesFrom};
5 | use etl::pipeline::PipelineAction;
6 | use postgres::schema::TableName;
7 | use postgres::tokio::options::PgDatabaseOptions;
8 | use std::time::Duration;
9 | use tokio::task::JoinHandle;
10 | 
11 | /// Defines the operational mode for a PostgreSQL replication pipeline.
12 | #[derive(Debug, Clone)]
13 | pub enum PipelineMode {
14 |     /// Initializes a pipeline to copy specified tables.
15 |     CopyTable { table_names: Vec<TableName> },
16 |     /// Initializes a pipeline to consume changes from a publication and replication slot.
17 |     ///
18 |     /// If no slot name is provided, a new slot will be created on the specified publication.
19 |     Cdc {
20 |         publication: String,
21 |         slot_name: String,
22 |     },
23 | }
24 | 
25 | /// Generates a test-specific replication slot name.
26 | ///
27 | /// This function prefixes the provided slot name with "test_" to avoid conflicts
28 | /// with other replication slots.
29 | pub fn test_slot_name(slot_name: &str) -> String {
30 |     format!("test_{}", slot_name)
31 | }
32 | 
33 | /// Creates a new PostgreSQL replication pipeline.
34 | ///
35 | /// This function initializes a pipeline with a batch size of 1000 records and
36 | /// a maximum batch duration of 10 seconds.
37 | ///
38 | /// # Panics
39 | ///
40 | /// Panics if the PostgreSQL source cannot be created.
41 | pub async fn spawn_pg_pipeline<Snk: BatchDestination>(
42 |     options: &PgDatabaseOptions,
43 |     mode: PipelineMode,
44 |     destination: Snk,
45 | ) -> BatchDataPipeline<PostgresSource, Snk> {
46 |     let batch_config = BatchConfig::new(1000, Duration::from_secs(10));
47 | 
48 |     let pipeline = match mode {
49 |         PipelineMode::CopyTable { table_names } => {
50 |             let source = PostgresSource::new(
51 |                 options.clone(),
52 |                 vec![],
53 |                 None,
54 |                 TableNamesFrom::Vec(table_names),
55 |             )
56 |             .await
57 |             .expect("Failure when creating the Postgres source for copying tables");
58 |             let action = PipelineAction::TableCopiesOnly;
59 |             BatchDataPipeline::new(source, destination, action, batch_config)
60 |         }
61 |         PipelineMode::Cdc {
62 |             publication,
63 |             slot_name,
64 |         } => {
65 |             let source = PostgresSource::new(
66 |                 options.clone(),
67 |                 vec![],
68 |                 Some(test_slot_name(&slot_name)),
69 |                 TableNamesFrom::Publication(publication),
70 |             )
71 |             .await
72 |             .expect("Failure when creating the Postgres source for cdc");
73 |             let action = PipelineAction::CdcOnly;
74 |             BatchDataPipeline::new(source, destination, action, batch_config)
75 |         }
76 |     };
77 | 
78 |     pipeline
79 | }
80 | 
81 | /// Creates and spawns a new asynchronous PostgreSQL replication pipeline.
82 | ///
83 | /// This function creates a pipeline and wraps it in a [`PipelineRunner`] for
84 | /// easier management of the pipeline lifecycle.
85 | pub async fn spawn_async_pg_pipeline<Dst: BatchDestination + Send + 'static>(
86 |     options: &PgDatabaseOptions,
87 |     mode: PipelineMode,
88 |     destination: Dst,
89 | ) -> PipelineRunner<Dst> {
90 |     let pipeline = spawn_pg_pipeline(options, mode, destination).await;
91 |     PipelineRunner::new(pipeline)
92 | }
93 | 
94 | /// Manages the lifecycle of a PostgreSQL replication pipeline.
95 | ///
96 | /// This struct provides methods to run and stop a pipeline, handling the
97 | /// pipeline's state and ensuring proper cleanup.
98 | pub struct PipelineRunner<Dst: BatchDestination> {
99 |     pipeline: Option<BatchDataPipeline<PostgresSource, Dst>>,
100 |     pipeline_handle: BatchDataPipelineHandle,
101 | }
102 | 
103 | impl<Dst: BatchDestination + Send + 'static> PipelineRunner<Dst> {
104 |     /// Creates a new pipeline runner with the specified pipeline.
105 |     pub fn new(pipeline: BatchDataPipeline<PostgresSource, Dst>) -> Self {
106 |         let pipeline_handle = pipeline.handle();
107 |         Self {
108 |             pipeline: Some(pipeline),
109 |             pipeline_handle,
110 |         }
111 |     }
112 | 
113 |     /// Starts the pipeline asynchronously.
114 |     ///
115 |     /// # Panics
116 |     ///
117 |     /// Panics if the pipeline has already been run.
118 |     pub async fn run(&mut self) -> JoinHandle<BatchDataPipeline<PostgresSource, Dst>> {
119 |         if let Some(mut pipeline) = self.pipeline.take() {
120 |             return tokio::spawn(async move {
121 |                 pipeline
122 |                     .start()
123 |                     .await
124 |                     .expect("The pipeline experienced an error");
125 | 
126 |                 pipeline
127 |             });
128 |         }
129 | 
130 |         panic!("The pipeline has already been run");
131 |     }
132 | 
133 |     /// Stops the pipeline and waits for it to complete.
134 |     ///
135 |     /// This method signals the pipeline to stop and waits for it to finish
136 |     /// before returning. The pipeline is then restored to its initial state
137 |     /// for potential reuse.
138 |     ///
139 |     /// # Panics
140 |     ///
141 |     /// Panics if the pipeline task fails.
142 |     pub async fn stop_and_wait(
143 |         &mut self,
144 |         pipeline_task_handle: JoinHandle<BatchDataPipeline<PostgresSource, Dst>>,
145 |     ) {
146 |         // We signal the existing pipeline to stop.
147 |         self.pipeline_handle.stop();
148 | 
149 |         // We wait for the pipeline to finish, and we put it back for the next run.
150 |         let pipeline = pipeline_task_handle
151 |             .await
152 |             .expect("The pipeline task has failed");
153 |         // We recreate the handle just to make sure the pipeline handle and the pipeline are connected.
154 |         self.pipeline_handle = pipeline.handle();
155 |         self.pipeline = Some(pipeline);
156 |     }
157 | }
158 | 
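// Editorial sketch (not part of the repository): driving a CDC pipeline from a test with the
// runner above. `TestDestination::new()` is an assumed constructor from `common::destination`,
// which is not included in this excerpt.
async fn run_cdc_pipeline(options: &PgDatabaseOptions) {
    let destination = TestDestination::new();

    let mut runner = spawn_async_pg_pipeline(
        options,
        PipelineMode::Cdc {
            publication: "replicator_publication".to_owned(),
            // `spawn_pg_pipeline` prefixes this with "test_" via `test_slot_name`.
            slot_name: "cdc_example".to_owned(),
        },
        destination,
    )
    .await;

    // Start the pipeline, let the test change data in the source database, then stop it.
    let handle = runner.run().await;
    // ... perform inserts/updates/deletes against the source database here ...
    runner.stop_and_wait(handle).await;
}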
--------------------------------------------------------------------------------
/etl/tests/common/table.rs:
--------------------------------------------------------------------------------
1 | use crate::common::destination::TestDestination;
2 | use postgres::schema::{ColumnSchema, TableId, TableName};
3 | use tokio_postgres::types::Type;
4 | 
5 | /// Verifies that a table's schema matches the expected configuration.
6 | ///
7 | /// This function compares a table's actual schema against the expected schema,
8 | /// checking the table name, ID, and all column properties including name, type,
9 | /// modifiers, nullability, and primary key status.
10 | ///
11 | /// # Panics
12 | ///
13 | /// Panics if:
14 | /// - The table ID is not found in the destination's schema
15 | /// - The schema index is out of bounds
16 | /// - Any column property doesn't match the expected configuration
17 | pub fn assert_table_schema(
18 |     destination: &TestDestination,
19 |     table_id: TableId,
20 |     schema_index: usize,
21 |     expected_table_name: TableName,
22 |     additional_expected_columns: &[ColumnSchema],
23 | ) {
24 |     // By default, we expect the ID column since we always add it when `PgDatabase::create_table`
25 |     // is called.
26 | let mut expected_columns = vec![ColumnSchema { 27 | name: "id".to_string(), 28 | typ: Type::INT8, 29 | modifier: -1, 30 | nullable: false, 31 | primary: true, 32 | }]; 33 | expected_columns.extend_from_slice(additional_expected_columns); 34 | 35 | let tables_schemas = &destination.get_tables_schemas()[schema_index]; 36 | let table_schema = tables_schemas.get(&table_id).unwrap(); 37 | 38 | assert_eq!(table_schema.table_id, table_id); 39 | assert_eq!(table_schema.table_name, expected_table_name); 40 | 41 | let columns = &table_schema.column_schemas; 42 | assert_eq!(columns.len(), expected_columns.len()); 43 | 44 | for (actual, expected) in columns.iter().zip(expected_columns.iter()) { 45 | assert_eq!(actual.name, expected.name); 46 | assert_eq!(actual.typ, expected.typ); 47 | assert_eq!(actual.modifier, expected.modifier); 48 | assert_eq!(actual.nullable, expected.nullable); 49 | assert_eq!(actual.primary, expected.primary); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /etl/tests/integration/mod.rs: -------------------------------------------------------------------------------- 1 | mod pipeline_test; 2 | -------------------------------------------------------------------------------- /etl/tests/mod.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | mod integration; 3 | -------------------------------------------------------------------------------- /postgres/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "postgres" 3 | version = "0.1.0" 4 | edition = "2024" 5 | 6 | [dependencies] 7 | pg_escape = { workspace = true } 8 | serde = { workspace = true, features = ["derive"] } 9 | serde_json = { workspace = true, features = ["std"] } 10 | secrecy = { workspace = true, features = ["serde", "alloc"] } 11 | sqlx = { workspace = true, features = [ 12 | "runtime-tokio-rustls", 13 | "macros", 14 | "postgres", 15 | "json", 16 | "migrate", 17 | ] } 18 | tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } 19 | tokio-postgres = { workspace = true, features = [ 20 | "runtime", 21 | "with-chrono-0_4", 22 | "with-uuid-1", 23 | "with-serde_json-1", 24 | ] } 25 | tokio-postgres-rustls = { workspace = true } 26 | 27 | 28 | [features] 29 | test-utils = [] 30 | tokio = [] 31 | sqlx = [] -------------------------------------------------------------------------------- /postgres/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! PostgreSQL database connection utilities for all crates. 2 | //! 3 | //! This crate provides database connection options and utilities for working with PostgreSQL. 4 | //! It supports both the [`sqlx`] and [`tokio-postgres`] crates through feature flags. 5 | //! 6 | //! # Features 7 | //! 8 | //! - `sqlx`: Enables SQLx-specific database connection options and utilities 9 | //! - `tokio`: Enables tokio-postgres-specific database connection options and utilities 10 | //! 
- `test-utils`: Enables test utilities for both SQLx and tokio-postgres implementations 11 | 12 | pub mod schema; 13 | #[cfg(feature = "sqlx")] 14 | pub mod sqlx; 15 | #[cfg(feature = "tokio")] 16 | pub mod tokio; 17 | -------------------------------------------------------------------------------- /postgres/src/schema.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | 3 | use pg_escape::quote_identifier; 4 | use tokio_postgres::types::Type; 5 | 6 | /// An object identifier in PostgreSQL. 7 | pub type Oid = u32; 8 | 9 | /// A fully qualified PostgreSQL table name consisting of a schema and table name. 10 | /// 11 | /// This type represents a table identifier in PostgreSQL, which requires both a schema name 12 | /// and a table name. It provides methods for formatting the name in different contexts. 13 | #[derive(Debug, Clone, Eq, PartialEq)] 14 | pub struct TableName { 15 | /// The schema name containing the table 16 | pub schema: String, 17 | /// The name of the table within the schema 18 | pub name: String, 19 | } 20 | 21 | impl TableName { 22 | /// Returns the table name as a properly quoted PostgreSQL identifier. 23 | /// 24 | /// This method ensures the schema and table names are properly escaped according to 25 | /// PostgreSQL identifier quoting rules. 26 | pub fn as_quoted_identifier(&self) -> String { 27 | let quoted_schema = quote_identifier(&self.schema); 28 | let quoted_name = quote_identifier(&self.name); 29 | format!("{quoted_schema}.{quoted_name}") 30 | } 31 | } 32 | 33 | impl fmt::Display for TableName { 34 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 35 | f.write_fmt(format_args!("{0}.{1}", self.schema, self.name)) 36 | } 37 | } 38 | 39 | /// A type alias for PostgreSQL type modifiers. 40 | /// 41 | /// Type modifiers in PostgreSQL are used to specify additional type-specific attributes, 42 | /// such as length for varchar or precision for numeric types. 43 | type TypeModifier = i32; 44 | 45 | /// Represents the schema of a single column in a PostgreSQL table. 46 | /// 47 | /// This type contains all metadata about a column including its name, data type, 48 | /// type modifier, nullability, and whether it's part of the primary key. 49 | #[derive(Debug, Clone, Eq, PartialEq)] 50 | pub struct ColumnSchema { 51 | /// The name of the column 52 | pub name: String, 53 | /// The PostgreSQL data type of the column 54 | pub typ: Type, 55 | /// Type-specific modifier value (e.g., length for varchar) 56 | pub modifier: TypeModifier, 57 | /// Whether the column can contain NULL values 58 | pub nullable: bool, 59 | /// Whether the column is part of the table's primary key 60 | pub primary: bool, 61 | } 62 | 63 | /// A type alias for PostgreSQL table OIDs. 64 | /// 65 | /// Table OIDs are unique identifiers assigned to tables in PostgreSQL. 66 | pub type TableId = u32; 67 | 68 | /// Represents the complete schema of a PostgreSQL table. 69 | /// 70 | /// This type contains all metadata about a table including its name, OID, 71 | /// and the schemas of all its columns. 72 | #[derive(Debug, Clone, Eq, PartialEq)] 73 | pub struct TableSchema { 74 | /// The fully qualified name of the table 75 | pub table_name: TableName, 76 | /// The PostgreSQL OID of the table 77 | pub table_id: TableId, 78 | /// The schemas of all columns in the table 79 | pub column_schemas: Vec, 80 | } 81 | 82 | impl TableSchema { 83 | /// Returns whether the table has any primary key columns. 
84 | /// 85 | /// This method checks if any column in the table is marked as part of the primary key. 86 | pub fn has_primary_keys(&self) -> bool { 87 | self.column_schemas.iter().any(|cs| cs.primary) 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /postgres/src/sqlx/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod options; 2 | #[cfg(feature = "test-utils")] 3 | pub mod test_utils; 4 | -------------------------------------------------------------------------------- /postgres/src/sqlx/options.rs: -------------------------------------------------------------------------------- 1 | use secrecy::{ExposeSecret, Secret}; 2 | use serde::Deserialize; 3 | use sqlx::postgres::{PgConnectOptions, PgSslMode}; 4 | 5 | /// Connection options for a PostgreSQL database. 6 | /// 7 | /// Contains the connection parameters needed to establish a connection to a PostgreSQL 8 | /// database server, including network location, authentication credentials, and security 9 | /// settings. 10 | #[derive(Debug, Clone, Deserialize)] 11 | pub struct PgDatabaseOptions { 12 | /// Host name or IP address of the PostgreSQL server 13 | pub host: String, 14 | /// Port number that the PostgreSQL server listens on 15 | pub port: u16, 16 | /// Name of the target database 17 | pub name: String, 18 | /// Username for authentication 19 | pub username: String, 20 | /// Optional password for authentication, wrapped in [`Secret`] for secure handling 21 | pub password: Option>, 22 | /// If true, requires SSL/TLS encryption for the connection 23 | pub require_ssl: bool, 24 | } 25 | 26 | impl PgDatabaseOptions { 27 | /// Creates connection options for connecting to the PostgreSQL server without 28 | /// specifying a database. 29 | /// 30 | /// Returns [`PgConnectOptions`] configured with the host, port, username, SSL mode 31 | /// and optional password from this instance. Useful for administrative operations 32 | /// that must be performed before connecting to a specific database, like database 33 | /// creation. 34 | pub fn without_db(&self) -> PgConnectOptions { 35 | let ssl_mode = if self.require_ssl { 36 | PgSslMode::Require 37 | } else { 38 | PgSslMode::Prefer 39 | }; 40 | 41 | let options = PgConnectOptions::new_without_pgpass() 42 | .host(&self.host) 43 | .username(&self.username) 44 | .port(self.port) 45 | .ssl_mode(ssl_mode); 46 | 47 | if let Some(password) = &self.password { 48 | options.password(password.expose_secret()) 49 | } else { 50 | options 51 | } 52 | } 53 | 54 | /// Creates connection options for connecting to a specific database. 55 | /// 56 | /// Returns [`PgConnectOptions`] configured with all connection parameters including 57 | /// the database name from this instance. 58 | pub fn with_db(&self) -> PgConnectOptions { 59 | self.without_db().database(&self.name) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /postgres/src/sqlx/test_utils.rs: -------------------------------------------------------------------------------- 1 | use crate::sqlx::options::PgDatabaseOptions; 2 | use sqlx::{Connection, Executor, PgConnection, PgPool}; 3 | 4 | /// Creates a new PostgreSQL database and returns a connection pool to it. 5 | /// 6 | /// Establishes a connection to the PostgreSQL server using the provided options, 7 | /// creates a new database, and returns a [`PgPool`] connected to the new database. 8 | /// Panics if the connection fails or if database creation fails. 
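/// Typically paired with [`drop_pg_database`] below to tear the test database down afterwards.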
9 | pub async fn create_pg_database(options: &PgDatabaseOptions) -> PgPool { 10 | // Create the database via a single connection. 11 | let mut connection = PgConnection::connect_with(&options.without_db()) 12 | .await 13 | .expect("Failed to connect to Postgres"); 14 | connection 15 | .execute(&*format!(r#"create database "{}";"#, options.name)) 16 | .await 17 | .expect("Failed to create database"); 18 | 19 | // Create a connection pool to the database. 20 | PgPool::connect_with(options.with_db()) 21 | .await 22 | .expect("Failed to connect to Postgres") 23 | } 24 | 25 | /// Drops a PostgreSQL database and cleans up all connections. 26 | /// 27 | /// Connects to the PostgreSQL server, forcefully terminates all active connections 28 | /// to the target database, and drops the database if it exists. Useful for cleaning 29 | /// up test databases. Takes a reference to [`PgDatabaseOptions`] specifying the database 30 | /// to drop. Panics if any operation fails. 31 | pub async fn drop_pg_database(options: &PgDatabaseOptions) { 32 | // Connect to the default database. 33 | let mut connection = PgConnection::connect_with(&options.without_db()) 34 | .await 35 | .expect("Failed to connect to Postgres"); 36 | 37 | // Forcefully terminate any remaining connections to the database. 38 | connection 39 | .execute(&*format!( 40 | r#" 41 | select pg_terminate_backend(pg_stat_activity.pid) 42 | from pg_stat_activity 43 | where pg_stat_activity.datname = '{}' 44 | and pid <> pg_backend_pid();"#, 45 | options.name 46 | )) 47 | .await 48 | .expect("Failed to terminate database connections"); 49 | 50 | // Drop the database. 51 | connection 52 | .execute(&*format!(r#"drop database if exists "{}";"#, options.name)) 53 | .await 54 | .expect("Failed to destroy database"); 55 | } 56 | -------------------------------------------------------------------------------- /postgres/src/tokio/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod options; 2 | #[cfg(feature = "test-utils")] 3 | pub mod test_utils; 4 | -------------------------------------------------------------------------------- /postgres/src/tokio/options.rs: -------------------------------------------------------------------------------- 1 | use tokio_postgres::Config; 2 | use tokio_postgres::config::SslMode; 3 | 4 | /// Connection options for a PostgreSQL database. 5 | /// 6 | /// Contains the connection parameters needed to establish a connection to a PostgreSQL 7 | /// database server, including network location, authentication credentials, and security 8 | /// settings. 9 | #[derive(Debug, Clone)] 10 | pub struct PgDatabaseOptions { 11 | /// Host name or IP address of the PostgreSQL server 12 | pub host: String, 13 | /// Port number that the PostgreSQL server listens on 14 | pub port: u16, 15 | /// Name of the target database 16 | pub name: String, 17 | /// Username for authentication 18 | pub username: String, 19 | /// Optional password for authentication 20 | pub password: Option, 21 | /// SSL mode for the connection 22 | pub ssl_mode: SslMode, 23 | } 24 | 25 | impl PgDatabaseOptions { 26 | /// Creates connection options for connecting to the PostgreSQL server without 27 | /// specifying a database. 28 | /// 29 | /// Returns [`Config`] configured with the host, port, username, SSL mode and optional 30 | /// password from this instance. The database name is set to the username as per 31 | /// PostgreSQL convention. 
Useful for administrative operations that must be performed 32 | /// before connecting to a specific database, like database creation. 33 | pub fn without_db(&self) -> Config { 34 | let mut this = self.clone(); 35 | // Postgres requires a database, so we default to the database which is equal to the username 36 | // since this seems to be the standard. 37 | this.name = this.username.clone(); 38 | 39 | this.into() 40 | } 41 | 42 | /// Creates connection options for connecting to a specific database. 43 | /// 44 | /// Returns [`Config`] configured with all connection parameters including the database 45 | /// name from this instance. 46 | pub fn with_db(&self) -> Config { 47 | self.clone().into() 48 | } 49 | } 50 | 51 | impl From for Config { 52 | /// Converts [`PgDatabaseOptions`] into a [`Config`] instance. 53 | /// 54 | /// Sets all connection parameters including host, port, database name, username, 55 | /// SSL mode, and optional password. 56 | fn from(value: PgDatabaseOptions) -> Self { 57 | let mut config = Config::new(); 58 | config 59 | .host(value.host) 60 | .port(value.port) 61 | .dbname(value.name) 62 | .user(value.username) 63 | .ssl_mode(value.ssl_mode); 64 | 65 | if let Some(password) = value.password { 66 | config.password(password); 67 | } 68 | 69 | config 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /replicator/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "replicator" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | etl = { workspace = true, features = ["bigquery"] } 8 | postgres = { workspace = true, features = ["tokio"] } 9 | telemetry = { workspace = true } 10 | 11 | anyhow = { workspace = true, features = ["std"] } 12 | config = { workspace = true, features = ["yaml"] } 13 | rustls = { workspace = true, features = ["aws-lc-rs", "logging"] } 14 | secrecy = { workspace = true, features = ["serde"] } 15 | serde = { workspace = true, features = ["derive"] } 16 | serde_json = { workspace = true, features = ["std"] } 17 | thiserror = { workspace = true } 18 | rustls-pemfile = { workspace = true, features = ["std"] } 19 | tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } 20 | tracing = { workspace = true, default-features = true } 21 | -------------------------------------------------------------------------------- /replicator/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rust:1.85.0-slim-bookworm AS builder 2 | WORKDIR /app 3 | # TODO: remove protobuf-compiler once the upstream gcp-bigquery-client remove it from its deps 4 | RUN apt update && apt install protobuf-compiler clang -y 5 | COPY . . 
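# Editorial note: only the replicator crate is built here, in release mode; the resulting
# binary is the single artifact copied out of this builder stage into the slim runtime image below.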
6 | RUN cargo build --release -p replicator 7 | 8 | FROM debian:bookworm-slim 9 | WORKDIR /app 10 | RUN apt update && apt install ca-certificates -y 11 | COPY --from=builder /app/target/release/replicator replicator 12 | COPY replicator/configuration configuration 13 | ENTRYPOINT ["./replicator"] 14 | -------------------------------------------------------------------------------- /replicator/configuration/base.yaml: -------------------------------------------------------------------------------- 1 | batch: 2 | max_size: 1000 3 | max_fill_secs: 10 4 | tls: 5 | trusted_root_certs: "" 6 | enabled: false 7 | project: "abcdefghijklmnopqrst" -------------------------------------------------------------------------------- /replicator/configuration/dev.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | postgres: 3 | host: "localhost" 4 | port: 5432 5 | name: "postgres" 6 | username: "postgres" 7 | slot_name: "replicator_slot" 8 | publication: "replicator_publication" 9 | destination: 10 | big_query: 11 | project_id: "project-id" 12 | dataset_id: "dataset-id" 13 | -------------------------------------------------------------------------------- /replicator/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::{io::BufReader, time::Duration, vec}; 2 | 3 | use configuration::{ 4 | get_configuration, BatchSettings, DestinationSettings, Settings, SourceSettings, TlsSettings, 5 | }; 6 | use etl::{ 7 | pipeline::{ 8 | batching::{data_pipeline::BatchDataPipeline, BatchConfig}, 9 | destinations::bigquery::BigQueryBatchDestination, 10 | sources::postgres::{PostgresSource, TableNamesFrom}, 11 | PipelineAction, 12 | }, 13 | SslMode, 14 | }; 15 | use postgres::tokio::options::PgDatabaseOptions; 16 | use telemetry::init_tracing; 17 | use tracing::{info, instrument}; 18 | 19 | mod configuration; 20 | 21 | // APP_SOURCE__POSTGRES__PASSWORD and APP_DESTINATION__BIG_QUERY__PROJECT_ID environment variables must be set 22 | // before running because these are sensitive values which can't be configured in the config files 23 | #[tokio::main] 24 | async fn main() -> anyhow::Result<()> { 25 | let app_name = env!("CARGO_BIN_NAME"); 26 | // We pass emit_on_span_close = false to avoid emitting logs on span close 27 | // for replicator because it is not a web server and we don't need to emit logs 28 | // for every closing span. 
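// Editorial note: the returned guard is bound to a named variable (rather than `_`) so it
// stays alive until main returns; dropping it immediately would presumably lose buffered logs.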
29 | let _log_flusher = init_tracing(app_name, false)?; 30 | let settings = get_configuration()?; 31 | start_replication(settings).await 32 | } 33 | 34 | #[instrument(name = "replication", skip(settings), fields(project = settings.project))] 35 | async fn start_replication(settings: Settings) -> anyhow::Result<()> { 36 | rustls::crypto::aws_lc_rs::default_provider() 37 | .install_default() 38 | .expect("failed to install default crypto provider"); 39 | 40 | let SourceSettings::Postgres { 41 | host, 42 | port, 43 | name, 44 | username, 45 | password: _, 46 | slot_name, 47 | publication, 48 | } = &settings.source; 49 | info!( 50 | host, 51 | port, 52 | dbname = name, 53 | username, 54 | slot_name, 55 | publication, 56 | "source settings" 57 | ); 58 | 59 | let DestinationSettings::BigQuery { 60 | project_id, 61 | dataset_id, 62 | service_account_key: _, 63 | max_staleness_mins, 64 | } = &settings.destination; 65 | 66 | info!( 67 | project_id, 68 | dataset_id, max_staleness_mins, "destination settings" 69 | ); 70 | 71 | let BatchSettings { 72 | max_size, 73 | max_fill_secs, 74 | } = &settings.batch; 75 | info!(max_size, max_fill_secs, "batch settings"); 76 | 77 | let TlsSettings { 78 | trusted_root_certs: _, 79 | enabled, 80 | } = &settings.tls; 81 | info!(tls_enabled = enabled, "tls settings"); 82 | 83 | settings.tls.validate()?; 84 | 85 | let SourceSettings::Postgres { 86 | host, 87 | port, 88 | name, 89 | username, 90 | password, 91 | slot_name, 92 | publication, 93 | } = settings.source; 94 | 95 | let TlsSettings { 96 | trusted_root_certs, 97 | enabled, 98 | } = settings.tls; 99 | 100 | let mut trusted_root_certs_vec = vec![]; 101 | let ssl_mode = if enabled { 102 | let mut root_certs_reader = BufReader::new(trusted_root_certs.as_bytes()); 103 | for cert in rustls_pemfile::certs(&mut root_certs_reader) { 104 | let cert = cert?; 105 | trusted_root_certs_vec.push(cert); 106 | } 107 | 108 | SslMode::VerifyFull 109 | } else { 110 | SslMode::Disable 111 | }; 112 | 113 | let options = PgDatabaseOptions { 114 | host, 115 | port, 116 | name, 117 | username, 118 | password, 119 | ssl_mode, 120 | }; 121 | 122 | let postgres_source = PostgresSource::new( 123 | options, 124 | trusted_root_certs_vec, 125 | Some(slot_name), 126 | TableNamesFrom::Publication(publication), 127 | ) 128 | .await?; 129 | 130 | let DestinationSettings::BigQuery { 131 | project_id, 132 | dataset_id, 133 | service_account_key, 134 | max_staleness_mins, 135 | } = settings.destination; 136 | 137 | let bigquery_destination = BigQueryBatchDestination::new_with_key( 138 | project_id, 139 | dataset_id, 140 | &service_account_key, 141 | max_staleness_mins.unwrap_or(5), 142 | ) 143 | .await?; 144 | 145 | let BatchSettings { 146 | max_size, 147 | max_fill_secs, 148 | } = settings.batch; 149 | 150 | let batch_config = BatchConfig::new(max_size, Duration::from_secs(max_fill_secs)); 151 | let mut pipeline = BatchDataPipeline::new( 152 | postgres_source, 153 | bigquery_destination, 154 | PipelineAction::Both, 155 | batch_config, 156 | ); 157 | 158 | pipeline.start().await?; 159 | 160 | Ok(()) 161 | } 162 | -------------------------------------------------------------------------------- /scripts/init_db.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eo pipefail 3 | 4 | if [ ! -d "api/migrations" ]; then 5 | echo >&2 "❌ Error: 'api/migrations' folder not found." 6 | echo >&2 "Please run this script from the 'etl' directory." 7 | exit 1 8 | fi 9 | 10 | if ! 
[ -x "$(command -v psql)" ]; then 11 | echo >&2 "❌ Error: PostgreSQL client (psql) is not installed." 12 | echo >&2 "Please install it using your system's package manager." 13 | exit 1 14 | fi 15 | 16 | if ! [ -x "$(command -v sqlx)" ]; then 17 | echo >&2 "❌ Error: SQLx CLI is not installed." 18 | echo >&2 "To install it, run:" 19 | echo >&2 " cargo install --version='~0.7' sqlx-cli --no-default-features --features rustls,postgres" 20 | exit 1 21 | fi 22 | 23 | # Database configuration (should be the same as '/configuration/dev.yaml') 24 | echo "🔧 Configuring database settings..." 25 | DB_USER="${POSTGRES_USER:=postgres}" 26 | DB_PASSWORD="${POSTGRES_PASSWORD:=postgres}" 27 | DB_NAME="${POSTGRES_DB:=postgres}" 28 | DB_PORT="${POSTGRES_PORT:=5430}" 29 | DB_HOST="${POSTGRES_HOST:=localhost}" 30 | 31 | # Docker container setup 32 | if [[ -z "${SKIP_DOCKER}" ]] 33 | then 34 | echo "🐳 Checking Docker container status..." 35 | RUNNING_POSTGRES_CONTAINER=$(docker ps --filter 'name=postgres' --format '{{.ID}}') 36 | if [[ -n $RUNNING_POSTGRES_CONTAINER ]]; then 37 | echo "✅ PostgreSQL container is already running" 38 | else 39 | echo "🚀 Starting new PostgreSQL container..." 40 | 41 | # Prepare docker run command 42 | DOCKER_RUN_CMD="docker run \ 43 | -e POSTGRES_USER=${DB_USER} \ 44 | -e POSTGRES_PASSWORD=${DB_PASSWORD} \ 45 | -e POSTGRES_DB=${DB_NAME} \ 46 | -p "${DB_PORT}":5432 \ 47 | -d" 48 | 49 | # Handle persistent storage 50 | if [[ -n "${POSTGRES_DATA_VOLUME}" ]]; then 51 | echo "📁 Setting up persistent storage at ${POSTGRES_DATA_VOLUME}" 52 | mkdir -p "${POSTGRES_DATA_VOLUME}" 53 | DOCKER_RUN_CMD="${DOCKER_RUN_CMD} \ 54 | -v "${POSTGRES_DATA_VOLUME}":/var/lib/postgresql/data" 55 | else 56 | echo "📁 No storage path specified, using default Docker volume" 57 | fi 58 | 59 | # Complete the docker run command 60 | DOCKER_RUN_CMD="${DOCKER_RUN_CMD} \ 61 | --name "postgres_$(date '+%s')" \ 62 | postgres:15 -N 1000 \ 63 | -c wal_level=logical" 64 | # Increased maximum number of connections for testing purposes 65 | 66 | # Start the container 67 | eval "${DOCKER_RUN_CMD}" 68 | echo "✅ PostgreSQL container started" 69 | fi 70 | fi 71 | 72 | # Wait for PostgreSQL to be ready 73 | echo "⏳ Waiting for PostgreSQL to be ready..." 74 | until PGPASSWORD="${DB_PASSWORD}" psql -h "${DB_HOST}" -U "${DB_USER}" -p "${DB_PORT}" -d "postgres" -c '\q'; do 75 | echo "⏳ PostgreSQL is still starting up... waiting" 76 | sleep 1 77 | done 78 | 79 | echo "✅ PostgreSQL is up and running on port ${DB_PORT}" 80 | 81 | # Set up the database 82 | echo "🔄 Setting up the database..." 83 | export DATABASE_URL=postgres://${DB_USER}:${DB_PASSWORD}@${DB_HOST}:${DB_PORT}/${DB_NAME} 84 | sqlx database create 85 | sqlx migrate run --source api/migrations 86 | 87 | echo "✨ Database setup complete! Ready to go!" 88 | -------------------------------------------------------------------------------- /telemetry/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "telemetry" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | thiserror = { workspace = true } 8 | tracing = { workspace = true, default-features = true } 9 | tracing-appender = { workspace = true } 10 | tracing-log = { workspace = true, features = ["std", "log-tracer"] } 11 | tracing-subscriber = { workspace = true, default-features = true, features = [ 12 | "json", 13 | "env-filter", 14 | "ansi", 15 | ] } 16 | --------------------------------------------------------------------------------
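// Editorial sketch (not part of the repository): a rough end-to-end integration test wiring
// together the helpers shown earlier in this dump. `TestDestination`, its `clone`, the
// `database.options` field, and the `destination.table_row_count()` accessor are assumptions;
// the real tests under etl/tests/integration are the authoritative reference.
#[tokio::test]
async fn example_copy_table_roundtrip() {
    let database = spawn_database().await;
    let destination = TestDestination::new();

    let users = test_table_name("users");
    // ... create the `users` table and insert a few rows via `database.client` here ...

    let mut runner = spawn_async_pg_pipeline(
        &database.options,
        PipelineMode::CopyTable {
            table_names: vec![users],
        },
        destination.clone(),
    )
    .await;

    let handle = runner.run().await;

    // Wait until the destination reports that the copied rows have arrived.
    wait_for_condition(|| destination.table_row_count() > 0).await;

    runner.stop_and_wait(handle).await;
}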