├── .github
│   ├── ISSUE_TEMPLATE.md
│   └── workflows
│       └── build.yml
├── .gitignore
├── CHANGELOG.md
├── Dockerfile
├── Gemfile
├── LICENSE.txt
├── README.md
├── Rakefile
├── config.yml
├── exe
│   └── pgsync
├── lib
│   ├── pgsync.rb
│   └── pgsync
│       ├── client.rb
│       ├── data_source.rb
│       ├── init.rb
│       ├── schema_sync.rb
│       ├── sequence.rb
│       ├── sync.rb
│       ├── table.rb
│       ├── table_sync.rb
│       ├── task.rb
│       ├── task_resolver.rb
│       ├── utils.rb
│       └── version.rb
├── pgsync.gemspec
└── test
    ├── commands_test.rb
    ├── data_rules_test.rb
    ├── data_source_test.rb
    ├── in_batches_test.rb
    ├── init_test.rb
    ├── schema_test.rb
    ├── support
    │   ├── bad.yml
    │   ├── config.yml
    │   ├── schema1.sql
    │   ├── schema2.sql
    │   └── schema3.sql
    ├── sync_test.rb
    ├── tables_test.rb
    ├── test_helper.rb
    └── variable_test.rb
/.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Hi, 2 | 3 | Please use this script to show what you're trying to do when possible. Thanks! 4 | 5 | ```sh 6 | createdb pgsync_from 7 | createdb pgsync_to 8 | 9 | psql pgsync_from << SQL 10 | CREATE TABLE posts ( 11 | id BIGINT PRIMARY KEY, 12 | name TEXT 13 | ); 14 | INSERT INTO posts VALUES (1, 'hello'), (2, 'world'); 15 | SQL 16 | 17 | psql pgsync_to << SQL 18 | CREATE TABLE posts ( 19 | id BIGINT PRIMARY KEY, 20 | name TEXT 21 | ); 22 | SQL 23 | 24 | pgsync --from pgsync_from --to pgsync_to 25 | ``` 26 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | on: [push, pull_request] 3 | jobs: 4 | build: 5 | runs-on: ubuntu-latest 6 | strategy: 7 | fail-fast: false 8 | matrix: 9 | include: 10 | - ruby: 3.4 11 | postgres: 17 12 | - ruby: 2.7 13 | postgres: 9.6 14 | steps: 15 | - uses: actions/checkout@v4 16 | - uses: ruby/setup-ruby@v1 17 | with: 18 | ruby-version: ${{ matrix.ruby }} 19 | bundler-cache: true 20 | - uses: ankane/setup-postgres@v1 21 | with: 22 | postgres-version: ${{ matrix.postgres }} 23 | - run: | 24 | createdb pgsync_test1 25 | createdb pgsync_test2 26 | createdb pgsync_test3 27 | - run: bundle exec rake test 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.bundle/ 2 | /.yardoc 3 | /Gemfile.lock 4 | /_yardoc/ 5 | /coverage/ 6 | /doc/ 7 | /pkg/ 8 | /spec/reports/ 9 | /tmp/ 10 | /.pgsync.yml 11 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.8.0 (2024-07-10) 2 | 3 | - Added Docker image for `linux/arm64` 4 | - Fixed warning with Ruby 3.3 5 | - Dropped support for Ruby < 2.7 6 | 7 | ## 0.7.4 (2023-03-06) 8 | 9 | - Fixed issue with slop 4.10.0 10 | 11 | ## 0.7.3 (2022-11-09) 12 | 13 | - Fixed issue with pg 1.4.4 14 | - Fixed output when `pg_restore` not found 15 | 16 | ## 0.7.2 (2022-09-19) 17 | 18 | - Improved error message when a primary key is required 19 | - Switched to monotonic time 20 | - Fixed schema sync with Homebrew Postgres 14.5 21 | 22 | ## 0.7.1 (2022-07-06) 23 | 24 | - Fixed random letter data rule generating non-letter 25 | 26 | ## 0.7.0 (2022-03-10) 27 | 28 | - Changed `--defer-constraints` to `--defer-constraints-v1` 29 | - Changed `--defer-constraints-v2` to `--defer-constraints` 30 | - Fixed unknown alias error with Ruby 3.1 31 | - Dropped support for Ruby < 2.5 32 | 33 | ## 
0.6.8 (2021-09-21) 34 | 35 | - Fixed error when schema missing in destination with `--schema-first` and `--schema-only` 36 | 37 | ## 0.6.7 (2021-04-26) 38 | 39 | - Fixed connection security for `--schema-first` and `--schema-only` - [more info](https://github.com/ankane/pgsync/issues/121) 40 | 41 | ## 0.6.6 (2020-10-29) 42 | 43 | - Added support for tables with generated columns 44 | 45 | ## 0.6.5 (2020-07-10) 46 | 47 | - Improved help 48 | 49 | ## 0.6.4 (2020-06-10) 50 | 51 | - Log SQL with `--debug` option 52 | - Improved sequence queries 53 | 54 | ## 0.6.3 (2020-06-09) 55 | 56 | - Added `--defer-constraints-v2` option 57 | - Ensure consistent source snapshot with `--disable-integrity` 58 | 59 | ## 0.6.2 (2020-06-09) 60 | 61 | - Added support for `--disable-integrity` on Amazon RDS 62 | - Fixed error when excluded table not found in source 63 | 64 | ## 0.6.1 (2020-06-07) 65 | 66 | - Added Django and Laravel integrations 67 | 68 | ## 0.6.0 (2020-06-07) 69 | 70 | - Added messages for different column types and non-deferrable constraints 71 | - Added support for wildcards to `--exclude` 72 | - Improved `--overwrite` and `--preserve` options for foreign keys 73 | - Improved output for schema sync 74 | - Fixed `--overwrite` and `--preserve` options for multicolumn primary keys 75 | - Fixed output for notices 76 | 77 | Breaking 78 | 79 | - Syncs shared tables instead of raising an error when tables missing in destination 80 | - Raise an error when `--config` or `--db` option provided and config not found 81 | - Removed deprecated options 82 | - Dropped support for Postgres < 9.5 83 | 84 | ## 0.5.5 (2020-05-13) 85 | 86 | - Added `--jobs` option 87 | - Added `--defer-constraints` option 88 | - Added `--disable-user-triggers` option 89 | - Added `--disable-integrity` option 90 | - Improved error message for older libpq 91 | 92 | ## 0.5.4 (2020-05-09) 93 | 94 | - Fixed output for `--in-batches` 95 | 96 | ## 0.5.3 (2020-04-03) 97 | 98 | - Improved Postgres error messages 99 | - Fixed behavior of wildcard without schema 100 | 101 | ## 0.5.2 (2020-03-27) 102 | 103 | - Added `--fail-fast` option 104 | - Automatically exclude tables when `--init` run inside Rails app 105 | - Improved error message 106 | - Fixed typo in error message 107 | 108 | ## 0.5.1 (2020-03-26) 109 | 110 | - Fixed Slop warning with Ruby 2.7 111 | 112 | ## 0.5.0 (2020-03-26) 113 | 114 | - Improved output when syncing 115 | - Improved output on interrupt 116 | - Added `--no-sequences` option 117 | 118 | ## 0.4.3 (2019-10-27) 119 | 120 | - Added `sslmode` to template 121 | 122 | ## 0.4.2 (2019-10-27) 123 | 124 | - Improved flexibility of commands 125 | - Sync all objects when no tables specified 126 | 127 | ## 0.4.1 (2018-12-15) 128 | 129 | - Made `psql` version check more robust 130 | - Fixed issue with non-lowercase primary key 131 | - Prefer `--init` over `--setup` 132 | - Improved data rules 133 | 134 | ## 0.4.0 (2018-02-28) 135 | 136 | - Sync all schemas in search path by default 137 | - Added support for socket connections 138 | - Added support for environment variables 139 | 140 | ## 0.3.9 (2018-02-27) 141 | 142 | - Better support for schemas 143 | - Added `--schemas` option 144 | - Added `--all-schemas` option 145 | - Added `--schema-first` option 146 | - Fixed issue with non-lowercase tables and partial syncs 147 | 148 | ## 0.3.8 (2017-10-01) 149 | 150 | - Added Windows support 151 | - Added `random_string` and `random_number` replacement options 152 | - Improved performance of `--in-batches` for large tables 153 | 154 | ## 
0.3.7 (2017-08-30) 155 | 156 | - Fixed non-lowercase tables and columns 157 | - Fixed `--truncate` option with `--in-batches` 158 | 159 | ## 0.3.6 (2016-10-02) 160 | 161 | - Fixed `Table does not exist in source` error 162 | 163 | ## 0.3.5 (2016-07-23) 164 | 165 | - Support schemas other than public 166 | 167 | ## 0.3.4 (2016-04-29) 168 | 169 | - Added `--in-batches` mode for production transfers with `--batch-size` and `--sleep` 170 | 171 | ## 0.3.3 (2016-04-25) 172 | 173 | - Added `-d` option as an alias for `--db` 174 | - Added support for wildcard tables 175 | - Fixed `--schema-only` errors 176 | 177 | ## 0.3.2 (2016-04-19) 178 | 179 | - Prefer `{1}` for interpolation 180 | - Added `--overwrite` option 181 | - Deprecated `--where` and `--limit` 182 | 183 | ## 0.3.1 (2016-04-06) 184 | 185 | - Added `-t` or `--tables`, `-g` or `--groups` options 186 | - Deprecated `tables`, `groups`, and `setup` commands 187 | 188 | ## 0.3.0 (2016-04-06) 189 | 190 | - More powerful groups 191 | - Overwrite rows by default when a `WHERE` clause is used (previously truncated) 192 | - Added `pgsync users "WHERE id = 1"` 193 | - Added `pgsync group1`, shorthand for `pgsync groups group1` 194 | - Added `--schema-only` option 195 | - Added `--no-rules` option 196 | - Added `--setup` option 197 | - Added `--truncate` option 198 | 199 | ## 0.2.4 (2016-04-04) 200 | 201 | - Added `--preserve` option 202 | - Added `--list` option for groups and tables 203 | - Added `--limit` option 204 | 205 | ## 0.2.3 (2016-03-30) 206 | 207 | - Fixed `no PostgreSQL user name specified in startup packet` 208 | 209 | ## 0.2.2 (2016-03-28) 210 | 211 | - Added `--debug` option 212 | 213 | ## 0.2.1 (2016-03-27) 214 | 215 | - Do not require config file 216 | 217 | ## 0.2.0 (2016-03-26) 218 | 219 | - Fixed idle transaction timeout errors - respects `statement_timeout` as a result 220 | - Raise error when command exits with non-zero status 221 | 222 | ## 0.1.1 (2016-03-23) 223 | 224 | - Better support for multiple databases 225 | - Search parent directories for config file 226 | 227 | ## 0.1.0 (2015-12-07) 228 | 229 | - First release 230 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ruby:3-alpine 2 | 3 | LABEL org.opencontainers.image.authors="Andrew Kane <andrew@ankane.org>" 4 | 5 | RUN apk add --update build-base libpq-dev postgresql-client && \ 6 | gem install pgsync && \ 7 | apk del build-base && \ 8 | rm -rf /var/cache/apk/* 9 | 10 | ENTRYPOINT ["pgsync"] 11 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | gemspec 4 | 5 | gem "minitest", ">= 5" 6 | gem "rake" 7 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015-2023 Andrew Kane 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 
12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pgsync 2 | 3 | Sync data from one Postgres database to another (like `pg_dump`/`pg_restore`). Designed for: 4 | 5 | - **speed** - tables are transferred in parallel 6 | - **security** - built-in methods to prevent sensitive data from ever leaving the server 7 | - **flexibility** - gracefully handles schema differences, like missing columns and extra columns 8 | - **convenience** - sync partial tables, groups of tables, and related records 9 | 10 | :tangerine: Battle-tested at [Instacart](https://www.instacart.com/opensource) 11 | 12 | [![Build Status](https://github.com/ankane/pgsync/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/pgsync/actions) 13 | 14 | ## Installation 15 | 16 | pgsync is a command line tool. To install, run: 17 | 18 | ```sh 19 | gem install pgsync 20 | ``` 21 | 22 | This will give you the `pgsync` command. If installation fails, you may need to install [dependencies](#dependencies). 23 | 24 | You can also install it with Homebrew: 25 | 26 | ```sh 27 | brew install pgsync 28 | ``` 29 | 30 | ## Setup 31 | 32 | In your project directory, run: 33 | 34 | ```sh 35 | pgsync --init 36 | ``` 37 | 38 | This creates `.pgsync.yml` for you to customize. We recommend checking this into your version control (assuming it doesn’t contain sensitive information). `pgsync` commands can be run from this directory or any subdirectory. 39 | 40 | ## How to Use 41 | 42 | First, make sure your schema is set up in both databases. We recommend using a schema migration tool for this, but pgsync also provides a few [convenience methods](#schema). Once that’s done, you’re ready to sync data. 
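Your `.pgsync.yml` points `from` at the source database and `to` at the destination. A minimal sketch (these URLs are placeholders; the generated file documents all options):

```yml
# source database URL (placeholder)
from: postgres://user:password@source-host:5432/myapp_production

# destination database URL (placeholder)
to: postgres://localhost:5432/myapp_development
```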
43 | 44 | Sync tables 45 | 46 | ```sh 47 | pgsync 48 | ``` 49 | 50 | Sync specific tables 51 | 52 | ```sh 53 | pgsync table1,table2 54 | ``` 55 | 56 | Works with wildcards as well 57 | 58 | ```sh 59 | pgsync "table*" 60 | ``` 61 | 62 | Sync specific rows (existing rows are overwritten) 63 | 64 | ```sh 65 | pgsync products "where store_id = 1" 66 | ``` 67 | 68 | You can also preserve existing rows 69 | 70 | ```sh 71 | pgsync products "where store_id = 1" --preserve 72 | ``` 73 | 74 | Or truncate them 75 | 76 | ```sh 77 | pgsync products "where store_id = 1" --truncate 78 | ``` 79 | 80 | ## Tables 81 | 82 | Exclude specific tables 83 | 84 | ```sh 85 | pgsync --exclude table1,table2 86 | ``` 87 | 88 | Add to `.pgsync.yml` to exclude by default 89 | 90 | ```yml 91 | exclude: 92 | - table1 93 | - table2 94 | ``` 95 | 96 | Sync tables from all schemas or specific schemas (by default, only the search path is synced) 97 | 98 | ```sh 99 | pgsync --all-schemas 100 | # or 101 | pgsync --schemas public,other 102 | # or 103 | pgsync public.table1,other.table2 104 | ``` 105 | 106 | ## Groups 107 | 108 | Define groups in `.pgsync.yml`: 109 | 110 | ```yml 111 | groups: 112 | group1: 113 | - table1 114 | - table2 115 | ``` 116 | 117 | And run: 118 | 119 | ```sh 120 | pgsync group1 121 | ``` 122 | 123 | ## Variables 124 | 125 | You can also use groups to sync a specific record and associated records in other tables. 126 | 127 | To get product `123` with its reviews, last 10 coupons, and store, use: 128 | 129 | ```yml 130 | groups: 131 | product: 132 | products: "where id = {1}" 133 | reviews: "where product_id = {1}" 134 | coupons: "where product_id = {1} order by created_at desc limit 10" 135 | stores: "where id in (select store_id from products where id = {1})" 136 | ``` 137 | 138 | And run: 139 | 140 | ```sh 141 | pgsync product:123 142 | ``` 143 | 144 | ## Schema 145 | 146 | Sync the schema before the data (this wipes out existing data) 147 | 148 | ```sh 149 | pgsync --schema-first 150 | ``` 151 | 152 | Specify tables 153 | 154 | ```sh 155 | pgsync table1,table2 --schema-first 156 | ``` 157 | 158 | Sync the schema without data (this wipes out existing data) 159 | 160 | ```sh 161 | pgsync --schema-only 162 | ``` 163 | 164 | pgsync does not try to sync Postgres extensions. 165 | 166 | ## Sensitive Data 167 | 168 | Prevent sensitive data like email addresses from leaving the remote server. 169 | 170 | Define rules in `.pgsync.yml`: 171 | 172 | ```yml 173 | data_rules: 174 | email: unique_email 175 | last_name: random_letter 176 | birthday: random_date 177 | users.auth_token: 178 | value: secret 179 | visits_count: 180 | statement: "(RANDOM() * 10)::int" 181 | encrypted_*: null 182 | ``` 183 | 184 | `last_name` matches all columns named `last_name` and `users.last_name` matches only the users table. Wildcards are supported, and the first matching rule is applied. 185 | 186 | Options for replacement are: 187 | 188 | - `unique_email` 189 | - `unique_phone` 190 | - `unique_secret` 191 | - `random_letter` 192 | - `random_int` 193 | - `random_date` 194 | - `random_time` 195 | - `random_ip` 196 | - `value` 197 | - `statement` 198 | - `null` 199 | - `untouched` 200 | 201 | Rules starting with `unique_` require the table to have a single column primary key. `unique_phone` requires a numeric primary key. 202 | 203 | ## Foreign Keys 204 | 205 | Foreign keys can make it difficult to sync data. Three options are: 206 | 207 | 1. Defer constraints (recommended) 208 | 2. Manually specify the order of tables 209 | 3. 
Disable foreign key triggers, which can silently break referential integrity (not recommended) 210 | 211 | To defer constraints, use: 212 | 213 | ```sh 214 | pgsync --defer-constraints 215 | ``` 216 | 217 | To manually specify the order of tables, use `--jobs 1` so tables are synced one-at-a-time. 218 | 219 | ```sh 220 | pgsync table1,table2,table3 --jobs 1 221 | ``` 222 | 223 | To disable foreign key triggers and potentially break referential integrity, use: 224 | 225 | ```sh 226 | pgsync --disable-integrity 227 | ``` 228 | 229 | This requires superuser privileges on the `to` database. If syncing to (not from) Amazon RDS, use the `rds_superuser` role. If syncing to (not from) Heroku, there doesn’t appear to be a way to disable integrity. 230 | 231 | ## Triggers 232 | 233 | Disable user triggers with: 234 | 235 | ```sh 236 | pgsync --disable-user-triggers 237 | ``` 238 | 239 | ## Sequences 240 | 241 | Skip syncing sequences with: 242 | 243 | ```sh 244 | pgsync --no-sequences 245 | ``` 246 | 247 | ## Append-Only Tables 248 | 249 | For extremely large, append-only tables, sync in batches. 250 | 251 | ```sh 252 | pgsync large_table --in-batches 253 | ``` 254 | 255 | Note: This requires the table to have a numeric, increasing primary key 256 | 257 | The script will resume where it left off when run again, making it great for backfills. 258 | 259 | ## Connection Security 260 | 261 | Always make sure your [connection is secure](https://ankane.org/postgres-sslmode-explained) when connecting to a database over a network you don’t fully trust. Your best option is to connect over SSH or a VPN. Another option is to use `sslmode=verify-full`. If you don’t do this, your database credentials can be compromised. 262 | 263 | ## Safety 264 | 265 | To keep you from accidentally overwriting production, the destination is limited to `localhost` or `127.0.0.1` by default. 266 | 267 | To use another host, add `to_safe: true` to your `.pgsync.yml`. 268 | 269 | ## Multiple Databases 270 | 271 | To use with multiple databases, run: 272 | 273 | ```sh 274 | pgsync --init db2 275 | ``` 276 | 277 | This creates `.pgsync-db2.yml` for you to edit. Specify a database in commands with: 278 | 279 | ```sh 280 | pgsync --db db2 281 | ``` 282 | 283 | ## Integrations 284 | 285 | - [Django](#django) 286 | - [Heroku](#heroku) 287 | - [Laravel](#laravel) 288 | - [Rails](#rails) 289 | 290 | ### Django 291 | 292 | If you run `pgsync --init` in a Django project, migrations will be excluded in `.pgsync.yml`. 293 | 294 | ```yml 295 | exclude: 296 | - django_migrations 297 | ``` 298 | 299 | ### Heroku 300 | 301 | If you run `pgsync --init` in a Heroku project, the `from` database will be set in `.pgsync.yml`. 302 | 303 | ```yml 304 | from: $(heroku config:get DATABASE_URL)?sslmode=require 305 | ``` 306 | 307 | ### Laravel 308 | 309 | If you run `pgsync --init` in a Laravel project, migrations will be excluded in `.pgsync.yml`. 310 | 311 | ```yml 312 | exclude: 313 | - migrations 314 | ``` 315 | 316 | ### Rails 317 | 318 | If you run `pgsync --init` in a Rails project, Active Record metadata and schema migrations will be excluded in `.pgsync.yml`. 
319 | 320 | ```yml 321 | exclude: 322 | - ar_internal_metadata 323 | - schema_migrations 324 | ``` 325 | 326 | ## Debugging 327 | 328 | To view the SQL that’s run, use: 329 | 330 | ```sh 331 | pgsync --debug 332 | ``` 333 | 334 | ## Other Commands 335 | 336 | Help 337 | 338 | ```sh 339 | pgsync --help 340 | ``` 341 | 342 | Version 343 | 344 | ```sh 345 | pgsync --version 346 | ``` 347 | 348 | List tables 349 | 350 | ```sh 351 | pgsync --list 352 | ``` 353 | 354 | ## Scripts 355 | 356 | Use groups when possible to take advantage of parallelism. 357 | 358 | For Ruby scripts, you may need to do: 359 | 360 | ```rb 361 | Bundler.with_unbundled_env do 362 | system "pgsync ..." 363 | end 364 | ``` 365 | 366 | ## Docker 367 | 368 | Get the [Docker image](https://hub.docker.com/r/ankane/pgsync) with: 369 | 370 | ```sh 371 | docker pull ankane/pgsync 372 | alias pgsync="docker run -ti ankane/pgsync" 373 | ``` 374 | 375 | This will give you the `pgsync` command. 376 | 377 | ## Dependencies 378 | 379 | If installation fails, your system may be missing Ruby or libpq. 380 | 381 | On Mac, run: 382 | 383 | ```sh 384 | brew install libpq 385 | ``` 386 | 387 | On Ubuntu, run: 388 | 389 | ```sh 390 | sudo apt-get install ruby-dev libpq-dev build-essential 391 | ``` 392 | 393 | ## Upgrading 394 | 395 | Run: 396 | 397 | ```sh 398 | gem install pgsync 399 | ``` 400 | 401 | To use master, run: 402 | 403 | ```sh 404 | gem install specific_install 405 | gem specific_install https://github.com/ankane/pgsync.git 406 | ``` 407 | 408 | With Homebrew, run: 409 | 410 | ```sh 411 | brew upgrade pgsync 412 | ``` 413 | 414 | With Docker, run: 415 | 416 | ```sh 417 | docker pull ankane/pgsync 418 | ``` 419 | 420 | ## Related Projects 421 | 422 | Also check out: 423 | 424 | - [Dexter](https://github.com/ankane/dexter) - The automatic indexer for Postgres 425 | - [PgHero](https://github.com/ankane/pghero) - A performance dashboard for Postgres 426 | - [pgslice](https://github.com/ankane/pgslice) - Postgres partitioning as easy as pie 427 | 428 | ## Thanks 429 | 430 | Inspired by [heroku-pg-transfer](https://github.com/ddollar/heroku-pg-transfer). 431 | 432 | ## History 433 | 434 | View the [changelog](https://github.com/ankane/pgsync/blob/master/CHANGELOG.md) 435 | 436 | ## Contributing 437 | 438 | Everyone is encouraged to help improve this project. 
Here are a few ways you can help: 439 | 440 | - [Report bugs](https://github.com/ankane/pgsync/issues) 441 | - Fix bugs and [submit pull requests](https://github.com/ankane/pgsync/pulls) 442 | - Write, clarify, or fix documentation 443 | - Suggest or add new features 444 | 445 | To get started with development: 446 | 447 | ```sh 448 | git clone https://github.com/ankane/pgsync.git 449 | cd pgsync 450 | bundle install 451 | 452 | createdb pgsync_test1 453 | createdb pgsync_test2 454 | createdb pgsync_test3 455 | 456 | bundle exec rake test 457 | ``` 458 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "bundler/gem_tasks" 2 | require "rake/testtask" 3 | 4 | task default: :test 5 | Rake::TestTask.new do |t| 6 | t.libs << "test" 7 | t.pattern = "test/**/*_test.rb" 8 | end 9 | 10 | namespace :docker do 11 | task :build do 12 | require_relative "lib/pgsync/version" 13 | 14 | system "docker build --pull --no-cache -t ankane/pgsync:latest -t ankane/pgsync:v#{PgSync::VERSION} .", exception: true 15 | end 16 | 17 | task :release do 18 | require_relative "lib/pgsync/version" 19 | 20 | system "docker buildx build --push --pull --no-cache --platform linux/amd64,linux/arm64 -t ankane/pgsync:latest -t ankane/pgsync:v#{PgSync::VERSION} .", exception: true 21 | end 22 | end 23 | -------------------------------------------------------------------------------- /config.yml: -------------------------------------------------------------------------------- 1 | # source database URL 2 | # database URLs take the format of: 3 | # postgres://user:password@host:port/dbname 4 | # 5 | # we recommend a command which outputs a database URL 6 | # so sensitive information is not included in this file 7 | # 8 | # we *highly recommend* you use sslmode=verify-full when possible 9 | # see https://ankane.org/postgres-sslmode-explained for more info 10 | from: $(some_command)?sslmode=require 11 | 12 | # destination database URL 13 | to: postgres://localhost:5432/myapp_development 14 | 15 | # exclude tables 16 | %{exclude} 17 | # define groups 18 | # groups: 19 | # group1: 20 | # - table1 21 | # - table2 22 | 23 | # sync specific schemas 24 | # schemas: 25 | # - public 26 | 27 | # protect sensitive information 28 | data_rules: 29 | email: unique_email 30 | phone: unique_phone 31 | last_name: random_letter 32 | birthday: random_date 33 | encrypted_*: null 34 | -------------------------------------------------------------------------------- /exe/pgsync: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | # handle interrupts 4 | trap("SIGINT") { abort } 5 | 6 | require "pgsync" 7 | PgSync::Client.start 8 | -------------------------------------------------------------------------------- /lib/pgsync.rb: -------------------------------------------------------------------------------- 1 | # dependencies 2 | require "parallel" 3 | require "pg" 4 | require "slop" 5 | require "tty-spinner" 6 | 7 | # stdlib 8 | require "open3" 9 | require "set" 10 | require "shellwords" 11 | require "tempfile" 12 | require "uri" 13 | require "yaml" 14 | 15 | # modules 16 | require_relative "pgsync/utils" 17 | require_relative "pgsync/client" 18 | require_relative "pgsync/data_source" 19 | require_relative "pgsync/init" 20 | require_relative "pgsync/schema_sync" 21 | require_relative "pgsync/sequence" 22 | require_relative "pgsync/sync" 23 | require_relative 
"pgsync/table" 24 | require_relative "pgsync/table_sync" 25 | require_relative "pgsync/task" 26 | require_relative "pgsync/task_resolver" 27 | require_relative "pgsync/version" 28 | 29 | module PgSync 30 | class Error < StandardError; end 31 | end 32 | -------------------------------------------------------------------------------- /lib/pgsync/client.rb: -------------------------------------------------------------------------------- 1 | module PgSync 2 | class Client 3 | include Utils 4 | 5 | def initialize(args) 6 | @args = args 7 | output.sync = true 8 | end 9 | 10 | def perform 11 | result = Slop::Parser.new(slop_options).parse(@args) 12 | arguments = result.arguments 13 | options = result.to_h 14 | options[:defer_constraints_v2] ||= options[:defer_constraints] 15 | 16 | raise Error, "Specify either --db or --config, not both" if options[:db] && options[:config] 17 | raise Error, "Cannot use --overwrite with --in-batches" if options[:overwrite] && options[:in_batches] 18 | 19 | if options[:version] 20 | log VERSION 21 | elsif options[:help] 22 | log slop_options 23 | elsif options[:init] 24 | Init.new(arguments, options).perform 25 | else 26 | Sync.new(arguments, options).perform 27 | end 28 | rescue => e 29 | # Error, PG::ConnectionBad, Slop::Error 30 | raise e if options && options[:debug] 31 | abort colorize(e.message.strip, :red) 32 | end 33 | 34 | def self.start 35 | new(ARGV).perform 36 | end 37 | 38 | protected 39 | 40 | def slop_options 41 | o = Slop::Options.new 42 | o.banner = %{Usage: 43 | pgsync [tables,groups] [sql] [options]} 44 | 45 | # not shown 46 | o.string "-t", "--tables", "tables to sync", help: false 47 | o.string "-g", "--groups", "groups to sync", help: false 48 | 49 | o.separator "" 50 | o.separator "Table options:" 51 | o.string "--exclude", "tables to exclude" 52 | o.string "--schemas", "schemas to sync" 53 | o.boolean "--all-schemas", "sync all schemas", default: false 54 | 55 | o.separator "" 56 | o.separator "Row options:" 57 | o.boolean "--overwrite", "overwrite existing rows", default: false 58 | o.boolean "--preserve", "preserve existing rows", default: false 59 | o.boolean "--truncate", "truncate existing rows", default: false 60 | 61 | o.separator "" 62 | o.separator "Foreign key options:" 63 | o.boolean "--defer-constraints", "defer constraints", default: false 64 | o.boolean "--disable-integrity", "disable foreign key triggers", default: false 65 | o.integer "-j", "--jobs", "number of tables to sync at a time" 66 | 67 | # legacy 68 | o.boolean "--defer-constraints-v1", "defer constraints", default: false, help: false 69 | o.boolean "--defer-constraints-v2", "defer constraints", default: false, help: false 70 | # private, for testing 71 | o.boolean "--disable-integrity-v2", "disable foreign key triggers", default: false, help: false 72 | 73 | o.separator "" 74 | o.separator "Schema options:" 75 | o.boolean "--schema-first", "sync schema first", default: false 76 | o.boolean "--schema-only", "sync schema only", default: false 77 | 78 | o.separator "" 79 | o.separator "Config options:" 80 | # technically, defaults to searching path for .pgsync.yml, but this is simpler 81 | o.string "--config", "config file (defaults to .pgsync.yml)" 82 | o.string "-d", "--db", "database-specific config file" 83 | 84 | o.separator "" 85 | o.separator "Connection options:" 86 | o.string "--from", "source database URL" 87 | o.string "--to", "destination database URL" 88 | o.boolean "--to-safe", "confirms destination is safe (when not localhost)", default: false 89 | 90 | 
o.separator "" 91 | o.separator "Other options:" 92 | o.boolean "--debug", "show SQL statements", default: false 93 | o.boolean "--disable-user-triggers", "disable non-system triggers", default: false 94 | o.boolean "--fail-fast", "stop on the first failed table", default: false 95 | o.boolean "--no-rules", "don't apply data rules", default: false 96 | o.boolean "--no-sequences", "don't sync sequences", default: false 97 | 98 | # not shown in help 99 | # o.separator "" 100 | # o.separator "Append-only table options:" 101 | o.boolean "--in-batches", "sync in batches", default: false, help: false 102 | o.integer "--batch-size", "batch size", default: 10000, help: false 103 | o.float "--sleep", "time to sleep between batches", default: 0, help: false 104 | 105 | o.separator "" 106 | o.separator "Other commands:" 107 | o.boolean "--init", "create config file", default: false 108 | o.boolean "--list", "list tables", default: false 109 | o.boolean "-h", "--help", "print help" 110 | o.boolean "-v", "--version", "print version" 111 | 112 | o 113 | end 114 | end 115 | end 116 | -------------------------------------------------------------------------------- /lib/pgsync/data_source.rb: -------------------------------------------------------------------------------- 1 | module PgSync 2 | class DataSource 3 | include Utils 4 | 5 | attr_reader :url 6 | 7 | def initialize(url, name:, debug:) 8 | @url = url 9 | @name = name 10 | @debug = debug 11 | end 12 | 13 | def exists? 14 | @url && @url.size > 0 15 | end 16 | 17 | def local? 18 | !host || %w(localhost 127.0.0.1).include?(host) 19 | end 20 | 21 | def host 22 | @host ||= dedup_localhost(conninfo[:host]) 23 | end 24 | 25 | def port 26 | @port ||= dedup_localhost(conninfo[:port]) 27 | end 28 | 29 | def dbname 30 | @dbname ||= conninfo[:dbname] 31 | end 32 | 33 | # gets visible tables 34 | def tables 35 | @tables ||= begin 36 | query = <<~SQL 37 | SELECT 38 | table_schema AS schema, 39 | table_name AS table 40 | FROM 41 | information_schema.tables 42 | WHERE 43 | table_type = 'BASE TABLE' AND 44 | table_schema NOT IN ('information_schema', 'pg_catalog') 45 | ORDER BY 1, 2 46 | SQL 47 | execute(query).map { |row| Table.new(row["schema"], row["table"]) } 48 | end 49 | end 50 | 51 | def table_exists?(table) 52 | table_set.include?(table) 53 | end 54 | 55 | def max_id(table, primary_key, sql_clause = nil) 56 | execute("SELECT MAX(#{quote_ident(primary_key)}) FROM #{quote_ident_full(table)}#{sql_clause}").first["max"].to_i 57 | end 58 | 59 | def min_id(table, primary_key, sql_clause = nil) 60 | execute("SELECT MIN(#{quote_ident(primary_key)}) FROM #{quote_ident_full(table)}#{sql_clause}").first["min"].to_i 61 | end 62 | 63 | def last_value(seq) 64 | execute("SELECT last_value FROM #{quote_ident_full(seq)}").first["last_value"] 65 | end 66 | 67 | def truncate(table) 68 | execute("TRUNCATE #{quote_ident_full(table)} CASCADE") 69 | end 70 | 71 | def schemas 72 | @schemas ||= begin 73 | query = <<~SQL 74 | SELECT 75 | schema_name 76 | FROM 77 | information_schema.schemata 78 | ORDER BY 1 79 | SQL 80 | execute(query).map { |row| row["schema_name"] } 81 | end 82 | end 83 | 84 | def create_schema(schema) 85 | execute("CREATE SCHEMA #{quote_ident(schema)}") 86 | end 87 | 88 | def triggers(table) 89 | query = <<~SQL 90 | SELECT 91 | tgname AS name, 92 | tgisinternal AS internal, 93 | tgenabled != 'D' AS enabled, 94 | tgconstraint != 0 AS integrity 95 | FROM 96 | pg_trigger 97 | WHERE 98 | pg_trigger.tgrelid = $1::regclass 99 | SQL 100 | execute(query, 
[quote_ident_full(table)]) 101 | end 102 | 103 | def conn 104 | @conn ||= begin 105 | begin 106 | ENV["PGCONNECT_TIMEOUT"] ||= "3" 107 | if @url.start_with?("postgres://", "postgresql://") 108 | config = @url 109 | else 110 | config = {dbname: @url} 111 | end 112 | @concurrent_id = concurrent_id 113 | PG::Connection.new(config) 114 | rescue URI::InvalidURIError 115 | raise Error, "Invalid connection string. Make sure it works with `psql`" 116 | end 117 | end 118 | end 119 | 120 | def close 121 | if @conn 122 | @conn.close 123 | @conn = nil 124 | end 125 | end 126 | 127 | # reconnect for new thread or process 128 | def reconnect_if_needed 129 | reconnect if @concurrent_id != concurrent_id 130 | end 131 | 132 | def search_path 133 | @search_path ||= execute("SELECT unnest(current_schemas(true)) AS schema").map { |r| r["schema"] } 134 | end 135 | 136 | def server_version_num 137 | @server_version_num ||= execute("SHOW server_version_num").first["server_version_num"].to_i 138 | end 139 | 140 | def execute(query, params = []) 141 | log_sql query, params 142 | conn.exec_params(query, params).to_a 143 | end 144 | 145 | def transaction 146 | if conn.transaction_status == 0 147 | # not currently in transaction 148 | log_sql "BEGIN" 149 | result = 150 | conn.transaction do 151 | yield 152 | end 153 | log_sql "COMMIT" 154 | result 155 | else 156 | yield 157 | end 158 | end 159 | 160 | # TODO log time for each statement 161 | def log_sql(query, params = {}) 162 | if @debug 163 | message = "#{colorize("[#{@name}]", :cyan)} #{query.gsub(/\s+/, " ").strip}" 164 | message = "#{message} #{params.inspect}" if params.any? 165 | log message 166 | end 167 | end 168 | 169 | private 170 | 171 | def concurrent_id 172 | [Process.pid, Thread.current.object_id] 173 | end 174 | 175 | def reconnect 176 | @conn.reset 177 | @concurrent_id = concurrent_id 178 | end 179 | 180 | def table_set 181 | @table_set ||= Set.new(tables) 182 | end 183 | 184 | def conninfo 185 | @conninfo ||= begin 186 | unless conn.respond_to?(:conninfo_hash) 187 | raise Error, "libpq is too old. Upgrade it and run `gem install pg`" 188 | end 189 | conn.conninfo_hash 190 | end 191 | end 192 | 193 | # for pg 1.4.4 194 | # https://github.com/ged/ruby-pg/issues/490 195 | def dedup_localhost(value) 196 | if conninfo[:host] == "localhost,localhost" && conninfo[:port].to_s.split(",").uniq.size == 1 197 | value.split(",")[0] 198 | else 199 | value 200 | end 201 | end 202 | end 203 | end 204 | -------------------------------------------------------------------------------- /lib/pgsync/init.rb: -------------------------------------------------------------------------------- 1 | module PgSync 2 | class Init 3 | include Utils 4 | 5 | def initialize(arguments, options) 6 | @arguments = arguments 7 | @options = options 8 | end 9 | 10 | def perform 11 | if @arguments.size > 1 12 | raise Error, "Usage:\n pgsync --init [db]" 13 | end 14 | 15 | file = 16 | if @options[:config] 17 | @options[:config] 18 | elsif @arguments.any? 19 | db_config_file(@arguments.first) 20 | elsif @options[:db] 21 | db_config_file(@options[:db]) 22 | else 23 | ".pgsync.yml" 24 | end 25 | 26 | if File.exist?(file) 27 | raise Error, "#{file} exists." 28 | else 29 | exclude = 30 | if rails? 31 | <<~EOS 32 | exclude: 33 | - ar_internal_metadata 34 | - schema_migrations 35 | EOS 36 | elsif django? 37 | # TODO exclude other tables? 38 | <<~EOS 39 | exclude: 40 | - django_migrations 41 | EOS 42 | elsif laravel? 
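# Laravel keeps its migration history in a table named migrations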
43 | <<~EOS 44 | exclude: 45 | - migrations 46 | EOS 47 | else 48 | <<~EOS 49 | # exclude: 50 | # - table1 51 | # - table2 52 | EOS 53 | end 54 | 55 | # create file 56 | contents = File.read(__dir__ + "/../../config.yml") 57 | contents.sub!("$(some_command)", "$(heroku config:get DATABASE_URL)") if heroku? 58 | File.write(file, contents % {exclude: exclude}) 59 | 60 | log "#{file} created. Add your database credentials." 61 | end 62 | end 63 | 64 | def django? 65 | file_exists?("manage.py", /django/i) 66 | end 67 | 68 | def heroku? 69 | `git remote -v 2>&1`.include?("git.heroku.com") rescue false 70 | end 71 | 72 | def laravel? 73 | file_exists?("artisan") 74 | end 75 | 76 | def rails? 77 | file_exists?("bin/rails") 78 | end 79 | 80 | def file_exists?(path, contents = nil) 81 | if contents 82 | File.read(path).match(contents) 83 | else 84 | File.exist?(path) 85 | end 86 | rescue 87 | false 88 | end 89 | end 90 | end 91 | -------------------------------------------------------------------------------- /lib/pgsync/schema_sync.rb: -------------------------------------------------------------------------------- 1 | module PgSync 2 | class SchemaSync 3 | include Utils 4 | 5 | attr_reader :args, :opts 6 | 7 | def initialize(source:, destination:, tasks:, args:, opts:) 8 | @source = source 9 | @destination = destination 10 | @tasks = tasks 11 | @args = args 12 | @opts = opts 13 | end 14 | 15 | def perform 16 | if opts[:preserve] 17 | raise Error, "Cannot use --preserve with --schema-first or --schema-only" 18 | end 19 | 20 | # generate commands before starting spinner 21 | # for better error output if pg_restore not found 22 | dump_command = dump_command() 23 | restore_command = restore_command() 24 | 25 | show_spinner = output.tty? && !opts[:debug] 26 | 27 | if show_spinner 28 | spinner = TTY::Spinner.new(":spinner Syncing schema", format: :dots) 29 | spinner.auto_spin 30 | end 31 | 32 | create_schemas if specify_tables? 33 | 34 | # if spinner, capture lines to show on error 35 | lines = [] 36 | success = 37 | run_command(dump_command, restore_command) do |line| 38 | if show_spinner 39 | lines << line 40 | else 41 | log line 42 | end 43 | end 44 | 45 | if show_spinner 46 | if success 47 | spinner.success 48 | else 49 | spinner.error 50 | log lines.join 51 | end 52 | end 53 | 54 | raise Error, "Schema sync returned non-zero exit code" unless success 55 | end 56 | 57 | private 58 | 59 | def run_command(dump_command, restore_command) 60 | err_r, err_w = IO.pipe 61 | Open3.pipeline_start(dump_command, restore_command, err: err_w) do |wait_thrs| 62 | err_w.close 63 | err_r.each do |line| 64 | yield line 65 | end 66 | wait_thrs.all? { |t| t.value.success? } 67 | end 68 | end 69 | 70 | # --if-exists introduced in Postgres 9.4 71 | # not ideal, but simpler than trying to parse version 72 | def supports_if_exists? 73 | `pg_restore --help`.include?("--if-exists") 74 | rescue Errno::ENOENT 75 | raise Error, "pg_restore not found" 76 | end 77 | 78 | def dump_command 79 | cmd = ["pg_dump", "-Fc", "--verbose", "--schema-only", "--no-owner", "--no-acl"] 80 | if specify_tables? 81 | @tasks.each do |task| 82 | cmd.concat(["-t", task.quoted_table]) 83 | end 84 | end 85 | cmd.concat(["-d", @source.url]) 86 | end 87 | 88 | def restore_command 89 | cmd = ["pg_restore", "--verbose", "--no-owner", "--no-acl", "--clean"] 90 | cmd << "--if-exists" if supports_if_exists? 
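# --clean (with --if-exists when supported) drops existing objects before
# recreating them, which is why schema sync wipes out existing data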
91 | cmd.concat(["-d", @destination.url]) 92 | end 93 | 94 | # pg_dump -t won't create schemas (even with -n) 95 | # not ideal that this happens outside restore transaction 96 | def create_schemas 97 | schemas = @tasks.map { |t| t.table.schema }.uniq - @destination.schemas 98 | schemas.sort.each do |schema| 99 | @destination.create_schema(schema) 100 | end 101 | end 102 | 103 | def specify_tables? 104 | !opts[:all_schemas] || opts[:tables] || opts[:groups] || args[0] || opts[:exclude] || opts[:schemas] 105 | end 106 | end 107 | end 108 | -------------------------------------------------------------------------------- /lib/pgsync/sequence.rb: -------------------------------------------------------------------------------- 1 | # minimal class to keep schema and sequence name separate 2 | module PgSync 3 | class Sequence 4 | attr_reader :schema, :name, :column 5 | 6 | def initialize(schema, name, column:) 7 | @schema = schema 8 | @name = name 9 | @column = column 10 | end 11 | 12 | def full_name 13 | "#{schema}.#{name}" 14 | end 15 | 16 | def eql?(other) 17 | other.schema == schema && other.name == name 18 | end 19 | 20 | # override hash when overriding eql? 21 | def hash 22 | [schema, name].hash 23 | end 24 | 25 | def to_s 26 | full_name 27 | end 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /lib/pgsync/sync.rb: -------------------------------------------------------------------------------- 1 | module PgSync 2 | class Sync 3 | include Utils 4 | 5 | def initialize(arguments, options) 6 | @arguments = arguments 7 | @options = options 8 | end 9 | 10 | def perform 11 | started_at = monotonic_time 12 | 13 | args = @arguments 14 | opts = @options 15 | 16 | # only resolve commands from config, not CLI arguments 17 | [:to, :from].each do |opt| 18 | opts[opt] ||= resolve_source(config[opt.to_s]) 19 | end 20 | 21 | # merge other config 22 | [:to_safe, :exclude, :schemas].each do |opt| 23 | opts[opt] ||= config[opt.to_s] 24 | end 25 | 26 | if args.size > 2 27 | raise Error, "Usage:\n pgsync [options]" 28 | end 29 | 30 | raise Error, "No source" unless source.exists? 31 | raise Error, "No destination" unless destination.exists? 32 | 33 | unless opts[:to_safe] || destination.local? 34 | raise Error, "Danger! 
Add `to_safe: true` to `.pgsync.yml` if the destination is not localhost or 127.0.0.1" 35 | end 36 | 37 | print_description("From", source) 38 | print_description("To", destination) 39 | 40 | if (opts[:preserve] || opts[:overwrite]) && destination.server_version_num < 90500 41 | raise Error, "Postgres 9.5+ is required for --preserve and --overwrite" 42 | end 43 | 44 | resolver = TaskResolver.new(args: args, opts: opts, source: source, destination: destination, config: config, first_schema: first_schema) 45 | tasks = 46 | resolver.tasks.map do |task| 47 | Task.new(source: source, destination: destination, config: config, table: task[:table], opts: opts.merge(sql: task[:sql])) 48 | end 49 | 50 | if opts[:in_batches] && tasks.size > 1 51 | raise Error, "Cannot use --in-batches with multiple tables" 52 | end 53 | 54 | confirm_tables_exist(source, tasks, "source") 55 | 56 | if opts[:list] 57 | confirm_tables_exist(destination, tasks, "destination") 58 | tasks.each do |task| 59 | log task_name(task) 60 | end 61 | else 62 | if opts[:schema_first] || opts[:schema_only] 63 | SchemaSync.new(source: source, destination: destination, tasks: tasks, args: args, opts: opts).perform 64 | end 65 | 66 | unless opts[:schema_only] 67 | TableSync.new(source: source, destination: destination, tasks: tasks, opts: opts, resolver: resolver).perform 68 | end 69 | 70 | log_completed(started_at) 71 | end 72 | end 73 | 74 | private 75 | 76 | def config 77 | @config ||= begin 78 | file = config_file 79 | if file 80 | begin 81 | # same options as YAML.load_file 82 | File.open(file, "r:bom|utf-8") do |f| 83 | # changed to keyword arguments in 3.1.0.pre1 84 | # https://github.com/ruby/psych/commit/c79ed445b4b3f8c9adf3da13bca3c976ddfae258 85 | if Psych::VERSION.to_f >= 3.1 86 | YAML.safe_load(f, aliases: true, filename: file) || {} 87 | else 88 | YAML.safe_load(f, [], [], true, file) || {} 89 | end 90 | end 91 | rescue Psych::SyntaxError => e 92 | raise Error, e.message 93 | rescue Errno::ENOENT 94 | raise Error, "Config file not found: #{file}" 95 | end 96 | else 97 | {} 98 | end 99 | end 100 | end 101 | 102 | def config_file 103 | if @options[:config] 104 | @options[:config] 105 | elsif @options[:db] 106 | file = db_config_file(@options[:db]) 107 | search_tree(file) || file 108 | else 109 | search_tree(".pgsync.yml") 110 | end 111 | end 112 | 113 | def search_tree(file) 114 | return file if File.exist?(file) 115 | 116 | path = Dir.pwd 117 | # prevent infinite loop 118 | 20.times do 119 | absolute_file = File.join(path, file) 120 | break absolute_file if File.exist?(absolute_file) 121 | path = File.dirname(path) 122 | break if path == "/" 123 | end 124 | end 125 | 126 | def print_description(prefix, source) 127 | location = " on #{source.host}:#{source.port}" if source.host 128 | log "#{prefix}: #{source.dbname}#{location}" 129 | end 130 | 131 | def log_completed(started_at) 132 | time = monotonic_time - started_at 133 | message = "Completed in #{time.round(1)}s" 134 | log colorize(message, :green) 135 | end 136 | 137 | def source 138 | @source ||= data_source(@options[:from], "from") 139 | end 140 | 141 | def destination 142 | @destination ||= data_source(@options[:to], "to") 143 | end 144 | 145 | def data_source(url, name) 146 | ds = DataSource.new(url, name: name, debug: @options[:debug]) 147 | ObjectSpace.define_finalizer(self, self.class.finalize(ds)) 148 | ds 149 | end 150 | 151 | # ideally aliases would work, but haven't found a nice way to do this 152 | def resolve_source(source) 153 | if source 154 | source = 
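# dup so the gsub! below does not mutate the original option/config value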
source.dup 155 | source.gsub!(/\$\([^)]+\)/) do |m| 156 | command = m[2..-2] 157 | result = `#{command}`.chomp 158 | unless $?.success? 159 | raise Error, "Command exited with non-zero status:\n#{command}" 160 | end 161 | result 162 | end 163 | end 164 | source 165 | end 166 | 167 | def self.finalize(ds) 168 | # must use proc instead of stabby lambda 169 | proc { ds.close } 170 | end 171 | end 172 | end 173 | -------------------------------------------------------------------------------- /lib/pgsync/table.rb: -------------------------------------------------------------------------------- 1 | # minimal class to keep schema and table name separate 2 | module PgSync 3 | class Table 4 | attr_reader :schema, :name 5 | 6 | def initialize(schema, name) 7 | @schema = schema 8 | @name = name 9 | end 10 | 11 | def full_name 12 | "#{schema}.#{name}" 13 | end 14 | 15 | def eql?(other) 16 | other.schema == schema && other.name == name 17 | end 18 | 19 | # override hash when overriding eql? 20 | def hash 21 | [schema, name].hash 22 | end 23 | 24 | def to_s 25 | full_name 26 | end 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /lib/pgsync/table_sync.rb: -------------------------------------------------------------------------------- 1 | module PgSync 2 | class TableSync 3 | include Utils 4 | 5 | attr_reader :source, :destination, :tasks, :opts, :resolver 6 | 7 | def initialize(source:, destination:, tasks:, opts:, resolver:) 8 | @source = source 9 | @destination = destination 10 | @tasks = tasks 11 | @opts = opts 12 | @resolver = resolver 13 | end 14 | 15 | def perform 16 | confirm_tables_exist(destination, tasks, "destination") 17 | 18 | add_columns 19 | 20 | add_primary_keys 21 | 22 | add_sequences unless opts[:no_sequences] 23 | 24 | show_notes 25 | 26 | # don't sync tables with no shared fields 27 | # we show a warning message above 28 | run_tasks(tasks.reject { |task| task.shared_fields.empty? 
}) 29 | end 30 | 31 | def add_columns 32 | source_columns = columns(source) 33 | destination_columns = columns(destination) 34 | 35 | tasks.each do |task| 36 | task.from_columns = source_columns[task.table] || [] 37 | task.to_columns = destination_columns[task.table] || [] 38 | end 39 | end 40 | 41 | def add_primary_keys 42 | destination_primary_keys = primary_keys(destination) 43 | 44 | tasks.each do |task| 45 | task.to_primary_key = destination_primary_keys[task.table] || [] 46 | end 47 | end 48 | 49 | def add_sequences 50 | source_sequences = sequences(source) 51 | destination_sequences = sequences(destination) 52 | 53 | tasks.each do |task| 54 | shared_columns = Set.new(task.shared_fields) 55 | 56 | task.from_sequences = (source_sequences[task.table] || []).select { |s| shared_columns.include?(s.column) } 57 | task.to_sequences = (destination_sequences[task.table] || []).select { |s| shared_columns.include?(s.column) } 58 | end 59 | end 60 | 61 | def sequences(data_source) 62 | query = <<~SQL 63 | SELECT 64 | nt.nspname as schema, 65 | t.relname as table, 66 | a.attname as column, 67 | n.nspname as sequence_schema, 68 | s.relname as sequence 69 | FROM 70 | pg_class s 71 | INNER JOIN 72 | pg_depend d ON d.objid = s.oid 73 | INNER JOIN 74 | pg_class t ON d.objid = s.oid AND d.refobjid = t.oid 75 | INNER JOIN 76 | pg_attribute a ON (d.refobjid, d.refobjsubid) = (a.attrelid, a.attnum) 77 | INNER JOIN 78 | pg_namespace n ON n.oid = s.relnamespace 79 | INNER JOIN 80 | pg_namespace nt ON nt.oid = t.relnamespace 81 | WHERE 82 | s.relkind = 'S' 83 | SQL 84 | data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v| 85 | [k, v.map { |r| Sequence.new(r["sequence_schema"], r["sequence"], column: r["column"]) }] 86 | end.to_h 87 | end 88 | 89 | def primary_keys(data_source) 90 | # https://stackoverflow.com/a/20537829 91 | # TODO can simplify with array_position in Postgres 9.5+ 92 | query = <<~SQL 93 | SELECT 94 | nspname AS schema, 95 | relname AS table, 96 | pg_attribute.attname AS column, 97 | format_type(pg_attribute.atttypid, pg_attribute.atttypmod), 98 | pg_attribute.attnum, 99 | pg_index.indkey 100 | FROM 101 | pg_index, pg_class, pg_attribute, pg_namespace 102 | WHERE 103 | indrelid = pg_class.oid AND 104 | pg_class.relnamespace = pg_namespace.oid AND 105 | pg_attribute.attrelid = pg_class.oid AND 106 | pg_attribute.attnum = any(pg_index.indkey) AND 107 | indisprimary 108 | SQL 109 | data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v| 110 | [k, v.sort_by { |r| r["indkey"].split(" ").index(r["attnum"]) }.map { |r| r["column"] }] 111 | end.to_h 112 | end 113 | 114 | def show_notes 115 | # for tables 116 | resolver.notes.each do |note| 117 | warning note 118 | end 119 | 120 | # for columns and sequences 121 | tasks.each do |task| 122 | task.notes.each do |note| 123 | warning "#{task_name(task)}: #{note}" 124 | end 125 | end 126 | 127 | # for non-deferrable constraints 128 | if opts[:defer_constraints_v1] 129 | constraints = non_deferrable_constraints(destination) 130 | constraints = tasks.flat_map { |t| constraints[t.table] || [] } 131 | warning "Non-deferrable constraints: #{constraints.join(", ")}" if constraints.any? 
132 | end 133 | end 134 | 135 | def columns(data_source) 136 | query = <<~SQL 137 | SELECT 138 | table_schema AS schema, 139 | table_name AS table, 140 | column_name AS column, 141 | data_type AS type 142 | FROM 143 | information_schema.columns 144 | WHERE 145 | is_generated = 'NEVER' 146 | ORDER BY 1, 2, 3 147 | SQL 148 | data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v| 149 | [k, v.map { |r| {name: r["column"], type: r["type"]} }] 150 | end.to_h 151 | end 152 | 153 | def non_deferrable_constraints(data_source) 154 | query = <<~SQL 155 | SELECT 156 | table_schema AS schema, 157 | table_name AS table, 158 | constraint_name 159 | FROM 160 | information_schema.table_constraints 161 | WHERE 162 | constraint_type = 'FOREIGN KEY' AND 163 | is_deferrable = 'NO' 164 | SQL 165 | data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v| 166 | [k, v.map { |r| r["constraint_name"] }] 167 | end.to_h 168 | end 169 | 170 | def run_tasks(tasks, &block) 171 | notices = [] 172 | failed_tables = [] 173 | started_at = {} 174 | 175 | show_spinners = output.tty? && !opts[:in_batches] && !opts[:debug] 176 | if show_spinners 177 | spinners = TTY::Spinner::Multi.new(format: :dots, output: output) 178 | task_spinners = {} 179 | end 180 | 181 | start = lambda do |task, i| 182 | message = ":spinner #{display_item(task)}" 183 | 184 | if show_spinners 185 | spinner = spinners.register(message) 186 | spinner.auto_spin 187 | task_spinners[task] = spinner 188 | elsif opts[:in_batches] 189 | log message.sub(":spinner", "⠋") 190 | end 191 | 192 | started_at[task] = monotonic_time 193 | end 194 | 195 | finish = lambda do |task, i, result| 196 | time = (monotonic_time - started_at[task]).round(1) 197 | 198 | success = result[:status] == "success" 199 | 200 | message = 201 | if result[:message] 202 | "(#{result[:message].lines.first.to_s.strip})" 203 | else 204 | "- #{time}s" 205 | end 206 | 207 | notices.concat(result[:notices]) 208 | 209 | if show_spinners 210 | spinner = task_spinners[task] 211 | if success 212 | spinner.success(message) 213 | else 214 | spinner.error(message) 215 | end 216 | else 217 | status = success ? "✔" : "✖" 218 | log [status, display_item(task), message].join(" ") 219 | end 220 | 221 | unless success 222 | failed_tables << task_name(task) 223 | fail_sync(failed_tables) if opts[:fail_fast] 224 | end 225 | end 226 | 227 | options = {start: start, finish: finish} 228 | 229 | jobs = opts[:jobs] 230 | 231 | # disable multiple jobs for defer constraints and disable integrity 232 | # so we can use a transaction to ensure a consistent snapshot 233 | if opts[:debug] || opts[:in_batches] || opts[:defer_constraints_v1] || opts[:defer_constraints_v2] || opts[:disable_integrity] || opts[:disable_integrity_v2] 234 | warning "--jobs ignored" if jobs 235 | jobs = 0 236 | end 237 | 238 | if windows? 239 | options[:in_threads] = jobs || 4 240 | else 241 | options[:in_processes] = jobs if jobs 242 | end 243 | 244 | maybe_defer_constraints do 245 | # could try to use `raise Parallel::Kill` to fail faster with --fail-fast 246 | # see `fast_faster` branch 247 | # however, need to make sure connections are cleaned up properly 248 | Parallel.each(tasks, **options) do |task| 249 | source.reconnect_if_needed 250 | destination.reconnect_if_needed 251 | 252 | task.perform 253 | end 254 | end 255 | 256 | notices.each do |notice| 257 | warning notice 258 | end 259 | 260 | fail_sync(failed_tables) if failed_tables.any? 
261 | end 262 | 263 | # TODO add option to open transaction on source when manually specifying order of tables 264 | def maybe_defer_constraints 265 | if opts[:disable_integrity] || opts[:disable_integrity_v2] 266 | # create a transaction on the source 267 | # to ensure we get a consistent snapshot 268 | source.transaction do 269 | yield 270 | end 271 | elsif opts[:defer_constraints_v1] || opts[:defer_constraints_v2] 272 | destination.transaction do 273 | if opts[:defer_constraints_v2] 274 | table_constraints = non_deferrable_constraints(destination) 275 | table_constraints.each do |table, constraints| 276 | constraints.each do |constraint| 277 | destination.execute("ALTER TABLE #{quote_ident_full(table)} ALTER CONSTRAINT #{quote_ident(constraint)} DEFERRABLE") 278 | end 279 | end 280 | end 281 | 282 | destination.execute("SET CONSTRAINTS ALL DEFERRED") 283 | 284 | # create a transaction on the source 285 | # to ensure we get a consistent snapshot 286 | source.transaction do 287 | yield 288 | end 289 | 290 | # set them back 291 | # there are 3 modes: DEFERRABLE INITIALLY DEFERRED, DEFERRABLE INITIALLY IMMEDIATE, and NOT DEFERRABLE 292 | # we only update NOT DEFERRABLE 293 | # https://www.postgresql.org/docs/current/sql-set-constraints.html 294 | if opts[:defer_constraints_v2] 295 | destination.execute("SET CONSTRAINTS ALL IMMEDIATE") 296 | 297 | table_constraints.each do |table, constraints| 298 | constraints.each do |constraint| 299 | destination.execute("ALTER TABLE #{quote_ident_full(table)} ALTER CONSTRAINT #{quote_ident(constraint)} NOT DEFERRABLE") 300 | end 301 | end 302 | end 303 | end 304 | else 305 | yield 306 | end 307 | end 308 | 309 | def fail_sync(failed_tables) 310 | raise Error, "Sync failed for #{failed_tables.size} table#{failed_tables.size == 1 ? nil : "s"}: #{failed_tables.join(", ")}" 311 | end 312 | 313 | def display_item(item) 314 | messages = [] 315 | messages << task_name(item) 316 | messages << item.opts[:sql] if item.opts[:sql] 317 | messages.join(" ") 318 | end 319 | 320 | def windows? 321 | Gem.win_platform? 322 | end 323 | end 324 | end 325 | -------------------------------------------------------------------------------- /lib/pgsync/task.rb: -------------------------------------------------------------------------------- 1 | module PgSync 2 | class Task 3 | include Utils 4 | 5 | attr_reader :source, :destination, :config, :table, :opts 6 | attr_accessor :from_columns, :to_columns, :from_sequences, :to_sequences, :to_primary_key 7 | 8 | def initialize(source:, destination:, config:, table:, opts:) 9 | @source = source 10 | @destination = destination 11 | @config = config 12 | @table = table 13 | @opts = opts 14 | @from_sequences = [] 15 | @to_sequences = [] 16 | end 17 | 18 | def quoted_table 19 | quote_ident_full(table) 20 | end 21 | 22 | def perform 23 | with_notices do 24 | handle_errors do 25 | maybe_disable_triggers do 26 | sync_data 27 | end 28 | end 29 | end 30 | end 31 | 32 | def from_fields 33 | @from_fields ||= from_columns.map { |c| c[:name] } 34 | end 35 | 36 | def to_fields 37 | @to_fields ||= to_columns.map { |c| c[:name] } 38 | end 39 | 40 | def shared_fields 41 | @shared_fields ||= to_fields & from_fields 42 | end 43 | 44 | def shared_sequences 45 | @shared_sequences ||= to_sequences & from_sequences 46 | end 47 | 48 | def notes 49 | notes = [] 50 | if shared_fields.empty? 51 | notes << "No fields to copy" 52 | else 53 | extra_fields = to_fields - from_fields 54 | notes << "Extra columns: #{extra_fields.join(", ")}" if extra_fields.any? 
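# "extra" = present in the destination but not the source;
# "missing" = present in the source but not the destination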
55 | 56 | missing_fields = from_fields - to_fields 57 | notes << "Missing columns: #{missing_fields.join(", ")}" if missing_fields.any? 58 | 59 | extra_sequences = to_sequences - from_sequences 60 | notes << "Extra sequences: #{extra_sequences.join(", ")}" if extra_sequences.any? 61 | 62 | missing_sequences = from_sequences - to_sequences 63 | notes << "Missing sequences: #{missing_sequences.join(", ")}" if missing_sequences.any? 64 | 65 | from_types = from_columns.map { |c| [c[:name], c[:type]] }.to_h 66 | to_types = to_columns.map { |c| [c[:name], c[:type]] }.to_h 67 | different_types = [] 68 | shared_fields.each do |field| 69 | if from_types[field] != to_types[field] 70 | different_types << "#{field} (#{from_types[field]} -> #{to_types[field]})" 71 | end 72 | end 73 | notes << "Different column types: #{different_types.join(", ")}" if different_types.any? 74 | end 75 | notes 76 | end 77 | 78 | def sync_data 79 | raise Error, "This should never happen. Please file a bug." if shared_fields.empty? 80 | 81 | sql_clause = String.new("") 82 | sql_clause << " #{opts[:sql]}" if opts[:sql] 83 | 84 | bad_fields = opts[:no_rules] ? [] : config["data_rules"] 85 | primary_key = to_primary_key 86 | copy_fields = shared_fields.map { |f| f2 = bad_fields.to_a.find { |bf, _| rule_match?(table, f, bf) }; f2 ? "#{apply_strategy(f2[1], table, f, primary_key)} AS #{quote_ident(f)}" : "#{quoted_table}.#{quote_ident(f)}" }.join(", ") 87 | fields = shared_fields.map { |f| quote_ident(f) }.join(", ") 88 | 89 | copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quoted_table}#{sql_clause}) TO STDOUT" 90 | if opts[:in_batches] 91 | raise Error, "Primary key required for --in-batches" if primary_key.empty? 92 | primary_key = primary_key.first 93 | 94 | destination.truncate(table) if opts[:truncate] 95 | 96 | from_max_id = source.max_id(table, primary_key) 97 | to_max_id = destination.max_id(table, primary_key) + 1 98 | 99 | if to_max_id == 1 100 | from_min_id = source.min_id(table, primary_key) 101 | to_max_id = from_min_id if from_min_id > 0 102 | end 103 | 104 | starting_id = to_max_id 105 | batch_size = opts[:batch_size] 106 | 107 | i = 1 108 | batch_count = ((from_max_id - starting_id + 1) / batch_size.to_f).ceil 109 | 110 | while starting_id <= from_max_id 111 | where = "#{quote_ident(primary_key)} >= #{starting_id} AND #{quote_ident(primary_key)} < #{starting_id + batch_size}" 112 | log " #{i}/#{batch_count}: #{where}" 113 | 114 | # TODO be smarter for advanced sql clauses 115 | batch_sql_clause = " #{sql_clause.length > 0 ? "#{sql_clause} AND" : "WHERE"} #{where}" 116 | 117 | batch_copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quoted_table}#{batch_sql_clause}) TO STDOUT" 118 | copy(batch_copy_to_command, dest_table: table, dest_fields: fields) 119 | 120 | starting_id += batch_size 121 | i += 1 122 | 123 | if opts[:sleep] && starting_id <= from_max_id 124 | sleep(opts[:sleep]) 125 | end 126 | end 127 | elsif !opts[:truncate] && (opts[:overwrite] || opts[:preserve] || !sql_clause.empty?) 128 | if primary_key.empty?
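# the non-truncate path below upserts with INSERT ... ON CONFLICT (primary key),
# so each of these modes needs a primary key as the conflict target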
129 | raise Error, "Primary key required for --overwrite" if opts[:overwrite] 130 | raise Error, "Primary key required for --preserve" if opts[:preserve] 131 | raise Error, "Primary key required to sync specific rows" 132 | end 133 | 134 | # create a temp table 135 | temp_table = "pgsync_#{rand(1_000_000_000)}" 136 | destination.execute("CREATE TEMPORARY TABLE #{quote_ident_full(temp_table)} AS TABLE #{quoted_table} WITH NO DATA") 137 | 138 | # load data 139 | copy(copy_to_command, dest_table: temp_table, dest_fields: fields) 140 | 141 | on_conflict = primary_key.map { |pk| quote_ident(pk) }.join(", ") 142 | action = 143 | if opts[:preserve] 144 | "NOTHING" 145 | else # overwrite or sql clause 146 | setter = shared_fields.reject { |f| primary_key.include?(f) }.map { |f| "#{quote_ident(f)} = EXCLUDED.#{quote_ident(f)}" } 147 | if setter.any? 148 | "UPDATE SET #{setter.join(", ")}" 149 | else 150 | "NOTHING" 151 | end 152 | end 153 | destination.execute("INSERT INTO #{quoted_table} (#{fields}) (SELECT #{fields} FROM #{quote_ident_full(temp_table)}) ON CONFLICT (#{on_conflict}) DO #{action}") 154 | else 155 | # use delete instead of truncate for foreign keys 156 | if opts[:defer_constraints_v1] || opts[:defer_constraints_v2] 157 | destination.execute("DELETE FROM #{quoted_table}") 158 | else 159 | destination.truncate(table) 160 | end 161 | copy(copy_to_command, dest_table: table, dest_fields: fields) 162 | end 163 | 164 | # update sequences 165 | shared_sequences.each do |seq| 166 | value = source.last_value(seq) 167 | destination.execute("SELECT setval(#{escape(quote_ident_full(seq))}, #{escape(value)})") 168 | end 169 | 170 | {status: "success"} 171 | end 172 | 173 | private 174 | 175 | def with_notices 176 | notices = [] 177 | [source, destination].each do |data_source| 178 | data_source.send(:conn).set_notice_processor do |message| 179 | notices << message.strip 180 | end 181 | end 182 | result = yield 183 | result[:notices] = notices if result 184 | result 185 | ensure 186 | # clear notice processor 187 | [source, destination].each do |data_source| 188 | data_source.send(:conn).set_notice_processor 189 | end 190 | end 191 | 192 | # TODO add retries 193 | def handle_errors 194 | yield 195 | rescue => e 196 | raise e if opts[:debug] 197 | 198 | message = 199 | case e 200 | when PG::ConnectionBad 201 | # likely fine to show simplified message here 202 | # the full message will be shown when first trying to connect 203 | "Connection failed" 204 | when PG::Error 205 | e.message.sub("ERROR: ", "") 206 | when Error 207 | e.message 208 | else 209 | "#{e.class.name}: #{e.message}" 210 | end 211 | 212 | {status: "error", message: message} 213 | end 214 | 215 | def copy(source_command, dest_table:, dest_fields:) 216 | destination_command = "COPY #{quote_ident_full(dest_table)} (#{dest_fields}) FROM STDIN" 217 | 218 | source.log_sql(source_command) 219 | destination.log_sql(destination_command) 220 | 221 | destination.conn.copy_data(destination_command) do 222 | source.conn.copy_data(source_command) do 223 | while (row = source.conn.get_copy_data) 224 | destination.conn.put_copy_data(row) 225 | end 226 | end 227 | end 228 | end 229 | 230 | # TODO better performance 231 | def rule_match?(table, column, rule) 232 | regex = Regexp.new('\A' + Regexp.escape(rule).gsub('\*', '[^\.]*') + '\z') 233 | regex.match(column) || regex.match("#{table.name}.#{column}") || regex.match("#{table.schema}.#{table.name}.#{column}") 234 | end 235 | 236 | # TODO wildcard rules 237 | def apply_strategy(rule, table, column, 
primary_key) 238 | if rule.is_a?(Hash) 239 | if rule.key?("value") 240 | escape(rule["value"]) 241 | elsif rule.key?("statement") 242 | rule["statement"] 243 | else 244 | raise Error, "Unknown rule #{rule.inspect} for column #{column}" 245 | end 246 | else 247 | case rule 248 | when "untouched" 249 | quote_ident(column) 250 | when "unique_email" 251 | "'email' || #{quoted_primary_key(table, primary_key, rule)}::text || '@example.org'" 252 | when "unique_phone" 253 | "(#{quoted_primary_key(table, primary_key, rule)}::bigint + 1000000000)::text" 254 | when "unique_secret" 255 | "'secret' || #{quoted_primary_key(table, primary_key, rule)}::text" 256 | when "random_int", "random_number" 257 | "(RANDOM() * 100)::int" 258 | when "random_date" 259 | "date '1970-01-01' + (RANDOM() * 10000)::int" 260 | when "random_time" 261 | "NOW() - (RANDOM() * 100000000)::int * INTERVAL '1 second'" 262 | when "random_ip" 263 | # casting double to int rounds 264 | "(1 + RANDOM() * 254)::int::text || '.0.0.1'" 265 | when "random_letter" 266 | # casting double to int rounds 267 | "chr(65 + (RANDOM() * 25)::int)" 268 | when "random_string" 269 | "RIGHT(MD5(RANDOM()::text), 10)" 270 | when "null", nil 271 | "NULL" 272 | else 273 | raise Error, "Unknown rule #{rule} for column #{column}" 274 | end 275 | end 276 | end 277 | 278 | def quoted_primary_key(table, primary_key, rule) 279 | raise Error, "Single column primary key required for this data rule: #{rule}" unless primary_key.size == 1 280 | "#{quoted_table}.#{quote_ident(primary_key.first)}" 281 | end 282 | 283 | def maybe_disable_triggers 284 | if opts[:disable_integrity] || opts[:disable_integrity_v2] || opts[:disable_user_triggers] 285 | destination.transaction do 286 | triggers = destination.triggers(table) 287 | triggers.select! { |t| t["enabled"] == "t" } 288 | internal_triggers, user_triggers = triggers.partition { |t| t["internal"] == "t" } 289 | integrity_triggers = internal_triggers.select { |t| t["integrity"] == "t" } 290 | restore_triggers = [] 291 | 292 | # both --disable-integrity options require superuser privileges 293 | # however, only v2 works on Amazon RDS, which added specific support for it 294 | # https://aws.amazon.com/about-aws/whats-new/2014/11/10/amazon-rds-postgresql-read-replicas/ 295 | # 296 | # session_replication_role disables more than foreign keys (like triggers and rules) 297 | # this is probably fine, but keep the current default for now 298 | if opts[:disable_integrity_v2] || (opts[:disable_integrity] && rds?) 299 | # SET LOCAL lasts until the end of the transaction 300 | # https://www.postgresql.org/docs/current/sql-set.html 301 | destination.execute("SET LOCAL session_replication_role = replica") 302 | elsif opts[:disable_integrity] 303 | integrity_triggers.each do |trigger| 304 | destination.execute("ALTER TABLE #{quoted_table} DISABLE TRIGGER #{quote_ident(trigger["name"])}") 305 | end 306 | restore_triggers.concat(integrity_triggers) 307 | end 308 | 309 | if opts[:disable_user_triggers] 310 | # important! 
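# (DISABLE TRIGGER USER targets every user-defined trigger on the table in one
# statement; for the public.robots table from the test schemas, illustratively:
#   ALTER TABLE "public"."robots" DISABLE TRIGGER USER)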
311 | # rely on Postgres to disable user triggers 312 | # we don't want to accidentally disable non-user triggers if logic above is off 313 | destination.execute("ALTER TABLE #{quoted_table} DISABLE TRIGGER USER") 314 | restore_triggers.concat(user_triggers) 315 | end 316 | 317 | result = yield 318 | 319 | # restore triggers that were previously enabled 320 | restore_triggers.each do |trigger| 321 | destination.execute("ALTER TABLE #{quoted_table} ENABLE TRIGGER #{quote_ident(trigger["name"])}") 322 | end 323 | 324 | result 325 | end 326 | else 327 | yield 328 | end 329 | end 330 | 331 | def rds? 332 | destination.execute("SELECT name, setting FROM pg_settings WHERE name LIKE 'rds.%'").any? 333 | end 334 | end 335 | end 336 | -------------------------------------------------------------------------------- /lib/pgsync/task_resolver.rb: -------------------------------------------------------------------------------- 1 | module PgSync 2 | class TaskResolver 3 | include Utils 4 | 5 | attr_reader :args, :opts, :source, :destination, :config, :first_schema, :notes 6 | 7 | def initialize(args:, opts:, source:, destination:, config:, first_schema:) 8 | @args = args 9 | @opts = opts 10 | @source = source 11 | @destination = destination 12 | @config = config 13 | @groups = config["groups"] || {} 14 | @first_schema = first_schema 15 | @notes = [] 16 | end 17 | 18 | def tasks 19 | tasks = [] 20 | 21 | # get lists from args 22 | groups, tables = process_args 23 | 24 | # expand groups into tasks 25 | groups.each do |group| 26 | tasks.concat(group_to_tasks(group)) 27 | end 28 | 29 | # expand tables into tasks 30 | tables.each do |table| 31 | tasks.concat(table_to_tasks(table)) 32 | end 33 | 34 | # get default if none given 35 | if !opts[:groups] && !opts[:tables] && args.size == 0 36 | tasks.concat(default_tasks) 37 | end 38 | 39 | # resolve any tables that need it 40 | tasks.each do |task| 41 | task[:table] = fully_resolve(task[:table]) 42 | end 43 | 44 | tasks 45 | end 46 | 47 | def group?(group) 48 | @groups.key?(group) 49 | end 50 | 51 | private 52 | 53 | def group_to_tasks(value) 54 | group, param = value.split(":", 2) 55 | raise Error, "Group not found: #{group}" unless group?(group) 56 | 57 | @groups[group].map do |table| 58 | table_sql = nil 59 | if table.is_a?(Array) 60 | table, table_sql = table 61 | end 62 | 63 | { 64 | table: to_table(table), 65 | sql: expand_sql(table_sql, param) 66 | } 67 | end 68 | end 69 | 70 | def table_to_tasks(value) 71 | raise Error, "Cannot use parameters with tables" if value.include?(":") 72 | 73 | tables = 74 | if value.include?("*") 75 | regex = Regexp.new('\A' + Regexp.escape(value).gsub('\*', '[^\.]*') + '\z') 76 | shared_tables.select { |t| regex.match(t.full_name) || regex.match(t.name) } 77 | else 78 | [to_table(value)] 79 | end 80 | 81 | tables.map do |table| 82 | { 83 | table: table, 84 | sql: sql_arg # doesn't support params 85 | } 86 | end 87 | end 88 | 89 | # treats identifiers as if they were quoted (Users == "Users") 90 | # this is different from Postgres (Users == "users") 91 | # 92 | # TODO add support for quoted identifiers like "my.schema"."my.table" 93 | # so it's possible to specify identifiers with "." 
in them 94 | def to_table(value) 95 | parts = value.split(".") 96 | case parts.size 97 | when 1 98 | # unknown schema 99 | Table.new(nil, parts[0]) 100 | when 2 101 | Table.new(*parts) 102 | else 103 | raise Error, "Cannot resolve table: #{value}" 104 | end 105 | end 106 | 107 | def default_tasks 108 | shared_tables.map do |table| 109 | { 110 | table: table 111 | } 112 | end 113 | end 114 | 115 | # tables that exist in both source and destination 116 | # used when no tables specified, or a wildcard 117 | # removes excluded tables and filters by schema 118 | def shared_tables 119 | tables = filter_tables(source.tables) 120 | 121 | unless opts[:schema_only] || opts[:schema_first] 122 | from_tables = tables 123 | to_tables = filter_tables(destination.tables) 124 | 125 | extra_tables = to_tables - from_tables 126 | notes << "Extra tables: #{extra_tables.map { |t| friendly_name(t) }.join(", ")}" if extra_tables.any? 127 | 128 | missing_tables = from_tables - to_tables 129 | notes << "Missing tables: #{missing_tables.map { |t| friendly_name(t) }.join(", ")}" if missing_tables.any? 130 | 131 | tables &= to_tables 132 | end 133 | 134 | tables 135 | end 136 | 137 | def filter_tables(tables) 138 | tables = tables.dup 139 | 140 | unless opts[:all_schemas] 141 | # could support wildcard schemas as well 142 | schemas = Set.new(opts[:schemas] ? to_arr(opts[:schemas]) : source.search_path) 143 | tables.select! { |t| schemas.include?(t.schema) } 144 | end 145 | 146 | to_arr(opts[:exclude]).each do |value| 147 | if value.include?("*") 148 | regex = Regexp.new('\A' + Regexp.escape(value).gsub('\*', '[^\.]*') + '\z') 149 | tables.reject! { |t| regex.match(t.full_name) || regex.match(t.name) } 150 | else 151 | tables -= [fully_resolve(to_table(value), error: false)].compact 152 | end 153 | end 154 | 155 | tables 156 | end 157 | 158 | def process_args 159 | groups = to_arr(opts[:groups]) 160 | tables = to_arr(opts[:tables]) 161 | if args[0] 162 | # could be a group, table, or mix 163 | to_arr(args[0]).each do |value| 164 | if group?(value.split(":", 2)[0]) 165 | groups << value 166 | else 167 | tables << value 168 | end 169 | end 170 | end 171 | [groups, tables] 172 | end 173 | 174 | def no_schema_tables 175 | @no_schema_tables ||= begin 176 | search_path_index = source.search_path.map.with_index.to_h 177 | source.tables.group_by(&:name).map do |group, t2| 178 | [group, t2.select { |t| search_path_index[t.schema] }.sort_by { |t| search_path_index[t.schema] }.first] 179 | end.to_h 180 | end 181 | end 182 | 183 | # for tables without a schema, find the table in the search path 184 | def fully_resolve(table, error: true) 185 | return table if table.schema 186 | resolved_table = no_schema_tables[table.name] 187 | raise Error, "Table not found in source: #{table.name}" if !resolved_table && error 188 | resolved_table 189 | end 190 | 191 | # parse command line arguments and YAML 192 | def to_arr(value) 193 | if value.is_a?(Array) 194 | value 195 | else 196 | # Split by commas, but don't split on commas inside double quotes 197 | # https://stackoverflow.com/questions/21105360/regex-find-comma-not-inside-quotes 198 | value.to_s.split(/(?!\B"[^"]*),(?![^"]*"\B)/) 199 | end 200 | end 201 | 202 | def sql_arg 203 | args[1] 204 | end 205 | 206 | def expand_sql(sql, param) 207 | # command line option takes precedence over group option 208 | sql = sql_arg if sql_arg 209 | 210 | return unless sql 211 | 212 | # vars must match \w 213 | missing_vars = sql.scan(/{\w+}/).map { |v| v[1..-2] } 214 | 215 | vars = {} 216 | if param 217 |
vars["id"] = cast(param) 218 | vars["1"] = cast(param) 219 | end 220 | 221 | sql = sql.dup 222 | vars.each do |k, v| 223 | # only sub if in var list 224 | sql.gsub!("{#{k}}", cast(v)) if missing_vars.delete(k) 225 | end 226 | 227 | raise Error, "Missing variables: #{missing_vars.uniq.join(", ")}" if missing_vars.any? 228 | 229 | sql 230 | end 231 | 232 | # TODO quote vars in next major version 233 | def cast(value) 234 | value.to_s.gsub(/\A\"|\"\z/, '') 235 | end 236 | end 237 | end 238 | -------------------------------------------------------------------------------- /lib/pgsync/utils.rb: -------------------------------------------------------------------------------- 1 | module PgSync 2 | module Utils 3 | COLOR_CODES = { 4 | red: 31, 5 | green: 32, 6 | yellow: 33, 7 | cyan: 36 8 | } 9 | 10 | def log(message = nil) 11 | output.puts message 12 | end 13 | 14 | def colorize(message, color) 15 | if output.tty? 16 | "\e[#{COLOR_CODES[color]}m#{message}\e[0m" 17 | else 18 | message 19 | end 20 | end 21 | 22 | def warning(message) 23 | log colorize(message, :yellow) 24 | end 25 | 26 | def deprecated(message) 27 | warning "[DEPRECATED] #{message}" 28 | end 29 | 30 | def output 31 | $stderr 32 | end 33 | 34 | def db_config_file(db) 35 | ".pgsync-#{db}.yml" 36 | end 37 | 38 | def confirm_tables_exist(data_source, tasks, description) 39 | tasks.map(&:table).each do |table| 40 | unless data_source.table_exists?(table) 41 | raise Error, "Table not found in #{description}: #{table}" 42 | end 43 | end 44 | end 45 | 46 | def first_schema 47 | @first_schema ||= source.search_path.find { |sp| sp != "pg_catalog" } 48 | end 49 | 50 | def task_name(task) 51 | friendly_name(task.table) 52 | end 53 | 54 | def friendly_name(table) 55 | if table.schema == first_schema 56 | table.name 57 | else 58 | table.full_name 59 | end 60 | end 61 | 62 | def quote_ident_full(ident) 63 | if ident.is_a?(Table) || ident.is_a?(Sequence) 64 | [quote_ident(ident.schema), quote_ident(ident.name)].join(".") 65 | else # temp table names are strings 66 | quote_ident(ident) 67 | end 68 | end 69 | 70 | def quote_ident(value) 71 | PG::Connection.quote_ident(value) 72 | end 73 | 74 | def escape(value) 75 | if value.is_a?(String) 76 | "'#{quote_string(value)}'" 77 | else 78 | value 79 | end 80 | end 81 | 82 | # activerecord 83 | def quote_string(s) 84 | s.gsub(/\\/, '\&\&').gsub(/'/, "''") 85 | end 86 | 87 | def monotonic_time 88 | Process.clock_gettime(Process::CLOCK_MONOTONIC) 89 | end 90 | end 91 | end 92 | -------------------------------------------------------------------------------- /lib/pgsync/version.rb: -------------------------------------------------------------------------------- 1 | module PgSync 2 | VERSION = "0.8.0" 3 | end 4 | -------------------------------------------------------------------------------- /pgsync.gemspec: -------------------------------------------------------------------------------- 1 | require_relative "lib/pgsync/version" 2 | 3 | Gem::Specification.new do |spec| 4 | spec.name = "pgsync" 5 | spec.version = PgSync::VERSION 6 | spec.summary = "Sync Postgres data between databases" 7 | spec.homepage = "https://github.com/ankane/pgsync" 8 | spec.license = "MIT" 9 | 10 | spec.authors = "Andrew Kane" 11 | spec.email = "andrew@ankane.org" 12 | 13 | spec.files = Dir["*.{md,txt}", "{lib,exe}/**/*", "config.yml"] 14 | spec.require_path = "lib" 15 | 16 | spec.bindir = "exe" 17 | spec.executables = ["pgsync"] 18 | 19 | spec.required_ruby_version = ">= 2.7" 20 | 21 | spec.add_dependency "bigdecimal" 22 | 
spec.add_dependency "parallel" 23 | spec.add_dependency "pg", ">= 0.18.2" 24 | spec.add_dependency "slop", ">= 4.10.1" 25 | spec.add_dependency "tty-spinner" 26 | end 27 | -------------------------------------------------------------------------------- /test/commands_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class CommandsTest < Minitest::Test 4 | def test_help 5 | assert_prints "Usage:", "-h" 6 | assert_prints "Usage:", "--help" 7 | end 8 | 9 | def test_version 10 | assert_prints PgSync::VERSION, "-v" 11 | assert_prints PgSync::VERSION, "--version" 12 | end 13 | 14 | def test_bad_option 15 | assert_error "unknown option", "--bad" 16 | end 17 | 18 | def test_list 19 | output = assert_works "--list", config: true 20 | assert_match "posts", output 21 | end 22 | 23 | def test_config_and_db 24 | assert_error "Specify either --db or --config, not both", "--db test --config .pgsync.yml" 25 | end 26 | 27 | def test_config_not_found 28 | assert_error "Config file not found: bad.yml", "--config bad.yml" 29 | end 30 | 31 | def test_config_absolute_path 32 | path = File.expand_path("test/support/config.yml") 33 | assert_works "--config #{path}" 34 | end 35 | 36 | def test_db_not_found 37 | assert_error "Config file not found: .pgsync-bad.yml", "--db bad" 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /test/data_rules_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class DataRulesTest < Minitest::Test 4 | def setup 5 | truncate_tables ["Users"] 6 | 7 | 2.times do 8 | insert(conn1, "Users", [{ 9 | "email" => "hi@example.org", 10 | "phone" => "555-555-5555", 11 | "token" => "token123", 12 | "attempts" => 1, 13 | "created_on" => Date.today, 14 | "updated_at" => Time.now, 15 | "ip" => "1.1.1.1", 16 | "name" => "Hi", 17 | "nonsense" => "Text", 18 | "untouchable" => "rock" 19 | }]) 20 | end 21 | end 22 | 23 | def test_rules 24 | assert_works "Users", config: true 25 | result = conn2.exec("SELECT * FROM \"Users\"").to_a 26 | row = result.first 27 | assert_equal "email#{row["Id"]}@example.org", row["email"] 28 | assert_equal "secret#{row["Id"]}", row["token"] 29 | assert row["ip"].end_with?("0.0.1") 30 | assert_equal 1, row["name"].size 31 | assert_equal "rock", row["untouchable"] 32 | end 33 | 34 | def test_no_rules 35 | assert_works "Users --no-rules", config: true 36 | result = conn2.exec("SELECT * FROM \"Users\"").to_a 37 | row = result.first 38 | assert_equal "hi@example.org", row["email"] 39 | assert_equal "555-555-5555", row["phone"] 40 | assert_equal "token123", row["token"] 41 | assert_equal 1, row["attempts"] 42 | assert_equal "1.1.1.1", row["ip"] 43 | assert_equal "Hi", row["name"] 44 | assert_equal "Text", row["nonsense"] 45 | assert_equal "rock", row["untouchable"] 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /test/data_source_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class DataSourceTest < Minitest::Test 4 | def test_no_source 5 | assert_error "No source", "" 6 | end 7 | 8 | def test_no_destination 9 | assert_error "No destination", "--from db1" 10 | end 11 | 12 | def test_source_command_error 13 | # https://github.com/oracle/truffleruby/issues/2568 14 | skip if RUBY_ENGINE == "truffleruby" 15 | 16 | assert_error 
"Command exited with non-zero status:\nexit 1", "--config test/support/bad.yml" 17 | end 18 | 19 | def test_source_command_not_run_with_option 20 | assert_works "--config test/support/bad.yml --from pgsync_test1" 21 | end 22 | 23 | def test_database 24 | assert_prints "From: pgsync_test1\nTo: pgsync_test2", "--from pgsync_test1 --to pgsync_test2" 25 | end 26 | 27 | def test_url 28 | assert_prints "From: pgsync_test1 on localhost:5432\nTo: pgsync_test2 on localhost:5432", "--from postgres://localhost/pgsync_test1 --to postgres://localhost/pgsync_test2" 29 | end 30 | 31 | # def test_destination_danger 32 | # assert_error "Danger! Add `to_safe: true` to `.pgsync.yml` if the destination is not localhost or 127.0.0.1", "--from pgsync_test1 --to postgres://hostname/db2" 33 | # end 34 | 35 | def test_nonexistent_source 36 | assert_error "FATAL: database \"db1\" does not exist\n", "--from db1 --to pgsync_test2" 37 | end 38 | 39 | def test_nonexistent_destination 40 | assert_error "FATAL: database \"db2\" does not exist\n", "--from pgsync_test1 --to db2" 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /test/in_batches_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class InBatchesTest < Minitest::Test 4 | def setup 5 | truncate_tables ["posts"] 6 | end 7 | 8 | def test_works 9 | source = 3.times.map { |i| {"id" => i + 1, "title" => "Post #{i + 1}"} } 10 | dest = [] 11 | expected = source 12 | assert_result("--in-batches --batch-size 1", source, dest, expected) 13 | end 14 | 15 | def test_existing_data 16 | source = 3.times.map { |i| {"id" => i + 1, "title" => "Post #{i + 1}"} } 17 | dest = [{"id" => 1, "title" => "First Post"}, {"id" => 4, "title" => "Post 4"}] 18 | expected = dest 19 | assert_result("--in-batches --batch-size 1", source, dest, expected) 20 | end 21 | 22 | def test_overwrite 23 | assert_error "Cannot use --overwrite with --in-batches", "posts --in-batches --overwrite", config: true 24 | end 25 | 26 | def test_multiple_tables 27 | assert_error "Cannot use --in-batches with multiple tables", "--in-batches", config: true 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /test/init_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class InitTest < Minitest::Test 4 | def test_works 5 | new_dir do 6 | assert_works "--init" 7 | assert_match "?sslmode=require", File.read(".pgsync.yml") 8 | end 9 | end 10 | 11 | def test_too_many_arguments 12 | assert_error "Usage:", "--init arg1 arg2" 13 | end 14 | 15 | def test_db_argument 16 | new_dir do 17 | assert_works "--init db2" 18 | assert File.exist?(".pgsync-db2.yml") 19 | end 20 | end 21 | 22 | def test_db_option 23 | new_dir do 24 | assert_works "--init --db db2" 25 | assert File.exist?(".pgsync-db2.yml") 26 | end 27 | end 28 | 29 | def test_config 30 | new_dir do 31 | assert_works "--init --config hi.yml" 32 | assert File.exist?("hi.yml") 33 | end 34 | end 35 | 36 | def test_django 37 | new_dir do 38 | File.write("manage.py", "django") 39 | assert_works "--init" 40 | assert_excludes "django_migrations" 41 | end 42 | end 43 | 44 | def test_heroku 45 | new_dir do 46 | system "git init --quiet" 47 | system "git remote add heroku https://git.heroku.com/test.git" 48 | assert_works "--init" 49 | assert_match "$(heroku config:get DATABASE_URL)?sslmode=require", 
File.read(".pgsync.yml") 50 | end 51 | end 52 | 53 | def test_laravel 54 | new_dir do 55 | File.write("artisan", "") 56 | assert_works "--init" 57 | assert_excludes "migrations" 58 | end 59 | end 60 | 61 | def test_rails 62 | new_dir do 63 | Dir.mkdir("bin") 64 | File.write("bin/rails", "") 65 | assert_works "--init" 66 | assert_excludes "ar_internal_metadata" 67 | assert_excludes "schema_migrations" 68 | end 69 | end 70 | 71 | def new_dir 72 | Dir.chdir(Dir.mktmpdir) do 73 | yield 74 | end 75 | end 76 | 77 | def assert_excludes(table) 78 | assert_match "- #{table}", File.read(".pgsync.yml") 79 | end 80 | end 81 | -------------------------------------------------------------------------------- /test/schema_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class SchemaTest < Minitest::Test 4 | def setup 5 | conn3.exec(File.read("test/support/schema3.sql")) 6 | truncate(conn1, "posts") 7 | end 8 | 9 | def test_schema_only 10 | insert(conn1, "posts", [{"id" => 1}]) 11 | assert_equal [], tables(conn3) 12 | assert_works "--from pgsync_test1 --to pgsync_test3 --schema-only --all-schemas" 13 | assert_equal all_tables, tables(conn3) 14 | assert_equal [], conn3.exec("SELECT * FROM posts").to_a 15 | # make sure all_tables itself isn't broken 16 | assert all_tables.size >= 10 17 | end 18 | 19 | def test_schema_only_table 20 | assert_works "posts --from pgsync_test1 --to pgsync_test3 --schema-only" 21 | assert_equal ["public.posts"], tables(conn3) 22 | end 23 | 24 | def test_schema_only_exclude 25 | assert_works "--exclude Users --from pgsync_test1 --to pgsync_test3 --schema-only --all-schemas" 26 | assert_equal all_tables - ["public.Users"], tables(conn3) 27 | end 28 | 29 | def test_schema_only_schemas 30 | assert_works "--schemas other --from pgsync_test1 --to pgsync_test3 --schema-only" 31 | assert_equal all_tables.select { |t| t.start_with? "other." }, tables(conn3) 32 | assert_equal [], conn3.exec("SELECT id FROM other.pets").to_a 33 | end 34 | 35 | def test_schema_first 36 | insert(conn1, "posts", [{"id" => 1}]) 37 | assert_equal [], tables(conn3) 38 | assert_works "--from pgsync_test1 --to pgsync_test3 --schema-first --all-schemas" 39 | assert_equal all_tables, tables(conn3) 40 | assert_equal [{"id" => 1}], conn3.exec("SELECT id FROM posts").to_a 41 | end 42 | 43 | def all_tables 44 | tables(conn1) 45 | end 46 | 47 | def tables(conn) 48 | # sort in Ruby, as Postgres can return different order on different platforms 49 | query = <<~SQL 50 | SELECT 51 | table_schema || '.' 
|| table_name AS table 52 | FROM 53 | information_schema.tables 54 | WHERE 55 | table_schema NOT IN ('information_schema', 'pg_catalog') 56 | SQL 57 | conn.exec(query).map { |v| v["table"] }.sort 58 | end 59 | end 60 | -------------------------------------------------------------------------------- /test/support/bad.yml: -------------------------------------------------------------------------------- 1 | from: $(exit 1) 2 | to: pgsync_test2 3 | -------------------------------------------------------------------------------- /test/support/config.yml: -------------------------------------------------------------------------------- 1 | from: $(echo "pgsync_test1") 2 | to: pgsync_test2 3 | groups: 4 | group1: 5 | - Users 6 | variable: 7 | posts: "WHERE id = {1}" 8 | variable_id: 9 | posts: "WHERE id = {id}" 10 | group_with_excluded: 11 | - excluded 12 | exclude: 13 | - excluded 14 | - does_not_exist 15 | data_rules: 16 | email: unique_email 17 | phone: unique_phone 18 | token: unique_secret 19 | attempts: random_int 20 | created_on: random_date 21 | updated_at: random_time 22 | public.Users.ip: random_ip 23 | Users.name: random_letter 24 | nonsense: random_string 25 | -------------------------------------------------------------------------------- /test/support/schema1.sql: -------------------------------------------------------------------------------- 1 | DROP SCHEMA IF EXISTS public CASCADE; 2 | DROP SCHEMA IF EXISTS other CASCADE; 3 | 4 | CREATE SCHEMA public; 5 | CREATE SCHEMA other; 6 | 7 | DROP TYPE IF EXISTS mood; 8 | CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy'); 9 | CREATE TABLE "Users" ( 10 | "Id" SERIAL PRIMARY KEY, 11 | zip_code TEXT, 12 | email TEXT, 13 | phone TEXT, 14 | token TEXT, 15 | attempts INT, 16 | created_on DATE, 17 | updated_at TIMESTAMP, 18 | ip TEXT, 19 | name TEXT, 20 | nonsense TEXT, 21 | untouchable TEXT, 22 | "column_with_punctuation?" BOOLEAN, 23 | current_mood mood 24 | ); 25 | 26 | CREATE TABLE posts ( 27 | id SERIAL PRIMARY KEY, 28 | title TEXT 29 | ); 30 | 31 | CREATE TABLE comments ( 32 | id SERIAL PRIMARY KEY, 33 | post_id INTEGER REFERENCES posts(id) 34 | ); 35 | 36 | CREATE TABLE comments2 ( 37 | id SERIAL PRIMARY KEY, 38 | post_id INTEGER REFERENCES posts(id) 39 | ); 40 | 41 | CREATE TABLE books ( 42 | id SERIAL, 43 | id2 SERIAL, 44 | title TEXT, 45 | PRIMARY KEY (id, id2) 46 | ); 47 | 48 | CREATE TABLE authors ( 49 | first_name TEXT 50 | ); 51 | 52 | CREATE TABLE chapters ( 53 | pages INT 54 | ); 55 | 56 | CREATE TABLE stores ( 57 | name TEXT 58 | ); 59 | 60 | CREATE TABLE robots ( 61 | id SERIAL PRIMARY KEY, 62 | name TEXT 63 | ); 64 | 65 | CREATE TABLE excluded ( 66 | id SERIAL PRIMARY KEY 67 | ); 68 | 69 | CREATE TABLE other.pets ( 70 | id SERIAL PRIMARY KEY 71 | ); 72 | 73 | INSERT INTO other.pets 74 | VALUES 75 | (1), 76 | (2), 77 | (3); 78 | -------------------------------------------------------------------------------- /test/support/schema2.sql: -------------------------------------------------------------------------------- 1 | DROP SCHEMA IF EXISTS public CASCADE; 2 | DROP SCHEMA IF EXISTS other CASCADE; 3 | 4 | CREATE SCHEMA public; 5 | CREATE SCHEMA other; 6 | 7 | CREATE TABLE "Users" ( 8 | "Id" SERIAL PRIMARY KEY, 9 | email TEXT, 10 | phone TEXT, 11 | token TEXT, 12 | attempts INT, 13 | created_on DATE, 14 | updated_at TIMESTAMP, 15 | ip TEXT, 16 | name TEXT, 17 | nonsense TEXT, 18 | untouchable TEXT, 19 | "column_with_punctuation?" 
BOOLEAN 20 | ); 21 | 22 | CREATE TABLE posts ( 23 | id SERIAL PRIMARY KEY, 24 | title TEXT 25 | ); 26 | 27 | CREATE TABLE comments ( 28 | id SERIAL PRIMARY KEY, 29 | post_id INTEGER REFERENCES posts(id) DEFERRABLE 30 | ); 31 | 32 | CREATE TABLE comments2 ( 33 | id SERIAL PRIMARY KEY, 34 | post_id INTEGER REFERENCES posts(id) 35 | ); 36 | 37 | CREATE TABLE books ( 38 | id SERIAL, 39 | id2 SERIAL, 40 | title TEXT, 41 | PRIMARY KEY (id, id2) 42 | ); 43 | 44 | CREATE TABLE authors ( 45 | last_name TEXT 46 | ); 47 | 48 | CREATE TABLE chapters ( 49 | pages BIGINT 50 | ); 51 | 52 | CREATE TABLE robots ( 53 | id SERIAL PRIMARY KEY, 54 | name TEXT 55 | ); 56 | CREATE OR REPLACE FUNCTION nope() 57 | RETURNS trigger AS 58 | $$ 59 | BEGIN 60 | RAISE EXCEPTION 'Nope!'; 61 | END; 62 | $$ 63 | LANGUAGE plpgsql; 64 | CREATE TRIGGER nope_trigger BEFORE INSERT OR UPDATE ON robots FOR EACH ROW EXECUTE PROCEDURE nope(); 65 | 66 | CREATE TABLE excluded ( 67 | id SERIAL PRIMARY KEY 68 | ); 69 | 70 | CREATE TABLE other.pets ( 71 | id SERIAL PRIMARY KEY 72 | ); 73 | -------------------------------------------------------------------------------- /test/support/schema3.sql: -------------------------------------------------------------------------------- 1 | DROP SCHEMA IF EXISTS public CASCADE; 2 | DROP SCHEMA IF EXISTS other CASCADE; 3 | 4 | CREATE SCHEMA public; 5 | -------------------------------------------------------------------------------- /test/sync_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class SyncTest < Minitest::Test 4 | def setup 5 | truncate_tables ["posts", "comments", "books", "robots"] 6 | end 7 | 8 | def test_truncate 9 | source = 3.times.map { |i| {"id" => i + 1, "title" => "Post #{i + 1}"} } 10 | dest = [{"id" => 1, "title" => "First Post"}, {"id" => 4, "title" => "Post 4"}] 11 | expected = source 12 | assert_result("", source, dest, expected) 13 | end 14 | 15 | def test_overwrite 16 | source = 3.times.map { |i| {"id" => i + 1, "title" => "Post #{i + 1}"} } 17 | dest = [{"id" => 1, "title" => "First Post"}, {"id" => 4, "title" => "Post 4"}] 18 | expected = source + [dest[1]] 19 | assert_result("--overwrite", source, dest, expected) 20 | end 21 | 22 | def test_preserve 23 | source = 3.times.map { |i| {"id" => i + 1, "title" => "Post #{i + 1}"} } 24 | dest = [{"id" => 1, "title" => "First Post"}, {"id" => 4, "title" => "Post 4"}] 25 | expected = [dest[0]] + source[1..-1] + [dest[1]] 26 | assert_result("--preserve", source, dest, expected) 27 | end 28 | 29 | def test_where 30 | source = 3.times.map { |i| {"id" => i + 1, "title" => "Post #{i + 1}"} } 31 | dest = [] 32 | expected = [source[0]] 33 | assert_result(" 'WHERE id = 1'", source, dest, expected) 34 | end 35 | 36 | def test_overwrite_multicolumn_primary_key 37 | source = [ 38 | {"id" => 1, "id2" => 1, "title" => "Post 1"}, 39 | {"id" => 1, "id2" => 2, "title" => "Post 2"}, 40 | {"id" => 1, "id2" => 3, "title" => "Post 3"} 41 | ] 42 | dest = [{"id" => 1, "id2" => 1, "title" => "First Post"}, {"id" => 1, "id2" => 4, "title" => "Post 4"}] 43 | expected = source + [dest[1]] 44 | assert_result("--overwrite", source, dest, expected, "books") 45 | end 46 | 47 | def test_preserve_multicolumn_primary_key 48 | source = [ 49 | {"id" => 1, "id2" => 1, "title" => "Post 1"}, 50 | {"id" => 1, "id2" => 2, "title" => "Post 2"}, 51 | {"id" => 2, "id2" => 4, "title" => "Post 3"} 52 | ] 53 | dest = [{"id" => 1, "id2" => 1, "title" => "First Post"}, {"id" => 3, "id2" => 
4, "title" => "Post 4"}] 54 | expected = [dest[0]] + source[1..-1] + [dest[1]] 55 | assert_result("--preserve", source, dest, expected, "books") 56 | end 57 | 58 | def test_generated 59 | skip if server_version_num < 120000 60 | 61 | [conn1, conn2].each do |conn| 62 | conn.exec("DROP TABLE IF EXISTS shares") 63 | conn.exec <<~EOS 64 | CREATE TABLE shares ( 65 | id SERIAL PRIMARY KEY, 66 | gen integer GENERATED ALWAYS AS (id + 1) STORED 67 | ); 68 | EOS 69 | end 70 | 71 | source = 3.times.map { |i| {"id" => i + 1, "gen" => i + 2} } 72 | dest = [] 73 | expected = source 74 | assert_result("", source, dest, expected, "shares") 75 | 76 | truncate_tables ["shares"] 77 | assert_result("--overwrite", source, dest, expected, "shares") 78 | 79 | truncate_tables ["shares"] 80 | assert_result("--preserve", source, dest, expected, "shares") 81 | end 82 | 83 | def test_overwrite_no_primary_key 84 | assert_error "chapters (Primary key required for --overwrite)", "chapters --overwrite", config: true 85 | end 86 | 87 | def test_preserve_no_primary_key 88 | assert_error "chapters (Primary key required for --preserve)", "chapters --preserve", config: true 89 | end 90 | 91 | def test_no_shared_fields 92 | assert_prints "authors: No fields to copy", "authors", config: true 93 | end 94 | 95 | def test_missing_column 96 | assert_prints "Missing columns: current_mood, zip_code", "Users", config: true 97 | end 98 | 99 | def test_extra_column 100 | assert_prints "Extra columns: current_mood, zip_code", "Users --from pgsync_test2 --to pgsync_test1" 101 | end 102 | 103 | def test_different_column_types 104 | assert_prints "Different column types: pages (integer -> bigint)", "chapters", config: true 105 | end 106 | 107 | def test_notice 108 | skip if ENV["TRAVIS"] 109 | assert_prints "NOTICE: truncate cascades to table \"comments\"", "posts", config: true 110 | end 111 | 112 | def test_defer_constraints_v1 113 | insert(conn1, "posts", [{"id" => 1}]) 114 | insert(conn1, "comments", [{"post_id" => 1}]) 115 | assert_error "Sync failed for 1 table: comments", "comments,posts --jobs 1", config: true 116 | assert_works "comments,posts --defer-constraints-v1", config: true 117 | assert_works "comments,posts --defer-constraints-v1 --overwrite", config: true 118 | assert_works "comments,posts --defer-constraints-v1 --preserve", config: true 119 | assert_equal [{"id" => 1}], conn2.exec("SELECT id FROM posts ORDER BY id").to_a 120 | assert_equal [{"post_id" => 1}], conn2.exec("SELECT post_id FROM comments ORDER BY post_id").to_a 121 | end 122 | 123 | def test_defer_constraints_v1_not_deferrable 124 | insert(conn1, "posts", [{"id" => 1}]) 125 | insert(conn1, "comments2", [{"post_id" => 1}]) 126 | assert_prints "Non-deferrable constraints: comments2_post_id_fkey", "comments2,posts --defer-constraints-v1", config: true 127 | assert_error "violates foreign key constraint", "comments2,posts --defer-constraints-v1", config: true 128 | end 129 | 130 | def test_defer_constraints 131 | insert(conn1, "posts", [{"id" => 1}]) 132 | insert(conn1, "comments", [{"post_id" => 1}]) 133 | assert_error "Sync failed for 1 table: comments", "comments,posts --jobs 1", config: true 134 | assert_works "comments,posts --defer-constraints", config: true 135 | assert_works "comments,posts --defer-constraints --overwrite", config: true 136 | assert_works "comments,posts --defer-constraints --preserve", config: true 137 | assert_equal [{"id" => 1}], conn2.exec("SELECT id FROM posts ORDER BY id").to_a 138 | assert_equal [{"post_id" => 1}], 
conn2.exec("SELECT post_id FROM comments ORDER BY post_id").to_a 139 | end 140 | 141 | def test_defer_constraints_not_deferrable 142 | insert(conn1, "posts", [{"id" => 1}]) 143 | insert(conn1, "comments2", [{"post_id" => 1}]) 144 | assert_error "Sync failed for 1 table: comments2", "comments2,posts --jobs 1", config: true 145 | assert_works "comments2,posts --defer-constraints", config: true 146 | assert_works "comments2,posts --defer-constraints --overwrite", config: true 147 | assert_works "comments2,posts --defer-constraints --preserve", config: true 148 | assert_equal [{"id" => 1}], conn2.exec("SELECT id FROM posts ORDER BY id").to_a 149 | assert_equal [{"post_id" => 1}], conn2.exec("SELECT post_id FROM comments2 ORDER BY post_id").to_a 150 | end 151 | 152 | def test_disable_user_triggers 153 | insert(conn1, "robots", [{"name" => "Test"}]) 154 | assert_error "Sync failed for 1 table: robots", "robots", config: true 155 | assert_works "robots --disable-user-triggers", config: true 156 | assert_equal [{"name" => "Test"}], conn2.exec("SELECT name FROM robots ORDER BY id").to_a 157 | end 158 | 159 | def test_disable_user_triggers_defer_constraints 160 | insert(conn1, "robots", [{"name" => "Test"}]) 161 | assert_works "robots --disable-user-triggers --defer-constraints", config: true 162 | assert_equal [{"name" => "Test"}], conn2.exec("SELECT name FROM robots ORDER BY id").to_a 163 | end 164 | 165 | def test_disable_integrity 166 | insert(conn1, "posts", [{"id" => 1}]) 167 | insert(conn1, "comments", [{"post_id" => 1}]) 168 | assert_error "Sync failed for 1 table: comments", "comments", config: true 169 | assert_works "comments --disable-integrity", config: true 170 | # integrity is lost! (as expected) 171 | assert_equal [], conn2.exec("SELECT * FROM posts ORDER BY id").to_a 172 | assert_equal [{"post_id" => 1}], conn2.exec("SELECT post_id FROM comments ORDER BY post_id").to_a 173 | end 174 | 175 | def test_disable_integrity_v2 176 | insert(conn1, "posts", [{"id" => 1}]) 177 | insert(conn1, "comments", [{"post_id" => 1}]) 178 | assert_error "Sync failed for 1 table: comments", "comments", config: true 179 | assert_works "comments --disable-integrity-v2", config: true 180 | # integrity is lost! 
(as expected) 181 | assert_equal [], conn2.exec("SELECT * FROM posts ORDER BY id").to_a 182 | assert_equal [{"post_id" => 1}], conn2.exec("SELECT post_id FROM comments ORDER BY post_id").to_a 183 | end 184 | end 185 | -------------------------------------------------------------------------------- /test/tables_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class TablesTest < Minitest::Test 4 | def test_default 5 | tables = list_tables 6 | assert_includes tables, "posts" 7 | refute_includes tables, "other.pets" 8 | refute_includes tables, "excluded" 9 | end 10 | 11 | def test_wildcard 12 | tables = list_tables("p*") 13 | assert_includes tables, "posts" 14 | refute_includes tables, "other.pets" 15 | refute_includes tables, "excluded" 16 | end 17 | 18 | def test_wildcard_all_schemas 19 | tables = list_tables("p* --all-schemas") 20 | assert_includes tables, "posts" 21 | assert_includes tables, "other.pets" 22 | end 23 | 24 | def test_wildcard_schemas 25 | tables = list_tables("p* --schemas public") 26 | assert_includes tables, "posts" 27 | refute_includes tables, "other.pets" 28 | end 29 | 30 | def test_all_schemas 31 | tables = list_tables("--all-schemas") 32 | assert_includes tables, "posts" 33 | assert_includes tables, "other.pets" 34 | refute_includes tables, "excluded" 35 | end 36 | 37 | def test_schemas 38 | tables = list_tables("--schemas public") 39 | assert_includes tables, "posts" 40 | refute_includes tables, "other.pets" 41 | refute_includes tables, "excluded" 42 | end 43 | 44 | def test_exclude_wildcard 45 | tables = list_tables("--exclude p*") 46 | refute_includes tables, "posts" 47 | assert_includes tables, "comments" 48 | end 49 | 50 | def test_exclude_overrides_config 51 | tables = list_tables("--exclude posts") 52 | refute_includes tables, "posts" 53 | assert_includes tables, "excluded" 54 | end 55 | 56 | def test_exclude_not_applied_to_groups 57 | tables = list_tables("group_with_excluded") 58 | assert_includes tables, "excluded" 59 | end 60 | 61 | def test_tables_option 62 | tables = list_tables("--tables posts") 63 | assert_includes tables, "posts" 64 | end 65 | 66 | def test_tables_option_group 67 | assert_error "Table not found in source: group1", "--tables group1", config: true 68 | end 69 | 70 | def test_groups_option 71 | tables = list_tables("--groups group1") 72 | assert_includes tables, "Users" 73 | end 74 | 75 | def test_groups_option_table 76 | assert_error "Group not found: posts", "--groups posts", config: true 77 | end 78 | 79 | def test_table_unknown 80 | assert_error "Table not found in source: bad", "bad", config: true 81 | end 82 | 83 | def test_table_invalid 84 | assert_error "Cannot resolve table: bad.bad.bad", "bad.bad.bad", config: true 85 | end 86 | 87 | def list_tables(command = "") 88 | output = assert_works("--list #{command}", config: true) 89 | output.split("\n")[2..-1] 90 | end 91 | end 92 | -------------------------------------------------------------------------------- /test/test_helper.rb: -------------------------------------------------------------------------------- 1 | require "bundler/setup" 2 | Bundler.require(:default) 3 | require "minitest/autorun" 4 | require "minitest/pride" 5 | require "pg" 6 | require "shellwords" 7 | require "tmpdir" 8 | require "open3" 9 | 10 | def connect(dbname) 11 | conn = PG::Connection.open(dbname: dbname) 12 | conn.exec("SET client_min_messages TO WARNING") 13 | conn.type_map_for_results = 
PG::BasicTypeMapForResults.new(conn) 14 | conn.exec(File.read("test/support/schema#{dbname[-1]}.sql")) 15 | conn 16 | end 17 | 18 | def conn1 19 | @conn1 ||= connect("pgsync_test1") 20 | end 21 | 22 | def conn2 23 | @conn2 ||= connect("pgsync_test2") 24 | end 25 | 26 | def conn3 27 | @conn3 ||= connect("pgsync_test3") 28 | end 29 | 30 | [conn1, conn2, conn3] # setup schema 31 | 32 | class Minitest::Test 33 | def verbose? 34 | ENV["VERBOSE"] 35 | end 36 | 37 | # shelling out for each test is slower 38 | # but it prevents forking from messing up connections 39 | def run_command(command, config: false) 40 | command += " --config test/support/config.yml" if config 41 | if verbose? 42 | puts 43 | puts "$ pgsync #{command}" 44 | end 45 | exe = File.expand_path("../exe/pgsync", __dir__) 46 | output, status = Open3.capture2e(exe, *Shellwords.split(command)) 47 | puts output if verbose? 48 | [output, status] 49 | end 50 | 51 | def assert_works(command, **options) 52 | output, status = run_command(command, **options) 53 | assert status.success?, "Command failed" 54 | output 55 | end 56 | 57 | def assert_error(message, command, **options) 58 | output, status = run_command(command, **options) 59 | assert !status.success? 60 | assert_match message, output 61 | end 62 | 63 | def assert_prints(message, command, **options) 64 | output, _ = run_command(command, **options) 65 | assert_match message, output 66 | end 67 | 68 | def truncate(conn, table) 69 | conn.exec("TRUNCATE #{quote_ident(table)} CASCADE") 70 | end 71 | 72 | def truncate_tables(tables) 73 | [conn1, conn2].each do |conn| 74 | tables.each do |table| 75 | truncate(conn, table) 76 | end 77 | end 78 | end 79 | 80 | def insert(conn, table, rows) 81 | return if rows.empty? 82 | 83 | keys = rows.flat_map { |r| r.keys }.uniq 84 | values = rows.map { |r| keys.map { |k| r[k] } } 85 | 86 | key_str = keys.map { |k| quote_ident(k) }.join(", ") 87 | params_str = values.size.times.map { |i| "(" + keys.size.times.map { |j| "$#{i * keys.size + j + 1}" }.join(", ") + ")" }.join(", ") 88 | insert_str = "INSERT INTO #{quote_ident(table)} (#{key_str}) VALUES #{params_str}" 89 | conn.exec_params(insert_str, values.flatten) 90 | end 91 | 92 | def quote_ident(ident) 93 | PG::Connection.quote_ident(ident) 94 | end 95 | 96 | def assert_result(command, source, dest, expected, table = "posts") 97 | insert(conn1, table, source.map(&:dup).map { |v| v.delete("gen"); v }) 98 | insert(conn2, table, dest) 99 | 100 | assert_equal source, conn1.exec("SELECT * FROM #{table} ORDER BY 1, 2").to_a 101 | assert_equal dest, conn2.exec("SELECT * FROM #{table} ORDER BY 1, 2").to_a 102 | 103 | assert_works "#{table} #{command}", config: true 104 | 105 | assert_equal source, conn1.exec("SELECT * FROM #{table} ORDER BY 1, 2").to_a 106 | assert_equal expected, conn2.exec("SELECT * FROM #{table} ORDER BY 1, 2").to_a 107 | end 108 | 109 | def server_version_num 110 | conn1.exec("SHOW server_version_num").first["server_version_num"].to_i 111 | end 112 | end 113 | -------------------------------------------------------------------------------- /test/variable_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class VariableTest < Minitest::Test 4 | def setup 5 | truncate_tables ["posts"] 6 | end 7 | 8 | def test_number 9 | source = 3.times.map { |i| {"id" => i + 1, "title" => "Post #{i + 1}"} } 10 | expected = [source[1]] 11 | 12 | insert(conn1, "posts", source) 13 | assert_works "variable:2", config: true 14 | 
assert_equal expected, conn2.exec("SELECT * FROM posts ORDER BY 1, 2").to_a 15 | end 16 | 17 | def test_id 18 | source = 3.times.map { |i| {"id" => i + 1, "title" => "Post #{i + 1}"} } 19 | expected = [source[1]] 20 | 21 | insert(conn1, "posts", source) 22 | assert_works "variable_id:2", config: true 23 | assert_equal expected, conn2.exec("SELECT * FROM posts ORDER BY 1, 2").to_a 24 | end 25 | 26 | def test_missing 27 | assert_error "Missing variables: 1", "variable", config: true 28 | end 29 | 30 | def test_table 31 | assert_error "Cannot use parameters with tables", "posts:123", config: true 32 | end 33 | end 34 | --------------------------------------------------------------------------------