├── .github
│   ├── ISSUE_TEMPLATE.md
│   └── workflows
│       └── build.yml
├── .gitignore
├── CHANGELOG.md
├── Dockerfile
├── Gemfile
├── LICENSE.txt
├── README.md
├── Rakefile
├── config.yml
├── exe
│   └── pgsync
├── lib
│   ├── pgsync.rb
│   └── pgsync
│       ├── client.rb
│       ├── data_source.rb
│       ├── init.rb
│       ├── schema_sync.rb
│       ├── sequence.rb
│       ├── sync.rb
│       ├── table.rb
│       ├── table_sync.rb
│       ├── task.rb
│       ├── task_resolver.rb
│       ├── utils.rb
│       └── version.rb
├── pgsync.gemspec
└── test
    ├── commands_test.rb
    ├── data_rules_test.rb
    ├── data_source_test.rb
    ├── in_batches_test.rb
    ├── init_test.rb
    ├── schema_test.rb
    ├── support
    │   ├── bad.yml
    │   ├── config.yml
    │   ├── schema1.sql
    │   ├── schema2.sql
    │   └── schema3.sql
    ├── sync_test.rb
    ├── tables_test.rb
    ├── test_helper.rb
    └── variable_test.rb
/.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Hi, 2 | 3 | Please use this script to show what you're trying to do when possible. Thanks! 4 | 5 | ```sh 6 | createdb pgsync_from 7 | createdb pgsync_to 8 | 9 | psql pgsync_from << SQL 10 | CREATE TABLE posts ( 11 | id BIGINT PRIMARY KEY, 12 | name TEXT 13 | ); 14 | INSERT INTO posts VALUES (1, 'hello'), (2, 'world'); 15 | SQL 16 | 17 | psql pgsync_to << SQL 18 | CREATE TABLE posts ( 19 | id BIGINT PRIMARY KEY, 20 | name TEXT 21 | ); 22 | SQL 23 | 24 | pgsync --from pgsync_from --to pgsync_to 25 | ``` 26 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | on: [push, pull_request] 3 | jobs: 4 | build: 5 | runs-on: ubuntu-latest 6 | strategy: 7 | fail-fast: false 8 | matrix: 9 | include: 10 | - ruby: 3.4 11 | postgres: 17 12 | - ruby: 2.7 13 | postgres: 9.6 14 | steps: 15 | - uses: actions/checkout@v4 16 | - uses: ruby/setup-ruby@v1 17 | with: 18 | ruby-version: ${{ matrix.ruby }} 19 | bundler-cache: true 20 | - uses: ankane/setup-postgres@v1 21 | with: 22 | postgres-version: ${{ matrix.postgres }} 23 | - run: | 24 | createdb pgsync_test1 25 | createdb pgsync_test2 26 | createdb pgsync_test3 27 | - run: bundle exec rake test 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.bundle/ 2 | /.yardoc 3 | /Gemfile.lock 4 | /_yardoc/ 5 | /coverage/ 6 | /doc/ 7 | /pkg/ 8 | /spec/reports/ 9 | /tmp/ 10 | /.pgsync.yml 11 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.8.0 (2024-07-10) 2 | 3 | - Added Docker image for `linux/arm64` 4 | - Fixed warning with Ruby 3.3 5 | - Dropped support for Ruby < 2.7 6 | 7 | ## 0.7.4 (2023-03-06) 8 | 9 | - Fixed issue with slop 4.10.0 10 | 11 | ## 0.7.3 (2022-11-09) 12 | 13 | - Fixed issue with pg 1.4.4 14 | - Fixed output when `pg_restore` not found 15 | 16 | ## 0.7.2 (2022-09-19) 17 | 18 | - Improved error message when a primary key is required 19 | - Switched to monotonic time 20 | - Fixed schema sync with Homebrew Postgres 14.5 21 | 22 | ## 0.7.1 (2022-07-06) 23 | 24 | - Fixed random letter data rule generating non-letter 25 | 26 | ## 0.7.0 (2022-03-10) 27 | 28 | - Changed `--defer-constraints` to `--defer-constraints-v1` 29 | - Changed `--defer-constraints-v2` to `--defer-constraints` 30 | - Fixed unknown alias error with Ruby 3.1 31 | - Dropped support for Ruby < 2.5 32 | 33 | ## 
0.6.8 (2021-09-21) 34 | 35 | - Fixed error when schema missing in destination with `--schema-first` and `--schema-only` 36 | 37 | ## 0.6.7 (2021-04-26) 38 | 39 | - Fixed connection security for `--schema-first` and `--schema-only` - [more info](https://github.com/ankane/pgsync/issues/121) 40 | 41 | ## 0.6.6 (2020-10-29) 42 | 43 | - Added support for tables with generated columns 44 | 45 | ## 0.6.5 (2020-07-10) 46 | 47 | - Improved help 48 | 49 | ## 0.6.4 (2020-06-10) 50 | 51 | - Log SQL with `--debug` option 52 | - Improved sequence queries 53 | 54 | ## 0.6.3 (2020-06-09) 55 | 56 | - Added `--defer-constraints-v2` option 57 | - Ensure consistent source snapshot with `--disable-integrity` 58 | 59 | ## 0.6.2 (2020-06-09) 60 | 61 | - Added support for `--disable-integrity` on Amazon RDS 62 | - Fixed error when excluded table not found in source 63 | 64 | ## 0.6.1 (2020-06-07) 65 | 66 | - Added Django and Laravel integrations 67 | 68 | ## 0.6.0 (2020-06-07) 69 | 70 | - Added messages for different column types and non-deferrable constraints 71 | - Added support for wildcards to `--exclude` 72 | - Improved `--overwrite` and `--preserve` options for foreign keys 73 | - Improved output for schema sync 74 | - Fixed `--overwrite` and `--preserve` options for multicolumn primary keys 75 | - Fixed output for notices 76 | 77 | Breaking 78 | 79 | - Syncs shared tables instead of raising an error when tables missing in destination 80 | - Raise an error when `--config` or `--db` option provided and config not found 81 | - Removed deprecated options 82 | - Dropped support for Postgres < 9.5 83 | 84 | ## 0.5.5 (2020-05-13) 85 | 86 | - Added `--jobs` option 87 | - Added `--defer-constraints` option 88 | - Added `--disable-user-triggers` option 89 | - Added `--disable-integrity` option 90 | - Improved error message for older libpq 91 | 92 | ## 0.5.4 (2020-05-09) 93 | 94 | - Fixed output for `--in-batches` 95 | 96 | ## 0.5.3 (2020-04-03) 97 | 98 | - Improved Postgres error messages 99 | - Fixed behavior of wildcard without schema 100 | 101 | ## 0.5.2 (2020-03-27) 102 | 103 | - Added `--fail-fast` option 104 | - Automatically exclude tables when `--init` run inside Rails app 105 | - Improved error message 106 | - Fixed typo in error message 107 | 108 | ## 0.5.1 (2020-03-26) 109 | 110 | - Fixed Slop warning with Ruby 2.7 111 | 112 | ## 0.5.0 (2020-03-26) 113 | 114 | - Improved output when syncing 115 | - Improved output on interrupt 116 | - Added `--no-sequences` option 117 | 118 | ## 0.4.3 (2019-10-27) 119 | 120 | - Added `sslmode` to template 121 | 122 | ## 0.4.2 (2019-10-27) 123 | 124 | - Improved flexibility of commands 125 | - Sync all objects when no tables specified 126 | 127 | ## 0.4.1 (2018-12-15) 128 | 129 | - Made `psql` version check more robust 130 | - Fixed issue with non-lowercase primary key 131 | - Prefer `--init` over `--setup` 132 | - Improved data rules 133 | 134 | ## 0.4.0 (2018-02-28) 135 | 136 | - Sync all schemas in search path by default 137 | - Added support for socket connections 138 | - Added support for environment variables 139 | 140 | ## 0.3.9 (2018-02-27) 141 | 142 | - Better support for schemas 143 | - Added `--schemas` option 144 | - Added `--all-schemas` option 145 | - Added `--schema-first` option 146 | - Fixed issue with non-lowercase tables and partial syncs 147 | 148 | ## 0.3.8 (2017-10-01) 149 | 150 | - Added Windows support 151 | - Added `random_string` and `random_number` replacement options 152 | - Improved performance of `--in-batches` for large tables 153 | 154 | ## 
0.3.7 (2017-08-30) 155 | 156 | - Fixed non-lowercase tables and columns 157 | - Fixed `--truncate` option with `--in-batches` 158 | 159 | ## 0.3.6 (2016-10-02) 160 | 161 | - Fixed `Table does not exist in source` error 162 | 163 | ## 0.3.5 (2016-07-23) 164 | 165 | - Support schemas other than public 166 | 167 | ## 0.3.4 (2016-04-29) 168 | 169 | - Added `--in-batches` mode for production transfers with `--batch-size` and `--sleep` 170 | 171 | ## 0.3.3 (2016-04-25) 172 | 173 | - Added `-d` option as an alias for `--db` 174 | - Added support for wildcard tables 175 | - Fixed `--schema-only` errors 176 | 177 | ## 0.3.2 (2016-04-19) 178 | 179 | - Prefer `{1}` for interpolation 180 | - Added `--overwrite` option 181 | - Deprecated `--where` and `--limit` 182 | 183 | ## 0.3.1 (2016-04-06) 184 | 185 | - Added `-t` or `--tables`, `-g` or `--groups` options 186 | - Deprecated `tables`, `groups`, and `setup` commands 187 | 188 | ## 0.3.0 (2016-04-06) 189 | 190 | - More powerful groups 191 | - Overwrite rows by default when a `WHERE` clause is used (previously truncated) 192 | - Added `pgsync users "WHERE id = 1"` 193 | - Added `pgsync group1`, shorthand for `pgsync groups group1` 194 | - Added `--schema-only` option 195 | - Added `--no-rules` option 196 | - Added `--setup` option 197 | - Added `--truncate` option 198 | 199 | ## 0.2.4 (2016-04-04) 200 | 201 | - Added `--preserve` option 202 | - Added `--list` option for groups and tables 203 | - Added `--limit` option 204 | 205 | ## 0.2.3 (2016-03-30) 206 | 207 | - Fixed `no PostgreSQL user name specified in startup packet` 208 | 209 | ## 0.2.2 (2016-03-28) 210 | 211 | - Added `--debug` option 212 | 213 | ## 0.2.1 (2016-03-27) 214 | 215 | - Do not require config file 216 | 217 | ## 0.2.0 (2016-03-26) 218 | 219 | - Fixed idle transaction timeout errors - respects `statement_timeout` as a result 220 | - Raise error when command exits with non-zero status 221 | 222 | ## 0.1.1 (2016-03-23) 223 | 224 | - Better support for multiple databases 225 | - Search parent directories for config file 226 | 227 | ## 0.1.0 (2015-12-07) 228 | 229 | - First release 230 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ruby:3-alpine 2 | 3 | LABEL org.opencontainers.image.authors="Andrew Kane <andrew@ankane.org>" 4 | 5 | RUN apk add --update build-base libpq-dev postgresql-client && \ 6 | gem install pgsync && \ 7 | apk del build-base && \ 8 | rm -rf /var/cache/apk/* 9 | 10 | ENTRYPOINT ["pgsync"] 11 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | gemspec 4 | 5 | gem "minitest", ">= 5" 6 | gem "rake" 7 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015-2023 Andrew Kane 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 
12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pgsync 2 | 3 | Sync data from one Postgres database to another (like `pg_dump`/`pg_restore`). Designed for: 4 | 5 | - **speed** - tables are transferred in parallel 6 | - **security** - built-in methods to prevent sensitive data from ever leaving the server 7 | - **flexibility** - gracefully handles schema differences, like missing columns and extra columns 8 | - **convenience** - sync partial tables, groups of tables, and related records 9 | 10 | :tangerine: Battle-tested at [Instacart](https://www.instacart.com/opensource) 11 | 12 | [![Build Status](https://github.com/ankane/pgsync/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/pgsync/actions) 13 | 14 | ## Installation 15 | 16 | pgsync is a command line tool. To install, run: 17 | 18 | ```sh 19 | gem install pgsync 20 | ``` 21 | 22 | This will give you the `pgsync` command. If installation fails, you may need to install [dependencies](#dependencies). 23 | 24 | You can also install it with Homebrew: 25 | 26 | ```sh 27 | brew install pgsync 28 | ``` 29 | 30 | ## Setup 31 | 32 | In your project directory, run: 33 | 34 | ```sh 35 | pgsync --init 36 | ``` 37 | 38 | This creates `.pgsync.yml` for you to customize. We recommend checking this into your version control (assuming it doesn’t contain sensitive information). `pgsync` commands can be run from this directory or any subdirectory. 39 | 40 | ## How to Use 41 | 42 | First, make sure your schema is set up in both databases. We recommend using a schema migration tool for this, but pgsync also provides a few [convenience methods](#schema). Once that’s done, you’re ready to sync data. 
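Your `.pgsync.yml` points `from` at the source database and `to` at the destination. A minimal sketch (these URLs are placeholders; the generated file documents all options):

```yml
# source database URL (placeholder)
from: postgres://user:password@source-host:5432/myapp_production

# destination database URL (placeholder)
to: postgres://localhost:5432/myapp_development
```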
43 | 44 | Sync tables 45 | 46 | ```sh 47 | pgsync 48 | ``` 49 | 50 | Sync specific tables 51 | 52 | ```sh 53 | pgsync table1,table2 54 | ``` 55 | 56 | Works with wildcards as well 57 | 58 | ```sh 59 | pgsync "table*" 60 | ``` 61 | 62 | Sync specific rows (existing rows are overwritten) 63 | 64 | ```sh 65 | pgsync products "where store_id = 1" 66 | ``` 67 | 68 | You can also preserve existing rows 69 | 70 | ```sh 71 | pgsync products "where store_id = 1" --preserve 72 | ``` 73 | 74 | Or truncate them 75 | 76 | ```sh 77 | pgsync products "where store_id = 1" --truncate 78 | ``` 79 | 80 | ## Tables 81 | 82 | Exclude specific tables 83 | 84 | ```sh 85 | pgsync --exclude table1,table2 86 | ``` 87 | 88 | Add to `.pgsync.yml` to exclude by default 89 | 90 | ```yml 91 | exclude: 92 | - table1 93 | - table2 94 | ``` 95 | 96 | Sync tables from all schemas or specific schemas (by default, only the search path is synced) 97 | 98 | ```sh 99 | pgsync --all-schemas 100 | # or 101 | pgsync --schemas public,other 102 | # or 103 | pgsync public.table1,other.table2 104 | ``` 105 | 106 | ## Groups 107 | 108 | Define groups in `.pgsync.yml`: 109 | 110 | ```yml 111 | groups: 112 | group1: 113 | - table1 114 | - table2 115 | ``` 116 | 117 | And run: 118 | 119 | ```sh 120 | pgsync group1 121 | ``` 122 | 123 | ## Variables 124 | 125 | You can also use groups to sync a specific record and associated records in other tables. 126 | 127 | To get product `123` with its reviews, last 10 coupons, and store, use: 128 | 129 | ```yml 130 | groups: 131 | product: 132 | products: "where id = {1}" 133 | reviews: "where product_id = {1}" 134 | coupons: "where product_id = {1} order by created_at desc limit 10" 135 | stores: "where id in (select store_id from products where id = {1})" 136 | ``` 137 | 138 | And run: 139 | 140 | ```sh 141 | pgsync product:123 142 | ``` 143 | 144 | ## Schema 145 | 146 | Sync the schema before the data (this wipes out existing data) 147 | 148 | ```sh 149 | pgsync --schema-first 150 | ``` 151 | 152 | Specify tables 153 | 154 | ```sh 155 | pgsync table1,table2 --schema-first 156 | ``` 157 | 158 | Sync the schema without data (this wipes out existing data) 159 | 160 | ```sh 161 | pgsync --schema-only 162 | ``` 163 | 164 | pgsync does not try to sync Postgres extensions. 165 | 166 | ## Sensitive Data 167 | 168 | Prevent sensitive data like email addresses from leaving the remote server. 169 | 170 | Define rules in `.pgsync.yml`: 171 | 172 | ```yml 173 | data_rules: 174 | email: unique_email 175 | last_name: random_letter 176 | birthday: random_date 177 | users.auth_token: 178 | value: secret 179 | visits_count: 180 | statement: "(RANDOM() * 10)::int" 181 | encrypted_*: null 182 | ``` 183 | 184 | `last_name` matches all columns named `last_name` and `users.last_name` matches only the users table. Wildcards are supported, and the first matching rule is applied. 185 | 186 | Options for replacement are: 187 | 188 | - `unique_email` 189 | - `unique_phone` 190 | - `unique_secret` 191 | - `random_letter` 192 | - `random_int` 193 | - `random_date` 194 | - `random_time` 195 | - `random_ip` 196 | - `value` 197 | - `statement` 198 | - `null` 199 | - `untouched` 200 | 201 | Rules starting with `unique_` require the table to have a single column primary key. `unique_phone` requires a numeric primary key. 202 | 203 | ## Foreign Keys 204 | 205 | Foreign keys can make it difficult to sync data. Three options are: 206 | 207 | 1. Defer constraints (recommended) 208 | 2. Manually specify the order of tables 209 | 3. 
Disable foreign key triggers, which can silently break referential integrity (not recommended) 210 | 211 | To defer constraints, use: 212 | 213 | ```sh 214 | pgsync --defer-constraints 215 | ``` 216 | 217 | To manually specify the order of tables, use `--jobs 1` so tables are synced one-at-a-time. 218 | 219 | ```sh 220 | pgsync table1,table2,table3 --jobs 1 221 | ``` 222 | 223 | To disable foreign key triggers and potentially break referential integrity, use: 224 | 225 | ```sh 226 | pgsync --disable-integrity 227 | ``` 228 | 229 | This requires superuser privileges on the `to` database. If syncing to (not from) Amazon RDS, use the `rds_superuser` role. If syncing to (not from) Heroku, there doesn’t appear to be a way to disable integrity. 230 | 231 | ## Triggers 232 | 233 | Disable user triggers with: 234 | 235 | ```sh 236 | pgsync --disable-user-triggers 237 | ``` 238 | 239 | ## Sequences 240 | 241 | Skip syncing sequences with: 242 | 243 | ```sh 244 | pgsync --no-sequences 245 | ``` 246 | 247 | ## Append-Only Tables 248 | 249 | For extremely large, append-only tables, sync in batches. 250 | 251 | ```sh 252 | pgsync large_table --in-batches 253 | ``` 254 | 255 | Note: This requires the table to have a numeric, increasing primary key 256 | 257 | The script will resume where it left off when run again, making it great for backfills. 258 | 259 | ## Connection Security 260 | 261 | Always make sure your [connection is secure](https://ankane.org/postgres-sslmode-explained) when connecting to a database over a network you don’t fully trust. Your best option is to connect over SSH or a VPN. Another option is to use `sslmode=verify-full`. If you don’t do this, your database credentials can be compromised. 262 | 263 | ## Safety 264 | 265 | To keep you from accidentally overwriting production, the destination is limited to `localhost` or `127.0.0.1` by default. 266 | 267 | To use another host, add `to_safe: true` to your `.pgsync.yml`. 268 | 269 | ## Multiple Databases 270 | 271 | To use with multiple databases, run: 272 | 273 | ```sh 274 | pgsync --init db2 275 | ``` 276 | 277 | This creates `.pgsync-db2.yml` for you to edit. Specify a database in commands with: 278 | 279 | ```sh 280 | pgsync --db db2 281 | ``` 282 | 283 | ## Integrations 284 | 285 | - [Django](#django) 286 | - [Heroku](#heroku) 287 | - [Laravel](#laravel) 288 | - [Rails](#rails) 289 | 290 | ### Django 291 | 292 | If you run `pgsync --init` in a Django project, migrations will be excluded in `.pgsync.yml`. 293 | 294 | ```yml 295 | exclude: 296 | - django_migrations 297 | ``` 298 | 299 | ### Heroku 300 | 301 | If you run `pgsync --init` in a Heroku project, the `from` database will be set in `.pgsync.yml`. 302 | 303 | ```yml 304 | from: $(heroku config:get DATABASE_URL)?sslmode=require 305 | ``` 306 | 307 | ### Laravel 308 | 309 | If you run `pgsync --init` in a Laravel project, migrations will be excluded in `.pgsync.yml`. 310 | 311 | ```yml 312 | exclude: 313 | - migrations 314 | ``` 315 | 316 | ### Rails 317 | 318 | If you run `pgsync --init` in a Rails project, Active Record metadata and schema migrations will be excluded in `.pgsync.yml`. 
319 | 320 | ```yml 321 | exclude: 322 | - ar_internal_metadata 323 | - schema_migrations 324 | ``` 325 | 326 | ## Debugging 327 | 328 | To view the SQL that’s run, use: 329 | 330 | ```sh 331 | pgsync --debug 332 | ``` 333 | 334 | ## Other Commands 335 | 336 | Help 337 | 338 | ```sh 339 | pgsync --help 340 | ``` 341 | 342 | Version 343 | 344 | ```sh 345 | pgsync --version 346 | ``` 347 | 348 | List tables 349 | 350 | ```sh 351 | pgsync --list 352 | ``` 353 | 354 | ## Scripts 355 | 356 | Use groups when possible to take advantage of parallelism. 357 | 358 | For Ruby scripts, you may need to do: 359 | 360 | ```rb 361 | Bundler.with_unbundled_env do 362 | system "pgsync ..." 363 | end 364 | ``` 365 | 366 | ## Docker 367 | 368 | Get the [Docker image](https://hub.docker.com/r/ankane/pgsync) with: 369 | 370 | ```sh 371 | docker pull ankane/pgsync 372 | alias pgsync="docker run -ti ankane/pgsync" 373 | ``` 374 | 375 | This will give you the `pgsync` command. 376 | 377 | ## Dependencies 378 | 379 | If installation fails, your system may be missing Ruby or libpq. 380 | 381 | On Mac, run: 382 | 383 | ```sh 384 | brew install libpq 385 | ``` 386 | 387 | On Ubuntu, run: 388 | 389 | ```sh 390 | sudo apt-get install ruby-dev libpq-dev build-essential 391 | ``` 392 | 393 | ## Upgrading 394 | 395 | Run: 396 | 397 | ```sh 398 | gem install pgsync 399 | ``` 400 | 401 | To use master, run: 402 | 403 | ```sh 404 | gem install specific_install 405 | gem specific_install https://github.com/ankane/pgsync.git 406 | ``` 407 | 408 | With Homebrew, run: 409 | 410 | ```sh 411 | brew upgrade pgsync 412 | ``` 413 | 414 | With Docker, run: 415 | 416 | ```sh 417 | docker pull ankane/pgsync 418 | ``` 419 | 420 | ## Related Projects 421 | 422 | Also check out: 423 | 424 | - [Dexter](https://github.com/ankane/dexter) - The automatic indexer for Postgres 425 | - [PgHero](https://github.com/ankane/pghero) - A performance dashboard for Postgres 426 | - [pgslice](https://github.com/ankane/pgslice) - Postgres partitioning as easy as pie 427 | 428 | ## Thanks 429 | 430 | Inspired by [heroku-pg-transfer](https://github.com/ddollar/heroku-pg-transfer). 431 | 432 | ## History 433 | 434 | View the [changelog](https://github.com/ankane/pgsync/blob/master/CHANGELOG.md) 435 | 436 | ## Contributing 437 | 438 | Everyone is encouraged to help improve this project. 
Here are a few ways you can help: 439 | 440 | - [Report bugs](https://github.com/ankane/pgsync/issues) 441 | - Fix bugs and [submit pull requests](https://github.com/ankane/pgsync/pulls) 442 | - Write, clarify, or fix documentation 443 | - Suggest or add new features 444 | 445 | To get started with development: 446 | 447 | ```sh 448 | git clone https://github.com/ankane/pgsync.git 449 | cd pgsync 450 | bundle install 451 | 452 | createdb pgsync_test1 453 | createdb pgsync_test2 454 | createdb pgsync_test3 455 | 456 | bundle exec rake test 457 | ``` 458 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "bundler/gem_tasks" 2 | require "rake/testtask" 3 | 4 | task default: :test 5 | Rake::TestTask.new do |t| 6 | t.libs << "test" 7 | t.pattern = "test/**/*_test.rb" 8 | end 9 | 10 | namespace :docker do 11 | task :build do 12 | require_relative "lib/pgsync/version" 13 | 14 | system "docker build --pull --no-cache -t ankane/pgsync:latest -t ankane/pgsync:v#{PgSync::VERSION} .", exception: true 15 | end 16 | 17 | task :release do 18 | require_relative "lib/pgsync/version" 19 | 20 | system "docker buildx build --push --pull --no-cache --platform linux/amd64,linux/arm64 -t ankane/pgsync:latest -t ankane/pgsync:v#{PgSync::VERSION} .", exception: true 21 | end 22 | end 23 | -------------------------------------------------------------------------------- /config.yml: -------------------------------------------------------------------------------- 1 | # source database URL 2 | # database URLs take the format of: 3 | # postgres://user:password@host:port/dbname 4 | # 5 | # we recommend a command which outputs a database URL 6 | # so sensitive information is not included in this file 7 | # 8 | # we *highly recommend* you use sslmode=verify-full when possible 9 | # see https://ankane.org/postgres-sslmode-explained for more info 10 | from: $(some_command)?sslmode=require 11 | 12 | # destination database URL 13 | to: postgres://localhost:5432/myapp_development 14 | 15 | # exclude tables 16 | %{exclude} 17 | # define groups 18 | # groups: 19 | # group1: 20 | # - table1 21 | # - table2 22 | 23 | # sync specific schemas 24 | # schemas: 25 | # - public 26 | 27 | # protect sensitive information 28 | data_rules: 29 | email: unique_email 30 | phone: unique_phone 31 | last_name: random_letter 32 | birthday: random_date 33 | encrypted_*: null 34 | -------------------------------------------------------------------------------- /exe/pgsync: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | # handle interrupts 4 | trap("SIGINT") { abort } 5 | 6 | require "pgsync" 7 | PgSync::Client.start 8 | -------------------------------------------------------------------------------- /lib/pgsync.rb: -------------------------------------------------------------------------------- 1 | # dependencies 2 | require "parallel" 3 | require "pg" 4 | require "slop" 5 | require "tty-spinner" 6 | 7 | # stdlib 8 | require "open3" 9 | require "set" 10 | require "shellwords" 11 | require "tempfile" 12 | require "uri" 13 | require "yaml" 14 | 15 | # modules 16 | require_relative "pgsync/utils" 17 | require_relative "pgsync/client" 18 | require_relative "pgsync/data_source" 19 | require_relative "pgsync/init" 20 | require_relative "pgsync/schema_sync" 21 | require_relative "pgsync/sequence" 22 | require_relative "pgsync/sync" 23 | require_relative 
"pgsync/table" 24 | require_relative "pgsync/table_sync" 25 | require_relative "pgsync/task" 26 | require_relative "pgsync/task_resolver" 27 | require_relative "pgsync/version" 28 | 29 | module PgSync 30 | class Error < StandardError; end 31 | end 32 | -------------------------------------------------------------------------------- /lib/pgsync/client.rb: -------------------------------------------------------------------------------- 1 | module PgSync 2 | class Client 3 | include Utils 4 | 5 | def initialize(args) 6 | @args = args 7 | output.sync = true 8 | end 9 | 10 | def perform 11 | result = Slop::Parser.new(slop_options).parse(@args) 12 | arguments = result.arguments 13 | options = result.to_h 14 | options[:defer_constraints_v2] ||= options[:defer_constraints] 15 | 16 | raise Error, "Specify either --db or --config, not both" if options[:db] && options[:config] 17 | raise Error, "Cannot use --overwrite with --in-batches" if options[:overwrite] && options[:in_batches] 18 | 19 | if options[:version] 20 | log VERSION 21 | elsif options[:help] 22 | log slop_options 23 | elsif options[:init] 24 | Init.new(arguments, options).perform 25 | else 26 | Sync.new(arguments, options).perform 27 | end 28 | rescue => e 29 | # Error, PG::ConnectionBad, Slop::Error 30 | raise e if options && options[:debug] 31 | abort colorize(e.message.strip, :red) 32 | end 33 | 34 | def self.start 35 | new(ARGV).perform 36 | end 37 | 38 | protected 39 | 40 | def slop_options 41 | o = Slop::Options.new 42 | o.banner = %{Usage: 43 | pgsync [tables,groups] [sql] [options]} 44 | 45 | # not shown 46 | o.string "-t", "--tables", "tables to sync", help: false 47 | o.string "-g", "--groups", "groups to sync", help: false 48 | 49 | o.separator "" 50 | o.separator "Table options:" 51 | o.string "--exclude", "tables to exclude" 52 | o.string "--schemas", "schemas to sync" 53 | o.boolean "--all-schemas", "sync all schemas", default: false 54 | 55 | o.separator "" 56 | o.separator "Row options:" 57 | o.boolean "--overwrite", "overwrite existing rows", default: false 58 | o.boolean "--preserve", "preserve existing rows", default: false 59 | o.boolean "--truncate", "truncate existing rows", default: false 60 | 61 | o.separator "" 62 | o.separator "Foreign key options:" 63 | o.boolean "--defer-constraints", "defer constraints", default: false 64 | o.boolean "--disable-integrity", "disable foreign key triggers", default: false 65 | o.integer "-j", "--jobs", "number of tables to sync at a time" 66 | 67 | # legacy 68 | o.boolean "--defer-constraints-v1", "defer constraints", default: false, help: false 69 | o.boolean "--defer-constraints-v2", "defer constraints", default: false, help: false 70 | # private, for testing 71 | o.boolean "--disable-integrity-v2", "disable foreign key triggers", default: false, help: false 72 | 73 | o.separator "" 74 | o.separator "Schema options:" 75 | o.boolean "--schema-first", "sync schema first", default: false 76 | o.boolean "--schema-only", "sync schema only", default: false 77 | 78 | o.separator "" 79 | o.separator "Config options:" 80 | # technically, defaults to searching path for .pgsync.yml, but this is simpler 81 | o.string "--config", "config file (defaults to .pgsync.yml)" 82 | o.string "-d", "--db", "database-specific config file" 83 | 84 | o.separator "" 85 | o.separator "Connection options:" 86 | o.string "--from", "source database URL" 87 | o.string "--to", "destination database URL" 88 | o.boolean "--to-safe", "confirms destination is safe (when not localhost)", default: false 89 | 90 | 
o.separator "" 91 | o.separator "Other options:" 92 | o.boolean "--debug", "show SQL statements", default: false 93 | o.boolean "--disable-user-triggers", "disable non-system triggers", default: false 94 | o.boolean "--fail-fast", "stop on the first failed table", default: false 95 | o.boolean "--no-rules", "don't apply data rules", default: false 96 | o.boolean "--no-sequences", "don't sync sequences", default: false 97 | 98 | # not shown in help 99 | # o.separator "" 100 | # o.separator "Append-only table options:" 101 | o.boolean "--in-batches", "sync in batches", default: false, help: false 102 | o.integer "--batch-size", "batch size", default: 10000, help: false 103 | o.float "--sleep", "time to sleep between batches", default: 0, help: false 104 | 105 | o.separator "" 106 | o.separator "Other commands:" 107 | o.boolean "--init", "create config file", default: false 108 | o.boolean "--list", "list tables", default: false 109 | o.boolean "-h", "--help", "print help" 110 | o.boolean "-v", "--version", "print version" 111 | 112 | o 113 | end 114 | end 115 | end 116 | -------------------------------------------------------------------------------- /lib/pgsync/data_source.rb: -------------------------------------------------------------------------------- 1 | module PgSync 2 | class DataSource 3 | include Utils 4 | 5 | attr_reader :url 6 | 7 | def initialize(url, name:, debug:) 8 | @url = url 9 | @name = name 10 | @debug = debug 11 | end 12 | 13 | def exists? 14 | @url && @url.size > 0 15 | end 16 | 17 | def local? 18 | !host || %w(localhost 127.0.0.1).include?(host) 19 | end 20 | 21 | def host 22 | @host ||= dedup_localhost(conninfo[:host]) 23 | end 24 | 25 | def port 26 | @port ||= dedup_localhost(conninfo[:port]) 27 | end 28 | 29 | def dbname 30 | @dbname ||= conninfo[:dbname] 31 | end 32 | 33 | # gets visible tables 34 | def tables 35 | @tables ||= begin 36 | query = <<~SQL 37 | SELECT 38 | table_schema AS schema, 39 | table_name AS table 40 | FROM 41 | information_schema.tables 42 | WHERE 43 | table_type = 'BASE TABLE' AND 44 | table_schema NOT IN ('information_schema', 'pg_catalog') 45 | ORDER BY 1, 2 46 | SQL 47 | execute(query).map { |row| Table.new(row["schema"], row["table"]) } 48 | end 49 | end 50 | 51 | def table_exists?(table) 52 | table_set.include?(table) 53 | end 54 | 55 | def max_id(table, primary_key, sql_clause = nil) 56 | execute("SELECT MAX(#{quote_ident(primary_key)}) FROM #{quote_ident_full(table)}#{sql_clause}").first["max"].to_i 57 | end 58 | 59 | def min_id(table, primary_key, sql_clause = nil) 60 | execute("SELECT MIN(#{quote_ident(primary_key)}) FROM #{quote_ident_full(table)}#{sql_clause}").first["min"].to_i 61 | end 62 | 63 | def last_value(seq) 64 | execute("SELECT last_value FROM #{quote_ident_full(seq)}").first["last_value"] 65 | end 66 | 67 | def truncate(table) 68 | execute("TRUNCATE #{quote_ident_full(table)} CASCADE") 69 | end 70 | 71 | def schemas 72 | @schemas ||= begin 73 | query = <<~SQL 74 | SELECT 75 | schema_name 76 | FROM 77 | information_schema.schemata 78 | ORDER BY 1 79 | SQL 80 | execute(query).map { |row| row["schema_name"] } 81 | end 82 | end 83 | 84 | def create_schema(schema) 85 | execute("CREATE SCHEMA #{quote_ident(schema)}") 86 | end 87 | 88 | def triggers(table) 89 | query = <<~SQL 90 | SELECT 91 | tgname AS name, 92 | tgisinternal AS internal, 93 | tgenabled != 'D' AS enabled, 94 | tgconstraint != 0 AS integrity 95 | FROM 96 | pg_trigger 97 | WHERE 98 | pg_trigger.tgrelid = $1::regclass 99 | SQL 100 | execute(query, 
[quote_ident_full(table)]) 101 | end 102 | 103 | def conn 104 | @conn ||= begin 105 | begin 106 | ENV["PGCONNECT_TIMEOUT"] ||= "3" 107 | if @url.start_with?("postgres://", "postgresql://") 108 | config = @url 109 | else 110 | config = {dbname: @url} 111 | end 112 | @concurrent_id = concurrent_id 113 | PG::Connection.new(config) 114 | rescue URI::InvalidURIError 115 | raise Error, "Invalid connection string. Make sure it works with `psql`" 116 | end 117 | end 118 | end 119 | 120 | def close 121 | if @conn 122 | @conn.close 123 | @conn = nil 124 | end 125 | end 126 | 127 | # reconnect for new thread or process 128 | def reconnect_if_needed 129 | reconnect if @concurrent_id != concurrent_id 130 | end 131 | 132 | def search_path 133 | @search_path ||= execute("SELECT unnest(current_schemas(true)) AS schema").map { |r| r["schema"] } 134 | end 135 | 136 | def server_version_num 137 | @server_version_num ||= execute("SHOW server_version_num").first["server_version_num"].to_i 138 | end 139 | 140 | def execute(query, params = []) 141 | log_sql query, params 142 | conn.exec_params(query, params).to_a 143 | end 144 | 145 | def transaction 146 | if conn.transaction_status == 0 147 | # not currently in transaction 148 | log_sql "BEGIN" 149 | result = 150 | conn.transaction do 151 | yield 152 | end 153 | log_sql "COMMIT" 154 | result 155 | else 156 | yield 157 | end 158 | end 159 | 160 | # TODO log time for each statement 161 | def log_sql(query, params = {}) 162 | if @debug 163 | message = "#{colorize("[#{@name}]", :cyan)} #{query.gsub(/\s+/, " ").strip}" 164 | message = "#{message} #{params.inspect}" if params.any? 165 | log message 166 | end 167 | end 168 | 169 | private 170 | 171 | def concurrent_id 172 | [Process.pid, Thread.current.object_id] 173 | end 174 | 175 | def reconnect 176 | @conn.reset 177 | @concurrent_id = concurrent_id 178 | end 179 | 180 | def table_set 181 | @table_set ||= Set.new(tables) 182 | end 183 | 184 | def conninfo 185 | @conninfo ||= begin 186 | unless conn.respond_to?(:conninfo_hash) 187 | raise Error, "libpq is too old. Upgrade it and run `gem install pg`" 188 | end 189 | conn.conninfo_hash 190 | end 191 | end 192 | 193 | # for pg 1.4.4 194 | # https://github.com/ged/ruby-pg/issues/490 195 | def dedup_localhost(value) 196 | if conninfo[:host] == "localhost,localhost" && conninfo[:port].to_s.split(",").uniq.size == 1 197 | value.split(",")[0] 198 | else 199 | value 200 | end 201 | end 202 | end 203 | end 204 | -------------------------------------------------------------------------------- /lib/pgsync/init.rb: -------------------------------------------------------------------------------- 1 | module PgSync 2 | class Init 3 | include Utils 4 | 5 | def initialize(arguments, options) 6 | @arguments = arguments 7 | @options = options 8 | end 9 | 10 | def perform 11 | if @arguments.size > 1 12 | raise Error, "Usage:\n pgsync --init [db]" 13 | end 14 | 15 | file = 16 | if @options[:config] 17 | @options[:config] 18 | elsif @arguments.any? 19 | db_config_file(@arguments.first) 20 | elsif @options[:db] 21 | db_config_file(@options[:db]) 22 | else 23 | ".pgsync.yml" 24 | end 25 | 26 | if File.exist?(file) 27 | raise Error, "#{file} exists." 28 | else 29 | exclude = 30 | if rails? 31 | <<~EOS 32 | exclude: 33 | - ar_internal_metadata 34 | - schema_migrations 35 | EOS 36 | elsif django? 37 | # TODO exclude other tables? 38 | <<~EOS 39 | exclude: 40 | - django_migrations 41 | EOS 42 | elsif laravel? 
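# Laravel keeps its migration history in a table named migrations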
43 | <<~EOS 44 | exclude: 45 | - migrations 46 | EOS 47 | else 48 | <<~EOS 49 | # exclude: 50 | # - table1 51 | # - table2 52 | EOS 53 | end 54 | 55 | # create file 56 | contents = File.read(__dir__ + "/../../config.yml") 57 | contents.sub!("$(some_command)", "$(heroku config:get DATABASE_URL)") if heroku? 58 | File.write(file, contents % {exclude: exclude}) 59 | 60 | log "#{file} created. Add your database credentials." 61 | end 62 | end 63 | 64 | def django? 65 | file_exists?("manage.py", /django/i) 66 | end 67 | 68 | def heroku? 69 | `git remote -v 2>&1`.include?("git.heroku.com") rescue false 70 | end 71 | 72 | def laravel? 73 | file_exists?("artisan") 74 | end 75 | 76 | def rails? 77 | file_exists?("bin/rails") 78 | end 79 | 80 | def file_exists?(path, contents = nil) 81 | if contents 82 | File.read(path).match(contents) 83 | else 84 | File.exist?(path) 85 | end 86 | rescue 87 | false 88 | end 89 | end 90 | end 91 | -------------------------------------------------------------------------------- /lib/pgsync/schema_sync.rb: -------------------------------------------------------------------------------- 1 | module PgSync 2 | class SchemaSync 3 | include Utils 4 | 5 | attr_reader :args, :opts 6 | 7 | def initialize(source:, destination:, tasks:, args:, opts:) 8 | @source = source 9 | @destination = destination 10 | @tasks = tasks 11 | @args = args 12 | @opts = opts 13 | end 14 | 15 | def perform 16 | if opts[:preserve] 17 | raise Error, "Cannot use --preserve with --schema-first or --schema-only" 18 | end 19 | 20 | # generate commands before starting spinner 21 | # for better error output if pg_restore not found 22 | dump_command = dump_command() 23 | restore_command = restore_command() 24 | 25 | show_spinner = output.tty? && !opts[:debug] 26 | 27 | if show_spinner 28 | spinner = TTY::Spinner.new(":spinner Syncing schema", format: :dots) 29 | spinner.auto_spin 30 | end 31 | 32 | create_schemas if specify_tables? 33 | 34 | # if spinner, capture lines to show on error 35 | lines = [] 36 | success = 37 | run_command(dump_command, restore_command) do |line| 38 | if show_spinner 39 | lines << line 40 | else 41 | log line 42 | end 43 | end 44 | 45 | if show_spinner 46 | if success 47 | spinner.success 48 | else 49 | spinner.error 50 | log lines.join 51 | end 52 | end 53 | 54 | raise Error, "Schema sync returned non-zero exit code" unless success 55 | end 56 | 57 | private 58 | 59 | def run_command(dump_command, restore_command) 60 | err_r, err_w = IO.pipe 61 | Open3.pipeline_start(dump_command, restore_command, err: err_w) do |wait_thrs| 62 | err_w.close 63 | err_r.each do |line| 64 | yield line 65 | end 66 | wait_thrs.all? { |t| t.value.success? } 67 | end 68 | end 69 | 70 | # --if-exists introduced in Postgres 9.4 71 | # not ideal, but simpler than trying to parse version 72 | def supports_if_exists? 73 | `pg_restore --help`.include?("--if-exists") 74 | rescue Errno::ENOENT 75 | raise Error, "pg_restore not found" 76 | end 77 | 78 | def dump_command 79 | cmd = ["pg_dump", "-Fc", "--verbose", "--schema-only", "--no-owner", "--no-acl"] 80 | if specify_tables? 81 | @tasks.each do |task| 82 | cmd.concat(["-t", task.quoted_table]) 83 | end 84 | end 85 | cmd.concat(["-d", @source.url]) 86 | end 87 | 88 | def restore_command 89 | cmd = ["pg_restore", "--verbose", "--no-owner", "--no-acl", "--clean"] 90 | cmd << "--if-exists" if supports_if_exists? 
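# --clean (with --if-exists when supported) drops existing objects before
# recreating them, which is why schema sync wipes out existing data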
91 | cmd.concat(["-d", @destination.url]) 92 | end 93 | 94 | # pg_dump -t won't create schemas (even with -n) 95 | # not ideal that this happens outside restore transaction 96 | def create_schemas 97 | schemas = @tasks.map { |t| t.table.schema }.uniq - @destination.schemas 98 | schemas.sort.each do |schema| 99 | @destination.create_schema(schema) 100 | end 101 | end 102 | 103 | def specify_tables? 104 | !opts[:all_schemas] || opts[:tables] || opts[:groups] || args[0] || opts[:exclude] || opts[:schemas] 105 | end 106 | end 107 | end 108 | -------------------------------------------------------------------------------- /lib/pgsync/sequence.rb: -------------------------------------------------------------------------------- 1 | # minimal class to keep schema and sequence name separate 2 | module PgSync 3 | class Sequence 4 | attr_reader :schema, :name, :column 5 | 6 | def initialize(schema, name, column:) 7 | @schema = schema 8 | @name = name 9 | @column = column 10 | end 11 | 12 | def full_name 13 | "#{schema}.#{name}" 14 | end 15 | 16 | def eql?(other) 17 | other.schema == schema && other.name == name 18 | end 19 | 20 | # override hash when overriding eql? 21 | def hash 22 | [schema, name].hash 23 | end 24 | 25 | def to_s 26 | full_name 27 | end 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /lib/pgsync/sync.rb: -------------------------------------------------------------------------------- 1 | module PgSync 2 | class Sync 3 | include Utils 4 | 5 | def initialize(arguments, options) 6 | @arguments = arguments 7 | @options = options 8 | end 9 | 10 | def perform 11 | started_at = monotonic_time 12 | 13 | args = @arguments 14 | opts = @options 15 | 16 | # only resolve commands from config, not CLI arguments 17 | [:to, :from].each do |opt| 18 | opts[opt] ||= resolve_source(config[opt.to_s]) 19 | end 20 | 21 | # merge other config 22 | [:to_safe, :exclude, :schemas].each do |opt| 23 | opts[opt] ||= config[opt.to_s] 24 | end 25 | 26 | if args.size > 2 27 | raise Error, "Usage:\n pgsync [options]" 28 | end 29 | 30 | raise Error, "No source" unless source.exists? 31 | raise Error, "No destination" unless destination.exists? 32 | 33 | unless opts[:to_safe] || destination.local? 34 | raise Error, "Danger! 
Add `to_safe: true` to `.pgsync.yml` if the destination is not localhost or 127.0.0.1" 35 | end 36 | 37 | print_description("From", source) 38 | print_description("To", destination) 39 | 40 | if (opts[:preserve] || opts[:overwrite]) && destination.server_version_num < 90500 41 | raise Error, "Postgres 9.5+ is required for --preserve and --overwrite" 42 | end 43 | 44 | resolver = TaskResolver.new(args: args, opts: opts, source: source, destination: destination, config: config, first_schema: first_schema) 45 | tasks = 46 | resolver.tasks.map do |task| 47 | Task.new(source: source, destination: destination, config: config, table: task[:table], opts: opts.merge(sql: task[:sql])) 48 | end 49 | 50 | if opts[:in_batches] && tasks.size > 1 51 | raise Error, "Cannot use --in-batches with multiple tables" 52 | end 53 | 54 | confirm_tables_exist(source, tasks, "source") 55 | 56 | if opts[:list] 57 | confirm_tables_exist(destination, tasks, "destination") 58 | tasks.each do |task| 59 | log task_name(task) 60 | end 61 | else 62 | if opts[:schema_first] || opts[:schema_only] 63 | SchemaSync.new(source: source, destination: destination, tasks: tasks, args: args, opts: opts).perform 64 | end 65 | 66 | unless opts[:schema_only] 67 | TableSync.new(source: source, destination: destination, tasks: tasks, opts: opts, resolver: resolver).perform 68 | end 69 | 70 | log_completed(started_at) 71 | end 72 | end 73 | 74 | private 75 | 76 | def config 77 | @config ||= begin 78 | file = config_file 79 | if file 80 | begin 81 | # same options as YAML.load_file 82 | File.open(file, "r:bom|utf-8") do |f| 83 | # changed to keyword arguments in 3.1.0.pre1 84 | # https://github.com/ruby/psych/commit/c79ed445b4b3f8c9adf3da13bca3c976ddfae258 85 | if Psych::VERSION.to_f >= 3.1 86 | YAML.safe_load(f, aliases: true, filename: file) || {} 87 | else 88 | YAML.safe_load(f, [], [], true, file) || {} 89 | end 90 | end 91 | rescue Psych::SyntaxError => e 92 | raise Error, e.message 93 | rescue Errno::ENOENT 94 | raise Error, "Config file not found: #{file}" 95 | end 96 | else 97 | {} 98 | end 99 | end 100 | end 101 | 102 | def config_file 103 | if @options[:config] 104 | @options[:config] 105 | elsif @options[:db] 106 | file = db_config_file(@options[:db]) 107 | search_tree(file) || file 108 | else 109 | search_tree(".pgsync.yml") 110 | end 111 | end 112 | 113 | def search_tree(file) 114 | return file if File.exist?(file) 115 | 116 | path = Dir.pwd 117 | # prevent infinite loop 118 | 20.times do 119 | absolute_file = File.join(path, file) 120 | break absolute_file if File.exist?(absolute_file) 121 | path = File.dirname(path) 122 | break if path == "/" 123 | end 124 | end 125 | 126 | def print_description(prefix, source) 127 | location = " on #{source.host}:#{source.port}" if source.host 128 | log "#{prefix}: #{source.dbname}#{location}" 129 | end 130 | 131 | def log_completed(started_at) 132 | time = monotonic_time - started_at 133 | message = "Completed in #{time.round(1)}s" 134 | log colorize(message, :green) 135 | end 136 | 137 | def source 138 | @source ||= data_source(@options[:from], "from") 139 | end 140 | 141 | def destination 142 | @destination ||= data_source(@options[:to], "to") 143 | end 144 | 145 | def data_source(url, name) 146 | ds = DataSource.new(url, name: name, debug: @options[:debug]) 147 | ObjectSpace.define_finalizer(self, self.class.finalize(ds)) 148 | ds 149 | end 150 | 151 | # ideally aliases would work, but haven't found a nice way to do this 152 | def resolve_source(source) 153 | if source 154 | source = 
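# dup so the gsub! below does not mutate the original option/config value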
source.dup 155 | source.gsub!(/\$\([^)]+\)/) do |m| 156 | command = m[2..-2] 157 | result = `#{command}`.chomp 158 | unless $?.success? 159 | raise Error, "Command exited with non-zero status:\n#{command}" 160 | end 161 | result 162 | end 163 | end 164 | source 165 | end 166 | 167 | def self.finalize(ds) 168 | # must use proc instead of stabby lambda 169 | proc { ds.close } 170 | end 171 | end 172 | end 173 | -------------------------------------------------------------------------------- /lib/pgsync/table.rb: -------------------------------------------------------------------------------- 1 | # minimal class to keep schema and table name separate 2 | module PgSync 3 | class Table 4 | attr_reader :schema, :name 5 | 6 | def initialize(schema, name) 7 | @schema = schema 8 | @name = name 9 | end 10 | 11 | def full_name 12 | "#{schema}.#{name}" 13 | end 14 | 15 | def eql?(other) 16 | other.schema == schema && other.name == name 17 | end 18 | 19 | # override hash when overriding eql? 20 | def hash 21 | [schema, name].hash 22 | end 23 | 24 | def to_s 25 | full_name 26 | end 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /lib/pgsync/table_sync.rb: -------------------------------------------------------------------------------- 1 | module PgSync 2 | class TableSync 3 | include Utils 4 | 5 | attr_reader :source, :destination, :tasks, :opts, :resolver 6 | 7 | def initialize(source:, destination:, tasks:, opts:, resolver:) 8 | @source = source 9 | @destination = destination 10 | @tasks = tasks 11 | @opts = opts 12 | @resolver = resolver 13 | end 14 | 15 | def perform 16 | confirm_tables_exist(destination, tasks, "destination") 17 | 18 | add_columns 19 | 20 | add_primary_keys 21 | 22 | add_sequences unless opts[:no_sequences] 23 | 24 | show_notes 25 | 26 | # don't sync tables with no shared fields 27 | # we show a warning message above 28 | run_tasks(tasks.reject { |task| task.shared_fields.empty? 
}) 29 | end 30 | 31 | def add_columns 32 | source_columns = columns(source) 33 | destination_columns = columns(destination) 34 | 35 | tasks.each do |task| 36 | task.from_columns = source_columns[task.table] || [] 37 | task.to_columns = destination_columns[task.table] || [] 38 | end 39 | end 40 | 41 | def add_primary_keys 42 | destination_primary_keys = primary_keys(destination) 43 | 44 | tasks.each do |task| 45 | task.to_primary_key = destination_primary_keys[task.table] || [] 46 | end 47 | end 48 | 49 | def add_sequences 50 | source_sequences = sequences(source) 51 | destination_sequences = sequences(destination) 52 | 53 | tasks.each do |task| 54 | shared_columns = Set.new(task.shared_fields) 55 | 56 | task.from_sequences = (source_sequences[task.table] || []).select { |s| shared_columns.include?(s.column) } 57 | task.to_sequences = (destination_sequences[task.table] || []).select { |s| shared_columns.include?(s.column) } 58 | end 59 | end 60 | 61 | def sequences(data_source) 62 | query = <<~SQL 63 | SELECT 64 | nt.nspname as schema, 65 | t.relname as table, 66 | a.attname as column, 67 | n.nspname as sequence_schema, 68 | s.relname as sequence 69 | FROM 70 | pg_class s 71 | INNER JOIN 72 | pg_depend d ON d.objid = s.oid 73 | INNER JOIN 74 | pg_class t ON d.objid = s.oid AND d.refobjid = t.oid 75 | INNER JOIN 76 | pg_attribute a ON (d.refobjid, d.refobjsubid) = (a.attrelid, a.attnum) 77 | INNER JOIN 78 | pg_namespace n ON n.oid = s.relnamespace 79 | INNER JOIN 80 | pg_namespace nt ON nt.oid = t.relnamespace 81 | WHERE 82 | s.relkind = 'S' 83 | SQL 84 | data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v| 85 | [k, v.map { |r| Sequence.new(r["sequence_schema"], r["sequence"], column: r["column"]) }] 86 | end.to_h 87 | end 88 | 89 | def primary_keys(data_source) 90 | # https://stackoverflow.com/a/20537829 91 | # TODO can simplify with array_position in Postgres 9.5+ 92 | query = <<~SQL 93 | SELECT 94 | nspname AS schema, 95 | relname AS table, 96 | pg_attribute.attname AS column, 97 | format_type(pg_attribute.atttypid, pg_attribute.atttypmod), 98 | pg_attribute.attnum, 99 | pg_index.indkey 100 | FROM 101 | pg_index, pg_class, pg_attribute, pg_namespace 102 | WHERE 103 | indrelid = pg_class.oid AND 104 | pg_class.relnamespace = pg_namespace.oid AND 105 | pg_attribute.attrelid = pg_class.oid AND 106 | pg_attribute.attnum = any(pg_index.indkey) AND 107 | indisprimary 108 | SQL 109 | data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v| 110 | [k, v.sort_by { |r| r["indkey"].split(" ").index(r["attnum"]) }.map { |r| r["column"] }] 111 | end.to_h 112 | end 113 | 114 | def show_notes 115 | # for tables 116 | resolver.notes.each do |note| 117 | warning note 118 | end 119 | 120 | # for columns and sequences 121 | tasks.each do |task| 122 | task.notes.each do |note| 123 | warning "#{task_name(task)}: #{note}" 124 | end 125 | end 126 | 127 | # for non-deferrable constraints 128 | if opts[:defer_constraints_v1] 129 | constraints = non_deferrable_constraints(destination) 130 | constraints = tasks.flat_map { |t| constraints[t.table] || [] } 131 | warning "Non-deferrable constraints: #{constraints.join(", ")}" if constraints.any? 
132 | end 133 | end 134 | 135 | def columns(data_source) 136 | query = <<~SQL 137 | SELECT 138 | table_schema AS schema, 139 | table_name AS table, 140 | column_name AS column, 141 | data_type AS type 142 | FROM 143 | information_schema.columns 144 | WHERE 145 | is_generated = 'NEVER' 146 | ORDER BY 1, 2, 3 147 | SQL 148 | data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v| 149 | [k, v.map { |r| {name: r["column"], type: r["type"]} }] 150 | end.to_h 151 | end 152 | 153 | def non_deferrable_constraints(data_source) 154 | query = <<~SQL 155 | SELECT 156 | table_schema AS schema, 157 | table_name AS table, 158 | constraint_name 159 | FROM 160 | information_schema.table_constraints 161 | WHERE 162 | constraint_type = 'FOREIGN KEY' AND 163 | is_deferrable = 'NO' 164 | SQL 165 | data_source.execute(query).group_by { |r| Table.new(r["schema"], r["table"]) }.map do |k, v| 166 | [k, v.map { |r| r["constraint_name"] }] 167 | end.to_h 168 | end 169 | 170 | def run_tasks(tasks, &block) 171 | notices = [] 172 | failed_tables = [] 173 | started_at = {} 174 | 175 | show_spinners = output.tty? && !opts[:in_batches] && !opts[:debug] 176 | if show_spinners 177 | spinners = TTY::Spinner::Multi.new(format: :dots, output: output) 178 | task_spinners = {} 179 | end 180 | 181 | start = lambda do |task, i| 182 | message = ":spinner #{display_item(task)}" 183 | 184 | if show_spinners 185 | spinner = spinners.register(message) 186 | spinner.auto_spin 187 | task_spinners[task] = spinner 188 | elsif opts[:in_batches] 189 | log message.sub(":spinner", "⠋") 190 | end 191 | 192 | started_at[task] = monotonic_time 193 | end 194 | 195 | finish = lambda do |task, i, result| 196 | time = (monotonic_time - started_at[task]).round(1) 197 | 198 | success = result[:status] == "success" 199 | 200 | message = 201 | if result[:message] 202 | "(#{result[:message].lines.first.to_s.strip})" 203 | else 204 | "- #{time}s" 205 | end 206 | 207 | notices.concat(result[:notices]) 208 | 209 | if show_spinners 210 | spinner = task_spinners[task] 211 | if success 212 | spinner.success(message) 213 | else 214 | spinner.error(message) 215 | end 216 | else 217 | status = success ? "✔" : "✖" 218 | log [status, display_item(task), message].join(" ") 219 | end 220 | 221 | unless success 222 | failed_tables << task_name(task) 223 | fail_sync(failed_tables) if opts[:fail_fast] 224 | end 225 | end 226 | 227 | options = {start: start, finish: finish} 228 | 229 | jobs = opts[:jobs] 230 | 231 | # disable multiple jobs for defer constraints and disable integrity 232 | # so we can use a transaction to ensure a consistent snapshot 233 | if opts[:debug] || opts[:in_batches] || opts[:defer_constraints_v1] || opts[:defer_constraints_v2] || opts[:disable_integrity] || opts[:disable_integrity_v2] 234 | warning "--jobs ignored" if jobs 235 | jobs = 0 236 | end 237 | 238 | if windows? 239 | options[:in_threads] = jobs || 4 240 | else 241 | options[:in_processes] = jobs if jobs 242 | end 243 | 244 | maybe_defer_constraints do 245 | # could try to use `raise Parallel::Kill` to fail faster with --fail-fast 246 | # see `fast_faster` branch 247 | # however, need to make sure connections are cleaned up properly 248 | Parallel.each(tasks, **options) do |task| 249 | source.reconnect_if_needed 250 | destination.reconnect_if_needed 251 | 252 | task.perform 253 | end 254 | end 255 | 256 | notices.each do |notice| 257 | warning notice 258 | end 259 | 260 | fail_sync(failed_tables) if failed_tables.any? 
261 | end 262 | 263 | # TODO add option to open transaction on source when manually specifying order of tables 264 | def maybe_defer_constraints 265 | if opts[:disable_integrity] || opts[:disable_integrity_v2] 266 | # create a transaction on the source 267 | # to ensure we get a consistent snapshot 268 | source.transaction do 269 | yield 270 | end 271 | elsif opts[:defer_constraints_v1] || opts[:defer_constraints_v2] 272 | destination.transaction do 273 | if opts[:defer_constraints_v2] 274 | table_constraints = non_deferrable_constraints(destination) 275 | table_constraints.each do |table, constraints| 276 | constraints.each do |constraint| 277 | destination.execute("ALTER TABLE #{quote_ident_full(table)} ALTER CONSTRAINT #{quote_ident(constraint)} DEFERRABLE") 278 | end 279 | end 280 | end 281 | 282 | destination.execute("SET CONSTRAINTS ALL DEFERRED") 283 | 284 | # create a transaction on the source 285 | # to ensure we get a consistent snapshot 286 | source.transaction do 287 | yield 288 | end 289 | 290 | # set them back 291 | # there are 3 modes: DEFERRABLE INITIALLY DEFERRED, DEFERRABLE INITIALLY IMMEDIATE, and NOT DEFERRABLE 292 | # we only update NOT DEFERRABLE 293 | # https://www.postgresql.org/docs/current/sql-set-constraints.html 294 | if opts[:defer_constraints_v2] 295 | destination.execute("SET CONSTRAINTS ALL IMMEDIATE") 296 | 297 | table_constraints.each do |table, constraints| 298 | constraints.each do |constraint| 299 | destination.execute("ALTER TABLE #{quote_ident_full(table)} ALTER CONSTRAINT #{quote_ident(constraint)} NOT DEFERRABLE") 300 | end 301 | end 302 | end 303 | end 304 | else 305 | yield 306 | end 307 | end 308 | 309 | def fail_sync(failed_tables) 310 | raise Error, "Sync failed for #{failed_tables.size} table#{failed_tables.size == 1 ? nil : "s"}: #{failed_tables.join(", ")}" 311 | end 312 | 313 | def display_item(item) 314 | messages = [] 315 | messages << task_name(item) 316 | messages << item.opts[:sql] if item.opts[:sql] 317 | messages.join(" ") 318 | end 319 | 320 | def windows? 321 | Gem.win_platform? 322 | end 323 | end 324 | end 325 | -------------------------------------------------------------------------------- /lib/pgsync/task.rb: -------------------------------------------------------------------------------- 1 | module PgSync 2 | class Task 3 | include Utils 4 | 5 | attr_reader :source, :destination, :config, :table, :opts 6 | attr_accessor :from_columns, :to_columns, :from_sequences, :to_sequences, :to_primary_key 7 | 8 | def initialize(source:, destination:, config:, table:, opts:) 9 | @source = source 10 | @destination = destination 11 | @config = config 12 | @table = table 13 | @opts = opts 14 | @from_sequences = [] 15 | @to_sequences = [] 16 | end 17 | 18 | def quoted_table 19 | quote_ident_full(table) 20 | end 21 | 22 | def perform 23 | with_notices do 24 | handle_errors do 25 | maybe_disable_triggers do 26 | sync_data 27 | end 28 | end 29 | end 30 | end 31 | 32 | def from_fields 33 | @from_fields ||= from_columns.map { |c| c[:name] } 34 | end 35 | 36 | def to_fields 37 | @to_fields ||= to_columns.map { |c| c[:name] } 38 | end 39 | 40 | def shared_fields 41 | @shared_fields ||= to_fields & from_fields 42 | end 43 | 44 | def shared_sequences 45 | @shared_sequences ||= to_sequences & from_sequences 46 | end 47 | 48 | def notes 49 | notes = [] 50 | if shared_fields.empty? 51 | notes << "No fields to copy" 52 | else 53 | extra_fields = to_fields - from_fields 54 | notes << "Extra columns: #{extra_fields.join(", ")}" if extra_fields.any? 
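# "extra" = present in the destination but not the source;
# "missing" = present in the source but not the destination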
55 | 56 | missing_fields = from_fields - to_fields 57 | notes << "Missing columns: #{missing_fields.join(", ")}" if missing_fields.any? 58 | 59 | extra_sequences = to_sequences - from_sequences 60 | notes << "Extra sequences: #{extra_sequences.join(", ")}" if extra_sequences.any? 61 | 62 | missing_sequences = from_sequences - to_sequences 63 | notes << "Missing sequences: #{missing_sequences.join(", ")}" if missing_sequences.any? 64 | 65 | from_types = from_columns.map { |c| [c[:name], c[:type]] }.to_h 66 | to_types = to_columns.map { |c| [c[:name], c[:type]] }.to_h 67 | different_types = [] 68 | shared_fields.each do |field| 69 | if from_types[field] != to_types[field] 70 | different_types << "#{field} (#{from_types[field]} -> #{to_types[field]})" 71 | end 72 | end 73 | notes << "Different column types: #{different_types.join(", ")}" if different_types.any? 74 | end 75 | notes 76 | end 77 | 78 | def sync_data 79 | raise Error, "This should never happen. Please file a bug." if shared_fields.empty? 80 | 81 | sql_clause = String.new("") 82 | sql_clause << " #{opts[:sql]}" if opts[:sql] 83 | 84 | bad_fields = opts[:no_rules] ? [] : config["data_rules"] 85 | primary_key = to_primary_key 86 | copy_fields = shared_fields.map { |f| f2 = bad_fields.to_a.find { |bf, _| rule_match?(table, f, bf) }; f2 ? "#{apply_strategy(f2[1], table, f, primary_key)} AS #{quote_ident(f)}" : "#{quoted_table}.#{quote_ident(f)}" }.join(", ") 87 | fields = shared_fields.map { |f| quote_ident(f) }.join(", ") 88 | 89 | copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quoted_table}#{sql_clause}) TO STDOUT" 90 | if opts[:in_batches] 91 | raise Error, "Primary key required for --in-batches" if primary_key.empty? 92 | primary_key = primary_key.first 93 | 94 | destination.truncate(table) if opts[:truncate] 95 | 96 | from_max_id = source.max_id(table, primary_key) 97 | to_max_id = destination.max_id(table, primary_key) + 1 98 | 99 | if to_max_id == 1 100 | from_min_id = source.min_id(table, primary_key) 101 | to_max_id = from_min_id if from_min_id > 0 102 | end 103 | 104 | starting_id = to_max_id 105 | batch_size = opts[:batch_size] 106 | 107 | i = 1 108 | batch_count = ((from_max_id - starting_id + 1) / batch_size.to_f).ceil 109 | 110 | while starting_id <= from_max_id 111 | where = "#{quote_ident(primary_key)} >= #{starting_id} AND #{quote_ident(primary_key)} < #{starting_id + batch_size}" 112 | log " #{i}/#{batch_count}: #{where}" 113 | 114 | # TODO be smarter for advanced sql clauses 115 | batch_sql_clause = " #{sql_clause.length > 0 ? "#{sql_clause} AND" : "WHERE"} #{where}" 116 | 117 | batch_copy_to_command = "COPY (SELECT #{copy_fields} FROM #{quoted_table}#{batch_sql_clause}) TO STDOUT" 118 | copy(batch_copy_to_command, dest_table: table, dest_fields: fields) 119 | 120 | starting_id += batch_size 121 | i += 1 122 | 123 | if opts[:sleep] && starting_id <= from_max_id 124 | sleep(opts[:sleep]) 125 | end 126 | end 127 | elsif !opts[:truncate] && (opts[:overwrite] || opts[:preserve] || !sql_clause.empty?) 128 | if primary_key.empty?
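# the non-truncate path below upserts with INSERT ... ON CONFLICT (primary key),
# so each of these modes needs a primary key as the conflict target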
129 | raise Error, "Primary key required for --overwrite" if opts[:overwrite] 130 | raise Error, "Primary key required for --preserve" if opts[:preserve] 131 | raise Error, "Primary key required to sync specific rows" 132 | end 133 | 134 | # create a temp table 135 | temp_table = "pgsync_#{rand(1_000_000_000)}" 136 | destination.execute("CREATE TEMPORARY TABLE #{quote_ident_full(temp_table)} AS TABLE #{quoted_table} WITH NO DATA") 137 | 138 | # load data 139 | copy(copy_to_command, dest_table: temp_table, dest_fields: fields) 140 | 141 | on_conflict = primary_key.map { |pk| quote_ident(pk) }.join(", ") 142 | action = 143 | if opts[:preserve] 144 | "NOTHING" 145 | else # overwrite or sql clause 146 | setter = shared_fields.reject { |f| primary_key.include?(f) }.map { |f| "#{quote_ident(f)} = EXCLUDED.#{quote_ident(f)}" } 147 | if setter.any? 148 | "UPDATE SET #{setter.join(", ")}" 149 | else 150 | "NOTHING" 151 | end 152 | end 153 | destination.execute("INSERT INTO #{quoted_table} (#{fields}) (SELECT #{fields} FROM #{quote_ident_full(temp_table)}) ON CONFLICT (#{on_conflict}) DO #{action}") 154 | else 155 | # use delete instead of truncate for foreign keys 156 | if opts[:defer_constraints_v1] || opts[:defer_constraints_v2] 157 | destination.execute("DELETE FROM #{quoted_table}") 158 | else 159 | destination.truncate(table) 160 | end 161 | copy(copy_to_command, dest_table: table, dest_fields: fields) 162 | end 163 | 164 | # update sequences 165 | shared_sequences.each do |seq| 166 | value = source.last_value(seq) 167 | destination.execute("SELECT setval(#{escape(quote_ident_full(seq))}, #{escape(value)})") 168 | end 169 | 170 | {status: "success"} 171 | end 172 | 173 | private 174 | 175 | def with_notices 176 | notices = [] 177 | [source, destination].each do |data_source| 178 | data_source.send(:conn).set_notice_processor do |message| 179 | notices << message.strip 180 | end 181 | end 182 | result = yield 183 | result[:notices] = notices if result 184 | result 185 | ensure 186 | # clear notice processor 187 | [source, destination].each do |data_source| 188 | data_source.send(:conn).set_notice_processor 189 | end 190 | end 191 | 192 | # TODO add retries 193 | def handle_errors 194 | yield 195 | rescue => e 196 | raise e if opts[:debug] 197 | 198 | message = 199 | case e 200 | when PG::ConnectionBad 201 | # likely fine to show simplified message here 202 | # the full message will be shown when first trying to connect 203 | "Connection failed" 204 | when PG::Error 205 | e.message.sub("ERROR: ", "") 206 | when Error 207 | e.message 208 | else 209 | "#{e.class.name}: #{e.message}" 210 | end 211 | 212 | {status: "error", message: message} 213 | end 214 | 215 | def copy(source_command, dest_table:, dest_fields:) 216 | destination_command = "COPY #{quote_ident_full(dest_table)} (#{dest_fields}) FROM STDIN" 217 | 218 | source.log_sql(source_command) 219 | destination.log_sql(destination_command) 220 | 221 | destination.conn.copy_data(destination_command) do 222 | source.conn.copy_data(source_command) do 223 | while (row = source.conn.get_copy_data) 224 | destination.conn.put_copy_data(row) 225 | end 226 | end 227 | end 228 | end 229 | 230 | # TODO better performance 231 | def rule_match?(table, column, rule) 232 | regex = Regexp.new('\A' + Regexp.escape(rule).gsub('\*', '[^\.]*') + '\z') 233 | regex.match(column) || regex.match("#{table.name}.#{column}") || regex.match("#{table.schema}.#{table.name}.#{column}") 234 | end 235 | 236 | # TODO wildcard rules 237 | def apply_strategy(rule, table, column, 
primary_key) 238 | if rule.is_a?(Hash) 239 | if rule.key?("value") 240 | escape(rule["value"]) 241 | elsif rule.key?("statement") 242 | rule["statement"] 243 | else 244 | raise Error, "Unknown rule #{rule.inspect} for column #{column}" 245 | end 246 | else 247 | case rule 248 | when "untouched" 249 | quote_ident(column) 250 | when "unique_email" 251 | "'email' || #{quoted_primary_key(table, primary_key, rule)}::text || '@example.org'" 252 | when "unique_phone" 253 | "(#{quoted_primary_key(table, primary_key, rule)}::bigint + 1000000000)::text" 254 | when "unique_secret" 255 | "'secret' || #{quoted_primary_key(table, primary_key, rule)}::text" 256 | when "random_int", "random_number" 257 | "(RANDOM() * 100)::int" 258 | when "random_date" 259 | "date '1970-01-01' + (RANDOM() * 10000)::int" 260 | when "random_time" 261 | "NOW() - (RANDOM() * 100000000)::int * INTERVAL '1 second'" 262 | when "random_ip" 263 | # casting double to int rounds 264 | "(1 + RANDOM() * 254)::int::text || '.0.0.1'" 265 | when "random_letter" 266 | # casting double to int rounds 267 | "chr(65 + (RANDOM() * 25)::int)" 268 | when "random_string" 269 | "RIGHT(MD5(RANDOM()::text), 10)" 270 | when "null", nil 271 | "NULL" 272 | else 273 | raise Error, "Unknown rule #{rule} for column #{column}" 274 | end 275 | end 276 | end 277 | 278 | def quoted_primary_key(table, primary_key, rule) 279 | raise Error, "Single column primary key required for this data rule: #{rule}" unless primary_key.size == 1 280 | "#{quoted_table}.#{quote_ident(primary_key.first)}" 281 | end 282 | 283 | def maybe_disable_triggers 284 | if opts[:disable_integrity] || opts[:disable_integrity_v2] || opts[:disable_user_triggers] 285 | destination.transaction do 286 | triggers = destination.triggers(table) 287 | triggers.select! { |t| t["enabled"] == "t" } 288 | internal_triggers, user_triggers = triggers.partition { |t| t["internal"] == "t" } 289 | integrity_triggers = internal_triggers.select { |t| t["integrity"] == "t" } 290 | restore_triggers = [] 291 | 292 | # both --disable-integrity options require superuser privileges 293 | # however, only v2 works on Amazon RDS, which added specific support for it 294 | # https://aws.amazon.com/about-aws/whats-new/2014/11/10/amazon-rds-postgresql-read-replicas/ 295 | # 296 | # session_replication_role disables more than foreign keys (like triggers and rules) 297 | # this is probably fine, but keep the current default for now 298 | if opts[:disable_integrity_v2] || (opts[:disable_integrity] && rds?) 299 | # SET LOCAL lasts until the end of the transaction 300 | # https://www.postgresql.org/docs/current/sql-set.html 301 | destination.execute("SET LOCAL session_replication_role = replica") 302 | elsif opts[:disable_integrity] 303 | integrity_triggers.each do |trigger| 304 | destination.execute("ALTER TABLE #{quoted_table} DISABLE TRIGGER #{quote_ident(trigger["name"])}") 305 | end 306 | restore_triggers.concat(integrity_triggers) 307 | end 308 | 309 | if opts[:disable_user_triggers] 310 | # important! 
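# (DISABLE TRIGGER USER targets every user-defined trigger on the table in one
# statement; for the public.robots table from the test schemas, illustratively:
#   ALTER TABLE "public"."robots" DISABLE TRIGGER USER)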
311 | # rely on Postgres to disable user triggers 312 | # we don't want to accidentally disable non-user triggers if logic above is off 313 | destination.execute("ALTER TABLE #{quoted_table} DISABLE TRIGGER USER") 314 | restore_triggers.concat(user_triggers) 315 | end 316 | 317 | result = yield 318 | 319 | # restore triggers that were previously enabled 320 | restore_triggers.each do |trigger| 321 | destination.execute("ALTER TABLE #{quoted_table} ENABLE TRIGGER #{quote_ident(trigger["name"])}") 322 | end 323 | 324 | result 325 | end 326 | else 327 | yield 328 | end 329 | end 330 | 331 | def rds? 332 | destination.execute("SELECT name, setting FROM pg_settings WHERE name LIKE 'rds.%'").any? 333 | end 334 | end 335 | end 336 | -------------------------------------------------------------------------------- /lib/pgsync/task_resolver.rb: -------------------------------------------------------------------------------- 1 | module PgSync 2 | class TaskResolver 3 | include Utils 4 | 5 | attr_reader :args, :opts, :source, :destination, :config, :first_schema, :notes 6 | 7 | def initialize(args:, opts:, source:, destination:, config:, first_schema:) 8 | @args = args 9 | @opts = opts 10 | @source = source 11 | @destination = destination 12 | @config = config 13 | @groups = config["groups"] || {} 14 | @first_schema = first_schema 15 | @notes = [] 16 | end 17 | 18 | def tasks 19 | tasks = [] 20 | 21 | # get lists from args 22 | groups, tables = process_args 23 | 24 | # expand groups into tasks 25 | groups.each do |group| 26 | tasks.concat(group_to_tasks(group)) 27 | end 28 | 29 | # expand tables into tasks 30 | tables.each do |table| 31 | tasks.concat(table_to_tasks(table)) 32 | end 33 | 34 | # get default if none given 35 | if !opts[:groups] && !opts[:tables] && args.size == 0 36 | tasks.concat(default_tasks) 37 | end 38 | 39 | # resolve any tables that need it 40 | tasks.each do |task| 41 | task[:table] = fully_resolve(task[:table]) 42 | end 43 | 44 | tasks 45 | end 46 | 47 | def group?(group) 48 | @groups.key?(group) 49 | end 50 | 51 | private 52 | 53 | def group_to_tasks(value) 54 | group, param = value.split(":", 2) 55 | raise Error, "Group not found: #{group}" unless group?(group) 56 | 57 | @groups[group].map do |table| 58 | table_sql = nil 59 | if table.is_a?(Array) 60 | table, table_sql = table 61 | end 62 | 63 | { 64 | table: to_table(table), 65 | sql: expand_sql(table_sql, param) 66 | } 67 | end 68 | end 69 | 70 | def table_to_tasks(value) 71 | raise Error, "Cannot use parameters with tables" if value.include?(":") 72 | 73 | tables = 74 | if value.include?("*") 75 | regex = Regexp.new('\A' + Regexp.escape(value).gsub('\*', '[^\.]*') + '\z') 76 | shared_tables.select { |t| regex.match(t.full_name) || regex.match(t.name) } 77 | else 78 | [to_table(value)] 79 | end 80 | 81 | tables.map do |table| 82 | { 83 | table: table, 84 | sql: sql_arg # doesn't support params 85 | } 86 | end 87 | end 88 | 89 | # treats identifiers as if they were quoted (Users == "Users") 90 | # this is different from Postgres (Users == "users") 91 | # 92 | # TODO add support for quoted identifiers like "my.schema"."my.table" 93 | # so it's possible to specify identifiers with "." 
in them 94 | def to_table(value) 95 | parts = value.split(".") 96 | case parts.size 97 | when 1 98 | # unknown schema 99 | Table.new(nil, parts[0]) 100 | when 2 101 | Table.new(*parts) 102 | else 103 | raise Error, "Cannot resolve table: #{value}" 104 | end 105 | end 106 | 107 | def default_tasks 108 | shared_tables.map do |table| 109 | { 110 | table: table 111 | } 112 | end 113 | end 114 | 115 | # tables that exist in both source and destination 116 | # used when no tables specified, or a wildcard 117 | # removes excluded tables and filters by schema 118 | def shared_tables 119 | tables = filter_tables(source.tables) 120 | 121 | unless opts[:schema_only] || opts[:schema_first] 122 | from_tables = tables 123 | to_tables = filter_tables(destination.tables) 124 | 125 | extra_tables = to_tables - from_tables 126 | notes << "Extra tables: #{extra_tables.map { |t| friendly_name(t) }.join(", ")}" if extra_tables.any? 127 | 128 | missing_tables = from_tables - to_tables 129 | notes << "Missing tables: #{missing_tables.map { |t| friendly_name(t) }.join(", ")}" if missing_tables.any? 130 | 131 | tables &= to_tables 132 | end 133 | 134 | tables 135 | end 136 | 137 | def filter_tables(tables) 138 | tables = tables.dup 139 | 140 | unless opts[:all_schemas] 141 | # could support wildcard schemas as well 142 | schemas = Set.new(opts[:schemas] ? to_arr(opts[:schemas]) : source.search_path) 143 | tables.select! { |t| schemas.include?(t.schema) } 144 | end 145 | 146 | to_arr(opts[:exclude]).each do |value| 147 | if value.include?("*") 148 | regex = Regexp.new('\A' + Regexp.escape(value).gsub('\*', '[^\.]*') + '\z') 149 | tables.reject! { |t| regex.match(t.full_name) || regex.match(t.name) } 150 | else 151 | tables -= [fully_resolve(to_table(value), error: false)].compact 152 | end 153 | end 154 | 155 | tables 156 | end 157 | 158 | def process_args 159 | groups = to_arr(opts[:groups]) 160 | tables = to_arr(opts[:tables]) 161 | if args[0] 162 | # could be a group, table, or mix 163 | to_arr(args[0]).each do |value| 164 | if group?(value.split(":", 2)[0]) 165 | groups << value 166 | else 167 | tables << value 168 | end 169 | end 170 | end 171 | [groups, tables] 172 | end 173 | 174 | def no_schema_tables 175 | @no_schema_tables ||= begin 176 | search_path_index = source.search_path.map.with_index.to_h 177 | source.tables.group_by(&:name).map do |group, t2| 178 | [group, t2.select { |t| search_path_index[t.schema] }.sort_by { |t| search_path_index[t.schema] }.first] 179 | end.to_h 180 | end 181 | end 182 | 183 | # for tables without a schema, find the table in the search path 184 | def fully_resolve(table, error: true) 185 | return table if table.schema 186 | resolved_table = no_schema_tables[table.name] 187 | raise Error, "Table not found in source: #{table.name}" if !resolved_table && error 188 | resolved_table 189 | end 190 | 191 | # parse command line arguments and YAML 192 | def to_arr(value) 193 | if value.is_a?(Array) 194 | value 195 | else 196 | # Split by commas, but don't split on commas inside double quotes 197 | # https://stackoverflow.com/questions/21105360/regex-find-comma-not-inside-quotes 198 | value.to_s.split(/(?!\B"[^"]*),(?![^"]*"\B)/) 199 | end 200 | end 201 | 202 | def sql_arg 203 | args[1] 204 | end 205 | 206 | def expand_sql(sql, param) 207 | # command line option takes precedence over group option 208 | sql = sql_arg if sql_arg 209 | 210 | return unless sql 211 | 212 | # vars must match \w 213 | missing_vars = sql.scan(/{\w+}/).map { |v| v[1..-2] } 214 | 215 | vars = {} 216 | if param 217 |
vars["id"] = cast(param) 218 | vars["1"] = cast(param) 219 | end 220 | 221 | sql = sql.dup 222 | vars.each do |k, v| 223 | # only sub if in var list 224 | sql.gsub!("{#{k}}", cast(v)) if missing_vars.delete(k) 225 | end 226 | 227 | raise Error, "Missing variables: #{missing_vars.uniq.join(", ")}" if missing_vars.any? 228 | 229 | sql 230 | end 231 | 232 | # TODO quote vars in next major version 233 | def cast(value) 234 | value.to_s.gsub(/\A\"|\"\z/, '') 235 | end 236 | end 237 | end 238 | -------------------------------------------------------------------------------- /lib/pgsync/utils.rb: -------------------------------------------------------------------------------- 1 | module PgSync 2 | module Utils 3 | COLOR_CODES = { 4 | red: 31, 5 | green: 32, 6 | yellow: 33, 7 | cyan: 36 8 | } 9 | 10 | def log(message = nil) 11 | output.puts message 12 | end 13 | 14 | def colorize(message, color) 15 | if output.tty? 16 | "\e[#{COLOR_CODES[color]}m#{message}\e[0m" 17 | else 18 | message 19 | end 20 | end 21 | 22 | def warning(message) 23 | log colorize(message, :yellow) 24 | end 25 | 26 | def deprecated(message) 27 | warning "[DEPRECATED] #{message}" 28 | end 29 | 30 | def output 31 | $stderr 32 | end 33 | 34 | def db_config_file(db) 35 | ".pgsync-#{db}.yml" 36 | end 37 | 38 | def confirm_tables_exist(data_source, tasks, description) 39 | tasks.map(&:table).each do |table| 40 | unless data_source.table_exists?(table) 41 | raise Error, "Table not found in #{description}: #{table}" 42 | end 43 | end 44 | end 45 | 46 | def first_schema 47 | @first_schema ||= source.search_path.find { |sp| sp != "pg_catalog" } 48 | end 49 | 50 | def task_name(task) 51 | friendly_name(task.table) 52 | end 53 | 54 | def friendly_name(table) 55 | if table.schema == first_schema 56 | table.name 57 | else 58 | table.full_name 59 | end 60 | end 61 | 62 | def quote_ident_full(ident) 63 | if ident.is_a?(Table) || ident.is_a?(Sequence) 64 | [quote_ident(ident.schema), quote_ident(ident.name)].join(".") 65 | else # temp table names are strings 66 | quote_ident(ident) 67 | end 68 | end 69 | 70 | def quote_ident(value) 71 | PG::Connection.quote_ident(value) 72 | end 73 | 74 | def escape(value) 75 | if value.is_a?(String) 76 | "'#{quote_string(value)}'" 77 | else 78 | value 79 | end 80 | end 81 | 82 | # activerecord 83 | def quote_string(s) 84 | s.gsub(/\\/, '\&\&').gsub(/'/, "''") 85 | end 86 | 87 | def monotonic_time 88 | Process.clock_gettime(Process::CLOCK_MONOTONIC) 89 | end 90 | end 91 | end 92 | -------------------------------------------------------------------------------- /lib/pgsync/version.rb: -------------------------------------------------------------------------------- 1 | module PgSync 2 | VERSION = "0.8.0" 3 | end 4 | -------------------------------------------------------------------------------- /pgsync.gemspec: -------------------------------------------------------------------------------- 1 | require_relative "lib/pgsync/version" 2 | 3 | Gem::Specification.new do |spec| 4 | spec.name = "pgsync" 5 | spec.version = PgSync::VERSION 6 | spec.summary = "Sync Postgres data between databases" 7 | spec.homepage = "https://github.com/ankane/pgsync" 8 | spec.license = "MIT" 9 | 10 | spec.authors = "Andrew Kane" 11 | spec.email = "andrew@ankane.org" 12 | 13 | spec.files = Dir["*.{md,txt}", "{lib,exe}/**/*", "config.yml"] 14 | spec.require_path = "lib" 15 | 16 | spec.bindir = "exe" 17 | spec.executables = ["pgsync"] 18 | 19 | spec.required_ruby_version = ">= 2.7" 20 | 21 | spec.add_dependency "bigdecimal" 22 | 
spec.add_dependency "parallel" 23 | spec.add_dependency "pg", ">= 0.18.2" 24 | spec.add_dependency "slop", ">= 4.10.1" 25 | spec.add_dependency "tty-spinner" 26 | end 27 | -------------------------------------------------------------------------------- /test/commands_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class CommandsTest < Minitest::Test 4 | def test_help 5 | assert_prints "Usage:", "-h" 6 | assert_prints "Usage:", "--help" 7 | end 8 | 9 | def test_version 10 | assert_prints PgSync::VERSION, "-v" 11 | assert_prints PgSync::VERSION, "--version" 12 | end 13 | 14 | def test_bad_option 15 | assert_error "unknown option", "--bad" 16 | end 17 | 18 | def test_list 19 | output = assert_works "--list", config: true 20 | assert_match "posts", output 21 | end 22 | 23 | def test_config_and_db 24 | assert_error "Specify either --db or --config, not both", "--db test --config .pgsync.yml" 25 | end 26 | 27 | def test_config_not_found 28 | assert_error "Config file not found: bad.yml", "--config bad.yml" 29 | end 30 | 31 | def test_config_absolute_path 32 | path = File.expand_path("test/support/config.yml") 33 | assert_works "--config #{path}" 34 | end 35 | 36 | def test_db_not_found 37 | assert_error "Config file not found: .pgsync-bad.yml", "--db bad" 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /test/data_rules_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class DataRulesTest < Minitest::Test 4 | def setup 5 | truncate_tables ["Users"] 6 | 7 | 2.times do 8 | insert(conn1, "Users", [{ 9 | "email" => "hi@example.org", 10 | "phone" => "555-555-5555", 11 | "token" => "token123", 12 | "attempts" => 1, 13 | "created_on" => Date.today, 14 | "updated_at" => Time.now, 15 | "ip" => "1.1.1.1", 16 | "name" => "Hi", 17 | "nonsense" => "Text", 18 | "untouchable" => "rock" 19 | }]) 20 | end 21 | end 22 | 23 | def test_rules 24 | assert_works "Users", config: true 25 | result = conn2.exec("SELECT * FROM \"Users\"").to_a 26 | row = result.first 27 | assert_equal "email#{row["Id"]}@example.org", row["email"] 28 | assert_equal "secret#{row["Id"]}", row["token"] 29 | assert row["ip"].end_with?("0.0.1") 30 | assert_equal 1, row["name"].size 31 | assert_equal "rock", row["untouchable"] 32 | end 33 | 34 | def test_no_rules 35 | assert_works "Users --no-rules", config: true 36 | result = conn2.exec("SELECT * FROM \"Users\"").to_a 37 | row = result.first 38 | assert_equal "hi@example.org", row["email"] 39 | assert_equal "555-555-5555", row["phone"] 40 | assert_equal "token123", row["token"] 41 | assert_equal 1, row["attempts"] 42 | assert_equal "1.1.1.1", row["ip"] 43 | assert_equal "Hi", row["name"] 44 | assert_equal "Text", row["nonsense"] 45 | assert_equal "rock", row["untouchable"] 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /test/data_source_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class DataSourceTest < Minitest::Test 4 | def test_no_source 5 | assert_error "No source", "" 6 | end 7 | 8 | def test_no_destination 9 | assert_error "No destination", "--from db1" 10 | end 11 | 12 | def test_source_command_error 13 | # https://github.com/oracle/truffleruby/issues/2568 14 | skip if RUBY_ENGINE == "truffleruby" 15 | 16 | assert_error 
"Command exited with non-zero status:\nexit 1", "--config test/support/bad.yml" 17 | end 18 | 19 | def test_source_command_not_run_with_option 20 | assert_works "--config test/support/bad.yml --from pgsync_test1" 21 | end 22 | 23 | def test_database 24 | assert_prints "From: pgsync_test1\nTo: pgsync_test2", "--from pgsync_test1 --to pgsync_test2" 25 | end 26 | 27 | def test_url 28 | assert_prints "From: pgsync_test1 on localhost:5432\nTo: pgsync_test2 on localhost:5432", "--from postgres://localhost/pgsync_test1 --to postgres://localhost/pgsync_test2" 29 | end 30 | 31 | # def test_destination_danger 32 | # assert_error "Danger! Add `to_safe: true` to `.pgsync.yml` if the destination is not localhost or 127.0.0.1", "--from pgsync_test1 --to postgres://hostname/db2" 33 | # end 34 | 35 | def test_nonexistent_source 36 | assert_error "FATAL: database \"db1\" does not exist\n", "--from db1 --to pgsync_test2" 37 | end 38 | 39 | def test_nonexistent_destination 40 | assert_error "FATAL: database \"db2\" does not exist\n", "--from pgsync_test1 --to db2" 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /test/in_batches_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class InBatchesTest < Minitest::Test 4 | def setup 5 | truncate_tables ["posts"] 6 | end 7 | 8 | def test_works 9 | source = 3.times.map { |i| {"id" => i + 1, "title" => "Post #{i + 1}"} } 10 | dest = [] 11 | expected = source 12 | assert_result("--in-batches --batch-size 1", source, dest, expected) 13 | end 14 | 15 | def test_existing_data 16 | source = 3.times.map { |i| {"id" => i + 1, "title" => "Post #{i + 1}"} } 17 | dest = [{"id" => 1, "title" => "First Post"}, {"id" => 4, "title" => "Post 4"}] 18 | expected = dest 19 | assert_result("--in-batches --batch-size 1", source, dest, expected) 20 | end 21 | 22 | def test_overwrite 23 | assert_error "Cannot use --overwrite with --in-batches", "posts --in-batches --overwrite", config: true 24 | end 25 | 26 | def test_multiple_tables 27 | assert_error "Cannot use --in-batches with multiple tables", "--in-batches", config: true 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /test/init_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class InitTest < Minitest::Test 4 | def test_works 5 | new_dir do 6 | assert_works "--init" 7 | assert_match "?sslmode=require", File.read(".pgsync.yml") 8 | end 9 | end 10 | 11 | def test_too_many_arguments 12 | assert_error "Usage:", "--init arg1 arg2" 13 | end 14 | 15 | def test_db_argument 16 | new_dir do 17 | assert_works "--init db2" 18 | assert File.exist?(".pgsync-db2.yml") 19 | end 20 | end 21 | 22 | def test_db_option 23 | new_dir do 24 | assert_works "--init --db db2" 25 | assert File.exist?(".pgsync-db2.yml") 26 | end 27 | end 28 | 29 | def test_config 30 | new_dir do 31 | assert_works "--init --config hi.yml" 32 | assert File.exist?("hi.yml") 33 | end 34 | end 35 | 36 | def test_django 37 | new_dir do 38 | File.write("manage.py", "django") 39 | assert_works "--init" 40 | assert_excludes "django_migrations" 41 | end 42 | end 43 | 44 | def test_heroku 45 | new_dir do 46 | system "git init --quiet" 47 | system "git remote add heroku https://git.heroku.com/test.git" 48 | assert_works "--init" 49 | assert_match "$(heroku config:get DATABASE_URL)?sslmode=require", 
File.read(".pgsync.yml") 50 | end 51 | end 52 | 53 | def test_laravel 54 | new_dir do 55 | File.write("artisan", "") 56 | assert_works "--init" 57 | assert_excludes "migrations" 58 | end 59 | end 60 | 61 | def test_rails 62 | new_dir do 63 | Dir.mkdir("bin") 64 | File.write("bin/rails", "") 65 | assert_works "--init" 66 | assert_excludes "ar_internal_metadata" 67 | assert_excludes "schema_migrations" 68 | end 69 | end 70 | 71 | def new_dir 72 | Dir.chdir(Dir.mktmpdir) do 73 | yield 74 | end 75 | end 76 | 77 | def assert_excludes(table) 78 | assert_match "- #{table}", File.read(".pgsync.yml") 79 | end 80 | end 81 | -------------------------------------------------------------------------------- /test/schema_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class SchemaTest < Minitest::Test 4 | def setup 5 | conn3.exec(File.read("test/support/schema3.sql")) 6 | truncate(conn1, "posts") 7 | end 8 | 9 | def test_schema_only 10 | insert(conn1, "posts", [{"id" => 1}]) 11 | assert_equal [], tables(conn3) 12 | assert_works "--from pgsync_test1 --to pgsync_test3 --schema-only --all-schemas" 13 | assert_equal all_tables, tables(conn3) 14 | assert_equal [], conn3.exec("SELECT * FROM posts").to_a 15 | # make sure all_tables itself isn't broken 16 | assert all_tables.size >= 10 17 | end 18 | 19 | def test_schema_only_table 20 | assert_works "posts --from pgsync_test1 --to pgsync_test3 --schema-only" 21 | assert_equal ["public.posts"], tables(conn3) 22 | end 23 | 24 | def test_schema_only_exclude 25 | assert_works "--exclude Users --from pgsync_test1 --to pgsync_test3 --schema-only --all-schemas" 26 | assert_equal all_tables - ["public.Users"], tables(conn3) 27 | end 28 | 29 | def test_schema_only_schemas 30 | assert_works "--schemas other --from pgsync_test1 --to pgsync_test3 --schema-only" 31 | assert_equal all_tables.select { |t| t.start_with? "other." }, tables(conn3) 32 | assert_equal [], conn3.exec("SELECT id FROM other.pets").to_a 33 | end 34 | 35 | def test_schema_first 36 | insert(conn1, "posts", [{"id" => 1}]) 37 | assert_equal [], tables(conn3) 38 | assert_works "--from pgsync_test1 --to pgsync_test3 --schema-first --all-schemas" 39 | assert_equal all_tables, tables(conn3) 40 | assert_equal [{"id" => 1}], conn3.exec("SELECT id FROM posts").to_a 41 | end 42 | 43 | def all_tables 44 | tables(conn1) 45 | end 46 | 47 | def tables(conn) 48 | # sort in Ruby, as Postgres can return different order on different platforms 49 | query = <<~SQL 50 | SELECT 51 | table_schema || '.' 
|| table_name AS table 52 | FROM 53 | information_schema.tables 54 | WHERE 55 | table_schema NOT IN ('information_schema', 'pg_catalog') 56 | SQL 57 | conn.exec(query).map { |v| v["table"] }.sort 58 | end 59 | end 60 | -------------------------------------------------------------------------------- /test/support/bad.yml: -------------------------------------------------------------------------------- 1 | from: $(exit 1) 2 | to: pgsync_test2 3 | -------------------------------------------------------------------------------- /test/support/config.yml: -------------------------------------------------------------------------------- 1 | from: $(echo "pgsync_test1") 2 | to: pgsync_test2 3 | groups: 4 | group1: 5 | - Users 6 | variable: 7 | posts: "WHERE id = {1}" 8 | variable_id: 9 | posts: "WHERE id = {id}" 10 | group_with_excluded: 11 | - excluded 12 | exclude: 13 | - excluded 14 | - does_not_exist 15 | data_rules: 16 | email: unique_email 17 | phone: unique_phone 18 | token: unique_secret 19 | attempts: random_int 20 | created_on: random_date 21 | updated_at: random_time 22 | public.Users.ip: random_ip 23 | Users.name: random_letter 24 | nonsense: random_string 25 | -------------------------------------------------------------------------------- /test/support/schema1.sql: -------------------------------------------------------------------------------- 1 | DROP SCHEMA IF EXISTS public CASCADE; 2 | DROP SCHEMA IF EXISTS other CASCADE; 3 | 4 | CREATE SCHEMA public; 5 | CREATE SCHEMA other; 6 | 7 | DROP TYPE IF EXISTS mood; 8 | CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy'); 9 | CREATE TABLE "Users" ( 10 | "Id" SERIAL PRIMARY KEY, 11 | zip_code TEXT, 12 | email TEXT, 13 | phone TEXT, 14 | token TEXT, 15 | attempts INT, 16 | created_on DATE, 17 | updated_at TIMESTAMP, 18 | ip TEXT, 19 | name TEXT, 20 | nonsense TEXT, 21 | untouchable TEXT, 22 | "column_with_punctuation?" BOOLEAN, 23 | current_mood mood 24 | ); 25 | 26 | CREATE TABLE posts ( 27 | id SERIAL PRIMARY KEY, 28 | title TEXT 29 | ); 30 | 31 | CREATE TABLE comments ( 32 | id SERIAL PRIMARY KEY, 33 | post_id INTEGER REFERENCES posts(id) 34 | ); 35 | 36 | CREATE TABLE comments2 ( 37 | id SERIAL PRIMARY KEY, 38 | post_id INTEGER REFERENCES posts(id) 39 | ); 40 | 41 | CREATE TABLE books ( 42 | id SERIAL, 43 | id2 SERIAL, 44 | title TEXT, 45 | PRIMARY KEY (id, id2) 46 | ); 47 | 48 | CREATE TABLE authors ( 49 | first_name TEXT 50 | ); 51 | 52 | CREATE TABLE chapters ( 53 | pages INT 54 | ); 55 | 56 | CREATE TABLE stores ( 57 | name TEXT 58 | ); 59 | 60 | CREATE TABLE robots ( 61 | id SERIAL PRIMARY KEY, 62 | name TEXT 63 | ); 64 | 65 | CREATE TABLE excluded ( 66 | id SERIAL PRIMARY KEY 67 | ); 68 | 69 | CREATE TABLE other.pets ( 70 | id SERIAL PRIMARY KEY 71 | ); 72 | 73 | INSERT INTO other.pets 74 | VALUES 75 | (1), 76 | (2), 77 | (3); 78 | -------------------------------------------------------------------------------- /test/support/schema2.sql: -------------------------------------------------------------------------------- 1 | DROP SCHEMA IF EXISTS public CASCADE; 2 | DROP SCHEMA IF EXISTS other CASCADE; 3 | 4 | CREATE SCHEMA public; 5 | CREATE SCHEMA other; 6 | 7 | CREATE TABLE "Users" ( 8 | "Id" SERIAL PRIMARY KEY, 9 | email TEXT, 10 | phone TEXT, 11 | token TEXT, 12 | attempts INT, 13 | created_on DATE, 14 | updated_at TIMESTAMP, 15 | ip TEXT, 16 | name TEXT, 17 | nonsense TEXT, 18 | untouchable TEXT, 19 | "column_with_punctuation?" 
BOOLEAN 20 | ); 21 | 22 | CREATE TABLE posts ( 23 | id SERIAL PRIMARY KEY, 24 | title TEXT 25 | ); 26 | 27 | CREATE TABLE comments ( 28 | id SERIAL PRIMARY KEY, 29 | post_id INTEGER REFERENCES posts(id) DEFERRABLE 30 | ); 31 | 32 | CREATE TABLE comments2 ( 33 | id SERIAL PRIMARY KEY, 34 | post_id INTEGER REFERENCES posts(id) 35 | ); 36 | 37 | CREATE TABLE books ( 38 | id SERIAL, 39 | id2 SERIAL, 40 | title TEXT, 41 | PRIMARY KEY (id, id2) 42 | ); 43 | 44 | CREATE TABLE authors ( 45 | last_name TEXT 46 | ); 47 | 48 | CREATE TABLE chapters ( 49 | pages BIGINT 50 | ); 51 | 52 | CREATE TABLE robots ( 53 | id SERIAL PRIMARY KEY, 54 | name TEXT 55 | ); 56 | CREATE OR REPLACE FUNCTION nope() 57 | RETURNS trigger AS 58 | $$ 59 | BEGIN 60 | RAISE EXCEPTION 'Nope!'; 61 | END; 62 | $$ 63 | LANGUAGE plpgsql; 64 | CREATE TRIGGER nope_trigger BEFORE INSERT OR UPDATE ON robots FOR EACH ROW EXECUTE PROCEDURE nope(); 65 | 66 | CREATE TABLE excluded ( 67 | id SERIAL PRIMARY KEY 68 | ); 69 | 70 | CREATE TABLE other.pets ( 71 | id SERIAL PRIMARY KEY 72 | ); 73 | -------------------------------------------------------------------------------- /test/support/schema3.sql: -------------------------------------------------------------------------------- 1 | DROP SCHEMA IF EXISTS public CASCADE; 2 | DROP SCHEMA IF EXISTS other CASCADE; 3 | 4 | CREATE SCHEMA public; 5 | -------------------------------------------------------------------------------- /test/sync_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class SyncTest < Minitest::Test 4 | def setup 5 | truncate_tables ["posts", "comments", "books", "robots"] 6 | end 7 | 8 | def test_truncate 9 | source = 3.times.map { |i| {"id" => i + 1, "title" => "Post #{i + 1}"} } 10 | dest = [{"id" => 1, "title" => "First Post"}, {"id" => 4, "title" => "Post 4"}] 11 | expected = source 12 | assert_result("", source, dest, expected) 13 | end 14 | 15 | def test_overwrite 16 | source = 3.times.map { |i| {"id" => i + 1, "title" => "Post #{i + 1}"} } 17 | dest = [{"id" => 1, "title" => "First Post"}, {"id" => 4, "title" => "Post 4"}] 18 | expected = source + [dest[1]] 19 | assert_result("--overwrite", source, dest, expected) 20 | end 21 | 22 | def test_preserve 23 | source = 3.times.map { |i| {"id" => i + 1, "title" => "Post #{i + 1}"} } 24 | dest = [{"id" => 1, "title" => "First Post"}, {"id" => 4, "title" => "Post 4"}] 25 | expected = [dest[0]] + source[1..-1] + [dest[1]] 26 | assert_result("--preserve", source, dest, expected) 27 | end 28 | 29 | def test_where 30 | source = 3.times.map { |i| {"id" => i + 1, "title" => "Post #{i + 1}"} } 31 | dest = [] 32 | expected = [source[0]] 33 | assert_result(" 'WHERE id = 1'", source, dest, expected) 34 | end 35 | 36 | def test_overwrite_multicolumn_primary_key 37 | source = [ 38 | {"id" => 1, "id2" => 1, "title" => "Post 1"}, 39 | {"id" => 1, "id2" => 2, "title" => "Post 2"}, 40 | {"id" => 1, "id2" => 3, "title" => "Post 3"} 41 | ] 42 | dest = [{"id" => 1, "id2" => 1, "title" => "First Post"}, {"id" => 1, "id2" => 4, "title" => "Post 4"}] 43 | expected = source + [dest[1]] 44 | assert_result("--overwrite", source, dest, expected, "books") 45 | end 46 | 47 | def test_preserve_multicolumn_primary_key 48 | source = [ 49 | {"id" => 1, "id2" => 1, "title" => "Post 1"}, 50 | {"id" => 1, "id2" => 2, "title" => "Post 2"}, 51 | {"id" => 2, "id2" => 4, "title" => "Post 3"} 52 | ] 53 | dest = [{"id" => 1, "id2" => 1, "title" => "First Post"}, {"id" => 3, "id2" => 
4, "title" => "Post 4"}] 54 | expected = [dest[0]] + source[1..-1] + [dest[1]] 55 | assert_result("--preserve", source, dest, expected, "books") 56 | end 57 | 58 | def test_generated 59 | skip if server_version_num < 120000 60 | 61 | [conn1, conn2].each do |conn| 62 | conn.exec("DROP TABLE IF EXISTS shares") 63 | conn.exec <<~EOS 64 | CREATE TABLE shares ( 65 | id SERIAL PRIMARY KEY, 66 | gen integer GENERATED ALWAYS AS (id + 1) STORED 67 | ); 68 | EOS 69 | end 70 | 71 | source = 3.times.map { |i| {"id" => i + 1, "gen" => i + 2} } 72 | dest = [] 73 | expected = source 74 | assert_result("", source, dest, expected, "shares") 75 | 76 | truncate_tables ["shares"] 77 | assert_result("--overwrite", source, dest, expected, "shares") 78 | 79 | truncate_tables ["shares"] 80 | assert_result("--preserve", source, dest, expected, "shares") 81 | end 82 | 83 | def test_overwrite_no_primary_key 84 | assert_error "chapters (Primary key required for --overwrite)", "chapters --overwrite", config: true 85 | end 86 | 87 | def test_preserve_no_primary_key 88 | assert_error "chapters (Primary key required for --preserve)", "chapters --preserve", config: true 89 | end 90 | 91 | def test_no_shared_fields 92 | assert_prints "authors: No fields to copy", "authors", config: true 93 | end 94 | 95 | def test_missing_column 96 | assert_prints "Missing columns: current_mood, zip_code", "Users", config: true 97 | end 98 | 99 | def test_extra_column 100 | assert_prints "Extra columns: current_mood, zip_code", "Users --from pgsync_test2 --to pgsync_test1" 101 | end 102 | 103 | def test_different_column_types 104 | assert_prints "Different column types: pages (integer -> bigint)", "chapters", config: true 105 | end 106 | 107 | def test_notice 108 | skip if ENV["TRAVIS"] 109 | assert_prints "NOTICE: truncate cascades to table \"comments\"", "posts", config: true 110 | end 111 | 112 | def test_defer_constraints_v1 113 | insert(conn1, "posts", [{"id" => 1}]) 114 | insert(conn1, "comments", [{"post_id" => 1}]) 115 | assert_error "Sync failed for 1 table: comments", "comments,posts --jobs 1", config: true 116 | assert_works "comments,posts --defer-constraints-v1", config: true 117 | assert_works "comments,posts --defer-constraints-v1 --overwrite", config: true 118 | assert_works "comments,posts --defer-constraints-v1 --preserve", config: true 119 | assert_equal [{"id" => 1}], conn2.exec("SELECT id FROM posts ORDER BY id").to_a 120 | assert_equal [{"post_id" => 1}], conn2.exec("SELECT post_id FROM comments ORDER BY post_id").to_a 121 | end 122 | 123 | def test_defer_constraints_v1_not_deferrable 124 | insert(conn1, "posts", [{"id" => 1}]) 125 | insert(conn1, "comments2", [{"post_id" => 1}]) 126 | assert_prints "Non-deferrable constraints: comments2_post_id_fkey", "comments2,posts --defer-constraints-v1", config: true 127 | assert_error "violates foreign key constraint", "comments2,posts --defer-constraints-v1", config: true 128 | end 129 | 130 | def test_defer_constraints 131 | insert(conn1, "posts", [{"id" => 1}]) 132 | insert(conn1, "comments", [{"post_id" => 1}]) 133 | assert_error "Sync failed for 1 table: comments", "comments,posts --jobs 1", config: true 134 | assert_works "comments,posts --defer-constraints", config: true 135 | assert_works "comments,posts --defer-constraints --overwrite", config: true 136 | assert_works "comments,posts --defer-constraints --preserve", config: true 137 | assert_equal [{"id" => 1}], conn2.exec("SELECT id FROM posts ORDER BY id").to_a 138 | assert_equal [{"post_id" => 1}], 
conn2.exec("SELECT post_id FROM comments ORDER BY post_id").to_a 139 | end 140 | 141 | def test_defer_constraints_not_deferrable 142 | insert(conn1, "posts", [{"id" => 1}]) 143 | insert(conn1, "comments2", [{"post_id" => 1}]) 144 | assert_error "Sync failed for 1 table: comments2", "comments2,posts --jobs 1", config: true 145 | assert_works "comments2,posts --defer-constraints", config: true 146 | assert_works "comments2,posts --defer-constraints --overwrite", config: true 147 | assert_works "comments2,posts --defer-constraints --preserve", config: true 148 | assert_equal [{"id" => 1}], conn2.exec("SELECT id FROM posts ORDER BY id").to_a 149 | assert_equal [{"post_id" => 1}], conn2.exec("SELECT post_id FROM comments2 ORDER BY post_id").to_a 150 | end 151 | 152 | def test_disable_user_triggers 153 | insert(conn1, "robots", [{"name" => "Test"}]) 154 | assert_error "Sync failed for 1 table: robots", "robots", config: true 155 | assert_works "robots --disable-user-triggers", config: true 156 | assert_equal [{"name" => "Test"}], conn2.exec("SELECT name FROM robots ORDER BY id").to_a 157 | end 158 | 159 | def test_disable_user_triggers_defer_constraints 160 | insert(conn1, "robots", [{"name" => "Test"}]) 161 | assert_works "robots --disable-user-triggers --defer-constraints", config: true 162 | assert_equal [{"name" => "Test"}], conn2.exec("SELECT name FROM robots ORDER BY id").to_a 163 | end 164 | 165 | def test_disable_integrity 166 | insert(conn1, "posts", [{"id" => 1}]) 167 | insert(conn1, "comments", [{"post_id" => 1}]) 168 | assert_error "Sync failed for 1 table: comments", "comments", config: true 169 | assert_works "comments --disable-integrity", config: true 170 | # integrity is lost! (as expected) 171 | assert_equal [], conn2.exec("SELECT * FROM posts ORDER BY id").to_a 172 | assert_equal [{"post_id" => 1}], conn2.exec("SELECT post_id FROM comments ORDER BY post_id").to_a 173 | end 174 | 175 | def test_disable_integrity_v2 176 | insert(conn1, "posts", [{"id" => 1}]) 177 | insert(conn1, "comments", [{"post_id" => 1}]) 178 | assert_error "Sync failed for 1 table: comments", "comments", config: true 179 | assert_works "comments --disable-integrity-v2", config: true 180 | # integrity is lost! 
(as expected) 181 | assert_equal [], conn2.exec("SELECT * FROM posts ORDER BY id").to_a 182 | assert_equal [{"post_id" => 1}], conn2.exec("SELECT post_id FROM comments ORDER BY post_id").to_a 183 | end 184 | end 185 | -------------------------------------------------------------------------------- /test/tables_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class TablesTest < Minitest::Test 4 | def test_default 5 | tables = list_tables 6 | assert_includes tables, "posts" 7 | refute_includes tables, "other.pets" 8 | refute_includes tables, "excluded" 9 | end 10 | 11 | def test_wildcard 12 | tables = list_tables("p*") 13 | assert_includes tables, "posts" 14 | refute_includes tables, "other.pets" 15 | refute_includes tables, "excluded" 16 | end 17 | 18 | def test_wildcard_all_schemas 19 | tables = list_tables("p* --all-schemas") 20 | assert_includes tables, "posts" 21 | assert_includes tables, "other.pets" 22 | end 23 | 24 | def test_wildcard_schemas 25 | tables = list_tables("p* --schemas public") 26 | assert_includes tables, "posts" 27 | refute_includes tables, "other.pets" 28 | end 29 | 30 | def test_all_schemas 31 | tables = list_tables("--all-schemas") 32 | assert_includes tables, "posts" 33 | assert_includes tables, "other.pets" 34 | refute_includes tables, "excluded" 35 | end 36 | 37 | def test_schemas 38 | tables = list_tables("--schemas public") 39 | assert_includes tables, "posts" 40 | refute_includes tables, "other.pets" 41 | refute_includes tables, "excluded" 42 | end 43 | 44 | def test_exclude_wildcard 45 | tables = list_tables("--exclude p*") 46 | refute_includes tables, "posts" 47 | assert_includes tables, "comments" 48 | end 49 | 50 | def test_exclude_overrides_config 51 | tables = list_tables("--exclude posts") 52 | refute_includes tables, "posts" 53 | assert_includes tables, "excluded" 54 | end 55 | 56 | def test_exclude_not_applied_to_groups 57 | tables = list_tables("group_with_excluded") 58 | assert_includes tables, "excluded" 59 | end 60 | 61 | def test_tables_option 62 | tables = list_tables("--tables posts") 63 | assert_includes tables, "posts" 64 | end 65 | 66 | def test_tables_option_group 67 | assert_error "Table not found in source: group1", "--tables group1", config: true 68 | end 69 | 70 | def test_groups_option 71 | tables = list_tables("--groups group1") 72 | assert_includes tables, "Users" 73 | end 74 | 75 | def test_groups_option_table 76 | assert_error "Group not found: posts", "--groups posts", config: true 77 | end 78 | 79 | def test_table_unknown 80 | assert_error "Table not found in source: bad", "bad", config: true 81 | end 82 | 83 | def test_table_invalid 84 | assert_error "Cannot resolve table: bad.bad.bad", "bad.bad.bad", config: true 85 | end 86 | 87 | def list_tables(command = "") 88 | output = assert_works("--list #{command}", config: true) 89 | output.split("\n")[2..-1] 90 | end 91 | end 92 | -------------------------------------------------------------------------------- /test/test_helper.rb: -------------------------------------------------------------------------------- 1 | require "bundler/setup" 2 | Bundler.require(:default) 3 | require "minitest/autorun" 4 | require "minitest/pride" 5 | require "pg" 6 | require "shellwords" 7 | require "tmpdir" 8 | require "open3" 9 | 10 | def connect(dbname) 11 | conn = PG::Connection.open(dbname: dbname) 12 | conn.exec("SET client_min_messages TO WARNING") 13 | conn.type_map_for_results = 
PG::BasicTypeMapForResults.new(conn) 14 | conn.exec(File.read("test/support/schema#{dbname[-1]}.sql")) 15 | conn 16 | end 17 | 18 | def conn1 19 | @conn1 ||= connect("pgsync_test1") 20 | end 21 | 22 | def conn2 23 | @conn2 ||= connect("pgsync_test2") 24 | end 25 | 26 | def conn3 27 | @conn3 ||= connect("pgsync_test3") 28 | end 29 | 30 | [conn1, conn2, conn3] # setup schema 31 | 32 | class Minitest::Test 33 | def verbose? 34 | ENV["VERBOSE"] 35 | end 36 | 37 | # shelling out for each test is slower 38 | # but it prevents forking from messing up connections 39 | def run_command(command, config: false) 40 | command += " --config test/support/config.yml" if config 41 | if verbose? 42 | puts 43 | puts "$ pgsync #{command}" 44 | end 45 | exe = File.expand_path("../exe/pgsync", __dir__) 46 | output, status = Open3.capture2e(exe, *Shellwords.split(command)) 47 | puts output if verbose? 48 | [output, status] 49 | end 50 | 51 | def assert_works(command, **options) 52 | output, status = run_command(command, **options) 53 | assert status.success?, "Command failed" 54 | output 55 | end 56 | 57 | def assert_error(message, command, **options) 58 | output, status = run_command(command, **options) 59 | assert !status.success? 60 | assert_match message, output 61 | end 62 | 63 | def assert_prints(message, command, **options) 64 | output, _ = run_command(command, **options) 65 | assert_match message, output 66 | end 67 | 68 | def truncate(conn, table) 69 | conn.exec("TRUNCATE #{quote_ident(table)} CASCADE") 70 | end 71 | 72 | def truncate_tables(tables) 73 | [conn1, conn2].each do |conn| 74 | tables.each do |table| 75 | truncate(conn, table) 76 | end 77 | end 78 | end 79 | 80 | def insert(conn, table, rows) 81 | return if rows.empty? 82 | 83 | keys = rows.flat_map { |r| r.keys }.uniq 84 | values = rows.map { |r| keys.map { |k| r[k] } } 85 | 86 | key_str = keys.map { |k| quote_ident(k) }.join(", ") 87 | params_str = values.size.times.map { |i| "(" + keys.size.times.map { |j| "$#{i * keys.size + j + 1}" }.join(", ") + ")" }.join(", ") 88 | insert_str = "INSERT INTO #{quote_ident(table)} (#{key_str}) VALUES #{params_str}" 89 | conn.exec_params(insert_str, values.flatten) 90 | end 91 | 92 | def quote_ident(ident) 93 | PG::Connection.quote_ident(ident) 94 | end 95 | 96 | def assert_result(command, source, dest, expected, table = "posts") 97 | insert(conn1, table, source.map(&:dup).map { |v| v.delete("gen"); v }) 98 | insert(conn2, table, dest) 99 | 100 | assert_equal source, conn1.exec("SELECT * FROM #{table} ORDER BY 1, 2").to_a 101 | assert_equal dest, conn2.exec("SELECT * FROM #{table} ORDER BY 1, 2").to_a 102 | 103 | assert_works "#{table} #{command}", config: true 104 | 105 | assert_equal source, conn1.exec("SELECT * FROM #{table} ORDER BY 1, 2").to_a 106 | assert_equal expected, conn2.exec("SELECT * FROM #{table} ORDER BY 1, 2").to_a 107 | end 108 | 109 | def server_version_num 110 | conn1.exec("SHOW server_version_num").first["server_version_num"].to_i 111 | end 112 | end 113 | -------------------------------------------------------------------------------- /test/variable_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class VariableTest < Minitest::Test 4 | def setup 5 | truncate_tables ["posts"] 6 | end 7 | 8 | def test_number 9 | source = 3.times.map { |i| {"id" => i + 1, "title" => "Post #{i + 1}"} } 10 | expected = [source[1]] 11 | 12 | insert(conn1, "posts", source) 13 | assert_works "variable:2", config: true 14 | 
assert_equal expected, conn2.exec("SELECT * FROM posts ORDER BY 1, 2").to_a 15 | end 16 | 17 | def test_id 18 | source = 3.times.map { |i| {"id" => i + 1, "title" => "Post #{i + 1}"} } 19 | expected = [source[1]] 20 | 21 | insert(conn1, "posts", source) 22 | assert_works "variable_id:2", config: true 23 | assert_equal expected, conn2.exec("SELECT * FROM posts ORDER BY 1, 2").to_a 24 | end 25 | 26 | def test_missing 27 | assert_error "Missing variables: 1", "variable", config: true 28 | end 29 | 30 | def test_table 31 | assert_error "Cannot use parameters with tables", "posts:123", config: true 32 | end 33 | end 34 | --------------------------------------------------------------------------------