├── .github └── workflows │ └── build.yml ├── .gitignore ├── CHANGELOG.md ├── Dockerfile ├── Gemfile ├── LICENSE.txt ├── README.md ├── Rakefile ├── exe └── pgslice ├── lib ├── pgslice.rb └── pgslice │ ├── cli.rb │ ├── cli │ ├── add_partitions.rb │ ├── analyze.rb │ ├── fill.rb │ ├── prep.rb │ ├── swap.rb │ ├── unprep.rb │ └── unswap.rb │ ├── helpers.rb │ ├── table.rb │ └── version.rb ├── pgslice.gemspec └── test ├── pgslice_test.rb ├── support └── schema.sql └── test_helper.rb /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | on: [push, pull_request] 3 | jobs: 4 | build: 5 | runs-on: ubuntu-latest 6 | strategy: 7 | fail-fast: false 8 | matrix: 9 | include: 10 | - ruby: 3.4 11 | postgres: 17 12 | - ruby: 3.3 13 | postgres: 16 14 | - ruby: 3.2 15 | postgres: 15 16 | - ruby: 3.1 17 | postgres: 14 18 | - ruby: "3.0" 19 | postgres: 13 20 | steps: 21 | - uses: actions/checkout@v4 22 | - uses: ruby/setup-ruby@v1 23 | with: 24 | ruby-version: ${{ matrix.ruby }} 25 | bundler-cache: true 26 | - uses: ankane/setup-postgres@v1 27 | with: 28 | postgres-version: ${{ matrix.postgres }} 29 | database: pgslice_test 30 | - run: bundle exec rake test 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.bundle/ 2 | /.yardoc 3 | /Gemfile.lock 4 | /_yardoc/ 5 | /coverage/ 6 | /doc/ 7 | /pkg/ 8 | /spec/reports/ 9 | /tmp/ 10 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.7.0 (2025-05-26) 2 | 3 | - Dropped support for Ruby < 3 4 | - Dropped support for Postgres < 13 5 | 6 | ## 0.6.1 (2023-04-26) 7 | 8 | - Fixed `uninitialized constant` error 9 | 10 | ## 0.6.0 (2023-04-22) 11 | 12 | - Added support for generated columns 13 | - Added compression and extended statistics to `prep` 14 | - Dropped support for Ruby < 2.7 15 | - Dropped support for Postgres < 11 16 | 17 | ## 0.5.0 (2023-01-29) 18 | 19 | - Dropped support for Ruby < 2.5 20 | 21 | ## 0.4.8 (2022-02-28) 22 | 23 | - Fixed error with pg 1.3 24 | - Reduced size of Docker image 25 | 26 | ## 0.4.7 (2020-08-14) 27 | 28 | - Added `--tablespace` option to `add_partitions` 29 | - Fixed sequence query if sequence in different schema than table 30 | 31 | ## 0.4.6 (2020-05-29) 32 | 33 | - Ensure correct order with multi-column primary keys 34 | - Ensure fill always uses correct date range (bug introduced in 0.4.5) 35 | 36 | ## 0.4.5 (2018-10-18) 37 | 38 | - Added support for Postgres 11 foreign key improvements 39 | - Improved versioning 40 | 41 | ## 0.4.4 (2018-08-18) 42 | 43 | - Added partitioning by `year` 44 | - Fixed `--source-table` and `--dest-table` options 45 | - Added descriptions to options 46 | 47 | ## 0.4.3 (2018-08-16) 48 | 49 | - Fixed sequence ownership 50 | - Improved help 51 | 52 | ## 0.4.2 (2018-07-23) 53 | 54 | - Added support for Postgres 11 index improvements 55 | - Added support for all connection options 56 | 57 | ## 0.4.1 (2018-04-30) 58 | 59 | - Better support for schemas 60 | - Use latest partition for schema 61 | - Added support for composite primary keys 62 | 63 | ## 0.4.0 (2017-10-07) 64 | 65 | - Added support for declarative partitioning 66 | - Added support for foreign keys 67 | 68 | ## 0.3.6 (2017-07-10) 69 | 70 | - Fixed drop trigger on `unprep` for non-lowercase tables 71 | - Fixed index creation for non-lowercase tables 72 | 73 | ## 0.3.5 (2017-07-06) 74 | 75 | - Added support for non-lowercase tables and columns 76 | 77 | ## 0.3.4 (2017-07-06) 78 | 79 | - Added `analyze` method 80 | - Fixed `fill` with `--dry-run` option 81 | - Better error message for tables without primary key 82 | 83 | ## 0.3.3 (2017-03-22) 84 | 85 | - Fixed error when creating partitions 86 | 87 | ## 0.3.2 (2016-12-15) 88 | 89 | - Exit with error code on interrupt 90 | - Fixed `--start` option with `--swapped` 91 | 92 | ## 0.3.1 (2016-12-13) 93 | 94 | - Fixed exception with `--no-partition` option 95 | - Use proper cast type in `fill` method for legacy `timestamptz` columns 96 | 97 | ## 0.3.0 (2016-12-12) 98 | 99 | - Better query performance for `timestamptz` columns 100 | - Added support for schemas other than `public` 101 | 102 | ## 0.2.3 (2016-10-10) 103 | 104 | - Added `--dest-table` option to `fill` 105 | - Fixed errors with `fill` when no partitions created 106 | 107 | ## 0.2.2 (2016-10-06) 108 | 109 | - Set `lock_timeout` on `swap` to prevent bad things from happening 110 | - Friendlier error messages 111 | 112 | ## 0.2.1 (2016-09-28) 113 | 114 | - Added `--where` option to `fill` 115 | - Fixed partition detection with `fill` 116 | - Fixed error for columns named `user` with `fill` 117 | 118 | ## 0.2.0 (2016-09-22) 119 | 120 | - Switched to new trigger, which is about 20% faster 121 | 122 | ## 0.1.7 (2016-09-14) 123 | 124 | - Added `--source-table` option to `fill` 125 | 126 | ## 0.1.6 (2016-08-04) 127 | 128 | - Added `--no-partition` option to `prep` 129 | - Added `--url` option 130 | 131 | ## 0.1.5 (2016-04-26) 132 | 133 | - Removed `activesupport` dependency for speed 134 | - Fixed `fill` for months 135 | 136 | ## 0.1.4 (2016-04-24) 137 | 138 | - Added sequence ownership 139 | - Default to 0 for `--past` and `--future` options 140 | - Better `fill` with `--swapped` 141 | 142 | ## 0.1.3 (2016-04-24) 143 | 144 | - Fixed table inheritance 145 | 146 | ## 0.1.2 (2016-04-24) 147 | 148 | - Added `--dry-run` option 149 | - Print sql to stdout instead of stderr 150 | 151 | ## 0.1.1 (2016-04-24) 152 | 153 | - Added sql commands to output 154 | 155 | ## 0.1.0 (2016-04-24) 156 | 157 | - First release 158 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ruby:3-alpine 2 | 3 | LABEL org.opencontainers.image.authors="Andrew Kane " 4 | 5 | RUN apk add --update build-base libpq-dev && \ 6 | gem install pgslice && \ 7 | apk del build-base && \ 8 | rm -rf /var/cache/apk/* 9 | 10 | ENTRYPOINT ["pgslice"] 11 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | gemspec 4 | 5 | gem "rake" 6 | gem "minitest", ">= 5" 7 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016-2025 Andrew Kane 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pgslice 2 | 3 | Postgres partitioning as easy as pie. Works great for both new and existing tables, with zero downtime and minimal app changes. No need to install anything on your database server. Archive older data on a rolling basis to keep your database size under control. 4 | 5 | :tangerine: Battle-tested at [Instacart](https://www.instacart.com/opensource) 6 | 7 | [![Build Status](https://github.com/ankane/pgslice/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/pgslice/actions) 8 | 9 | ## Install 10 | 11 | pgslice is a command line tool. To install, run: 12 | 13 | ```sh 14 | gem install pgslice 15 | ``` 16 | 17 | This will give you the `pgslice` command. You can also install it with [Homebrew](#homebrew) or [Docker](#docker). If installation fails, you may need to install [dependencies](#dependencies). 18 | 19 | ## Steps 20 | 21 | 1. Ensure the table you want to partition has been created. We’ll refer to this as ``. 22 | 23 | 2. Specify your database credentials 24 | 25 | ```sh 26 | export PGSLICE_URL=postgres://localhost/myapp_development 27 | ``` 28 | 29 | 3. Create an intermediate table 30 | 31 | ```sh 32 | pgslice prep
33 | ``` 34 | 35 | The column should be a `timestamp`, `timestamptz`, or `date` column and period can be `day`, `month`, or `year`. 36 | 37 | This creates a partitioned table named `
_intermediate` using range partitioning. 38 | 39 | 4. Add partitions to the intermediate table 40 | 41 | ```sh 42 | pgslice add_partitions
--intermediate --past 3 --future 3 43 | ``` 44 | 45 | Use the `--past` and `--future` options to control the number of partitions. 46 | 47 | 5. *Optional, for tables with data* - Fill the partitions in batches with data from the original table 48 | 49 | ```sh 50 | pgslice fill
51 | ``` 52 | 53 | Use the `--batch-size` and `--sleep` options to control the speed (defaults to `10000` and `0` respectively) 54 | 55 | To sync data across different databases, check out [pgsync](https://github.com/ankane/pgsync). 56 | 57 | 6. Analyze tables 58 | 59 | ```sh 60 | pgslice analyze
61 | ``` 62 | 63 | 7. Swap the intermediate table with the original table 64 | 65 | ```sh 66 | pgslice swap
67 | ``` 68 | 69 | The original table is renamed `
_retired` and the intermediate table is renamed `
`. 70 | 71 | 8. Fill the rest (rows inserted between the first fill and the swap) 72 | 73 | ```sh 74 | pgslice fill
--swapped 75 | ``` 76 | 77 | 9. Back up the retired table with a tool like [pg_dump](https://www.postgresql.org/docs/current/static/app-pgdump.html) and drop it 78 | 79 | ```sql 80 | pg_dump -c -Fc -t
_retired $PGSLICE_URL >
_retired.dump 81 | psql -c "DROP TABLE
_retired" $PGSLICE_URL 82 | ``` 83 | 84 | ## Sample Output 85 | 86 | pgslice prints the SQL commands that were executed on the server. To print without executing, use the `--dry-run` option. 87 | 88 | ```sh 89 | pgslice prep visits created_at month 90 | ``` 91 | 92 | ```sql 93 | BEGIN; 94 | 95 | CREATE TABLE "public"."visits_intermediate" (LIKE "public"."visits" INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING STORAGE INCLUDING COMMENTS INCLUDING STATISTICS INCLUDING GENERATED INCLUDING COMPRESSION) PARTITION BY RANGE ("created_at"); 96 | 97 | CREATE INDEX ON "public"."visits_intermediate" USING btree ("created_at"); 98 | 99 | COMMENT ON TABLE "public"."visits_intermediate" is 'column:created_at,period:month,cast:date,version:3'; 100 | 101 | COMMIT; 102 | ``` 103 | 104 | ```sh 105 | pgslice add_partitions visits --intermediate --past 1 --future 1 106 | ``` 107 | 108 | ```sql 109 | BEGIN; 110 | 111 | CREATE TABLE "public"."visits_202408" PARTITION OF "public"."visits_intermediate" FOR VALUES FROM ('2024-08-01') TO ('2024-09-01'); 112 | 113 | ALTER TABLE "public"."visits_202408" ADD PRIMARY KEY ("id"); 114 | 115 | CREATE TABLE "public"."visits_202409" PARTITION OF "public"."visits_intermediate" FOR VALUES FROM ('2024-09-01') TO ('2024-10-01'); 116 | 117 | ALTER TABLE "public"."visits_202409" ADD PRIMARY KEY ("id"); 118 | 119 | CREATE TABLE "public"."visits_202410" PARTITION OF "public"."visits_intermediate" FOR VALUES FROM ('2024-10-01') TO ('2024-11-01'); 120 | 121 | ALTER TABLE "public"."visits_202410" ADD PRIMARY KEY ("id"); 122 | 123 | COMMIT; 124 | ``` 125 | 126 | ```sh 127 | pgslice fill visits 128 | ``` 129 | 130 | ```sql 131 | /* 1 of 3 */ 132 | INSERT INTO "public"."visits_intermediate" ("id", "user_id", "ip", "created_at") 133 | SELECT "id", "user_id", "ip", "created_at" FROM "public"."visits" 134 | WHERE "id" > 0 AND "id" <= 10000 AND "created_at" >= '2024-08-01'::date AND "created_at" < '2024-11-01'::date 135 | 136 | /* 2 of 3 */ 137 | INSERT INTO "public"."visits_intermediate" ("id", "user_id", "ip", "created_at") 138 | SELECT "id", "user_id", "ip", "created_at" FROM "public"."visits" 139 | WHERE "id" > 10000 AND "id" <= 20000 AND "created_at" >= '2024-08-01'::date AND "created_at" < '2024-11-01'::date 140 | 141 | /* 3 of 3 */ 142 | INSERT INTO "public"."visits_intermediate" ("id", "user_id", "ip", "created_at") 143 | SELECT "id", "user_id", "ip", "created_at" FROM "public"."visits" 144 | WHERE "id" > 20000 AND "id" <= 30000 AND "created_at" >= '2024-08-01'::date AND "created_at" < '2024-11-01'::date 145 | ``` 146 | 147 | ```sh 148 | pgslice analyze visits 149 | ``` 150 | 151 | ```sql 152 | ANALYZE VERBOSE "public"."visits_202408"; 153 | 154 | ANALYZE VERBOSE "public"."visits_202409"; 155 | 156 | ANALYZE VERBOSE "public"."visits_202410"; 157 | 158 | ANALYZE VERBOSE "public"."visits_intermediate"; 159 | ``` 160 | 161 | ```sh 162 | pgslice swap visits 163 | ``` 164 | 165 | ```sql 166 | BEGIN; 167 | 168 | SET LOCAL lock_timeout = '5s'; 169 | 170 | ALTER TABLE "public"."visits" RENAME TO "visits_retired"; 171 | 172 | ALTER TABLE "public"."visits_intermediate" RENAME TO "visits"; 173 | 174 | ALTER SEQUENCE "public"."visits_id_seq" OWNED BY "public"."visits"."id"; 175 | 176 | COMMIT; 177 | ``` 178 | 179 | ## Adding Partitions 180 | 181 | To add partitions, use: 182 | 183 | ```sh 184 | pgslice add_partitions
--future 3 185 | ``` 186 | 187 | Add this as a cron job to create a new partition each day, month, or year. 188 | 189 | ```sh 190 | # day 191 | 0 0 * * * pgslice add_partitions
--future 3 --url ... 192 | 193 | # month 194 | 0 0 1 * * pgslice add_partitions
--future 3 --url ... 195 | 196 | # year 197 | 0 0 1 1 * pgslice add_partitions
--future 3 --url ... 198 | ``` 199 | 200 | Add a monitor to ensure partitions are being created. 201 | 202 | ```sql 203 | SELECT 1 FROM 204 | pg_catalog.pg_class c 205 | INNER JOIN 206 | pg_catalog.pg_namespace n ON n.oid = c.relnamespace 207 | WHERE 208 | c.relkind = 'r' AND 209 | n.nspname = 'public' AND 210 | c.relname = '
_' || to_char(NOW() + INTERVAL '3 days', 'YYYYMMDD') 211 | -- for months, use to_char(NOW() + INTERVAL '3 months', 'YYYYMM') 212 | -- for years, use to_char(NOW() + INTERVAL '3 years', 'YYYY') 213 | ``` 214 | 215 | ## Archiving Partitions 216 | 217 | Back up and drop older partitions each day, month, or year. 218 | 219 | ```sh 220 | pg_dump -c -Fc -t
_202409 $PGSLICE_URL >
_202409.dump 221 | psql -c "DROP TABLE
_202409" $PGSLICE_URL 222 | ``` 223 | 224 | If you use [Amazon S3](https://aws.amazon.com/s3/) for backups, [s3cmd](https://github.com/s3tools/s3cmd) is a nice tool. 225 | 226 | ```sh 227 | s3cmd put
_202409.dump s3:///
_202409.dump 228 | ``` 229 | 230 | ## Schema Updates 231 | 232 | Once a table is partitioned, make schema updates on the master table only (not partitions). This includes adding, removing, and modifying columns, as well as adding and removing indexes and foreign keys. 233 | 234 | ## Additional Commands 235 | 236 | To undo prep (which will delete partitions), use: 237 | 238 | ```sh 239 | pgslice unprep
240 | ``` 241 | 242 | To undo swap, use: 243 | 244 | ```sh 245 | pgslice unswap
246 | ``` 247 | 248 | ## Additional Options 249 | 250 | Set the tablespace when adding partitions 251 | 252 | ```sh 253 | pgslice add_partitions
--tablespace fastspace 254 | ``` 255 | 256 | ## App Considerations 257 | 258 | This set up allows you to read and write with the original table name with no knowledge it’s partitioned. However, there are a few things to be aware of. 259 | 260 | ### Reads 261 | 262 | When possible, queries should include the column you partition on to limit the number of partitions the database needs to check. For instance, if you partition on `created_at`, try to include it in queries: 263 | 264 | ```sql 265 | SELECT * FROM 266 | visits 267 | WHERE 268 | user_id = 123 AND 269 | -- for performance 270 | created_at >= '2024-09-01' AND created_at < '2024-09-02' 271 | ``` 272 | 273 | For this to be effective, ensure `constraint_exclusion` is set to `partition` (the default value) or `on`. 274 | 275 | ```sql 276 | SHOW constraint_exclusion; 277 | ``` 278 | 279 | ## Frameworks 280 | 281 | ### Rails 282 | 283 | Specify the primary key for partitioned models to ensure it’s returned. 284 | 285 | ```ruby 286 | class Visit < ApplicationRecord 287 | self.primary_key = "id" 288 | end 289 | ``` 290 | 291 | ### Other Frameworks 292 | 293 | Please submit a PR if additional configuration is needed. 294 | 295 | ## One Off Tasks 296 | 297 | You can also use pgslice to reduce the size of a table without partitioning by creating a new table, filling it with a subset of records, and swapping it in. 298 | 299 | ```sh 300 | pgslice prep
--no-partition 301 | pgslice fill
--where "id > 1000" # use any conditions 302 | pgslice swap
303 | ``` 304 | 305 | ## Triggers 306 | 307 | Triggers aren’t copied from the original table. You can set up triggers on the intermediate table if needed. Note that Postgres doesn’t support `BEFORE / FOR EACH ROW` triggers on partitioned tables. 308 | 309 | ## Data Protection 310 | 311 | Always make sure your [connection is secure](https://ankane.org/postgres-sslmode-explained) when connecting to a database over a network you don’t fully trust. Your best option is to connect over SSH or a VPN. Another option is to use `sslmode=verify-full`. If you don’t do this, your database credentials can be compromised. 312 | 313 | ## Additional Installation Methods 314 | 315 | ### Homebrew 316 | 317 | With Homebrew, you can use: 318 | 319 | ```sh 320 | brew install ankane/brew/pgslice 321 | ``` 322 | 323 | ### Docker 324 | 325 | Get the [Docker image](https://hub.docker.com/r/ankane/pgslice) with: 326 | 327 | ```sh 328 | docker pull ankane/pgslice 329 | alias pgslice="docker run --rm -e PGSLICE_URL ankane/pgslice" 330 | ``` 331 | 332 | This will give you the `pgslice` command. 333 | 334 | ## Dependencies 335 | 336 | If installation fails, your system may be missing Ruby or libpq. 337 | 338 | On Mac, run: 339 | 340 | ```sh 341 | brew install libpq 342 | ``` 343 | 344 | On Ubuntu, run: 345 | 346 | ```sh 347 | sudo apt-get install ruby-dev libpq-dev build-essential 348 | ``` 349 | 350 | ## Upgrading 351 | 352 | Run: 353 | 354 | ```sh 355 | gem install pgslice 356 | ``` 357 | 358 | To use master, run: 359 | 360 | ```sh 361 | gem install specific_install 362 | gem specific_install https://github.com/ankane/pgslice.git 363 | ``` 364 | 365 | ## Reference 366 | 367 | - [PostgreSQL Manual](https://www.postgresql.org/docs/current/static/ddl-partitioning.html) 368 | 369 | ## Related Projects 370 | 371 | Also check out: 372 | 373 | - [Dexter](https://github.com/ankane/dexter) - The automatic indexer for Postgres 374 | - [PgHero](https://github.com/ankane/pghero) - A performance dashboard for Postgres 375 | - [pgsync](https://github.com/ankane/pgsync) - Sync Postgres data to your local machine 376 | 377 | ## History 378 | 379 | View the [changelog](https://github.com/ankane/pgslice/blob/master/CHANGELOG.md) 380 | 381 | ## Contributing 382 | 383 | Everyone is encouraged to help improve this project. Here are a few ways you can help: 384 | 385 | - [Report bugs](https://github.com/ankane/pgslice/issues) 386 | - Fix bugs and [submit pull requests](https://github.com/ankane/pgslice/pulls) 387 | - Write, clarify, or fix documentation 388 | - Suggest or add new features 389 | 390 | To get started with development: 391 | 392 | ```sh 393 | git clone https://github.com/ankane/pgslice.git 394 | cd pgslice 395 | bundle install 396 | createdb pgslice_test 397 | bundle exec rake test 398 | ``` 399 | 400 | To test against different versions of Postgres with Docker, use: 401 | 402 | ```sh 403 | docker run -p=8000:5432 postgres:16 404 | TZ=Etc/UTC PGSLICE_URL=postgres://postgres@localhost:8000/postgres bundle exec rake 405 | ``` 406 | 407 | On Mac, you must use [Docker Desktop](https://www.docker.com/products/docker-desktop/) for the port mapping to localhost to work. 408 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "bundler/gem_tasks" 2 | require "rake/testtask" 3 | 4 | Rake::TestTask.new(:test) do |t| 5 | t.libs << "test" 6 | t.libs << "lib" 7 | t.test_files = FileList["test/**/*_test.rb"] 8 | end 9 | 10 | task default: :test 11 | 12 | namespace :docker do 13 | task :build do 14 | require_relative "lib/pgslice/version" 15 | 16 | system "docker build --pull --no-cache -t ankane/pgslice:latest -t ankane/pgslice:v#{PgSlice::VERSION} .", exception: true 17 | end 18 | 19 | task :release do 20 | require_relative "lib/pgslice/version" 21 | 22 | system "docker buildx build --push --pull --no-cache --platform linux/amd64,linux/arm64 -t ankane/pgslice:latest -t ankane/pgslice:v#{PgSlice::VERSION} .", exception: true 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /exe/pgslice: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | # handle interrupts 4 | trap("SIGINT") { abort } 5 | 6 | require "pgslice" 7 | PgSlice::CLI.start 8 | -------------------------------------------------------------------------------- /lib/pgslice.rb: -------------------------------------------------------------------------------- 1 | # dependencies 2 | require "pg" 3 | require "thor" 4 | 5 | # stdlib 6 | require "cgi" 7 | require "time" 8 | require "uri" 9 | 10 | # modules 11 | require_relative "pgslice/helpers" 12 | require_relative "pgslice/table" 13 | require_relative "pgslice/version" 14 | 15 | # commands 16 | require_relative "pgslice/cli" 17 | require_relative "pgslice/cli/add_partitions" 18 | require_relative "pgslice/cli/analyze" 19 | require_relative "pgslice/cli/fill" 20 | require_relative "pgslice/cli/prep" 21 | require_relative "pgslice/cli/swap" 22 | require_relative "pgslice/cli/unprep" 23 | require_relative "pgslice/cli/unswap" 24 | -------------------------------------------------------------------------------- /lib/pgslice/cli.rb: -------------------------------------------------------------------------------- 1 | module PgSlice 2 | class CLI < Thor 3 | class << self 4 | attr_accessor :instance 5 | end 6 | 7 | include Helpers 8 | 9 | check_unknown_options! 10 | 11 | class_option :url, desc: "Database URL" 12 | class_option :dry_run, type: :boolean, default: false, desc: "Print statements without executing" 13 | 14 | map %w[--version -v] => :version 15 | 16 | def self.exit_on_failure? 17 | ENV["PGSLICE_ENV"] != "test" 18 | end 19 | 20 | def initialize(*args) 21 | PgSlice::CLI.instance = self 22 | $stdout.sync = true 23 | $stderr.sync = true 24 | super 25 | end 26 | 27 | desc "version", "Show version" 28 | def version 29 | log("pgslice #{PgSlice::VERSION}") 30 | end 31 | end 32 | end 33 | -------------------------------------------------------------------------------- /lib/pgslice/cli/add_partitions.rb: -------------------------------------------------------------------------------- 1 | module PgSlice 2 | class CLI 3 | desc "add_partitions TABLE", "Add partitions" 4 | option :intermediate, type: :boolean, default: false, desc: "Add to intermediate table" 5 | option :past, type: :numeric, default: 0, desc: "Number of past partitions to add" 6 | option :future, type: :numeric, default: 0, desc: "Number of future partitions to add" 7 | option :tablespace, type: :string, default: "", desc: "Tablespace to use" 8 | def add_partitions(table) 9 | original_table = create_table(table) 10 | table = options[:intermediate] ? original_table.intermediate_table : original_table 11 | trigger_name = original_table.trigger_name 12 | 13 | assert_table(table) 14 | 15 | future = options[:future] 16 | past = options[:past] 17 | tablespace = options[:tablespace] 18 | range = (-1 * past)..future 19 | 20 | period, field, cast, needs_comment, declarative, version = table.fetch_settings(original_table.trigger_name) 21 | unless period 22 | message = "No settings found: #{table}" 23 | message = "#{message}\nDid you mean to use --intermediate?" unless options[:intermediate] 24 | abort message 25 | end 26 | 27 | queries = [] 28 | 29 | if needs_comment 30 | queries << "COMMENT ON TRIGGER #{quote_ident(trigger_name)} ON #{quote_table(table)} IS 'column:#{field},period:#{period},cast:#{cast}';" 31 | end 32 | 33 | # today = utc date 34 | today = round_date(Time.now.utc.to_date, period) 35 | 36 | schema_table = 37 | if !declarative 38 | table 39 | elsif options[:intermediate] 40 | original_table 41 | else 42 | table.partitions.last 43 | end 44 | 45 | # indexes automatically propagate in Postgres 11+ 46 | if version < 3 47 | index_defs = schema_table.index_defs 48 | fk_defs = schema_table.foreign_keys 49 | else 50 | index_defs = [] 51 | fk_defs = [] 52 | end 53 | 54 | primary_key = schema_table.primary_key 55 | tablespace_str = tablespace.empty? ? "" : " TABLESPACE #{quote_ident(tablespace)}" 56 | 57 | added_partitions = [] 58 | range.each do |n| 59 | day = advance_date(today, period, n) 60 | 61 | partition = Table.new(original_table.schema, "#{original_table.name}_#{day.strftime(name_format(period))}") 62 | next if partition.exists? 63 | added_partitions << partition 64 | 65 | if declarative 66 | queries << <<~SQL 67 | CREATE TABLE #{quote_table(partition)} PARTITION OF #{quote_table(table)} FOR VALUES FROM (#{sql_date(day, cast, false)}) TO (#{sql_date(advance_date(day, period, 1), cast, false)})#{tablespace_str}; 68 | SQL 69 | else 70 | queries << <<~SQL 71 | CREATE TABLE #{quote_table(partition)} 72 | (CHECK (#{quote_ident(field)} >= #{sql_date(day, cast)} AND #{quote_ident(field)} < #{sql_date(advance_date(day, period, 1), cast)})) 73 | INHERITS (#{quote_table(table)})#{tablespace_str}; 74 | SQL 75 | end 76 | 77 | queries << "ALTER TABLE #{quote_table(partition)} ADD PRIMARY KEY (#{primary_key.map { |k| quote_ident(k) }.join(", ")});" if primary_key.any? 78 | 79 | index_defs.each do |index_def| 80 | queries << make_index_def(index_def, partition) 81 | end 82 | 83 | fk_defs.each do |fk_def| 84 | queries << make_fk_def(fk_def, partition) 85 | end 86 | end 87 | 88 | unless declarative 89 | # update trigger based on existing partitions 90 | current_defs = [] 91 | future_defs = [] 92 | past_defs = [] 93 | name_format = self.name_format(period) 94 | partitions = (table.partitions + added_partitions).uniq(&:name).sort_by(&:name) 95 | 96 | partitions.each do |partition| 97 | day = partition_date(partition, name_format) 98 | 99 | # note: does not support generated columns 100 | # could support by listing columns 101 | # but this would cause issues with schema changes 102 | sql = "(NEW.#{quote_ident(field)} >= #{sql_date(day, cast)} AND NEW.#{quote_ident(field)} < #{sql_date(advance_date(day, period, 1), cast)}) THEN 103 | INSERT INTO #{quote_table(partition)} VALUES (NEW.*);" 104 | 105 | if day.to_date < today 106 | past_defs << sql 107 | elsif advance_date(day, period, 1) < today 108 | current_defs << sql 109 | else 110 | future_defs << sql 111 | end 112 | end 113 | 114 | # order by current period, future periods asc, past periods desc 115 | trigger_defs = current_defs + future_defs + past_defs.reverse 116 | 117 | if trigger_defs.any? 118 | queries << <<~SQL 119 | CREATE OR REPLACE FUNCTION #{quote_ident(trigger_name)}() 120 | RETURNS trigger AS $$ 121 | BEGIN 122 | IF #{trigger_defs.join("\n ELSIF ")} 123 | ELSE 124 | RAISE EXCEPTION 'Date out of range. Ensure partitions are created.'; 125 | END IF; 126 | RETURN NULL; 127 | END; 128 | $$ LANGUAGE plpgsql; 129 | SQL 130 | end 131 | end 132 | 133 | run_queries(queries) if queries.any? 134 | end 135 | end 136 | end 137 | -------------------------------------------------------------------------------- /lib/pgslice/cli/analyze.rb: -------------------------------------------------------------------------------- 1 | module PgSlice 2 | class CLI 3 | desc "analyze TABLE", "Analyze tables" 4 | option :swapped, type: :boolean, default: false, desc: "Use swapped table" 5 | def analyze(table) 6 | table = create_table(table) 7 | parent_table = options[:swapped] ? table : table.intermediate_table 8 | 9 | analyze_list = parent_table.partitions + [parent_table] 10 | run_queries_without_transaction(analyze_list.map { |t| "ANALYZE VERBOSE #{quote_table(t)};" }) 11 | end 12 | end 13 | end 14 | -------------------------------------------------------------------------------- /lib/pgslice/cli/fill.rb: -------------------------------------------------------------------------------- 1 | module PgSlice 2 | class CLI 3 | desc "fill TABLE", "Fill the partitions in batches" 4 | option :batch_size, type: :numeric, default: 10000, desc: "Batch size" 5 | option :swapped, type: :boolean, default: false, desc: "Use swapped table" 6 | option :source_table, desc: "Source table" 7 | option :dest_table, desc: "Destination table" 8 | option :start, type: :numeric, desc: "Primary key to start" 9 | option :where, desc: "Conditions to filter" 10 | option :sleep, type: :numeric, desc: "Seconds to sleep between batches" 11 | def fill(table) 12 | table = create_table(table) 13 | source_table = create_table(options[:source_table]) if options[:source_table] 14 | dest_table = create_table(options[:dest_table]) if options[:dest_table] 15 | 16 | if options[:swapped] 17 | source_table ||= table.retired_table 18 | dest_table ||= table 19 | else 20 | source_table ||= table 21 | dest_table ||= table.intermediate_table 22 | end 23 | 24 | assert_table(source_table) 25 | assert_table(dest_table) 26 | 27 | period, field, cast, _, declarative, _ = dest_table.fetch_settings(table.trigger_name) 28 | 29 | if period 30 | name_format = self.name_format(period) 31 | 32 | partitions = dest_table.partitions 33 | if partitions.any? 34 | starting_time = partition_date(partitions.first, name_format) 35 | ending_time = advance_date(partition_date(partitions.last, name_format), period, 1) 36 | end 37 | end 38 | 39 | schema_table = period && declarative ? partitions.last : table 40 | 41 | primary_key = schema_table.primary_key[0] 42 | abort "No primary key" unless primary_key 43 | 44 | max_source_id = nil 45 | begin 46 | max_source_id = source_table.max_id(primary_key) 47 | rescue PG::UndefinedFunction 48 | abort "Only numeric primary keys are supported" 49 | end 50 | 51 | max_dest_id = 52 | if options[:start] 53 | options[:start] 54 | elsif options[:swapped] 55 | dest_table.max_id(primary_key, where: options[:where], below: max_source_id) 56 | else 57 | dest_table.max_id(primary_key, where: options[:where]) 58 | end 59 | 60 | if max_dest_id == 0 && !options[:swapped] 61 | min_source_id = source_table.min_id(primary_key, field, cast, starting_time, options[:where]) 62 | max_dest_id = min_source_id - 1 if min_source_id 63 | end 64 | 65 | starting_id = max_dest_id 66 | fields = source_table.columns.map { |c| quote_ident(c) }.join(", ") 67 | batch_size = options[:batch_size] 68 | 69 | i = 1 70 | batch_count = ((max_source_id - starting_id) / batch_size.to_f).ceil 71 | 72 | if batch_count == 0 73 | log_sql "/* nothing to fill */" 74 | end 75 | 76 | while starting_id < max_source_id 77 | where = "#{quote_ident(primary_key)} > #{starting_id} AND #{quote_ident(primary_key)} <= #{starting_id + batch_size}" 78 | if starting_time 79 | where << " AND #{quote_ident(field)} >= #{sql_date(starting_time, cast)} AND #{quote_ident(field)} < #{sql_date(ending_time, cast)}" 80 | end 81 | if options[:where] 82 | where << " AND #{options[:where]}" 83 | end 84 | 85 | query = <<~SQL 86 | /* #{i} of #{batch_count} */ 87 | INSERT INTO #{quote_table(dest_table)} (#{fields}) 88 | SELECT #{fields} FROM #{quote_table(source_table)} 89 | WHERE #{where} 90 | SQL 91 | 92 | run_query(query) 93 | 94 | starting_id += batch_size 95 | i += 1 96 | 97 | if options[:sleep] && starting_id <= max_source_id 98 | sleep(options[:sleep]) 99 | end 100 | end 101 | end 102 | end 103 | end 104 | -------------------------------------------------------------------------------- /lib/pgslice/cli/prep.rb: -------------------------------------------------------------------------------- 1 | module PgSlice 2 | class CLI 3 | desc "prep TABLE [COLUMN] [PERIOD]", "Create an intermediate table for partitioning" 4 | option :partition, type: :boolean, default: true, desc: "Partition the table" 5 | option :trigger_based, type: :boolean, default: false, desc: "Use trigger-based partitioning" 6 | option :test_version, type: :numeric, hide: true 7 | def prep(table, column = nil, period = nil) 8 | table = create_table(table) 9 | intermediate_table = table.intermediate_table 10 | trigger_name = table.trigger_name 11 | 12 | unless options[:partition] 13 | abort "Usage: \"pgslice prep TABLE --no-partition\"" if column || period 14 | abort "Can't use --trigger-based and --no-partition" if options[:trigger_based] 15 | end 16 | assert_table(table) 17 | assert_no_table(intermediate_table) 18 | 19 | if options[:partition] 20 | abort "Usage: \"pgslice prep TABLE COLUMN PERIOD\"" if !(column && period) 21 | abort "Column not found: #{column}" unless table.columns.include?(column) 22 | abort "Invalid period: #{period}" unless SQL_FORMAT[period.to_sym] 23 | end 24 | 25 | queries = [] 26 | 27 | # version summary 28 | # 1. trigger-based (pg9) 29 | # 2. declarative, with indexes and foreign keys on child tables (pg10) 30 | # 3. declarative, with indexes and foreign keys on parent table (pg11+) 31 | version = options[:test_version] || (options[:trigger_based] ? 1 : 3) 32 | 33 | declarative = version > 1 34 | 35 | if declarative && options[:partition] 36 | including = ["DEFAULTS", "CONSTRAINTS", "STORAGE", "COMMENTS", "STATISTICS"] 37 | if server_version_num >= 120000 38 | including << "GENERATED" 39 | end 40 | if server_version_num >= 140000 41 | including << "COMPRESSION" 42 | end 43 | queries << <<~SQL 44 | CREATE TABLE #{quote_table(intermediate_table)} (LIKE #{quote_table(table)} #{including.map { |v| "INCLUDING #{v}" }.join(" ")}) PARTITION BY RANGE (#{quote_ident(column)}); 45 | SQL 46 | 47 | if version == 3 48 | index_defs = table.index_defs 49 | index_defs.each do |index_def| 50 | queries << make_index_def(index_def, intermediate_table) 51 | end 52 | 53 | table.foreign_keys.each do |fk_def| 54 | queries << make_fk_def(fk_def, intermediate_table) 55 | end 56 | end 57 | 58 | # add comment 59 | cast = table.column_cast(column) 60 | queries << <<~SQL 61 | COMMENT ON TABLE #{quote_table(intermediate_table)} IS 'column:#{column},period:#{period},cast:#{cast},version:#{version}'; 62 | SQL 63 | else 64 | queries << <<~SQL 65 | CREATE TABLE #{quote_table(intermediate_table)} (LIKE #{quote_table(table)} INCLUDING ALL); 66 | SQL 67 | 68 | table.foreign_keys.each do |fk_def| 69 | queries << make_fk_def(fk_def, intermediate_table) 70 | end 71 | end 72 | 73 | if options[:partition] && !declarative 74 | queries << <<~SQL 75 | CREATE FUNCTION #{quote_ident(trigger_name)}() 76 | RETURNS trigger AS $$ 77 | BEGIN 78 | RAISE EXCEPTION 'Create partitions first.'; 79 | END; 80 | $$ LANGUAGE plpgsql; 81 | SQL 82 | 83 | queries << <<~SQL 84 | CREATE TRIGGER #{quote_ident(trigger_name)} 85 | BEFORE INSERT ON #{quote_table(intermediate_table)} 86 | FOR EACH ROW EXECUTE PROCEDURE #{quote_ident(trigger_name)}(); 87 | SQL 88 | 89 | cast = table.column_cast(column) 90 | queries << <<~SQL 91 | COMMENT ON TRIGGER #{quote_ident(trigger_name)} ON #{quote_table(intermediate_table)} IS 'column:#{column},period:#{period},cast:#{cast}'; 92 | SQL 93 | end 94 | 95 | run_queries(queries) 96 | end 97 | end 98 | end 99 | -------------------------------------------------------------------------------- /lib/pgslice/cli/swap.rb: -------------------------------------------------------------------------------- 1 | module PgSlice 2 | class CLI 3 | desc "swap TABLE", "Swap the intermediate table with the original table" 4 | option :lock_timeout, default: "5s", desc: "Lock timeout" 5 | def swap(table) 6 | table = create_table(table) 7 | intermediate_table = table.intermediate_table 8 | retired_table = table.retired_table 9 | 10 | assert_table(table) 11 | assert_table(intermediate_table) 12 | assert_no_table(retired_table) 13 | 14 | queries = [ 15 | "ALTER TABLE #{quote_table(table)} RENAME TO #{quote_no_schema(retired_table)};", 16 | "ALTER TABLE #{quote_table(intermediate_table)} RENAME TO #{quote_no_schema(table)};" 17 | ] 18 | 19 | table.sequences.each do |sequence| 20 | queries << "ALTER SEQUENCE #{quote_ident(sequence["sequence_schema"])}.#{quote_ident(sequence["sequence_name"])} OWNED BY #{quote_table(table)}.#{quote_ident(sequence["related_column"])};" 21 | end 22 | 23 | queries.unshift("SET LOCAL lock_timeout = #{escape_literal(options[:lock_timeout])};") 24 | 25 | run_queries(queries) 26 | end 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /lib/pgslice/cli/unprep.rb: -------------------------------------------------------------------------------- 1 | module PgSlice 2 | class CLI 3 | desc "unprep TABLE", "Undo prep" 4 | def unprep(table) 5 | table = create_table(table) 6 | intermediate_table = table.intermediate_table 7 | trigger_name = table.trigger_name 8 | 9 | assert_table(intermediate_table) 10 | 11 | queries = [ 12 | "DROP TABLE #{quote_table(intermediate_table)} CASCADE;", 13 | "DROP FUNCTION IF EXISTS #{quote_ident(trigger_name)}();" 14 | ] 15 | run_queries(queries) 16 | end 17 | end 18 | end 19 | -------------------------------------------------------------------------------- /lib/pgslice/cli/unswap.rb: -------------------------------------------------------------------------------- 1 | module PgSlice 2 | class CLI 3 | desc "unswap TABLE", "Undo swap" 4 | def unswap(table) 5 | table = create_table(table) 6 | intermediate_table = table.intermediate_table 7 | retired_table = table.retired_table 8 | 9 | assert_table(table) 10 | assert_table(retired_table) 11 | assert_no_table(intermediate_table) 12 | 13 | queries = [ 14 | "ALTER TABLE #{quote_table(table)} RENAME TO #{quote_no_schema(intermediate_table)};", 15 | "ALTER TABLE #{quote_table(retired_table)} RENAME TO #{quote_no_schema(table)};" 16 | ] 17 | 18 | table.sequences.each do |sequence| 19 | queries << "ALTER SEQUENCE #{quote_ident(sequence["sequence_schema"])}.#{quote_ident(sequence["sequence_name"])} OWNED BY #{quote_table(table)}.#{quote_ident(sequence["related_column"])};" 20 | end 21 | 22 | run_queries(queries) 23 | end 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /lib/pgslice/helpers.rb: -------------------------------------------------------------------------------- 1 | module PgSlice 2 | module Helpers 3 | SQL_FORMAT = { 4 | day: "YYYYMMDD", 5 | month: "YYYYMM", 6 | year: "YYYY" 7 | } 8 | 9 | protected 10 | 11 | # output 12 | 13 | def log(message = nil) 14 | error message 15 | end 16 | 17 | def log_sql(message = nil) 18 | say message 19 | end 20 | 21 | def abort(message) 22 | raise Thor::Error, message 23 | end 24 | 25 | # database connection 26 | 27 | def connection 28 | @connection ||= begin 29 | url = options[:url] || ENV["PGSLICE_URL"] 30 | abort "Set PGSLICE_URL or use the --url option" unless url 31 | 32 | uri = URI.parse(url) 33 | params = CGI.parse(uri.query.to_s) 34 | # remove schema 35 | @schema = Array(params.delete("schema") || "public")[0] 36 | uri.query = params.any? ? URI.encode_www_form(params) : nil 37 | 38 | ENV["PGCONNECT_TIMEOUT"] ||= "3" 39 | conn = PG::Connection.new(uri.to_s) 40 | conn.set_notice_processor do |message| 41 | say message 42 | end 43 | @server_version_num = conn.exec("SHOW server_version_num")[0]["server_version_num"].to_i 44 | if @server_version_num < 130000 45 | abort "This version of pgslice requires Postgres 13+" 46 | end 47 | conn 48 | end 49 | rescue PG::ConnectionBad => e 50 | abort e.message 51 | rescue URI::InvalidURIError 52 | abort "Invalid url" 53 | end 54 | 55 | def schema 56 | connection # ensure called first 57 | @schema 58 | end 59 | 60 | def execute(query, params = []) 61 | connection.exec_params(query, params).to_a 62 | end 63 | 64 | def run_queries(queries) 65 | connection.transaction do 66 | execute("SET LOCAL client_min_messages TO warning") unless options[:dry_run] 67 | log_sql "BEGIN;" 68 | log_sql 69 | run_queries_without_transaction(queries) 70 | log_sql "COMMIT;" 71 | end 72 | end 73 | 74 | def run_query(query) 75 | log_sql query 76 | unless options[:dry_run] 77 | begin 78 | execute(query) 79 | rescue PG::ServerError => e 80 | abort("#{e.class.name}: #{e.message}") 81 | end 82 | end 83 | log_sql 84 | end 85 | 86 | def run_queries_without_transaction(queries) 87 | queries.each do |query| 88 | run_query(query) 89 | end 90 | end 91 | 92 | def server_version_num 93 | connection # ensure called first 94 | @server_version_num 95 | end 96 | 97 | # helpers 98 | 99 | def sql_date(time, cast, add_cast = true) 100 | if cast == "timestamptz" 101 | fmt = "%Y-%m-%d %H:%M:%S UTC" 102 | else 103 | fmt = "%Y-%m-%d" 104 | end 105 | str = escape_literal(time.strftime(fmt)) 106 | add_cast ? "#{str}::#{cast}" : str 107 | end 108 | 109 | def name_format(period) 110 | case period.to_sym 111 | when :day 112 | "%Y%m%d" 113 | when :month 114 | "%Y%m" 115 | else 116 | "%Y" 117 | end 118 | end 119 | 120 | def partition_date(partition, name_format) 121 | DateTime.strptime(partition.name.split("_").last, name_format) 122 | end 123 | 124 | def round_date(date, period) 125 | date = date.to_date 126 | case period.to_sym 127 | when :day 128 | date 129 | when :month 130 | Date.new(date.year, date.month) 131 | else 132 | Date.new(date.year) 133 | end 134 | end 135 | 136 | def assert_table(table) 137 | abort "Table not found: #{table}" unless table.exists? 138 | end 139 | 140 | def assert_no_table(table) 141 | abort "Table already exists: #{table}" if table.exists? 142 | end 143 | 144 | def advance_date(date, period, count = 1) 145 | date = date.to_date 146 | case period.to_sym 147 | when :day 148 | date.next_day(count) 149 | when :month 150 | date.next_month(count) 151 | else 152 | date.next_year(count) 153 | end 154 | end 155 | 156 | def quote_ident(value) 157 | PG::Connection.quote_ident(value) 158 | end 159 | 160 | def escape_literal(value) 161 | connection.escape_literal(value) 162 | end 163 | 164 | def quote_table(table) 165 | table.quote_table 166 | end 167 | 168 | def quote_no_schema(table) 169 | quote_ident(table.name) 170 | end 171 | 172 | def create_table(name) 173 | if name.include?(".") 174 | schema, name = name.split(".", 2) 175 | else 176 | schema = self.schema 177 | end 178 | Table.new(schema, name) 179 | end 180 | 181 | def make_index_def(index_def, table) 182 | index_def.sub(/ ON \S+ USING /, " ON #{quote_table(table)} USING ").sub(/ INDEX .+ ON /, " INDEX ON ") + ";" 183 | end 184 | 185 | def make_fk_def(fk_def, table) 186 | "ALTER TABLE #{quote_table(table)} ADD #{fk_def};" 187 | end 188 | 189 | def make_stat_def(stat_def, table) 190 | m = /ON (.+) FROM/.match(stat_def) 191 | # errors on duplicate names, but should be rare 192 | stat_name = "#{table}_#{m[1].split(", ").map { |v| v.gsub(/\W/i, "") }.join("_")}_stat" 193 | stat_def.sub(/ FROM \S+/, " FROM #{quote_table(table)}").sub(/ STATISTICS .+ ON /, " STATISTICS #{quote_ident(stat_name)} ON ") + ";" 194 | end 195 | end 196 | end 197 | -------------------------------------------------------------------------------- /lib/pgslice/table.rb: -------------------------------------------------------------------------------- 1 | module PgSlice 2 | class Table 3 | attr_reader :schema, :name 4 | 5 | def initialize(schema, name) 6 | @schema = schema 7 | @name = name 8 | end 9 | 10 | def to_s 11 | [schema, name].join(".") 12 | end 13 | 14 | def exists? 15 | execute("SELECT COUNT(*) FROM pg_catalog.pg_tables WHERE schemaname = $1 AND tablename = $2", [schema, name]).first["count"].to_i > 0 16 | end 17 | 18 | def columns 19 | execute("SELECT column_name FROM information_schema.columns WHERE table_schema = $1 AND table_name = $2 AND is_generated = 'NEVER'", [schema, name]).map { |r| r["column_name"] } 20 | end 21 | 22 | # http://www.dbforums.com/showthread.php?1667561-How-to-list-sequences-and-the-columns-by-SQL 23 | def sequences 24 | query = <<~SQL 25 | SELECT 26 | a.attname AS related_column, 27 | n.nspname AS sequence_schema, 28 | s.relname AS sequence_name 29 | FROM pg_class s 30 | INNER JOIN pg_depend d ON d.objid = s.oid 31 | INNER JOIN pg_class t ON d.objid = s.oid AND d.refobjid = t.oid 32 | INNER JOIN pg_attribute a ON (d.refobjid, d.refobjsubid) = (a.attrelid, a.attnum) 33 | INNER JOIN pg_namespace n ON n.oid = s.relnamespace 34 | INNER JOIN pg_namespace nt ON nt.oid = t.relnamespace 35 | WHERE s.relkind = 'S' 36 | AND nt.nspname = $1 37 | AND t.relname = $2 38 | ORDER BY s.relname ASC 39 | SQL 40 | execute(query, [schema, name]) 41 | end 42 | 43 | def foreign_keys 44 | execute("SELECT pg_get_constraintdef(oid) FROM pg_constraint WHERE conrelid = #{regclass} AND contype ='f'").map { |r| r["pg_get_constraintdef"] } 45 | end 46 | 47 | # https://stackoverflow.com/a/20537829 48 | # TODO can simplify with array_position in Postgres 9.5+ 49 | def primary_key 50 | query = <<~SQL 51 | SELECT 52 | pg_attribute.attname, 53 | format_type(pg_attribute.atttypid, pg_attribute.atttypmod), 54 | pg_attribute.attnum, 55 | pg_index.indkey 56 | FROM 57 | pg_index, pg_class, pg_attribute, pg_namespace 58 | WHERE 59 | nspname = $1 AND 60 | relname = $2 AND 61 | indrelid = pg_class.oid AND 62 | pg_class.relnamespace = pg_namespace.oid AND 63 | pg_attribute.attrelid = pg_class.oid AND 64 | pg_attribute.attnum = any(pg_index.indkey) AND 65 | indisprimary 66 | SQL 67 | rows = execute(query, [schema, name]) 68 | rows.sort_by { |r| r["indkey"].split(" ").index(r["attnum"]) }.map { |r| r["attname"] } 69 | end 70 | 71 | def index_defs 72 | execute("SELECT pg_get_indexdef(indexrelid) FROM pg_index WHERE indrelid = #{regclass} AND indisprimary = 'f'").map { |r| r["pg_get_indexdef"] } 73 | end 74 | 75 | def quote_table 76 | [quote_ident(schema), quote_ident(name)].join(".") 77 | end 78 | 79 | def intermediate_table 80 | self.class.new(schema, "#{name}_intermediate") 81 | end 82 | 83 | def retired_table 84 | self.class.new(schema, "#{name}_retired") 85 | end 86 | 87 | def trigger_name 88 | "#{name}_insert_trigger" 89 | end 90 | 91 | def column_cast(column) 92 | data_type = execute("SELECT data_type FROM information_schema.columns WHERE table_schema = $1 AND table_name = $2 AND column_name = $3", [schema, name, column])[0]["data_type"] 93 | data_type == "timestamp with time zone" ? "timestamptz" : "date" 94 | end 95 | 96 | def max_id(primary_key, below: nil, where: nil) 97 | query = "SELECT MAX(#{quote_ident(primary_key)}) FROM #{quote_table}" 98 | conditions = [] 99 | conditions << "#{quote_ident(primary_key)} <= #{below}" if below 100 | conditions << where if where 101 | query << " WHERE #{conditions.join(" AND ")}" if conditions.any? 102 | execute(query)[0]["max"].to_i 103 | end 104 | 105 | def min_id(primary_key, column, cast, starting_time, where) 106 | query = "SELECT MIN(#{quote_ident(primary_key)}) FROM #{quote_table}" 107 | conditions = [] 108 | conditions << "#{quote_ident(column)} >= #{sql_date(starting_time, cast)}" if starting_time 109 | conditions << where if where 110 | query << " WHERE #{conditions.join(" AND ")}" if conditions.any? 111 | (execute(query)[0]["min"] || 1).to_i 112 | end 113 | 114 | # ensure this returns partitions in the correct order 115 | def partitions 116 | query = <<~SQL 117 | SELECT 118 | nmsp_child.nspname AS schema, 119 | child.relname AS name 120 | FROM pg_inherits 121 | JOIN pg_class parent ON pg_inherits.inhparent = parent.oid 122 | JOIN pg_class child ON pg_inherits.inhrelid = child.oid 123 | JOIN pg_namespace nmsp_parent ON nmsp_parent.oid = parent.relnamespace 124 | JOIN pg_namespace nmsp_child ON nmsp_child.oid = child.relnamespace 125 | WHERE 126 | nmsp_parent.nspname = $1 AND 127 | parent.relname = $2 128 | ORDER BY child.relname ASC 129 | SQL 130 | execute(query, [schema, name]).map { |r| Table.new(r["schema"], r["name"]) } 131 | end 132 | 133 | def fetch_comment 134 | execute("SELECT obj_description(#{regclass}) AS comment")[0] 135 | end 136 | 137 | def fetch_trigger(trigger_name) 138 | execute("SELECT obj_description(oid, 'pg_trigger') AS comment FROM pg_trigger WHERE tgname = $1 AND tgrelid = #{regclass}", [trigger_name])[0] 139 | end 140 | 141 | # legacy 142 | def fetch_settings(trigger_name) 143 | needs_comment = false 144 | trigger_comment = fetch_trigger(trigger_name) 145 | comment = trigger_comment || fetch_comment 146 | if comment 147 | field, period, cast, version = comment["comment"].split(",").map { |v| v.split(":").last } rescue [] 148 | version = version.to_i if version 149 | end 150 | 151 | unless period 152 | needs_comment = true 153 | function_def = execute("SELECT pg_get_functiondef(oid) FROM pg_proc WHERE proname = $1", [trigger_name])[0] 154 | return [] unless function_def 155 | function_def = function_def["pg_get_functiondef"] 156 | sql_format = Helpers::SQL_FORMAT.find { |_, f| function_def.include?("'#{f}'") } 157 | return [] unless sql_format 158 | period = sql_format[0] 159 | field = /to_char\(NEW\.(\w+),/.match(function_def)[1] 160 | end 161 | 162 | # backwards compatibility with 0.2.3 and earlier (pre-timestamptz support) 163 | unless cast 164 | cast = "date" 165 | # update comment to explicitly define cast 166 | needs_comment = true 167 | end 168 | 169 | version ||= trigger_comment ? 1 : 2 170 | declarative = version > 1 171 | 172 | [period, field, cast, needs_comment, declarative, version] 173 | end 174 | 175 | protected 176 | 177 | def execute(*args) 178 | PgSlice::CLI.instance.send(:execute, *args) 179 | end 180 | 181 | def escape_literal(value) 182 | PgSlice::CLI.instance.send(:escape_literal, value) 183 | end 184 | 185 | def quote_ident(value) 186 | PG::Connection.quote_ident(value) 187 | end 188 | 189 | def regclass 190 | "#{escape_literal(quote_table)}::regclass" 191 | end 192 | 193 | def sql_date(time, cast, add_cast = true) 194 | if cast == "timestamptz" 195 | fmt = "%Y-%m-%d %H:%M:%S UTC" 196 | else 197 | fmt = "%Y-%m-%d" 198 | end 199 | str = escape_literal(time.strftime(fmt)) 200 | add_cast ? "#{str}::#{cast}" : str 201 | end 202 | end 203 | end 204 | -------------------------------------------------------------------------------- /lib/pgslice/version.rb: -------------------------------------------------------------------------------- 1 | module PgSlice 2 | VERSION = "0.7.0" 3 | end 4 | -------------------------------------------------------------------------------- /pgslice.gemspec: -------------------------------------------------------------------------------- 1 | require_relative "lib/pgslice/version" 2 | 3 | Gem::Specification.new do |spec| 4 | spec.name = "pgslice" 5 | spec.version = PgSlice::VERSION 6 | spec.summary = "Postgres partitioning as easy as pie" 7 | spec.homepage = "https://github.com/ankane/pgslice" 8 | spec.license = "MIT" 9 | 10 | spec.author = "Andrew Kane" 11 | spec.email = "andrew@ankane.org" 12 | 13 | spec.files = Dir["*.{md,txt}", "{lib,exe}/**/*"] 14 | spec.require_path = "lib" 15 | 16 | spec.bindir = "exe" 17 | spec.executables = ["pgslice"] 18 | 19 | spec.required_ruby_version = ">= 3" 20 | 21 | spec.add_dependency "pg", ">= 1" 22 | spec.add_dependency "thor" 23 | end 24 | -------------------------------------------------------------------------------- /test/pgslice_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class PgSliceTest < Minitest::Test 4 | def setup 5 | $conn.exec File.read("test/support/schema.sql") 6 | end 7 | 8 | def test_day 9 | assert_period "day" 10 | end 11 | 12 | def test_month 13 | assert_period "month" 14 | end 15 | 16 | def test_year 17 | assert_period "year" 18 | end 19 | 20 | def test_date 21 | assert_period "year", column: "createdOn" 22 | end 23 | 24 | def test_timestamptz 25 | assert_period "year", column: "createdAtTz" 26 | end 27 | 28 | def test_no_partition 29 | run_command "prep Posts --no-partition" 30 | assert table_exists?("Posts_intermediate") 31 | assert_equal 0, count("Posts_intermediate") 32 | 33 | run_command "fill Posts" 34 | assert_equal 10000, count("Posts_intermediate") 35 | 36 | run_command "swap Posts" 37 | assert !table_exists?("Posts_intermediate") 38 | assert table_exists?("Posts_retired") 39 | 40 | run_command "unswap Posts" 41 | assert table_exists?("Posts_intermediate") 42 | assert !table_exists?("Posts_retired") 43 | 44 | run_command "unprep Posts" 45 | assert !table_exists?("Posts_intermediate") 46 | end 47 | 48 | def test_trigger_based 49 | assert_period "month", trigger_based: true 50 | end 51 | 52 | def test_trigger_based_timestamptz 53 | assert_period "month", trigger_based: true, column: "createdAtTz" 54 | end 55 | 56 | def test_v2 57 | assert_period "month", version: 2 58 | end 59 | 60 | def test_tablespace 61 | assert_period "day", tablespace: true 62 | end 63 | 64 | def test_tablespace_trigger_based 65 | assert_period "month", trigger_based: true, tablespace: true 66 | end 67 | 68 | private 69 | 70 | def assert_period(period, column: "createdAt", trigger_based: false, tablespace: false, version: nil) 71 | $conn.exec('CREATE STATISTICS my_stats ON "Id", "UserId" FROM "Posts"') 72 | 73 | if server_version_num >= 120000 && !trigger_based 74 | $conn.exec('ALTER TABLE "Posts" ADD COLUMN "Gen" INTEGER GENERATED ALWAYS AS ("Id" * 10) STORED') 75 | end 76 | 77 | run_command "prep Posts #{column} #{period} #{"--trigger-based" if trigger_based} #{"--test-version #{version}" if version}" 78 | assert table_exists?("Posts_intermediate") 79 | 80 | run_command "add_partitions Posts --intermediate --past 1 --future 1 #{"--tablespace pg_default" if tablespace}" 81 | now = Time.now.utc 82 | time_format = 83 | case period 84 | when "day" 85 | "%Y%m%d" 86 | when "month" 87 | "%Y%m" 88 | else 89 | "%Y" 90 | end 91 | partition_name = "Posts_#{now.strftime(time_format)}" 92 | assert_primary_key partition_name 93 | assert_index partition_name 94 | assert_foreign_key partition_name 95 | 96 | declarative = !trigger_based 97 | 98 | if declarative 99 | refute_primary_key "Posts_intermediate" 100 | else 101 | assert_primary_key "Posts_intermediate" 102 | end 103 | 104 | if declarative && version == 2 105 | refute_index "Posts_intermediate" 106 | else 107 | assert_index "Posts_intermediate" 108 | end 109 | 110 | assert_equal 0, count("Posts_intermediate") 111 | run_command "fill Posts" 112 | assert_equal 10000, count("Posts_intermediate") 113 | 114 | # insert into old table 115 | $conn.exec('INSERT INTO "Posts" ("' + column + '") VALUES (\'' + now.iso8601 + '\') RETURNING "Id"').first 116 | 117 | run_command "analyze Posts" 118 | 119 | # TODO check sequence ownership 120 | output = run_command "swap Posts" 121 | assert_match "SET LOCAL lock_timeout = '5s';", output 122 | assert table_exists?("Posts") 123 | assert table_exists?("Posts_retired") 124 | refute table_exists?("Posts_intermediate") 125 | 126 | assert_equal 10000, count("Posts") 127 | run_command "fill Posts --swapped" 128 | assert_equal 10001, count("Posts") 129 | 130 | run_command "add_partitions Posts --future 3" 131 | days = 132 | case period 133 | when "day" 134 | 3 135 | when "month" 136 | 90 137 | else 138 | 365 * 3 139 | end 140 | new_partition_name = "Posts_#{(now + days * 86400).strftime(time_format)}" 141 | assert_primary_key new_partition_name 142 | assert_index new_partition_name 143 | assert_foreign_key new_partition_name 144 | 145 | # test insert works 146 | insert_result = $conn.exec('INSERT INTO "Posts" ("' + column + '") VALUES (\'' + now.iso8601 + '\') RETURNING "Id"').first 147 | assert_equal 10002, count("Posts") 148 | if declarative 149 | assert insert_result["Id"] 150 | else 151 | assert_nil insert_result 152 | assert_equal 0, count("Posts", only: true) 153 | end 154 | 155 | # test insert with null field 156 | error = assert_raises(PG::ServerError) do 157 | $conn.exec('INSERT INTO "Posts" ("UserId") VALUES (1)') 158 | end 159 | assert_includes error.message, "partition" 160 | 161 | # test foreign key 162 | error = assert_raises(PG::ServerError) do 163 | $conn.exec('INSERT INTO "Posts" ("' + column + '", "UserId") VALUES (NOW(), 1)') 164 | end 165 | assert_includes error.message, "violates foreign key constraint" 166 | 167 | # test adding column 168 | add_column "Posts", "updatedAt" 169 | assert_column "Posts", "updatedAt" 170 | assert_column partition_name, "updatedAt" 171 | assert_column new_partition_name, "updatedAt" 172 | 173 | run_command "analyze Posts --swapped" 174 | 175 | # pg_stats_ext view available with Postgres 12+ 176 | assert_statistics "Posts" if server_version_num >= 120000 && !trigger_based 177 | 178 | # TODO check sequence ownership 179 | run_command "unswap Posts" 180 | assert table_exists?("Posts") 181 | assert table_exists?("Posts_intermediate") 182 | refute table_exists?("Posts_retired") 183 | assert table_exists?(partition_name) 184 | assert table_exists?(new_partition_name) 185 | 186 | run_command "unprep Posts" 187 | assert table_exists?("Posts") 188 | refute table_exists?("Posts_intermediate") 189 | refute table_exists?(partition_name) 190 | refute table_exists?(new_partition_name) 191 | end 192 | 193 | def run_command(command) 194 | if verbose? 195 | puts "$ pgslice #{command}" 196 | puts 197 | end 198 | stdout, stderr = capture_io do 199 | PgSlice::CLI.start("#{command} --url #{$url}".split(" ")) 200 | end 201 | if verbose? 202 | puts stdout 203 | puts 204 | end 205 | assert_equal "", stderr 206 | stdout 207 | end 208 | 209 | def add_column(table, column) 210 | $conn.exec("ALTER TABLE \"#{table}\" ADD COLUMN \"#{column}\" timestamp") 211 | end 212 | 213 | def assert_column(table, column) 214 | assert_includes $conn.exec("SELECT * FROM \"#{table}\" LIMIT 0").fields, column 215 | end 216 | 217 | def table_exists?(table_name) 218 | result = $conn.exec <<~SQL 219 | SELECT * FROM information_schema.tables 220 | WHERE table_schema = 'public' AND table_name = '#{table_name}' 221 | SQL 222 | result.any? 223 | end 224 | 225 | def count(table_name, only: false) 226 | result = $conn.exec <<~SQL 227 | SELECT COUNT(*) FROM #{only ? "ONLY " : ""}"#{table_name}" 228 | SQL 229 | result.first["count"].to_i 230 | end 231 | 232 | def primary_key(table_name) 233 | result = $conn.exec <<~SQL 234 | SELECT pg_get_constraintdef(oid) AS def 235 | FROM pg_constraint 236 | WHERE contype = 'p' AND conrelid = '"#{table_name}"'::regclass 237 | SQL 238 | result.first 239 | end 240 | 241 | def assert_primary_key(table_name) 242 | result = primary_key(table_name) 243 | assert_match "PRIMARY KEY (\"Id\")", result["def"] 244 | end 245 | 246 | def refute_primary_key(table_name) 247 | assert_nil primary_key(table_name), "Unexpected primary key on #{table_name}" 248 | end 249 | 250 | def index(table_name) 251 | result = $conn.exec <<~SQL 252 | SELECT pg_get_indexdef(indexrelid) 253 | FROM pg_index 254 | WHERE indrelid = '"#{table_name}"'::regclass AND indisprimary = 'f' 255 | SQL 256 | result.first 257 | end 258 | 259 | def assert_index(table_name) 260 | assert index(table_name), "Missing index on #{table_name}" 261 | end 262 | 263 | def refute_index(table_name) 264 | refute index(table_name), "Unexpected index on #{table_name}" 265 | end 266 | 267 | def assert_foreign_key(table_name) 268 | result = $conn.exec <<~SQL 269 | SELECT pg_get_constraintdef(oid) AS def 270 | FROM pg_constraint 271 | WHERE contype = 'f' AND conrelid = '"#{table_name}"'::regclass 272 | SQL 273 | assert !result.detect { |row| row["def"] =~ /\AFOREIGN KEY \(.*\) REFERENCES "Users"\("Id"\)\z/ }.nil?, "Missing foreign key on #{table_name}" 274 | end 275 | 276 | # extended statistics are built on partitioned tables 277 | # https://github.com/postgres/postgres/commit/20b9fa308ebf7d4a26ac53804fce1c30f781d60c 278 | # (backported to Postgres 10) 279 | def assert_statistics(table_name) 280 | result = $conn.exec <<~SQL 281 | SELECT n_distinct 282 | FROM pg_stats_ext 283 | WHERE tablename = '#{table_name}' 284 | SQL 285 | assert result.any?, "Missing extended statistics on #{table_name}" 286 | assert_equal '{"1, 2": 10002}', result.first["n_distinct"] 287 | end 288 | 289 | def server_version_num 290 | $conn.exec("SHOW server_version_num").first["server_version_num"].to_i 291 | end 292 | 293 | def verbose? 294 | ENV["VERBOSE"] 295 | end 296 | end 297 | -------------------------------------------------------------------------------- /test/support/schema.sql: -------------------------------------------------------------------------------- 1 | SET client_min_messages = warning; 2 | 3 | DROP SCHEMA IF EXISTS public CASCADE; 4 | CREATE SCHEMA public; 5 | 6 | CREATE TABLE "Users" ( 7 | "Id" SERIAL PRIMARY KEY 8 | ); 9 | 10 | CREATE TABLE "Posts" ( 11 | "Id" SERIAL PRIMARY KEY, 12 | "UserId" INTEGER, 13 | "createdAt" timestamp, 14 | "createdAtTz" timestamptz, 15 | "createdOn" date, 16 | CONSTRAINT "foreign_key_1" FOREIGN KEY ("UserId") REFERENCES "Users"("Id") 17 | ); 18 | 19 | CREATE INDEX ON "Posts" ("createdAt"); 20 | 21 | INSERT INTO "Posts" ("createdAt", "createdAtTz", "createdOn") SELECT NOW(), NOW(), NOW() FROM generate_series(1, 10000) n; 22 | -------------------------------------------------------------------------------- /test/test_helper.rb: -------------------------------------------------------------------------------- 1 | require "bundler/setup" 2 | Bundler.require(:default) 3 | require "minitest/autorun" 4 | require "minitest/pride" 5 | 6 | ENV["PGSLICE_ENV"] = "test" 7 | 8 | $url = ENV["PGSLICE_URL"] || "postgres:///pgslice_test" 9 | $conn = PG::Connection.new($url) 10 | --------------------------------------------------------------------------------