├── 0001_explain_analyze_buffers.md ├── 0002_how_to_troubleshoot_and_speedup_postgres_restarts.md ├── 0003_how_to_troubleshoot_long_startup.md ├── 0004_tuple_sparsenes.md ├── 0005_pg_stat_statements_part_1.md ├── 0006_pg_stat_statements_part_2.md ├── 0007_pg_stat_statements_part_3.md ├── 0008_how_to_speed_up_pg_dump.md ├── 0009_lsn_values_and_wal_filenames.md ├── 0010_flamegraphs_for_postgres.md ├── 0011_ad_hoc_monitoring.md ├── 0012_from_pgss_to_explain__how_to_find_query_examples.md ├── 0013_how_to_benchmark.md ├── 0014_how_to_decide_if_query_too_slow.md ├── 0015_how_to_monitor_index_operations.md ├── 0016_how_to_get_into_trouble_using_some_postgres_features.md ├── 0017_how_to_determine_the_replication_lag.md ├── 0018_over_indexing.md ├── 0019_how_to_import_csv_to_postgres.md ├── 0020_how_to_use_pg_restore.md ├── 0021_how_to_set_application_name_without_extra_queries.md ├── 0022_how_to_analyze_heavyweight_locks_part_1.md ├── 0023_how_to_use_openai_apis_in_postgres.md ├── 0024_how_to_work_with_metadata.md ├── 0025_how_to_quit_from_psql.md ├── 0026_how_to_check_btree_indexes_for_corruption.md ├── 0027_how_to_compile_postgres_on_ubuntu_22.04.md ├── 0028_how_to_work_with_arrays_part_1.md ├── 0029_how_to_work_with_arrays_part_2.md ├── 0030_how_to_deal_with_long-running_transactions_oltp.md ├── 0031_how_to_troubleshoot_a_growing_pg_wal_directory.md ├── 0032_how_to_speed_up_bulk_load.md ├── 0033_how_to_redefine_a_PK_without_downtime.md ├── 0034_how_to_perform_postgres_tuning.md ├── 0035_how_to_use_subtransactions_in_postgres.md ├── 0036_find-or-insert_using_a_single_query.md ├── 0037_how_to_enable_data_checksums_without_downtime.md ├── 0038_how_to_not_get_screwed_as_a_dba.md ├── 0039_how_to_break_a_database_part_1_how_to_corrupt.md ├── 0040_how_to_break_a_database_part_2_simulate_xid_wraparound.md ├── 0041_harmful_workloads.md ├── 0042_how_to_analyze_heavyweight_locks_part_2.md ├── 0043_how_to_format_sql.md ├── 0044_how_to_monitor_transaction_id_wraparound_risks.md ├── 0045_how_to_monitor_xmin_horizon.md ├── 0046_how_to_deal_with_bloat.md ├── 0047_how_to_install_postgres_16_with_plpython3u.md ├── 0048_how_to_generate_fake_data.md ├── 0049_how_to_use_variables_in_psql_scripts.md ├── 0050_pre_and_post_steps_for_benchmark_iterations.md ├── 0051_learn_about_schema_metadata_via_psql.md ├── 0052_how_to_reduce_wal_generation_rates.md ├── 0053_index_maintenance.md ├── 0054_how_to_check_btree_indexes_for_corruption.md ├── 0055_how_to_drop_a_column.md ├── 0056_how_to_imitate_production_planner.md ├── 0057_how_to_convert_a_physical_replica_to_logical.md ├── 0058_how_to_use_docker_to_run_postgres.md ├── 0059_psql_tuning.md ├── 0060_how_to_add_a_column.md ├── 0061_how_to_create_an_index_part_1.md ├── 0062_how_to_create_an_index_part_2.md ├── 0063_how_to_help_others.md ├── 0064_how_to_use_uuid.md ├── 0065_uuid_v7_and_partitioning_timescaledb.md ├── 0066_how_many_tuples_can_be_inserted_in_a_page.md ├── 0067_autovacuum_queue_and_progress.md ├── 0068_psql_shortcuts.md ├── 0069_howd_tod_addd_ad_checkd_constraintd_withoutd_downtime.md ├── 0070_how_to_add_a_foreign_key.md ├── 0071_how_to_understand_what_is_blocking_ddl.md ├── 0072_how_to_remove_a_foreign_key.md ├── 0073_how_to_analyze_heavyweight_locks_part_3_persistent_monitoring.md ├── 0074_how_to_flush_caches.md ├── 0075_how_to_find_unused_indexes.md ├── 0076_how_to_find_redundent_indexes.md ├── 0077_zero_downtime_major_upgrade.md ├── 0078_estimate_yoy_table_growth.md ├── 0079_rebuild_indexes_without_deadlocks.md ├── 
0080_how_to_find_int4_pks_with_out_of_range_risks.md ├── 0081_how_to_plot_graphs_right_in_psql_on_macos_iterm2.md ├── 0082_how_to_draw_frost_patterns_using_sql.md ├── 0083_how_to_quickly_check_data_type_and_storage_size_of_a_value.md ├── 0084_how_to_find_the_best_order_of_columns_to_save_on_storage.md ├── 0085_how_to_quickly_check_data_type_and_storage_size_of_a_value.md ├── 0086_how_to_make_e_work_in_psql.md ├── 0087_how_to_change_ownership_of_all_objects_in_a_database.md ├── 0088_how_to_tune_linux_parameters_for_oltp_postgres.md ├── 0089_rough_oltp_configuration_tuning.md ├── 0090_how_to_use_lib_pgquery_in_shell.md ├── 0091_how_to_format_text_output_in_psql_scripts.md ├── 0092_how_to_tune_work_mem.md ├── 0093_how_to_troubleshoot_streaming_replication_lag.md ├── README.md └── files ├── 0002_cover.png ├── 0003_cover.png ├── 0004_cover.png ├── 0005_cover.png ├── 0006_cover.png ├── 0007_cover.png ├── 0008_cover.png ├── 0009_cover.png ├── 0010_cover.png ├── 0010_flamegraph.png ├── 0010_flamegraph.svg ├── 0010_perf_top_w_debug_symbols.png ├── 0010_perf_top_wo_debug_symbols.png ├── 0011_cover.png ├── 0012_cover_01.png ├── 0012_cover_03.png ├── 0013_cover.png ├── 0013_cover_02.png ├── 0013_cover_03.png ├── 0013_db_benchmark.png ├── 0014_cover.png ├── 0015_reindex.gif ├── 0018_degradation_with_indexes.png ├── 0018_flame_graph.png ├── 0018_plan_exec_time_vs_index.png ├── 0025_elephant_with_sunglasses.jpg ├── 0035_performance_drop_too_many_subtx.jpg ├── 0035_rolled_back_subtransaction_example.jpg ├── 0035_standby_server_killed.jpg ├── 0042_example_output.jpeg ├── 0044_wraparound_and_freeze.jpg ├── 0059_postgres_dba.jpg ├── 0059_pspg_improved_output.png ├── 0059_pspg_menus.jpg ├── 0059_psql_ugly_output.png ├── 0066-formula-1.gif ├── 0066-formula-2.gif ├── 0067_tables_to_be_autovacuumed.png ├── 0067_tables_to_be_autovacuumed_2.png ├── 0073_01.jpg ├── 0073_02.jpg ├── 0073_03.jpg ├── 0081_01.png ├── 0082_01.png ├── 0082_02.png ├── 0082_03.png └── 0091_result.png /0001_explain_analyze_buffers.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1706689567355732052), [LinkedIn post](https://www.linkedin.com/feed/update/urn:li:activity:7112529621178347520/). 2 | 3 | --- 4 | 5 | # EXPLAIN ANALYZE or EXPLAIN (ANALYZE, BUFFERS)? 6 | 7 | When analyzing Postgres query execution plans, I always recommend using the `BUFFERS` option: 8 | ```sql 9 | explain (analyze, buffers) ; 10 | ``` 11 | 12 | ## Example 13 | `````` 14 | test=# explain (analyze, buffers) select * from t1 where num > 10000 order by num limit 1000; 15 | QUERY PLAN 16 | ---------------------------------------------------------- 17 | Limit (cost=312472.59..312589.27 rows=1000 width=16) (actual time=314.798..316.400 rows=1000 loops=1) 18 | Buffers: shared hit=54173 19 | ... 20 | Rows Removed by Filter: 333161 21 | Buffers: shared hit=54055 22 | Planning Time: 0.212 ms 23 | Execution Time: 316.461 ms 24 | (18 rows) 25 | `````` 26 | 27 | If `EXPLAIN ANALYZE` used without `BUFFERS`, the analysis lacks the information about the buffer pool IO. 28 | 29 | ## Reasons to prefer using `EXPLAIN (ANALYZE, BUFFERS)` over just `EXPLAIN ANALYZE` 30 | 1. IO operations with the buffer pool available for each node in the plan. 31 | 2. It gives understanding about data volumes involved (note: the buffer hits numbers provided can involve "hitting" the same buffer multiple times). 32 | 3. 
If analysis is focusing on the IO numbers, then it is possible to use weaker hardware (less RAM, slower disks) but still have reliable data for query optimization. 33 | 34 | For better understanding, it is recommended to convert buffer numbers to bytes. On most systems, 1 buffer is 8 KiB. So, 10 buffer reads is 80 KiB. 35 | 36 | However, beware of possible confusion: it is worth remembering that the numbers provided by `EXPLAIN (ANALYZE, BUFFERS)` are not data volumes but rather IO numbers – the amount of IO work that has been done. For example, for just a single buffer in memory, there may be 10 hits – in this case, we don't have 80 KiB present in the buffer pool, we just processed 80 KiB, dealing with the same buffer 10 times. It is, actually, an imperfect naming: it is presented as `Buffers: shared hit=5`, but this number is rather `buffer hits` than `buffers hit` – the number of operations, not the size of the data. 37 | 38 | ## Summary 39 | Use `EXPLAIN (ANALYZE, BUFFERS)` always, not just `EXPLAIN ANALYZE` – so you can see the actual IO work done by Postgres when executing queries. 40 | 41 | This gives a better understanding of the data volumes involved. Even better if you start translating buffer numbers to bytes – just multiplying them by the block size (8 KiB in most cases). 42 | 43 | Don't think about the timing numbers when you're inside the optimization process – it may feel counter-intuitive, but this is what allows you to forget about differences in environments. And this is what allows working with thin clones – look at Database Lab Engine and what other companies do with it. 44 | 45 | Finally, when optimizing a query, if you managed to reduce the BUFFERS numbers, this means that to execute this query, Postgres will need fewer buffers in the buffer pool involved, reducing IO, minimizing risks of contention, and leaving more space in the buffer pool for something else. Following this approach may eventually provide a global positive effect for the general performance of your database. 46 | 47 | A blog post on this topic: [EXPLAIN (ANALYZE) needs BUFFERS to improve the Postgres query optimization process](https://postgres.ai/blog/20220106-explain-analyze-needs-buffers-to-improve-the-postgres-query-optimization-process) 48 | -------------------------------------------------------------------------------- /0009_lsn_values_and_wal_filenames.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1709818510787162429), [LinkedIn post](...). 2 | 3 | --- 4 | 5 | # How to understand LSN values and WAL filenames 6 | 7 | 8 | 9 | ## How to read LSN values 10 | LSN – Log Sequence Number, a pointer to a location in the Write-Ahead Log (WAL). Understanding it and how to work with it is important for dealing with physical and logical replication, backups, and recovery. Postgres docs: 11 | - [WAL Internals](https://postgresql.org/docs/current/wal-internals.html) 12 | - [pg_lsn Type](https://postgresql.org/docs/current/datatype-pg-lsn.html) 13 | 14 | LSN is an 8-byte (64-bit) value ([source code](https://gitlab.com/postgres/postgres/blob/4f2994647ff1e1209829a0085ca0c8d237dbbbb4/src/include/access/xlogdefs.h#L17)). It can be represented in the form of `A/B` (more specifically, `A/BBbbbbbb`, see below), where both `A` and `B` are 4-byte values. For example: 15 | ``` 16 | nik=# select pg_current_wal_lsn(); 17 | pg_current_wal_lsn 18 | -------------------- 19 | 5D/257E19B0 20 | (1 row) 21 | ``` 22 | 23 | - `5D` here is the higher 4-byte (32-bit) section of LSN 24 | - `257E19B0` can, in its turn, be split to two parts as well: 25 | - `25` – lower 4-byte section of LSN (more specifically, only the highest 1 byte of that 4-byte section) 26 | - `7E19B0` – offset in WAL (which is `16 MiB` by default; in some cases, it's changed – e.g., RDS changed it to `64 MiB`) 27 | 28 | Interestingly, LSN values can be compared, and even subtracted from one another – assuming we use the pg_lsn data type. The result will be in bytes: 29 | ``` 30 | nik=# select pg_lsn '5D/257D6780' - '5D/251A8118'; 31 | ?column? 32 | ---------- 33 | 6481512 34 | (1 row) 35 | nik=# select pg_size_pretty(pg_lsn '5D/257D6780' - '5D/251A8118'); 36 | pg_size_pretty 37 | ---------------- 38 | 6330 kB 39 | (1 row) 40 | ``` 41 | 42 | This also means that we can get the integer value of LSN just seeing how far we went from the "point 0" – value `0/0`: 43 | ``` 44 | nik=# select pg_lsn '5D/257D6780' - '0/0'; 45 | ?column? 46 | -------------- 47 | 400060934016 48 | (1 row) 49 | ``` 50 | 51 | ## How to read WAL filenames 52 | Now let's see how the LSN values correspond to WAL file names (files located in `$PGDATA/pg_wal`). We can get the WAL file name for any given LSN using the function `pg_walfile_name()`: 53 | ``` 54 | nik=# select pg_current_wal_lsn(), pg_walfile_name(pg_current_wal_lsn()); 55 | pg_current_wal_lsn | pg_walfile_name 56 | --------------------+-------------------------- 57 | 5D/257E19B0 | 000000010000005D00000025 58 | (1 row) 59 | ``` 60 | 61 | Here `000000010000005D00000025` is the WAL filename; it consists of three 4-byte (32-bit) words: 62 | 1. `00000001` – timeline ID (TimeLineID), a sequential "history number" that starts with 1 when a Postgres cluster is initialized. It "identifies different database histories to prevent confusion after restoring a prior state of a database installation" ([source code](https://gitlab.com/postgres/postgres/blob/4f2994647ff1e1209829a0085ca0c8d237dbbbb4/src/include/access/xlogdefs.h#L50)). 63 | 2. `0000005D` – higher 4-byte section of sequence number. 64 | 3. `00000025` – can be viewed as two parts: 65 | - `000000` – 6 leading zeroes, 66 | - `25` – the highest byte of the lower section of sequence number. 67 | 68 | Important to remember: the third 4-byte word in the WAL filename has 6 leading zeroes – often, this leads to confusion and mistakes when comparing two WAL filenames to understand what LSNs to expect inside them. 69 | 70 | Useful illustration (from [this post](https://fluca1978.github.io/2020/05/28/PostgreSQLWalNames)): 71 | ``` 72 | LSN: 5D/ 25 7E19B0 73 | WAL: 00000001 0000005D 00000025 74 | ``` 75 | 76 | This can be very helpful if you need to work with LSN values or WAL filenames or with both of them, quickly navigating or comparing their values to understand the distance between them. Some examples when it can be useful to understand: 77 | 1. How many bytes the server generates per day 78 | 2. How much WAL has been generated since a replication slot was created 79 | 3. What's the distance between two backups 80 | 4. 
How much of WAL data needs to be replayed to reach consistency point 81 | 82 | ## Good blog posts worth reading besides the official docs: 83 | - [Postgres 9.4 feature highlight - LSN datatype](https://paquier.xyz/postgresql-2/postgres-9-4-feature-highlight-lsn-datatype/) 84 | - [Postgres WAL Files and Sequence Numbers](https://crunchydata.com/blog/postgres-wal-files-and-sequuence-numbers) 85 | - [WAL, LSN, and File Names](https://fluca1978.github.io/2020/05/28/PostgreSQLWalNames) 86 | 87 | --- 88 | 89 | Thanks for reading! As usual: please share it with your colleagues and friends who work with #PostgreSQL. 90 | -------------------------------------------------------------------------------- /0014_how_to_decide_if_query_too_slow.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1711575029006418218), [LinkedIn post](...). 2 | 3 | --- 4 | 5 | # How to decide when a query is too slow and needs optimization 6 | 7 | 8 | 9 | > I post a new PostgreSQL "howto" article every day. Join me in this 10 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 11 | 12 | "Slow" is a relative concept. In some cases, we might be happy with query latency 1 minute (or no?), while in other 13 | scenarios, even 1 ms might seem to be too slow. 14 | 15 | Decision when to apply optimization techniques is important for efficiency – as Donald Knuth famously stated in "The Art 16 | of Computer Programming": 17 | 18 | > The real problem is that programmers have spent far too much time worrying about efficiency in the wrong places and at 19 | > the wrong times; premature optimization is the root of all evil (or at least most of it) in programming. 20 | 21 | Below we assume that we work with OLTP or hybrid workloads and need to decide if a certain query is too slow and 22 | requires optimization. 23 | 24 | ## How to conclude that a query is too slow 25 | 26 | 1. Do you have an OLTP case or an analytical one, or hybrid? For OLTP cases, requirements are more strict and dictated 27 | by human perception (see: [What is a slow SQL query?](https://postgres.ai/blog/20210909-what-is-a-slow-sql-query)), 28 | while for analytical needs, we can usually wait a minute or two – unless it's also user-facing. If it is, we probably 29 | consider 1 minute as too slow. In this case, consider using column-store database systems (and Postgres ecosystem has 30 | a new offering here: check out [@hydradatabase](https://twitter.com/hydradatabase)). For OLTP, the majority of 31 | user-facing queries should be below 100ms – ideally, below 10ms – so the complete 32 | requests to your backends that users make, do not exceed 100-200ms (each request can issue several SQL queries, 33 | depending on the case). Of course, non-user-facing queries such as those coming from background jobs, `pg_dump`, and so 34 | on, can last longer – assuming that the next principles are met. 35 | 36 | 2. In the case of OLTP, the second question should be: is this query "read-only" or it changes the data (be it DDL or 37 | just writing DML – INSERT/UPDATE/DELETE)? In this case, in OLTP, we shouldn't allow it to run longer than a second or 38 | two, unless we are 100% sure that this query won't block other queries for long. For massive writes, consider 39 | splitting them in batches so each batch doesn't last longer than 1-2 seconds. 
For DDL, be careful with lock 40 | acquisition and lock chains (read these 41 | posts: [Common DB schema change mistakes](https://postgres.ai/blog/20220525-common-db-schema-change-mistakes#case-5-acquire-an-exclusive-lock--wait-in-transaction) 42 | and 43 | [Useful queries to analyze PostgreSQL lock trees (a.k.a. lock queues)](https://postgres.ai/blog/20211018-postgresql-lock-trees)). 44 | 45 | 3. If you're dealing with a read-only query, make sure it's also not running for too long – long-running transactions 46 | make Postgres hold old dead tuples for long ("xmin horizon" is not advancing), so autovacuum cannot delete dead 47 | tuples that became dead after the start of our transaction. Aim to avoid transactions that last longer than one or a 48 | few hours (and if you absolutely need such long transactions, prefer running them at low-activity hours, when XID is 49 | progressing slowly, and do not run them often). 50 | 51 | 4. Finally, even if a query is relatively fast – for instance, 10ms – it might still be considered too slow if its 52 | frequency is high. For example, if a 10ms query runs 1,000 times per second (you can check it via 53 | `pg_stat_statements.calls`), then Postgres needs to spend 10 seconds *every* second to process this group of queries. 54 | In this case, if lowering the frequency is hard, the query should be considered slow, and an optimization 55 | attempt needs to be performed to reduce resource consumption (the goal here is to reduce 56 | `pg_stat_statements.total_exec_time` – see 57 | the [previous #PostgresMarathon posts about pgss](https://twitter.com/search?q=%23PostgresMarathon%20pg_stat_statements&src=typed_query&f=live)). 58 | 59 | ## Summary 60 | 61 | - All queries that last longer than 100-200 ms should be considered slow if they are user-facing. Good queries are 62 | those that are below 10 ms. 63 | - Background processing queries are ok to last longer. If they modify data and might block user-facing queries, then 64 | they should not be allowed to last longer than 1-2 s. 65 | - Be careful with DDLs – make sure they don't cause massive writes (if they do, those writes should be split into batches as 66 | well), and use low `lock_timeout` and retries to avoid blocking chains. 67 | - Do not allow long-running transactions. Make sure the xmin horizon is progressing and autovacuum can remove dead 68 | tuples promptly – do not allow transactions that last too long (>1-2h). 69 | - Optimize even fast (<100ms) queries if the corresponding `pg_stat_statements.calls` and 70 | `pg_stat_statements.total_exec_time` are high. 71 | -------------------------------------------------------------------------------- /0015_how_to_monitor_index_operations.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1711982079838507225), [LinkedIn post](...). 2 | 3 | --- 4 | 5 | # How to monitor CREATE INDEX / REINDEX progress in Postgres 12+ 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 
9 | 10 | To monitor the progress of long-running index building or rebuilding, you can use this query: 11 | 12 | ```sql 13 | select 14 | now(), 15 | query_start as started_at, 16 | now() - query_start as query_duration, 17 | format('[%s] %s', a.pid, a.query) as pid_and_query, 18 | index_relid::regclass as index_name, 19 | relid::regclass as table_name, 20 | (pg_size_pretty(pg_relation_size(relid))) as table_size, 21 | nullif(wait_event_type, '') || ': ' || wait_event as wait_type_and_event, 22 | phase, 23 | format( 24 | '%s (%s of %s)', 25 | coalesce((round(100 * blocks_done::numeric / nullif(blocks_total, 0), 2))::text || '%', 'N/A'), 26 | coalesce(blocks_done::text, '?'), 27 | coalesce(blocks_total::text, '?') 28 | ) as blocks_progress, 29 | format( 30 | '%s (%s of %s)', 31 | coalesce((round(100 * tuples_done::numeric / nullif(tuples_total, 0), 2))::text || '%', 'N/A'), 32 | coalesce(tuples_done::text, '?'), 33 | coalesce(tuples_total::text, '?') 34 | ) as tuples_progress, 35 | current_locker_pid, 36 | (select nullif(left(query, 150), '') || '...' from pg_stat_activity a where a.pid = current_locker_pid) as current_locker_query, 37 | format( 38 | '%s (%s of %s)', 39 | coalesce((round(100 * lockers_done::numeric / nullif(lockers_total, 0), 2))::text || '%', 'N/A'), 40 | coalesce(lockers_done::text, '?'), 41 | coalesce(lockers_total::text, '?') 42 | ) as lockers_progress, 43 | format( 44 | '%s (%s of %s)', 45 | coalesce((round(100 * partitions_done::numeric / nullif(partitions_total, 0), 2))::text || '%', 'N/A'), 46 | coalesce(partitions_done::text, '?'), 47 | coalesce(partitions_total::text, '?') 48 | ) as partitions_progress, 49 | ( 50 | select 51 | format( 52 | '%s (%s of %s)', 53 | coalesce((round(100 * n_dead_tup::numeric / nullif(reltuples::numeric, 0), 2))::text || '%', 'N/A'), 54 | coalesce(n_dead_tup::text, '?'), 55 | coalesce(reltuples::int8::text, '?') 56 | ) 57 | from pg_stat_all_tables t, pg_class tc 58 | where t.relid = p.relid and tc.oid = p.relid 59 | ) as table_dead_tuples 60 | from pg_stat_progress_create_index p 61 | left join pg_stat_activity a on a.pid = p.pid 62 | order by p.index_relid 63 | ; -- in psql, use "\watch 5" instead of semicolon 64 | ``` 65 | 66 | The same query, in [a better formatted form](https://gitlab.com/-/snippets/2138417). 67 | 68 | How this query works: 69 | 70 | 1. The basis of it 71 | is `pg_stat_progress_create_index` [added in Postgres 12](https://postgresql.org/docs/current/progress-reporting.html#CREATE-INDEX-PROGRESS-REPORTING). 72 | 73 | 2. The documentation also has a list of `CREATE INDEX` phases. As you can see from the table provided in the docs, 74 | advanced variant, `CREATE INDEX CONCURRENTLY` / `REINDEX CONCURRENTLY` (a.k.a. CIC and RC), which takes longer but 75 | acts in a non-blocking fashion suitable for loaded production systems, has more phases. The current phase is 76 | presented in the column "phase" of the output. 77 | 78 | 3. Index name (a temporary one in case of CIC/RC), table name are presented (using the useful trick to convert OIDs to 79 | names – note, e.g., `index_relid::regclass as index_name`). Additionally, the table size which is essential to form 80 | expectations of overall duration – the bigger the table is, the longer the index creation is going to take. 81 | 82 | 4. 
`pg_stat_activity` (`pgsa`) provides a lot of additional useful information: 83 | - PID of Postgres backend 84 | - actual SQL query used 85 | - the moment when the work has started (`query_start`), allowing us to understand the elapsed 86 | time (`query_duration`) 87 | - `wait_event_type` & `wait_event` to understand what the process is currently waiting on 88 | - it is also used (in a separate sub-query) to get the information about the session that blocks our process, when such an 89 | event occurs (`current_locker_pid`, `current_locker_query`) 90 | 91 | 5. Function `format(...)` is very useful to consolidate data in a convenient form without having to worry about `NULL`s, 92 | which 93 | would be a problem if we used regular concatenation without `coalesce(...)`. 94 | 95 | 6. However, in certain cases we use `coalesce(...)` just to put special symbols if the value is missing (`IS NULL`) – 96 | e.g., "?" or "N/A". 97 | 98 | 7. Another interesting trick is the combination of `coalesce(...)` and `nullif(...)`. The latter allows us to avoid division 99 | by 100 | zero errors (replacing `0` with `NULL`, making the division result also `NULL`), while the former, again, is used to 101 | substitute the resulting `NULL` with some non-empty value (in this case, 'N/A'). 102 | 103 | When executed in `psql`, it can be convenient to use `\watch [seconds]` to run this report in a loop and observe progress 104 | live: 105 | 106 | ![tracking the progress of index building/rebuilding](files/0015_reindex.gif) 107 | -------------------------------------------------------------------------------- /0021_how_to_set_application_name_without_extra_queries.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1714153676212949355), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to set application_name without extra queries 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | `application_name` is useful to control what you and others are going to see in `pg_stat_activity` (it has a column with 11 | the same name), and in various tools that use this system view. Additionally, it appears in the Postgres log (when `%a` is 12 | included in `log_line_prefix`). 13 | 14 | Docs: [application_name](https://postgresql.org/docs/current/runtime-config-logging.html#GUC-APPLICATION-NAME). 15 | 16 | It's good practice to set `application_name` – for example, it can be very helpful for root-cause analysis during and 17 | after incidents. 18 | 19 | The approaches below can be used to set up other settings (including regular Postgres parameters such as 20 | `statement_timeout` or `work_mem`); however, here we'll focus on `application_name` specifically. 
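As a side note – and this is just a sketch, not part of the original recipe – the same "no extra query" idea works for such other settings via libpq's `options` connection parameter or the `PGOPTIONS` environment variable; the GUC and value below are arbitrary examples:

```bash
❯ PGOPTIONS="-c statement_timeout=30s" psql \
    -Xc "show statement_timeout"
 statement_timeout
-------------------
 30s
(1 row)
```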
21 | 22 | Typically, `application_name` is set via `SET` (apologies for the tautology): 23 | 24 | ``` 25 | nik=# show application_name; 26 | application_name 27 | ------------------ 28 | psql 29 | (1 row) 30 | 31 | nik=# set application_name = 'human_here'; 32 | SET 33 | 34 | nik=# select application_name, pid from pg_stat_activity where pid = pg_backend_pid() \gx 35 | -[ RECORD 1 ]----+----------- 36 | application_name | human_here 37 | pid | 93285 38 | ``` 39 | 40 | However, having additional query – even a blazing fast one – means an extra RTT 41 | ([round-trip time](https://en.wikipedia.org/wiki/Round-trip_delay)), affecting latency, especially when communicating 42 | with a distant server. 43 | 44 | To avoid it, use `libpq`'s options. 45 | 46 | ## Method 1: via environment variable 47 | 48 | ```bash 49 | ❯ PGAPPNAME=myapp1 psql \ 50 | -Xc "show application_name" 51 | application_name 52 | ------------------ 53 | myapp1 54 | (1 row) 55 | ``` 56 | 57 | (`-X` means `ignore .psqlrc`, which is a good practice for automation scripts involving `psql`.) 58 | 59 | ## Method 2: through connection URI 60 | 61 | ```bash 62 | ❯ psql \ 63 | "postgresql://?application_name=myapp2" \ 64 | -Xc "show application_name" 65 | application_name 66 | ------------------ 67 | myapp2 68 | (1 row) 69 | ``` 70 | 71 | The URI method takes precedence over `PGAPPNAME`. 72 | 73 | ## In application code 74 | 75 | The described methods can be used not only with psql. Node.js example: 76 | 77 | ```bash 78 | ❯ node -e " 79 | const { Client } = require('pg'); 80 | 81 | const client = new Client({ 82 | connectionString: 'postgresql://?application_name=mynodeapp' 83 | }); 84 | 85 | client.connect() 86 | .then(() => client.query('show application_name')) 87 | .then(res => { 88 | console.log(res.rows[0].application_name); 89 | client.end(); 90 | }); 91 | " 92 | mynodeapp 93 | ``` 94 | -------------------------------------------------------------------------------- /0024_how_to_work_with_metadata.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1715249090202870113), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to work with metadata 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | When working with metadata – data about data – in Postgres, these reference docs are worth using: 11 | 12 | - [System Catalogs](https://postgresql.org/docs/current/catalogs.html) 13 | - [System Views](https://postgresql.org/docs/current/views.html) 14 | - [The Cumulative Statistics System](https://postgresql.org/docs/current/monitoring-stats.html) 15 | - [The Information Schema](https://postgresql.org/docs/current/information-schema.html) 16 | 17 | It's unnecessary to repeat the material from the docs here. Instead, let's focus on some tricks and principles that make 18 | your work more efficient. We'll cover these topics: 19 | 20 | - `::oid`, `::regclass` 21 | - `\?` and `ECHO_HIDDEN` 22 | - Performance 23 | - `INFORMATION_SCHEMA` 24 | - `pg_stat_activity` is not a table 25 | 26 | ## ::oid, ::regclass 27 | 28 | In Postgres terminology, tables, indexes, views, materialized views are all called "relations". The metadata about them 29 | can be seen in various ways, but the "central" place is the 30 | [pg_class system catalog](https://postgresql.org/docs/current/catalog-pg-class.html). 
In other words, this is a tables 31 | that stores 32 | information about all tables, indexes, and so on. It has two keys: 33 | 34 | - PK: `oid` - a number ([OID, object identifier](https://postgresql.org/docs/current/datatype-oid.html)) 35 | - UK: a pair of columns `(relname, relnamespace)`, relation name and OID of the schema. 36 | 37 | A trick to remember: OID can be quickly converted to relation name, vice versa, using type conversion to `oid` and 38 | `regclass` datatypes. 39 | 40 | Simple examples for a table named `t1`: 41 | 42 | ``` 43 | nik=# select 't1'::regclass; 44 | regclass 45 | ---------- 46 | t1 47 | (1 row) 48 | 49 | nik=# select 't1'::regclass::oid; 50 | oid 51 | ------- 52 | 74298 53 | (1 row) 54 | 55 | nik=# select 74298::regclass; 56 | regclass 57 | ---------- 58 | t1 59 | (1 row) 60 | ``` 61 | 62 | So, there is no need to do `select oid from pg_class where relname = ...` – just memorize `::regclass` and `::oid`. 63 | 64 | ## \? and ECHO_HIDDEN 65 | 66 | `psql`'s `\?` command is crucial – this is how you can find description for all commands. For example: 67 | 68 | ``` 69 | \d[S+] list tables, views, and sequences 70 | ``` 71 | 72 | The "describing" commands produce some SQL implicitly – and it can be helpful to "spy" on them. For that, we first need 73 | to turn on `ECHO_HIDDEN`: 74 | 75 | ``` 76 | nik=# \set ECHO_HIDDEN on 77 | ``` 78 | 79 | – or just use the option `-E` when starting `psql`. And then we can start spying: 80 | 81 | ``` 82 | nik=# \d t1 83 | /********* QUERY **********/ 84 | SELECT c.oid, 85 | n.nspname, 86 | c.relname 87 | FROM pg_catalog.pg_class c 88 | LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace 89 | WHERE c.relname OPERATOR(pg_catalog.~) '^(t1)$' COLLATE pg_catalog.default 90 | AND pg_catalog.pg_table_is_visible(c.oid) 91 | ORDER BY 2, 3; 92 | /**************************/ 93 | 94 | [... + more queries to get info about "t1" ...] 95 | ``` 96 | 97 | Examining these queries can assist in building various tooling to work with metadata. 98 | 99 | ## Performance 100 | 101 | In some cases, metadata queries can be heavy, slow. Here's what to do if it's so: 102 | 103 | 1. Consider caching to reduce the frequency and necessity of metadata queries. 104 | 105 | 2. Check for catalog bloat. For example `pg_class` can be bloated due to frequent DDL, use of temp tables, etc. In this 106 | case, unfortunately, a `VACUUM FULL` is needed (`pg_repack` cannot repack system catalogs). If you need it, don't 107 | forget the golden rule of zero-downtime DDLs in Postgres – 108 | [use low lock_timeout and retries](https://postgres.ai/blog/20210923-zero-downtime-postgres-schema-migrations-lock-timeout-and-retries). 109 | 110 | ## INFORMATION_SCHEMA 111 | 112 | System catalogs and views are "native" ways to query table and index metadata – not standard, though. The standard way 113 | is called `INFORMATION_SCHEMA` and Postgres supports it following the SQL 114 | standard: [Docs](https://postgresql.org/docs/current/information-schema.html). What to use: 115 | 116 | - Use the information schema for simple, cross-database compatible metadata queries. 117 | - Use native system catalogs for more complex, Postgres-specific queries or when you need detailed internal information. 118 | 119 | ## pg_stat_activity is not a table 120 | 121 | It's essential to remember that when querying metadata, you might deal with something that doesn't behave as normal 122 | table even if it looks so. 
123 | 124 | For instance, when you read records from `pg_stat_activity`, you're not dealing with a consistent snapshot of table 125 | data: reading the first and, theoretically, the last rows are produced at different moments of time, and you might see 126 | the queries which were not running simultaneously. 127 | 128 | This phenomenon also explains why `select now() - query_start from pg_stat_activity;` might give you negative values: 129 | the function `now()` is executed at the beginning of the transaction, once, and doesn't change its value inside the 130 | transaction, no matter how many times you call it. 131 | 132 | To get precise time intervals, use `clock_timestamp()` instead 133 | (`select clock_timestamp() - query_start from pg_stat_activity;`). 134 | -------------------------------------------------------------------------------- /0025_how_to_quit_from_psql.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1715636738578845831), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to quit from psql 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | Just enter `quit` (or `exit`). That's it. 11 | 12 | Unless you're on Postgres 10 or older – in that case, it's `\q`. Postgres 11 is retiring in a couple of weeks ([the final 13 | minor release, 11.22 is scheduled on November 9](https://postgresql.org/support/versioning/)), so we'll be able to say 14 | that for all supported versions it's just `quit`. 15 | 16 | And if you need it to work in non-interactive mode, then it's also `\q`: 17 | 18 | - `psql -c '\q'` – this works 19 | - `psql -c 'quit'` – this doesn't 20 | 21 | Alternatively, `Ctrl-D` should also work. Being a standard way to exit a console, it works in various shells such as 22 | bash, zsh, irb, python, node, etc. 23 | 24 | --- 25 | 26 | This question remains in the [top-5 on StackOverflow](https://stackoverflow.com/q/9463318/459391) and other places. 27 | -------------------------------------------------------------------------------- /0026_how_to_check_btree_indexes_for_corruption.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1716001897839272288), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to check btree indexes for corruption 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | This snippet checks all btree indexes using `$N` parallel workers (parallelization is generally recommended): 11 | 12 | ```bash 13 | pg_amcheck \ 14 | --install-missing \ 15 | --verbose \ 16 | --progress \ 17 | --jobs=$N \ 18 | --heapallindexed \ 19 | 2>&1 \ 20 | | ts \ 21 | | tee -a amcheck_(date "+%F-%H-%M").log 22 | ``` 23 | 24 | Docs: 25 | 26 | - [amcheck (extension)](https://postgresql.org/docs/current/amcheck.html) 27 | - [pg_amcheck (CLI tool)](https://postgresql.org/docs/current/app-pgamcheck.html) 28 | 29 | Notes: 30 | 31 | 1. This type of check acquires only `AccessShareLock` on indexes – hence, DML is not blocked. However, in general, this 32 | kind of check produces a significant workload since it needs to read all indexes and corresponding tuples in headers. 33 | 34 | 2. The option `--heapallindexed` here is optional but highly recommended. 
Without it, the check usually takes much less 35 | time, but in this case only "light" index check is performed (not involving "heap" – it doesn't follow references to 36 | tuples in tables, checking indexes only). 37 | 38 | 3. There is another useful option not used here, `--parent-check`, which provides more comprehensive check of index 39 | structure – it checks parent/child relationships in the index. This is a very useful for "deep" test of indexes. 40 | However, it is slow and, unfortunately, requires ShareLock locks to be acquired. It means, while working in this 41 | mode, such checks are blocking modifications (`INSERT`/`UPDATE`/`DELETE`). So, `--parent-check` can only be used 42 | either on clones not receiving traffic or during maintenance window. 43 | 44 | 4. Checking indexes with `amcheck` is not an alternative to having data checksums enabled. It is recommended to both 45 | enable data checksums and use `amcheck` corruption checks regularly (e.g., after provisioning of replicas, backup 46 | restore verification, or any time you suspect corruption in indexes). 47 | 48 | 5. If errors are found, it means there is corruption (unless `amcheck` has a bug). If errors are NOT found, it does NOT 49 | mean that there is no corruption – the corruption topic is very broad and no single tool can check for all possible 50 | kinds of corruption. 51 | 52 | 6. Potential cases when btree corruption might happen: 53 | 54 | - Postgres bugs (e.g. versions 14.0 – 14.3 had a bug in `REINDEX CONCURRENTLY`, potentially corrupting btree 55 | indexes) 56 | - Copying `PGDATA` from one OS version to another, silently switching to a different `glibc` version, introducing 57 | changes in 58 | collation rules. PG15+ produces a warning (`WARNING: database XXX has a collation version mismatch`) in such 59 | cases, 60 | while older versions do not complain, so silent corruption risks are significant. It is always recommended to test 61 | upgrade and verify indexes with `amcheck`. 62 | - Performing a major upgrade, upgrading replicas using `rsync --data-only` without correctly handling the order in 63 | which the primary and replicas are stopped: 64 | [details](https://postgresql.org/message-id/flat/CAM527d8heqkjG5VrvjU3Xjsqxg41ufUyabD9QZccdAxnpbRH-Q%40mail.gmail.com). 65 | 66 | 7. Testing **GiST** and **GIN** for corruption is still in development, though there 67 | are [proposed patches](https://commitfest.postgresql.org/45/3733/) that can be used (with a note that they are not 68 | yet officially released). 69 | 70 | 8. Testing **unique indexes** for corruption is also in development. 71 | -------------------------------------------------------------------------------- /0027_how_to_compile_postgres_on_ubuntu_22.04.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1716359165806035147), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to compile Postgres on Ubuntu 22.04 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | This post describes how to quickly compile Postgres on Ubuntu 22.04. 11 | 12 | The [official docs](https://postgresql.org/docs/current/installation.html). It is very detailed, and it's great to be 13 | used as a reference, but you won't find a concrete list of steps for a particular OS version, e.g., Ubuntu. 
14 | 15 | This howto is enough to build Postgres from the `master` branch and start using it. Extend this basic set of steps if 16 | needed. 17 | 18 | A couple of more notes: 19 | 20 | - The current (as of 2023, PG16) docs mention a new approach to building Postgres – [Meson](https://mesonbuild.com/) – 21 | but it is still considered experimental, we won't use it here. 22 | 23 | - The paths for binaries and `PGDATA` that we use here are provided just as examples (suitable for a "quick-n-dirty" 24 | setup to test, for example, a new patch from the `pgsql-hackers` mailing list). 25 | 26 | 1. Install required software 27 | 2. Get source code 28 | 3. Configure 29 | 4. Compile and install 30 | 5. Create a cluster and start using it 31 | 32 | ## 1) Install required software 33 | 34 | ```bash 35 | sudo apt update 36 | 37 | sudo apt install -y \ 38 | build-essential libreadline-dev \ 39 | zlib1g-dev flex bison libxml2-dev \ 40 | libxslt-dev libssl-dev libxml2-utils \ 41 | xsltproc ccache 42 | ``` 43 | 44 | ## 2) Get source code 45 | 46 | ```bash 47 | git clone https://gitlab.com/postgres/postgres.git 48 | cd ./postgres 49 | ``` 50 | 51 | ## 3) Configure 52 | 53 | ```bash 54 | mkdir ~/pg16 # consider changing it! 55 | 56 | ./configure \ 57 | --prefix=$(echo ~/pg16) \ 58 | --with-ssl=openssl \ 59 | --with-python \ 60 | --enable-depend \ 61 | --enable-cassert 62 | ``` 63 | 64 | (Review the list of options.) 65 | 66 | ## 4) Compile and install 67 | 68 | ```bash 69 | make -j$(grep -c processor /proc/cpuinfo) 70 | make install 71 | ``` 72 | 73 | ## 5) Create a cluster and start using it 74 | 75 | ```bash 76 | ~/pg16/bin/initdb \ 77 | -D ~/pgdata \ 78 | --data-checksums \ 79 | --locale-provider=icu \ 80 | --icu-locale=en 81 | 82 | ~/pg16/bin/pg_ctl \ 83 | -D ~/pgdata \ 84 | -l pg.log start 85 | ``` 86 | 87 | Now, check: 88 | 89 | ```bash 90 | $ ~/pg16/bin/psql postgres -c 'show server_version' 91 | server_version 92 | ---------------- 93 | 17devel 94 | (1 row) 95 | ``` 96 | -------------------------------------------------------------------------------- /0029_how_to_work_with_arrays_part_2.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1717044844227637530), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to work with arrays, part 2 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | Part 1 can be found [here](0028_how_to_work_with_arrays_part_1.md). 11 | 12 | ## How to search 13 | 14 | One of the most interesting operators for array search is `<@` – "is contained in": 15 | 16 | ``` 17 | nik=# select array[2, 1] <@ array[1, 3, 2]; 18 | ?column? 19 | ---------- 20 | t 21 | (1 row) 22 | ``` 23 | 24 | When you need to check if some scalar value is present in an array, just create a single-element array and apply `<@` to 25 | two arrays. For example, checking if "2" is contained in the analyzed array: 26 | 27 | ``` 28 | nik=# select array[2] <@ array[1, 3, 2]; 29 | ?column? 
30 | ---------- 31 | t 32 | (1 row) 33 | ``` 34 | 35 | You can build an index to speed up search using `<@` - let's create a GIN index and compare the plans for a table with 36 | a million rows: 37 | 38 | ``` 39 | nik=# create table t1 (val int8[]); 40 | CREATE TABLE 41 | 42 | nik=# insert into t1 43 | select array( 44 | select round(random() * 1000 + i) 45 | from generate_series(1, 10) 46 | limit (1 + random() * 10)::int 47 | ) 48 | from generate_series(1, 1000000) as i; 49 | INSERT 0 1000000 50 | 51 | nik=# select * from t1 limit 3; 52 | val 53 | ----------------------- 54 | {390,13,405,333,358,592,756,677} 55 | {463,677,585,191,425,143} 56 | {825,918,303,602} 57 | (3 rows) 58 | 59 | nik=# vacuum analyze t1; 60 | VACUUM 61 | ``` 62 | 63 | – we created a single-column table with 1M rows containing `int8[]` arrays with various numbers. 64 | 65 | Search all rows containing "123" in the array value: 66 | 67 | ``` 68 | nik=# explain (analyze, buffers) select * from t1 where array[123]::int8[] <@ val; 69 | QUERY PLAN 70 | --------------------------------------------------------------------------------------------------------------------- 71 | Gather (cost=1000.00..18950.33 rows=5000 width=68) (actual time=0.212..100.572 rows=2 loops=1) 72 | Workers Planned: 2 73 | Workers Launched: 2 74 | Buffers: shared hit=12554 75 | -> Parallel Seq Scan on t1 (cost=0.00..17450.33 rows=2083 width=68) (actual time=61.293..94.212 rows=1 loops=3) 76 | Filter: ('{123}'::bigint[] <@ val) 77 | Rows Removed by Filter: 333333 78 | Buffers: shared hit=12554 79 | Planning: 80 | Buffers: shared hit=6 81 | Planning Time: 0.316 ms 82 | Execution Time: 100.586 ms 83 | (12 rows) 84 | ``` 85 | 86 | – 12554 buffer hits, or 12554 * 8 / 1024 ~= 98 MiB. Just to find those 2 rows with an array value containing "123" - 87 | notice "rows=2". It's not efficient; we have a Seq Scan here. 88 | 89 | Now, with a GIN index: 90 | 91 | ``` 92 | nik=# create index on t1 using gin(val); 93 | CREATE INDEX 94 | 95 | nik=# explain (analyze, buffers) select * from t1 where array[123]::int8[] <@ val; 96 | QUERY PLAN 97 | ----------------------------------------------------------------------------------------------------------------------- 98 | Bitmap Heap Scan on t1 (cost=44.75..4260.25 rows=5000 width=68) (actual time=0.021..0.022 rows=2 loops=1) 99 | Recheck Cond: ('{123}'::bigint[] <@ val) 100 | Heap Blocks: exact=1 101 | Buffers: shared hit=5 102 | -> Bitmap Index Scan on t1_val_idx (cost=0.00..43.50 rows=5000 width=0) (actual time=0.016..0.016 rows=2 loops=1) 103 | Index Cond: (val @> '{123}'::bigint[]) 104 | Buffers: shared hit=4 105 | Planning: 106 | Buffers: shared hit=16 107 | Planning Time: 0.412 ms 108 | Execution Time: 0.068 ms 109 | (11 rows) 110 | ``` 111 | 112 | – no more Seq Scan, and we have as few as 5 buffer hits, or 40 KiB to find 2 rows we need. This explains why the 113 | execution time went from ~100ms down to ~0.07ms, this is ~1400x faster. 114 | 115 | More operators in [official docs](https://postgresql.org/docs/current/functions-array.html#FUNCTIONS-ARRAY). 116 | -------------------------------------------------------------------------------- /0031_how_to_troubleshoot_a_growing_pg_wal_directory.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1717773398586298703), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to troubleshoot a growing pg_wal directory 6 | 7 | > I post a new PostgreSQL "howto" article every day. 
Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | Directory `$PGDATA/pg_wal` contains WAL files. WAL (Write-Ahead Log) is a core mechanism used for backups, recovery, and 11 | both physical and logical replication. 12 | 13 | In certain cases, `pg_wal` size keeps growing, and this can become concerning due to increasing risks to be out of disk 14 | space. 15 | 16 | Here are the things to check to troubleshoot a growing `pg_wal` directory. 17 | 18 | ## Step 1: check replication slots 19 | 20 | Unused or lagging replication slots keep WALs from being recycled, hence the `pg_wal` directory size grows. 21 | 22 | Check on the primary: 23 | 24 | ```sql 25 | select 26 | pg_size_pretty(pg_current_wal_lsn() - restart_lsn) as lag, 27 | slot_name, 28 | wal_status, 29 | active 30 | from pg_replication_slots 31 | order by 1 desc; 32 | ``` 33 | 34 | Reference doc: [The view pg_replication_slots](https://postgresql.org/docs/current/view-pg-replication-slots.html) 35 | 36 | - If there are inactive replication slots, consider dropping them to prevent reaching 100% of used disk space. Once the 37 | problematic slot(s) are dropped, Postgres will remove old WALs. 38 | - Alternatively, consider using 39 | [max_slot_wal_keep_size (PG13+)](https://postgresqlco.nf/doc/en/param/max_slot_wal_keep_size/). 40 | 41 | ## Step 2: check if `archive_command` works well 42 | 43 | If `archive_mode` and `archive_command` are configured to archive WALs (e.g., for backup purposes), but 44 | `archive_command` is failing (returns non-zero exit code) or lagging (WAL generation rates are higher than the speed of 45 | archiving), then this can be another reason of `pg_wal` growth. 46 | 47 | How to monitor and troubleshoot it: 48 | 49 | - check [pg_stat_archiver](https://postgresql.org/docs/current/monitoring-stats.html#MONITORING-PG-STAT-ARCHIVER-VIEW) 50 | - check Postgres logs (e.g., check for `archive command failed with exit code 1`) 51 | 52 | Once the problem is identified, the `archive_command` needs to be either fixed or sped up (e.g., `wal_compression = on`, 53 | `max_wal_size` increased to have less WAL data generated; and, at the same time, use lighter compression in the archiver 54 | tool -- this depends on the tool used in `archive_command`; e.g., WAL-G support many options for compression, more or 55 | less CPU intensive). 56 | 57 | The next two steps are to be considered as additional, since their effects on the `pg_wal` size growth are limited – 58 | they can cause only certain amount of extra WALs being kept in `pg_wal` 59 | (unlike the first two reasons we just discussed). 60 | 61 | ## Step 3: check `wal_keep_size` 62 | 63 | In some cases, `wal_keep_size` ([PG13+ docs](https://postgresqlco.nf/doc/en/param/wal_keep_size/); in PG12 and older, 64 | see `wal_keep_segments`) is set to a high value. When slots are used, it's not generally needed – this is an older (than 65 | slots) mechanism to avoid situations when some WAL is deleted and a lagging replica cannot catch up. 66 | 67 | ## Step 4: check `max_wal_size` and `checkpoint_timeout` 68 | 69 | When a successful checkpoint happens, Postgres can delete old WALs. In some cases, if checkpoint tuning was performed in 70 | favor of less frequent checkpoints, this can cause more WALs to be stored in `pg_wal` than one could expect. In this 71 | case, if it's a problem for disk space (specifically important on smaller servers), reconsider `max_wal_size` and 72 | `checkpoint_timeout` to lower values. 
In some cases, it also can make sense to run an explicit manual `CHECKPOINT`, to 73 | allow Postgres to clean up some old files right away. 74 | 75 | ## Summary 76 | 77 | Most important: 78 | 79 | 1. Check for unused or lagging replication slots 80 | 2. Check for failing or lagging `archive_command` 81 | 82 | Additionally: 83 | 84 | 3. Check `wal_keep_size` 85 | 4. Check `max_wal_size` and `checkpoint_timeout` 86 | -------------------------------------------------------------------------------- /0032_how_to_speed_up_bulk_load.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1718135635448660118), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to speed up bulk load 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | If you need to load a lot of data, here are the tips that can help you do it faster. 11 | 12 | ## 1) COPY 13 | 14 | Use `COPY` to load data – it's optimized for bulk load. 15 | 16 | ## 2) Less frequent checkpoints 17 | 18 | Consider increasing `max_wal_size` and `checkpoint_timeout` temporarily. 19 | 20 | Changing them does not require a restart. 21 | 22 | Increased values lead to increased recovery time in case of failure, but the benefit is that checkpoints occur less often, 23 | therefore: 24 | 1. less stress on disk, 25 | 2. less WAL data is written, thanks to a decreased number of full page writes of the same pages (when load happens with 26 | existing indexes). 27 | 28 | ## 3) Larger buffer pool 29 | 30 | Increase `shared_buffers`, if you can. 31 | 32 | ## 4) No (or fewer) indexes 33 | 34 | If load happens into a new table, create indexes after data load. When loading into an existing table, 35 | [avoid over-indexing](0018_over_indexing.md). 36 | 37 | Every additional index will significantly slow down the load. 38 | 39 | ## 5) No (or fewer) FKs and triggers 40 | 41 | Similarly to indexes, foreign key constraints and triggers may significantly slow down data load – consider (re)creating 42 | them after the bulk load. 43 | 44 | Triggers can be disabled via `ALTER TABLE … DISABLE TRIGGER ALL` – however, if triggers support some consistency 45 | checks, you need to make sure that those checks are not violated (e.g., run additional checks after data load). FKs are 46 | implemented via implicit triggers, and `ALTER TABLE … DISABLE TRIGGER ALL` disables them too – loading data in this 47 | state should be done with care. 48 | 49 | ## 6) Avoiding WAL writes 50 | 51 | If this is a new table, consider completely avoiding WAL writes during the data load. Two options (both have limitations 52 | and require understanding that data can be lost if a crash happens): 53 | 54 | - Use an unlogged table: `CREATE UNLOGGED TABLE …`. Unlogged tables are not archived, not replicated, and they are not persistent (though, they survive normal restarts). However, converting an unlogged table to a normal one takes time (likely, a lot – worth testing), because the data needs to be written to WAL. More about unlogged tables in [this post](https://crunchydata.com/blog/postgresl-unlogged-tables); also, see [this StackOverflow discussion](https://dba.stackexchange.com/questions/195780/set-postgresql-table-to-logged-after-data-loading/195829#195829). 55 | 56 | - Use `COPY` with `wal_level ='minimal'`. `COPY` has to be executed inside the transaction that created the table. 
57 | In this case, due to `wal_level ='minimal'`, `COPY` writes won't be written to WAL 58 | (as of PG16, this is so only if table is unpartitioned). 59 | Additionally, consider using `COPY (FREEZE)` – this approach also provides a benefit: all tuples 60 | are frozen after the data load. Setting `wal_level='minimal'`, unfortunately, requires a restart, and additional 61 | changes (`archive_mode = 'off'`, `max_wal_senders = 0`). Of course, this method doesn't work well in most of the 62 | production cases, but can be good for single-server setups. Details for the `wal_level='minimal'` + `COPY (FREEZE)` 63 | recipe in [this post](https://cybertec-postgresql.com/en/loading-data-in-the-most-efficient-way/). 64 | 65 | ## 7) Parallelization 66 | 67 | Consider parallelization. This may or may not speed up the process, depending on the bottlenecks of the single-threaded 68 | process (e.g., if single-threaded load saturates disk IO, parallelization won't help). Two options: 69 | 70 | - Partitioned tables and loading into multiple partitions using multiple workers 71 | ([Day 20: pg_restore tips](0020_how_to_use_pg_restore.md)). 72 | 73 | - Unpartitioned table and loading in big chunks. Such chunks require preparation of them – it can be CSV split into 74 | pieces, or exported ranges of table data using multiple synchronized `REPEATABLE READ` transactions (working with the 75 | same snapshot via `SET TRANSACTION SNAPSHOT`; see [Day 8: How to speed up pg_dump](0008_how_to_speed_up_pg_dump.md). 76 | 77 | If you use TimescaleDB, consider [timescaledb-parallel-copy](https://github.com/timescale/timescaledb-parallel-copy). 78 | 79 | Last but not least: after a massive data load, don't forget to run `ANALYZE`. 80 | -------------------------------------------------------------------------------- /0034_how_to_perform_postgres_tuning.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1718886593153630663), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to perform initial / rough Postgres tuning 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | Modern Postgres provides more than 300 settings (a.k.a. GUC variables – "grand unified configuration"). Fine-tuning 11 | Postgres for a particular environment, database, and workload is a very complex task. 12 | 13 | But in most cases, the Pareto principle (a.k.a. rule 80/20) works pretty well: you need to spend limited effort to 14 | address basic areas of tuning, and then focus on query performance. The reasoning behind this approach is simple and 15 | solid: yes, you can spend a lot of effort and find a better value of `shared_buffers` than the traditional 25% (which, 16 | as many people think, is far from ideal: e.g., 17 | see [Andres Freund's Tweet](https://twitter.com/andresfreundtec/status/1178765225895399424)), and then 18 | find yourself in a position where a few queries with suboptimal performance – e.g., lacking proper indexes – ruin all 19 | the positive effect from that fine-tuning. 20 | 21 | Therefore, I recommend this approach: 22 | 23 | 1. Basic "rough" tuning 24 | 2. Log-related settings 25 | 3. Autovacuum tuning 26 | 4. Checkpointer tuning 27 | 5. 
Then focus on query optimization, reactive or proactive, and fine-tuning for specific areas only when there is a 28 | strong reason for it 29 | 30 | ## Basic rough tuning 31 | 32 | For initial rough tuning, the empirical tools are "good enough" in most cases, following the 80/20 principle (actually, 33 | perhaps even 95/5 in this case): 34 | 35 | - [PGTune](https://pgtune.leopard.in.ua) ([source code](https://github.com/le0pard/pgtune)) 36 | - [PostgreSQL Configurator](https://pgconfigurator.cybertec.at) 37 | - for TimescaleDB users: [timescaledb-tune](https://github.com/timescale/timescaledb-tune) 38 | 39 | In addition to the official docs, [this resource](https://postgresqlco.nf) is good to use as a reference (it has 40 | integrated information from various sources, not only official docs) – for example, check the page for 41 | [random_page_cost](https://postgresqlco.nf/doc/en/param/random_page_cost/), a parameter which is quite often forgotten. 42 | 43 | If you use a managed Postgres service such as RDS, quite likely this level of tuning is already performed when you 44 | provision a server. But it's still worth double-checking – for example, some providers provision a server with an SSD disk 45 | but leave `random_page_cost` at its default – `4` – which is an outdated value suitable for magnetic disks. Just set it to `1` 46 | if you have an SSD. 47 | 48 | It is important to perform this level of tuning before query optimization efforts, because otherwise, you might need to 49 | re-do query optimization once you have adjusted the basic configuration. 50 | 51 | ## Log-related settings 52 | 53 | A general rule here: the more logging, the better. Of course, assuming that you avoid saturation of two kinds: 54 | 55 | - disk space (logs filled the disk) 56 | - disk IO (too many writes per second caused by logging) 57 | 58 | In short, my recommendations are (this is worth a separate detailed post): 59 | 60 | - turn on checkpoint logging, `log_checkpoints='on'` (fortunately, it's on by default in PG15+), 61 | - turn on all autovacuum logging, `log_autovacuum_min_duration=0` (or a very low value) 62 | - log temporary files except tiny ones (e.g., `log_temp_files = 100`) 63 | - log all DDL statements: `log_statement='ddl'` 64 | - adjust `log_line_prefix` 65 | - set a low value for `log_min_duration_statement` (e.g., `500ms`) and/or use `auto_explain` to log slow queries with 66 | plans 67 | 68 | ## Autovacuum tuning 69 | 70 | This is a big topic worth a separate post. In short, the key idea is that the default settings don't suit any modern 71 | OLTP case (web/mobile apps), so autovacuum always has to be tuned. If we don't do it, autovacuum becomes a "converter" 72 | of large portions of dead tuples to bloat, and this eventually negatively affects performance. 73 | 74 | Two areas of tuning need to be addressed: 75 | 76 | 1. Increase the frequency of processing – by lowering the `**_scale_factor` / `**_threshold` settings, we make autovacuum 77 | workers process tables when a relatively low number of dead tuples has accumulated 78 | 2. Allocate more resources for processing: more autovacuum workers (`autovacuum_max_workers`), more memory 79 | (`autovacuum_work_mem`), and higher "quotas" for work (controlled via `**_cost_limit` / `**_cost_delay`). 80 | 81 | ## Checkpointer tuning 82 | 83 | Again, it's worth a separate post.
But in short, you need to consider raising `checkpoint_timeout` and – most 84 | importantly – `max_wal_size` (whose default is very small for modern machines and data volumes, just `1GB`), so 85 | checkpoints occur less frequently, especially when a lot of writes happen. However, shifting settings in this direction 86 | mean longer recovery time in case of a crash or recovery from backups – this is a trade-off that needs to be analyzed 87 | for a particular case. 88 | 89 | That's it. Generally, this initial/rough tuning of Postgres config shouldn't take long. For a particular cluster of type 90 | of clusters, it's a 1-2 day work for an engineer. You don't actually need AI for this, empirical tools work well – 91 | unless you do aim to squeeze 5-10% more (you might want it though, e.g., if you have thousands of servers). 92 | -------------------------------------------------------------------------------- /0035_how_to_use_subtransactions_in_postgres.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1719228501658882268), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to use subtransactions in Postgres 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | ## TL;DR 11 | 12 | Don't use subtransactions, unless absolutely necessary. 13 | 14 | ## What are subtransactions? 15 | 16 | A subtransaction, also known as "nested transaction", is a transaction started by instruction within the scope of an 17 | already started transaction (src: [Wikipedia](https://en.wikipedia.org/wiki/Nested_transaction)). This feature allows 18 | users to partially roll back a transaction, which is helpful in many cases: fewer steps need to be repeated to retry the 19 | action if some error occurs. 20 | 21 | The SQL standard defines two basic instructions describing this mechanism: `SAVEPOINT` and extension to the `ROLLBACK` 22 | statement – `ROLLBACK TO SAVEPOINT`. Postgres implements it, allowing slight deviations from the standard syntax – for 23 | example, allowing the omission of the word `SAVEPOINT` in the `RELEASE` and `ROLLBACK` statements. 24 | 25 | You might be already using subtransactions, for example: 26 | 27 | - In Django, using nested ["atomic()" blocks](https://docs.djangoproject.com/en/5.0/topics/db/transactions/#savepoints). 28 | - Implicitly: using the `BEGIN / EXCEPTION WHEN ... / END` blocks in PL/pgSQL functions. 
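For example, here is a minimal sketch (table name and logic are hypothetical, for illustration only) showing how a `BEGIN / EXCEPTION WHEN ... / END` block implicitly opens a subtransaction:

```sql
-- hypothetical table and function, for illustration only
create table if not exists accounts (id int8 primary key, balance numeric not null default 0);

create or replace function add_account(acc_id int8) returns text
language plpgsql as $$
begin
  begin  -- entering this block implicitly creates a subtransaction (a savepoint)
    insert into accounts(id) values (acc_id);
    return 'inserted';
  exception
    when unique_violation then
      -- only the implicit subtransaction is rolled back; the outer transaction continues
      return 'already exists';
  end;
end $$;

select add_account(1), add_account(1);  -- the second call hits the EXCEPTION branch
```

Each entry into such a block opens a subtransaction under the hood – which is why the warnings below apply to this pattern under heavy load.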
29 | 30 | ## How to use (if you really want) 31 | 32 | Syntax: 33 | 34 | - `SAVEPOINT savepoint_name` ([SAVEPOINT](https://postgresql.org/docs/current/sql-savepoint.html)) 35 | - `ROLLBACK [ WORK | TRANSACTION ] TO [ SAVEPOINT ] savepoint_name` 36 | ([ROLLBACK TO](https://postgresql.org/docs/current/sql-rollback-to.html)) 37 | - `RELEASE [ SAVEPOINT ] savepoint_name` ([RELEASE SAVEPOINT](https://postgresql.org/docs/current/sql-release-savepoint.html)) 38 | 39 | An example: 40 | 41 | ![Rolled-back subtransaction example](files/0035_rolled_back_subtransaction_example.jpg) 42 | 43 | ## Recommendations 44 | 45 | The only actual recommendation I have for any project that aims to grow OLTP-like workload (web and mobile apps) is: 46 | 47 | > wherever possible, avoid subtransactions 48 | 49 | ...if you don't want this to happen one day: 50 | 51 | ![Performance drop for more than 64 subtransactions in a transaction](files/0035_performance_drop_too_many_subtx.jpg) 52 | 53 | ...or this: 54 | 55 | ![Performance drop of standby server with primary running a long transaction and many subtransactions](files/0035_standby_server_killed.jpg) 56 | 57 | You can find the detailed analysis of four dangers of subtransactions in 58 | [PostgreSQL Subtransactions Considered Harmful](https://postgres.ai/blog/20210831-postgresql-subtransactions-considered-harmful). 59 | 60 | As of 2023 / PG16, these issues are not yet solved, although there is some work in progress to implement various 61 | optimizations: 62 | 63 | - [More scalable multixacts buffers and locking](https://commitfest.postgresql.org/45/2627/) 64 | - [suboverflowed subtransactions concurrency performance optimize](https://postgresql.org/message-id/flat/003201d79d7b%24189141f0%2449b3c5d0%24%40tju.edu.cn) 65 | (unfortunately, patch reverted) 66 | 67 | As a bottom line: 68 | 69 | 1. If you can, don't use subtransactions 70 | 2. Keep an eye on pgsql-hackers threads related to them and if you can, participate (help test and improve) 71 | 3. If absolutely necessary, then 72 | study [Problem 3: unexpected use of Multixact IDs](https://postgres.ai/blog/20210831-postgresql-subtransactions-considered-harmful#problem-3-unexpected-use-of-multixact-ids) 73 | and: 74 | - use them in lower-TPS systems only 75 | - avoid deep nesting 76 | - be very careful with `SELECT ... FOR UPDATE` in transactions that include subtransactions 77 | - monitor the `pg_stat_slru` numbers ( 78 | PG13+, [Monitoring stats](https://postgresql.org/docs/current/monitoring-stats.html)) to quickly 79 | notice and troubleshoot SLRU overflow if it happens 80 | -------------------------------------------------------------------------------- /0038_how_to_not_get_screwed_as_a_dba.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1720360315899240720), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to NOT get screwed as a DBA (DBRE) 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | The rules below are quite simple (but, often due to organizational reasons, not trivial to implement). 11 | 12 | The following should be helpful for people responsible for databases in a fast-growing startup. Some items might be 13 | useful for DBAs/DBREs in large companies. 14 | 15 | ## 1) Ensure that backup system is rock solid 16 | 17 | 1. 
Do not use `pg_dump` as a backup tool – instead, use a system with PITR (e.g., `pgBackRest`, `WAL-G`). If using a 18 | managed solution, learn in detail how backup systems are organized – do not trust blindly. 19 | 20 | 2. Learn about RPO and RTO, measure actual values and define target values. Cover values with monitoring (e.g., lagging 21 | `archive_command` should be considered as the highest priority incident). 22 | 23 | 3. TEST backups. This is the most critical part. An untested backup is a _Schrödinger's Backup_ – the condition of any 24 | backup is unknown until a restore is attempted. Automate testing. 25 | 26 | 4. Partial restore automation and speedup: in some cases, it is necessary to be able to recover a manually deleted data, 27 | not a whole database. In this case, to speed up recovery, consider options: (a) special delayed replica; (b) frequent 28 | cloud snapshots + PITR; (c) DBLab with hourly snapshots and PITR on a clone. 29 | 30 | 5. More topics to cover: proper encryption options, retention policy with old backups moved to a "colder" storage 31 | (cheaper), secondary location in a different cloud with very limited access. 32 | 33 | Without doubt, backups are the most important topic in database administration. Getting screwed in this area is the 34 | worst nightmare of any DBA. Pay maximum attention to backups, learn from other people's mistakes, not yours. 35 | 36 | Reliable backup system is, perhaps, one of the biggest reasons why managed Postgres services are preferred in some 37 | organizations. But again: don't trust blindly - study all the details, and test them yourself. 38 | 39 | ## 2) Corruption control 40 | 41 | 1. Enable [data checksums](0037_how_to_enable_data_checksums_without_downtime.md) 42 | 43 | 2. Be careful with OS / `glibc` upgrades – avoid index corruption. 44 | 45 | 3. Use `amcheck` to test indexes, heap, sequences. 46 | 47 | 4. Set up alerts to quickly react on error codes `XX000`, `XX001`, `XX002` in Postgres 48 | logs ([PostgreSQL Error Codes](https://postgresql.org/docs/current/errcodes-appendix.html)) 49 | 50 | 5. There are many types of corruption – cover the low-hanging fruits and continue learning and implementing better 51 | control and corruption prevention measures (a set of good materials on the 52 | topic: [PostgreSQL Data Corruption and Bugs – Runbook](https://docs.google.com/spreadsheets/u/1/d/1zUH7IYOv46CVSmc-72CD7ROnMA6skJSQZjnm4yxvX9A/edit#gid=0)) 53 | 54 | ## 3) HA 55 | 56 | - Have standbys 57 | - Consider using `synchronous_commit=remote_write` (or even `remote_apply`, depending on the case) and 58 | `synchronous_standby_names` ([Multiple Synchronous Standbys](https://postgresql.org/docs/current/warm-standby.html#SYNCHRONOUS-REPLICATION-MULTIPLE-STANDBYS)) 59 | - If self-managed, use **Patroni**. Otherwise, study all the HA options your provider offer and use them. 60 | 61 | ## 4) Performance 62 | 63 | - Have a good monitoring 64 | system ([my PGCon slide deck on monitoring](https://twitter.com/samokhvalov/status/1664686535562625034)) 65 | - Set up advanced query analysis: 66 | `pg_stat_statements`, `pg_stat_kcache`, `pg_wait_sampling` / `pgsentinel`, `auto_explain` 67 | - Build scalable "lab" environments and process for experimentation – DBA should not be a bottleneck for engineers to 68 | check ideas (spoiler: [@Database_Lab](https://twitter.com/Database_Lab) solves it). 69 | - Implement capacity planning, make sure you have room for growth, perform proactive benchmarks. 
70 | - Architecture: microservices and sharding are all great and worth definitely considering, but the main question is: 71 | When? From the very beginning of the startup, or later? The answer is "it depends". Remember, with current Postgres 72 | performance and hardware any cloud offers, you can be fine and grow to dozens and hundreds of TiBs and hundreds of 73 | thousands of TPS. Choose your priorities – there are many successful stories with any approach. 74 | - Don't hesitate to ask for help – community or paid consulting. Huge projects are running on Postgres, and there is a 75 | lot of experience accumulated. 76 | 77 | ## 5) Learn from other people's mistakes 78 | 79 | - Postgres still has 32-bit transaction IDs. Make sure you don't hit the transaction ID (and multi-XID) wraparound – 80 | [Sentry's case](https://blog.sentry.io/transaction-id-wraparound-in-postgres/) and 81 | [Mailchimp's one](https://mailchimp.com/what-we-learned-from-the-recent-mandrill-outage/) are good lessons. 82 | - Subtransactions – I personally consider their use dangerous in heavily-loaded systems and recommend 83 | [avoiding them](0035_how_to_use_subtransactions_in_postgres.md). 84 | - Tune `autovacuum` – don't allow Postgres to accumulate a lot of dead tuples and/or bloat (yes, these two are different 85 | things). Turning off `autovacuum` is a good way to put your server down. 86 | - In the OLTP context, avoid long-running transactions and unused/lagging replication slots. 87 | - Learn how to deploy schema changes without downtime. 88 | Useful articles: 89 | - [Zero-downtime Postgres schema migrations need this: lock_timeout and retries](https://postgres.ai/blog/20210923-zero-downtime-postgres-schema-migrations-lock-timeout-and-retries) 90 | - [Common DB schema change mistakes](https://postgres.ai/blog/20220525-common-db-schema-change-mistakes) 91 | -------------------------------------------------------------------------------- /0039_how_to_break_a_database_part_1_how_to_corrupt.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1720734029207732456), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to break a database, Part 1: How to corrupt 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | Sometimes, you might want to damage a database – for educational purposes, to simulate failures, learn how to deal with 11 | them, to test mitigation procedures. 12 | 13 | Let's discuss some ways to break things. 14 | 15 | 16 | ⚠️ Don't do it in production unless you're a chaos engineer ⚠️ 17 | 18 | 19 | ## Corruption 20 | 21 | There are many types of corruption and there are very simple ways to get a corrupted database, for example: 22 | 23 | 👉 **Modifying system catalogs directly:** 24 | 25 | ```sql 26 | nik=# create table t1(id int8 primary key, val text); 27 | CREATE TABLE 28 | 29 | nik=# delete from pg_attribute where attrelid = 't1'::regclass and attname = 'val'; 30 | DELETE 1 31 | 32 | nik=# table t1; 33 | ERROR: pg_attribute catalog is missing 1 attribute(s) for relation OID 107006 34 | LINE 1: table t1; 35 | ^ 36 | ``` 37 | 38 | More ways can be found in this article: 39 | [How to corrupt your PostgreSQL database](https://cybertec-postgresql.com/en/how-to-corrupt-your-postgresql-database/). 
40 | A couple of interesting methods from there: 41 | 42 | - `fsync=off` + `kill -9` to Postgres (or `pg_ctl stop -m immediate`) 43 | - `kill -9` + `pg_resetwal -f` 44 | 45 | One useful method is to use `dd` to write to a data file directly. This can be used to simulate a corruption that can be 46 | detected by checksum verification 47 | ([Day 37: How to enable data checksums without downtime](0037_how_to_enable_data_checksums_without_downtime.md)). This 48 | is also demonstrated in this article: 49 | [pg_healer: repairing Postgres problems automatically](https://endpointdev.com/blog/2016/09/pghealer-repairing-postgres-problems/). 50 | 51 | First, create a table and see where its data file is located: 52 | 53 | ```sql 54 | nik=# show data_checksums; 55 | data_checksums 56 | ---------------- 57 | on 58 | (1 row) 59 | 60 | nik=# create table t1 as select i from generate_series(1, 10000) i; 61 | SELECT 10000 62 | 63 | nik=# select count(*) from t1; 64 | count 65 | ------- 66 | 10000 67 | (1 row) 68 | 69 | nik=# select format('%s/%s', 70 | current_setting('data_directory'), 71 | pg_relation_filepath('t1')); 72 | format 73 | --------------------------------------------------- 74 | /opt/homebrew/var/postgresql@15/base/16384/123388 75 | (1 row) 76 | ``` 77 | 78 | Now, let's write some garbage to this file directly, using `dd` (note that here we use a macOS version, where `dd` has 79 | the option `oseek` – on Linux, it's `seek_bytes`), and then restart Postgres to make sure the table is not present in 80 | the buffer pool anymore: 81 | 82 | ```bash 83 | ❯ echo -n "BOOoo" \ 84 | | dd conv=notrunc bs=1 \ 85 | oseek=4000 count=1 \ 86 | of=/opt/homebrew/var/postgresql@15/base/16384/123388 87 | 1+0 records in 88 | 1+0 records out 89 | 1 bytes transferred in 0.000240 secs (4167 bytes/sec) 90 | 91 | ❯ brew services stop postgresql@15 92 | Stopping `postgresql@15`... (might take a while) 93 | ==> Successfully stopped `postgresql@15` (label: homebrew.mxcl.postgresql@15) 94 | 95 | ❯ brew services start postgresql@15 96 | ==> Successfully started `postgresql@15` (label: homebrew.mxcl.postgresql@15) 97 | ``` 98 | 99 | Successfully corrupted – the data checksums mechanism complains about it: 100 | 101 | ```sql 102 | nik=# table t1; 103 | WARNING: page verification failed, calculated checksum 52379 but expected 35499 104 | ERROR: invalid page in block 0 of relation base/16384/123388 105 | ``` 106 | 107 | **🔜 To be continued ...** 108 | -------------------------------------------------------------------------------- /0041_harmful_workloads.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1721397029979779140), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to break a database, Part 3: Harmful workloads 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | See also 11 | 12 | - [Part 1: How to Corrupt](0039_how_to_break_a_database_part_1_how_to_corrupt.md). 13 | - [Part 2: Simulate infamous transaction ID wraparound](0040_how_to_break_a_database_part_2_simulate_xid_wraparound.md). 14 | 15 | ## Too many connections 16 | 17 | A simple snippet that creates 100 idle connections just with `psql` and a named pipe (a.k.a. 
FIFO, works in both macOS 18 | and Linux): 19 | 20 | ```bash 21 | mkfifo dummy 22 | 23 | for i in $(seq 100); do 24 | psql -Xf dummy >/dev/null 2>&1 & 25 | done 26 | 27 | ❯ psql -Xc 'select count(*) from pg_stat_activity' 28 | count 29 | ------- 30 | 106 31 | (1 row) 32 | ``` 33 | 34 | To close these connections, we can open a writing file descriptor to the FIFO and close it without writing any data: 35 | 36 | ```bash 37 | exec 3>dummy && exec 3>&- 38 | ``` 39 | 40 | Now the 100 extra connections have gone: 41 | 42 | ```bash 43 | ❯ psql -Xc 'select count(*) from pg_stat_activity' 44 | count 45 | ------- 46 | 6 47 | (1 row) 48 | ``` 49 | 50 | And if the number of connections reaches `max_connections` when we perform the steps above, we should see this when 51 | trying to establish a new connection: 52 | 53 | ```bash 54 | ❯ psql 55 | psql: error: connection to server on socket "/tmp/.s.PGSQL.5432" failed: FATAL: sorry, too many clients already 56 | ``` 57 | 58 | ## Idle-in-transaction sessions 59 | 60 | This recipe we used in the XID wraparound simulation: 61 | 62 | ```bash 63 | mkfifo dummy 64 | 65 | psql -Xc " 66 | set idle_in_transaction_session_timeout = 0; 67 | begin; 68 | select pg_current_xact_id() 69 | " \ 70 | -f dummy & 71 | ``` 72 | 73 | To release: 74 | 75 | ```bash 76 | exec 3>dummy && exec 3>&- 77 | ``` 78 | 79 | ## More types of harm using various tools 80 | 81 | This tool can help you simulate various harmful workloads: 82 | [noisia – harmful workload generator for PostgreSQL](https://github.com/lesovsky/noisia). 83 | 84 | As of 2023, it supports: 85 | 86 | - idle transactions - active transactions on hot-write tables that do nothing during their lifetime 87 | - rollbacks - fake invalid queries that generate errors and increase rollbacks counter 88 | - waiting transactions - transactions that lock hot-write tables and then idle, leading to other transactions getting 89 | stuck 90 | - deadlocks - simultaneous transactions where each holds locks that the other transactions want 91 | - temporary files - queries that produce on-disk temporary files due to lack of `work_mem` 92 | - terminate backends - terminate random backends (or queries) using `pg_terminate_backend()`, `pg_cancel_backend()` 93 | - failed connections - exhaust all available connections (other clients unable to connect to Postgres) 94 | - fork connections - execute single, short query in a dedicated connection (lead to excessive forking of Postgres 95 | backends) 96 | 97 | And this tool will crash your database periodically: [pg_crash](https://github.com/cybertec-postgresql/pg_crash) 98 | 99 | For Aurora users, there are interesting functions: `aurora_inject_crash()`, `aurora_inject_replica_failure()`, 100 | `aurora_inject_disk_failure()`, `aurora_inject_disk_congestion()`: See 101 | [Testing Amazon Aurora PostgreSQL by using fault injection queries](https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/AuroraPostgreSQL.Managing.FaultInjectionQueries.html). 102 | 103 | ## Summary 104 | 105 | The whole topic of chaos engineering is interesting and, in my opinion, has a great potential in there are of 106 | databases – to test recovery, failover, practice various incident situations. 
Some resources (beyond databases): 107 | 108 | - Wikipedia: [Chaos engineering](https://en.wikipedia.org/wiki/Chaos_engineering) 109 | - Netflix's [Chaos Monkey](https://github.com/Netflix/chaosmonkey), a resiliency tool that helps applications tolerate 110 | random instance failures 111 | 112 | Ideally, mature processes of database administration, whether in the cloud or not, managed or not, should include: 113 | 114 | 1. Regular simulation of incidents in non-production, to practice and improve runbooks for incident mitigation. 115 | 116 | 2. Regular initiation of incidents in production to see how automated mitigation *actually* works. For example: 117 | auto-removal of a crashed replica, autofailover, alerts and team response to long-running transactions. 118 | -------------------------------------------------------------------------------- /0044_how_to_monitor_transaction_id_wraparound_risks.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1722585894430105822), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to monitor transaction ID wraparound risks 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | Transaction ID and MultiXact ID wraparound is one of the worst incidents that can happen with a Postgres database. 11 | Infamous cases: 12 | 13 | - [Sentry (2015)](https://blog.sentry.io/transaction-id-wraparound-in-postgres/) 14 | - [Joyent (2015)](https://tritondatacenter.com/blog/manta-postmortem-7-27-2015) 15 | - [Mailchimp (2019)](https://mailchimp.com/what-we-learned-from-the-recent-mandrill-outage/) 16 | 17 | Good sources of knowledge: 18 | 19 | - [Postgres docs](https://postgresql.org/docs/current/routine-vacuuming.html#VACUUM-FOR-WRAPAROUND) 20 | - Hironobu Suzuki's book "The Internals of PostgreSQL": [Vacuum Processing](https://interdb.jp/pg/pgsql06.html) 21 | - Egor Rogov's book "PostgreSQL 14 Internals" 22 | - [PostgreSQL/Wraparound and Freeze](https://en.wikibooks.org/wiki/PostgreSQL/Wraparound_and_Freeze) 23 | 24 | There are two specific types of metrics that every monitoring setup must include (along with proper alerts): 25 | 26 | 1. The oldest XID and MultiXID (a.k.a. MultiXact ID) values used in non-frozen tuples 27 | 2. Monitoring for `xmin` horizon 28 | 29 | Here we discuss the former. 30 | 31 | ## 32-bit XIDs 32 | 33 | XIDs and MultiXact IDs are 32-bit, so the overall space size is 2^32 ≈ 4.2B. But due to the modulo arithmetic 34 | used, there is a concept of "the future" for XIDs – one half of the space is considered the past, the other half is the 35 | future. Thus, the capacity we need to monitor is 2^31 ≈ 2.1B. 36 | 37 | An illustration 38 | from [Wikibooks "Wraparound and Freeze"](https://en.wikibooks.org/wiki/PostgreSQL/Wraparound_and_Freeze): 39 | 40 | ![Wraparound and freeze](files/0044_wraparound_and_freeze.jpg) 41 | 42 | To prevent XID/MultiXID wraparound, `autovacuum`, among other tasks, regularly performs "freezing" of old tuples, 43 | marking tuples as frozen, meaning that these tuples belong to the past. 44 | 45 | Therefore, monitoring the tuple XID/MultiXID ages is crucial to control the wraparound risks.
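As a quick illustration of the scale involved (a minimal sketch; fuller monitoring queries are shown below), the worst-case XID age across all databases can be expressed as a share of the ~2.1B capacity:

```sql
-- rough single-number check: how far the oldest database is towards wraparound
select
  max(age(datfrozenxid)) as max_xid_age,
  round(100 * max(age(datfrozenxid))::numeric / (2^31)::numeric, 2) as percent_of_capacity
from pg_database;
```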
46 | 47 | ## XID and MultiXID wraparound risk monitoring 48 | 49 | The most common approach: 50 | 51 | - Checking `pg_database.datfrozenxid` helps understand how well `autovacuum` performs its "freezing" duties, replacing 52 | old XIDs in tuples, at high level. 53 | - For further drill down, `pg_class.relfrozenxid` can be inspected for each relation. 54 | - XIDs are numbers, and to compare these values with the current XID, function `age(...)` can be used. 55 | 56 | Another type of risk also worth checking (usually missing in observability tools) is 57 | [MultiXact ID wraparound](https://postgresql.org/docs/current/routine-vacuuming.html#VACUUM-FOR-MULTIXACT-WRAPAROUND). 58 | 59 | - `pg_database` has `datminmxid` 60 | - `pg_class` has `relminmxid` 61 | - instead of `age(...)`, we need to use `mxid_age(...)`. 62 | 63 | An example of high-level (databases) cluster-wide monitoring query: 64 | 65 | ```sql 66 | with data as ( 67 | select 68 | oid, 69 | datname, 70 | age(datfrozenxid) as xid_age, 71 | mxid_age(datminmxid) as mxid_age, 72 | pg_database_size(datname) as db_size 73 | from pg_database 74 | ) 75 | select 76 | *, 77 | pg_size_pretty(db_size) as db_size_hr 78 | from data 79 | order by greatest(xid_age, mxid_age) desc; 80 | ``` 81 | 82 | And at table-level inside a particular database, TOP-25: 83 | 84 | ```sql 85 | with data as ( 86 | select 87 | format( 88 | '%I.%I', 89 | nspname, 90 | c.relname 91 | ) as table_name, 92 | greatest(age(c.relfrozenxid), age(t.relfrozenxid)) as xid_age, 93 | greatest(mxid_age(c.relminmxid), mxid_age(t.relminmxid)) as mxid_age, 94 | pg_table_size(c.oid) as table_size, 95 | pg_table_size(t.oid) as toast_size 96 | from pg_class as c 97 | join pg_namespace pn on pn.oid = c.relnamespace 98 | left join pg_class as t on c.reltoastrelid = t.oid 99 | where c.relkind in ('r', 'm') 100 | ) 101 | select * 102 | from data 103 | order by greatest(xid_age, mxid_age) desc 104 | limit 25; 105 | ``` 106 | 107 | ## Alerts 108 | 109 | If we ages grow above certain threshold (usually 200M, see 110 | [autovacuum_freeze_max_age](https://postgresqlco.nf/doc/en/param/autovacuum_freeze_max_age/)), this is a sign that 111 | something is blocking normal autovacuum work. 112 | 113 | Therefore, depending on the autovacuum settings, monitoring systems have to be configured to issue alerts for the tuple 114 | XID and MultiXID age values exceeding predefined thresholds in range 300M-1B. Values above 1B have to be considered 115 | dangerous, requiring urgent mitigation actions. 116 | -------------------------------------------------------------------------------- /0046_how_to_deal_with_bloat.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1723333152847077428), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to deal with bloat 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | ## What is bloat? 11 | 12 | Bloat is the free space inside pages, created when `autovacuum` deletes a large number of tuples. 13 | 14 | When a row in a table is updated, Postgres doesn't overwrite the old data. Instead, it marks the old row version (tuple) 15 | as "dead" and creates a new row version. Over time, as more rows are updated or deleted, the space taken up by these 16 | dead tuples can accumulate. 
At some point, `autovacuum` (or manual `VACUUM`) deletes dead tuples, leaving free space 17 | inside pages, available for reuse. But if large numbers of dead tuples accumulate, large volumes of free space can be 18 | left behind – in the worst cases, it can occupy 99% of all table or index space, or even more. 19 | 20 | Low values of bloat (say, below 40%) should not be considered a problem, while high values definitely should be, as they 21 | lead to bad consequences: 22 | 23 | 1. Higher disk usage 24 | 2. More IO needed for read and write queries 25 | 3. Lower cache efficiency (both buffer pool and OS file cache) 26 | 4. As a result, worse query performance 27 | 28 | ## How to check bloat 29 | 30 | Index and table bloat should be regularly checked. Note that most queries that are commonly used are estimation-based 31 | and are prone to false positives -- depending on table structure, it can show some non-existent bloat (I saw cases with 32 | up to 40% of phantom bloat in freshly created tables). But such queries are fast and don't require additional extensions 33 | installed. Examples: 34 | 35 | - [Estimated table bloat](https://github.com/NikolayS/postgres_dba/blob/master/sql/b1_table_estimation.sql) 36 | - [Estimated btree index bloat](https://github.com/NikolayS/postgres_dba/blob/master/sql/b2_btree_estimation.sql) 37 | 38 | Recommendations for use in monitoring systems: 39 | 40 | - There is little sense in running them often (e.g., every minute), as bloat levels don't change rapidly. 41 | - There should be a warning provided to users that the results are estimates. 42 | - For large databases, query execution may take a long time, up to many seconds, so the frequency of checks and 43 | `statement_timeout` might need to be adjusted. 44 | 45 | Approaches to determine the bloat levels more precisely: 46 | 47 | - queries based on `pgstattuple` (the extension has to be installed) 48 | - checking DB object sizes on a clone, running `VACUUM FULL` (heavy and blocks queries, thus not for production), and 49 | then checking sizes again and comparing before/after 50 | 51 | Periodical checks are definitely recommended to control bloat levels and react, when needed. 52 | 53 | ## Index bloat mitigation (reactive) 54 | 55 | Unfortunately, in databases that experience many `UPDATE`s and `DELETE`s, index health inevitably degrades over time. 56 | This means, that indexes need to be rebuilt regularly. 57 | 58 | Recommendations: 59 | 60 | * Use `REINDEX CONCURRENTLY` to rebuild bloated index in a non-blocking fashion. 61 | * Remember that `REINDEX CONCURRENTLY` holds the `xmin` horizon when running. This affects `autovacuum`'s ability to 62 | clean up freshly-dead tuples in all tables and indexes. This is another reason to use partitioning; do not allow 63 | tables to exceed certain threshold (say, more than 100 GiB). 64 | * You can monitor the progress of reindexing using the approach 65 | from [Day 15: How to monitor CREATE INDEX / REINDEX](0015_how_to_monitor_index_operations.md). 66 | * Prefer using Postgres versions 14+, since in PG14, btree indexes were significantly optimized to degrade much slower 67 | under written workloads. 68 | 69 | ## Table bloat mitigation (reactive) 70 | 71 | Some levels of table bloat may be considered as not a bad thing because they increase chances of having optimized 72 | `UPDATE`s -- HOT (Heap-Only Tuples) `UPDATE`s. 
73 | 74 | However, if the level is concerning, consider using [pg_repack](https://github.com/reorg/pg_repack) to rebuild the table 75 | without long-lasting exclusive locks. Alternative to `pg_repack`: 76 | [pg_squeeze](https://github.com/cybertec-postgresql/pg_squeeze). 77 | 78 | Normally, this process doesn't need to be scheduled and fully automated; usually, it is enough to apply it under control 79 | only when high table bloat is detected. 80 | 81 | ## Proactive bloat mitigation 82 | 83 | * Tune `autovacuum`. 84 | * Monitor the `xmin` horizon and don't allow it to be too far in the past -- 85 | [Day 45: How to monitor xmin horizon to prevent XID/MultiXID wraparound and high bloat](0045_how_to_monitor_xmin_horizon.md). 86 | * Do not allow unnecessary long-running transactions (e.g., > 1h), neither on the primary, nor on standbys with 87 | `hot_standby_feedback` turned on. 88 | * If on Postgres 13 or older, consider upgrading to 14+ to benefit from btree index optimizations. 89 | * Partition large (100+ GiB) tables. 90 | * Use partitioning for tables with queue-like workloads, even if they are small, and use `TRUNCATE` or drop partitions 91 | with old data instead of using `DELETE`s; in this case, vacuuming is not needed, bloat is not an issue. 92 | * Do not use massive `UPDATE`s and `DELETE`s, always work in batches (lasting not more than 1-2s); 93 | ensure that `autovacuum` cleans up dead tuples promptly or `VACUUM` manually when massive data changes need to happen. 94 | 95 | ## Materials worth reading 96 | 97 | * Postgres docs: [Routine Vacuuming](https://postgresql.org/docs/current/routine-vacuuming.html) 98 | * [When `autovacuum` does not vacuum](https://2ndquadrant.com/en/blog/when-`autovacuum`-does-not-vacuum/) 99 | * [How to Reduce Bloat in Large PostgreSQL Tables](https://timescale.com/learn/how-to-reduce-bloat-in-large-postgresql-tables/) 100 | -------------------------------------------------------------------------------- /0047_how_to_install_postgres_16_with_plpython3u.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1723535079451033697), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to install Postgres 16 with plpython3u: Recipes for macOS, Ubuntu, Debian, CentOS, Docker 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | PL/Python is a procedural language extension for PostgreSQL that allows you to write stored procedures and triggers in 11 | Python, a widely-used, high-level, and versatile programming language. 12 | 13 | `plpython3u` is the "untrusted" version of PL/Python. This variant allows Python functions to perform operations such as 14 | file I/O, network communication, and other actions that could potentially affect the server's behavior or security. 15 | 16 | We used `plpython3u` in 17 | [Day 23: How to use OpenAI APIs right from Postgres to implement semantic search and GPT chat](0023_how_to_use_openai_apis_in_postgres.md), 18 | let's now discuss how to install it. 19 | 20 | And something tells me that we'll be using it more in the future, for various tasks. 21 | 22 | This howto is for self-managed Postgres only. 
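Whichever recipe below is used, once `create extension plpython3u` succeeds, a quick way to verify the installation is a trivial test function – a minimal sketch (the function name and body are arbitrary):

```sql
create or replace function hello_py(name text) returns text
language plpython3u as $$
    # plain Python running inside Postgres
    return "hello, " + name
$$;

select hello_py('postgres');  -- expected: "hello, postgres"
```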
23 | 24 | ## macOS (Homebrew) 25 | 26 | ```bash 27 | brew tap petere/postgresql 28 | brew install petere/postgresql/postgresql@16 29 | 30 | psql postgres \ 31 | -c 'create extension plpython3u' 32 | ``` 33 | 34 | ## Ubuntu 22.04 LTS or Debian 12 35 | 36 | ```bash 37 | sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" \ 38 | > /etc/apt/sources.list.d/pgdg.list' 39 | 40 | curl -fsSL https://postgresql.org/media/keys/ACCC4CF8.asc \ 41 | | sudo gpg --dearmor -o /etc/apt/trusted.gpg.d/postgresql.gpg 42 | 43 | sudo apt update 44 | sudo apt install -y \ 45 | postgresql-16 \ 46 | postgresql-contrib-16 \ 47 | postgresql-plpython3-16 48 | 49 | sudo -u postgres psql \ 50 | -c 'create extension plpython3u' 51 | ``` 52 | 53 | ## CentOS Stream 9 54 | 55 | ```bash 56 | dnf module reset -y postgresql 57 | dnf module enable -y postgresql:16 58 | 59 | dnf install -y \ 60 | postgresql-server \ 61 | postgresql \ 62 | postgresql-contrib \ 63 | postgresql-plpython3 64 | 65 | postgresql-setup --initdb 66 | 67 | systemctl enable --now postgresql 68 | 69 | sudo -u postgres psql \ 70 | -c 'create extension plpython3u' 71 | ``` 72 | 73 | ## Docker 74 | 75 | ```bash 76 | echo "FROM postgres:16 77 | RUN apt update 78 | RUN apt install -y postgresql-plpython3-16" \ 79 | > postgres_plpython3u.Dockerfile 80 | 81 | sudo docker build \ 82 | -t postgres-plpython3u:16 \ 83 | -f postgres_plpython3u.Dockerfile \ 84 | . 85 | 86 | sudo docker run \ 87 | --detach \ 88 | --name pg16 \ 89 | -e POSTGRES_PASSWORD=secret \ 90 | -v $(echo ~)/pgdata:/var/lib/postgresql/data \ 91 | postgres-plpython3u:16 92 | 93 | sudo docker exec -it pg16 \ 94 | psql -U postgres -c 'create extension plpython3u' 95 | ``` 96 | -------------------------------------------------------------------------------- /0051_learn_about_schema_metadata_via_psql.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1725064379933319287), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # Learn how to work with schema metadata by spying after psql 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | ## Where to start when unsure 11 | 12 | [psql Docs](https://postgresql.org/docs/current/app-psql.html) 13 | 14 | `psql` has internal help – command `\?`; it's worth remembering and using it as a starting point when working 15 | with `psql`. 16 | 17 | When unsure about a function name, it can be helpful to use search `\df *keyword*`. For example: 18 | 19 | ``` 20 | nik=# \df *clock* 21 | List of functions 22 | Schema | Name | Result data type | Argument data types | Type 23 | ------------+-----------------+--------------------------+---------------------+------ 24 | pg_catalog | clock_timestamp | timestamp with time zone | | func 25 | (1 row) 26 | ``` 27 | 28 | ## How to see what psql is doing – ECHO_HIDDEN 29 | 30 | Let's assume we want to observe the size of the table `t1` - for that, we could construct a query returning table size 31 | (or just find it somewhere or ask an LLM). 
But staying inside `psql`, we can just use `\dt+ t1`: 32 | 33 | ``` 34 | nik=# \dt+ t1 35 | List of relations 36 | Schema | Name | Type | Owner | Persistence | Access method | Size | Description 37 | --------+------+-------+----------+-------------+---------------+-------+------------- 38 | public | t1 | table | postgres | permanent | heap | 25 MB | 39 | (1 row) 40 | ``` 41 | 42 | We would like to execute it in a loop, to observe how table size grows. For this, psql supports `\watch` – however, it 43 | won't work with other backslash commands. 44 | 45 | Solution – turn on `ECHO_HIDDEN` and see the SQL query behind `\dt+` (alternatively, you can use the option 46 | `--echo-hidden` when starting `psql`): 47 | 48 | ```sql 49 | nik=# \set ECHO_HIDDEN 1 50 | nik=# 51 | nik=# \dt+ t1 52 | ********* QUERY ********** 53 | SELECT n.nspname as "Schema", 54 | c.relname as "Name", 55 | CASE c.relkind WHEN 'r' THEN 'table' WHEN 'v' THEN 'view' WHEN 'm' THEN 'materialized view' WHEN 'i' THEN 'index' WHEN 'S' THEN 'sequence' WHEN 't' THEN 'TOAST table' WHEN 'f' THEN 'foreign table' WHEN 'p' THEN 'partitioned table' WHEN 'I' THEN 'partitioned index' END as "Type", 56 | pg_catalog.pg_get_userbyid(c.relowner) as "Owner", 57 | CASE c.relpersistence WHEN 'p' THEN 'permanent' WHEN 't' THEN 'temporary' WHEN 'u' THEN 'unlogged' END as "Persistence", 58 | am.amname as "Access method", 59 | pg_catalog.pg_size_pretty(pg_catalog.pg_table_size(c.oid)) as "Size", 60 | pg_catalog.obj_description(c.oid, 'pg_class') as "Description" 61 | FROM pg_catalog.pg_class c 62 | LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace 63 | LEFT JOIN pg_catalog.pg_am am ON am.oid = c.relam 64 | WHERE c.relkind IN ('r','p','t','s','') 65 | AND c.relname OPERATOR(pg_catalog.~) '^(t1)$' COLLATE pg_catalog.default 66 | AND pg_catalog.pg_table_is_visible(c.oid) 67 | ORDER BY 1,2; 68 | ************************** 69 | 70 | List of relations 71 | Schema | Name | Type | Owner | Persistence | Access method | Size | Description 72 | --------+------+-------+----------+-------------+---------------+-------+------------- 73 | public | t1 | table | postgres | permanent | heap | 72 MB | 74 | (1 row) 75 | ``` 76 | 77 | Now we have the query, and we can use `\watch 5` to see the result every 5 seconds (while also omitting the fields we 78 | don't need now): 79 | 80 | ```sql 81 | nik=# SELECT n.nspname as "Schema", 82 | c.relname as "Name", 83 | pg_catalog.pg_size_pretty(pg_catalog.pg_table_size(c.oid)) as "Size" 84 | FROM pg_catalog.pg_class c 85 | LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace 86 | LEFT JOIN pg_catalog.pg_am am ON am.oid = c.relam 87 | WHERE c.relkind IN ('r','p','t','s','') 88 | AND c.relname OPERATOR(pg_catalog.~) '^(t1)$' COLLATE pg_catalog.default 89 | AND pg_catalog.pg_table_is_visible(c.oid) 90 | ORDER BY 1,2 \watch 5 91 | Thu 16 Nov 2023 08:00:10 AM UTC (every 5s) 92 | 93 | Schema | Name | Size 94 | --------+------+------- 95 | public | t1 | 73 MB 96 | (1 row) 97 | 98 | Thu 16 Nov 2023 08:00:15 AM UTC (every 5s) 99 | 100 | Schema | Name | Size 101 | --------+------+------- 102 | public | t1 | 75 MB 103 | (1 row) 104 | 105 | Thu 16 Nov 2023 08:00:20 AM UTC (every 5s) 106 | 107 | Schema | Name | Size 108 | --------+------+------- 109 | public | t1 | 77 MB 110 | (1 row) 111 | ``` 112 | -------------------------------------------------------------------------------- /0053_index_maintenance.md: -------------------------------------------------------------------------------- 1 | Originally from: 
[tweet](https://twitter.com/samokhvalov/status/1725784211809059101), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # Index maintenance 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | Index maintenance is inevitable in a larger project. The sooner the process is established the better for performance. 11 | 12 | ## Analyze index health 13 | 14 | Index health analysis includes: 15 | 16 | - Identifying invalid indexes 17 | - Bloat analysis 18 | - Finding unused indexes 19 | - Finding redundant indexes 20 | - Checking for corruption 21 | 22 | ## Invalid indexes 23 | 24 | Finding invalid indexes is simple: 25 | 26 | ``` 27 | nik=# select indexrelid, indexrelid::regclass, indrelid::regclass 28 | from pg_index 29 | where not indisvalid; 30 | indexrelid | indexrelid | indrelid 31 | ------------+------------+---------- 32 | 49193 | t_id_idx | t1 33 | (1 row) 34 | ``` 35 | 36 | A bit more comprehensive query can be found in [Postgres DBA](https://github.com/NikolayS/postgres_dba/). 37 | 38 | When analyzing this list, keep in mind that an invalid index may be a normal situation if this is an index that is being 39 | built or rebuild by `CREATE INDEX CONCURRENTLY` / `REINDEX CONCURRENTLY`, so it is worth also checking 40 | `pg_stat_activity` to identify such processes. 41 | 42 | The other invalid indexes have to be rebuilt (`REINDEX CONCURRENTLY`) or dropped (`DROP INDEX CONCURRENTLY`). 43 | 44 | ## Bloated indexes 45 | 46 | How to analyze index bloat: see [Day 46: How to deal with bloat](./0046_how_to_deal_with_bloat.md). 47 | 48 | Indexes with high bloat (real or estimated) – say, above 50% – have to be reindexed (`REINDEX CONCURRENTLY`). 49 | 50 | Reindexing is needed because over time, due to updates index health degrades. Certain things can help to slow down this 51 | degradation: 52 | 53 | - Upgrading to Postgres 14 or newer (to benefit from btree optimizations). 54 | - Optimize workload and schema to make 55 | more `UPDATES HOT` ([Heap Only Tuple](https://postgresql.org/docs/current/storage-hot.html)). 56 | - Tune `autovacuum` for more active vacuuming (though, remember that `VACUUM` doesn't rebalance btree indexes). 57 | 58 | Thus, from time to time, you need to use `REINDEX CONCURRENTLY` for those indexes that are known to be bloated. Ideally, 59 | this procedure needs to be automated. Example: [pg_auto_reindexer](https://github.com/vitabaks/pg_auto_reindexer). 60 | 61 | ## Unused indexes 62 | 63 | Usage information can be found in [pg_stat_user_indexes](https://postgresql.org/docs/current/monitoring-stats.html). 64 | An example of query for analysis can be found in [Postgres DBA](https://github.com/NikolayS/postgres_dba/). 65 | 66 | When searching for unused indexes, be careful and avoid mistakes: 67 | 68 | 1) Ensure that stats were reset long enough ago. For example, if it happened only a couple of days ago, and you think an 69 | index is unused, it might be a mistake – if this index will be needed to support some reports on the 1st day of the 70 | next month. 71 | 2) Don't forget to analyze all the nodes that receive workload – the primary and all replicas. An index that looks 72 | unused on the primary may be needed on a replica. 73 | 3) If you have multiple installation of your system, make sure you analyzed all of them or at least representative 74 | portion of them. 
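With these caveats in mind, a starting point can be a query along these lines (a sketch based on `pg_stat_user_indexes` for the current database; the filters are assumptions to adjust for your case):

```sql
-- indexes with zero scans on THIS node since the last stats reset (check all nodes!)
select
  s.schemaname,
  s.relname,
  s.indexrelname,
  pg_size_pretty(pg_relation_size(s.indexrelid)) as index_size
from pg_stat_user_indexes as s
join pg_index as i on i.indexrelid = s.indexrelid
where
  s.idx_scan = 0
  and not i.indisunique  -- don't judge unique/PK indexes by scan counts alone
order by pg_relation_size(s.indexrelid) desc;
```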
75 | 76 | Once unused indexes are identified reliably, they need to be dropped using `DROP INDEX CONCURRENTLY`. 77 | 78 | Can we soft-drop index ("hide" it from the planner to ensure that planner behavior doesn't change and if so, proceed 79 | with real dropping, otherwise quickly reverting to the original state)? There is no simple answer here, unfortunately: 80 | 81 | 1) [HypoPG 1.4.0](https://github.com/HypoPG/hypopg/releases/tag/1.4.0) has a feature to "hide" indexes – this is very 82 | useful, but you need to install it and, more importantly, and it might be challenging to use it for whole workload, 83 | since you need to call `hypopg_hide_index(oid)` for it. 84 | 2) Some people use a trick with setting `indisvalid` to `false` to hide an index from the planner – but there is a 85 | reliable opinion that this is a not safe approach; see 86 | [Peter Geoghegan's Tweet](https://twitter.com/petervgeoghegan/status/1599191964045672449): 87 | 88 | > It's unsafe, basically. Though hard to say just how likely it is to break. Here is one hazard that I know of: in 89 | > general such an update might break a concurrent `pg_index.indcheckxmin = true` check. It will effectively 90 | > "change the xmin" of your affected row, confusing the check. 91 | 92 | ## Redundant indexes 93 | 94 | An index A supporting a set of queries is redundant to index B if B can efficiently support the same set of queries and, 95 | optionally, more. A few examples: 96 | 97 | - Index on column (a) is redundant to index on (a, b). 98 | - Index on column (a) is NOT redundant to index on (b, a). 99 | 100 | Indexes having exactly the same definition are redundant to each other (a.k.a. duplicate indexes). 101 | 102 | An example of a query to identify redundant indexes can be found in 103 | [Postgres DBA](https://github.com/NikolayS/postgres_dba/) 104 | or [Postgres Checkup](https://gitlab.com/postgres-ai/postgres-checkup). 105 | 106 | Redundant indexes are usually safe to drop (after double-checking the list of them manually), even if the index which is 107 | going to be dropped, is currently used. 108 | 109 | ## Corruption 110 | 111 | Use `pg_amcheck` to identify corruption in btree indexes (TODO: details – will be in a separate howto). 112 | 113 | As of 2023 / PG16, the following features are not yet supported by `pg_amcheck`, but there are plans to add them in the 114 | future: 115 | 116 | - [Checking GIN and GIST](https://commitfest.postgresql.org/45/3733/) 117 | - Checking unique keys (already pushed, potentially to be released with PG17) 118 | -------------------------------------------------------------------------------- /0054_how_to_check_btree_indexes_for_corruption.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1726184669622989250), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to check btree indexes for corruption (pg_amcheck) 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | 🥳🤩🎂🎉 It's my birthday, plus we've just soft-launched our 11 | [postgres.ai bot](https://twitter.com/samokhvalov/status/1726177412755677283) – so forgive me a far from being complete article 12 | this time. Nevertheless, I keep posting 😅 13 | 14 | There are many types of corruption. 
Some kinds of them can be identified by extension `amcheck` 15 | (see: [Using amcheck Effectively](https://postgresql.org/docs/current/amcheck.html#AMCHECK-USING-AMCHECK-EFFECTIVELY)) 16 | which is included to standard pack of "contrib modules" – so, just create it: 17 | 18 | ```sql 19 | create extension amcheck; 20 | ``` 21 | 22 | For Postgres 14+, prefer using CLI tool [pg_amcheck](https://postgresql.org/docs/current/app-pgamcheck.html), one of 23 | benefits of which is option `-j` (`--jobs=NUM`) – parallel workers to check multiple indexes faster. 24 | 25 | An example of use (put your connection options and adjust the number of parallel workers): 26 | 27 | ```bash 28 | pg_amcheck \ 29 | {{ connection options }} \ 30 | --install-missing \ 31 | --jobs=8 \ 32 | --verbose \ 33 | --progress \ 34 | --heapallindexed \ 35 | --parent-check \ 36 | 2>&1 \ 37 | | ts \ 38 | | tee -a pg_amcheck.$(date "+%F-%H-%M").log 39 | ``` 40 | 41 | **IMPORTANT:** the options `--heapallindexed` and `--parent-check` trigger a long, but more advanced checking. The 42 | `--parent-check` option is blocking writes (`UPDATE`s, etc.), so do not use it on production nodes that receive user 43 | traffic. The option `--heapallindexed` increases the load and duration of the check, but can be used live with 44 | care. Without both of these options the check performed will be light, potentially not noticing some issues 45 | (read [Using amcheck Effectively](https://postgresql.org/docs/current/amcheck.html#AMCHECK-USING-AMCHECK-EFFECTIVELY)). 46 | 47 | Once the snippet above is fully finished, check if the resulting log contains errors: 48 | 49 | ```bash 50 | egrep 'ERROR:|DETAIL:|LOCATION:' \ 51 | pg_amcheck.$(date "+%F-%H-%M").log 52 | ``` 53 | 54 | The indexes where corruption is detected need to be carefully analyzed and, if the problem is confirmed, re-created 55 | (`REINDEX CONCURRENTLY`). 56 | -------------------------------------------------------------------------------- /0055_how_to_drop_a_column.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1726596564712571006), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to drop a column 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | Dropping a column is easy: 11 | 12 | ```sql 13 | alter table t1 drop column c1; 14 | ``` 15 | 16 | However, it is important to keep in mind a few complications that might occur in various situations. 17 | 18 | ## Risk 1: application code not ready 19 | 20 | Application code needs to stop using this column. It means that it needs to be deployed first. 21 | 22 | ## Risk 2: partial downtime 23 | 24 | Under heavy load, issuing such an alter without a low `lock_timeout` and retries is a bad idea because this statement 25 | need to acquire AccessExclusiveLock on the table, and if an attempt to acquire it lasts a significant time (e.g. because 26 | of existing transaction that holds any lock on this table - it can be a transaction that read a single row from this 27 | table, or autovacuum processing this table to prevent transaction ID wraparound), then this attempt can be harmful for 28 | all current queries to this table, since it will be blocking them. This causes partial downtime in projects under load. 29 | Solution: low `lock_timeout` and retries. 
An example (more about this and a more advanced example can be found 30 | in [zero-downtime Postgres schema migrations need this: lock_timeout and retries](https://postgres.ai/blog/20210923-zero-downtime-postgres-schema-migrations-lock-timeout-and-retries)): 31 | 32 | ```sql 33 | do $do$ 34 | declare 35 | lock_timeout constant text := '50ms'; 36 | max_attempts constant int := 1000; 37 | ddl_completed boolean := false; 38 | begin 39 | 40 | perform set_config('lock_timeout', lock_timeout, false); 41 | 42 | for i in 1..max_attempts loop 43 | begin 44 | execute 'alter table t1 drop column c1'; 45 | ddl_completed := true; 46 | exit; 47 | exception 48 | when lock_not_available then 49 | null; 50 | end; 51 | end loop; 52 | 53 | if ddl_completed then 54 | raise info 'DDL successfully executed'; 55 | else 56 | raise exception 'DDL execution failed'; 57 | end if; 58 | end $do$; 59 | ``` 60 | 61 | Note that in this particular example, subtransactions are implicitly used (the `BEGIN/EXCEPTION WHEN/END` block). Which 62 | can be a problem in case of very high `XID` growth rate (e.g., many writing transactions) and a long-running 63 | transaction – this can trigger `SubtransSLRU` contention on standbys (see: 64 | [PostgreSQL Subtransactions Considered Harmful](https://postgres.ai/blog/20210831-postgresql-subtransactions-considered-harmful)). 65 | In this case, implement the retry logic at transaction level. 66 | 67 | ## Risk 3: false expectations that the data is deleted 68 | 69 | Finally, when copying data between various environments and removing sensitive data, remember that 70 | `ALTER TABLE ... DROP COLUMN ...` is not secure, it doesn't remove the data. After column `c1` is dropped, there is 71 | still information about it in metadata: 72 | 73 | ```sql 74 | nik=# select attname from pg_attribute where attrelid = 't1'::regclass::oid order by attnum; 75 | attname 76 | ------------------------------ 77 | tableoid 78 | cmax 79 | xmax 80 | cmin 81 | xmin 82 | ctid 83 | id 84 | ........pg.dropped.2........ 85 | (8 rows) 86 | ``` 87 | 88 | A superuser can easily recover it: 89 | 90 | ```sql 91 | nik=# update pg_attribute 92 | set attname = 'c1', atttypid = 20, attisdropped = false 93 | where attname = '........pg.dropped.2........'; 94 | UPDATE 1 95 | nik=# \d t1 96 | Table "public.t1" 97 | Column | Type | Collation | Nullable | Default 98 | --------+---------+-----------+----------+--------- 99 | id | bigint | | | 100 | c1 | bigint | | | 101 | ``` 102 | 103 | Some solutions to this problem: 104 | 105 | - `VACUUM FULL` to rebuild the table after dropping columns. In this case, though a restoration attempt will succeed, 106 | the data will not be there. 107 | - Consider using restricted users and column-level privileges instead of dropping columns. The columns and data will 108 | remain, but users will not be able to read it. Of course, this approach wouldn't suit if there is a strict requirement 109 | to remove data. 110 | - Dump/restore after the column has already been dropped. 111 | -------------------------------------------------------------------------------- /0058_how_to_use_docker_to_run_postgres.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1727705412072554585), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to use Docker to run Postgres 6 | 7 | > I post a new PostgreSQL "howto" article every day. 
Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | This howto is for users who use or need to use Postgres, but are not experienced in using Docker. 11 | 12 | Running docker in container for development and testing can help you align the sets of libraries, extensions, software 13 | versions between multiple environments. 14 | 15 | ## Docker installation – macOS 16 | 17 | Installation using [Homebrew](https://brew.sh): 18 | 19 | ```bash 20 | brew install docker docker-compose 21 | ``` 22 | 23 | ## Docker installation – Ubuntu 24 | 25 | ```bash 26 | sudo apt-get update 27 | sudo apt-get install -y \ 28 | apt-transport-https \ 29 | ca-certificates \ 30 | curl \ 31 | gnupg-agent \ 32 | software-properties-common 33 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - 34 | 35 | sudo add-apt-repository -y \ 36 | "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" 37 | 38 | sudo apt-get update && sudo apt-get install -y \ 39 | docker-ce \ 40 | docker-ce-cli \ 41 | http://containerd.io \ 42 | docker-compose-plugin 43 | ``` 44 | 45 | To avoid the need to use `sudo` to run `docker` commands: 46 | 47 | ```bash 48 | sudo groupadd docker 49 | sudo usermod -aG docker $USER 50 | newgrp docker 51 | ``` 52 | 53 | ## Run Postgres in container with persistent PGDATA 54 | 55 | Assuming we want the data directory (`PGDATA`) be in `~/pgdata` and container named as `pg16`: 56 | 57 | ```bash 58 | sudo docker run \ 59 | --detach \ 60 | --name pg16 \ 61 | -e POSTGRES_PASSWORD=secret \ 62 | -v ~/pgdata:/var/lib/postgresql/data \ 63 | --shm-size=128m \ 64 | postgres:16 65 | ``` 66 | 67 | ## Check logs 68 | 69 | Last 5 minutes of logs, with timestamps, and observing new coming log entries: 70 | 71 | ```bash 72 | docker logs --since 5m -tf pg16 73 | ``` 74 | 75 | ## Connect using psql 76 | 77 | ```bash 78 | ❯ docker exec -it pg16 psql -U postgres -c 'create table t()' 79 | CREATE TABLE 80 | 81 | ❯ docker exec -it pg16 psql -U postgres -c '\d t' 82 | Table "public.t" 83 | Column | Type | Collation | Nullable | Default 84 | --------+------+-----------+----------+--------- 85 | ``` 86 | 87 | For interactive psql, use: 88 | 89 | ```bash 90 | docker exec -it pg16 psql -U postgres 91 | ``` 92 | 93 | ## Connect any application from outside 94 | 95 | To connect an application from the host machine, we need to map ports. For this, we'll destroy this container, and 96 | create a new one, with proper port mapping – noting that `PGDATA` persists (the table we created is there): 97 | 98 | ```bash 99 | ❯ docker stop pg16 100 | pg16 101 | 102 | ❯ docker rm pg16 103 | pg16 104 | 105 | ❯ docker run \ 106 | --detach \ 107 | --name pg16 \ 108 | -e POSTGRES_PASSWORD=secret \ 109 | -v ~/pgdata:/var/lib/postgresql/data \ 110 | --shm-size=128m \ 111 | -p 127.0.0.1:15432:5432 \ 112 | postgres:16 113 | 8b5370107e1be7d3fd01a3180999a253c53610ca9ab764125b1512f65e83b927 114 | 115 | ❯ PGPASSWORD=secret psql -hlocalhost -p15432 -U postgres -c '\d t' 116 | Timing is on. 
117 | Table "public.t" 118 | Column | Type | Collation | Nullable | Default 119 | --------+------+-----------+----------+--------- 120 | ``` 121 | 122 | ## Custom image with additional extensions 123 | 124 | For example, here is how we can create our own image, based on the original one, to include `plpython3u` (continuing to 125 | work with the same `PGDATA`) 126 | 127 | ```bash 128 | docker stop pg16 129 | 130 | docker rm pg16 131 | 132 | echo "FROM postgres:16 133 | RUN apt update 134 | RUN apt install -y postgresql-plpython3-16" \ 135 | > postgres_plpython3u.Dockerfile 136 | 137 | sudo docker build \ 138 | -t postgres-plpython3u:16 \ 139 | -f postgres_plpython3u.Dockerfile \ 140 | . 141 | 142 | docker run \ 143 | --detach \ 144 | --name pg16 \ 145 | -e POSTGRES_PASSWORD=secret \ 146 | -v ~/pgdata:/var/lib/postgresql/data \ 147 | --shm-size=128m \ 148 | postgres-plpython3u:16 149 | 150 | docker exec -it pg16 \ 151 | psql -U postgres -c 'create extension plpython3u' 152 | ``` 153 | 154 | ## Shared memory 155 | 156 | If you see an error like this one: 157 | 158 | ``` 159 | > FATAL: could not resize shared memory segment "/PostgreSQL.12345" to 1048576 bytes: No space left on device1 160 | ``` 161 | 162 | then increase the `--shm-size` value in the `docker run` command. 163 | 164 | ## How to upgrade Postgres preserving data 165 | 166 | 1) In-place upgrades: 167 | 168 | - Traditional Docker images for Postgres include binaries only for one major version, so running `pg_upgrade` is not 169 | possible, unless you extend those images 170 | - Alternatively, you can use images that include multiple binaries, – 171 | e.g., [Spilo by Zalando](https://github.com/zalando/spilo). 172 | 173 | 2) Simple dump/restore (here I show how to downgrade assuming there are no incompatibilities; upgrade can be done in the 174 | same way): 175 | 176 | ```bash 177 | docker exec -it pg16 pg_dumpall -U postgres \ 178 | | bzip2 > dumpall.bz2 179 | 180 | docker rm -f pg16 181 | 182 | rm -rf ~/pgdata 183 | mkdir ~/pgdata 184 | 185 | docker run \ 186 | --detach \ 187 | --name pg15 \ 188 | -e POSTGRES_PASSWORD=secret \ 189 | -v ~/pgdata:/var/lib/postgresql/data \ 190 | --shm-size=128m \ 191 | postgres:15 192 | 193 | bzcat dumpall.bz2 \ 194 | | docker exec -i pg15 psql -U postgres \ 195 | >>dump_load.log \ 196 | 2> >(tee -a dump_load.err >&2) 197 | ``` 198 | -------------------------------------------------------------------------------- /0059_psql_tuning.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1727982234119090364), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # psql tuning 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | ## .psqlrc 11 | 12 | The file `~/.psqlrc` can be used to set some settings by default. For example: 13 | 14 | ```bash 15 | echo '\timing on' >> ~/.psqlrc 16 | ``` 17 | 18 | Now if we start `psql`: 19 | 20 | ```sql 21 | ❯ psql -U postgres 22 | Timing is on. 23 | psql (15.5 (Debian 15.5-1.pgdg120+1)) 24 | Type "help" for help. 25 | 26 | nik=# select pg_sleep(.5); 27 | pg_sleep 28 | ---------- 29 | 30 | (1 row) 31 | 32 | Time: 508.187 ms 33 | ``` 34 | 35 | ❗**Important:** for scripting involving `psql`, it is a good idea to use the option `-X` – to ignore `~/.psqlrc` 36 | settings, so the logic (e.g., `psql` output analysis) won't depend on `~/.psqlrc` at all. 
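For illustration, here is a minimal sketch of such a script (the query, user name, and output handling are arbitrary placeholders, not from the original article):

```bash
#!/bin/bash
# -X ignores ~/.psqlrc, so personal settings (\timing, custom pager, etc.) cannot break parsing;
# -A (unaligned) and -t (tuples only) make the output easy to consume in scripts.
sessions=$(psql -X -At -U postgres -c "select count(*) from pg_stat_activity")
echo "Total sessions: ${sessions}"
```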
37 | 38 | ## pspg 39 | 40 | [pspg](https://github.com/okbob/pspg) is awesome. If you can, install it. 41 | 42 | For example, for the query `select * from pg_class limit 3;`, before installation: 43 | 44 | ![psql ugly output](./files/0059_psql_ugly_output.png) 45 | 46 | After installation: 47 | 48 | ![pspg improved output](./files/0059_pspg_improved_output.png) 49 | 50 | ![pspg menus](./files/0059_pspg_menus.jpg) 51 | 52 | To install: 53 | 54 | - on macOS/Homebrew: `brew install pspg` 55 | - Ubuntu/Debian: `sudo apt update && sudo apt install -y pspg` 56 | 57 | Then: 58 | 59 | ```bash 60 | echo "\setenv PAGER pspg 61 | \pset border 2 62 | \pset linestyle unicode 63 | \set x '\setenv PAGER less' 64 | \set xx '\setenv PAGER \'pspg -bX --no-mouse\'' 65 | " >> ~/.psqlrc 66 | ``` 67 | 68 | ## NULLs 69 | 70 | By default, `NULL`s are invisible in `psql` output: 71 | 72 | ```sql 73 | nik=# select null; 74 | ?column? 75 | ---------- 76 | 77 | (1 row) 78 | ``` 79 | 80 | To fix it (put it to `~/.psqrc` for persistency): 81 | 82 | ```sql 83 | \pset null 'Ø' 84 | ``` 85 | 86 | Now: 87 | 88 | ``` 89 | nik=# select null; 90 | ?column? 91 | ---------- 92 | Ø 93 | (1 row) 94 | ``` 95 | 96 | ## postgres_dba 97 | 98 | [postgres_dba](https://github.com/NikolayS/postgres_dba) is my collection of scripts for `psql`, with menu support: 99 | 100 | ![postgres_dba menu support](./files/0059_postgres_dba.jpg) 101 | -------------------------------------------------------------------------------- /0061_how_to_create_an_index_part_1.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1728800785889395192), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to create an index, part 1 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | Index creation is straightforward: 11 | 12 | ```sql 13 | create index concurrently on t1(c1); 14 | ``` 15 | 16 | A longer form with explicit index naming: 17 | 18 | ```sql 19 | create index concurrently i_1 on t1(c1); 20 | ``` 21 | 22 | And even longer, explicitly including both index name and type: 23 | 24 | ```sql 25 | create index concurrently i_1 26 | on t1 27 | using btree(c1); 28 | ``` 29 | 30 | Below are a few "best practices" considerations. 31 | 32 | ## Index names 33 | 34 | When creating indexes: 35 | 36 | 1. Use explicit naming for better control. 37 | 2. Establish and follow some naming schema. For example, including the names of the table and columns to the index 38 | name: `i_table1_col1_col2`. The other properties to consider for inclusion: 39 | - is it a regular index or unique? 40 | - index type 41 | - is it a partial index? 42 | - ordering, expressions used 43 | - opclasses used 44 | 45 | ## Settings 46 | 47 | 1) `statement_timeout`: If you have `statement_timeout` set globally, unset it in the session where you are building an 48 | index: 49 | 50 | ```sql 51 | set statement_timeout to 0; 52 | ``` 53 | 54 | Alternatively, you can create a special DB user for index creation and adjust `statement_timeout` for it: 55 | 56 | ```sql 57 | alter user index_creator set statement_timeout to 0; 58 | ``` 59 | 60 | 2) `maintenance_work_mem`: raise this parameter ([docs](https://postgresqlco.nf/doc/en/param/maintenance_work_mem/)) to 61 | hundreds of MiB or a few GiB (on larger systems) to support faster index creation. 
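For example, a session-level setup combining both adjustments might look like this (a sketch only: the table and index names are placeholders, and `2GB` is an assumption – choose a value based on the RAM actually available):

```sql
set statement_timeout to 0;          -- no statement timeout in this session
set maintenance_work_mem to '2GB';   -- more memory for the index build

create index concurrently i_t1_c1 on t1 (c1);

reset maintenance_work_mem;
reset statement_timeout;
```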
62 | 63 | ## CONCURRENTLY 64 | 65 | Always prefer using the option `CONCURRENTLY` unless: 66 | 67 | - you're building an index on a table that is known not to be used yet – e.g., a table that was just created (it is 68 | beneficial to avoid `CONCURRENTLY` in this case, to be able to include `CREATE INDEX` in a transaction block together 69 | with index creation); 70 | - you're working with database alone. 71 | 72 | `CONCURRENTLY` will increase index build time, but will handle locking gracefully, not blocking other sessions for long 73 | time. With this method, an index is built with a careful balance between allowing ongoing access to the table while 74 | creating a new index, maintaining data integrity and consistency, and minimizing disruptions to normal database 75 | operations. 76 | 77 | 👉 How it is 78 | [implemented in PG16](https://github.com/postgres/postgres/blob/c136eb02981566d56e950f12ab7ee4a6ea51d698/src/backend/catalog/index.c#L1443-L1511). 79 | 80 | When this option is used, index creation might fail due to various reasons – for example, if you make attempts to build 81 | two indexes in parallel, for one of the attempts you'll see something like this: 82 | 83 | ``` 84 | nik=# create index concurrently i_3 on t1 using btree(c1); 85 | ERROR: deadlock detected 86 | DETAIL: Process 518 waits for ShareLock on virtual transaction 3/459; blocked by process 553. 87 | Process 553 waits for ShareUpdateExclusiveLock on relation 16402 of database 16401; blocked by process 518. 88 | HINT: See server log for query details. 89 | ``` 90 | 91 | In general, Postgres has transaction support for DDL, but for `CREATE INDEX CONCURRENTLY`, it is not so: 92 | 93 | - you cannot include `CREATE INDEX CONCURRENTLY` to a transaction block, 94 | - if operation fails, it leaves an invalid index behind, so a cleanup is needed. 95 | 96 | ## Cleanup and retries 97 | 98 | Since we know that `CREATE INDEX CONCURRENTLY` might fail, we should be ready to retry, manually or automatically. 99 | Before retrying, we need to cleanup an invalid index. 100 | Here the use of explicit naming and some schema convention pays off. 101 | 102 | When cleaning up, also use `CONCURRENTLY`: 103 | 104 | ```sql 105 | drop index concurrently i_1; 106 | ``` 107 | 108 | ## Progress monitoring 109 | 110 | How to monitor index creation progress: See 111 | [Day 15: How to monitor CREATE INDEX / REINDEX progress in Postgres 12+](0015_how_to_monitor_index_operations.md). 112 | 113 | ## ANALYZE 114 | 115 | Generally, Postgres `autovacuum` maintains statistics for each column up-to-date, running `ANALYZE` for each table whose 116 | content changes enough. 117 | 118 | After a new index creation, usually there is no need to rebuild statistics if you index columns only. 119 | 120 | However, if you: build an index on an expression, e.g.: 121 | 122 | ```sql 123 | create index i_t1_lower_email on t1 (lower(email)); 124 | ``` 125 | 126 | Then you should run `ANALYZE` on the table so Postgres gathers statistics for expression: 127 | 128 | ```sql 129 | analyze verbose t1; 130 | ``` 131 | -------------------------------------------------------------------------------- /0062_how_to_create_an_index_part_2.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1729152164403249462), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to create an index, part 2 6 | 7 | > I post a new PostgreSQL "howto" article every day. 
Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | See also [Part 1](0061_how_to_create_an_index_part_1.md). 11 | 12 | In part 1, we've covered the basics of how to build an index. Today we'll discuss parallelization and partitioning 13 | aspects related to index creation. These two parts don't provide instructions on which index type to choose, when to use 14 | partial indexes, indexes on expressions, or multi-column indexes – this will be covered in a separate howto. 15 | 16 | ## Long index creation and ways to speed it up 17 | 18 | As already mentioned, too long index build time – say, hours – is not only inconvenient, it prevents `autovacuum` from 19 | processing the table and also holds `xmin` horizon during the whole operation (which means that `autovacuum` cannot 20 | remove freshly-dead tuples in *all* tables in the database). 21 | 22 | Thus, it is worth improving build time for individual indexes. General ideas: 23 | 24 | 1. Configuration tuning: 25 | 26 | - higher `maintenance_work_mem` as already discussed 27 | > 🎯 **TODO:** show how with an experiment 28 | - checkpoint tuning: temporarily raised `max_wal_size` and `checkpoint_timeout` (doesn't require restart) reduces 29 | checkpoint frequency, which may improve build time 30 | > 🎯 **TODO:** an experiment to check it 31 | 32 | 2. Parallelization – use of multiple backends to speed up the whole operation. 33 | 34 | 3. Partitioning – splitting table to multiple physical tables reduces time needed to create an individual index. 35 | 36 | ## Parallel index build 37 | 38 | The option `max_parallel_maintenance_workers` 39 | (PG11+; see [docs](https://postgresqlco.nf/doc/en/param/max_parallel_maintenance_workers/)) defines the maximum number 40 | of parallel workers for` CREATE INDEX`. Currently, (as of PG16), it works only for building B-tree indexes. 41 | 42 | The default `max_parallel_maintenance_workers` is `2`, and it can be raised, it doesn't require a restart; can be done 43 | dynamically in session. The maximum depends on two settings: 44 | 45 | - `max_parallel_workers`, default `8,` can be changed without a restart as well; 46 | - `max_worker_processes`, default `8`, requires a restart to change. 47 | 48 | Raising `max_parallel_maintenance_workers` can significantly decrease index build time, but this 49 | should be done with proper analysis of CPU and disk IO utilization. 50 | 51 | > 🎯 **TODO:** experiment 52 | 53 | ## Indexes on partitioned tables 54 | 55 | As was already discussed multiple times in other howtos, large tables (say, those exceeding 100 GiB; not a hard rule) 56 | should be partitioned. Without it, if you have multi-terabyte tables, index creation will take very long time, during 57 | which, `autovacuum` cannot process the table. This leads to higher levels of bloat. 58 | 59 | It is possible to create indexes for individual partitions. However, it makes sense to consider using the unified 60 | indexing approach for all partitions, and define an index on the partitioned table itself. 61 | 62 | The `CONCURRENTLY` option cannot be used when creating an index on a partitioned table, it can only be used to index 63 | individual partitions. However, this issue can be solved (PG11+): 64 | 65 | 1. Create indexes on all partitions separately, with `CONCURRENTLY`: 66 | 67 | ``` 68 | create index concurrently i_p_123 on partition_123 ...; 69 | ``` 70 | 71 | 2. 
Then create an index on the partitioned table (parent), without `CONCURRENTLY`, and also using the keyword `ONLY` – 72 | it will be fast since this is not a large table, physically, but it will be marked `INVALID` until the next step is 73 | fully executed: 74 | 75 | ``` 76 | create index i_p_main on only partitioned_table ...; 77 | ``` 78 | 79 | 3. Then, for every index on individual partitions, mark it as "attached" to the "main" index, using this slightly odd 80 | syntax (note that here we use index names, not table names): 81 | 82 | ``` 83 | alter index i_p_main attach partition i_p_123; 84 | ``` 85 | 86 | Docs: 87 | [Partition Maintenance](https://postgresql.org/docs/current/ddl-partitioning.html#DDL-PARTITIONING-DECLARATIVE-MAINTENANCE). 88 | -------------------------------------------------------------------------------- /0063_how_to_help_others.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1729321940496924977), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to help others 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | Two key principles of helping others (it can be internal help to your teammates, or external consulting, doesn't 11 | matter): 12 | 13 | 1. Rely on good sources. 14 | 2. Test everything. 15 | 16 | ## Principle 1. Good sources 17 | 18 | Rely on high-quality sources that you trust. For example, my point of view, ranking the quality/trustworthiness level, 19 | roughly: 20 | 21 | - PG docs – 9/10 22 | - PG source code – 10/10 (source of truth!) 23 | - StackOverflow random answers – 5/10 or lower (excluding 24 | [Erwin Brandstetter](https://stackoverflow.com/users/939860/erwin-brandstetter), [Laurenz Albe](https://stackoverflow.com/users/6464308/laurenz-albe), [Peter Eisentraut](https://stackoverflow.com/users/98530/peter-eisentraut) – 25 | these 26 | guys rock when they answer there, 8/10 or higher) 27 | - the same for random blog posts 28 | - both "Internals" books by [Suzuki](https://www.interdb.jp/pg/) 29 | and [Rogov](https://postgrespro.com/blog/pgsql/5969637) – 8/10 or higher 30 | - etc. 31 | 32 | And (important!) always provide the link to your sources; you get two benefits from this: 33 | 34 | - advertise good source and pay them back; 35 | - share the responsibility to some extent (very helpful if you are not very experienced yet; everyone might make a 36 | mistake). 37 | 38 | ## Principle 2. Verification – database experiments 39 | 40 | Always doubt everything, don't trust language models, regardless of their nature. 41 | 42 | If someone (including you or me) says something without verification via an experiment (test), it needs to be fixed – 43 | using an experiment. 44 | 45 | All decisions should be made based on data – the reliable data is gathered via testing. 46 | 47 | Most database-related ideas are to be verified using database experiments. 48 | 49 | Two types of database experiments: 50 | 51 | 1. Multi-session experiments – full-fledged benchmarks like those conducted using `pgbench`, JMeter, `sysbench`, 52 | `pgreplay-go`, etc. 53 | 54 | They aim to study the behavior of Postgres as a whole, all its components, and must be performed on dedicated 55 | resources where nobody else is doing any work. Environment should match production (VM and disk type, PG version, 56 | settings). 
57 | 58 | Examples of such experiments include load testing, stress testing, performance regression testing. Main tools are 59 | those that aim to study macro-level query analysis: `pgss`, wait event analysis (aka active session history or 60 | performance/query insights), `auto_explain`, `pgBadger`, etc. 61 | 62 | More about this type of experiments: [Day 13: How to benchmark](0013_how_to_benchmark.md). 63 | 64 | 2. Single-session experiments – testing one or a sequence of SQL queries using a single session (sometimes, two), to 65 | check query syntax, study individual query behavior, optimize particular query, etc. 66 | 67 | These experiments can be conducted in shared environments, on weaker machines. However, to study query performance, 68 | you need to have the same PG version, same or similar database, and matching planner settings (how to do it: 69 | [Day 56: How to make the non-production Postgres planner behave like in production](0056_how_to_imitate_production_planner.md)). 70 | In this case, it should be kept in mind that 71 | timing metrics might be significantly off compared to production, and the main attention in the query optimization 72 | process should be paid to execution plans and data volumes (actual rows, buffer operation counts provided by the 73 | option `BUFFERS` in `EXPLAIN`). 74 | 75 | Examples of such experiments: checking syntax and logical behavior of a sequence of SQL queries, query performance 76 | analysis `EXPLAIN (ANALYZE, BUFFERS)` and testing optimization ideas, schema change testing, etc. It is very helpful 77 | to be able to quickly clone large databases not paying extra money for storage (Neon, Amazon Aurora). And it's even 78 | better if no extra money is paid for both storage and compute, this truly unlocks testing activities including 79 | automated tests in CI/CD (DBLab Engine [@Database_Lab](https://twitter.com/Database_Lab)). 80 | 81 | ## Summary 82 | 83 | As you can see, the principles are extremely simple: 84 | 85 | - read good papers, and 86 | - don't blindly trust – test everything. 87 | -------------------------------------------------------------------------------- /0064_how_to_use_uuid.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1729900720189759801), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to use UUID 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | As of now (PG16, 2023), Postgres implements UUID versions from 1 to 5, based 11 | on [RFC 4122](https://datatracker.ietf.org/doc/html/rfc4122). 
12 | 13 | - Docs: [UUID Data Type](https://postgresql.org/docs/current/datatype-uuid.html) 14 | - Additional module [uuid-ossp](https://postgresql.org/docs/current/uuid-ossp.html) 15 | 16 | A UUID value can be generated using `gen_random_uuid()`, which generates a UUID version 4 17 | ([source code for PG16](https://github.com/postgres/postgres/blob/03749325d16c4215ecd6d6a6fe117d93931d84aa/src/backend/utils/adt/uuid.c#L405-L423)): 18 | 19 | ```sql 20 | nik=# select gen_random_uuid(); 21 | gen_random_uuid 22 | -------------------------------------- 23 | c027497b-c510-413b-9092-8e6c99cf9596 24 | (1 row) 25 | 26 | nik=# select gen_random_uuid(); 27 | gen_random_uuid 28 | -------------------------------------- 29 | 08e63fed-f883-45d8-9896-8f087074bff5 30 | (1 row) 31 | ``` 32 | 33 | In standard UUIDs, the version can be determined by looking at the first character after the 2nd hyphen: 34 | 35 |
 36 | `08e63fed-f883-4 ...`  👈 this means v4
 37 | 
38 | 39 | The values are coming in a "pseudorandom" order. This has certain negative impact on performance: in a B-tree index, 40 | inserts happen in various locations, which, in general, affects write performance, as well as performance of Top-N 41 | reads (selecting N latest rows). 42 | 43 | There is a proposal to implement newer versions of UUID both in RFC and Postgres – v7 provides a time-based UUID that 44 | includes a millisecond-precision timestamp, sequence number, and additional entropy in the form of random or fixed bits. 45 | This kind of UUID not only ensures global uniqueness but also preserves the temporal aspect, which can be very 46 | beneficial for performance. 47 | 48 | - [Commitfest: UUID v7](https://commitfest.postgresql.org/45/4388/) 49 | - [rfc4122bis proposal](https://datatracker.ietf.org/doc/draft-ietf-uuidrev-rfc4122bis/) 50 | 51 | UUID values are 16-byte – the same as `timestamptz` or `timestamp` values. 52 | 53 | Good materials explaining performance aspects: 54 | 55 | - [The effect of Random UUID on database performance](https://twitter.com/hnasr/status/1695270411481796868) (video, ~19 56 | min) by [@hnasr](https://twitter.com/hnasr) 57 | 58 | - [Identity Crisis: Sequence v. UUID as Primary Key](https://brandur.org/nanoglyphs/026-ids#ulids) by 59 | [@brandur](https://twitter.com/brandur) 60 | 61 | Since Postgres doesn't support UUID v7 natively yet, there are two options to use them 62 | 63 | - generate on client side 64 | - implement a helper function in Postgres. 65 | 66 | For the latter approach, here is [SQL function](https://gist.github.com/kjmph/5bd772b2c2df145aa645b837da7eca74) 67 | (thanks [@DanielVerite](https://twitter.com/DanielVerite)): 68 | 69 | ```sql 70 | create or replace function uuid_generate_v7() returns uuid 71 | as $$ 72 | -- use random v4 uuid as starting point (which has the same variant we need) 73 | -- then overlay timestamp 74 | -- then set version 7 by flipping the 2 and 1 bit in the version 4 string 75 | select encode( 76 | set_bit( 77 | set_bit( 78 | overlay( 79 | uuid_send(gen_random_uuid()) 80 | placing substring(int8send(floor(extract(epoch from clock_timestamp()) * 1000)::bigint) from 3) 81 | from 1 for 6 82 | ), 83 | 52, 1 84 | ), 85 | 53, 1 86 | ), 87 | 'hex')::uuid; 88 | $$ language SQL volatile; 89 | ``` 90 | 91 | Examples: 92 | 93 | ```sql 94 | nik=# select uuid_generate_v7(); 95 | uuid_generate_v7 96 | -------------------------------------- 97 | 018c1be3-e485-7252-b80f-76a71843466a 98 | (1 row) 99 | 100 | nik=# select uuid_generate_v7(); 101 | uuid_generate_v7 102 | -------------------------------------- 103 | 018c1be3-e767-76b9-93dc-23c0c48be6c7 104 | (1 row) 105 | 106 | nik=# select uuid_generate_v7(); 107 | uuid_generate_v7 108 | -------------------------------------- 109 | 018c1be3-e973-7704-82ad-5967b79cf5c4 110 | (1 row) 111 | ``` 112 | 113 | After a few minutes: 114 | 115 | ```sql 116 | nik=# select uuid_generate_v7(); 117 | uuid_generate_v7 118 | -------------------------------------- 119 | 018c1be8-5002-70ab-96c0-c96ad5afa151 120 | (1 row) 121 | ``` 122 | 123 | A few notes: 124 | 125 | 1) If you use these value in the `ORDER BY` clause, the chronological order will persist. 126 | 127 | 2) For the first 3 values (that we generated during a few seconds) there is a common prefix, `018c1be3-e`, and with the 128 | last value that was generated slightly later, there is common prefix `018c1be`. 129 | 130 | 3) Note `7` after the second hyphen in all values: 131 |
132 |    `018c1be3-e973-7...` 👈 this means v7
133 |    
134 | 135 | 4) The function returns a value of the UUID type, so it's still 16-byte (while text representation of it would take 36 136 | characters including hyphens, meaning 40 bytes total with `VARLENA` header): 137 | 138 | ```sql 139 | nik=# select pg_column_size(gen_random_uuid()); 140 | pg_column_size 141 | ---------------- 142 | 16 143 | (1 row) 144 | 145 | nik=# select pg_column_size(uuid_generate_v7()); 146 | pg_column_size 147 | ---------------- 148 | 16 149 | (1 row) 150 | ``` 151 | -------------------------------------------------------------------------------- /0066_how_many_tuples_can_be_inserted_in_a_page.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1730609033860858080), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How many tuples can be inserted in a page 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | In Postgres, all tables have hidden, system columns; `ctid` being one of them. Reading it, we can see physical 11 | location of the tuple (tuple = row physical version), the page number and offset inside it: 12 | 13 | ```sql 14 | nik=# create table t0 as select 1 as id; 15 | SELECT 1 16 | 17 | nik=# select ctid, id from t0; 18 | ctid | id 19 | -------+---- 20 | (0,1) | 1 21 | (1 row) 22 | ``` 23 | 24 | 👉 page 0, position 1. 25 | 26 | A single PostgreSQL page, which is 8 KiB by default, and can be checked by looking at `block_size`: 27 | 28 | ```sql 29 | nik=# show block_size; 30 | block_size 31 | ------------ 32 | 8192 33 | (1 row) 34 | ``` 35 | 36 | How many tuples can fit into a single page? Let's see: 37 | 38 | ```sql 39 | nik=# create table t0 as select i 40 | from generate_series(1, 1000) as i; 41 | SELECT 1000 42 | 43 | nik=# select count(*) 44 | from t0 45 | where (ctid::text::point)[0] = 0; 46 | count 47 | ------- 48 | 226 49 | (1 row) 50 | 51 | nik=# select pg_column_size(i) from t0 limit 1; 52 | pg_column_size 53 | ---------------- 54 | 4 55 | (1 row) 56 | ``` 57 | 58 | 👉 If we use 4-byte numbers, then it's 226 tuples. Here I used `(ctid::text::point)[0]` to convert `ctid` value to 59 | "point" to get the first its component, then (the page number). 60 | 61 | If we use 2-byte numbers or, say, 1-byte `boolean` values (yes, boolean needs 1 byte, not 1 bit), the number is the 62 | same: 63 | 64 | ```sql 65 | nik=# drop table t0; 66 | DROP TABLE 67 | 68 | nik=# create table t0 as select true 69 | from generate_series(1, 1000) as i; 70 | SELECT 1000 71 | 72 | nik=# select count(*) 73 | from t0 74 | where (ctid::text::point)[0] = 0; 75 | count 76 | ------- 77 | 226 78 | (1 row) 79 | ``` 80 | 81 | Why 226 again? The thing is that, the size of the value doesn't matter here; it just needs to be less or equal to 8 82 | bytes. For every row, alignment padding adds zeroes, so we'll always have 8 bytes for each row. Math: 83 | 84 | ![floor((8192 - 24) / (4 + 24 + 8)) = 226](./files/0066-formula-1.gif) 85 | 86 | 👉 What we have counted here: 87 | 88 | 1. A single 24-byte page header (`PageHeaderData`). 89 | 2. N pointers to each tuple – 4 bytes each (`ItemIdData`). 90 | 3. N tuple headers – 23 bytes each, padded to 24 (`HeapTupleHeaderData`). 91 | 4. N tuple values – if <= 8 bytes, then it's padded to 8 bytes. 
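The same arithmetic can be double-checked right in SQL, using the numbers from the breakdown above:

```sql
select floor((8192 - 24) / (4 + 24 + 8)) as max_tuples_per_page;  -- 226
```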
92 | 93 | Source code defining the 94 | structures (for [PG16](https://github.com/postgres/postgres/blob/REL_16_STABLE/src/include/storage/bufpage.h)). 95 | 96 | **Can we fit even MORE tuples?** 97 | 98 | The answer is YES. Postgres allows tables without columns (!) In this case, the math is: 99 | 100 | ![floor((8192 - 24) / (4 + 24)) = 291](./files/0066-formula-2.gif) 101 | 102 | Let's see (note the empty column list in the `SELECT` clause): 103 | 104 | ```sql 105 | nik=# create table t0 as select 106 | from generate_series(1, 1000) as i; 107 | SELECT 1000 108 | 109 | nik=# select count(*) 110 | from t0 111 | where (ctid::text::point)[0] = 0; 112 | count 113 | ------- 114 | 291 115 | (1 row) 116 | ``` 117 | -------------------------------------------------------------------------------- /0068_psql_shortcuts.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1731340441403215959), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # psql shortcuts 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | In my tool [postgres_dba](https://github.com/NikolayS/postgres_dba/), the setup instructions have this: 11 | 12 | ```bash 13 | printf "%s %s %s %s\n" \\set dba \'\\\\i $(pwd)/start.psql\' >> ~/.psqlrc 14 | ``` 15 | 16 | This provides a way to call `start.psql` by just typing `:dba` in psql, and this line works in both bash and zsh (which 17 | is good because macOS switched to zsh as the default shell a few years ago, while bash is usually the default shell on 18 | Linux distros). 19 | 20 | One can easily add their own instructions to `.psqlrc`, defining various handy shortcuts. For example: 21 | 22 | ```sql 23 | \set pid 'select pg_backend_pid() as pid;' 24 | \set prim 'select not pg_is_in_recovery() as is_primary;' 25 | \set a 'select state, count(*) from pg_stat_activity where pid <> pg_backend_pid() group by 1 order by 1;' 26 | ``` 27 | 28 | 👉 This adds simple shortcuts: 29 | 30 | 1) Current PID: 31 | 32 | ```sql 33 | nik=# :pid 34 | pid 35 | -------- 36 | 513553 37 | (1 row) 38 | ``` 39 | 40 | 2) Is this not a primary? 41 | 42 | ```sql 43 | nik=# :prim 44 | is_primary 45 | ------------ 46 | t 47 | (1 row) 48 | ``` 49 | 50 | 3) Simple activity summary 51 | 52 | ```sql 53 | nik=# :a 54 | state | count 55 | ---------------------+------- 56 | active | 19 57 | idle | 193 58 | idle in transaction | 2 59 | | 7 60 | (4 rows) 61 | ``` 62 | 63 | The value of a currently set psql variable can be passed in an SQL context as a string using this interesting syntax: 64 | 65 | ```sql 66 | nik=# select :'pid'; 67 | ?column? 68 | --------------------------------- 69 | select pg_backend_pid() as pid; 70 | (1 row) 71 | ``` 72 | 73 | It is important not to forget to use the option `-X` (` --no-psqlrc`) in scripting, so nothing that you put in `.psqlrc` 74 | would affect the logic of your scripts. 75 | -------------------------------------------------------------------------------- /0069_howd_tod_addd_ad_checkd_constraintd_withoutd_downtime.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1731707060193312953), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to add a CHECK constraint without downtime 6 | 7 | > I post a new PostgreSQL "howto" article every day. 
Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | Adding `CHECK` constraints can be helpful to: 11 | 12 | - maintain better data quality 13 | - define a `NOT NULL` constraint without downtime in PG12+ 14 | (more [here](0060_how_to_add_a_column.md?ref_type=heads#not-null)) 15 | 16 | To add a `CHECK` constraint without downtime, we need: 17 | 18 | 1. Quickly define a constraint with the flag `NOT VALID` 19 | 2. In a separate transaction, "validate" the constraint for existing rows. 20 | 21 | ## Adding CHECK with NOT VALID 22 | 23 | Example: 24 | 25 | ```sql 26 | alter table t 27 | add constraint c_id_is_positive 28 | check (id > 0) not valid; 29 | ``` 30 | 31 | This requires a very brief `AccessExclusiveLock` lock, so on loaded systems, the command has to be executed with low 32 | `lock_timeout` and retries (read: 33 | [Zero-downtime Postgres schema migrations need this: lock_timeout and retries](https://postgres.ai/blog/20210923-zero-downtime-postgres-schema-migrations-lock-timeout-and-retries)). 34 | 35 | 🖋️ **Important:** Once the constraint with `NOT VALID` is in place, the new writes are checked (while old rows have not 36 | been yet verified and some of them might violate the constraint): 37 | 38 | ```sql 39 | nik=# insert into t select -1; 40 | ERROR: new row for relation "t" violates check constraint "c_id_is_positive" 41 | DETAIL: Failing row contains (-1). 42 | ``` 43 | 44 | ## Validation 45 | 46 | To complete the process, we need to validate the old rows: 47 | 48 | ```sql 49 | alter table t 50 | validate constraint c_id_is_positive; 51 | ``` 52 | 53 | This scans whole table, so for a large table, it takes long time – but this query only 54 | acquires `ShareUpdateExclusiveLock` on the table, not blocking the sessions that run DML queries. However, a lock 55 | acquisition attempt is going to be blocked if there is `autovacuum` running in the transaction ID wraparound prevention 56 | mode and processing the table, or if there is another session that builds an index on this table or performs 57 | another `ALTER` – so we need to make sure none of these heavier operations are happening before we run our `ALTER`, to 58 | avoid excessive wait time. 59 | -------------------------------------------------------------------------------- /0070_how_to_add_a_foreign_key.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1732056107483636188), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to add a foreign key 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | Adding a foreign key (FK) is straightforward: 11 | 12 | ```sql 13 | alter table messages 14 | add constraint fk_messages_users 15 | foreign key (user_id) 16 | references users(id); 17 | ``` 18 | 19 | However, this operation requires locks on both tables involved: 20 | 21 | 1. `ShareRowExclusiveLock`, `RowShareLock`, and `AccessShareLock` on the referenced table, in this example it's 22 | `users` (plus `AccessShareLock` on its primary key, PK). This blocks any data modifications to `users` (`UPDATE`, 23 | `DELETE`, `INSERT`), as well as DDL. 24 | 25 | 2. `ShareRowExclusiveLock` and `AccessShareLock` to the referencing table, in this example `messages` 26 | (plus, `AccessShareLock` to its PK). Again, this blocks writes to this table, and DDL. 
27 | 28 | And to ensure that the existing data doesn't violate the constraint, full table scans are needed – so the more data the 29 | tables have, the longer this implicit scan is going to take. During which, the locks are going to block all writes and 30 | DDL to the table. 31 | 32 | To avoid downtime, we need to create the FK in three steps: 33 | 34 | 1. Quickly define a constraint with the flag `NOT VALID`. 35 | 2. For the existing data, if needed, fix rows that would break the FK. 36 | 3. In a separate transaction, `validate` the constraint for existing rows. 37 | 38 | ## Step 1: Add FK with NOT VALID 39 | 40 | Example: 41 | 42 | ```sql 43 | alter table messages 44 | add constraint fk_messages_users 45 | foreign key (user_id) 46 | references users(id) 47 | not valid; 48 | ``` 49 | 50 | This requires a very brief `ShareRowExclusiveLock` and `AccessShareLock` on both tables, so on loaded systems, it is 51 | still recommended to execute this with low `lock_timeout` and retries (read: 52 | [Zero-downtime database schema migrations](https://postgres.ai/blog/20210923-zero-downtime-postgres-schema-migrations-lock-timeout-and-retries)), 53 | to avoid lock queue blocking writes to the tables. 54 | 55 | 🖋️ **Important:** once the constraint with `NOT VALID` is in place, new writes are checked (while old rows have not 56 | been yet verified and some of them might violate the constraint): 57 | 58 | ```sql 59 | nik=# \d messages 60 | Table "public.messages" 61 | Column | Type | Collation | Nullable | Default 62 | ---------+--------+-----------+----------+--------- 63 | id | bigint | | not null | 64 | user_id | bigint | | | 65 | Indexes: 66 | "messages_pkey" PRIMARY KEY, btree (id) 67 | Foreign-key constraints: 68 | "fk_messages_users" FOREIGN KEY (user_id) REFERENCES users(id) NOT VALID 69 | 70 | nik=# insert into messages(id, user_id) select 1, -1; 71 | ERROR: insert or update on table `messages` violates foreign key constraint "fk_messages_users" 72 | DETAIL: Key (user_id)=(-1) is not present in table `users`. 73 | ``` 74 | 75 | ## Step 2: Fix existing data if needed 76 | 77 | Now, with the FK created with `NOT VALID`, we know that Postgres already checks all the new data against the new 78 | constraint, but for the old data, some rows might still be violating it. Before the next step, it makes sense to ensure 79 | there are no old rows violating our new FK. It can be done using this query: 80 | 81 | ```sql 82 | select id 83 | from messages 84 | where 85 | user_id not in ( 86 | select id from users 87 | ); 88 | ``` 89 | 90 | This query scans the whole `messages` table, so it will take significant time. It is worth ensuring that `users` is 91 | accessed via PK here (depends on the data volumes and planner settings). 92 | 93 | The rows identified by this query will block the next step, so they need to either be deleted or changed to avoid the FK 94 | violation. 95 | 96 | ## Step 3: Validation 97 | 98 | To complete the process, we need to `validate` the old rows in a separate transaction: 99 | 100 | ```sql 101 | alter table messages 102 | validate constraint fk_messages_users; 103 | ``` 104 | 105 | If the tables are large, this `ALTER` is going to take significant time. However, it only acquires 106 | `ShareUpdateExclusiveLock` and `AccessShareLock` on the referencing table (`messages` in this example). 107 | 108 | Therefore, it doesn't block `UPDATE` / `DELETE` / `INSERT`, but it conflicts with DDL and `VACUUM` runs. 
On the 109 | referenced table (`users` here), `AccessShareLock` and `RowShareLock` are acquired. 110 | 111 | As usual, if `autovacuum` processes this table in the transaction ID wraparound prevention mode, it won't yield – so 112 | before running this, make sure there is no `autovacuum` running in this mode or DDL in progress. 113 | -------------------------------------------------------------------------------- /0072_how_to_remove_a_foreign_key.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1732855546141933694), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to remove a foreign key 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | Removing a foreign key (FK) is straightforward: 11 | 12 | ``` 13 | alter table messages 14 | drop constraint fk_messages_users; 15 | ``` 16 | 17 | However, under heavy load, this is not a safe operation, due to the reasons discussed in 18 | [zero-downtime Postgres schema migrations need this: lock_timeout and retries](https://postgres.ai/blog/20210923-zero-downtime-postgres-schema-migrations-lock-timeout-and-retries): 19 | 20 | - this operation needs a brief `AccessExclusiveLock` on *both* tables 21 | - if at least one of the locks cannot be quickly acquired, it waits, potentially blocking other sessions (including 22 | `SELECT`s), to both tables. 23 | 24 | To solve this problem, we need to use a low lock_timeout and retries: 25 | 26 | ```sql 27 | set lock_timeout to '100ms'; 28 | alter ... -- be ready to fail and try again. 29 | ``` 30 | 31 | The same technique needs to be used in the first step of the operation of new FK creation, when we define a new FK with 32 | the `NOT VALID` option, as was discussed in [day 70, How to add a foreign key](0070_how_to_add_a_foreign_key.md). 33 | 34 | See also: [day 71, How to understand what's blocking DDL](0071_how_to_understand_what_is_blocking_ddl.md). 35 | -------------------------------------------------------------------------------- /0073_how_to_analyze_heavyweight_locks_part_3_persistent_monitoring.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1733252574479040950), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to analyze heavyweight locks, part 3. Persistent monitoring 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | If a spike of active sessions (`pg_stat_activity`, `state='active'`) occurs along with a spike of exclusive locks 11 | (`pg_locks`, `mode ~* 'exclusive'`), we need to analyze the blocked and blocking sessions, to understand the root cause. 12 | 13 | Ad-hoc approaches were previously discussed: 14 | 15 | - [Day 22, How to analyze heavyweight locks, part 1](0022_how_to_analyze_heavyweight_locks_part_1.md) 16 | - [Day 42, How to analyze heavyweight locks, part 2: Lock trees (a.k.a. "lock queues", "wait queues", "blocking chains")](0042_how_to_analyze_heavyweight_locks_part_2.md) 17 | - Specifically for the cases, when DDL is 18 | blocked: [Day 71, How to understand what's blocking DDL](0071_how_to_understand_what_is_blocking_ddl.md) 19 | 20 | However, an ad-hoc approach is not helpful if the incident is already over. 
A big part of this is because even with 21 | `log_lock_waits = 'on'`, Postgres logs report only the "victim" (blocked session) query text (in the `STATEMENT` part of 22 | log entries), with only `PID` information available about the blocker session. 23 | 24 | To troubleshoot past incidents and understand trends, we need lock analysis to be implemented in monitoring, providing 25 | details about the blocked and blocking sessions. 26 | 27 | Here is a query that [@VKukharik](https://twitter.com/VKukharik) developed for 28 | the [pgwatch2 - Postgres.ai Edition](https://hub.docker.com/r/postgresai/pgwatch2). It doesn't involve 29 | the function `pg_blocking_pids()` because it would be unsafe due to possible observer effect. 30 | 31 | ```sql 32 | with sa_snapshot as ( 33 | select * 34 | from pg_stat_activity 35 | where 36 | datname = current_database() 37 | and pid <> pg_backend_pid() 38 | and state <> 'idle' 39 | ) 40 | select 41 | (extract(epoch from now()) * 1e9)::bigint as epoch_ns, 42 | waiting.pid as waiting_pid, 43 | waiting_stm.usename::text as tag_waiting_user, 44 | waiting_stm.application_name::text as tag_waiting_appname, 45 | waiting.mode as waiting_mode, 46 | waiting.locktype as waiting_locktype, 47 | waiting.relation::regclass::text as tag_waiting_table, 48 | waiting_stm.query as waiting_query, 49 | (extract(epoch from (now() - waiting_stm.state_change)) * 1000)::bigint as waiting_ms, 50 | blocker.pid as blocker_pid, 51 | blocker_stm.usename::text as tag_blocker_user, 52 | blocker_stm.application_name::text as tag_blocker_appname, 53 | blocker.mode as blocker_mode, 54 | blocker.locktype as blocker_locktype, 55 | blocker.relation::regclass::text as tag_blocker_table, 56 | blocker_stm.query as blocker_query, 57 | (extract(epoch from (now() - blocker_stm.xact_start)) * 1000)::bigint as blocker_tx_ms 58 | from pg_catalog.pg_locks as waiting 59 | join sa_snapshot as waiting_stm on waiting_stm.pid = waiting.pid 60 | join pg_catalog.pg_locks as blocker on 61 | waiting.pid <> blocker.pid 62 | and blocker.granted 63 | and waiting.database = blocker.database 64 | and ( 65 | waiting.relation = blocker.relation 66 | or waiting.transactionid = blocker.transactionid 67 | ) 68 | join sa_snapshot as blocker_stm on blocker_stm.pid = blocker.pid 69 | where not waiting.granted; 70 | ``` 71 | 72 | With this added to monitoring, here is what can be observed there during incidents involving lock contention spikes: 73 | 74 | ![](./files/0073_01.jpg) 75 | 76 | And it has all the details available for both blocked and blocking sessions, supporting the troubleshooting and 77 | root-cause analysis activities: 78 | 79 | ![](./files/0073_02.jpg) 80 | 81 | ![](./files/0073_03.jpg) 82 | -------------------------------------------------------------------------------- /0074_how_to_flush_caches.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1733652860435640705), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to flush caches (OS page cache and Postgres buffer pool) 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | For experiments, it is important to take into account the state of caches – Postgres buffer pool (size of which is 11 | controlled by `shared_buffers`) and OS page cache. If we decide to start each experiment run with cold caches, we need 12 | to flush them. 
13 | 14 | ## Flushing Postgres buffer pool 15 | 16 | To flush Postgres buffer pool, restart Postgres. 17 | 18 | To analyze the current state of the buffer pool, 19 | use [pg_buffercache](https://postgresql.org/docs/current/pgbuffercache.html). 20 | 21 | ## Flushing OS page cache 22 | 23 | To flush Linux page cache: 24 | 25 | ```bash 26 | sync 27 | echo 3 > /proc/sys/vm/drop_caches 28 | ``` 29 | 30 | To see the current state of RAM consumption (in MiB) in Linux: 31 | 32 | ```bash 33 | free -m 34 | ``` 35 | 36 | On macOS, to flush the page cache: 37 | 38 | ```bash 39 | sync 40 | sudo purge 41 | ``` 42 | 43 | To see the current state of RAM on macOS: 44 | 45 | ```bash 46 | vm_stat 47 | ``` 48 | -------------------------------------------------------------------------------- /0078_estimate_yoy_table_growth.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1735568654996283501), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to estimate the YoY growth of a very large table using row creation timestamps and the planner statistics 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | Assume we have a 10 TiB table created many years ago, partitioned or not, like this one: 11 | 12 | ```sql 13 | create table t ( 14 | id int8 primary key, -- of course, not int4 15 | created_at timestamptz default now(), 16 | ... 17 | ); 18 | ``` 19 | 20 | and we need to quickly understand how the table grew year over year, assuming that no rows were deleted (or only a 21 | negligible amount). So, we just need to count rows for every year. 22 | 23 | A straightforward approach would be: 24 | 25 | ```sql 26 | select 27 | date_trunc('year', created_at) as year, 28 | count(*) 29 | from t 30 | group by year 31 | order by year; 32 | ``` 33 | 34 | However, for a 10 TiB table, we'd need to wait many hours, if not days, for this analysis to complete. 35 | 36 | Here is fast, but not a precise way to get the row counts for each year (assuming the table has up-to-date stats; if in 37 | doubt, run `ANALYZE` on it first): 38 | 39 | ```sql 40 | do $$ 41 | declare 42 | table_fqn text := 'public.t'; 43 | year_start int := 2000; 44 | year_end int := extract(year from now())::int; 45 | year int; 46 | explain_json json; 47 | begin 48 | for year in year_start..year_end loop 49 | execute format( 50 | $e$ 51 | explain (format json) select * 52 | from %s 53 | where created_at 54 | between '%s-01-01' and '%s-12-31' 55 | $e$, 56 | table_fqn, 57 | year, 58 | year 59 | ) into explain_json; 60 | 61 | raise info 'Year: %, Estimated rows: %', 62 | year, 63 | explain_json->0->'Plan'->>'Plan Rows'; 64 | end loop; 65 | end $$; 66 | ``` 67 | -------------------------------------------------------------------------------- /0079_rebuild_indexes_without_deadlocks.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1735204785232715997), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to rebuild many indexes using many backends avoiding deadlocks 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | Sometimes we need to reindex many indexes – or all of them – and want to do it faster. 
11 | 12 | For example, this makes sense after upgrading from pre-14 Postgres to version 14+, when we want to rebuild all B-tree 13 | indexes to benefit from optimizations that reduce bloat growth rates. 14 | 15 | We might decide to use a single session and higher value of 16 | [max_parallel_maintenance_workers](https://postgresqlco.nf/doc/en/param/max_parallel_maintenance_workers/), processing 17 | one index at a time. But if we have powerful resources (a lot of vCPUs and fast disks), then the maximal value of 18 | `max_parallel_maintenance_workers` may not be enough to move as fast as we can (changing 19 | `max_parallel_maintenance_workers` doesn't require a restart, but we cannot use more than `max_worker_processes` 20 | workers, and changing that requires a restart). In this case, it may make sense to process multiple indexes in parallel, 21 | using `REINDEX INDEX CONCURRENTLY`. 22 | 23 | But in this case, indexes need to be processed in proper order. The problem is that if you attempt to rebuild two 24 | indexes belonging to the same table in parallel, a deadlock will be detected, and one of the sessions will fail: 25 | 26 | ```sql 27 | nik=# reindex index concurrently t1_hash_record_idx3; 28 | ERROR: deadlock detected 29 | DETAIL: Process 40 waits for ShareLock on virtual transaction 4/2506; blocked by process 1313. 30 | Process 1313 waits for ShareUpdateExclusiveLock on relation 16634 of database 16401; blocked by process 40. 31 | HINT: See server log for query details. 32 | ``` 33 | 34 | To address this, we can use this approach: 35 | 36 | 1. Decide how many reindexing sessions you want to use, taking into account `max_parallel_maintenance_workers` and the 37 | planned resource utilization / saturation risks (CPU and disk IO). 38 | 39 | 2. Assuming we want to use N reindexing sessions, build the full list of indexes, with the table names they belong to, 40 | and "assign" each table to a particular reindexing session. See the query below that does it. 41 | 42 | 3. Using this "assignment", divide the whole list of indexes to N separate lists, so all the indexes for a particular 43 | table are present only in a single list – and now we can just run N sessions using these N lists. 44 | 45 | For the step 2, here is a query that can help: 46 | 47 | ```sql 48 | \set NUMBER_OF_SESSIONS 10 49 | 50 | select 51 | format('%I.%I', n.nspname, c.relname) as table_fqn, 52 | format('%I.%I', n.nspname, i.relname) as index_fqn, 53 | mod( 54 | hashtext(format('%I.%I', n.nspname, c.relname)) & 2147483647, 55 | :NUMBER_OF_SESSIONS 56 | ) as session_id 57 | from 58 | pg_index idx 59 | join pg_class c on idx.indrelid = c.oid 60 | join pg_class i on idx.indexrelid = i.oid 61 | join pg_namespace n on c.relnamespace = n.oid 62 | where 63 | n.nspname not in ('pg_catalog', 'pg_toast', 'information_schema') 64 | -- and ... additional filters if needed 65 | order by 66 | table_fqn, index_fqn; 67 | ``` 68 | -------------------------------------------------------------------------------- /0080_how_to_find_int4_pks_with_out_of_range_risks.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1735925615185002701), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to find int4 PKs with out-of-range risks in a large database 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 
9 | 10 | Modern ORMs like Rails or Django use int8 (bigint) primary keys (PKs) today. However, in older projects, there may be 11 | old tables with int4 (integer, int, serial) PKs, that have grown and have risks of int4 overflow – 12 | [max value for int4 is 2,147,483,647](https://postgresql.org/docs/current/datatype-numeric.html), 13 | and PK conversion int4->int8 without downtime is not a trivial task (TODO: cover it in another howto). 14 | 15 | Here is how we can quickly check if there are tables with int2 or int4 PKs and how much of the "capacity" has been used 16 | in each case (postgres-checkup query): 17 | 18 | ```sql 19 | do $$ 20 | declare 21 | min_relpages int8 := 0; -- in very large DBs, skip small tables by setting this to 100 22 | rec record; 23 | out text := ''; 24 | out1 json; 25 | i numeric := 0; 26 | val int8; 27 | ratio numeric; 28 | sql text; 29 | begin 30 | for rec in 31 | select 32 | c.oid, 33 | spcname as tblspace, 34 | nspname as schema_name, 35 | relname as table_name, 36 | t.typname, 37 | pg_get_serial_sequence(format('%I.%I', nspname, relname), attname) as seq, 38 | min(attname) as attname 39 | from pg_index i 40 | join pg_class c on c.oid = i.indrelid 41 | left join pg_tablespace tsp on tsp.oid = reltablespace 42 | left join pg_namespace n on n.oid = c.relnamespace 43 | join pg_attribute a on 44 | a.attrelid = i.indrelid 45 | and a.attnum = any(i.indkey) 46 | join pg_type t on t.oid = atttypid 47 | where 48 | i.indisprimary 49 | and ( 50 | c.relpages >= min_relpages 51 | or pg_get_serial_sequence(format('%I.%I', nspname, relname), attname) is not null 52 | ) and t.typname in ('int2', 'int4') 53 | and nspname <> 'pg_toast' 54 | group by 1, 2, 3, 4, 5, 6 55 | having count(*) = 1 -- skip PKs with 2+ columns (TODO: analyze them too) 56 | loop 57 | raise debug 'table: %', rec.table_name; 58 | 59 | if rec.seq is null then 60 | sql := format('select max(%I) from %I.%I;', rec.attname, rec.schema_name, rec.table_name); 61 | else 62 | sql := format('select last_value from %s;', rec.seq); 63 | end if; 64 | 65 | raise debug 'sql: %', sql; 66 | execute sql into val; 67 | 68 | if rec.typname = 'int4' then 69 | ratio := (val::numeric / 2^31)::numeric; 70 | elsif rec.typname = 'int2' then 71 | ratio := (val::numeric / 2^15)::numeric; 72 | else 73 | assert false, 'unreachable point'; 74 | end if; 75 | 76 | if ratio > 0.1 then -- report only if > 10% of capacity is reached 77 | i := i + 1; 78 | 79 | out1 := json_build_object( 80 | 'table', ( 81 | coalesce(nullif(quote_ident(rec.schema_name), 'public') || '.', '') 82 | || quote_ident(rec.table_name) 83 | ), 84 | 'pk', rec.attname, 85 | 'type', rec.typname, 86 | 'current_max_value', val, 87 | 'capacity_used_percent', round(100 * ratio, 2) 88 | ); 89 | 90 | raise debug 'cur: %', out1; 91 | 92 | if out <> '' then 93 | out := out || e',\n'; 94 | end if; 95 | 96 | out := out || format(' "%s": %s', rec.table_name, out1); 97 | end if; 98 | end loop; 99 | 100 | raise info e'{\n%\n}', out; 101 | end; 102 | $$ language plpgsql; 103 | ``` 104 | 105 | Output example: 106 | 107 | ```json 108 | INFO: { 109 | "oldbig": {"table" : "oldbig", "pk" : "id", "type" : "int4", "current_max_value" : 2107480000, "capacity_used_percent" : 98.14}, 110 | "oldbig2": {"table" : "oldbig2", "pk" : "id", "type" : "int4", "current_max_value" : 1107480000, "capacity_used_percent" : 51.57} 111 | } 112 | ``` 113 | -------------------------------------------------------------------------------- /0081_how_to_plot_graphs_right_in_psql_on_macos_iterm2.md: 
-------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1736301721155191218), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to plot graphs right in psql on macOS (iTerm2) 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | If you, like me, work with Postgres mostly in psql, you might want to be able to plot a simple graph not leaving psql. 11 | 12 | The recipe below was originally described by 13 | [Alexander Korotkov](https://akorotkov.github.io/blog/2016/06/09/psql-graph/), 14 | I [slightly adjusted it](https://gist.github.com/NikolayS/d5f1af808f7275dc1491c37fb1e2dd11) to work with Python3. The 15 | recipe works thanks to [iTerm2's Inline Images Protocol](https://iterm2.com/documentation-images.html). For Linux, there 16 | are various ways to achieve similar results 17 | (see, for 18 | example, [How do I make my terminal display graphical pictures?](https://askubuntu.com/questions/97542/how-do-i-make-my-terminal-display-graphical-pictures)). 19 | 20 | 1) Get the plotting script and install matplotlib: 21 | 22 | ```bash 23 | wget \ 24 | -O ~/pg_graph.py \ 25 | https://gist.githubusercontent.com/NikolayS/d5f1af808f7275dc1491c37fb1e2dd11/raw/4f19a23222a6f7cf66eead3cae9617dd39bf07a5/pg_graph 26 | 27 | pip install matplotlib 28 | ``` 29 | 30 | 2) Define a macro in ~/.psqlrc (this line should work in bash, zsh, and csh): 31 | 32 | ```bash 33 | printf "%s %s %s %s %s %s\n" \\set graph \'\\\\g \| 34 | python3 $(pwd)/pg_graph.py\' \ 35 | >> ~/.psqlrc 36 | ``` 37 | 38 | 3) Start psql and try it out 39 | 40 | ```sql 41 | nik=# with avg_temp(month, san_diego, vancouver, london) as ( 42 | values 43 | ('Jan', 15, 4, 5), 44 | ('Feb', 16, 5, 6), 45 | ('Mar', 17, 7, 8), 46 | ('Apr', 18, 10, 11), 47 | ('May', 19, 14, 15), 48 | ('Jun', 21, 17, 17), 49 | ('Jul', 24, 20, 19), 50 | ('Aug', 25, 21, 20), 51 | ('Sep', 23, 18, 17), 52 | ('Oct', 21, 12, 13), 53 | ('Nov', 18, 8, 8), 54 | ('Dec', 16, 5, 6) 55 | ) 56 | select * from avg_temp; 57 | 58 | month | san_diego | vancouver | london 59 | -------+-----------+-----------+-------- 60 | Jan | 15 | 4 | 5 61 | Feb | 16 | 5 | 6 62 | Mar | 17 | 7 | 8 63 | Apr | 18 | 10 | 11 64 | May | 19 | 14 | 15 65 | Jun | 21 | 17 | 17 66 | Jul | 24 | 20 | 19 67 | Aug | 25 | 21 | 20 68 | Sep | 23 | 18 | 17 69 | Oct | 21 | 12 | 13 70 | Nov | 18 | 8 | 8 71 | Dec | 16 | 5 | 6 72 | (12 rows) 73 | 74 | nik=# :graph 75 | ``` 76 | 77 | Result: 78 | 79 | ![Graph result](files/0081_01.png) 80 | -------------------------------------------------------------------------------- /0082_how_to_draw_frost_patterns_using_sql.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1736637759073517684), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to draw frost patterns using SQL ❄️ 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | This original idea and implementation was done by Kirill Borovikov (kilor) – I just reformatted and slightly tuned it, 11 | extending the character set. 
12 | 13 | Here is the query: 14 | 15 | ```sql 16 | with recursive t as ( 17 | select 18 | 0 as x, 19 | 0 as y, 20 | '{"{0,0}"}'::text[] as c, 21 | 0 as i 22 | 23 | union all 24 | 25 | ( 26 | with z as ( 27 | select 28 | dn.x, 29 | dn.y, 30 | t.c, 31 | t.i 32 | from t, 33 | lateral ( 34 | select 35 | ((random() * 2 - 1) * 100)::integer as x, 36 | ((random() * 2 - 1) * 100)::integer as y 37 | ) as p, 38 | lateral ( 39 | select * 40 | from ( 41 | select 42 | (unnest::text[])[1]::integer as x, 43 | (unnest::text[])[2]::integer as y 44 | from unnest(t.c::text[]) 45 | ) as t 46 | order by sqrt((x - p.x) ^ 2 + (y - p.y) ^ 2) 47 | limit 1 48 | ) as n, 49 | lateral ( 50 | select 51 | n.x + dx as x, 52 | n.y + dy as y 53 | from 54 | generate_series(-1, 1) as dx, 55 | generate_series(-1, 1) as dy 56 | where (dx, dy) <> (0, 0) 57 | order by 58 | case 59 | when (p.x, p.y) = (n.x, n.y) then 0 60 | else abs( 61 | acos( 62 | ((p.x - n.x) * dx + (p.y - n.y) * dy) 63 | / sqrt((p.x - n.x) ^ 2 + (p.y - n.y) ^ 2) 64 | / sqrt(dx ^ 2 + dy ^ 2) 65 | ) 66 | ) 67 | end 68 | limit 1 69 | ) as dn 70 | ) 71 | select 72 | z.x, 73 | z.y, 74 | z.c || array[z.x, z.y]::text, 75 | z.i + 1 76 | from z 77 | where z.i < (1 << 10) 78 | ) 79 | ), 80 | map as ( 81 | select 82 | gx as x, 83 | gy as y, 84 | ( 85 | select sqrt((gx - T.x) ^ 2 + (gy - T.y) ^ 2) v 86 | from t 87 | order by v 88 | limit 1 89 | ) as v 90 | from 91 | generate_series(-40, 40) as gx, 92 | generate_series(-30, 30) as gy 93 | ), 94 | gr as ( 95 | select regexp_split_to_array('@%#*+=-:. ', '') as s 96 | ) 97 | select 98 | string_agg( 99 | coalesce(s[(v * (array_length(s, 1) - 1))::integer + 1], ' '), 100 | ' ' 101 | order by x 102 | ) as frozen 103 | from 104 | ( 105 | select 106 | x, 107 | y, 108 | v::double precision / max(v) over() as v 109 | from map 110 | ) as t, 111 | gr 112 | group by y 113 | order by y; 114 | ``` 115 | 116 | Every time it draws a new frozen pattern, here are several examples: 117 | 118 | ![frozen pattern 1](files/0082_01.png) 119 | 120 | ![frozen pattern 2](files/0082_02.png) 121 | 122 | ![frozen pattern 3](files/0082_03.png) 123 | 124 | Happy holiday season 🎅 125 | -------------------------------------------------------------------------------- /0083_how_to_quickly_check_data_type_and_storage_size_of_a_value.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1737019068987937049), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to change a Postgres parameter 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | Follow these steps if you need to change a Postgres parameter (a.k.a., GUC, Grand Unified Configuration) for permanent 11 | effect. 12 | 13 | Docs: [Setting Parameters](https://postgresql.org/docs/current/config-setting.html.) 14 | 15 | ## 1) Understand if a restart is needed 16 | 17 | Two ways to quickly check if a restart is needed: 18 | 19 | - Use [postgresqlco.nf](https://postgresqlco.nf) and look at the `Restart: xxx` field. For example, for 20 | [max_wal_size](https://postgresqlco.nf/doc/en/param/max_wal_size/), `Restart: false`, while for 21 | [shared_buffers](https://postgresqlco.nf/doc/en/param/shared_buffers), it's `true`. 
22 | 23 | - Check `context` in `pg_settings` – if it's `postmaster`, then a restart is needed, otherwise it's not (small 24 | exception: values `internal` – such parameters cannot be changed at all). For example: 25 | 26 | ```sql 27 | nik=# select name, context 28 | from pg_settings 29 | where name in ('max_wal_size', 'shared_buffers'); 30 | name | context 31 | ----------------+------------ 32 | max_wal_size | sighup 33 | shared_buffers | postmaster 34 | (2 rows) 35 | ``` 36 | 37 | ## 2) Perform the change 38 | 39 | Apply the change in Postgres config files (`postgresql.conf` or its dependencies, if `include` directive is used). It's 40 | advisable to `ALTER SYSTEM` unless necessary, because it might lead to confusion in the future (it writes 41 | to `postgresql.auto.conf` and later it can be easily overlooked; see also 42 | [this discussion](https://postgresql.org/message-id/flat/CA%2BVUV5rEKt2%2BCdC_KUaPoihMu%2Bi5ChT4WVNTr4CD5-xXZUfuQw%40mail.gmail.com)) 43 | 44 | ## 3) Apply the change 45 | 46 | If a restart is required, restart Postgres. If you forget to do it, you can later detect the situation of un-applied 47 | changes by looking at `pending_restart` in `pg_settings`. 48 | 49 | If a restart is not needed, execute under a superuser: 50 | 51 | ```sql 52 | select pg_reload_conf(); 53 | ``` 54 | 55 | Alternatively, you can use one of these methods: 56 | 57 | - `pg_ctl reload $PGDATA` 58 | - send a `SIGHUP` to the postmaster process, e.g.: 59 | 60 | ```bash 61 | kill -HUP $(cat "${PGDATA}/postmaster.pid") 62 | ``` 63 | 64 | When a non-restart-required change is applied, you'll see something like this in the Postgres log: 65 | 66 | ``` 67 | LOG: received SIGHUP, reloading configuration files 68 | LOG: parameter "max_wal_size" changed to "2GB" 69 | ``` 70 | 71 | ## 4) Verify the change 72 | 73 | Use `SHOW` or `current_setting(...)` to ensure that the change is applied, e.g.: 74 | 75 | ```sql 76 | nik=# show max_wal_size; 77 | max_wal_size 78 | -------------- 79 | 2GB 80 | (1 row) 81 | ``` 82 | 83 | or 84 | 85 | ```sql 86 | nik=# select current_setting('max_wal_size'); 87 | current_setting 88 | ----------------- 89 | 2GB 90 | (1 row) 91 | ``` 92 | 93 | ## Bonus: database-, user-, and table-level settings 94 | 95 | Settings with `context` in `pg_settings` being `user` or `superuser` can be adjusted at database or user level, e.g.: 96 | 97 | ```sql 98 | alter database verbosedb set log_statement = 'all'; 99 | alter user hero set statement_timeout = 0; 100 | ``` 101 | 102 | The result of this can be reviewed by looking at `pg_db_role_setting`: 103 | 104 | ```sql 105 | nik=# select * from pg_db_role_setting; 106 | setdatabase | setrole | setconfig 107 | -------------+---------+----------------------- 108 | 24580 | 0 | {log_statement=all} 109 | 0 | 24581 | {statement_timeout=0} 110 | (2 rows) 111 | ``` 112 | 113 | Some settings can also be adjusted at individual table level, when using `CREATE TABLE` or `ALTER TABLE`, see 114 | [CREATE TABLE / Storage Parameters](https://postgresql.org/docs/current/sql-createtable.html#SQL-CREATETABLE-STORAGE-PARAMETERS). 115 | Keep in mind naming deviation: `autovacuum_enabled` enables or disables the autovacuum daemon for a particular table, 116 | while the global setting name is simply `autovacuum`. 
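For example, here is how a table-level storage parameter can be set and then inspected (a minimal sketch; `t1` is just a placeholder table name):

```sql
-- per-table storage parameter (note the naming deviation mentioned above)
alter table t1 set (autovacuum_enabled = off);

-- inspect per-table storage parameters
select relname, reloptions
from pg_class
where relname = 't1';

-- revert to the global behavior
alter table t1 reset (autovacuum_enabled);
```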
117 | -------------------------------------------------------------------------------- /0084_how_to_find_the_best_order_of_columns_to_save_on_storage.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1737733195511312651), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to find the best order of columns to save on storage ("Column Tetris") 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | Does the column order matter in Postgres, storage-wise? 11 | 12 | The answer is **yes**. Let's consider an example (as usual, my advice is NOT to use int4 PKs, but here it is done for 13 | educational purposes): 14 | 15 | ```sql 16 | create table t( 17 | id int4 primary key, 18 | created_at timestamptz, 19 | is_public boolean, 20 | modified_at timestamptz, 21 | verified boolean, 22 | published_at timestamptz, 23 | score int2 24 | ); 25 | 26 | insert into t 27 | select 28 | i, 29 | clock_timestamp(), 30 | true, 31 | clock_timestamp(), 32 | true, 33 | clock_timestamp(), 34 | 0 35 | from generate_series(1, 1000000) as i; 36 | 37 | vacuum analyze t; 38 | ``` 39 | 40 | Checking the size: 41 | 42 | ``` 43 | nik=# \dt+ t 44 | List of relations 45 | Schema | Name | Type | Owner | Size | Description 46 | --------+------+-------+----------+-------+------------- 47 | public | t | table | postgres | 81 MB | 48 | (1 row) 49 | ``` 50 | 51 | Now, let's use the report `p1` from [postgres_dba](https://github.com/NikolayS/postgres_dba) (assuming it's installed): 52 | 53 | ``` 54 | :dba 55 | 56 | Type your choice and press : 57 | p1 58 | Table | Table Size | Comment | Wasted * | Suggested Columns Reorder 59 | -------+------------+---------+-----------------+----------------------------- 60 | t | 81 MB | | ~23 MB (28.40%) | is_public, score, verified + 61 | | | | | id, created_at, modified_at+ 62 | | | | | published_at 63 | (1 row) 64 | ``` 65 | 66 | -- the report claims we can save `~28%` of disk space just changing the column order. Note that this is an estimate. 67 | 68 | Let's check the optimized order: 69 | 70 | ```sql 71 | drop table t; 72 | 73 | create table t( 74 | is_public boolean, 75 | verified boolean, 76 | score int2, 77 | id int4 primary key, 78 | created_at timestamptz, 79 | modified_at timestamptz, 80 | published_at timestamptz 81 | ); 82 | 83 | insert into t 84 | select 85 | true, 86 | true, 87 | 0::int2, 88 | i::int4, 89 | clock_timestamp(), 90 | clock_timestamp(), 91 | clock_timestamp() 92 | from generate_series(1, 1000000) as i; 93 | 94 | vacuum analyze t; 95 | ``` 96 | 97 | Checking the size: 98 | 99 | ``` 100 | nik=# \dt+ t 101 | List of relations 102 | Schema | Name | Type | Owner | Size | Description 103 | --------+------+-------+----------+-------+------------- 104 | public | t | table | postgres | 57 MB | 105 | (1 row) 106 | ``` 107 | 108 | – we saved `~30%`, very close to what was expected (57 / 81 ~= 0.7037). 109 | 110 | postgres_dba's report p1 doesn't show potential savings anymore: 111 | 112 | ``` 113 | Type your choice and press : 114 | p1 115 | Table | Table Size | Comment | Wasted * | Suggested Columns Reorder 116 | -------+------------+---------+----------+--------------------------- 117 | t | 57 MB | | | 118 | (1 row) 119 | ``` 120 | 121 | Why did we save? 122 | 123 | In Postgres, the storage system may add alignment padding to column values. 
If a data type's natural alignment 124 | requirement is more than the size of the value, Postgres may pad the value with zeros up to the alignment boundary. For 125 | instance, when a column has a value with a size less than 8 bytes followed by a value requiring 8-byte alignment, 126 | Postgres pads the first value to align on an 8-byte boundary. This helps ensure that the values in memory are aligned 127 | with CPU word boundaries for the particular hardware architecture, which can lead to performance improvements. 128 | 129 | As an example, a row (`int4`, `timestamptz`) occupies 16 bytes: 130 | 131 | - 4 bytes for `int4` 132 | - 4 zeroes to align to 8-bytes 133 | - 8 bytes for `timestamptz` 134 | 135 | Some people prefer an inverted approach: first we start with 16- and 8-byte columns, then proceed to smaller ones. In 136 | any case, it makes sense to put columns having `VARLENA` types (`text`, `varchar`, `json`, `jsonb`, array) in the end. 137 | 138 | Articles to read on this topic: 139 | 140 | - [StackOverflow answer by Erwin Brandstetter](https://stackoverflow.com/questions/2966524/calculating-and-saving-space-in-postgresql/7431468#7431468) 141 | - [Ordering Table Columns in PostgreSQL (GitLab)](https://docs.gitlab.com/ee/development/database/ordering_table_columns.html) 142 | - Docs: [Table Row Layout](https://postgresql.org/docs/current/storage-page-layout.html#STORAGE-TUPLE-LAYOUT) 143 | -------------------------------------------------------------------------------- /0085_how_to_quickly_check_data_type_and_storage_size_of_a_value.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1737747838573150476), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to quickly check data type and storage size of a value 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | Here is how you can quickly check data type and size of a value, not looking in documentation. 11 | 12 | ## How to check data type for a value 13 | 14 | Use `pg_typeof(...)`: 15 | 16 | ```sql 17 | nik=# select pg_typeof(1); 18 | pg_typeof 19 | ----------- 20 | integer 21 | (1 row) 22 | 23 | nik=# select pg_typeof(1::int8); 24 | pg_typeof 25 | ----------- 26 | bigint 27 | (1 row) 28 | ``` 29 | 30 | ## How to check storage size for a value 31 | 32 | Use `pg_column_size(...)` – even without actual column: 33 | 34 | ```sql 35 | nik=# select pg_column_size(1); 36 | pg_column_size 37 | ---------------- 38 | 4 39 | (1 row) 40 | 41 | nik=# select pg_column_size(1::int8); 42 | pg_column_size 43 | ---------------- 44 | 8 45 | (1 row) 46 | ``` 47 | 48 | 👉 `int4` (aka `int` or `integer`) occupies 4 bytes, as expected, while `int8` (`bigint`) – 8. 49 | 50 | And `VARLENA` types such as `varchar`, `text`, `json`, `jsonb`, arrays have variable length (hence the name), and 51 | additional header, e.g.: 52 | 53 | ```sql 54 | nik=# select pg_column_size('ok'::text); 55 | pg_column_size 56 | ---------------- 57 | 6 58 | (1 row) 59 | ``` 60 | 61 | 👉 a 4-byte `VARLENA` header (for inline storage, 62 | see [struct varlena header](https://github.com/postgres/postgres/blob/c161ab74f76af8e0f3c6b349438525ad9575683b/src/include/c.h#L661-L681)) 63 | and 2 bytes for data. 
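To double-check the header overhead on a longer value, compare `pg_column_size(...)` with `octet_length(...)` (a small sanity check; the numbers in the comments assume an inline, uncompressed value):

```sql
select
  pg_column_size('hello world'::text) as total_bytes, -- expect 15: 4-byte header + 11 bytes of data
  octet_length('hello world'::text) as data_bytes;    -- expect 11
```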
64 | 65 | Boolean example: 66 | 67 | ``` 68 | nik=# select pg_column_size(true), pg_column_size(false); 69 | pg_column_size | pg_column_size 70 | ----------------+---------------- 71 | 1 | 1 72 | (1 row) 73 | ``` 74 | 75 | Remembering the previous howto, [Column Tetris](0084_how_to_find_the_best_order_of_columns_to_save_on_storage.md), here we 76 | can conclude that not only we need 1 byte to store a bit (8x space), it becomes 8 bytes if we create a table 77 | (`c1 boolean`, `c2 int8`), due to alignment padding – meaning that it's already 64 bits! So, in such "unfortunate" case, those 78 | who store 'true' as text, don't lose anything at all: 79 | 80 | ```sql 81 | nik=# select pg_column_size('true'::text); 82 | pg_column_size 83 | ---------------- 84 | 8 85 | (1 row) 86 | ``` 87 | 88 | 👉 it is also 8 bytes (4 bytes `VARLENA` header, and 4 bytes actual value). But if it's `false` as text, then – with 89 | alignment padding (if the next column is 8- or 16-byte), it can quickly become even worse: 90 | 91 | ```sql 92 | nik=# select pg_column_size('false'::text); 93 | pg_column_size 94 | ---------------- 95 | 9 96 | (1 row) 97 | ``` 98 | 99 | 👉 these 9 bytes are padded with zeroes to 16, when alignment padding is needed. Don't store true/false as text :) And 100 | for very optimized storage of multiple boolean "flag" values, consider using a single integer, and "packing" boolean 101 | values inside it, then bit operators `<<`, `~`, `|`, `&` to encode and decode 102 | values (docs: [Bit String Functions and Operators](https://postgresql.org/docs/current/functions-bitstring.html)) – 103 | doing so, you can "pack" 64 booleans inside a single `int8` value. 104 | 105 | A couple of more examples: 106 | 107 | ```sql 108 | nik=# select pg_column_size(now()); 109 | pg_column_size 110 | --------------- 111 | 8 112 | (1 row) 113 | 114 | nik=# select pg_column_size(interval '1s'); 115 | pg_column_size 116 | ---------------- 117 | 16 118 | (1 row) 119 | ``` 120 | 121 | ## How to check storage size for a row 122 | 123 | And a couple of more examples – how to check the size of a row: 124 | 125 | ```sql 126 | nik=# select pg_column_size(row(true, now())); 127 | pg_column_size 128 | ---------------- 129 | 40 130 | (1 row) 131 | ``` 132 | 133 | 👉 a 24-byte tuple header (23 bytes padded with 1 zero), then 1-byte boolean (padded with 7 zeroes), and then 8-byte 134 | `timestamptz` value. Total: 24 + 1+7 + 8 = 40. 135 | 136 | ```sql 137 | nik=# select pg_column_size(row(1, 2)); 138 | pg_column_size 139 | ---------------- 140 | 32 141 | (1 row) 142 | ``` 143 | 144 | 👉 a 24-byte tuple header, then two 4-byte integers, no padding needed, total is 24 + 4 + 4 = 32. 
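The same function accepts whole rows, which is handy for measuring real tuples in an existing table (a sketch; the table `t` is assumed to exist – e.g., the one from the previous howto):

```sql
-- size of actual stored rows (tuple header + data + alignment padding)
select pg_column_size(t.*) as row_bytes
from t
limit 3;

-- average row size over the whole table
select avg(pg_column_size(t.*))::int as avg_row_bytes
from t;
```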
145 | 146 | ## No need to remember exact function names 147 | 148 | When working in psql, there is no need to remember function names – use `\df+` to search function name: 149 | 150 | ```sql 151 | nik=# \df *pg_*type* 152 | List of functions 153 | Schema | Name | Result data type | Argument data types | Type 154 | ------------+-------------------------------------------+------------------+---------------------+------ 155 | pg_catalog | binary_upgrade_set_next_array_pg_type_oid | void | oid | func 156 | pg_catalog | binary_upgrade_set_next_pg_type_oid | void | oid | func 157 | pg_catalog | binary_upgrade_set_next_toast_pg_type_oid | void | oid | func 158 | pg_catalog | pg_stat_get_backend_wait_event_type | text | integer | func 159 | pg_catalog | pg_type_is_visible | boolean | oid | func 160 | pg_catalog | pg_typeof | regtype | "any" | func 161 | (6 rows) 162 | ``` 163 | -------------------------------------------------------------------------------- /0086_how_to_make_e_work_in_psql.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1738267148395688349), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to make "\e" work in psql on a new machine ("editor/nano/vi not found") 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | Sometimes this happens, when you're attempting to edit a query in psql using the `\e` command: 11 | 12 | ``` 13 | nik=# \e 14 | /usr/bin/sensible-editor: 20: editor: not found 15 | /usr/bin/sensible-editor: 31: nano: not found 16 | /usr/bin/sensible-editor: 20: nano-tiny: not found 17 | /usr/bin/sensible-editor: 20: vi: not found 18 | Couldn't find an editor! 19 | Set the $EDITOR environment variable to your desired editor. 20 | ``` 21 | 22 | Setting the editor is simple (use `nano` or another editor you prefer): 23 | 24 | ``` 25 | \setenv PSQL_EDITOR vim 26 | ``` 27 | 28 | But if you work inside a container, or on a new machine, the desired editor might not yet be installed. You can install 29 | it without leaving `psql`, assuming that there are enough permissions to run installation. For example, inside a 30 | "standard" Postgres, Debian-based (`sudo` is not needed here): 31 | 32 | ``` 33 | nik=# \! apt update && apt install -y vim 34 | ``` 35 | 36 | 👉 And then `\e` starts working! 37 | 38 | To make this setting persistent, add this to `~/.bash_profile` (or `~/.zprofile`): 39 | 40 | ```bash 41 | echo "export PSQL_EDITOR=vim" >> ~/.bash_profile 42 | source ~/.bash_profile 43 | ``` 44 | 45 | For Windows, see 46 | [this blog post](https://cybertec-postgresql.com/en/psql_editor-fighting-with-sublime-text-under-windows/) 47 | by [@PavloGolub](https://twitter.com/PavloGolub). 48 | 49 | Docs: https://postgresql.org/docs/current/app-psql.html 50 | -------------------------------------------------------------------------------- /0087_how_to_change_ownership_of_all_objects_in_a_database.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1738464958386737401), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to change ownership of all objects in a database 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 
9 | 10 | If you need to change ownership of *all* database objects in current database, use this anonymous `DO` 11 | block (or copy-paste from [here](https://gitlab.com/postgres-ai/database-lab/-/snippets/2075222)): 12 | 13 | ```sql 14 | do $$ 15 | declare 16 | new_owner text := 'NEW_OWNER_ROLE_NAME'; 17 | object_type record; 18 | r record; 19 | sql text; 20 | begin 21 | -- New owner for all schemas 22 | for r in select * from pg_namespace loop 23 | sql := format( 24 | 'alter schema %I owner to %I;', 25 | r.nspname, 26 | new_owner 27 | ); 28 | 29 | raise debug 'Execute SQL: %', sql; 30 | 31 | execute sql; 32 | end loop; 33 | 34 | -- Various DB objects 35 | -- c: composite type 36 | -- p: partitioned table 37 | -- i: index 38 | -- r: table 39 | -- v: view 40 | -- m: materialized view 41 | -- S: sequence 42 | for object_type in 43 | select 44 | unnest('{type,table,table,view,materialized view,sequence}'::text[]) type_name, 45 | unnest('{c,p,r,v,m,S}'::text[]) code 46 | loop 47 | for r in 48 | execute format( 49 | $sql$ 50 | select n.nspname, c.relname 51 | from pg_class c 52 | join pg_namespace n on 53 | n.oid = c.relnamespace 54 | and not n.nspname in ('pg_catalog', 'information_schema') 55 | and c.relkind = %L 56 | order by c.relname 57 | $sql$, 58 | object_type.code 59 | ) 60 | loop 61 | sql := format( 62 | 'alter %s %I.%I owner to %I;', 63 | object_type.type_name, 64 | r.nspname, 65 | r.relname, 66 | new_owner 67 | ); 68 | 69 | raise debug 'Execute SQL: %', sql; 70 | 71 | execute sql; 72 | end loop; 73 | end loop; 74 | 75 | -- Functions, procedures 76 | for r in 77 | select 78 | p.proname, 79 | n.nspname, 80 | pg_catalog.pg_get_function_identity_arguments(p.oid) as args 81 | from pg_catalog.pg_namespace as n 82 | join pg_catalog.pg_proc as p on p.pronamespace = n.oid 83 | where 84 | not n.nspname in ('pg_catalog', 'information_schema') 85 | and p.proname not ilike 'dblink%' -- We do not want dblink to be involved (exclusion) 86 | loop 87 | sql := format( 88 | 'alter function %I.%I(%s) owner to %I', -- todo: check support CamelStyle r.args 89 | r.nspname, 90 | r.proname, 91 | r.args, 92 | new_owner 93 | ); 94 | 95 | raise debug 'Execute SQL: %', sql; 96 | 97 | execute sql; 98 | end loop; 99 | 100 | -- Full text search dictionary 101 | -- TODO: text search configuration 102 | for r in 103 | select * 104 | from pg_catalog.pg_namespace n 105 | join pg_catalog.pg_ts_dict d on d.dictnamespace = n.oid 106 | where not n.nspname in ('pg_catalog', 'information_schema') 107 | loop 108 | sql := format( 109 | 'alter text search dictionary %I.%I owner to %I', 110 | r.nspname, 111 | r.dictname, 112 | new_owner 113 | ); 114 | 115 | raise debug 'Execute SQL: %', sql; 116 | 117 | execute sql; 118 | end loop; 119 | 120 | -- Domains 121 | for r in 122 | select typname, nspname 123 | from pg_catalog.pg_type 124 | join pg_catalog.pg_namespace on pg_namespace.oid = pg_type.typnamespace 125 | where typtype = 'd' and not nspname in ('pg_catalog', 'information_schema') 126 | loop 127 | sql := format( 128 | 'alter domain %I.%I owner to %I', 129 | r.nspname, 130 | r.typname, 131 | new_owner 132 | ); 133 | 134 | raise debug 'Execute SQL: %', sql; 135 | 136 | execute sql; 137 | end loop; 138 | end 139 | $$; 140 | ``` 141 | 142 | Do not forget to change the value of `new_owner`. 143 | 144 | The query excludes schemas `pg_catalog` and `information_schema`, and it covers: schema objects, tables, views, 145 | materialized views, functions, text search dictionaries, and domains. 
Depending on your PG versions, there might be 146 | other objects you need to address. Adjust the code if/as needed. 147 | 148 | To see the debug messages, change `client_min_messages` before running: 149 | 150 | ```sql 151 | set client_min_messages to debug; 152 | ``` 153 | -------------------------------------------------------------------------------- /0088_how_to_tune_linux_parameters_for_oltp_postgres.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1739545313311133849), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to tune Linux parameters for OLTP Postgres 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | Here are general recommendations for basic tuning of Linux to run Postgres under heavy OLTP (web/mobile apps) workloads. 11 | Most of them are default settings used in [postgresql_cluster](https://github.com/vitabaks/postgresql_cluster). 12 | 13 | Consider the parameters below as entry points for further study, and values provided as just rough tuning that is worth 14 | reviewing for a particular situation. 15 | 16 | Most of the parameters can be changed in `sysctl.conf`. After changing it, this needs to be called: 17 | 18 | ```bash 19 | sysctl -p /etc/sysctl.conf 20 | ``` 21 | 22 | Temporary change (taking `vm.swappiness` as example): 23 | 24 | ```bash 25 | sudo sysctl -w vm.swappiness=1 26 | ``` 27 | 28 | or: 29 | 30 | ```bash 31 | echo 1 | sudo tee /proc/sys/vm/swappiness 32 | ``` 33 | 34 | ## Memory management 35 | 36 | 1) `vm.overcommit_memory = 2` 37 | 38 | Avoid memory overallocation to prevent OOM killer from affecting Postgres. 39 | 40 | 2) `vm.swappiness = 1` 41 | 42 | Minimalistic swap, not fully switching it off. 43 | > 💀 This is a controversial topic; I personally have used 0 here under 44 | heavy loads in mission-critical systems and taking my chances with the OOM killer; however, many experts suggest not 45 | turning it off completely and using a low value – 1 or 10. 46 | 47 | **Good articles on this topic:** 48 | 49 | - [Deep PostgreSQL Thoughts: The Linux Assassin](https://crunchydata.com/blog/deep-postgresql-thoughts-the-linux-assassin) 50 | (2021; k8s context) by [@josepheconway](https://twitter.com/josepheconway) 51 | 52 | - [PostgreSQL load tuning on Red Hat Enterprise Linux](https://redhat.com/en/blog/postgresql-load-tuning-red-hat-enterprise-linux) (2022) 53 | 54 | 3) `vm.min_free_kbytes = 102400` 55 | 56 | Ensure available memory for Postgres during memory allocation spikes. 57 | 58 | 4) `transparent_hugepage/enabled=never`, `transparent_hugepage/defrag=never` 59 | 60 | Disable Transparent Huge Pages (THP) as they can induce latency and fragmentation not suitable for Postgres OLTP 61 | workloads. Disabling THP is generally recommended for OLTP systems (e.g., Oracle). 62 | 63 | - [Ubuntu/Debian](https://stackoverflow.com/questions/44800633/how-to-disable-transparent-huge-pages-thp-in-ubuntu-16-04lts) 64 | - [Red Hat](https://access.redhat.com/solutions/46111) 65 | 66 | ## I/O Management 67 | 68 | 5) `vm.dirty_background_bytes = 67108864` 69 | 70 | 6) `vm.dirty_bytes = 536870912` 71 | 72 | These ^ two are to tune [pdflush](https://lwn.net/Articles/326552/) to prevent IO lag spikes. 
See 73 | also: [PgCookbook - a PostgreSQL documentation project](https://github.com/grayhemp/pgcookbook/blob/master/database_server_configuration.md) by 74 | [@grayhemp](https://twitter.com/grayhemp). 75 | 76 | ## Network Configuration 77 | 78 | > 📝 note that below ipv4 settings are provided; 79 | > 🎯 **TODO:** ipv6 options 80 | 81 | 7) `net.ipv4.ip_local_port_range = 10000 65535` 82 | 83 | Allows handling of more client connections. 84 | 85 | 8) `net.core.netdev_max_backlog = 10000` 86 | 87 | Handles bursts of network traffic without packet loss. 88 | 89 | 9) `net.ipv4.tcp_max_syn_backlog = 8192` 90 | 91 | Accommodates high levels of concurrent connection attempts. 92 | 93 | 10) `net.core.somaxconn = 65535` 94 | 95 | Increases the limit for queued socket connections. 96 | 97 | 11) `net.ipv4.tcp_tw_reuse = 1` 98 | 99 | Reduces connection setup time for high throughput OLTP applications. 100 | 101 | ## NUMA Configuration 102 | 103 | 12) `vm.zone_reclaim_mode = 0` 104 | 105 | Avoids the performance impact of reclaiming memory across NUMA nodes for Postgres. 106 | 107 | 13) `kernel.numa_balancing = 0` 108 | 109 | Disables automatic NUMA balancing to enhance CPU cache efficiency for Postgres. 110 | 111 | 14) `kernel.sched_autogroup_enabled = 0` 112 | 113 | Improves process scheduling latency for Postgres. 114 | 115 | ## Filesystem and File Handling 116 | 117 | 15) `fs.file-max = 262144` 118 | 119 | Maximum number of file handles that the Linux kernel can allocate. When running a database server like Postgres, having 120 | enough file descriptors is critical to handle numerous connections and files simultaneously. 121 | 122 | > 🎯 **TODO:** review and adjust for various popular OSs 123 | -------------------------------------------------------------------------------- /0090_how_to_use_lib_pgquery_in_shell.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1740256615549599764), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to use lib_pgquery in shell to normalize and match queries from various sources 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | ## Why use lib_pgquery 11 | 12 | In [Day 12: How to find query examples for problematic pg_stat_statements records](0012_from_pgss_to_explain__how_to_find_query_examples.md) 13 | it was mentioned that query normalization can be done using [lib_pgquery](https://github.com/pganalyze/libpg_query). This 14 | library builds a tree representation of query text, and also computes so-called "fingerprint" – a hash of the normalized 15 | form of the query (query text where all parameters are removed). 16 | 17 | This is helpful in various cases, for example: 18 | 19 | - If you need to match normalized queries in `pg_stat_statements` with individual query texts from `pg_stat_activity` or 20 | Postgres logs, and you use Postgres version older than 14, where 21 | [compute_query_id](https://postgresqlco.nf/doc/en/param/compute_query_id/) was implemented to solve this problem. 22 | - If you use newer version of Postgres, but `compute_query_id` is `off`. 23 | - If you use query texts from different sources and/or are unsure that the standard `query_id` (aka "`queryid"` - the 24 | naming is not unified across tables) can be a reliable way of matching. 
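As a side note, for the cases above where Postgres 14+ is available, the in-core mechanism can be enabled and used roughly like this (a sketch; requires sufficient privileges):

```sql
-- enable query ID computation (the default 'auto' also enables it
-- implicitly when pg_stat_statements is loaded)
alter system set compute_query_id = on;
select pg_reload_conf();

-- query_id can then be used to match entries across views
select query_id, left(query, 60) as query
from pg_stat_activity
where query_id is not null;
```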
25 | 26 | The basic `lib_pgquery` is written in C, and it is used in libraries for various languages: 27 | 28 | - [Ruby](https://github.com/pganalyze/pg_query) 29 | - [Python](https://pypi.org/project/pglast/) 30 | - [Go](https://github.com/pganalyze/pg_query_go) 31 | - [Node](https://github.com/pyramation/pgsql-parser) 32 | 33 | ## Docker version for CLI use 34 | 35 | For convenience, my colleagues took the Go version and wrapped it into docker, allowing its use in CLI style, in shell: 36 | 37 | ``` 38 | ❯ docker run --rm postgresai/pg-query-normalize \ 39 | 'select c1 from t1 where c2 = 123;' 40 | { 41 | "query": "select c1 from t1 where c2 = 123;", 42 | "normalizedQuery": "select c1 from t1 where c2 = $1;", 43 | "fingerprint": "0212acd45326d8972d886d4b3669a90be9dd4a9853", 44 | "tree": [...] 45 | ] 46 | } 47 | ``` 48 | 49 | – it produces a JSON. 50 | 51 | To get the normalized query value, we can use `jq`: 52 | 53 | ``` 54 | ❯ docker run --rm postgresai/pg-query-normalize \ 55 | 'select c1 from t1 where c2 = 123;' \ 56 | | jq -r '.normalizedQuery' 57 | select c1 from t1 where c2 = $1; 58 | ``` 59 | 60 | To extract just the fingerprint as text: 61 | 62 | ``` 63 | ❯ docker run --rm postgresai/pg-query-normalize \ 64 | 'select c1 from t1 where c2 = 123;' \ 65 | | jq -r '.fingerprint' 66 | 0212acd45326d8972d886d4b3669a90be9dd4a9853 67 | ``` 68 | 69 | If we use different parameters, fingerprint doesn't change – let's use 0 instead of 123: 70 | 71 | ``` 72 | ❯ docker run --rm postgresai/pg-query-normalize \ 73 | 'select c1 from t1 where c2 = 0;' \ 74 | | jq -r '.fingerprint' 75 | 0212acd45326d8972d886d4b3669a90be9dd4a9853 76 | ``` 77 | 78 | ## Normalized queries as input 79 | 80 | And – this is a very good property! – if a query is already normalized, and we have placeholders (`$1`, `$2`, etc.) instead 81 | of parameters, the value remains the same: 82 | 83 | ``` 84 | ❯ docker run --rm postgresai/pg-query-normalize \ 85 | 'select c1 from t1 where c2 = $1;' \ 86 | | jq -r '.fingerprint' 87 | 0212acd45326d8972d886d4b3669a90be9dd4a9853 88 | ``` 89 | 90 | But it changes if the query is different – say, the `SELECT` clause is different, here we use `select *` instead of 91 | `select c1`, resulting in a new fingerprint value: 92 | 93 | ``` 94 | ❯ docker run --rm postgresai/pg-query-normalize \ 95 | 'select * from t1 where c2 = $1;' \ 96 | | jq -r '.fingerprint' 97 | 0293106c74feb862c398e267f188f071ffe85a30dd 98 | ``` 99 | 100 | ## IN lists 101 | 102 | Finally, unlike the in-core `queryid`, `lib_pgquery` ignores the number of parameters in the `IN` clause – this behavior suits 103 | better for query normalization and matching. Compare: 104 | 105 | ``` 106 | ❯ docker run --rm postgresai/pg-query-normalize \ 107 | 'select * from t1 where c2 in (1, 2, 3);' \ 108 | | jq -r '.fingerprint' 109 | 022fad3ad8fab1b289076f4cfd6ef0a21a15d01386 110 | 111 | ❯ docker run --rm postgresai/pg-query-normalize \ 112 | 'select * from t1 where c2 in (1000);' \ 113 | | jq -r '.fingerprint' 114 | 022fad3ad8fab1b289076f4cfd6ef0a21a15d01386 115 | ``` 116 | -------------------------------------------------------------------------------- /0091_how_to_format_text_output_in_psql_scripts.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1740257405198557348), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to format text output in psql scripts 6 | 7 | > I post a new PostgreSQL "howto" article every day. 
Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | For `psql`'s `\echo` command, using colors and basic text formatting such as bold or underlined can be done through the 11 | use of ANSI color codes. This can be useful when building complex scripts for `psql` (example: 12 | [postgres_dba](https://github.com/NikolayS/postgres_dba/)). 13 | 14 | Examples: 15 | 16 | ``` 17 | \echo '\033[1;31mThis is red text\033[0m' 18 | \echo '\033[1;32mThis is green text\033[0m' 19 | \echo '\033[1;33mThis is yellow text\033[0m' 20 | \echo '\033[1;34mThis is blue text\033[0m' 21 | \echo '\033[1;35mThis is magenta text\033[0m' 22 | \echo '\033[1;36mThis is cyan text\033[0m' 23 | \echo '\033[1mThis text is bold\033[0m' 24 | \echo '\033[4mThis text is underlined\033[0m' 25 | \echo '\033[38;2;128;0;128mThis text is purple\033[0m' 26 | 27 | -- RGB – arbitrary color 28 | \echo '\033[38;2;255;100;0mThis text is in orange (RGB 255,100,0)\033[0m' 29 | ``` 30 | 31 | Result: 32 | 33 | ![](files/0091_result.png) 34 | 35 | **Important:** the `\033[0m` sequence resets the text formatting to the default. 36 | 37 | The formatting is preserved in non-interactive mode of `psql`, and when combined with `ts` (to prefix timestamps, included 38 | in the `moreutils` package in Ubuntu): 39 | 40 | ```bash 41 | psql -Xc "\echo '\033[1;35mThis is magenta text\033[0m'" | ts 42 | ``` 43 | 44 | When `less` is used, formatting doesn't work – but it can be solved with option `-R`: 45 | 46 | ```bash 47 | psql -Xc "\echo '\033[1;35mThis is magenta text\033[0m'" | less -R 48 | ``` 49 | -------------------------------------------------------------------------------- /0092_how_to_tune_work_mem.md: -------------------------------------------------------------------------------- 1 | Originally from: [tweet](https://twitter.com/samokhvalov/status/1740813478150189172), [LinkedIn post](). 2 | 3 | --- 4 | 5 | # How to tune work_mem 6 | 7 | > I post a new PostgreSQL "howto" article every day. Join me in this 8 | > journey – [subscribe](https://twitter.com/samokhvalov/), provide feedback, share! 9 | 10 | `work_mem` is used for sorting and hashing operations during query execution. 11 | 12 | By default it's 4MB. 13 | 14 | Due to its nature, the tuning of `work_mem` is quite tricky. 15 | 16 | One of the possible approaches is explained here. 17 | 18 | ## Rough tuning and "safe" values of work_mem 19 | 20 | First, apply rough optimization as described in 21 | [Day 89: Rough configuration tuning (80/20 rule; OLTP)](0089_rough_oltp_configuration_tuning.md). 22 | 23 | A query can "spend" `work_mem` multiple times (for multiple operations). But it is not allocated fully for 24 | each operation – an operation can need a lower amount of memory. 25 | 26 | Therefore, it is hard to reliably predict, how much memory we'll need to use to handle a workload, without actual 27 | observation of the workload. 28 | 29 | Moreover, in Postgres 13, new parameter was added, 30 | [hash_mem_multiplier](https://postgresqlco.nf/doc/en/param/hash_mem_multiplier/), adjusting the logic. The default is 2 31 | in PG13-16. It means that max memory used by a single hash operation is 2 * `work_mem`. 
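To see how this plays out for a particular query, it can be useful to experiment at the session level (a sketch; the synthetic sort below is only for illustration):

```sql
show work_mem;
show hash_mem_multiplier;

-- session-level experiment: does this sort fit in memory or spill to disk?
set work_mem = '4MB';

explain (analyze, buffers)
select * from generate_series(1, 1000000) as g order by g;
-- in the plan, look for "Sort Method: external merge  Disk: ..." (spilled)
-- vs "Sort Method: quicksort  Memory: ..." (fits in work_mem)
```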
32 | 33 | Worth mentioning, understanding how much memory is used by a session in Linux is very tricky per se – see a great 34 | article by Andres Freund: 35 | [Analyzing the Limits of Connection Scalability in Postgres](https://techcommunity.microsoft.com/t5/azure-database-for-postgresql/analyzing-the-limits-of-connection-scalability-in-postgres/ba-p/1757266#memory-usage) 36 | (2020). 37 | 38 | A safe approach would be: 39 | 40 | - estimate how much memory is free – subtracting `shared_buffers`, `maintenance_work_mem`, etc., 41 | - then divide the estimated available memory by `max_connections` and additional number such as 4-5 (or more, to be on 42 | even safer side) – assuming that each backend will be using up to 4*`work_mem` or 5*`work_mem`. Of course, this multiplier 43 | itself is a very rough estimate – in reality, OLTP workloads usually are much less hungry on average (e.g., having a 44 | lot of PK lookups mean that average memory consumption is very low). 45 | 46 | In practice, it can make sense to adjust `work_mem` to a higher value, but this needs to be done after understanding the 47 | behavior of Postgres under certain workload. The following steps are parts of iterative approach for further tuning. 48 | 49 | ## Temp files monitoring 50 | 51 | Monitor how often temporary files are created and the size of these files. Source of proper info: 52 | 53 | 1) `pg_stat_database`, columns `temp_files` and `temp_bytes`. 54 | 55 | 2) Postgres logs – set `log_temp_files` to a low value, down to 0 (being careful with the observer effect). 56 | 57 | 3) `temp_blks_read` and `temp_blks_written` in `pg_stat_statements`. 58 | 59 | The goal of further `work_mem` tuning is getting rid of temporary files completely, or minimizing the frequency of 60 | creation of them and the size. 61 | 62 | ## Optimize queries 63 | 64 | If possible, consider optimizing queries that involve temporary files creation. Spend a reasonable amount of effort on 65 | this - if there is no obvious optimization, proceed to the next step. 66 | 67 | ## Raising work_mem further: how much? 68 | 69 | If reasonable optimization efforts are done, it is time to consider raising `work_mem` further. 70 | 71 | However, how much? The answer lies in the size of individual temporary files – from monitoring described above, we can 72 | find maximum and average temp file size, and estimate the required raise from there. 73 | 74 | > 🎯 **TODO:** detailed steps 75 | 76 | ## Raise work_mem for parts of workload 77 | 78 | It makes sense to raise it for individual queries. Consider two options: 79 | 80 | - `set work_mem ...` in individual sessions or transactions (`set local ...`), or 81 | - if you use various DB users for various parts of your workload, consider adjusting `work_mem` for particular users 82 | (e.g., for those that run analytical-like queries): `alter user ... set work_mem ...`. 83 | 84 | ## Raise work_mem globally 85 | 86 | Only if the previous steps are not suitable (e.g., it is hard to optimize queries and you cannot tune `work_mem` for parts 87 | of workload), then consider raising `work_mem` globally, evaluating OOM risks. 88 | 89 | ## Iterate 90 | 91 | After some time, review data from monitoring to ensure that situation improved or decide to perform another iteration. 92 | 93 | ## Extra: pg_get_backend_memory_contexts 94 | 95 | In Postgres 14, a new function `pg_get_backend_memory_contexts()` and corresponding view were 96 | added; see [docs](https://postgresql.org/docs/current/view-pg-backend-memory-contexts.html). 
This is helpful for detailed analysis 97 | of how current session works with memory, but the major limitation of this capability is that it works only with the 98 | current session. 99 | 100 | > 🎯 **TODO:** How to use it. 101 | -------------------------------------------------------------------------------- /0093_how_to_troubleshoot_streaming_replication_lag.md: -------------------------------------------------------------------------------- 1 | # How to troubleshoot streaming replication lag 2 | Streaming replication in Postgres allows for continuous data replication from a primary server to standby servers, to ensure high availability and balance read-only workloads. However, replication lag can occur, leading to delays in data synchronization. This guide provides steps to troubleshoot and mitigate replication lag. 3 | 4 | ## Identifying the lag 5 | To start investigation we need to understand where we actually have lag, on which stage of replication: 6 | - sending WAL stream to replica via network by `walsender` 7 | - receiving WAL stream on replica from network by `walreciever` 8 | - writing WAL on disk on replica by `walreciever` 9 | - applying (replaying) WAL as a recovery process 10 | 11 | Thus, streaming replication lag can be categorized into three types: 12 | - **Write Lag**: The delay between when a transaction is committed on the primary and when it is written to the WAL on the standby. 13 | - **Flush Lag**: The delay between when a transaction is written to the WAL on the standby and when it is flushed to the disk. 14 | - **Apply (Replay) Lag**: The delay between when a transaction is flushed to the disk and when it is applied to the database on the standby. 15 | 16 | ### Analysis query 17 | To identify the lag, use the following SQL query: 18 | ```sql 19 | select 20 | pid, 21 | client_addr, 22 | application_name, 23 | state, 24 | coalesce(pg_current_wal_lsn() - sent_lsn, 0) AS sent_lag_bytes, 25 | coalesce(sent_lsn - write_lsn, 0) AS write_lag_bytes, 26 | coalesce(write_lsn - flush_lsn, 0) AS flush_lag_bytes, 27 | coalesce(flush_lsn - replay_lsn, 0) AS replay_lag_bytes, 28 | coalesce(pg_current_wal_lsn() - replay_lsn, 0) AS total_lag_bytes 29 | from pg_stat_replication; 30 | ``` 31 | 32 | ### Example 33 | We will get something like this: 34 | ``` 35 | postgres=# select 36 | pid, 37 | client_addr, 38 | application_name, 39 | state, 40 | coalesce(pg_current_wal_lsn() - sent_lsn, 0) AS sent_lag_bytes, 41 | coalesce(sent_lsn - write_lsn, 0) AS write_lag_bytes, 42 | coalesce(write_lsn - flush_lsn, 0) AS flush_lag_bytes, 43 | coalesce(flush_lsn - replay_lsn, 0) AS replay_lag_bytes, 44 | coalesce(pg_current_wal_lsn() - replay_lsn, 0) AS total_lag_bytes 45 | from pg_stat_replication; 46 | 47 | pid | client_addr | application_name | state | sent_lag_bytes | write_lag_bytes | flush_lag_bytes | replay_lag_bytes | total_lag_bytes 48 | ---------+----------------+------------------+-----------+----------------+-----------------+-----------------+------------------+----------------- 49 | 3602908 | 10.122.224.101 | backupmachine1 | streaming | 0 | 728949184 | 0 | 0 | 0 50 | 2490863 | 10.122.224.102 | backupmachine1 | streaming | 0 | 519580176 | 0 | 0 | 0 51 | 2814582 | 10.122.224.103 | replica1 | streaming | 0 | 743384 | 0 | 1087208 | 1830592 52 | 3596177 | 10.122.224.104 | replica2 | streaming | 0 | 2426856 | 0 | 4271952 | 6698808 53 | 319473 | 10.122.224.105 | replica3 | streaming | 0 | 125080 | 162040 | 4186920 | 4474040 54 | ``` 55 | 56 | ### How to read results 57 | Meaning of those 
`_lsn` 58 | - `sent_lsn`: How much WAL (lsn position) has already been sent over the network 59 | - `write_lsn`: How much WAL (lsn position) has been sent to the operating system (before flushing) 60 | - `flush_lsn`: How much WAL (lsn position) has been flushed to the disk (written on the disk) 61 | - `replay_lsn`: How much WAL (lsn position) has been applied (visible for queries) 62 | 63 | So lag is a gap between `pg_current_wal_lsn` and `replay_lsn` (`total_lag_bytes`, and it's a good idea to add it to monitoring, but for troubleshooting purposes we will need all 4 64 | 65 | - Lag on `sent_lag_bytes` means we have issues with sending the data, i.e. CPU saturated `WALsender` or overloaded network socket on the primary side 66 | - Lag on `write_lag_bytes` means we have issues with receiving the data, i.e. CPU saturated `WALreceiver` or overloaded network socket on the replica side 67 | - Lag on `flush_lag_bytes` means we have issues with writing the data on the disk on replica side, i.e. CPU saturated or IO contention of `WALreceiver` 68 | - Lag `replay_lag_bytes` means we have issues with applying WAL on replica, usually CPU saturated or IO contention of postgres process 69 | 70 | Once we pinpointed the problem, we need to troubleshoot the process(es) on the OS level to find the bottleneck. 71 | 72 | 73 | ## Possible bottlenecks 74 | TBD 75 | 76 | ## Additional resources 77 | - [Streaming replication](https://www.postgresql.org/docs/current/warm-standby.html#STREAMING-REPLICATION) (official Postgres docs) 78 | - [pg_stat_replication view](https://www.postgresql.org/docs/current/monitoring-stats.html#PG-STAT-REPLICATION-VIEW) (official Postgres docs) 79 | - [Replication configuration parameters](https://www.postgresql.org/docs/current/runtime-config-replication.html) (official Postgres docs) 80 | 81 | ## Authors/maintainers of this docs 82 | - Dmitry Fomin 83 | - Sadeq Dousti 84 | - Nikolay Samokhvalov -------------------------------------------------------------------------------- /files/0002_cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/postgres-ai/postgres-howtos/22be1f8d33978b9a7a9bc8abcc4e6c011ae46877/files/0002_cover.png -------------------------------------------------------------------------------- /files/0003_cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/postgres-ai/postgres-howtos/22be1f8d33978b9a7a9bc8abcc4e6c011ae46877/files/0003_cover.png -------------------------------------------------------------------------------- /files/0004_cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/postgres-ai/postgres-howtos/22be1f8d33978b9a7a9bc8abcc4e6c011ae46877/files/0004_cover.png -------------------------------------------------------------------------------- /files/0005_cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/postgres-ai/postgres-howtos/22be1f8d33978b9a7a9bc8abcc4e6c011ae46877/files/0005_cover.png -------------------------------------------------------------------------------- /files/0006_cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/postgres-ai/postgres-howtos/22be1f8d33978b9a7a9bc8abcc4e6c011ae46877/files/0006_cover.png 