├── cluster-children.sh ├── schema-condensed.txt ├── migrate-to-partitioned-dialog.sql ├── auto-vacuum-tuning.sql ├── outline.md └── migrate-data.sql /cluster-children.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # ==================================================================== 3 | # SCRIPT TO CLUSTER DIALOG CHILD TABLES USING PG_REPACK 4 | # ==================================================================== 5 | # This script physically reorders child tables based on their parent's 6 | # foreign key to improve JOIN performance. 7 | # It should be run periodically (e.g., weekly) via cron. 8 | # 9 | # REQUIREMENTS: 10 | # - pg_repack must be installed and in the system's PATH. 11 | # - The user running the script must have appropriate permissions. 12 | # ==================================================================== 13 | 14 | set -euo pipefail # Fail on error, unbound variable, or pipe failure 15 | 16 | # --- DATABASE CONNECTION DETAILS (EDIT THESE) --- 17 | DB_HOST="host.containers.internal" 18 | DB_PORT="5432" 19 | DB_USER="postgres" 20 | DB_NAME="dialogporten" 21 | 22 | # Set the PGPASSWORD environment variable to avoid password prompts 23 | # It's recommended to use a .pgpass file for production environments. 24 | export PGPASSWORD="supersecret" 25 | 26 | # --- TABLE AND CLUSTER KEY CONFIGURATION --- 27 | # Associative array mapping "TableName" to its "ClusteringKey" 28 | # Add or remove tables as needed. 29 | declare -A TABLES_TO_CLUSTER=( 30 | # Direct Children of Dialog 31 | ["DialogTransmission"]="DialogId" 32 | ["DialogActivity"]="DialogId" 33 | ["DialogContent"]="DialogId" 34 | ["Attachment"]="DialogId" 35 | ["DialogGuiAction"]="DialogId" 36 | ["DialogApiAction"]="DialogId" 37 | ["DialogEndUserContext"]="DialogId" 38 | ["DialogServiceOwnerContext"]="DialogId" 39 | ["DialogSeenLog"]="DialogId" 40 | ["DialogSearchTag"]="DialogId" 41 | 42 | # Grandchildren (and deeper in the hierarchy) 43 | ["Actor"]="ActivityId" # Actor's most direct link to the hierarchy. 44 | ["DialogTransmissionContent"]="TransmissionId" 45 | ["AttachmentUrl"]="AttachmentId" 46 | ["DialogApiActionEndpoint"]="ActionId" 47 | ["DialogServiceOwnerLabel"]="DialogServiceOwnerContextId" 48 | ["LabelAssignmentLog"]="ContextId" 49 | 50 | # Polymorphic/Complex Children 51 | ["LocalizationSet"]="DialogContentId" # Clustering by the most likely path 52 | ) 53 | 54 | # --- EXECUTION LOGIC --- 55 | log() { 56 | echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" 57 | } 58 | 59 | log "Starting child table clustering process for database '$DB_NAME'." 60 | 61 | for table in "${!TABLES_TO_CLUSTER[@]}"; do 62 | cluster_key="${TABLES_TO_CLUSTER[$table]}" 63 | log "Attempting to repack and cluster table '$table' on key '$cluster_key'..." 64 | 65 | # Construct the pg_repack command 66 | # The --no-order-by is a legacy alias, --order-by is correct. 67 | # We use double quotes to handle case-sensitive names. 68 | PGREPACK_CMD="pg_repack --host=$DB_HOST --port=$DB_PORT --username=$DB_USER --dbname=$DB_NAME --table='\"public\".\"$table\"' --order-by='\"$cluster_key\"' --wait-timeout=300" 69 | 70 | # Execute the command 71 | if eval "$PGREPACK_CMD"; then 72 | log "SUCCESS: Successfully repacked table '$table'." 73 | else 74 | log "ERROR: Failed to repack table '$table'. Check logs for details." 75 | # Decide if you want the script to exit on first failure or continue. 76 | # To continue, remove 'exit 1'. 77 | exit 1 78 | fi 79 | done 80 | 81 | log "Clustering process completed successfully." 82 | exit 0 83 | -------------------------------------------------------------------------------- /schema-condensed.txt: -------------------------------------------------------------------------------- 1 | This document provides a highly condensed, plain-text representation of the database schema, optimized for machine parsing and minimal token usage. 2 | 3 | Notation Guide 4 | * Structure: TableName(Column1:Type:Constraints...);Indexes 5 | * Data Types: 6 | * uuid, int, bool 7 | * ts: timestamp with time zone 8 | * vc(n): varchar(n) 9 | * Constraints & Keys: 10 | * pk: Primary Key (for single-column keys) 11 | nn: Not Null 12 | fk>Table(Col): Foreign Key 13 | uniq: Unique constraint/index on a single column 14 | * Indexes & Composite Keys (at end of line): 15 | * pk:col1,col2: Composite Primary Key 16 | * uniq:col1,col2: Composite Unique Index 17 | * idx:col1,...: Non-unique B-tree index 18 | * gin_idx:col: GIN index 19 | 20 | Schema Definition 21 | Dialog(Id:uuid:pk,Revision:uuid:nn,Deleted:bool:nn,DeletedAt:ts,Org:vc(255):nn,ServiceResource:vc(255):nn,ServiceResourceType:vc(255):nn,Party:vc(255):nn,Progress:int,ExtendedStatus:vc(255),ExternalReference:vc(255),VisibleFrom:ts,DueAt:ts,ExpiresAt:ts,StatusId:int:nn:fk>DialogStatus(Id),PrecedingProcess:vc(255),Process:vc(255),IdempotentKey:vc(36),IsApiOnly:bool:nn);idx:StatusId,Org,Party,ServiceResource,DueAt,CreatedAt;uniq:Org,IdempotentKey 22 | DialogTransmission(Id:uuid:pk,CreatedAt:ts:nn,AuthorizationAttribute:vc(255),ExtendedType:vc(1023),TypeId:int:nn:fk>DialogTransmissionType(Id),DialogId:uuid:nn:fk>Dialog(Id),RelatedTransmissionId:uuid:fk>DialogTransmission(Id),ExternalReference:vc(255));idx:DialogId,TypeId,RelatedTransmissionId 23 | DialogActivity(Id:uuid:pk,CreatedAt:ts:nn,ExtendedType:vc(1023),TypeId:int:nn:fk>DialogActivityType(Id),DialogId:uuid:nn:fk>Dialog(Id),TransmissionId:uuid:fk>DialogTransmission(Id));idx:DialogId,TypeId,TransmissionId 24 | Actor(Id:uuid:pk,ActorTypeId:int:nn:fk>ActorType(Id),Discriminator:vc(255):nn,ActivityId:uuid:fk>DialogActivity(Id):uniq,DialogSeenLogId:uuid:fk>DialogSeenLog(Id):uniq,TransmissionId:uuid:fk>DialogTransmission(Id):uniq,LabelAssignmentLogId:uuid:fk>LabelAssignmentLog(Id):uniq,ActorNameEntityId:uuid:fk>ActorName(Id));idx:ActorTypeId,ActorNameEntityId 25 | ActorName(Id:uuid:pk,ActorId:vc(255),Name:vc(255),CreatedAt:ts:nn);uniq:ActorId,Name 26 | DialogContent(Id:uuid:pk,MediaType:vc(255):nn,DialogId:uuid:nn:fk>Dialog(Id),TypeId:int:nn:fk>DialogContentType(Id));uniq:DialogId,TypeId 27 | DialogTransmissionContent(Id:uuid:pk,MediaType:vc(255):nn,TransmissionId:uuid:nn:fk>DialogTransmission(Id),TypeId:int:nn:fk>DialogTransmissionContentType(Id));uniq:TransmissionId,TypeId 28 | Attachment(Id:uuid:pk,Discriminator:vc(255):nn,DialogId:uuid:fk>Dialog(Id),TransmissionId:uuid:fk>DialogTransmission(Id)) 29 | AttachmentUrl(Id:uuid:pk,MediaType:vc(255),Url:vc(1023):nn,ConsumerTypeId:int:nn:fk>AttachmentUrlConsumerType(Id),AttachmentId:uuid:nn:fk>Attachment(Id)) 30 | DialogGuiAction(Id:uuid:pk,Action:vc(255):nn,Url:vc(1023):nn,AuthorizationAttribute:vc(255),IsDeleteDialogAction:bool:nn,PriorityId:int:nn:fk>DialogGuiActionPriority(Id),HttpMethodId:int:nn:fk>HttpVerb(Id),DialogId:uuid:nn:fk>Dialog(Id)) 31 | DialogApiAction(Id:uuid:pk,Action:vc(255):nn,AuthorizationAttribute:vc(255),DialogId:uuid:nn:fk>Dialog(Id),Name:vc(255)) 32 | DialogApiActionEndpoint(Id:uuid:pk,Version:vc(255),Url:vc(1023):nn,Deprecated:bool:nn,SunsetAt:ts,HttpMethodId:int:nn:fk>HttpVerb(Id),ActionId:uuid:nn:fk>DialogApiAction(Id)) 33 | DialogEndUserContext(Id:uuid:pk,Revision:uuid:nn,DialogId:uuid:fk>Dialog(Id):uniq,SystemLabelId:int:nn:fk>SystemLabel(Id)) 34 | DialogServiceOwnerContext(DialogId:uuid:pk:fk>Dialog(Id),Revision:uuid:nn) 35 | DialogServiceOwnerLabel(DialogServiceOwnerContextId:uuid:nn:fk>DialogServiceOwnerContext(DialogId),Value:vc(255):nn,CreatedAt:ts:nn);pk:DialogServiceOwnerContextId,Value 36 | DialogSeenLog(Id:uuid:pk,CreatedAt:ts:nn,IsViaServiceOwner:bool:nn,DialogId:uuid:nn:fk>Dialog(Id),EndUserTypeId:int:nn:fk>DialogUserType(Id)) 37 | DialogSearchTag(Id:uuid:pk,CreatedAt:ts:nn,Value:vc(63):nn,DialogId:uuid:nn:fk>Dialog(Id));gin_idx:Value 38 | LabelAssignmentLog(Id:uuid:pk,CreatedAt:ts:nn,Name:vc(255):nn,Action:vc(255):nn,ContextId:uuid:nn:fk>DialogEndUserContext(Id)) 39 | LocalizationSet(Id:uuid:pk,Discriminator:vc(255):nn);note:polymorphic_1-to-1_fks 40 | Localization(LocalizationSetId:uuid:nn:fk>LocalizationSet(Id),LanguageCode:vc(15):nn,Value:vc(4095):nn);pk:LocalizationSetId,LanguageCode;gin_idx:Value 41 | SubjectResource(Id:uuid:pk,Subject:vc(255):nn,Resource:vc(255):nn);uniq:Resource,Subject 42 | ResourcePolicyInformation(Id:uuid:pk,Resource:vc(255):nn:uniq,MinimumAuthenticationLevel:int:nn) 43 | NotificationAcknowledgement(EventId:uuid:nn,NotificationHandler:vc(255):nn,AcknowledgedAt:ts:nn);pk:EventId,NotificationHandler 44 | LookupTables(Id:int:pk,Name:vc(255):nn);tables:ActorType,DialogStatus,HttpVerb,SystemLabel,DialogUserType,etc 45 | FrameworkTables;tables:__EFMigrationsHistory,MassTransitInboxState,MassTransitOutboxState,MassTransitOutboxMessage 46 | -------------------------------------------------------------------------------- /migrate-to-partitioned-dialog.sql: -------------------------------------------------------------------------------- 1 | -- ==================================================================== 2 | -- SCRIPT TO MIGRATE DIALOG TABLE TO A TWO-LEVEL PARTITIONED STRUCTURE 3 | -- ==================================================================== 4 | -- 5 | -- WARNING! This script _will_ fail unless max_locks_per_transaction is 6 | -- increased to accomodate for creating all the partitions. This cannot be 7 | -- done without a server restart. See postgresql.conf and set 8 | -- max_locks_per_transaction = 10000 9 | -- ==================================================================== 10 | 11 | BEGIN; 12 | 13 | -- STEP 1: RENAME THE EXISTING DIALOG TABLE AND ITS PRIMARY KEY 14 | ALTER TABLE public."Dialog" RENAME TO "Dialog_Old"; 15 | ALTER INDEX public."PK_Dialog" RENAME TO "PK_Dialog_Old"; 16 | 17 | 18 | -- STEP 2: CREATE THE NEW TOP-LEVEL PARENT TABLE 19 | -- This table is partitioned by RANGE but holds no data itself. 20 | CREATE TABLE public."Dialog" ( 21 | LIKE public."Dialog_Old" INCLUDING DEFAULTS 22 | ) PARTITION BY RANGE ("ContentUpdatedAt"); 23 | 24 | -- Redefine the primary key to include all partition keys from both levels. 25 | ALTER TABLE public."Dialog" ADD PRIMARY KEY ("Id", "ContentUpdatedAt", "Party"); 26 | 27 | 28 | -- STEP 3: CREATE THE PARTITION MANAGEMENT FUNCTION 29 | -- This function creates a monthly partition and its 64 hash sub-partitions. 30 | -- It is idempotent and can be run safely multiple times. 31 | CREATE OR REPLACE FUNCTION public.make_dialog_month(p_year int, p_month int) 32 | RETURNS void AS $$ 33 | DECLARE 34 | monthly_partition_name text; 35 | monthly_from_date date; 36 | monthly_to_date date; 37 | BEGIN 38 | monthly_from_date := make_date(p_year, p_month, 1); 39 | monthly_to_date := monthly_from_date + interval '1 month'; 40 | monthly_partition_name := 'dialog_p' || to_char(monthly_from_date, 'YYYY_MM'); 41 | 42 | -- Create the monthly range partition if it doesn't exist. 43 | -- This table will be the parent for the hash sub-partitions. 44 | EXECUTE format( 45 | 'CREATE TABLE IF NOT EXISTS public.%I PARTITION OF public."Dialog" FOR VALUES FROM (%L) TO (%L) PARTITION BY HASH ("Party");', 46 | monthly_partition_name, monthly_from_date, monthly_to_date 47 | ); 48 | 49 | -- Create the 64 hash sub-partitions for this month if they don't exist. 50 | FOR i IN 0..63 LOOP 51 | EXECUTE format( 52 | 'CREATE TABLE IF NOT EXISTS public.%I_p%s PARTITION OF public.%I FOR VALUES WITH (MODULUS 64, REMAINDER %s);', 53 | monthly_partition_name, i, monthly_partition_name, i 54 | ); 55 | END LOOP; 56 | END; 57 | $$ LANGUAGE plpgsql; 58 | 59 | 60 | -- STEP 4: MANUALLY CREATE HISTORICAL AND RECENT PARTITIONS 61 | -- This section creates all necessary partitions from the start date up to a few 62 | -- months into the future. pg_cron will take over management from this point. 63 | 64 | -- Create YEARLY partitions from 2000 to 2019 65 | DO $$ 66 | DECLARE 67 | yearly_partition_name text; 68 | yearly_from_date date; 69 | yearly_to_date date; 70 | BEGIN 71 | FOR year_val IN 2000..2019 LOOP 72 | yearly_from_date := make_date(year_val, 1, 1); 73 | yearly_to_date := make_date(year_val + 1, 1, 1); 74 | yearly_partition_name := 'dialog_y' || year_val; 75 | 76 | EXECUTE format( 77 | 'CREATE TABLE IF NOT EXISTS public.%I PARTITION OF public."Dialog" FOR VALUES FROM (%L) TO (%L) PARTITION BY HASH ("Party");', 78 | yearly_partition_name, yearly_from_date, yearly_to_date 79 | ); 80 | 81 | -- Create 64 hash sub-partitions for the yearly partition 82 | FOR i IN 0..63 LOOP 83 | EXECUTE format( 84 | 'CREATE TABLE IF NOT EXISTS public.%I_p%s PARTITION OF public.%I FOR VALUES WITH (MODULUS 64, REMAINDER %s);', 85 | yearly_partition_name, i, yearly_partition_name, i 86 | ); 87 | END LOOP; 88 | END LOOP; 89 | END; 90 | $$; 91 | 92 | 93 | -- Create MONTHLY partitions from Jan 2020 up to 4 months in the future by calling the new function 94 | DO $$ 95 | DECLARE 96 | d date; 97 | start_date date := '2020-01-01'; 98 | end_date date := (date_trunc('month', now()) + interval '4 months')::date; 99 | BEGIN 100 | FOR d IN SELECT generate_series(start_date, end_date, '1 month'::interval) LOOP 101 | PERFORM public.make_dialog_month(EXTRACT(YEAR FROM d)::int, EXTRACT(MONTH FROM d)::int); 102 | END LOOP; 103 | END; 104 | $$; 105 | 106 | 107 | -- STEP 5: CREATE INDEXES ON THE NEW PARTITIONED TABLE 108 | -- These indexes will be automatically created on all new and existing partitions. 109 | CREATE INDEX ON public."Dialog" ("Party", "ContentUpdatedAt" DESC); 110 | CREATE INDEX ON public."Dialog" ("Id"); 111 | 112 | 113 | COMMIT; 114 | 115 | -- ==================================================================== 116 | -- 117 | -- POST-SCRIPT ACTIONS: 118 | -- 1. DATA MIGRATION: You must now migrate data from "Dialog_Old" to the 119 | -- new partitioned "Dialog" table. 120 | -- 121 | -- 2. FOREIGN KEYS: After migration, update foreign keys to point to the 122 | -- new "Dialog" table. 123 | -- 124 | -- 3. SCHEDULE FUTURE PARTITION CREATION WITH PG_CRON: 125 | -- To ensure partitions are always available for new data, schedule a 126 | -- job to run the make_dialog_month function. 127 | -- 128 | -- Example: Schedule a job to run on the 1st of every month at 2 AM 129 | -- to create partitions for the current month and the next four months. 130 | -- 131 | -- SELECT cron.schedule( 132 | -- 'monthly-partition-maintenance', 133 | -- '0 2 1 * *', -- Cron syntax for 2 AM on the 1st day of every month 134 | -- $$ 135 | -- DO $do$ 136 | -- DECLARE 137 | -- d date; 138 | -- BEGIN 139 | -- -- Loop from the current month to 4 months ahead 140 | -- FOR d IN SELECT generate_series(date_trunc('month', now())::date, (date_trunc('month', now()) + interval '4 months')::date, '1 month'::interval) LOOP 141 | -- PERFORM public.make_dialog_month(EXTRACT(YEAR FROM d)::int, EXTRACT(MONTH FROM d)::int); 142 | -- END LOOP; 143 | -- END; 144 | -- $do$; 145 | -- $$ 146 | -- ); 147 | -- ==================================================================== 148 | -------------------------------------------------------------------------------- /auto-vacuum-tuning.sql: -------------------------------------------------------------------------------- 1 | -- ==================================================================== 2 | -- PostgreSQL Per-Table AUTOVACUUM Tuning Script 3 | -- 4 | -- This script enables autovacuum and applies a comprehensive, tiered 5 | -- tuning strategy to all tables that previously had autovacuum disabled. 6 | -- 7 | -- Key Improvements: 8 | -- 1. Explicitly sets 'autovacuum_enabled = true' for all tables. 9 | -- 2. Adds vacuum settings (scale factor, threshold) to all tiers 10 | -- to actively combat table bloat. 11 | -- 3. Consolidates all parameters into a single ALTER TABLE command 12 | -- per table for clarity and efficiency. 13 | -- ==================================================================== 14 | 15 | -- ==================================================================== 16 | -- Tier 1: Critical Churn Tables (Aggressive VACUUM & ANALYZE) 17 | -- 18 | -- Goal: Prevent bloat and keep stats fresh in queue-like tables with 19 | -- millions of updates/deletes. 20 | -- ==================================================================== 21 | 22 | -- Justification: These tables function as high-speed queues. An aggressive 23 | -- vacuum strategy (low scale factor, moderate threshold) is essential. 24 | -- Analyze is also made more frequent than the default. 25 | DO $$ 26 | DECLARE 27 | t_name TEXT; 28 | tables_to_tune TEXT[] := ARRAY[ 29 | 'MassTransitOutboxMessage', 30 | 'MassTransitOutboxState' 31 | ]; 32 | BEGIN 33 | FOREACH t_name IN ARRAY tables_to_tune 34 | LOOP 35 | EXECUTE format('ALTER TABLE %I SET ( 36 | autovacuum_enabled = true, 37 | autovacuum_vacuum_scale_factor = 0.01, 38 | autovacuum_vacuum_threshold = 1000, 39 | autovacuum_analyze_scale_factor = 0.05, 40 | autovacuum_analyze_threshold = 1000 41 | )', t_name); 42 | END LOOP; 43 | END $$; 44 | 45 | -- ==================================================================== 46 | -- Tier 2: Extremely Large Tables (Aggressive ANALYZE, Responsive VACUUM) 47 | -- 48 | -- Goal: Keep planner statistics fresh and prevent catastrophic bloat 49 | -- for tables with hundreds of millions to billions of rows. 50 | -- ==================================================================== 51 | 52 | -- Justification: 53 | -- ANALYZE: A tiny scale factor (0.1%) is critical for the query planner. 54 | -- VACUUM: The default 20% scale factor would mean waiting for hundreds of 55 | -- millions of dead rows. A lower scale factor (1%) and a high fixed 56 | -- threshold prevents this, ensuring bloat is managed proactively. 57 | 58 | DO $$ 59 | DECLARE 60 | t_name TEXT; 61 | tables_to_tune TEXT[] := ARRAY[ 62 | 'Actor', 63 | 'Attachment', 64 | 'AttachmentUrl', 65 | 'DialogGuiAction', 66 | 'DialogApiAction', 67 | 'DialogApiActionEndpoint', 68 | 'DialogContent', 69 | 'DialogActivity', 70 | 'DialogTransmission', 71 | 'DialogTransmissionContent', 72 | 'DialogEndUserContext', 73 | 'Dialog', 74 | 'Localization', 75 | 'LocalizationSet' 76 | ]; 77 | BEGIN 78 | FOREACH t_name IN ARRAY tables_to_tune 79 | LOOP 80 | EXECUTE format('ALTER TABLE %I SET ( 81 | autovacuum_enabled = true, 82 | autovacuum_vacuum_scale_factor = 0.01, 83 | autovacuum_vacuum_threshold = 10000, 84 | autovacuum_analyze_scale_factor = 0.001 85 | )', t_name); 86 | END LOOP; 87 | END $$; 88 | 89 | -- ==================================================================== 90 | -- Tier 3: Large Append-Mostly Tables (Responsive ANALYZE & VACUUM) 91 | -- 92 | -- Goal: Ensure timely statistics updates and bloat management for 93 | -- tables with millions of rows. 94 | -- ==================================================================== 95 | 96 | -- Justification: A uniform scale factor for both ANALYZE (2%) and 97 | -- VACUUM (5%) makes autovacuum more responsive than the defaults without 98 | -- being overly aggressive. A fixed threshold of 5000 rows provides a 99 | -- failsafe against large, sudden delete operations. 100 | DO $$ 101 | DECLARE 102 | t_name TEXT; 103 | tables_to_tune TEXT[] := ARRAY[ 104 | 'DialogSeenLog', 105 | 'DialogSearchTag', 106 | 'ActorName', 107 | 'DialogServiceOwnerContext' 108 | ]; 109 | BEGIN 110 | FOREACH t_name IN ARRAY tables_to_tune 111 | LOOP 112 | EXECUTE format('ALTER TABLE %I SET ( 113 | autovacuum_enabled = true, 114 | autovacuum_vacuum_scale_factor = 0.05, 115 | autovacuum_vacuum_threshold = 5000, 116 | autovacuum_analyze_scale_factor = 0.02, 117 | autovacuum_analyze_threshold = 5000 118 | )', t_name); 119 | END LOOP; 120 | END $$; 121 | 122 | 123 | -- ==================================================================== 124 | -- After applying these settings, it is wise to run a manual ANALYZE 125 | -- on the tables one last time to ensure the planner has fresh 126 | -- stats immediately. 127 | -- 128 | -- Example: 129 | -- 130 | -- ANALYZE VERBOSE public."Localization"; 131 | -- ANALYZE VERBOSE public."LocalizationSet"; 132 | -- ANALYZE VERBOSE public."Dialog"; 133 | -- ANALYZE VERBOSE public."DialogSearchTag"; 134 | -- ANALYZE VERBOSE public."DialogApiActionEndpoint"; 135 | -- ANALYZE VERBOSE public."DialogApiAction"; 136 | -- ANALYZE VERBOSE public."DialogContent"; 137 | -- ANALYZE VERBOSE public."Attachment"; 138 | -- ANALYZE VERBOSE public."AttachmentUrl"; 139 | -- ANALYZE VERBOSE public."Actor"; 140 | -- ANALYZE VERBOSE public."DialogActivity"; 141 | -- ANALYZE VERBOSE public."DialogGuiAction"; 142 | -- ANALYZE VERBOSE public."DialogTransmissionContent"; 143 | -- ANALYZE VERBOSE public."DialogEndUserContext"; 144 | -- ANALYZE VERBOSE public."DialogTransmission"; 145 | -- ANALYZE VERBOSE public."MassTransitOutboxMessage"; 146 | -- ANALYZE VERBOSE public."MassTransitOutboxState"; 147 | -- 148 | -- Then, monitor the output of the following query over the next few days 149 | -- to confirm that 'last_autovacuum' and 'last_autoanalyze' are updating: 150 | -- 151 | -- SELECT relname, last_autovacuum, last_autoanalyze, n_live_tup, n_dead_tup 152 | -- FROM pg_stat_user_tables 153 | -- WHERE relname IN ('Dialog', 'Localization', 'MassTransitOutboxMessage'); 154 | -- 155 | -- 156 | -- This displays current auto_* settings for all tables 157 | -- 158 | -- SELECT 159 | -- n.nspname AS schema_name, 160 | -- c.relname AS table_name, 161 | -- MAX(CASE WHEN kv.key = 'autovacuum_enabled' THEN kv.value ELSE NULL END) AS autovacuum_enabled, 162 | -- MAX(CASE WHEN kv.key = 'autovacuum_vacuum_threshold' THEN kv.value ELSE NULL END) AS vacuum_threshold, 163 | -- MAX(CASE WHEN kv.key = 'autovacuum_vacuum_scale_factor' THEN kv.value ELSE NULL END) AS vacuum_scale_factor, 164 | -- MAX(CASE WHEN kv.key = 'autovacuum_analyze_threshold' THEN kv.value ELSE NULL END) AS analyze_threshold, 165 | -- MAX(CASE WHEN kv.key = 'autovacuum_analyze_scale_factor' THEN kv.value ELSE NULL END) AS analyze_scale_factor 166 | -- FROM pg_class c 167 | -- JOIN pg_namespace n ON n.oid = c.relnamespace 168 | -- LEFT JOIN LATERAL ( 169 | -- SELECT 170 | -- split_part(opt, '=', 1) AS key, 171 | -- split_part(opt, '=', 2) AS value 172 | -- FROM unnest(c.reloptions) AS opt 173 | -- ) kv ON TRUE 174 | -- WHERE c.relkind = 'r' -- regular table 175 | -- AND n.nspname NOT IN ('pg_catalog', 'information_schema') 176 | -- GROUP BY n.nspname, c.relname 177 | -- ORDER BY n.nspname, c.relname; 178 | -- ==================================================================== 179 | -------------------------------------------------------------------------------- /outline.md: -------------------------------------------------------------------------------- 1 | ### **Strategy Outline: Phased Partitioning & Clustering** 2 | 3 | This document outlines a strategy to ensure high performance and scalability for the `Dialog` table and its related queries. It targets the most critical bottlenecks: slow lookups on the `Dialog` table and inefficient `JOIN`s to its direct children. 4 | 5 | #### **1. Architectural Strategy** 6 | 7 | * **Partition `Dialog` Table:** The `Dialog` table will be partitioned using the two-level scheme: first by `RANGE(ContentUpdatedAt)`, then sub-partitioned by `HASH("Party")`. This provides both query performance and a foundation for data lifecycle management. 8 | 9 | * **Cluster Child Tables:** Large child and grandchild tables in the transactional hierarchy (e.g., `DialogActivity`, `Actor`) will **not** be partitioned at this stage. Instead, they will be physically clustered (`pg_repack`) by their parent's foreign key (e.g., `DialogId`, `ActivityId`) to optimize `JOIN`s. 10 | 11 | * **Shared Reference Tables:** Shared lookup tables (e.g., `ActorName`) will remain as standard, non-partitioned tables. 12 | 13 | #### **2. Implementation Examples** 14 | 15 | **a. Define the Partitioned `Dialog` Table:** 16 | 17 | ```sql 18 | -- The new table structure with a two-level partitioning definition. 19 | -- The primary key must include all partition key columns. 20 | CREATE TABLE "Dialog" ( 21 | "Id" uuid NOT NULL, 22 | "ContentUpdatedAt" timestamptz NOT NULL, 23 | "Party" varchar(255) NOT NULL, 24 | -- ... other columns 25 | PRIMARY KEY ("Id", "ContentUpdatedAt", "Party") 26 | ) PARTITION BY RANGE ("ContentUpdatedAt"); 27 | ``` 28 | 29 | **b. Automate Partition Creation with `pg_partman`:** 30 | This requires a template table that defines the sub-partition structure. 31 | 32 | ```sql 33 | -- Step 1: Create a template table that defines the second partition level. 34 | CREATE TABLE public.dialog_template ( 35 | "Id" uuid NOT NULL, 36 | "ContentUpdatedAt" timestamptz NOT NULL, 37 | "Party" varchar(255) NOT NULL, 38 | -- ... other columns 39 | PRIMARY KEY ("Id", "ContentUpdatedAt", "Party") 40 | ) PARTITION BY HASH ("Party"); 41 | 42 | -- Step 2: Create the hash sub-partitions on the template table. 43 | -- pg_partman will copy this structure for each new time-slice partition. 44 | CREATE TABLE public.dialog_template_p0 PARTITION OF public.dialog_template FOR VALUES WITH (MODULUS 64, REMAINDER 0); 45 | -- ... and so on for all 64 sub-partitions. 46 | 47 | -- Step 3: Configure pg_partman to use the template. 48 | SELECT partman.create_parent( 49 | p_parent_table := 'public.Dialog', 50 | p_control := 'ContentUpdatedAt', 51 | p_type := 'native', 52 | p_interval := '1 month', 53 | p_premake := 4, 54 | p_template_table := 'public.dialog_template' 55 | ); 56 | ``` 57 | 58 | **c. Define Indexes:** 59 | 60 | ```sql 61 | -- Composite index on the partition keys for efficient filtering and sorting. 62 | CREATE INDEX ON "Dialog" ("Party", "ContentUpdatedAt" DESC); 63 | 64 | -- Global index for fast direct lookups by DialogId. 65 | CREATE INDEX ON "Dialog" ("Id"); 66 | ``` 67 | 68 | #### **3. Performance Impact** 69 | 70 | * **`Dialog` Lookups:** Will be extremely fast due to partition pruning. 71 | 72 | * **`JOIN`s:** Will be significantly faster. Instead of random I/O across a massive child table, the join will become a fast sequential scan on a specific, physically sorted section of that table. 73 | 74 | * **Overall:** This phase solves the most critical performance issues and will result in a dramatic improvement over the current system. 75 | 76 | ### **Future: A Theoretical Path for Extreme Future Scale** 77 | 78 | While partitioning of dialog will solve the immediate performance issues and likely suffice for many years, it's worth understanding the theoretical "ultimate" architecture for extreme, multi-decade scale. This next phase represents a **significant increase in complexity** and should only be considered a distant future possibility if the child tables grow to an unmanageable size where clustering is no longer sufficient. 79 | 80 | #### **1. Architectural Strategy (future)** 81 | 82 | * **Co-locate Transactional Hierarchy:** All tables in the transactional hierarchy (`Dialog`, `DialogActivity`, `Actor`, etc.) would be partitioned using the identical two-level scheme. 83 | 84 | * **Denormalization:** This would require the extreme step of denormalizing the `ContentUpdatedAt` and `Party` columns down to every table in the hierarchy to be used as their partition keys. 85 | 86 | #### **2. Justification (future)** 87 | 88 | * **Partition-Wise Joins:** The key benefit is enabling Partition-Wise Joins. The database would no longer join a small `Dialog` partition against a massive child table. Instead, it would join small, corresponding sub-partitions (e.g., `Dialog_2025_06_p17` with `Activity_2025_06_p17`). This is the most efficient way to perform `JOIN`s at extreme scale. 89 | 90 | * **Manageable Maintenance:** `pg_repack` and `VACUUM` jobs would always run on tiny sub-partitions across the entire hierarchy, keeping them fast and non-disruptive forever. 91 | 92 | #### **3. Implementation (future)** 93 | 94 | * **Schema Changes:** The child tables would need to be altered to include the denormalized `Party` and `ContentUpdatedAt` columns. 95 | 96 | * **Migration:** A live, online migration (e.g., using logical replication) would be required to move data from the monolithic child tables into the new co-partitioned structure with minimal downtime. 97 | 98 | ### **General Considerations** 99 | 100 | #### **Query Pattern Constraints** 101 | 102 | * **Mandatory `Party` Filter:** To be performant, all queries against the `Dialog` hierarchy must include a filter on `Party`. Queries without this filter will be slow as they cannot use hash partition pruning. 103 | 104 | * **Time-Range for Alternate Sorts:** Queries sorting by a column other than `ContentUpdatedAt` (e.g., `CreatedAt`) should be required to include a time-range filter on `ContentUpdatedAt` to enable time-based partition pruning and prevent scans across all historical data. 105 | 106 | #### **Entity Framework Integration** 107 | 108 | * **Manual Migrations:** All schema changes for any partitioned table must be handled with manually written SQL inside an empty EF migration. The script must alter the parent table, the `pg_partman` template, and all existing partitions to ensure consistency. 109 | 110 | * **Example: Adding a New Column with `migrationBuilder.Sql()`** 111 | 112 | ```csharp 113 | // Inside the Up() method of a new, empty EF Migration file. 114 | 115 | // Step 1: Alter the parent table 116 | migrationBuilder.Sql(@"ALTER TABLE ""Dialog"" ADD COLUMN ""NewColumn"" text NULL;"); 117 | 118 | // Step 2: Alter the pg_partman template table for future partitions 119 | migrationBuilder.Sql(@"ALTER TABLE public.dialog_template ADD COLUMN ""NewColumn"" text NULL;"); 120 | 121 | // Step 3: Loop through all existing partitions to apply the change. 122 | migrationBuilder.Sql(@" 123 | DO $$ 124 | DECLARE 125 | partition_name text; 126 | BEGIN 127 | FOR partition_name IN 128 | SELECT child.relname 129 | FROM pg_inherits 130 | JOIN pg_class parent ON pg_inherits.inhparent = parent.oid 131 | JOIN pg_class child ON pg_inherits.inhrelid = child.oid 132 | WHERE parent.relname = 'dialog_template' OR parent.relname = 'Dialog' 133 | LOOP 134 | EXECUTE format('ALTER TABLE public.%I ADD COLUMN IF NOT EXISTS ""NewColumn"" text NULL;', partition_name); 135 | END LOOP; 136 | END; 137 | $$; 138 | "); 139 | ``` 140 | -------------------------------------------------------------------------------- /migrate-data.sql: -------------------------------------------------------------------------------- 1 | -- ==================================================================== 2 | -- SCRIPT: Migrate Dialog Data and Update Foreign Keys (LARGE SCALE) 3 | -- ==================================================================== 4 | -- DESCRIPTION: 5 | -- This script safely handles the migration of data from "Dialog_Old" 6 | -- to a new partitioned "Dialog" table, specifically designed for 7 | -- tables with millions of rows (e.g., 100M+). 8 | -- 9 | -- METHOD (Designed for minimal downtime): 10 | -- 1. DROPS all foreign key constraints from child tables that point to 11 | -- "Dialog_Old". This is done first to prevent errors and locks. 12 | -- 2. Migrates data in small, manageable batches using a LOOP. Each 13 | -- batch is its own transaction to minimize lock time and resource usage. 14 | -- 3. RE-CREATES the foreign key constraints as 'NOT VALID' after the 15 | -- data migration is complete. This is a fast, metadata-only change. 16 | -- 4. VALIDATES the constraints in a final, automated step. This step is 17 | -- resource-intensive and will lock tables. 18 | -- 19 | -- WARNING: 20 | -- During Step 2 (the data migration loop), your database will operate 21 | -- without foreign key integrity between the child tables and the Dialog 22 | -- table. This is a necessary trade-off for a migration of this scale. 23 | -- ==================================================================== 24 | 25 | DO $$ 26 | BEGIN 27 | RAISE INFO 'Starting LARGE SCALE Dialog data migration script...'; 28 | END; 29 | $$; 30 | 31 | 32 | -- ==================================================================== 33 | -- STEP 1: DROP ALL FOREIGN KEY CONSTRAINTS 34 | -- ==================================================================== 35 | RAISE INFO 'STEP 1: Dropping foreign key constraints pointing to Dialog_Old...'; 36 | BEGIN; 37 | ALTER TABLE public."Attachment" DROP CONSTRAINT IF EXISTS "FK_Attachment_Dialog_DialogId"; 38 | ALTER TABLE public."DialogActivity" DROP CONSTRAINT IF EXISTS "FK_DialogActivity_Dialog_DialogId"; 39 | ALTER TABLE public."DialogApiAction" DROP CONSTRAINT IF EXISTS "FK_DialogApiAction_Dialog_DialogId"; 40 | ALTER TABLE public."DialogContent" DROP CONSTRAINT IF EXISTS "FK_DialogContent_Dialog_DialogId"; 41 | ALTER TABLE public."DialogEndUserContext" DROP CONSTRAINT IF EXISTS "FK_DialogEndUserContext_Dialog_DialogId"; 42 | ALTER TABLE public."DialogGuiAction" DROP CONSTRAINT IF EXISTS "FK_DialogGuiAction_Dialog_DialogId"; 43 | ALTER TABLE public."DialogSearchTag" DROP CONSTRAINT IF EXISTS "FK_DialogSearchTag_Dialog_DialogId"; 44 | ALTER TABLE public."DialogSeenLog" DROP CONSTRAINT IF EXISTS "FK_DialogSeenLog_Dialog_DialogId"; 45 | ALTER TABLE public."DialogServiceOwnerContext" DROP CONSTRAINT IF EXISTS "FK_DialogServiceOwnerContext_Dialog_DialogId"; 46 | ALTER TABLE public."DialogTransmission" DROP CONSTRAINT IF EXISTS "FK_DialogTransmission_Dialog_DialogId"; 47 | COMMIT; 48 | RAISE INFO 'All relevant foreign keys have been dropped.'; 49 | 50 | 51 | -- ==================================================================== 52 | -- STEP 2: DATA MIGRATION IN BATCHES 53 | -- ==================================================================== 54 | RAISE INFO 'STEP 2: Starting batched data migration from Dialog_Old to Dialog...'; 55 | DO $$ 56 | DECLARE 57 | batch_size INT := 50000; -- Tune batch size based on your server's capacity 58 | rows_migrated INT; 59 | total_rows_migrated BIGINT := 0; 60 | last_id uuid := '00000000-0000-0000-0000-000000000000'; -- Start with the lowest possible UUID 61 | current_batch_last_id uuid; 62 | BEGIN 63 | LOOP 64 | -- Find the last ID from the rows to be inserted in this batch 65 | SELECT "Id" INTO current_batch_last_id FROM public."Dialog_Old" WHERE "Id" > last_id ORDER BY "Id" ASC LIMIT 1 OFFSET (batch_size - 1); 66 | 67 | -- If no more rows, we are done with this batch size. 68 | IF current_batch_last_id IS NULL THEN 69 | -- Handle the final, smaller batch 70 | INSERT INTO public."Dialog" 71 | SELECT * FROM public."Dialog_Old" WHERE "Id" > last_id; 72 | GET DIAGNOSTICS rows_migrated = ROW_COUNT; 73 | total_rows_migrated := total_rows_migrated + rows_migrated; 74 | RAISE INFO 'Migrated final batch of % rows. Total migrated: %', rows_migrated, total_rows_migrated; 75 | EXIT; -- Exit the loop 76 | END IF; 77 | 78 | -- Insert a full batch of rows up to and including the found last ID 79 | INSERT INTO public."Dialog" 80 | SELECT * FROM public."Dialog_Old" WHERE "Id" > last_id AND "Id" <= current_batch_last_id; 81 | 82 | GET DIAGNOSTICS rows_migrated = ROW_COUNT; 83 | total_rows_migrated := total_rows_migrated + rows_migrated; 84 | 85 | RAISE INFO 'Migrated batch of % rows. Total migrated: %. Last ID in batch: %', rows_migrated, total_rows_migrated, current_batch_last_id; 86 | 87 | -- Set the starting point for the next batch 88 | last_id := current_batch_last_id; 89 | 90 | -- Small delay to yield resources if needed 91 | -- PERFORM pg_sleep(0.1); 92 | 93 | END LOOP; 94 | RAISE INFO 'Finished migrating all data.'; 95 | END; 96 | $$; 97 | 98 | 99 | -- ==================================================================== 100 | -- STEP 3: RE-CREATE FOREIGN KEY CONSTRAINTS (as NOT VALID) 101 | -- ==================================================================== 102 | RAISE INFO 'STEP 3: Re-creating foreign key constraints as NOT VALID...'; 103 | BEGIN; 104 | ALTER TABLE public."Attachment" ADD CONSTRAINT "FK_Attachment_Dialog_DialogId" FOREIGN KEY ("DialogId") REFERENCES public."Dialog"("Id") ON DELETE CASCADE NOT VALID; 105 | ALTER TABLE public."DialogActivity" ADD CONSTRAINT "FK_DialogActivity_Dialog_DialogId" FOREIGN KEY ("DialogId") REFERENCES public."Dialog"("Id") ON DELETE CASCADE NOT VALID; 106 | ALTER TABLE public."DialogApiAction" ADD CONSTRAINT "FK_DialogApiAction_Dialog_DialogId" FOREIGN KEY ("DialogId") REFERENCES public."Dialog"("Id") ON DELETE CASCADE NOT VALID; 107 | ALTER TABLE public."DialogContent" ADD CONSTRAINT "FK_DialogContent_Dialog_DialogId" FOREIGN KEY ("DialogId") REFERENCES public."Dialog"("Id") ON DELETE CASCADE NOT VALID; 108 | ALTER TABLE public."DialogEndUserContext" ADD CONSTRAINT "FK_DialogEndUserContext_Dialog_DialogId" FOREIGN KEY ("DialogId") REFERENCES public."Dialog"("Id") ON DELETE SET NULL NOT VALID; 109 | ALTER TABLE public."DialogGuiAction" ADD CONSTRAINT "FK_DialogGuiAction_Dialog_DialogId" FOREIGN KEY ("DialogId") REFERENCES public."Dialog"("Id") ON DELETE CASCADE NOT VALID; 110 | ALTER TABLE public."DialogSearchTag" ADD CONSTRAINT "FK_DialogSearchTag_Dialog_DialogId" FOREIGN KEY ("DialogId") REFERENCES public."Dialog"("Id") ON DELETE CASCADE NOT VALID; 111 | ALTER TABLE public."DialogSeenLog" ADD CONSTRAINT "FK_DialogSeenLog_Dialog_DialogId" FOREIGN KEY ("DialogId") REFERENCES public."Dialog"("Id") ON DELETE CASCADE NOT VALID; 112 | ALTER TABLE public."DialogServiceOwnerContext" ADD CONSTRAINT "FK_DialogServiceOwnerContext_Dialog_DialogId" FOREIGN KEY ("DialogId") REFERENCES public."Dialog"("Id") ON DELETE CASCADE NOT VALID; 113 | ALTER TABLE public."DialogTransmission" ADD CONSTRAINT "FK_DialogTransmission_Dialog_DialogId" FOREIGN KEY ("DialogId") REFERENCES public."Dialog"("Id") ON DELETE CASCADE NOT VALID; 114 | COMMIT; 115 | RAISE INFO 'All foreign key constraints have been re-created as NOT VALID.'; 116 | 117 | 118 | -- ==================================================================== 119 | -- STEP 4: VALIDATE CONSTRAINTS 120 | -- 121 | -- !! IMPORTANT !! 122 | -- This step is resource-intensive and will lock tables. It is STRONGLY 123 | -- recommended to run this during a low-traffic maintenance window. 124 | -- ==================================================================== 125 | RAISE INFO 'STEP 4: Beginning validation of all new constraints. This may take a long time.'; 126 | BEGIN; 127 | RAISE INFO 'Validating constraint on "Attachment"...'; 128 | ALTER TABLE public."Attachment" VALIDATE CONSTRAINT "FK_Attachment_Dialog_DialogId"; 129 | RAISE INFO 'Constraint on "Attachment" validated.'; 130 | 131 | RAISE INFO 'Validating constraint on "DialogActivity"...'; 132 | ALTER TABLE public."DialogActivity" VALIDATE CONSTRAINT "FK_DialogActivity_Dialog_DialogId"; 133 | RAISE INFO 'Constraint on "DialogActivity" validated.'; 134 | 135 | RAISE INFO 'Validating constraint on "DialogApiAction"...'; 136 | ALTER TABLE public."DialogApiAction" VALIDATE CONSTRAINT "FK_DialogApiAction_Dialog_DialogId"; 137 | RAISE INFO 'Constraint on "DialogApiAction" validated.'; 138 | 139 | RAISE INFO 'Validating constraint on "DialogContent"...'; 140 | ALTER TABLE public."DialogContent" VALIDATE CONSTRAINT "FK_DialogContent_Dialog_DialogId"; 141 | RAISE INFO 'Constraint on "DialogContent" validated.'; 142 | 143 | RAISE INFO 'Validating constraint on "DialogEndUserContext"...'; 144 | ALTER TABLE public."DialogEndUserContext" VALIDATE CONSTRAINT "FK_DialogEndUserContext_Dialog_DialogId"; 145 | RAISE INFO 'Constraint on "DialogEndUserContext" validated.'; 146 | 147 | RAISE INFO 'Validating constraint on "DialogGuiAction"...'; 148 | ALTER TABLE public."DialogGuiAction" VALIDATE CONSTRAINT "FK_DialogGuiAction_Dialog_DialogId"; 149 | RAISE INFO 'Constraint on "DialogGuiAction" validated.'; 150 | 151 | RAISE INFO 'Validating constraint on "DialogSearchTag"...'; 152 | ALTER TABLE public."DialogSearchTag" VALIDATE CONSTRAINT "FK_DialogSearchTag_Dialog_DialogId"; 153 | RAISE INFO 'Constraint on "DialogSearchTag" validated.'; 154 | 155 | RAISE INFO 'Validating constraint on "DialogSeenLog"...'; 156 | ALTER TABLE public."DialogSeenLog" VALIDATE CONSTRAINT "FK_DialogSeenLog_Dialog_DialogId"; 157 | RAISE INFO 'Constraint on "DialogSeenLog" validated.'; 158 | 159 | RAISE INFO 'Validating constraint on "DialogServiceOwnerContext"...'; 160 | ALTER TABLE public."DialogServiceOwnerContext" VALIDATE CONSTRAINT "FK_DialogServiceOwnerContext_Dialog_DialogId"; 161 | RAISE INFO 'Constraint on "DialogServiceOwnerContext" validated.'; 162 | 163 | RAISE INFO 'Validating constraint on "DialogTransmission"...'; 164 | ALTER TABLE public."DialogTransmission" VALIDATE CONSTRAINT "FK_DialogTransmission_Dialog_DialogId"; 165 | RAISE INFO 'Constraint on "DialogTransmission" validated.'; 166 | COMMIT; 167 | RAISE INFO 'All constraints have been successfully validated.'; 168 | 169 | 170 | -- ==================================================================== 171 | -- POST-SCRIPT ACTIONS: 172 | -- 1. CLEANUP: Once all constraints are validated and you have verified 173 | -- the application, you can safely drop the old table. 174 | -- 175 | -- DROP TABLE public."Dialog_Old"; 176 | -- ==================================================================== 177 | DO $$ 178 | BEGIN 179 | RAISE INFO 'Migration script finished successfully!'; 180 | END; 181 | $$; 182 | --------------------------------------------------------------------------------