├── CHANGELOG.md ├── LICENSE ├── README.md ├── pg_reindex.py └── requirements.txt /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 1.0.6 (May 29, 2019) 2 | 3 | ### Fixes 4 | 5 | * Fixed output of print requests for primary keys 6 | 7 | # 1.0.5 (May 24, 2019) 8 | 9 | ### Changes 10 | 11 | * Merged two transactions into one in the place where the old index is replaced with a new index and there is a need to keep the original index name 12 | 13 | # 1.0.4 (May 22, 2019) 14 | 15 | ### Fixes 16 | 17 | * Fixed output of print requests for self-reindexation when it was not possible to do it automatically due to a long lock 18 | 19 | # 1.0.3 (December 26, 2018) 20 | 21 | ### Fixes 22 | 23 | * Fixed processing mixed case relation names by using quote_ident where applicable (Phil Krylov) 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Avito Technology 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PG Reindex 2 | Console utility for rebuilding indexes and primary keys for PostgreSQL in automatic mode with analysis of index bloating and without table locking 3 | 4 | ### Usage 5 | 6 | #### 1. Create new indexes with the new version in the name, without alter the index and locking on the table 7 | ```bash 8 | ./pg_reindex.py --host=localhost --dbname=base --user=postgres --change-index-name --print-queries 9 | ``` 10 | #### Delete all old unused indexes from the previous script start, which was not earlier than one hour ago 11 | ```bash 12 | ./pg_reindex.py --host=localhost --dbname=base --user=postgres --delete-old-indexes 13 | ``` 14 | #### 2. Create new indexes with preservation of old names, but with alter the index and locking on the table 15 | ```bash 16 | ./pg_reindex.py --host=localhost --dbname=base --user=postgres --statement-timeout 200 --deadlock-timeout 20 --print-queries 17 | ``` 18 | #### Delete all old unused indexes from the previous script start, which was not earlier than one hour ago 19 | ```bash 20 | ./pg_reindex.py --host=localhost --dbname=base --user=postgres --delete-old-indexes 21 | ``` 22 | #### 3. Create new indexes with the new version in the name, without alter the index and locking on the table and deleting old indexes after creating new ones 23 | ```bash 24 | ./pg_reindex.py --host=localhost --dbname=base --user=postgres --change-index-name --print-queries --delete-index-after-create 25 | ``` 26 | #### 4. Create new indexes with preservation of old names, but with alter the index and locking on the table and deleting old indexes after creating new ones 27 | ```bash 28 | ./pg_reindex.py --host=localhost --dbname=base --user=postgres --statement-timeout 200 --deadlock-timeout 20 --print-queries --delete-index-after-create 29 | ``` 30 | #### 5. Create a new primary key on a table with a table lock 31 | ```bash 32 | ./pg_reindex.py --host=localhost --dbname=base --user=postgres --statement-timeout 200 --deadlock-timeout 20 --print-queries --index base.items_pkey 33 | ``` 34 | #### 6. Create a new index with a table lock and delete the old index after creating a new one 35 | ```bash 36 | ./pg_reindex.py --host=localhost --dbname=base --user=postgres --statement-timeout 200 --deadlock-timeout 20 --print-queries --delete-index-after-create --index base.items_time_idx 37 | ``` 38 | #### 7. Create new indexes for the tables specified in the schema and deleting old indexes after creating new ones 39 | ```bash 40 | ./pg_reindex.py --host=localhost --dbname=base --user=postgres --statement-timeout 200 --deadlock-timeout 20 --print-queries --delete-index-after-create --schema base 41 | ``` 42 | #### 8. Print only statistics and queries, without affecting any data and without using the statistics of the bloat 43 | ```bash 44 | ./pg_reindex.py --host=localhost --dbname=base --user=postgres --print-queries --delete-index-after-create --force --dry-run 45 | ``` 46 | 47 | ### Info 48 | 49 | Optional arguments: 50 | ``` 51 | pg_reindex.py 52 | [-h] [--host HOST] [--port PORT] [--user USER] 53 | [--password PASSWORD] [--dbname DBNAME] 54 | [--schema [SCHEMA [SCHEMA ...]]] 55 | [--exclude-schema [EXCLUDE_SCHEMA [EXCLUDE_SCHEMA ...]]] 56 | [--table [TABLE [TABLE ...]]] 57 | [--exclude-table [EXCLUDE_TABLE [EXCLUDE_TABLE ...]]] 58 | [--index [INDEX [INDEX ...]]] [--dry-run] 59 | [--print-queries] [--force] [--delete-index-after-create] 60 | [--delete-old-indexes] [--pgstattuple-install] 61 | [--change-index-name] 62 | [--minimal-compact-percent MINIMAL_COMPACT_PERCENT] 63 | [--reindex-retry-max-count REINDEX_RETRY_MAX_COUNT] 64 | [--statement-timeout STATEMENT_TIMEOUT] 65 | [--deadlock-timeout DEADLOCK_TIMEOUT] 66 | [--log-level {DEBUG,INFO,ERROR}] 67 | 68 | -h, --help Show this help message and exit 69 | --host HOST A database host. By default localhost. 70 | --port PORT A database port. By default 5432. 71 | --user USER A database user. By default current system user. 72 | --password PASSWORD A password for the user. 73 | --dbname DBNAME A database to process. By default all the user 74 | databses of the instance are processed. 75 | --schema [SCHEMA [SCHEMA ...]] 76 | A schema to process. By default all the schemas of the 77 | specified database are processed. 78 | --exclude-schema [EXCLUDE_SCHEMA [EXCLUDE_SCHEMA ...]] 79 | A schema to exclude from processing. 80 | --table [TABLE [TABLE ...]] 81 | A table to process. By default all the tables of the 82 | specified schema are processed. 83 | --exclude-table [EXCLUDE_TABLE [EXCLUDE_TABLE ...]] 84 | A table to exclude from processing. 85 | --index [INDEX [INDEX ...]] 86 | A index to process. By default all the indexes of the 87 | specified tables are processed. 88 | --dry-run Print statistics only, without affecting any data. 89 | --print-queries Print reindex and drop queries. Useful if you want to 90 | perform manual reindex or drop later. 91 | --force Try to compact indexes that do not meet minimal bloat 92 | requirements. 93 | --delete-index-after-create 94 | Delete old indexes after creating new ones. 95 | --delete-old-indexes Delete all old unused indexes from the previous script 96 | start, which was not earlier than one hour ago. 97 | --pgstattuple-install 98 | Installing the pgstattuple extension. 99 | --change-index-name Create indexes with the new version in the name, 100 | without alter the index and locking. 101 | --minimal-compact-percent MINIMAL_COMPACT_PERCENT 102 | Minimal compact percent. By default 20. 103 | --reindex-retry-max-count REINDEX_RETRY_MAX_COUNT 104 | Reindex retry max count. By default 10. 105 | --statement-timeout STATEMENT_TIMEOUT 106 | Postgres statement timeout, ms. By default 100. 107 | --deadlock-timeout DEADLOCK_TIMEOUT 108 | Postgres deadlock timeout, ms. By default 10. 109 | --log-level {DEBUG,INFO,ERROR} 110 | A log level. By default INFO. 111 | ``` 112 | 113 | ### Author 114 | 115 | Nikolay Vorobev (nvorobev@avito.ru) 116 | 117 | ### License 118 | 119 | MIT 120 | -------------------------------------------------------------------------------- /pg_reindex.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # author: @nvorobev 3 | 4 | import os 5 | import re 6 | import sys 7 | import time 8 | import argparse 9 | import getpass 10 | import random 11 | import datetime 12 | import readline 13 | import logging 14 | 15 | import psycopg2 16 | from psycopg2.extensions import quote_ident 17 | 18 | 19 | def format_message(message, color=None): 20 | """ 21 | Message output format 22 | """ 23 | def red(msg): 24 | return "\033[91m{}\033[00m".format(msg) 25 | 26 | def green(msg): 27 | return "\033[92m{}\033[00m".format(msg) 28 | 29 | if not message: 30 | message = '' 31 | 32 | message = str(message).replace('\n', '') 33 | message = message.strip() 34 | 35 | if color == 'red': 36 | return red(message) 37 | elif color == 'green': 38 | return green(message) 39 | else: 40 | return message 41 | 42 | 43 | def prompt(message, suffix=' '): 44 | """ 45 | Returns the user input 46 | """ 47 | prompt_text = "{}{}".format(message, suffix) 48 | 49 | input_value = input(prompt_text) 50 | 51 | if not input_value: 52 | input_value = '' 53 | else: 54 | input_value = re.sub(r'\s', '', input_value).lower() 55 | 56 | return input_value 57 | 58 | 59 | def get_args(params): 60 | """ 61 | Converting a list of arguments to a string, separated by commas 62 | """ 63 | return ', '.join(map(lambda x: "'" + x + "'", params)) 64 | 65 | 66 | def get_database_tables(curs, schema=None, table=None, exclude_schema=None, exclude_table=None): 67 | """ 68 | Getting a list of tables with indexes for further processing 69 | """ 70 | extra_conditions = '' 71 | 72 | if schema: 73 | if len(schema) > 0: 74 | extra_conditions += " and schemaname in ({schema})\n".format( 75 | schema=get_args(schema) 76 | ) 77 | 78 | if exclude_schema: 79 | if len(exclude_schema) > 0: 80 | extra_conditions += " and schemaname not in ({exclude_schema})\n".format( 81 | exclude_schema=get_args(exclude_schema) 82 | ) 83 | 84 | if table: 85 | if len(table) > 0: 86 | extra_conditions = " and schemaname || '.' || tablename in ({table})\n".format( 87 | table=get_args(table) 88 | ) 89 | 90 | if exclude_table: 91 | if len(exclude_table) > 0: 92 | extra_conditions += " and schemaname || '.' || tablename not in ({exclude_table})\n".format( 93 | exclude_table=get_args(exclude_table) 94 | ) 95 | 96 | query = """ 97 | select 98 | sq.schemaname, 99 | sq.tablename 100 | from ( 101 | select 102 | schemaname, 103 | tablename, 104 | pg_indexes_size(quote_ident(schemaname)||'.'||quote_ident(tablename)) as indexes_size 105 | from pg_catalog.pg_tables 106 | where 107 | schemaname !~ 'pg_(temp|toast|catalog).*' and 108 | schemaname !~ '(information_schema|pg_catalog|kill|tmp|pgq|londiste|londiste_undo)' and 109 | tablename !~ '(pg_index|kill)' 110 | {extra_conditions} 111 | ) sq 112 | where 113 | sq.indexes_size > 0 114 | order by 115 | sq.schemaname, 116 | sq.tablename 117 | """.format( 118 | extra_conditions=extra_conditions 119 | ) 120 | 121 | curs.execute(query) 122 | rows = curs.fetchall() 123 | 124 | return rows 125 | 126 | 127 | def get_pgstattuple_schema_name(curs): 128 | """ 129 | Getting the schema where the pgstattuple extension is installed 130 | """ 131 | query = """ 132 | select 133 | n.nspname::text, 134 | e.extversion::numeric 135 | from pg_catalog.pg_extension e 136 | join pg_catalog.pg_namespace as n on 137 | n.oid = e.extnamespace 138 | where 139 | e.extname = 'pgstattuple' 140 | """ 141 | 142 | curs.execute(query) 143 | 144 | try: 145 | r = curs.fetchone() 146 | return [r[0], r[1]] 147 | except: 148 | return [None, None] 149 | 150 | 151 | def advisory_lock(curs, schemaname, tablename): 152 | """ 153 | Table locking for protection against parallel processing 154 | """ 155 | query = """ 156 | select 157 | pg_try_advisory_lock('pg_catalog.pg_class'::regclass::integer, 158 | (quote_ident('{schemaname}') || '.' || quote_ident('{tablename}'))::regclass::integer 159 | )::boolean 160 | """.format( 161 | schemaname=schemaname, 162 | tablename=tablename 163 | ) 164 | 165 | curs.execute(query) 166 | 167 | lock = curs.fetchone()[0] 168 | 169 | if lock: 170 | advisory_locks.append([schemaname, tablename]) 171 | else: 172 | log.info('Skipping processing: another instance is working with table "{}.{}"'.format(schemaname, tablename)) 173 | 174 | return lock 175 | 176 | 177 | def advisory_unlock_all(curs): 178 | """ 179 | Removing all installed locks from all tables 180 | """ 181 | for lock in advisory_locks: 182 | (schemaname, tablename) = lock 183 | 184 | query = """ 185 | select 186 | pg_advisory_unlock('pg_catalog.pg_class'::regclass::integer, 187 | (quote_ident('{schemaname}') || '.' || quote_ident('{tablename}'))::regclass::integer 188 | )::boolean 189 | """.format( 190 | schemaname=schemaname, 191 | tablename=tablename 192 | ) 193 | 194 | curs.execute(query) 195 | 196 | 197 | def get_index_data_list(curs, schemaname, tablename, indexname=None): 198 | """ 199 | Getting indices with the necessary attributes from a given list of indexes or tables 200 | """ 201 | extra_conditions = '' 202 | 203 | if indexname: 204 | if len(indexname) > 0: 205 | extra_conditions = " and schemaname || '.' || indexname in ({indexname})\n".format( 206 | indexname=get_args(indexname) 207 | ) 208 | 209 | query = """ 210 | select 211 | indexname, 212 | tablespace, 213 | indexdef, 214 | regexp_replace(indexdef, e'.* USING (\\\\w+) .*', e'\\\\1') as indmethod, 215 | conname, 216 | case 217 | when contype = 'p' then 'PRIMARY KEY' 218 | when contype = 'u' then 'UNIQUE' 219 | else null 220 | end as contypedef, 221 | ( 222 | select 223 | bool_and( 224 | deptype in ('n', 'a', 'i') and 225 | not (refobjid = indexoid and deptype = 'n') and 226 | not ( 227 | objid = indexoid and deptype = 'i' and 228 | (version < array[9,1] or contype not in ('p', 'u')))) 229 | from pg_catalog.pg_depend 230 | left join pg_catalog.pg_constraint on 231 | pg_catalog.pg_constraint.oid = refobjid 232 | where 233 | (objid = indexoid and classid = pgclassid) or 234 | (refobjid = indexoid and refclassid = pgclassid) 235 | )::integer as allowed, 236 | ( 237 | select 238 | string_to_array(indkey::text, ' ')::int[] @> array[0::int] 239 | from pg_catalog.pg_index 240 | where indexrelid = indexoid 241 | )::integer as is_functional, 242 | condeferrable as is_deferrable, 243 | condeferred as is_deferred 244 | from ( 245 | select 246 | indexname, tablespace, indexdef, 247 | ( 248 | quote_ident(schemaname) || '.' || 249 | quote_ident(indexname))::regclass as indexoid, 250 | 'pg_catalog.pg_class'::regclass as pgclassid, 251 | string_to_array( 252 | regexp_replace( 253 | version(), e'.*PostgreSQL (\\\\d+\\\\.\\\\d+).*', e'\\\\1'), 254 | '.')::integer[] as version 255 | from pg_catalog.pg_indexes 256 | where 257 | schemaname = quote_ident('{schemaname}') and 258 | tablename = quote_ident('{tablename}') and 259 | indexname !~ '^tmp_\\\\d+_\\\\d+' 260 | {extra_conditions} 261 | ) as sq 262 | left join pg_catalog.pg_constraint on 263 | conindid = indexoid and contype in ('p', 'u') 264 | order by 265 | indexname 266 | """.format( 267 | schemaname=schemaname, 268 | tablename=tablename, 269 | extra_conditions=extra_conditions 270 | ) 271 | 272 | curs.execute(query) 273 | rows = curs.fetchall() 274 | 275 | return rows 276 | 277 | 278 | def get_table_by_index(curs, indexname): 279 | """ 280 | Getting the schema name and table by index 281 | """ 282 | query = """ 283 | select 284 | schemaname, 285 | tablename 286 | from pg_catalog.pg_indexes 287 | where 288 | schemaname || '.' || indexname = '{indexname}' 289 | """.format( 290 | indexname=indexname 291 | ) 292 | 293 | curs.execute(query) 294 | 295 | return curs.fetchone() 296 | 297 | 298 | def get_table_by_name(curs, tablename): 299 | """ 300 | Getting the schema name and table from the table 301 | """ 302 | query = """ 303 | select 304 | schemaname, 305 | tablename 306 | from pg_catalog.pg_tables 307 | where 308 | schemaname || '.' || tablename = '{tablename}' 309 | """.format( 310 | tablename=tablename 311 | ) 312 | 313 | curs.execute(query) 314 | 315 | return curs.fetchone() 316 | 317 | 318 | def get_index_size_statistics(curs, schemaname, indexname): 319 | """ 320 | Getting the size of the index and the number of pages 321 | """ 322 | query = """ 323 | select 324 | size, 325 | ceil(size / bs) as page_count 326 | from ( 327 | select 328 | pg_catalog.pg_relation_size((quote_ident('{schemaname}') || '.' || quote_ident('{indexname}'))::regclass) as size, 329 | current_setting('block_size')::real as bs 330 | ) as sq 331 | """.format( 332 | schemaname=schemaname, 333 | indexname=indexname 334 | ) 335 | 336 | curs.execute(query) 337 | 338 | return curs.fetchone() 339 | 340 | 341 | def get_index_bloat_stats(curs, pgstattuple, pgstattuple_ver, schemaname, indexname): 342 | """ 343 | Getting bloat index statistics based on the pgstattuple extension 344 | """ 345 | query = """ 346 | select 347 | case 348 | when avg_leaf_density = 'NaN' then 0 349 | else 350 | round( 351 | (100 * (1 - avg_leaf_density / fillfactor))::numeric, 2 352 | ) 353 | end as free_percent, 354 | case 355 | when avg_leaf_density = 'NaN' then 0 356 | else 357 | ceil( 358 | index_size * (1 - avg_leaf_density / fillfactor) 359 | ) 360 | end as free_space 361 | from ( 362 | select 363 | coalesce( 364 | ( 365 | select ( 366 | regexp_matches( 367 | reloptions::text, e'.*fillfactor=(\\\\d+).*'))[1]), 368 | '90')::real as fillfactor, 369 | pgsi.* 370 | from pg_catalog.pg_class c 371 | join pg_catalog.pg_namespace n on 372 | n.oid = c.relnamespace 373 | cross join {pgstattuple}.pgstatindex({object}) as pgsi 374 | where 375 | c.oid = (quote_ident('{schemaname}') || '.' || quote_ident('{indexname}'))::regclass 376 | ) as oq 377 | """.format( 378 | pgstattuple=pgstattuple, 379 | object='c.oid' if pgstattuple_ver > 1 else "n.nspname || '.' || c.relname", 380 | schemaname=schemaname, 381 | indexname=indexname 382 | ) 383 | 384 | curs.execute(query) 385 | 386 | return curs.fetchone() 387 | 388 | 389 | def get_reindex_query(indexname, indexdef, tablespace, conname): 390 | """ 391 | Getting a request to re-create an index 392 | """ 393 | if args.change_index_name and not conname: 394 | tmp_name = indexname 395 | 396 | digits = re.findall(r'(_\d+)', tmp_name) 397 | if len(digits) > 0: 398 | last = digits[len(digits)-1] 399 | need_replace = tmp_name.index(last) + len(last) == len(tmp_name) 400 | if need_replace: 401 | for d in digits: 402 | if len(d) > 2: 403 | tmp_name = tmp_name.replace(d, '') 404 | 405 | ver_id_search = re.findall(r'_ver[0-9]+$', tmp_name) 406 | 407 | if ver_id_search: 408 | ver_id = ver_id_search[0][4:] 409 | new_ver_id = int(ver_id) + 1 410 | reindex_indexname = re.sub(r'_ver[0-9]+$', '_ver' + str(new_ver_id), tmp_name) 411 | else: 412 | reindex_indexname = tmp_name + '_ver1' 413 | else: 414 | reindex_indexname = '_tmp_' + indexname 415 | 416 | if len(reindex_indexname) > 63 or len(reindex_indexname) == 0: 417 | readline.parse_and_bind('tab: complete') 418 | while len(reindex_indexname) > 63 or len(reindex_indexname) == 0: 419 | if reindex_indexname.find('_tmp_') == 0: 420 | reindex_indexname = '_tmp_{}_{}'.format(int(time.time()), random.randint(1000000000, 9000000000)) 421 | else: 422 | if len(reindex_indexname) > 63: 423 | reindex_indexname = prompt( 424 | "The name length for new index '{reindex_indexname}' exceeds 63 characters ({reindex_indexname_len}).\n" 425 | "Please enter name for new index:".format( 426 | reindex_indexname=reindex_indexname, 427 | reindex_indexname_len=len(reindex_indexname) 428 | ), 429 | suffix='\n>>> ' 430 | ) 431 | else: 432 | reindex_indexname = prompt( 433 | "The name for new index can not be empty!\n" 434 | "Please enter name for new index:", 435 | suffix='\n>>> ' 436 | ) 437 | 438 | reindex_query = indexdef.replace('INDEX ' + indexname, 'INDEX CONCURRENTLY ' + reindex_indexname) 439 | 440 | if tablespace: 441 | reindex_query += ' TABLESPACE ' + tablespace 442 | 443 | reindex_query += ';' 444 | 445 | return [reindex_indexname, reindex_query] 446 | 447 | 448 | def drop_temp_index(curs, schemaname, reindex_indexname): 449 | """ 450 | Delete a new index created to replace the old one 451 | """ 452 | drop_query = """ 453 | DROP INDEX CONCURRENTLY {schemaname}.{reindex_indexname}; 454 | """.format( 455 | schemaname=quote_ident(schemaname, curs), 456 | reindex_indexname=quote_ident(reindex_indexname, curs) 457 | ) 458 | 459 | try: 460 | curs.execute(drop_query) 461 | log.info('Drop temporary index: "{}", done'.format(reindex_indexname)) 462 | except Exception as e: 463 | log.error(format_message(message='Unable drop temporary index "{}", {}'.format(reindex_indexname, e), color='red')) 464 | 465 | 466 | def drop_old_index(curs, drop_query): 467 | """ 468 | Deleting an old index 469 | """ 470 | drop_query = drop_query.strip() 471 | deleted = False 472 | 473 | try: 474 | if not args.dry_run: 475 | curs.execute(drop_query) 476 | deleted = True 477 | if not args.delete_index_after_create: 478 | log.info(drop_query) 479 | except Exception as e: 480 | log.error(format_message(message=e, color='red')) 481 | 482 | return deleted 483 | 484 | 485 | def exist_table(curs, schemaname, tablename): 486 | """ 487 | Checking the table availability in the database 488 | """ 489 | query = """ 490 | select 1 491 | from pg_tables 492 | where 493 | schemaname = '{schemaname}' and 494 | tablename = '{tablename}' 495 | """.format( 496 | schemaname=schemaname, 497 | tablename=tablename 498 | ) 499 | 500 | curs.execute(query) 501 | 502 | return curs.rowcount == 1 503 | 504 | 505 | def drop_old_index_later(curs, drop_query): 506 | """ 507 | Saving a request to delete an old index to delete later 508 | """ 509 | try: 510 | if not exist_table(curs, 'public', 'zdrop_index_later'): 511 | curs.execute(""" 512 | create table if not exists public.zdrop_index_later 513 | ( 514 | drop_query text NOT NULL PRIMARY KEY, 515 | create_time timestamp NOT NULL DEFAULT now() 516 | ); 517 | comment on table public.zdrop_index_later 518 | is 'The table was created by the script pg_reindex.py, it stores information about old indexes that will be deleted.'; 519 | """) 520 | curs.execute("""insert into zdrop_index_later (drop_query) values (%s)""", [str(drop_query.strip())]) 521 | except Exception as e: 522 | log.error(format_message(message=e, color='red')) 523 | 524 | 525 | def drop_old_indexes(curs): 526 | """ 527 | Removing old indexes for which more than an hour ago created new indexes 528 | """ 529 | query = """ 530 | select 531 | format('DROP INDEX CONCURRENTLY %s.%s', quote_ident(n.nspname), quote_ident(c.relname)) as drop_query 532 | from pg_class c 533 | join pg_catalog.pg_namespace n on 534 | n.oid = c.relnamespace 535 | join pg_index i on 536 | i.indexrelid = c.oid 537 | where 538 | c.relkind = 'i' and 539 | ( 540 | c.relname ~ '^_tmp_' or 541 | ( 542 | c.relname ~ '^tmp_\\\\d+_\\\\d+' and 543 | now() - to_timestamp(substring(c.relname, 5, 10)::integer)::timestamp > '1 hour'::interval 544 | ) 545 | ) 546 | order by 547 | c.relname 548 | """ 549 | 550 | curs.execute(query) 551 | rows = curs.fetchall() 552 | 553 | for row in rows: 554 | drop_old_index(curs, row[0]) 555 | 556 | if exist_table(curs, 'public', 'zdrop_index_later'): 557 | 558 | query = """ 559 | select 560 | drop_query 561 | from zdrop_index_later 562 | where 563 | now() - create_time > '1 hour'::interval 564 | order by 565 | create_time 566 | """ 567 | 568 | curs.execute(query) 569 | rows = curs.fetchall() 570 | 571 | for row in rows: 572 | if drop_old_index(curs, row[0]): 573 | curs.execute("""delete from zdrop_index_later where drop_query = %s""", [str(row[0])]) 574 | 575 | curs.execute("""select count(*) from zdrop_index_later""") 576 | row_count = curs.fetchone()[0] 577 | 578 | if row_count == 0: 579 | curs.execute("""drop table if exists zdrop_index_later""") 580 | 581 | 582 | def index_is_valid(curs, schemaname, reindex_indexname): 583 | """ 584 | Validating an index created with CONCURRENTLY 585 | """ 586 | query = """ 587 | select 588 | i.indisvalid 589 | from pg_class c 590 | join pg_index i on 591 | i.indexrelid = c.oid 592 | join pg_namespace ns on 593 | c.relnamespace = ns.oid 594 | where 595 | ns.nspname = '{schemaname}' and 596 | c.relname = '{reindex_indexname}' and 597 | c.relkind = 'i' 598 | """.format( 599 | schemaname=schemaname, 600 | reindex_indexname=reindex_indexname 601 | ) 602 | 603 | isvalid = False 604 | try: 605 | curs.execute(query) 606 | isvalid = curs.fetchone()[0] 607 | except Exception as e: 608 | log.error(format_message(message='Could not check the validity of the index "{}", {}'.format(indexname, e), color='red')) 609 | 610 | if not isvalid: 611 | log.info('Index "{}" did not pass the validity check'.format(indexname)) 612 | 613 | return isvalid 614 | 615 | 616 | def get_alter_drop_index_query(schemaname, tablename, indexname, reindex_indexname, conname, contypedef, is_deferrable, is_deferred): 617 | """ 618 | Getting a request to replace a new index if the index name is stored, and deleting the old one, if the deletion is not deferred 619 | """ 620 | alter_query = None 621 | drop_query = None 622 | 623 | if conname: 624 | conname_options = contypedef + ' USING INDEX ' + indexname 625 | 626 | if is_deferrable: 627 | conname_options += ' DEFERRABLE' 628 | if is_deferred: 629 | conname_options += ' INITIALLY DEFERRED' 630 | 631 | alter_query = """ 632 | BEGIN; 633 | SET LOCAL statement_timeout TO {statement_timeout}; 634 | SET LOCAL deadlock_timeout TO {deadlock_timeout}; 635 | ALTER TABLE {schemaname}.{tablename} DROP CONSTRAINT {conname}; 636 | ALTER INDEX {schemaname}.{reindex_indexname} RENAME TO {indexname}; 637 | ALTER TABLE {schemaname}.{tablename} ADD CONSTRAINT {conname} {conname_options}; 638 | END; 639 | """.format( 640 | statement_timeout=args.statement_timeout, 641 | deadlock_timeout=args.deadlock_timeout, 642 | schemaname=schemaname, 643 | tablename=tablename, 644 | reindex_indexname=reindex_indexname, 645 | indexname=indexname, 646 | conname=conname, 647 | conname_options=conname_options 648 | ) 649 | else: 650 | if args.change_index_name: 651 | drop_query = """ 652 | DROP INDEX CONCURRENTLY {schemaname}.{indexname}; 653 | """.format( 654 | schemaname=schemaname, 655 | indexname=indexname 656 | ) 657 | else: 658 | tmp_index_name = 'tmp_{}_{}'.format(int(time.time()), random.randint(1000000000, 9000000000)) 659 | 660 | alter_query = """ 661 | BEGIN; 662 | SET LOCAL statement_timeout TO {statement_timeout}; 663 | SET LOCAL deadlock_timeout TO {deadlock_timeout}; 664 | ALTER INDEX {schemaname}.{indexname} RENAME TO {tmp_index_name}; 665 | ALTER INDEX {schemaname}.{reindex_indexname} RENAME TO {indexname}; 666 | END; 667 | """.format( 668 | statement_timeout=args.statement_timeout, 669 | deadlock_timeout=args.deadlock_timeout, 670 | schemaname=schemaname, 671 | indexname=indexname, 672 | reindex_indexname=reindex_indexname, 673 | tmp_index_name=tmp_index_name 674 | ) 675 | 676 | drop_query = """ 677 | DROP INDEX CONCURRENTLY {schemaname}.{tmp_index_name}; 678 | """.format( 679 | schemaname=schemaname, 680 | tmp_index_name=tmp_index_name 681 | ) 682 | 683 | return [alter_query, drop_query] 684 | 685 | 686 | def print_queries(reindex_query=None, alter_query=None, drop_query=None): 687 | """ 688 | Displays requests for reindexing, etc. 689 | """ 690 | if reindex_query is not None or alter_query is not None or drop_query is not None: 691 | formatter = logging.Formatter(fmt='%(message)s') 692 | handler.setFormatter(formatter) 693 | screen_handler.setFormatter(formatter) 694 | log.addHandler(handler) 695 | log.addHandler(screen_handler) 696 | 697 | log.info('') 698 | 699 | for sql in [reindex_query, alter_query, drop_query]: 700 | tab = False 701 | if sql: 702 | queries = sql.split(';') 703 | for query in queries: 704 | query = query.strip() 705 | if query: 706 | if not args.delete_index_after_create and query.count('DROP INDEX') > 0: 707 | continue 708 | if (query + ';').count('END;') > 0: 709 | tab = False 710 | query = ' ' + query + ';' if tab else query + ';' 711 | log.info(query) 712 | if query.count('BEGIN;') > 0: 713 | tab = True 714 | 715 | log.info('') 716 | 717 | formatter = logging.Formatter(fmt='%(asctime)s %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 718 | handler.setFormatter(formatter) 719 | screen_handler.setFormatter(formatter) 720 | log.addHandler(handler) 721 | log.addHandler(screen_handler) 722 | 723 | 724 | def size_pretty(curs, size): 725 | """ 726 | Convert size to readable view 727 | """ 728 | curs.execute('select pg_size_pretty(%s::numeric)', [size]) 729 | 730 | return curs.fetchone()[0] 731 | 732 | 733 | if __name__ == '__main__': 734 | 735 | parser = argparse.ArgumentParser() 736 | parser.add_argument('--host', type=str, action="store", default='localhost', 737 | help='A database host. By default localhost.') 738 | parser.add_argument('--port', type=int, action="store", default=5432, 739 | help='A database port. By default 5432.') 740 | parser.add_argument('--user', type=str, action="store", default=os.getenv('PGUSER', getpass.getuser()), 741 | help='A database user. By default current system user.') 742 | parser.add_argument('--password', type=str, action="store", default=os.getenv('PGPASSWORD'), 743 | help='A password for the user.') 744 | parser.add_argument('--dbname', type=str, action="store", 745 | help='A database to process. By default all the user databses of the instance are processed.') 746 | parser.add_argument('--schema', type=str, nargs='*', action="append", default=[], 747 | help='A schema to process. By default all the schemas of the specified database are processed.') 748 | parser.add_argument('--exclude-schema', type=str, nargs='*', action="append", default=[], 749 | help='A schema to exclude from processing.') 750 | parser.add_argument('--table', type=str, nargs='*', action="append", default=[], 751 | help='A table to process. By default all the tables of the specified schema are processed.') 752 | parser.add_argument('--exclude-table', type=str, nargs='*', action="append", default=[], 753 | help='A table to exclude from processing.') 754 | parser.add_argument('--index', type=str, nargs='*', action="append", default=[], 755 | help='A index to process. By default all the indexes of the specified tables are processed.') 756 | parser.add_argument('--dry-run', action='store_true', default=False, 757 | help='Print statistics only, without affecting any data.') 758 | parser.add_argument('--print-queries', action='store_true', default=False, 759 | help='Print reindex and drop queries. Useful if you want to perform manual reindex or drop later.') 760 | parser.add_argument('--force', action='store_true', default=False, 761 | help='Try to compact indexes that do not meet minimal bloat requirements.') 762 | parser.add_argument('--delete-index-after-create', action='store_true', default=False, 763 | help='Delete old indexes after creating new ones.') 764 | parser.add_argument('--delete-old-indexes', action='store_true', default=False, 765 | help='Delete all old unused indexes from the previous script start, which was not earlier than one hour ago.') 766 | parser.add_argument('--pgstattuple-install', action='store_true', default=False, 767 | help='Installing the pgstattuple extension.') 768 | parser.add_argument('--change-index-name', action='store_true', default=False, 769 | help='Create indexes with the new version in the name, without alter the index and locking.') 770 | parser.add_argument('--minimal-compact-percent', type=int, action="store", default=20, 771 | help='Minimal compact percent. By default 20.') 772 | parser.add_argument('--reindex-retry-max-count', type=int, action="store", default=10, 773 | help='Reindex retry max count. By default 10.') 774 | parser.add_argument('--statement-timeout', type=int, action="store", default=100, 775 | help='Postgres statement timeout, ms. By default 100.') 776 | parser.add_argument('--deadlock-timeout', type=int, action="store", default=10, 777 | help='Postgres deadlock timeout, ms. By default 10.') 778 | parser.add_argument('--log-level', choices=['DEBUG', 'INFO', 'ERROR'], default='INFO', 779 | help='A log level. By default INFO.') 780 | args = parser.parse_args() 781 | 782 | args.schema = [el for elements in args.schema for el in elements] 783 | args.exclude_schema = [el for elements in args.exclude_schema for el in elements] 784 | args.table = ['public.' + el if el.count('.') == 0 else el for elements in args.table for el in elements] 785 | args.exclude_table = [el for elements in args.exclude_table for el in elements] 786 | args.index = ['public.' + el if el.count('.') == 0 else el for elements in args.index for el in elements] 787 | 788 | LEVELS = {'DEBUG': logging.DEBUG, 'INFO': logging.INFO, 'ERROR': logging.ERROR} 789 | 790 | level = LEVELS.get(args.log_level, logging.INFO) 791 | 792 | log_file = 'pg_reindex_{}_{}_{}.log'.format(args.host, args.dbname, int(time.time())) 793 | 794 | formatter = logging.Formatter(fmt='%(asctime)s %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 795 | 796 | handler = logging.FileHandler(log_file, mode='w') 797 | handler.setFormatter(formatter) 798 | 799 | screen_handler = logging.StreamHandler(stream=sys.stdout) 800 | screen_handler.setFormatter(formatter) 801 | 802 | log = logging.getLogger(__name__) 803 | log.setLevel(level) 804 | log.addHandler(handler) 805 | log.addHandler(screen_handler) 806 | 807 | try: 808 | conn = psycopg2.connect( 809 | 'host={host} port={port} dbname={dbname} {user} {password}'.format( 810 | host=args.host, 811 | port=args.port, 812 | dbname=args.dbname, 813 | user=(' user=' + args.user if args.user else ''), 814 | password=(' password=' + args.password if args.password else '') 815 | ) 816 | ) 817 | except psycopg2.OperationalError as e: 818 | log.error(format_message(message='No connect to server', color='red')) 819 | sys.exit(1) 820 | conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) 821 | curs = conn.cursor() 822 | 823 | if args.delete_old_indexes: 824 | drop_old_indexes(curs) 825 | sys.exit(0) 826 | 827 | log.info('Process started, host: {}, dbname: {}'.format(args.host, args.dbname)) 828 | 829 | free_space_total = 0 830 | free_space_total_plan = 0 831 | 832 | pgstattuple = None 833 | pgstattuple_ver = None 834 | 835 | advisory_locks = list() 836 | 837 | try: 838 | if len(args.index) > 0: 839 | for index in args.index: 840 | index_info = get_table_by_index(curs, index) 841 | if index_info is None: 842 | log.error(format_message(message='Index "{}" not found'.format(index), color='red')) 843 | sys.exit(0) 844 | (schemaname, tablename) = index_info 845 | if schemaname is not None and tablename is not None: 846 | if args.schema.count(schemaname) == 0: 847 | args.schema.append(schemaname) 848 | if args.table.count(schemaname + '.' + tablename) == 0: 849 | args.table.append(schemaname + '.' + tablename) 850 | elif not args.force: 851 | (pgstattuple, pgstattuple_ver) = get_pgstattuple_schema_name(curs) 852 | if pgstattuple is None: 853 | if args.pgstattuple_install: 854 | curs.execute('create extension pgstattuple') 855 | (pgstattuple, pgstattuple_ver) = get_pgstattuple_schema_name(curs) 856 | if pgstattuple is None: 857 | log.error(format_message(message='Skip handling database {}: pgstattuple extension is not found'.format(args.dbname), color='red')) 858 | sys.exit(0) 859 | 860 | if len(args.table) > 0: 861 | for table in args.table: 862 | table_info = get_table_by_name(curs, table) 863 | if table_info is None: 864 | log.error(format_message(message='Table "{}" not found'.format(table), color='red')) 865 | sys.exit(0) 866 | (schemaname, tablename) = table_info 867 | if schemaname is not None and tablename is not None: 868 | if args.schema.count(schemaname) == 0: 869 | args.schema.append(schemaname) 870 | if args.table.count(schemaname + '.' + tablename) == 0: 871 | args.table.append(schemaname + '.' + tablename) 872 | 873 | tables = get_database_tables(curs, args.schema, args.table, args.exclude_schema, args.exclude_table) 874 | 875 | if tables is None: 876 | log.error(format_message(message='No tables to process', color='red')) 877 | sys.exit(0) 878 | 879 | for table in tables: 880 | (schemaname, tablename) = table 881 | 882 | if not exist_table(curs, schemaname, tablename): 883 | log.info('Table "{}.{}" not found'.format(schemaname, tablename)) 884 | continue 885 | 886 | if advisory_lock(curs, schemaname, tablename): 887 | indexes = get_index_data_list(curs, schemaname, tablename, args.index) 888 | 889 | if indexes is None: 890 | continue 891 | 892 | for index in indexes: 893 | (indexname, tablespace, indexdef, indmethod, conname, contypedef, allowed, is_functional, is_deferrable, is_deferred) = index 894 | 895 | (size, page_count) = get_index_size_statistics(curs, schemaname, indexname) 896 | 897 | if page_count <= 5: 898 | log.info('Skipping reindex: "{}.{}", empty or 5 page index'.format(schemaname, indexname)) 899 | continue 900 | 901 | if indmethod != 'btree': 902 | log.info('Skipping reindex: "{}.{}" index is {} a not btree, reindexing is up to you'.format(schemaname, indexname, indmethod)) 903 | continue 904 | 905 | if allowed != 1: 906 | log.info('Skipping reindex: "{}.{}", can not reindex without heavy locks because of its dependencies, reindexing is up to you'.format(schemaname, indexname)) 907 | continue 908 | 909 | if len(args.index) == 0 and not args.force: 910 | (free_percent, free_space) = get_index_bloat_stats(curs, pgstattuple, pgstattuple_ver, schemaname, indexname) 911 | 912 | if free_percent < args.minimal_compact_percent: 913 | log.info('Skipping reindex: "%s.%s", %d%% space to compact from %d%% minimum required' % (schemaname, indexname, free_percent, args.minimal_compact_percent)) 914 | continue 915 | 916 | free_space_total_plan += free_space 917 | 918 | log.info('Bloat stats: "%s.%s" - free_percent %d%%, free_space %s' % (schemaname, indexname, free_percent, size_pretty(curs, free_space))) 919 | 920 | (reindex_indexname, reindex_query) = get_reindex_query(indexname, indexdef, tablespace, conname) 921 | 922 | (alter_query, drop_query) = get_alter_drop_index_query(quote_ident(schemaname, curs), 923 | quote_ident(tablename, curs), 924 | quote_ident(indexname, curs), 925 | quote_ident(reindex_indexname, curs), 926 | quote_ident(conname, curs) if conname else conname, 927 | contypedef, is_deferrable, is_deferred) 928 | 929 | if args.print_queries: 930 | print_queries(reindex_query, alter_query, drop_query) 931 | 932 | if args.dry_run: 933 | continue 934 | 935 | reindex_time = datetime.datetime.now() 936 | 937 | try: 938 | queries = reindex_query.split(';') 939 | for query in queries: 940 | query = query.strip() 941 | if query: 942 | query += ';' 943 | curs.execute(query) 944 | except psycopg2.Error as e: 945 | log.error(format_message(message='{}, {}'.format(e.pgcode, e.pgerror), color='red')) 946 | drop_temp_index(curs, schemaname, reindex_indexname) 947 | continue 948 | 949 | if not index_is_valid(curs, schemaname, reindex_indexname): 950 | drop_temp_index(curs, schemaname, reindex_indexname) 951 | continue 952 | 953 | if is_functional == 1: 954 | try: 955 | query = 'ANALYZE {}.{}'.format(schemaname, tablename) 956 | curs.execute(query) 957 | log.info('{} - done'.format(query)) 958 | except psycopg2.Error as e: 959 | log.error(format_message(message='{}, {}'.format(e.pgcode, e.pgerror), color='red')) 960 | drop_temp_index(curs, schemaname, reindex_indexname) 961 | continue 962 | 963 | locked_alter_attempt = 0 964 | 965 | if alter_query is not None: 966 | while locked_alter_attempt < args.reindex_retry_max_count: 967 | try: 968 | queries = alter_query.split(';') 969 | for query in queries: 970 | query = query.strip() 971 | if query: 972 | query += ';' 973 | curs.execute(query) 974 | break 975 | except psycopg2.Error as e: 976 | curs.execute('ROLLBACK;') 977 | locked_alter_attempt += 1 978 | if e.pgerror.find('canceling statement due to statement timeout') != -1: 979 | log.info('Reindex: "{}.{}", lock retry {}'.format(schemaname, indexname, locked_alter_attempt)) 980 | else: 981 | log.error(format_message(message='{}, {}'.format(e.pgcode, e.pgerror), color='red')) 982 | 983 | if locked_alter_attempt >= args.reindex_retry_max_count: 984 | log.info('Reindex: "{}.{}", unable lock, reindexing is up to you, queries:'.format(schemaname, indexname)) 985 | print_queries(alter_query=alter_query, drop_query=drop_query) 986 | continue 987 | 988 | reindex_time = (datetime.datetime.now() - reindex_time).total_seconds() 989 | 990 | if args.change_index_name and not conname: 991 | (new_size, new_page_count) = get_index_size_statistics(curs, schemaname, reindex_indexname) 992 | else: 993 | (new_size, new_page_count) = get_index_size_statistics(curs, schemaname, indexname) 994 | 995 | free_percent = int(100 * (1 - float(new_size) / size)) 996 | free_space = size - new_size 997 | free_space_total += free_space 998 | 999 | if args.change_index_name and not conname: 1000 | message = 'Reindex: %s.%s, initial size %s pages (%s), has been reduced by %d%% (%s), duration %d seconds. New index name: %s' % ( 1001 | schemaname, indexname, size_pretty(curs, size), page_count, 1002 | free_percent, size_pretty(curs, free_space), reindex_time, reindex_indexname 1003 | ) 1004 | log.info(format_message(message=message, color='green')) 1005 | else: 1006 | message = 'Reindex: %s.%s, initial size %s pages (%s), has been reduced by %d%% (%s), duration %d seconds, attempts %d' % ( 1007 | schemaname, indexname, size_pretty(curs, size), page_count, 1008 | free_percent, size_pretty(curs, free_space), reindex_time, locked_alter_attempt 1009 | ) 1010 | log.info(format_message(message=message, color='green')) 1011 | 1012 | if not conname: 1013 | if args.delete_index_after_create: 1014 | drop_old_index(curs, drop_query) 1015 | elif args.change_index_name: 1016 | drop_old_index_later(curs, drop_query) 1017 | except Exception as e: 1018 | log.error(format_message(message=e, color='red')) 1019 | finally: 1020 | if free_space_total_plan > 0 and args.dry_run: 1021 | log.info(format_message(message='Total has been reduced by {}'.format(size_pretty(curs, free_space_total_plan)), color='green')) 1022 | elif free_space_total > 0: 1023 | log.info(format_message(message='Total has been reduced by {}'.format(size_pretty(curs, free_space_total)), color='green')) 1024 | advisory_unlock_all(curs) 1025 | log.info('Process completed, host: {}, dbname: {}'.format(args.host, args.dbname)) 1026 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | psycopg2 --------------------------------------------------------------------------------