├── .coveragerc ├── .github └── workflows │ └── python-package.yml ├── .gitignore ├── .gitmodules ├── AUTHORS ├── CHANGELOG ├── CONTRIBUTING ├── INSTALL ├── LICENSE ├── MANIFEST.in ├── README.rst ├── contrib_update.sh ├── pg_analyse ├── __init__.py ├── cli.py ├── formatters.py ├── inspections │ ├── __init__.py │ ├── base.py │ ├── bundled.py │ └── contrib │ │ ├── __init__.py │ │ └── index_health │ │ ├── __init__.py │ │ └── inspections.py ├── settings.py └── toolbox.py ├── pg_analyse_cli.gif ├── setup.cfg ├── setup.py ├── tests ├── conftest.py └── test_module.py └── tox.ini /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = pg_analyse/ 3 | omit = pg_analyse/cli.py 4 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | name: Python package 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | workflow_dispatch: 9 | 10 | jobs: 11 | build: 12 | 13 | runs-on: ubuntu-latest 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | python-version: ["3.10", 3.11, 3.12] 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v5 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | - name: Install deps 26 | run: | 27 | python -m pip install pytest coverage coveralls psycopg[binary] tabulate 28 | git submodule update --init 29 | - name: Run tests 30 | env: 31 | GITHUB_TOKEN: ${{ secrets.github_token }} 32 | run: | 33 | coverage run --source=pg_analyse -m pytest 34 | coveralls --service=github 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .project 2 | .pydevproject 3 | .idea 4 | .tox 5 | 
__pycache__ 6 | *.pyc 7 | *.pyo 8 | *.egg-info 9 | *.env* 10 | docs/_build/ 11 | 12 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "pg_analyse/sql/contrib/index_health"] 2 | path = pg_analyse/sql/contrib/index_health 3 | url = https://github.com/mfvanek/pg-index-health-sql.git 4 | update = merge 5 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | pg_analyse authors 2 | ================== 3 | 4 | Created by Igor `idle sign` Starikov. 5 | 6 | 7 | Contributors 8 | ------------ 9 | 10 | lospejos 11 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | pg_analyse changelog 2 | ==================== 3 | 4 | 5 | v0.5.0 [2020-04-28] 6 | ------------------- 7 | + Now supported passing common inspections params (see #2). 8 | * Fixed wrong detection of Duplicated indexes with different collations. 9 | 10 | 11 | v0.4.0 [2020-04-11] 12 | ------------------- 13 | + Added exception handling. 14 | + Added Slowest queries [q_slowest] inspection. 15 | + CLI. Added indent to inspection tables. 16 | * Updated `index_health` contrib. 17 | 18 | 19 | v0.3.0 [2020-03-13] 20 | ------------------- 21 | + Added basic protection from SQL-injections. 22 | 23 | 24 | v0.2.2 [2020-03-12] 25 | ------------------- 26 | * Fix. Repack with SQLs previously missing. 27 | 28 | 29 | v0.2.1 [2020-03-12] 30 | ------------------- 31 | * CLI. Fixed 'run' command without --args option. 32 | 33 | 34 | v0.2.0 [2020-03-11] 35 | ------------------- 36 | + Added support to pass arguments to inspections. 37 | 38 | 39 | v0.1.0 [2020-03-10] 40 | ------------------- 41 | + Basic functionality. 
-------------------------------------------------------------------------------- /CONTRIBUTING: -------------------------------------------------------------------------------- 1 | pg_analyse contributing 2 | ======================= 3 | 4 | 5 | Submit issues 6 | ------------- 7 | 8 | If you spotted something weird in application behavior or want to propose a feature you are welcome. 9 | 10 | 11 | Write code 12 | ---------- 13 | If you are eager to participate in application development and to work on an existing issue (whether it should 14 | be a bugfix or a feature implementation), fork, write code, and make a pull request right from the forked project page. 15 | 16 | 17 | Spread the word 18 | --------------- 19 | 20 | If you have some tips and tricks or any other words that you think might be of interest for the others — publish it 21 | wherever you find convenient. 22 | 23 | 24 | See also: https://github.com/idlesign/pg_analyse 25 | 26 | -------------------------------------------------------------------------------- /INSTALL: -------------------------------------------------------------------------------- 1 | pg_analyse installation 2 | ======================= 3 | 4 | 5 | Python ``pip`` package is required to install ``pg_analyse``. 6 | 7 | 8 | From sources 9 | ------------ 10 | 11 | Use the following command line to install ``pg_analyse`` from sources directory (containing setup.py): 12 | 13 | pip install . 14 | 15 | or 16 | 17 | python setup.py install 18 | 19 | 20 | From PyPI 21 | --------- 22 | 23 | Alternatively you can install ``pg_analyse`` from PyPI: 24 | 25 | pip install pg_analyse 26 | 27 | 28 | Use `-U` flag for upgrade: 29 | 30 | pip install -U pg_analyse 31 | 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020-2025, Igor `idle sign` Starikov 2 | All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of the pg_analyse nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | 29 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include AUTHORS 2 | include CHANGELOG 3 | include INSTALL 4 | include LICENSE 5 | include README.rst 6 | 7 | include docs/Makefile 8 | recursive-include docs *.rst 9 | recursive-include docs *.py 10 | recursive-include tests * 11 | recursive-include pg_analyse/sql * 12 | 13 | recursive-exclude * __pycache__ 14 | recursive-exclude * *.py[co] 15 | recursive-exclude * empty 16 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | pg_analyse 2 | ========== 3 | https://github.com/idlesign/pg_analyse 4 | 5 | |release| |lic| |coverage| 6 | 7 | .. |release| image:: https://img.shields.io/pypi/v/pg_analyse.svg 8 | :target: https://pypi.python.org/pypi/pg_analyse 9 | 10 | .. |lic| image:: https://img.shields.io/pypi/l/pg_analyse.svg 11 | :target: https://pypi.python.org/pypi/pg_analyse 12 | 13 | .. |coverage| image:: https://img.shields.io/coveralls/idlesign/pg_analyse/master.svg 14 | :target: https://coveralls.io/r/idlesign/pg_analyse 15 | 16 | 17 | .. image:: https://github.com/idlesign/pg_analyse/blob/master/pg_analyse_cli.gif 18 | 19 | 20 | Description 21 | ----------- 22 | 23 | *Tools to gather useful information from PostgreSQL* 24 | 25 | This package can function both as Python module and as a command line utility. 26 | Command line interface can show gathered information in the form of tables or ``JSON``. 27 | 28 | Use it to gather information manually or in Continuous Integration. 29 | 30 | Can give you some information on: 31 | 32 | * Index health (bloat, duplicates, unused, etc.); 33 | * Tables missing PKs and indexes; 34 | * Slowest queries. 35 | 36 | 37 | .. 
note:: SQLs used for inspections are available in https://github.com/mfvanek/pg-index-health-sql 38 | 39 | 40 | Requirements 41 | ------------ 42 | 43 | * Python 3.10+ 44 | * psycopg 3 45 | 46 | 47 | Installation 48 | ------------ 49 | 50 | .. code-block:: bash 51 | 52 | ; If you do not have psycopg3 yet, install it as `psycopg` or `psycopg[binary]`. 53 | ; You may also want to install `envbox` to get PG connection settings from .env files. 54 | ; Your distribution may require issuing `pip3` command instead of plain `pip`. 55 | $ pip install psycopg[binary] envbox 56 | 57 | ; If you want to use it just as Python module: 58 | $ pip install pg_analyse 59 | 60 | ; If you want to use it from command line: 61 | $ pip install pg_analyse[cli] 62 | 63 | 64 | Usage 65 | ----- 66 | 67 | Hint 68 | ~~~~ 69 | 70 | One can set ``PG_ANALYSE_DSN`` environment variable instead of explicitly passing DSN 71 | to connect to PostgreSQL. If `envbox `_ is installed this 72 | variable can be defined in ``.env`` file . 73 | 74 | Python module 75 | ~~~~~~~~~~~~~ 76 | 77 | 78 | .. code-block:: python 79 | 80 | from pg_analyse.toolbox import Analyser, analyse_and_format 81 | 82 | analyser = Analyser(dsn='user=test') 83 | 84 | inspections = analyser.run() 85 | inspection = inspections[0] 86 | 87 | print(inspection.alias) 88 | print(inspection.result) 89 | 90 | # Shortcut function is available: 91 | out = analyse_and_format() 92 | 93 | 94 | CLI 95 | ~~~ 96 | 97 | .. code-block:: bash 98 | 99 | ; Show known inspections and descriptions: 100 | $ pg_analyse inspections 101 | 102 | ; Use DSN from the environment variable (see hint above), 103 | ; print out complex values (e.g. sizes) in human-friendly way: 104 | $ pg_analyse run --human 105 | 106 | ; Run certain inspections, override default params. 107 | ; Use "common" keyword to pass params common for all inspections. 
108 | $ pg_analyse run --one idx_unused --one idx_bloat --args "idx_bloat:schema=my,bloat_min=20;common:schema=my" 109 | 110 | ; Use explicitly passed DSN: 111 | $ pg_analyse run --dsn "host=myhost.net port=6432 user=test password=xxx sslmode=verify-full sslrootcert=/home/my.pem" 112 | ; Local connection as `postgres` user with password: 113 | $ pg_analyse run --dsn "host=127.0.0.1 user=postgres password=yourpass" 114 | 115 | ; Output analysis result as json (instead of tables): 116 | $ pg_analyse run --fmt json 117 | 118 | 119 | Adding Inspections 120 | ------------------ 121 | 122 | To add a new inspection to ``pg_analyse``: 123 | 124 | 1. Compose SQL for inspection and put it into a file under ``sql/`` directory. 125 | 2. Add a subclass of ``Inspection`` into ``inspections/bundled.py``. Fill in ``alias``, ``sql_name`` attributes (see docstrings in ``Inspection``). 126 | -------------------------------------------------------------------------------- /contrib_update.sh: -------------------------------------------------------------------------------- 1 | #! 
#!/usr/bin/env python
from functools import partial
from textwrap import wrap, indent

import click

from pg_analyse import VERSION_STR
from pg_analyse.formatters import Formatter
from pg_analyse.inspections.base import Inspection
from pg_analyse.toolbox import analyse_and_format, parse_args_string


@click.group()
@click.version_option(version=VERSION_STR)
def entry_point():
    """pg_analyse allows you to run various inspections against a PostgreSQL database."""


@entry_point.command()
@click.option('--dsn', help='DSN to connect to PG', default='')
@click.option(
    '--fmt',
    help='Format used for output',
    type=click.Choice(Formatter.formatters_all.keys()),
)
@click.option(
    '--one',
    help='Inspection name to limit runs',
    multiple=True
)
@click.option(
    '--human',
    help='Use human friendly values formatting (e.g. sizes)',
    is_flag=True
)
@click.option(
    '--args',
    help='Arguments to pass to inspections. E.g.: "idx_bloat:schema=my,bloat_min=20;idx_unused:schema=my"',
    default=''
)
def run(dsn, fmt, one, human, args):
    """Run analysis."""

    # An empty DSN is allowed: Analyser will fall back
    # to the PG_ANALYSE_DSN environment variable.
    click.secho(analyse_and_format(
        dsn=dsn,
        fmt=fmt or '',
        only=one,
        human=human,
        arguments=parse_args_string(args)
    ))


@entry_point.command()
def inspections():
    """List known inspections."""

    for inspection in Inspection.inspections_all:
        click.secho(f'* {inspection.title} [{inspection.alias}]', fg='blue')

        shift = partial(indent, prefix='  ')

        click.secho(shift('\n'.join(wrap(f'{inspection.__doc__}'))))
        click.secho()

        click.secho(shift('Parameters:'))

        # Parameters are indented one level deeper than the description.
        shift = partial(indent, prefix='    ')
        for key, val in inspection.params.items():
            click.secho(shift(f'{key}: {val}'))

        click.secho()


def main():
    entry_point(obj={})


if __name__ == '__main__':
    main()
import json
import math
from textwrap import indent
from typing import TYPE_CHECKING, Dict, List, Type

if TYPE_CHECKING:  # pragma: nocover
    from .toolbox import Inspection


class Formatter:
    """Base inspection result formatter."""

    alias: str = ''
    """Short distinctive name for this formatter."""

    formatters_all: Dict[str, Type['Formatter']] = {}
    """Registry of all known formatters. Populated automatically on subclassing."""

    def __init__(self, inspection: 'Inspection', *, human: bool = False):
        """
        :param inspection: Inspection object to format the result of.
        :param human: Use human friendly values formatting (e.g. sizes).

        """
        self.inspection = inspection
        self.human = human

    def __init_subclass__(cls):
        super().__init_subclass__()

        alias = cls.alias

        # Only subclasses declaring an alias are registered as usable formatters.
        if alias:
            cls.formatters_all[alias] = cls

    @staticmethod
    def humanize_size(bytes_size: int) -> str:
        """Returns human readable size, e.g. '1.5 KB'.

        :param bytes_size: Size in bytes.

        """
        if not bytes_size:
            return '0 B'

        names = ('B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB')

        # Pick the largest unit the value fits into, keep two decimals.
        name_idx = int(math.floor(math.log(bytes_size, 1024)))
        size = round(bytes_size / math.pow(1024, name_idx), 2)

        return f'{size} {names[name_idx]}'

    def _get_rows_processed(self) -> list:
        """Returns result rows with per-column value casting applied
        (e.g. size columns humanized when `human` mode is on).

        """
        result = self.inspection.result

        if not result:
            return []

        human = self.human

        # One caster per column: size columns may be humanized,
        # everything else passes through unchanged.
        casters = [
            self.humanize_size if (human and 'size' in name) else (lambda value: value)
            for name in result.columns
        ]

        return [
            [cast(chunk) for cast, chunk in zip(casters, row)]
            for row in result.rows
        ]

    def run(self) -> str:  # pragma: nocover
        """Must format data from self.inspection into a string."""
        raise NotImplementedError

    @classmethod
    def wrap(cls, lines: List[str]) -> str:  # pragma: nocover
        """Must wrap a list into a single string.

        :param lines: Multiple results from self.run.

        """
        raise NotImplementedError


class TableFormatter(Formatter):
    """Format inspection result as table."""

    alias: str = 'table'

    def run(self) -> str:
        # Imported lazily: tabulate is an optional (CLI-only) dependency.
        from tabulate import tabulate

        inspection = self.inspection

        columns = [
            column.replace('_', ' ').capitalize()
            for column in getattr(inspection.result, 'columns', [])]

        lines = []

        errors = inspection.errors

        if errors:
            lines.append('\n'.join(errors))

        else:
            lines.append(f'{tabulate(self._get_rows_processed(), headers=columns)}')

        return f'{inspection.title} [{inspection.alias}]\n\n' + indent('\n'.join(lines), '  ')

    @classmethod
    def wrap(cls, lines: List[str]) -> str:
        return '\n\n\n'.join(lines)


class JsonFormatter(Formatter):
    """Format inspection result as JSON."""

    alias: str = 'json'

    def run(self) -> str:
        inspection = self.inspection

        line = {
            'title': inspection.title,
            'alias': inspection.alias,
            'arguments': inspection.arguments,
            'errors': inspection.errors,
            'result': {
                'rows': self._get_rows_processed(),
                'columns': getattr(inspection.result, 'columns', []),
            },
        }

        return json.dumps(line)

    @classmethod
    def wrap(cls, lines: List[str]) -> str:
        return f"[{','.join(lines)}]"
from collections import namedtuple
from pathlib import Path
from typing import Dict, List, Optional, Type

from ..settings import DIR_SQL

# Result of a single inspection run: column names and data rows.
InspectionResult = namedtuple('InspectionResult', ['columns', 'rows'])


class Inspection:
    """Base class for inspections."""

    title: str = ''
    """Human-friendly inspection title."""

    alias: str = ''
    """Inspection short alias to address it easily."""

    params: dict = {}
    """Parameters accepted by this inspection."""

    params_aliases: Dict[str, str] = {}
    """Param alias mapping name->sqlname."""

    sql_name: str = ''
    """SQL template file name."""

    sql_dir: Path = DIR_SQL
    """SQL template directory."""

    inspections_all: List[Type['Inspection']] = []
    """Registry of all known inspections. Populated automatically on subclassing."""

    def __init_subclass__(cls):
        super().__init_subclass__()

        # Only subclasses declaring an alias are considered concrete inspections.
        if cls.alias:
            cls.inspections_all.append(cls)

    def __init__(self, *, args: Optional[Dict[str, str]] = None):
        """
        :param args: User supplied arguments to override default params.

        """
        self.title = self.title or self.alias

        self.sql_name = self.sql_name or self.alias

        self.arguments = {**self.params, **(args or {})}
        """User supplied arguments to replace defaults."""

        self.errors: List[str] = []
        """Inspection errors description."""

        self.result: Optional[InspectionResult] = None
        """Inspection run result. Populated runtime."""

    def _get_sql_dir(self) -> Path:
        """Returns SQL directory."""
        return self.sql_dir

    def get_sql_path(self) -> str:
        """Returns executed SQL path."""
        return str(self._get_sql_dir() / f'{self.sql_name}.sql')

    def _tpl_read(self) -> str:
        """Reads from filesystem SQL template and returns it."""
        with open(self.get_sql_path()) as f:
            return f.read()

    def get_sql(self) -> str:
        """Returns SQL ready to be executed."""

        # Here we replace ":var"-like param placeholders
        # with "%(var)s"-like acceptable for psycopg,
        # escaping % with %%.

        out = self._tpl_read().replace('%', '%%')
        aliases = self.params_aliases

        for name in self.arguments:
            name_sql = aliases.get(name, name)
            out = out.replace(f':{name_sql}', f'%({name})s')

        return out


class ContribInspection(Inspection):
    """Base class for contributed inspections."""

    sql_dir: Path = DIR_SQL / 'contrib'
from pathlib import Path
from typing import Dict

from ...base import ContribInspection


class _IndexHealthInspection(ContribInspection):
    """Base check for Index Health contrib.

    https://github.com/mfvanek/pg-index-health-sql

    """
    sql_dir: Path = ContribInspection.sql_dir / 'index_health' / 'sql'

    # Nearly every Index Health SQL template is parameterized by a single
    # target schema, so the shared defaults live here. Subclasses accepting
    # more (or different) parameters override both mappings entirely.
    params: dict = {
        'schema': 'public',
    }

    params_aliases: Dict[str, str] = {
        'schema': 'schema_name_param',
    }


class SeqOverflow(_IndexHealthInspection):
    """Reveals sequences exhaustion."""

    title: str = 'Sequences exhaustion'
    alias: str = 'seq_exh'
    sql_name: str = 'sequence_overflow'

    params: dict = {
        'schema': 'public',
        'left_min': 20,
    }

    params_aliases: Dict[str, str] = {
        'schema': 'schema_name_param',
        'left_min': 'remaining_percentage_threshold',
    }


class IndexesBloated(_IndexHealthInspection):
    """Reveals bloated indexes."""

    title: str = 'Bloating indexes'
    alias: str = 'idx_bloat'
    sql_name: str = 'bloated_indexes'

    params: dict = {
        'schema': 'public',
        'bloat_min': 50,
    }

    params_aliases: Dict[str, str] = {
        'schema': 'schema_name_param',
        'bloat_min': 'bloat_percentage_threshold',
    }


class IndexesDuplicated(_IndexHealthInspection):
    """Reveals duplicated/identical indexes."""

    title: str = 'Duplicated indexes'
    alias: str = 'idx_dub'
    sql_name: str = 'duplicated_indexes'


class IndexesMissingForFk(_IndexHealthInspection):
    """Reveals foreign keys without indexes."""

    title: str = 'Foreign keys without indexes'
    alias: str = 'idx_fk'
    sql_name: str = 'foreign_keys_without_index'


class IndexesBtreeArray(_IndexHealthInspection):
    """Reveal B-Tree indexes on array columns."""

    title: str = 'B-Tree indexes on array columns'
    alias: str = 'idx_btree_arr'
    sql_name: str = 'btree_indexes_on_array_columns'


class IndexesWithNulls(_IndexHealthInspection):
    """Reveals indexes with NULL values."""

    title: str = 'Indexes with NULLs'
    alias: str = 'idx_nulls'
    sql_name: str = 'indexes_with_null_values'


class IndexesBoolean(_IndexHealthInspection):
    """Reveals indexes on boolean."""

    title: str = 'Indexes on Boolean'
    alias: str = 'idx_bool'
    sql_name: str = 'indexes_with_boolean'


class IndexesWithIntersections(_IndexHealthInspection):
    """Reveals partially identical (intersected) indexes."""

    title: str = 'Intersecting indexes'
    alias: str = 'idx_intersect'
    sql_name: str = 'intersected_indexes'


class IndexesInvalid(_IndexHealthInspection):
    """Reveals invalid/broken indexes."""

    title: str = 'Invalid indexes'
    alias: str = 'idx_invalid'
    sql_name: str = 'invalid_indexes'


class IndexesUnused(_IndexHealthInspection):
    """Reveals unused indexes."""

    title: str = 'Unused indexes'
    alias: str = 'idx_unused'
    sql_name: str = 'unused_indexes'


class ConstraintsInvalid(_IndexHealthInspection):
    """Reveal not valid constraints."""

    title: str = 'Not valid constraints'
    alias: str = 'constr_invalid'
    sql_name: str = 'not_valid_constraints'


class TablesBloated(_IndexHealthInspection):
    """Reveals bloated tables."""

    title: str = 'Bloating tables'
    alias: str = 'tbl_bloat'
    sql_name: str = 'bloated_tables'

    params: dict = {
        'schema': 'public',
        'bloat_min': 50,
    }

    params_aliases: Dict[str, str] = {
        'schema': 'schema_name_param',
        'bloat_min': 'bloat_percentage_threshold',
    }


class TablesMissingIndexes(_IndexHealthInspection):
    """Reveals tables with missing indexes."""

    title: str = 'Tables lacking indexes'
    alias: str = 'tbl_noindex'
    sql_name: str = 'tables_with_missing_indexes'


class TablesMissingPk(_IndexHealthInspection):
    """Reveals tables missing primary keys."""

    title: str = 'Tables without Primary Key'
    alias: str = 'tbl_nopk'
    sql_name: str = 'tables_without_primary_key'


class ColumnsJson(_IndexHealthInspection):
    """Reveals columns using JSON type (jsonb advised)."""

    title: str = 'Columns using JSON type'
    alias: str = 'col_json'
    sql_name: str = 'columns_with_json_type'


class ColumnsSerialPrimary(_IndexHealthInspection):
    """Reveals columns using serial types but non-primary or primary+foreign."""

    title: str = 'Serial types in relation to primary key'
    alias: str = 'col_serial'
    sql_name: str = 'columns_with_serial_types'


class ColumnsUnconventionalNames(_IndexHealthInspection):
    """Reveals columns that have to be enclosed in double-quotes due
    to not following naming conventions."""

    title: str = 'Columns with unconventional names'
    alias: str = 'col_unconv'
    sql_name: str = 'columns_not_following_naming_convention'


class FkDuplicated(_IndexHealthInspection):
    """Reveals duplicated foreign keys."""

    title: str = 'FK duplicated'
    alias: str = 'fk_dub'
    sql_name: str = 'duplicated_foreign_keys'


class FkUnmatchedType(_IndexHealthInspection):
    """Reveals foreign keys with the constrained column type not matching
    the type in the referenced table."""

    title: str = 'FK unmatched types'
    alias: str = 'fk_typematch'
    sql_name: str = 'foreign_keys_with_unmatched_column_type'


class FkIntersecting(_IndexHealthInspection):
    """Reveals foreign keys with overlapping sets of columns."""

    title: str = 'FK intersected'
    alias: str = 'fk_isect'
    sql_name: str = 'intersected_foreign_keys'


class QueriesSlowest(_IndexHealthInspection):
    """Reveals slowest queries. Requires the pg_stat_statements extension."""

    title: str = 'Slowest queries'
    alias: str = 'q_slowest'

    # This template lives in the extension-dependent subdirectory.
    sql_dir: Path = _IndexHealthInspection.sql_dir / 'ext'
    sql_name: str = 'slowest_queries_by_total_execution_time'

    params: dict = {
        'count': 10,
    }

    params_aliases: Dict[str, str] = {
        'count': 'limit_count',
    }
TableFormatter 10 | from .inspections import Inspection, InspectionResult 11 | from .settings import ENV_VAR 12 | 13 | try: # pragma: nocover 14 | from envbox import get_environment 15 | environ = get_environment() 16 | 17 | except ImportError: 18 | from os import environ 19 | 20 | 21 | TypeOnly = Union[List[str], Set[str]] 22 | TypeInspectionsArgs = Dict[str, Dict[str, str]] 23 | 24 | 25 | class Analyser: 26 | """Performs the analysis running known inspections.""" 27 | 28 | def __init__(self, *, dsn: str = ''): 29 | """ 30 | 31 | :param dsn: DSN to connection to PostgreSQL. 32 | 33 | """ 34 | if not dsn: 35 | dsn = environ.get(ENV_VAR, '') 36 | 37 | self.dsn = dsn 38 | 39 | def _sql_exec(self, *, connection, sql: str, params: dict) -> InspectionResult: 40 | 41 | with connection.cursor() as cursor: 42 | cursor.execute(sql, params) 43 | columns = [column.name for column in cursor.description] 44 | rows = cursor.fetchall() 45 | 46 | return InspectionResult(columns, rows) 47 | 48 | def run(self, *, only: TypeOnly = None, arguments: TypeInspectionsArgs = None) -> List[Inspection]: 49 | """Run analysis. 50 | 51 | :param only: Names of inspections we're interested in. 52 | If not set all inspections are run. 53 | 54 | :param arguments: Arguments to pass to inspections. 55 | Pseudo-inspection alias "common" can be used to pass params common for all inspections. 

            Example:
                {
                    'insp_alias': {'param1': 'value', 'param2': 'value'},
                    'common': {'schema': 'nonpublic'},
                }

        """
        results = []
        only = set(only or [])
        arguments = arguments or {}
        # Params under the "common" pseudo-alias are shared by every inspection.
        arguments_common = arguments.get('common', {})

        with psycopg.connect(self.dsn) as connection:

            for inspection_cls in Inspection.inspections_all:

                alias = inspection_cls.alias

                if not only or alias in only:

                    # Per-inspection arguments override the common ones;
                    # ``{} or None`` passes None when no arguments were given at all.
                    inspection = inspection_cls(args={
                        **arguments_common,
                        **arguments.get(alias, {}),
                    } or None)

                    try:

                        inspection.result = self._sql_exec(
                            connection=connection,
                            sql=inspection.get_sql(),
                            params=inspection.arguments,
                        )

                    except Exception as e:
                        # Collect the failure instead of raising, so one broken
                        # inspection does not abort the whole run.
                        inspection.errors.append(f'{e}')

                    results.append(inspection)

        return results


def analyse_and_format(
        *,
        dsn: str = '',
        fmt: str = '',
        only: TypeOnly = None,
        human: bool = False,
        arguments: TypeInspectionsArgs = None
) -> str:
    """Performs the analysis and returns results as a string.

    :param dsn: DSN used to connect to PostgreSQL.

    :param fmt: Formatter alias to be used to format analysis results.

    :param only: Names of inspections we're interested in.
        If not set all inspections are run.

    :param human: Use human friendly values formatting (e.g. sizes).

    :param arguments: Arguments to pass to inspections.
        Pseudo-inspection alias "common" can be used to pass params common for all inspections.

        Example:
            {
                'insp_alias': {'param1': 'value', 'param2': 'value'},
                'common': {'schema': 'nonpublic'},
            }

    """
    analyser = Analyser(dsn=dsn)
    inspections = analyser.run(only=only, arguments=arguments)

    # Table output is the default format.
    fmt = fmt or TableFormatter.alias
    formatter_cls = Formatter.formatters_all[fmt]

    out = []

    for inspection in inspections:
        out.append(formatter_cls(inspection, human=human).run())

    return formatter_cls.wrap(out)


def parse_args_string(val: str) -> TypeInspectionsArgs:
    """Parses inspections args string into a dict.

    Whitespace around aliases, names and values is tolerated;
    entries without any values (e.g. 'xxxx:') are dropped.

    :param val: E.g.: idx_bloat:schema=my,bloat_min=20;common:schema=my

    """
    out = {}

    # Grammar: ';' separates per-inspection chunks, ':' splits an alias from
    # its args, ',' separates args, '=' splits an arg name from its value.
    for chunk in val.split(';'):
        args = {}

        alias, _, argstr = chunk.strip().partition(':')
        argstr = argstr.strip()

        for arg in argstr.split(','):
            # NOTE: ``val`` is deliberately rebound to the argument value here;
            # the original input string is not needed past this point.
            name, _, val = arg.partition('=')
            val = val.strip()

            if val:
                args[name.strip()] = val

        if args:
            out[alias.strip()] = args

    return out
-------------------------------------------------------------------------------- /pg_analyse_cli.gif: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/idlesign/pg_analyse/abc7c5f7776ffa33f595d107774dbaffcee377b8/pg_analyse_cli.gif
-------------------------------------------------------------------------------- /setup.cfg: --------------------------------------------------------------------------------
[aliases]
release = clean --all sdist bdist_wheel upload

test = pytest

[wheel]
universal = 1

-------------------------------------------------------------------------------- /setup.py: --------------------------------------------------------------------------------
import io
import os
import re
import sys

from setuptools
import setup, find_packages 7 | 8 | try: 9 | import psycopg 10 | install_requires = [] 11 | 12 | except ImportError: 13 | 14 | try: 15 | import psycopg2 as psycopg 16 | install_requires = [] 17 | except ImportError: 18 | install_requires = ['psycopg2'] # To not to build on install. 19 | 20 | 21 | PATH_BASE = os.path.dirname(__file__) 22 | 23 | 24 | def check_submodule(): 25 | if not os.path.exists(os.path.join(PATH_BASE, 'pg_analyse', 'sql', 'contrib', 'index_health', 'sql')): 26 | raise Exception('Submodules not initialized. Use "$ git submodule update --init" and retry.') 27 | 28 | 29 | check_submodule() 30 | 31 | 32 | def read_file(fpath): 33 | """Reads a file within package directories.""" 34 | with io.open(os.path.join(PATH_BASE, fpath)) as f: 35 | return f.read() 36 | 37 | 38 | def get_version(): 39 | """Returns version number, without module import (which can lead to ImportError 40 | if some dependencies are unavailable before install.""" 41 | contents = read_file(os.path.join('pg_analyse', '__init__.py')) 42 | version = re.search('VERSION = \(([^)]+)\)', contents) 43 | version = version.group(1).replace(', ', '.').strip() 44 | return version 45 | 46 | 47 | setup( 48 | name='pg_analyse', 49 | version=get_version(), 50 | url='https://github.com/idlesign/pg_analyse', 51 | 52 | description='Tools to gather useful information from PostgreSQL', 53 | long_description=read_file('README.rst'), 54 | license='BSD 3-Clause License', 55 | 56 | author='Igor `idle sign` Starikov', 57 | author_email='idlesign@yandex.ru', 58 | 59 | packages=find_packages(exclude=['tests']), 60 | include_package_data=True, 61 | zip_safe=False, 62 | 63 | install_requires=install_requires, 64 | 65 | extras_require={ 66 | 'cli': [ 67 | 'click', 68 | 'tabulate', 69 | ], 70 | }, 71 | 72 | setup_requires=(['pytest-runner'] if 'test' in sys.argv else []) + [], 73 | 74 | entry_points={ 75 | 'console_scripts': ['pg_analyse = pg_analyse.cli:main'], 76 | }, 77 | 78 | python_requires='>=3.10', 79 | 80 | 
    classifiers=[
        # As in https://pypi.python.org/pypi?:action=list_classifiers
        'Development Status :: 4 - Beta',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.10',
        'Programming Language :: Python :: 3.11',
        'Programming Language :: Python :: 3.12',
        'License :: OSI Approved :: BSD License'
    ],
)
-------------------------------------------------------------------------------- /tests/conftest.py: --------------------------------------------------------------------------------
from collections import namedtuple

import pytest


class PgMock:
    """Test double standing in for the psycopg module.

    The very same object plays every role: the module (``connect()``),
    the connection (context manager, ``cursor()``) and the cursor
    (``execute()`` / ``fetchall()`` / ``description``) — each of
    ``connect()``, ``cursor()`` and ``__enter__()`` returns ``self``.

    """

    def __init__(self, columns, rows, *, exception=None):
        # Column names exposed via .description.
        self.columns = columns
        # Rows returned by .fetchall().
        self.rows = rows
        # When set, .execute() raises ValueError with this message.
        self.exception = exception

    @property
    def description(self):
        """Mimics cursor.description: objects with a ``.name`` attribute."""
        column_descr = namedtuple('column_descr', ['name'])
        return [column_descr(column) for column in self.columns]

    def fetchall(self):
        return self.rows

    def connect(self, *arg, **kwargs):
        return self

    def cursor(self):
        return self

    def execute(self, *args, **kwargs):
        # Simulates a query failure when an exception message was provided.
        exception = self.exception
        if exception:
            raise ValueError(exception)
        return

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        pass


@pytest.fixture
def mock_pg(monkeypatch):
    """Fixture producing a function that replaces ``pg_analyse.toolbox.psycopg``
    with a PgMock primed with the given columns/rows (and optional failure)."""

    def mock_pg_(columns, rows, *, exception=None):
        mock_ = PgMock(columns, rows, exception=exception)
        monkeypatch.setattr('pg_analyse.toolbox.psycopg', mock_)
        return mock_

    return mock_pg_
-------------------------------------------------------------------------------- /tests/test_module.py: --------------------------------------------------------------------------------
import json
from os import environ

from pg_analyse.settings
import ENV_VAR
from pg_analyse.toolbox import analyse_and_format, parse_args_string


def test_parse_args():

    # Whitespace around aliases, names and values must be tolerated.
    assert parse_args_string('idx_bloat :schema=my, bloat_min=20 ; idx_unused:schema =my') == {
        'idx_bloat': {
            'schema': 'my',
            'bloat_min': '20',
        },
        'idx_unused': {
            'schema': 'my',
        },
    }

    # A chunk with no values contributes nothing.
    assert parse_args_string('xxxx:') == {}


def test_analyse_and_format(mock_pg):

    mock_pg(
        ['some_size', 'size'], [
            [123456789, 0],
        ])

    environ[ENV_VAR] = 'host=localhost user=postgres password=postgres'

    # Default (table) format: raw value is rendered as-is.
    out = analyse_and_format()
    assert '123456789' in out

    # JSON format with human-friendly size rendering.
    out = analyse_and_format(
        fmt='json',
        human=True,
        only=['idx_unused', 'idx_bloat'],
        arguments={'idx_bloat': {'bloat_min': '70'}}
    )
    out = json.loads(out)

    assert out == [
        {'title': 'Bloating indexes', 'alias': 'idx_bloat',
         'arguments': {'schema': 'public', 'bloat_min': '70'}, 'errors': [],
         'result': {'rows': [['117.74 MB', '0 B']], 'columns': ['some_size', 'size']}
         },
        {'title': 'Unused indexes', 'alias': 'idx_unused',
         'arguments': {'schema': 'public'}, 'errors': [],
         'result': {'rows': [['117.74 MB', '0 B']], 'columns': ['some_size', 'size']}}]


def test_common_params(mock_pg):

    mock_pg(
        ['some_size', 'size'], [
            [123456789, 0],
        ])

    # "common" params apply to every inspection but per-inspection
    # params take precedence over them.
    out = analyse_and_format(
        fmt='json',
        human=True,
        only=['idx_unused', 'idx_bloat'],
        arguments={'idx_bloat': {'bloat_min': '70', 'schema': 'overridden'}, 'common': {'schema': 'shared'}}
    )
    out = json.loads(out)
    assert out == [
        {'title': 'Bloating indexes', 'alias': 'idx_bloat', 'arguments': {'schema': 'overridden', 'bloat_min': '70'},
         'errors': [], 'result': {'rows': [['117.74 MB', '0 B']], 'columns': ['some_size', 'size']}},
        {'title': 'Unused indexes', 'alias': 'idx_unused', 'arguments':
             {'schema': 'shared'}, 'errors': [],
         'result': {'rows': [['117.74 MB', '0 B']], 'columns': ['some_size', 'size']}}]


def test_exceptions(mock_pg):

    # Errors raised during SQL execution must be captured into
    # inspection ``errors`` instead of propagating.
    mock_pg([], [], exception='bang!')

    environ[ENV_VAR] = 'host=localhost user=postgres password=postgres'

    out = analyse_and_format()
    assert 'bang!' in out

    out = analyse_and_format(fmt='json', only=['idx_unused'],)
    out = json.loads(out)
    assert out == [{
        'title': 'Unused indexes', 'alias': 'idx_unused',
        'arguments': {'schema': 'public'},
        'errors': ['bang!'], 'result': {'rows': [], 'columns': []}}]

-------------------------------------------------------------------------------- /tox.ini: --------------------------------------------------------------------------------
# See http://tox.readthedocs.org/en/latest/examples.html for samples.
[tox]
envlist =
    py{310,311,312}

skip_missing_interpreters = True

install_command = pip install {opts} {packages}

[testenv]
commands =
    python setup.py test

deps =
