├── lib
    ├── pgdexter.rb
    ├── dexter
    │   ├── version.rb
    │   ├── sources
    │   │   ├── statement_source.rb
    │   │   ├── log_source.rb
    │   │   ├── pg_stat_statements_source.rb
    │   │   └── pg_stat_activity_source.rb
    │   ├── parsers
    │   │   ├── sql_log_parser.rb
    │   │   ├── log_parser.rb
    │   │   ├── json_log_parser.rb
    │   │   ├── csv_log_parser.rb
    │   │   └── stderr_log_parser.rb
    │   ├── logging.rb
    │   ├── query.rb
    │   ├── collector.rb
    │   ├── processor.rb
    │   ├── index_creator.rb
    │   ├── column_resolver.rb
    │   ├── connection.rb
    │   ├── table_resolver.rb
    │   ├── client.rb
    │   └── indexer.rb
    └── dexter.rb
├── test
    ├── support
    │   ├── queries.sql
    │   ├── queries.log
    │   ├── queries.csv
    │   ├── queries.json
    │   └── schema.sql
    ├── connection_test.rb
    ├── test_helper.rb
    ├── create_test.rb
    ├── batching_test.rb
    ├── indexing_test.rb
    ├── input_test.rb
    └── statement_test.rb
├── .gitignore
├── exe
    └── dexter
├── Gemfile
├── Dockerfile
├── pgdexter.gemspec
├── guides
    ├── Linux.md
    └── Hosted-Postgres.md
├── LICENSE.txt
├── Rakefile
├── .github
    └── workflows
    │   └── build.yml
├── CHANGELOG.md
└── README.md
/lib/pgdexter.rb: -------------------------------------------------------------------------------- 1 | require_relative "dexter" 2 | --------------------------------------------------------------------------------
/test/support/queries.sql: -------------------------------------------------------------------------------- 1 | SELECT * FROM posts WHERE id = 1; 2 | --------------------------------------------------------------------------------
/lib/dexter/version.rb: -------------------------------------------------------------------------------- 1 | module Dexter 2 | VERSION = "0.6.3" 3 | end 4 | --------------------------------------------------------------------------------
/.gitignore: -------------------------------------------------------------------------------- 1 | /.bundle/ 2 | /.yardoc 3 | /Gemfile.lock 4 | /_yardoc/ 5 | /coverage/ 6 | /doc/ 7 | /pkg/ 8 | /spec/reports/ 9 | /tmp/ 10 | --------------------------------------------------------------------------------
/exe/dexter: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | # handle interrupts 4 | trap("SIGINT") { abort } 5 | 6 | require "dexter" 7 | Dexter::Client.start 8 | --------------------------------------------------------------------------------
/Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | gemspec 4 | 5 | gem "rake" 6 | gem "minitest", ">= 5" 7 | gem "benchmark-ips", require: false 8 | --------------------------------------------------------------------------------
/test/support/queries.log: -------------------------------------------------------------------------------- 1 | 2017-12-19 02:50:28 UTC [417-3935] dexter@dexter_test LOG: duration: 21.701 ms execute <unnamed>: SELECT * FROM posts WHERE id = $1 2 | 2017-12-19 02:50:28 UTC [417-3935] dexter@dexter_test DETAIL: parameters: $1 = '1' 3 | --------------------------------------------------------------------------------
/test/support/queries.csv: -------------------------------------------------------------------------------- 1 | 2017-12-22 02:51:27.549 UTC,"dexter","dexter_test",21239,"127.0.0.1:52302",5a3c732f.52f7,1,"SELECT",2017-12-22 02:51:27 UTC,15/6,0,LOG,00000,"duration: 18.249 ms execute <unnamed>: SELECT * FROM posts 2 | WHERE id = $1","parameters: $1 = '1'",,,,,,,"app" 3 | --------------------------------------------------------------------------------
/lib/dexter/sources/statement_source.rb:
-------------------------------------------------------------------------------- 1 | module Dexter 2 | class StatementSource 3 | def initialize(statements) 4 | @statements = statements 5 | end 6 | 7 | def perform(collector) 8 | @statements.each do |statement| 9 | collector.add(statement, 0, 0, true) 10 | end 11 | end 12 | end 13 | end 14 | --------------------------------------------------------------------------------
/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ruby:3-alpine 2 | 3 | LABEL org.opencontainers.image.authors="Andrew Kane <andrew@ankane.org>" 4 | 5 | RUN apk add --update build-base libpq-dev && \ 6 | gem install google-protobuf pg --platform ruby && \ 7 | gem install pgdexter && \ 8 | apk del build-base && \ 9 | rm -rf /var/cache/apk/* 10 | 11 | ENTRYPOINT ["dexter"] 12 | --------------------------------------------------------------------------------
/lib/dexter/parsers/sql_log_parser.rb: -------------------------------------------------------------------------------- 1 | module Dexter 2 | class SqlLogParser < LogParser 3 | def perform(collector) 4 | # TODO support streaming 5 | @logfile.read.split(";").each do |statement| 6 | statement = statement.strip 7 | collector.add(statement, 0) unless statement.empty? 8 | end 9 | end 10 | end 11 | end 12 | --------------------------------------------------------------------------------
/test/support/queries.json: -------------------------------------------------------------------------------- 1 | {"timestamp":"2022-10-14 23:20:37.479 UTC","user":"dexter","dbname":"dexter_test","pid":66145,"remote_host":"[local]","session_id":"634a5135.10261","line_num":3,"ps":"SELECT","session_start":"2022-10-14 23:20:37 UTC","vxid":"4/132","txid":0,"error_severity":"LOG","message":"duration: 12.197 ms execute <unnamed>: SELECT * FROM posts WHERE id = $1","detail":"parameters: $1 = '1'","application_name":"app","backend_type":"client backend","query_id":0} 2 | --------------------------------------------------------------------------------
/lib/dexter/logging.rb: -------------------------------------------------------------------------------- 1 | module Dexter 2 | module Logging 3 | COLOR_CODES = { 4 | red: 31, 5 | green: 32, 6 | yellow: 33, 7 | cyan: 36 8 | } 9 | 10 | def output 11 | $dexter_output || $stdout 12 | end 13 | 14 | def log(message = "") 15 | output.puts(message) unless $log_level == "error" 16 | end 17 | 18 | def colorize(message, color) 19 | if output.tty? 20 | "\e[#{COLOR_CODES[color]}m#{message}\e[0m" 21 | else 22 | message 23 | end 24 | end 25 | end 26 | end 27 | --------------------------------------------------------------------------------
/lib/dexter/sources/log_source.rb: -------------------------------------------------------------------------------- 1 | module Dexter 2 | class LogSource 3 | def initialize(logfile, input_format) 4 | @log_parser = 5 | case input_format 6 | when "csv" 7 | CsvLogParser.new(logfile) 8 | when "json" 9 | JsonLogParser.new(logfile) 10 | when "sql" 11 | SqlLogParser.new(logfile) 12 | else 13 | StderrLogParser.new(logfile) 14 | end 15 | @stdin = logfile == STDIN 16 | end 17 | 18 | def perform(collector) 19 | @log_parser.perform(collector) 20 | end 21 | 22 | def stdin?
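# true when the log is streamed from stdin; Processor polls streaming sources on an interval (see Processor#perform)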
23 | @stdin 24 | end 25 | end 26 | end 27 | -------------------------------------------------------------------------------- /lib/dexter/parsers/log_parser.rb: -------------------------------------------------------------------------------- 1 | module Dexter 2 | class LogParser 3 | REGEX = /duration: (\d+\.\d+) ms (statement|execute [^:]+): (.+)/ 4 | 5 | def initialize(logfile) 6 | @logfile = logfile 7 | end 8 | 9 | private 10 | 11 | def add_parameters(active_line, details) 12 | if details.start_with?("parameters: ") 13 | params = Hash[details[12..-1].split(", ").map { |s| s.split(" = ", 2) }] 14 | 15 | # make sure parsing was successful 16 | unless params.value?(nil) 17 | params.each do |k, v| 18 | active_line.sub!(k, v) 19 | end 20 | end 21 | end 22 | end 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /lib/dexter/parsers/json_log_parser.rb: -------------------------------------------------------------------------------- 1 | module Dexter 2 | class JsonLogParser < LogParser 3 | FIRST_LINE_REGEX = /\A.+/ 4 | 5 | def perform(collector) 6 | @logfile.each_line do |line| 7 | row = JSON.parse(line.chomp) 8 | if (m = REGEX.match(row["message"])) 9 | # replace first line with match 10 | # needed for multiline queries 11 | active_line = row["message"].sub(FIRST_LINE_REGEX, m[3]) 12 | 13 | add_parameters(active_line, row["detail"]) if row["detail"] 14 | collector.add(active_line, m[1].to_f) 15 | end 16 | end 17 | rescue JSON::ParserError => e 18 | raise Error, "ERROR: #{e.message}" 19 | ensure 20 | @logfile.close 21 | end 22 | end 23 | end 24 | -------------------------------------------------------------------------------- /lib/dexter/parsers/csv_log_parser.rb: -------------------------------------------------------------------------------- 1 | module Dexter 2 | class CsvLogParser < LogParser 3 | FIRST_LINE_REGEX = /\A.+/ 4 | 5 | def perform(collector) 6 | CSV.new(@logfile.to_io).each do |row| 7 | message = row[13] 8 | detail = row[14] 9 | 10 | if (m = REGEX.match(message)) 11 | # replace first line with match 12 | # needed for multiline queries 13 | active_line = message.sub(FIRST_LINE_REGEX, m[3]) 14 | 15 | add_parameters(active_line, detail) if detail 16 | collector.add(active_line, m[1].to_f) 17 | end 18 | end 19 | rescue CSV::MalformedCSVError => e 20 | raise Error, "ERROR: #{e.message}" 21 | ensure 22 | @logfile.close 23 | end 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /pgdexter.gemspec: -------------------------------------------------------------------------------- 1 | require_relative "lib/dexter/version" 2 | 3 | Gem::Specification.new do |spec| 4 | spec.name = "pgdexter" 5 | spec.version = Dexter::VERSION 6 | spec.summary = "The automatic indexer for Postgres" 7 | spec.homepage = "https://github.com/ankane/dexter" 8 | spec.license = "MIT" 9 | 10 | spec.author = "Andrew Kane" 11 | spec.email = "andrew@ankane.org" 12 | 13 | spec.files = Dir["*.{md,txt}", "{lib,exe}/**/*"] 14 | spec.require_path = "lib" 15 | 16 | spec.bindir = "exe" 17 | spec.executables = ["dexter"] 18 | 19 | spec.required_ruby_version = ">= 3" 20 | 21 | spec.add_dependency "csv" 22 | spec.add_dependency "pg", ">= 1" 23 | spec.add_dependency "pg_query", "~> 6" 24 | spec.add_dependency "slop", ">= 4.10.1" 25 | end 26 | -------------------------------------------------------------------------------- /lib/dexter/parsers/stderr_log_parser.rb: 
-------------------------------------------------------------------------------- 1 | module Dexter 2 | class StderrLogParser < LogParser 3 | LINE_SEPARATOR = ": ".freeze 4 | DETAIL_LINE = "DETAIL: ".freeze 5 | 6 | def perform(collector) 7 | active_line = nil 8 | duration = nil 9 | 10 | @logfile.each_line do |line| 11 | if active_line 12 | if line.include?(DETAIL_LINE) 13 | add_parameters(active_line, line.chomp.split(DETAIL_LINE)[1]) 14 | elsif line.include?(LINE_SEPARATOR) 15 | collector.add(active_line, duration) 16 | active_line = nil 17 | else 18 | active_line << line 19 | end 20 | end 21 | 22 | if !active_line && (m = REGEX.match(line.chomp)) 23 | duration = m[1].to_f 24 | active_line = m[3] 25 | end 26 | end 27 | collector.add(active_line, duration) if active_line 28 | end 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /lib/dexter/sources/pg_stat_statements_source.rb: -------------------------------------------------------------------------------- 1 | module Dexter 2 | class PgStatStatementsSource 3 | def initialize(connection) 4 | @connection = connection 5 | end 6 | 7 | def perform(collector) 8 | stat_statements.each do |row| 9 | collector.add(row["query"], row["duration_ms"].to_f, row["calls"].to_i) 10 | end 11 | end 12 | 13 | # could group, sum, and filter min_time/min_calls in SQL, but keep simple for now 14 | def stat_statements 15 | sql = <<~SQL 16 | SELECT 17 | query, 18 | total_plan_time + total_exec_time AS duration_ms, 19 | calls 20 | FROM 21 | pg_stat_statements 22 | INNER JOIN 23 | pg_database ON pg_database.oid = pg_stat_statements.dbid 24 | WHERE 25 | datname = current_database() 26 | ORDER BY 27 | 1 28 | SQL 29 | @connection.execute(sql) 30 | rescue PG::UndefinedTable => e 31 | raise Error, e.message 32 | end 33 | end 34 | end 35 | -------------------------------------------------------------------------------- /guides/Linux.md: -------------------------------------------------------------------------------- 1 | # Linux Packages 2 | 3 | - [Ubuntu](#ubuntu) 4 | - [Debian](#debian) 5 | 6 | ### Ubuntu 7 | 8 | ```sh 9 | wget -qO- https://dl.packager.io/srv/pghero/dexter/key | sudo apt-key add - 10 | sudo wget -O /etc/apt/sources.list.d/dexter.list \ 11 | https://dl.packager.io/srv/pghero/dexter/master/installer/ubuntu/$(. /etc/os-release && echo $VERSION_ID).repo 12 | sudo apt-get update 13 | sudo apt-get -y install dexter 14 | ``` 15 | 16 | Supports Ubuntu 22.04 (Jammy) and 24.04 (Noble) 17 | 18 | ### Debian 19 | 20 | ```sh 21 | sudo apt-get -y install apt-transport-https 22 | wget -qO- https://dl.packager.io/srv/pghero/dexter/key | sudo apt-key add - 23 | sudo wget -O /etc/apt/sources.list.d/dexter.list \ 24 | https://dl.packager.io/srv/pghero/dexter/master/installer/debian/$(. 
/etc/os-release && echo $VERSION_ID).repo 25 | sudo apt-get update 26 | sudo apt-get -y install dexter 27 | ``` 28 | 29 | Supports Debian 11 (Bullseye) and 12 (Bookworm) 30 | 31 | ## Credits 32 | 33 | :heart: Made possible by [Packager](https://packager.io/) 34 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017-2025 Andrew Kane 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /test/connection_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class ConnectionTest < Minitest::Test 4 | def test_flag 5 | assert_connection "-d", "dexter_test" 6 | end 7 | 8 | def test_string 9 | assert_connection "dbname=dexter_test" 10 | end 11 | 12 | def test_url_postgres 13 | assert_connection "postgres:///dexter_test" 14 | end 15 | 16 | def test_url_postgresql 17 | assert_connection "postgresql:///dexter_test" 18 | end 19 | 20 | def test_host 21 | assert_connection_error "could not translate host name", "-h", "bad" 22 | end 23 | 24 | def test_port 25 | assert_connection_error "5433", "-p", "5433" 26 | end 27 | 28 | def test_user 29 | assert_connection_error (/(role|user) "bad"/), "-U", "bad" 30 | end 31 | 32 | private 33 | 34 | def assert_connection(*args) 35 | output = run_command(*args, "-s", "SELECT 1", add_conninfo: false) 36 | assert_match "No new indexes found", output 37 | end 38 | 39 | def assert_connection_error(expected, *args) 40 | error = assert_raises(Dexter::Error) do 41 | assert_connection(*args) 42 | end 43 | assert_match expected, error.message 44 | end 45 | end 46 | -------------------------------------------------------------------------------- /lib/dexter/query.rb: -------------------------------------------------------------------------------- 1 | module Dexter 2 | class Query 3 | attr_reader :statement, :fingerprint, :total_time, :calls, :plans 4 | attr_accessor :tables, :missing_tables, :new_cost, :indexes, :suggest_index, :pass1_indexes, :pass2_indexes, :pass3_indexes, :candidate_tables, :tables_from_views, :index_mapping, :columns, :candidate_columns 5 | 6 | def initialize(statement, fingerprint = nil, total_time: nil, calls: nil) 7 | @statement = statement 8 | @fingerprint = fingerprint 9 | @total_time = 
total_time 10 | @calls = calls 11 | @plans = [] 12 | @tables_from_views = [] 13 | @candidate_tables = [] 14 | @columns = [] 15 | @candidate_columns = [] 16 | end 17 | 18 | def parser_result 19 | unless defined?(@parser_result) 20 | @parser_result = PgQuery.parse(statement) rescue nil 21 | end 22 | @parser_result 23 | end 24 | 25 | def tree 26 | parser_result.tree 27 | end 28 | 29 | def fully_analyzed? 30 | plans.size >= 3 31 | end 32 | 33 | def costs 34 | plans.map { |plan| plan["Total Cost"] } 35 | end 36 | 37 | def initial_cost 38 | costs[0] 39 | end 40 | end 41 | end 42 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "bundler/gem_tasks" 2 | require "rake/testtask" 3 | 4 | Rake::TestTask.new(:test) do |t| 5 | t.libs << "test" 6 | t.libs << "lib" 7 | t.test_files = FileList["test/**/*_test.rb"] 8 | end 9 | 10 | task default: :test 11 | 12 | namespace :docker do 13 | task :build do 14 | require_relative "lib/dexter/version" 15 | 16 | system "docker build --pull --no-cache -t ankane/dexter:latest -t ankane/dexter:v#{Dexter::VERSION} .", exception: true 17 | end 18 | 19 | task :release do 20 | require_relative "lib/dexter/version" 21 | 22 | system "docker buildx build --push --pull --no-cache --platform linux/amd64,linux/arm64 -t ankane/dexter:latest -t ankane/dexter:v#{Dexter::VERSION} .", exception: true 23 | end 24 | end 25 | 26 | namespace :bench do 27 | task :find_columns do 28 | require "benchmark/ips" 29 | require "dexter" 30 | 31 | resolver = Dexter::ColumnResolver.new(nil, [], log_level: nil) 32 | query = Dexter::Query.new("SELECT * FROM posts WHERE user_id = 1 ORDER BY blog_id LIMIT 1000") 33 | Benchmark.ips do |x| 34 | x.report("find_columns") do 35 | resolver.send(:find_columns, query.tree) 36 | end 37 | end 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /lib/dexter.rb: -------------------------------------------------------------------------------- 1 | # dependencies 2 | require "pg" 3 | require "pg_query" 4 | require "slop" 5 | 6 | # stdlib 7 | require "csv" 8 | require "json" 9 | require "set" 10 | require "time" 11 | 12 | # modules 13 | require_relative "dexter/logging" 14 | require_relative "dexter/client" 15 | require_relative "dexter/collector" 16 | require_relative "dexter/column_resolver" 17 | require_relative "dexter/connection" 18 | require_relative "dexter/index_creator" 19 | require_relative "dexter/indexer" 20 | require_relative "dexter/processor" 21 | require_relative "dexter/query" 22 | require_relative "dexter/table_resolver" 23 | require_relative "dexter/version" 24 | 25 | # parsers 26 | require_relative "dexter/parsers/log_parser" 27 | require_relative "dexter/parsers/csv_log_parser" 28 | require_relative "dexter/parsers/json_log_parser" 29 | require_relative "dexter/parsers/sql_log_parser" 30 | require_relative "dexter/parsers/stderr_log_parser" 31 | 32 | # sources 33 | require_relative "dexter/sources/log_source" 34 | require_relative "dexter/sources/pg_stat_activity_source" 35 | require_relative "dexter/sources/pg_stat_statements_source" 36 | require_relative "dexter/sources/statement_source" 37 | 38 | module Dexter 39 | class Error < StandardError; end 40 | end 41 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: 
build 2 | on: [push, pull_request] 3 | jobs: 4 | build: 5 | runs-on: ubuntu-latest 6 | strategy: 7 | fail-fast: false 8 | matrix: 9 | include: 10 | - postgres: 17 11 | ruby: 3.4 12 | - postgres: 16 13 | ruby: 3.3 14 | - postgres: 15 15 | ruby: 3.2 16 | - postgres: 14 17 | ruby: 3.1 18 | - postgres: 13 19 | ruby: "3.0" 20 | steps: 21 | - uses: actions/checkout@v4 22 | - uses: ruby/setup-ruby@v1 23 | with: 24 | ruby-version: ${{ matrix.ruby }} 25 | bundler-cache: true 26 | - uses: ankane/setup-postgres@v1 27 | with: 28 | postgres-version: ${{ matrix.postgres }} 29 | database: dexter_test 30 | dev-files: true 31 | config: | 32 | shared_preload_libraries = 'pg_stat_statements' 33 | - run: | 34 | cd /tmp 35 | curl -L https://github.com/HypoPG/hypopg/archive/1.4.2.tar.gz | tar xz 36 | cd hypopg-1.4.2 37 | make 38 | export PG_CONFIG=`which pg_config` 39 | sudo --preserve-env=PG_CONFIG make install 40 | - run: bundle exec rake test 41 | -------------------------------------------------------------------------------- /lib/dexter/collector.rb: -------------------------------------------------------------------------------- 1 | module Dexter 2 | class Collector 3 | def initialize(min_time:, min_calls:) 4 | @top_queries = {} 5 | @new_queries = Set.new 6 | @mutex = Mutex.new 7 | @min_time = min_time * 60000 # convert minutes to ms 8 | @min_calls = min_calls 9 | end 10 | 11 | def add(query, total_time, calls = 1, keep_all = false) 12 | fingerprint = PgQuery.fingerprint(query) rescue nil 13 | fingerprint ||= "unknown" if keep_all 14 | return if fingerprint.nil? 15 | 16 | @top_queries[fingerprint] ||= {calls: 0, total_time: 0} 17 | @top_queries[fingerprint][:calls] += calls 18 | @top_queries[fingerprint][:total_time] += total_time 19 | @top_queries[fingerprint][:query] = query 20 | @mutex.synchronize do 21 | @new_queries << fingerprint 22 | end 23 | end 24 | 25 | def fetch_queries 26 | new_queries = nil 27 | 28 | @mutex.synchronize do 29 | new_queries = @new_queries.dup 30 | @new_queries.clear 31 | end 32 | 33 | queries = [] 34 | @top_queries.each do |fingerprint, query| 35 | if new_queries.include?(fingerprint) && query[:total_time] >= @min_time && query[:calls] >= @min_calls 36 | queries << Query.new(query[:query], fingerprint, total_time: query[:total_time], calls: query[:calls]) 37 | end 38 | end 39 | 40 | queries 41 | end 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /guides/Hosted-Postgres.md: -------------------------------------------------------------------------------- 1 | # Hosted Postgres 2 | 3 | Some hosted providers like Heroku do not support the HypoPG extension, which Dexter needs to run. Hopefully this will change with time. For now, we can spin up a separate database instance to run Dexter. It’s not super convenient, but can be useful to do from time to time. 
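To confirm that HypoPG is missing on your provider, you can run the same check against `pg_available_extensions` that Dexter itself performs on startup (if it returns no rows, the extension cannot be enabled there):

```sql
-- no rows means HypoPG is not available on this instance
SELECT name, default_version, installed_version
FROM pg_available_extensions
WHERE name = 'hypopg';
```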
4 | 5 | ### Install Postgres, HypoPG, and Dexter 6 | 7 | Ubuntu 8 | 9 | ```sh 10 | sudo apt-get install -y postgresql-common 11 | sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh -y 12 | sudo apt-get install -y postgresql-17 postgresql-17-hypopg 13 | sudo service postgresql start 14 | sudo -u postgres createuser $(whoami) -s 15 | sudo apt-get install -y build-essential libpq-dev ruby ruby-dev 16 | sudo gem install pgdexter 17 | ``` 18 | 19 | Mac 20 | 21 | ```sh 22 | brew install postgresql@17 hypopg dexter 23 | ``` 24 | 25 | ### Download logs 26 | 27 | #### Heroku 28 | 29 | Production-tier databases only 30 | 31 | ```sh 32 | heroku logs -p postgres > postgresql.log 33 | ``` 34 | 35 | ### Dump and restore 36 | 37 | We recommend creating a new instance from a snapshot for the dump to avoid affecting customers. 38 | 39 | ```sh 40 | pg_dump -v -j 8 -Fd -f /tmp/newout.dir 41 | ``` 42 | 43 | Then shutdown the dump instance. Restore with: 44 | 45 | ```sh 46 | createdb dexter_restore 47 | pg_restore -v -j 8 -x -O --format=d -d dexter_restore /tmp/newout.dir/ 48 | ``` 49 | 50 | ### Run Dexter 51 | 52 | ```sh 53 | dexter dexter_restore postgresql.log* --analyze 54 | ``` 55 | 56 | :tada: 57 | -------------------------------------------------------------------------------- /lib/dexter/sources/pg_stat_activity_source.rb: -------------------------------------------------------------------------------- 1 | module Dexter 2 | class PgStatActivitySource 3 | def initialize(connection) 4 | @connection = connection 5 | end 6 | 7 | def perform(collector) 8 | previous_queries = {} 9 | 10 | 10.times do 11 | active_queries = {} 12 | processed_queries = {} 13 | 14 | stat_activity.each do |row| 15 | if row["state"] == "active" 16 | active_queries[row["id"]] = row 17 | else 18 | collector.add(row["query"], row["duration_ms"].to_f) 19 | processed_queries[row["id"]] = true 20 | end 21 | end 22 | 23 | # store queries after they complete 24 | previous_queries.each do |id, row| 25 | if !active_queries[id] && !processed_queries[id] 26 | collector.add(row["query"], row["duration_ms"].to_f) 27 | end 28 | end 29 | 30 | previous_queries = active_queries 31 | 32 | sleep($dexter_test ? 
0 : 0.1) 33 | end 34 | end 35 | 36 | def stat_activity 37 | sql = <<~SQL 38 | SELECT 39 | pid || ':' || COALESCE(query_start, xact_start) AS id, 40 | query, 41 | state, 42 | EXTRACT(EPOCH FROM NOW() - COALESCE(query_start, xact_start)) * 1000.0 AS duration_ms 43 | FROM 44 | pg_stat_activity 45 | WHERE 46 | datname = current_database() 47 | AND pid != pg_backend_pid() 48 | ORDER BY 49 | 1 50 | SQL 51 | @connection.execute(sql) 52 | end 53 | end 54 | end 55 | -------------------------------------------------------------------------------- /test/test_helper.rb: -------------------------------------------------------------------------------- 1 | require "bundler/setup" 2 | Bundler.require(:default) 3 | require "minitest/autorun" 4 | require "minitest/pride" 5 | require "stringio" 6 | 7 | $dexter_test = true 8 | $url = "postgres:///dexter_test" 9 | $conn = PG::Connection.new($url) 10 | $conn.exec("SET client_min_messages = warning") 11 | $conn.exec(File.read(File.expand_path("support/schema.sql", __dir__))) 12 | 13 | class Minitest::Test 14 | def assert_index(statement, index, *args) 15 | assert_output "Index found: #{index}", "-s", statement, "--log-level", "debug2", *args 16 | end 17 | 18 | def assert_no_index(statement, *args, reason: nil) 19 | output = run_command("-s", statement, "--log-level", "debug2", *args) 20 | assert_match "No new indexes found", output 21 | assert_match reason, output if reason 22 | end 23 | 24 | def run_command(*args, add_conninfo: true) 25 | $dexter_output = StringIO.new(+"") 26 | args.unshift($url) if add_conninfo 27 | client = Dexter::Client.new(args) 28 | ex = nil 29 | begin 30 | client.perform 31 | rescue => e 32 | ex = e 33 | end 34 | stdout = $dexter_output.string 35 | puts stdout if ENV["VERBOSE"] 36 | raise ex if ex 37 | stdout 38 | end 39 | 40 | def assert_output(expected, *args) 41 | assert_match expected, run_command(*args) 42 | end 43 | 44 | def assert_error(expected, *args) 45 | error = assert_raises(Dexter::Error) do 46 | run_command(*args) 47 | end 48 | assert_match expected, error.message 49 | end 50 | 51 | def server_version 52 | @server_version ||= execute("SHOW server_version_num").first["server_version_num"].to_i / 10000 53 | end 54 | 55 | def execute(statement) 56 | $conn.exec(statement) 57 | end 58 | end 59 | -------------------------------------------------------------------------------- /test/support/schema.sql: -------------------------------------------------------------------------------- 1 | CREATE EXTENSION IF NOT EXISTS hstore; 2 | CREATE EXTENSION IF NOT EXISTS hypopg; 3 | 4 | DROP TABLE IF EXISTS posts CASCADE; 5 | CREATE TABLE posts ( 6 | id int, 7 | blog_id int, 8 | user_id int, 9 | json json, 10 | jsonb jsonb, 11 | hstore hstore, 12 | point point, 13 | indexed int 14 | ); 15 | INSERT INTO posts (id, blog_id, user_id, indexed) SELECT n, n % 1000, n % 10, n FROM generate_series(1, 100000) n; 16 | CREATE INDEX ON posts (indexed); 17 | CREATE VIEW posts_view AS SELECT id AS view_id FROM posts; 18 | CREATE MATERIALIZED VIEW posts_materialized AS SELECT * FROM posts; 19 | ANALYZE posts; 20 | 21 | DROP TABLE IF EXISTS blogs; 22 | CREATE TABLE blogs ( 23 | id int PRIMARY KEY 24 | ); 25 | INSERT INTO blogs (id) SELECT n FROM generate_series(1, 1000) n; 26 | ANALYZE blogs; 27 | 28 | DROP TABLE IF EXISTS events CASCADE; 29 | CREATE TABLE events ( 30 | id int, 31 | blog_id int 32 | ) PARTITION BY HASH (blog_id); 33 | CREATE TABLE events_0 PARTITION OF events FOR VALUES WITH (MODULUS 3, REMAINDER 0); 34 | CREATE TABLE events_1 PARTITION OF 
events FOR VALUES WITH (MODULUS 3, REMAINDER 1); 35 | CREATE TABLE events_2 PARTITION OF events FOR VALUES WITH (MODULUS 3, REMAINDER 2); 36 | INSERT INTO events (id, blog_id) SELECT n, n FROM generate_series(1, 100000) n; 37 | ANALYZE events; 38 | 39 | DROP SCHEMA IF EXISTS "Bar" CASCADE; 40 | CREATE SCHEMA "Bar"; 41 | CREATE TABLE "Bar"."Foo"("Id" int); 42 | INSERT INTO "Bar"."Foo" SELECT * FROM generate_series(1, 100000); 43 | ANALYZE "Bar"."Foo"; 44 | 45 | CREATE EXTENSION IF NOT EXISTS postgres_fdw; 46 | CREATE SERVER IF NOT EXISTS other FOREIGN DATA WRAPPER postgres_fdw; 47 | DROP FOREIGN TABLE IF EXISTS comments; 48 | CREATE FOREIGN TABLE comments (post_id int) SERVER other; 49 | -------------------------------------------------------------------------------- /test/create_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class CreateTest < Minitest::Test 4 | def teardown 5 | execute "DROP INDEX IF EXISTS posts_id_idx" 6 | super 7 | end 8 | 9 | def test_create 10 | expected = %{Creating index: CREATE INDEX CONCURRENTLY ON "public"."posts" ("id")} 11 | assert_output expected, "-s", "SELECT * FROM posts WHERE id = 1", "--create" 12 | end 13 | 14 | def test_tablespace 15 | expected = %{Creating index: CREATE INDEX CONCURRENTLY ON "public"."posts" ("id") TABLESPACE "pg_default"} 16 | assert_output expected, "-s", "SELECT * FROM posts WHERE id = 1", "--create", "--tablespace", "pg_default" 17 | end 18 | 19 | def test_non_concurrently 20 | expected = %{Creating index: CREATE INDEX ON "public"."posts" ("id")} 21 | assert_output expected, "-s", "SELECT * FROM posts WHERE id = 1", "--create", "--non-concurrently" 22 | end 23 | 24 | def test_partitioned_table 25 | expected = %{cannot create index on partitioned table "events" concurrently} 26 | assert_error expected, "-s", "SELECT * FROM events WHERE blog_id = 1", "--create" 27 | end 28 | 29 | def test_partitioned_table_non_concurrently 30 | expected = %{Creating index: CREATE INDEX ON "public"."events" ("blog_id")} 31 | assert_output expected, "-s", "SELECT * FROM events WHERE blog_id = 1", "--create", "--non-concurrently" 32 | ensure 33 | execute "DROP INDEX IF EXISTS events_blog_id_idx" 34 | end 35 | 36 | def test_partition 37 | expected = %{Creating index: CREATE INDEX CONCURRENTLY ON "public"."events_0" ("blog_id")} 38 | assert_output expected, "-s", "SELECT * FROM events_0 WHERE blog_id = 1", "--create" 39 | ensure 40 | execute "DROP INDEX IF EXISTS events_0_blog_id_idx" 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /test/batching_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class BatchingTest < Minitest::Test 4 | def test_many_queries 5 | skip unless ENV["TEST_BATCHING"] 6 | 7 | nc = 100 8 | create_table(nc) 9 | 10 | queries = [] 11 | nc.times do |i| 12 | (i + 1).upto(nc - 1) do |j| 13 | queries << "SELECT * FROM t WHERE c%02d = 0 AND c%02d = 1" % [i, j] 14 | end 15 | end 16 | queries.shuffle! 
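# write the shuffled queries to a temporary file to exercise the sql input format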
17 | 18 | tempfile = Tempfile.new 19 | queries.each do |query| 20 | tempfile << "#{query};\n" 21 | end 22 | tempfile.flush 23 | 24 | output = run_command(tempfile.path, "--input-format", "sql", "--log-level", "debug2") 25 | assert_equal nc, output.scan(/Index found/).size 26 | assert_match "Batch 24", output 27 | refute_match "Batch 25", output 28 | end 29 | 30 | def test_many_columns 31 | nc = 100 32 | create_table(nc) 33 | 34 | statement = "SELECT * FROM t WHERE #{nc.times.map { |i| "c%02d = 1" % i }.join(" AND ")}" 35 | output = run_command("-s", statement, "--log-level", "debug2") 36 | assert_match "WARNING: Limiting index candidates", output 37 | assert_match "Index found", output 38 | end 39 | 40 | def test_column_resolution 41 | statement = "SELECT id, blog_id FROM posts" 42 | # TODO ideally would be 0 43 | assert_output "4 hypothetical indexes", "-s", statement, "--log-level", "debug2" 44 | end 45 | 46 | def test_column_resolution_view 47 | statement = "SELECT view_id FROM posts_view WHERE view_id = 1" 48 | # TODO ideally would be 1 49 | assert_output "25 hypothetical indexes", "-s", statement, "--log-level", "debug2" 50 | end 51 | 52 | private 53 | 54 | def create_table(nc) 55 | execute "DROP TABLE IF EXISTS t" 56 | execute "CREATE TABLE t (#{nc.times.map { |i| "c%02d int" % i }.join(", ")})" 57 | execute "INSERT INTO t SELECT #{nc.times.map { "n" }.join(", ")} FROM generate_series(1, 2000) n" 58 | end 59 | end 60 | -------------------------------------------------------------------------------- /lib/dexter/processor.rb: -------------------------------------------------------------------------------- 1 | module Dexter 2 | class Processor 3 | include Logging 4 | 5 | def initialize(source, collector, indexer, interval:) 6 | @source = source 7 | @collector = collector 8 | @indexer = indexer 9 | 10 | @starting_interval = 3 11 | @interval = interval 12 | 13 | @mutex = Mutex.new 14 | @last_checked_at = {} 15 | 16 | log "Started" if !@source.is_a?(PgStatStatementsSource) && !@source.is_a?(StatementSource) 17 | end 18 | 19 | def perform 20 | if @source.is_a?(LogSource) && @source.stdin? 21 | Thread.abort_on_exception = true 22 | Thread.new do 23 | sleep(@starting_interval) 24 | loop do 25 | begin 26 | process_queries 27 | rescue PG::ServerError => e 28 | log colorize("ERROR: #{e.class.name}: #{e.message}", :red) 29 | end 30 | sleep(@interval) 31 | end 32 | end 33 | end 34 | 35 | begin 36 | @source.perform(@collector) 37 | rescue Errno::ENOENT => e 38 | raise Error, "ERROR: #{e.message}" 39 | end 40 | 41 | process_queries 42 | end 43 | 44 | private 45 | 46 | def process_queries 47 | @mutex.synchronize do 48 | process_queries_without_lock 49 | end 50 | end 51 | 52 | def process_queries_without_lock 53 | now = Process.clock_gettime(Process::CLOCK_MONOTONIC) 54 | min_checked_at = now - 3600 # don't recheck for an hour 55 | queries = [] 56 | @collector.fetch_queries.each do |query| 57 | if !@last_checked_at[query.fingerprint] || @last_checked_at[query.fingerprint] < min_checked_at 58 | queries << query 59 | @last_checked_at[query.fingerprint] = now 60 | end 61 | end 62 | 63 | log "Processing #{queries.size} new query fingerprints" unless @source.is_a?(StatementSource) 64 | @indexer.process_queries(queries) if queries.any? 
65 | end 66 | end 67 | end 68 | -------------------------------------------------------------------------------- /test/indexing_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class IndexingTest < Minitest::Test 4 | def test_enable_hypopg 5 | execute "DROP EXTENSION hypopg" 6 | 7 | expected = "Run `CREATE EXTENSION hypopg` or pass --enable-hypopg" 8 | assert_error expected, "-s", "SELECT 1" 9 | 10 | expected = "[sql] CREATE EXTENSION hypopg" 11 | assert_output expected, "-s", "SELECT 1", "--enable-hypopg", "--log-sql" 12 | ensure 13 | execute "CREATE EXTENSION IF NOT EXISTS hypopg" 14 | end 15 | 16 | def test_exclude 17 | assert_no_index "SELECT * FROM posts WHERE id = 1", "--exclude", "posts", reason: "No candidate tables for indexes" 18 | end 19 | 20 | def test_exclude_other 21 | assert_index "SELECT * FROM posts WHERE id = 1", "public.posts (id)", "--exclude", "other" 22 | end 23 | 24 | def test_include 25 | assert_index "SELECT * FROM posts WHERE id = 1", "public.posts (id)", "--include", "posts" 26 | end 27 | 28 | def test_include_other 29 | assert_no_index "SELECT * FROM posts WHERE id = 1", "--include", "other", reason: "No candidate tables for indexes" 30 | end 31 | 32 | def test_min_cost 33 | assert_no_index "SELECT * FROM posts WHERE id = 1", "--min-cost", "10000", reason: "Low initial cost" 34 | end 35 | 36 | def test_min_cost_savings_pct 37 | assert_no_index "SELECT * FROM posts WHERE id = 1", "--min-cost-savings-pct", "100", reason: "Need 100% cost savings to suggest index" 38 | end 39 | 40 | def test_analyze 41 | # last analyze time not reset consistently 42 | skip if server_version < 15 43 | 44 | execute "SELECT pg_stat_reset()" 45 | args = ["-s", "SELECT * FROM posts WHERE id = 1", "--log-sql"] 46 | refute_match "ANALYZE", run_command(*args) 47 | 48 | output = run_command(*args, "--analyze") 49 | assert_match %{Running analyze: ANALYZE "public"."posts"}, output 50 | assert_match %{[sql] ANALYZE "public"."posts"}, output 51 | 52 | refute_match "ANALYZE", run_command(*args, "--analyze") 53 | end 54 | 55 | def test_log_level_invalid 56 | assert_error "Unknown log level", "-s", "SELECT 1", "--log-level", "bad" 57 | end 58 | end 59 | -------------------------------------------------------------------------------- /test/input_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class InputTest < Minitest::Test 4 | def test_stderr 5 | assert_index_file "queries.log" 6 | end 7 | 8 | def test_csv 9 | assert_index_file "queries.csv" 10 | end 11 | 12 | def test_csv_invalid 13 | assert_error "Illegal quoting", support_path("queries.json"), "--input-format", "csv" 14 | end 15 | 16 | def test_json 17 | assert_index_file "queries.json" 18 | end 19 | 20 | def test_json_invalid 21 | assert_error "unexpected token", support_path("queries.log"), "--input-format", "json" 22 | end 23 | 24 | def test_sql 25 | assert_index_file "queries.sql" 26 | end 27 | 28 | def test_pg_stat_activity 29 | execute "SELECT * FROM posts WHERE id = 1" 30 | assert_output "Index found: public.posts (id)", "--pg-stat-activity" 31 | end 32 | 33 | def test_pg_stat_statements 34 | execute "CREATE EXTENSION IF NOT EXISTS pg_stat_statements" 35 | execute "SELECT pg_stat_statements_reset()" 36 | execute "SELECT * FROM posts WHERE id = 1" 37 | execute "REFRESH MATERIALIZED VIEW posts_materialized" 38 | assert_output "Index found: public.posts (id)", 
"--pg-stat-statements" 39 | assert_output "Index found: public.posts (id)", "--pg-stat-statements", "--min-calls", "1" 40 | end 41 | 42 | def test_pg_stat_statements_missing 43 | execute "DROP EXTENSION IF EXISTS pg_stat_statements" 44 | assert_error %{relation "pg_stat_statements" does not exist}, "--pg-stat-statements" 45 | end 46 | 47 | def test_multiple_statements 48 | output = run_command("-s", "SELECT * FROM posts WHERE id = 1", "-s", "SELECT * FROM posts WHERE user_id = 1") 49 | assert_match "Index found: public.posts (id)", output 50 | assert_match "Index found: public.posts (user_id)", output 51 | end 52 | 53 | def test_no_source 54 | assert_error "Specify a source of queries" 55 | end 56 | 57 | def test_input_format_invalid 58 | assert_error "Unknown input format", support_path("queries.json"), "--input-format", "bad" 59 | end 60 | 61 | private 62 | 63 | def support_path(file) 64 | File.expand_path("support/#{file}", __dir__) 65 | end 66 | 67 | def assert_index_file(file) 68 | output = run_command(support_path(file)) 69 | assert_match "Index found: public.posts (id)", output 70 | assert_match "Processing 1 new query fingerprints", output 71 | end 72 | end 73 | -------------------------------------------------------------------------------- /lib/dexter/index_creator.rb: -------------------------------------------------------------------------------- 1 | module Dexter 2 | class IndexCreator 3 | include Logging 4 | 5 | def initialize(connection, indexer, new_indexes, tablespace, concurrently) 6 | @connection = connection 7 | @indexer = indexer 8 | @new_indexes = new_indexes 9 | @tablespace = tablespace 10 | @concurrently = concurrently 11 | end 12 | 13 | # 1. create lock 14 | # 2. refresh existing index list 15 | # 3. create indexes that still don't exist 16 | # 4. release lock 17 | def perform 18 | with_advisory_lock do 19 | @new_indexes.each do |index| 20 | unless index_exists?(index) 21 | statement = String.new("CREATE INDEX") 22 | statement << " CONCURRENTLY" if @concurrently 23 | statement << " ON #{@connection.quote_ident(index[:table])} (#{index[:columns].map { |c| @connection.quote_ident(c) }.join(", ")})" 24 | statement << " TABLESPACE #{@connection.quote_ident(@tablespace)}" if @tablespace 25 | log "Creating index: #{statement}" 26 | started_at = monotonic_time 27 | begin 28 | @connection.execute(statement) 29 | log "Index created: #{((monotonic_time - started_at) * 1000).to_i} ms" 30 | rescue PG::LockNotAvailable 31 | log "Could not acquire lock: #{index[:table]}" 32 | rescue PG::FeatureNotSupported => e 33 | raise Error, e.message 34 | end 35 | end 36 | end 37 | end 38 | end 39 | 40 | private 41 | 42 | def monotonic_time 43 | Process.clock_gettime(Process::CLOCK_MONOTONIC) 44 | end 45 | 46 | def with_advisory_lock 47 | lock_id = 123456 48 | first_time = true 49 | while @connection.execute("SELECT pg_try_advisory_lock($1)", params: [lock_id]).first["pg_try_advisory_lock"] != "t" 50 | if first_time 51 | log "Waiting for lock..." 
52 | first_time = false 53 | end 54 | sleep(1) 55 | end 56 | yield 57 | ensure 58 | suppress_messages do 59 | @connection.execute("SELECT pg_advisory_unlock($1)", params: [lock_id]) 60 | end 61 | end 62 | 63 | def suppress_messages 64 | @connection.send(:conn).set_notice_processor do |message| 65 | # do nothing 66 | end 67 | yield 68 | ensure 69 | # clear notice processor 70 | @connection.send(:conn).set_notice_processor 71 | end 72 | 73 | def index_exists?(index) 74 | @indexer.send(:indexes, [index[:table]]).find { |i| i["columns"] == index[:columns] } 75 | end 76 | end 77 | end 78 | -------------------------------------------------------------------------------- /lib/dexter/column_resolver.rb: -------------------------------------------------------------------------------- 1 | module Dexter 2 | class ColumnResolver 3 | include Logging 4 | 5 | def initialize(connection, queries, log_level:) 6 | @connection = connection 7 | @queries = queries 8 | @log_level = log_level 9 | end 10 | 11 | def perform 12 | tables = Set.new(@queries.flat_map(&:candidate_tables)) 13 | columns = tables.any? ? self.columns(tables) : [] 14 | columns_by_table = columns.group_by { |c| c[:table] }.transform_values { |v| v.to_h { |c| [c[:column], c] } } 15 | columns_by_table.default = {} 16 | 17 | @queries.each do |query| 18 | log "Finding columns: #{query.statement}" if @log_level == "debug3" 19 | columns = Set.new 20 | begin 21 | find_columns(query.tree).each do |col| 22 | last_col = col["fields"].last 23 | if last_col["String"] 24 | columns << last_col["String"]["sval"] 25 | end 26 | end 27 | rescue JSON::NestingError 28 | if @log_level.start_with?("debug") 29 | log colorize("ERROR: Cannot get columns", :red) 30 | end 31 | end 32 | 33 | possible_columns = [] 34 | columns.each do |column| 35 | query.candidate_tables.each do |table| 36 | resolved = columns_by_table.dig(table, column) 37 | possible_columns << resolved if resolved 38 | end 39 | end 40 | # use all columns in tables from views (not ideal) 41 | (query.tables_from_views & query.candidate_tables).each do |table| 42 | possible_columns.concat(columns_by_table[table].values) 43 | end 44 | query.columns = possible_columns.uniq 45 | end 46 | end 47 | 48 | private 49 | 50 | def find_columns(plan) 51 | plan = JSON.parse(plan.to_json, max_nesting: 1000) 52 | Indexer.find_by_key(plan, "ColumnRef") 53 | end 54 | 55 | def columns(tables) 56 | query = <<~SQL 57 | SELECT 58 | s.nspname || '.' || t.relname AS table_name, 59 | a.attname AS column_name, 60 | pg_catalog.format_type(a.atttypid, a.atttypmod) AS data_type 61 | FROM pg_attribute a 62 | JOIN pg_class t on a.attrelid = t.oid 63 | JOIN pg_namespace s on t.relnamespace = s.oid 64 | WHERE a.attnum > 0 65 | AND NOT a.attisdropped 66 | AND s.nspname || '.' 
|| t.relname IN (#{tables.size.times.map { |i| "$#{i + 1}" }.join(", ")}) 67 | ORDER BY 68 | 1, 2 69 | SQL 70 | columns = @connection.execute(query, params: tables.to_a) 71 | columns.map { |v| {table: v["table_name"], column: v["column_name"], type: v["data_type"]} } 72 | end 73 | end 74 | end 75 | -------------------------------------------------------------------------------- /lib/dexter/connection.rb: -------------------------------------------------------------------------------- 1 | module Dexter 2 | class Connection 3 | include Logging 4 | 5 | def initialize(dbname:, host:, port:, username:, log_sql:) 6 | @dbname = dbname 7 | @host = host 8 | @port = port 9 | @username = username 10 | @log_sql = log_sql 11 | @mutex = Mutex.new 12 | end 13 | 14 | def setup(enable_hypopg) 15 | if server_version_num < 130000 16 | raise Error, "This version of Dexter requires Postgres 13+" 17 | end 18 | 19 | check_extension(enable_hypopg) 20 | 21 | execute("SET lock_timeout = '5s'") 22 | end 23 | 24 | def execute(query, pretty: true, params: [], use_exec: false) 25 | # use exec_params instead of exec when possible for security 26 | # 27 | # Unlike PQexec, PQexecParams allows at most one SQL command in the given string. 28 | # (There can be semicolons in it, but not more than one nonempty command.) 29 | # This is a limitation of the underlying protocol, but has some usefulness 30 | # as an extra defense against SQL-injection attacks. 31 | # https://www.postgresql.org/docs/current/static/libpq-exec.html 32 | query = squish(query) if pretty 33 | log colorize("[sql] #{query}#{params.any? ? " /*#{params.to_json}*/" : ""}", :cyan) if @log_sql 34 | 35 | @mutex.synchronize do 36 | if use_exec 37 | conn.exec("#{query} /*dexter*/").to_a 38 | else 39 | conn.exec_params("#{query} /*dexter*/", params).to_a 40 | end 41 | end 42 | end 43 | 44 | def quote_ident(value) 45 | value.split(".").map { |v| conn.quote_ident(v) }.join(".") 46 | end 47 | 48 | def server_version_num 49 | @server_version_num ||= execute("SHOW server_version_num").first["server_version_num"].to_i 50 | end 51 | 52 | private 53 | 54 | def check_extension(enable_hypopg) 55 | extension = execute("SELECT installed_version FROM pg_available_extensions WHERE name = 'hypopg'").first 56 | 57 | if extension.nil? 58 | raise Error, "Install HypoPG first: https://github.com/ankane/dexter#installation" 59 | end 60 | 61 | if extension["installed_version"].nil? 62 | if enable_hypopg 63 | execute("CREATE EXTENSION hypopg") 64 | else 65 | raise Error, "Run `CREATE EXTENSION hypopg` or pass --enable-hypopg" 66 | end 67 | end 68 | end 69 | 70 | def conn 71 | @conn ||= begin 72 | # set connect timeout if none set 73 | ENV["PGCONNECT_TIMEOUT"] ||= "3" 74 | 75 | if @dbname.to_s.start_with?("postgres://", "postgresql://") 76 | config = @dbname 77 | else 78 | config = { 79 | host: @host, 80 | port: @port, 81 | dbname: @dbname, 82 | user: @username 83 | }.reject { |_, value| value.to_s.empty? 
} 84 | config = config[:dbname] if config.keys == [:dbname] && config[:dbname].include?("=") 85 | end 86 | PG::Connection.new(config) 87 | end 88 | rescue PG::ConnectionBad => e 89 | raise Error, e.message 90 | end 91 | 92 | def squish(str) 93 | str.to_s.gsub(/\s+/, " ").strip 94 | end 95 | end 96 | end 97 | -------------------------------------------------------------------------------- /lib/dexter/table_resolver.rb: -------------------------------------------------------------------------------- 1 | module Dexter 2 | class TableResolver 3 | include Logging 4 | 5 | def initialize(connection, queries, log_level:) 6 | @connection = connection 7 | @queries = queries 8 | @log_level = log_level 9 | end 10 | 11 | def perform 12 | tables = Set.new(database_tables + materialized_views) 13 | no_schema_tables = self.no_schema_tables(tables) 14 | view_tables = self.view_tables(no_schema_tables) 15 | 16 | @queries.each do |query| 17 | # add schema to table if needed 18 | query_tables = self.tables(query).map { |t| no_schema_tables[t] || t } 19 | 20 | # substitute view tables 21 | query.tables = query_tables.flat_map { |t| view_tables[t] || [t] }.uniq 22 | query.tables_from_views = query.tables - query_tables 23 | 24 | query.missing_tables = !query.tables.all? { |t| tables.include?(t) } 25 | end 26 | end 27 | 28 | private 29 | 30 | def tables(query) 31 | query.parser_result&.tables || [] 32 | rescue => e 33 | # possible pg_query bug 34 | $stderr.puts "Error extracting tables. Please report to https://github.com/ankane/dexter/issues" 35 | $stderr.puts "#{e.class.name}: #{e.message}" 36 | $stderr.puts query.statement 37 | [] 38 | end 39 | 40 | def no_schema_tables(tables) 41 | search_path_index = Hash[search_path.map.with_index.to_a] 42 | tables.group_by { |t| t.split(".")[-1] }.to_h do |group, t2| 43 | [group, t2.sort_by { |t| [search_path_index[t.split(".")[0]] || 1000000, t] }[0]] 44 | end 45 | end 46 | 47 | def view_tables(no_schema_tables) 48 | # add tables from views 49 | view_tables = database_view_tables 50 | view_tables.each do |v, vt| 51 | view_tables[v] = vt.map { |t| no_schema_tables[t] || t } 52 | end 53 | 54 | # fully resolve tables 55 | # make sure no views in result 56 | view_tables.each do |v, vt| 57 | view_tables[v] = vt.flat_map { |t| view_tables[t] || [t] }.uniq 58 | end 59 | 60 | view_tables 61 | end 62 | 63 | def execute(...) 64 | @connection.execute(...) 65 | end 66 | 67 | def search_path 68 | execute("SELECT current_schemas(true)")[0]["current_schemas"][1..-2].split(",") 69 | end 70 | 71 | def database_tables 72 | result = execute <<~SQL 73 | SELECT 74 | table_schema || '.' || table_name AS table_name 75 | FROM 76 | information_schema.tables 77 | WHERE 78 | table_catalog = current_database() 79 | AND table_type IN ('BASE TABLE', 'VIEW') 80 | SQL 81 | result.map { |r| r["table_name"] } 82 | end 83 | 84 | def materialized_views 85 | result = execute <<~SQL 86 | SELECT 87 | schemaname || '.' || matviewname AS table_name 88 | FROM 89 | pg_matviews 90 | SQL 91 | result.map { |r| r["table_name"] } 92 | end 93 | 94 | def views 95 | execute <<~SQL 96 | SELECT 97 | schemaname || '.' 
|| viewname AS table_name, 98 | definition 99 | FROM 100 | pg_views 101 | WHERE 102 | schemaname NOT IN ('information_schema', 'pg_catalog') 103 | SQL 104 | end 105 | 106 | def database_view_tables 107 | view_tables = {} 108 | views.each do |row| 109 | begin 110 | view_tables[row["table_name"]] = PgQuery.parse(row["definition"]).tables 111 | rescue PgQuery::ParseError 112 | if @log_level.start_with?("debug") 113 | log colorize("ERROR: Cannot parse view definition: #{row["table_name"]}", :red) 114 | end 115 | end 116 | end 117 | view_tables 118 | end 119 | end 120 | end 121 | -------------------------------------------------------------------------------- /test/statement_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class StatementTest < Minitest::Test 4 | def test_basic_index 5 | assert_index "SELECT * FROM posts WHERE id = 1", "public.posts (id)" 6 | end 7 | 8 | def test_basic_no_index 9 | assert_no_index "SELECT * FROM posts" 10 | end 11 | 12 | def test_multicolumn 13 | assert_index "SELECT * FROM posts WHERE user_id = 1 AND blog_id = 2", "public.posts (user_id, blog_id)" 14 | end 15 | 16 | def test_multicolumn_3pass 17 | assert_index "SELECT * FROM posts WHERE user_id = 1 AND blog_id < 10", "public.posts (blog_id)" 18 | end 19 | 20 | def test_multicolumn_order 21 | assert_index "SELECT * FROM posts WHERE user_id = 1 ORDER BY blog_id LIMIT 1000", "public.posts (user_id, blog_id)" 22 | end 23 | 24 | def test_join 25 | assert_index "SELECT * FROM posts INNER JOIN blogs ON blogs.id = posts.blog_id WHERE blogs.id = 1", "public.posts (blog_id)" 26 | end 27 | 28 | def test_update 29 | assert_index "UPDATE posts SET user_id = 2 WHERE user_id = 1", "public.posts (user_id)" 30 | end 31 | 32 | def test_delete 33 | assert_index "DELETE FROM posts WHERE user_id = 1", "public.posts (user_id)" 34 | end 35 | 36 | def test_view 37 | assert_index "SELECT * FROM posts_view WHERE view_id = 1", "public.posts (id)" 38 | end 39 | 40 | def test_materialized_view 41 | assert_index "SELECT * FROM posts_materialized WHERE id = 1", "public.posts_materialized (id)" 42 | end 43 | 44 | def test_missing_table 45 | assert_no_index "SELECT * FROM missing", reason: "Tables not present in current database" 46 | end 47 | 48 | def test_partitioned_table 49 | assert_index "SELECT * FROM events WHERE blog_id = 1", "public.events (blog_id)" 50 | end 51 | 52 | def test_partitioned_table_and_partition 53 | output = run_command("-s", "SELECT * FROM events WHERE blog_id = 1", "-s", "SELECT * FROM events_0 WHERE blog_id = 1") 54 | assert_match "Index found: public.events (blog_id)", output 55 | # TODO fix 56 | assert_match "Index found: public.events_0 (blog_id)", output 57 | end 58 | 59 | def test_foreign_table 60 | assert_no_index "SELECT * FROM comments WHERE post_id = 1", reason: "Tables not present in current database" 61 | end 62 | 63 | def test_cte 64 | assert_index "WITH cte AS (SELECT * FROM posts WHERE id = 1) SELECT * FROM cte", "public.posts (id)" 65 | end 66 | 67 | def test_cte_fence 68 | assert_index "WITH cte AS (SELECT * FROM posts) SELECT * FROM cte WHERE id = 1", "public.posts (id)" 69 | end 70 | 71 | def test_materialized_cte 72 | assert_no_index "WITH cte AS MATERIALIZED (SELECT * FROM posts) SELECT * FROM cte WHERE id = 1" 73 | end 74 | 75 | def test_not_materialized_cte 76 | assert_index "WITH cte AS NOT MATERIALIZED (SELECT * FROM posts) SELECT * FROM cte WHERE id = 1", "public.posts (id)" 77 | end 78 | 79 | def test_order 
80 | assert_index "SELECT * FROM posts ORDER BY user_id DESC LIMIT 10", "public.posts (user_id)" 81 | end 82 | 83 | def test_order_column_number 84 | assert_index "SELECT user_id FROM posts ORDER BY 1 DESC LIMIT 10", "public.posts (user_id)" 85 | end 86 | 87 | def test_order_column_number_star 88 | # not ideal 89 | assert_no_index "SELECT * FROM posts ORDER BY 1 DESC LIMIT 10", reason: "No candidate columns for indexes" 90 | end 91 | 92 | def test_order_column_alias 93 | assert_index "SELECT user_id AS u FROM posts ORDER BY u DESC LIMIT 10", "public.posts (user_id)" 94 | end 95 | 96 | def test_order_multiple 97 | assert_index "SELECT * FROM posts ORDER BY user_id, blog_id LIMIT 10", "public.posts (user_id, blog_id)" 98 | end 99 | 100 | def test_order_multiple_direction 101 | skip 102 | assert_index "SELECT * FROM posts ORDER BY user_id DESC, blog_id LIMIT 10", "public.posts (user_id DESC, blog_id)" 103 | end 104 | 105 | def test_schema 106 | assert_index "SELECT * FROM \"Bar\".\"Foo\" WHERE \"Id\" = 10000", "Bar.Foo (Id)" 107 | end 108 | 109 | def test_normalized 110 | assert_index "SELECT * FROM posts WHERE id = $1", "public.posts (id)" 111 | end 112 | 113 | def test_no_tables 114 | assert_no_index "SELECT 1", reason: "No tables" 115 | end 116 | 117 | def test_information_schema 118 | assert_no_index "SELECT * FROM information_schema.tables", reason: "No candidate tables for indexes" 119 | end 120 | 121 | def test_pg_catalog 122 | assert_no_index "SELECT * FROM pg_catalog.pg_index", reason: "No candidate tables for indexes" 123 | end 124 | 125 | def test_pg_index 126 | assert_no_index "SELECT * FROM pg_index", reason: "No candidate tables for indexes" 127 | end 128 | 129 | def test_parse_error 130 | assert_no_index "SELECT +", reason: "Could not parse query" 131 | end 132 | 133 | def test_explain_error 134 | assert_no_index "SELECT bad FROM posts", reason: "Could not run explain" 135 | end 136 | 137 | def test_indexed 138 | assert_no_index "SELECT * FROM posts WHERE indexed = 1", reason: "Low initial cost" 139 | end 140 | end 141 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.6.3 (2025-10-15) 2 | 3 | - Added `--non-concurrently` option 4 | - Added support for multiple `--statement` options 5 | 6 | ## 0.6.2 (2025-07-27) 7 | 8 | - Fixed error with utility statements 9 | 10 | ## 0.6.1 (2025-06-08) 11 | 12 | - Fixed error with column types without `btree` support 13 | 14 | ## 0.6.0 (2025-06-01) 15 | 16 | - Added Linux packages for Ubuntu 24.04 and Debian 12 17 | - Fixed error with hypothetical index limit 18 | - Dropped support for Linux package for Ubuntu 20.04 19 | - Dropped support for Ruby < 3 20 | - Dropped support for Postgres < 13 21 | 22 | ## 0.5.6 (2025-02-01) 23 | 24 | - Updated pg_query 25 | - Fixed Docker image for `linux/arm64` 26 | 27 | ## 0.5.5 (2024-06-02) 28 | 29 | - Updated pg_query to 5.1+ to fix installation on Windows 30 | - Fixed error with `--pg-stat-statements` and `--min-calls` 31 | 32 | ## 0.5.4 (2024-04-03) 33 | 34 | - Fixed issue with processing over 500 query fingerprints (introduced in 0.5.3) 35 | - Require google-protobuf < 4 36 | 37 | ## 0.5.3 (2024-03-05) 38 | 39 | - Fixed error with hypothetical index limit 40 | - Fixed error with foreign tables 41 | 42 | ## 0.5.2 (2024-01-10) 43 | 44 | - Added Docker image for `linux/arm64` 45 | - Switched to `GENERIC_PLAN` for Postgres 16 46 | - Fixed error with `auto_explain` 47 | - 
Fixed warning with Ruby 3.3 48 | 49 | ## 0.5.1 (2023-05-27) 50 | 51 | - Fixed `JSON::NestingError` 52 | 53 | ## 0.5.0 (2023-04-18) 54 | 55 | - Added support for normalized queries 56 | - Added `--stdin` option (now required to read from stdin) 57 | - Added `--enable-hypopg` option (now required to enable HypoPG) 58 | - Improved output when HypoPG not installed 59 | - Changed `--pg-stat-activity` to sample 10 times and exit 60 | - Detect input format based on file extension 61 | - Dropped support for experimental `--log-table` option 62 | - Dropped support for Linux packages for Ubuntu 18.04 and Debian 10 63 | - Dropped support for Ruby < 2.7 64 | - Dropped support for Postgres < 11 65 | 66 | ## 0.4.3 (2023-03-26) 67 | 68 | - Added experimental `--log-table` option 69 | - Improved help 70 | - Require pg_query < 4 71 | 72 | ## 0.4.2 (2023-01-29) 73 | 74 | - Fixed `--pg-stat-statements` option for Postgres 13+ 75 | 76 | ## 0.4.1 (2022-10-15) 77 | 78 | - Added support for `json` format 79 | 80 | ## 0.4.0 (2022-07-27) 81 | 82 | - Added support for pg_query 2 83 | - Switched to monotonic time 84 | - Dropped support for Ruby < 2.5 85 | 86 | ## 0.3.10 (2021-03-25) 87 | 88 | - Require pg_query < 2 89 | 90 | ## 0.3.9 (2020-11-23) 91 | 92 | - Added `--tablespace` option 93 | 94 | ## 0.3.8 (2020-08-17) 95 | 96 | - Colorize output 97 | - Fixed error when unable to parse view definitions 98 | 99 | ## 0.3.7 (2020-07-10) 100 | 101 | - Fixed help output 102 | 103 | ## 0.3.6 (2020-03-30) 104 | 105 | - Fixed warning with Ruby 2.7 106 | 107 | ## 0.3.5 (2018-04-30) 108 | 109 | - Added `sql` input format 110 | - Fixed error for queries with double dash comments 111 | - Fixed connection threading issue with `--pg-stat-activity` option 112 | 113 | ## 0.3.4 (2018-04-09) 114 | 115 | - Fixed `--username` option 116 | - Fixed `JSON::NestingError` 117 | - Added `--pg-stat-activity` option 118 | 119 | ## 0.3.3 (2018-02-22) 120 | 121 | - Added support for views and materialized views 122 | - Better handle case when multiple indexes are found for a query 123 | - Added `--min-cost-savings-pct` option 124 | 125 | ## 0.3.2 (2018-01-04) 126 | 127 | - Fixed parsing issue with named prepared statements 128 | - Fixed parsing issue with multiline queries in csv format 129 | - Better explanations for indexing decisions 130 | 131 | ## 0.3.1 (2017-12-28) 132 | 133 | - Added support for queries with bind variables 134 | - Fixed error with streaming logs as csv format 135 | - Handle malformed CSV gracefully 136 | 137 | ## 0.3.0 (2017-12-22) 138 | 139 | - Added support for schemas 140 | - Added support for csv format 141 | - Added `--analyze` option and do not analyze by default 142 | - Added `--min-calls` option 143 | - Fixed debug output when indexes not found 144 | 145 | ## 0.2.1 (2017-09-02) 146 | 147 | - Fixed bad suggestions 148 | - Improved debugging output 149 | 150 | ## 0.2.0 (2017-08-27) 151 | 152 | - Added same connection options as `psql` 153 | - Added support for multiple files 154 | - Added `error` log level 155 | - Better error messages when cannot connect 156 | 157 | Breaking 158 | 159 | - `-h` option changed to `--host` instead of `--help` for consistency with `psql` 160 | 161 | ## 0.1.6 (2017-08-26) 162 | 163 | - Significant performance improvements 164 | - Added `--include` option 165 | 166 | ## 0.1.5 (2017-08-14) 167 | 168 | - Added support for non-`SELECT` queries 169 | - Added `--pg-stat-statements` option 170 | - Added advisory locks 171 | - Added support for running as a non-superuser 172 | 173 | ## 0.1.4 
(2017-07-02) 174 | 175 | - Added support for multicolumn indexes 176 | 177 | ## 0.1.3 (2017-06-30) 178 | 179 | - Fixed error with non-lowercase columns 180 | - Fixed error with `json` columns 181 | 182 | ## 0.1.2 (2017-06-26) 183 | 184 | - Added `--exclude` option 185 | - Added `--log-sql` option 186 | 187 | ## 0.1.1 (2017-06-25) 188 | 189 | - Added `--interval` option 190 | - Added `--min-time` option 191 | - Added `--log-level` option 192 | 193 | ## 0.1.0 (2017-06-24) 194 | 195 | - Launched 196 | -------------------------------------------------------------------------------- /lib/dexter/client.rb: -------------------------------------------------------------------------------- 1 | module Dexter 2 | class Client 3 | extend Logging 4 | include Logging 5 | 6 | attr_reader :arguments, :options 7 | 8 | def self.start 9 | Client.new(ARGV).perform 10 | rescue Error => e 11 | abort colorize(e.message.strip, :red) 12 | end 13 | 14 | def initialize(args) 15 | @arguments, @options = parse_args(args) 16 | end 17 | 18 | def perform 19 | STDOUT.sync = true 20 | STDERR.sync = true 21 | 22 | connection = Connection.new(**options.slice(:dbname, :host, :port, :username, :log_sql)) 23 | connection.setup(options[:enable_hypopg]) 24 | 25 | source = 26 | if options[:statement].any? 27 | # TODO raise error for --interval, --min-calls, --min-time 28 | StatementSource.new(options[:statement]) 29 | elsif options[:pg_stat_statements] 30 | # TODO support streaming option 31 | PgStatStatementsSource.new(connection) 32 | elsif options[:pg_stat_activity] 33 | PgStatActivitySource.new(connection) 34 | elsif arguments.any? 35 | ARGV.replace(arguments) 36 | if !options[:input_format] 37 | ext = ARGV.map { |v| File.extname(v) }.uniq 38 | options[:input_format] = ext.first[1..-1] if ext.size == 1 39 | end 40 | LogSource.new(ARGF, options[:input_format]) 41 | elsif options[:stdin] 42 | LogSource.new(STDIN, options[:input_format]) 43 | else 44 | raise Error, "Specify a source of queries: --pg-stat-statements, --pg-stat-activity, --stdin, or a path" 45 | end 46 | 47 | collector = Collector.new(**options.slice(:min_time, :min_calls)) 48 | 49 | indexer = Indexer.new(connection: connection, **options) 50 | 51 | Processor.new(source, collector, indexer, **options.slice(:interval)).perform 52 | end 53 | 54 | def parse_args(args) 55 | opts = Slop.parse(args) do |o| 56 | o.banner = <<~BANNER 57 | Usage: 58 | dexter [options] 59 | BANNER 60 | 61 | o.separator "Input options:" 62 | o.string "--input-format", "input format" 63 | o.boolean "--pg-stat-activity", "use pg_stat_activity", default: false 64 | o.boolean "--pg-stat-statements", "use pg_stat_statements", default: false, help: false 65 | o.boolean "--stdin", "use stdin", default: false 66 | o.array "-s", "--statement", "process a single statement", delimiter: nil 67 | o.separator "" 68 | 69 | o.separator "Connection options:" 70 | o.string "-d", "--dbname", "database name" 71 | o.string "-h", "--host", "database host" 72 | o.integer "-p", "--port", "database port" 73 | o.string "-U", "--username", "database user" 74 | o.separator "" 75 | 76 | o.separator "Processing options:" 77 | o.integer "--interval", "time to wait between processing queries, in seconds", default: 60 78 | o.integer "--min-calls", "only process queries that have been called a certain number of times", default: 0 79 | o.float "--min-time", "only process queries that have consumed a certain amount of DB time, in minutes", default: 0 80 | o.separator "" 81 | 82 | o.separator "Indexing options:" 83 | o.boolean 
"--analyze", "analyze tables that haven't been analyzed in the past hour", default: false 84 | o.boolean "--create", "create indexes", default: false 85 | o.boolean "--enable-hypopg", "enable the HypoPG extension", default: false 86 | o.array "--exclude", "prevent specific tables from being indexed" 87 | o.string "--include", "only include specific tables" 88 | o.integer "--min-cost", default: 100, help: false 89 | o.integer "--min-cost-savings-pct", default: 50, help: false 90 | o.string "--tablespace", "tablespace to create indexes" 91 | o.boolean "--non-concurrently", "use non-concurrent index creation", default: false 92 | o.separator "" 93 | 94 | o.separator "Logging options:" 95 | o.boolean "--log-explain", "log explain", default: false, help: false 96 | o.string "--log-level", "log level", default: "info" 97 | o.boolean "--log-sql", "log sql", default: false 98 | o.separator "" 99 | 100 | o.separator "Other options:" 101 | o.on "-v", "--version", "print the version" do 102 | log Dexter::VERSION 103 | exit 104 | end 105 | o.on "--help", "prints help" do 106 | log o 107 | exit 108 | end 109 | end 110 | 111 | arguments = opts.arguments 112 | options = opts.to_hash 113 | 114 | options[:dbname] = arguments.shift unless options[:dbname] 115 | 116 | # TODO remove global variable 117 | $log_level = options[:log_level].to_s.downcase 118 | unless ["error", "info", "debug", "debug2", "debug3"].include?($log_level) 119 | raise Error, "Unknown log level" 120 | end 121 | 122 | unless [nil, "csv", "json", "sql"].include?(options[:input_format]) 123 | raise Error, "Unknown input format" 124 | end 125 | 126 | [arguments, options] 127 | rescue Slop::Error => e 128 | raise Error, e.message 129 | end 130 | end 131 | end 132 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Dexter 2 | 3 | The automatic indexer for Postgres 4 | 5 | [Read about how it works](https://ankane.org/introducing-dexter) or [watch the talk](https://www.youtube.com/watch?v=Mni_1yTaNbE) 6 | 7 | [![Build Status](https://github.com/ankane/dexter/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/dexter/actions) 8 | 9 | ## Installation 10 | 11 | First, install [HypoPG](https://github.com/HypoPG/hypopg) on your database server. This doesn’t require a restart. 12 | 13 | ```sh 14 | cd /tmp 15 | curl -L https://github.com/HypoPG/hypopg/archive/1.4.2.tar.gz | tar xz 16 | cd hypopg-1.4.2 17 | make 18 | make install # may need sudo 19 | ``` 20 | 21 | And enable it in databases where you want to use Dexter: 22 | 23 | ```sql 24 | CREATE EXTENSION hypopg; 25 | ``` 26 | 27 | See the [installation notes](#hypopg-installation-notes) if you run into issues. 28 | 29 | Then install the command line tool with: 30 | 31 | ```sh 32 | gem install pgdexter 33 | ``` 34 | 35 | The command line tool is also available with [Docker](#docker), [Homebrew](#homebrew), or as a [Linux package](guides/Linux.md). 36 | 37 | ## How to Use 38 | 39 | Dexter needs a connection to your database and a source of queries (like [pg_stat_statements](https://www.postgresql.org/docs/current/pgstatstatements.html)) to process. 
40 | 41 | ```sh 42 | dexter -d dbname --pg-stat-statements 43 | ``` 44 | 45 | This finds slow queries and generates output like: 46 | 47 | ``` 48 | Started 49 | Processing 189 new query fingerprints 50 | Index found: public.genres_movies (genre_id) 51 | Index found: public.genres_movies (movie_id) 52 | Index found: public.movies (title) 53 | Index found: public.ratings (movie_id) 54 | Index found: public.ratings (rating) 55 | Index found: public.ratings (user_id) 56 | ``` 57 | 58 | To be safe, Dexter will not create indexes unless you pass the `--create` flag. In this case, you’ll see: 59 | 60 | ``` 61 | Index found: public.ratings (user_id) 62 | Creating index: CREATE INDEX CONCURRENTLY ON "public"."ratings" ("user_id") 63 | Index created: 15243 ms 64 | ``` 65 | 66 | ## Connection Options 67 | 68 | Dexter supports the same connection options as psql. 69 | 70 | ``` 71 | -h host -U user -p 5432 -d dbname 72 | ``` 73 | 74 | This includes URIs: 75 | 76 | ``` 77 | postgresql://user:pass@host:5432/dbname 78 | ``` 79 | 80 | and connection strings: 81 | 82 | ``` 83 | host=localhost port=5432 dbname=mydb 84 | ``` 85 | 86 | Always make sure your [connection is secure](https://ankane.org/postgres-sslmode-explained) when connecting to a database over a network you don’t fully trust. 87 | 88 | ## Collecting Queries 89 | 90 | Dexter can collect queries from a number of sources. 91 | 92 | - [Query stats](#query-stats) 93 | - [Live queries](#live-queries) 94 | - [Log files](#log-files) 95 | - [SQL files](#sql-files) 96 | 97 | ### Query Stats 98 | 99 | Enable [pg_stat_statements](https://www.postgresql.org/docs/current/pgstatstatements.html) in your database. 100 | 101 | ```psql 102 | CREATE EXTENSION pg_stat_statements; 103 | ``` 104 | 105 | And use: 106 | 107 | ```sh 108 | dexter --pg-stat-statements 109 | ``` 110 | 111 | ### Live Queries 112 | 113 | Get live queries from the [pg_stat_activity](https://www.postgresql.org/docs/current/monitoring-stats.html#MONITORING-PG-STAT-ACTIVITY-VIEW) view with: 114 | 115 | ```sh 116 | dexter --pg-stat-activity 117 | ``` 118 | 119 | ### Log Files 120 | 121 | Enable logging for slow queries in your Postgres config file. 122 | 123 | ```ini 124 | log_min_duration_statement = 10 # ms 125 | ``` 126 | 127 | And use: 128 | 129 | ```sh 130 | dexter postgresql.log 131 | ``` 132 | 133 | Supports `stderr`, `csvlog`, and `jsonlog` formats. 134 | 135 | For real-time indexing, pipe your logfile: 136 | 137 | ```sh 138 | tail -F -n +1 postgresql.log | dexter --stdin 139 | ``` 140 | 141 | And pass `--input-format csv` or `--input-format json` if needed. 142 | 143 | ### SQL Files 144 | 145 | Pass a SQL file with: 146 | 147 | ```sh 148 | dexter queries.sql 149 | ``` 150 | 151 | Pass a single query with: 152 | 153 | ```sh 154 | dexter -s "SELECT * FROM ..." 155 | ``` 156 | 157 | ## Collection Options 158 | 159 | To prevent one-off queries from being indexed, specify a minimum number of calls before a query is considered for indexing 160 | 161 | ```sh 162 | dexter --min-calls 100 163 | ``` 164 | 165 | You can do the same for total time a query has run 166 | 167 | ```sh 168 | dexter --min-time 10 # minutes 169 | ``` 170 | 171 | When streaming logs, specify the time to wait between processing queries 172 | 173 | ```sh 174 | dexter --interval 60 # seconds 175 | ``` 176 | 177 | ## Analyze 178 | 179 | For best results, make sure your tables have been recently analyzed so statistics are up-to-date. 
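Dexter compares planner cost estimates, and those estimates are only as reliable as the statistics behind them.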
You can ask Dexter to analyze tables it comes across that haven’t been analyzed in the past hour with: 180 | 181 | ```sh 182 | dexter --analyze 183 | ``` 184 | 185 | ## Tables 186 | 187 | You can exclude large or write-heavy tables from indexing with: 188 | 189 | ```sh 190 | dexter --exclude table1,table2 191 | ``` 192 | 193 | Alternatively, you can specify which tables to index with: 194 | 195 | ```sh 196 | dexter --include table3,table4 197 | ``` 198 | 199 | ## Debugging 200 | 201 | See how Dexter is processing queries with: 202 | 203 | ```sh 204 | dexter --log-sql --log-level debug2 205 | ``` 206 | 207 | ## Hosted Postgres 208 | 209 | The `hypopg` extension, which Dexter needs to run, is available on [these providers](https://github.com/ankane/dexter/issues/44). 210 | 211 | For other providers, see [this guide](guides/Hosted-Postgres.md). To request a new extension: 212 | 213 | - Google Cloud SQL - vote or comment on [this page](https://issuetracker.google.com/issues/69250435) 214 | - DigitalOcean Managed Databases - vote or comment on [this page](https://ideas.digitalocean.com/managed-database/p/support-hypopg-for-postgres) 215 | 216 | ## HypoPG Installation Notes 217 | 218 | ### Postgres Location 219 | 220 | If your machine has multiple Postgres installations, specify the path to [pg_config](https://www.postgresql.org/docs/current/app-pgconfig.html) with: 221 | 222 | ```sh 223 | export PG_CONFIG=/Applications/Postgres.app/Contents/Versions/latest/bin/pg_config 224 | ``` 225 | 226 | Then re-run the installation instructions (run `make clean` before `make` if needed) 227 | 228 | ### Missing Header 229 | 230 | If compilation fails with `fatal error: postgres.h: No such file or directory`, make sure Postgres development files are installed on the server. 231 | 232 | For Ubuntu and Debian, use: 233 | 234 | ```sh 235 | sudo apt-get install postgresql-server-dev-17 236 | ``` 237 | 238 | Note: Replace `17` with your Postgres server version 239 | 240 | ## Additional Installation Methods 241 | 242 | ### Docker 243 | 244 | Get the [Docker image](https://hub.docker.com/r/ankane/dexter) with: 245 | 246 | ```sh 247 | docker pull ankane/dexter 248 | ``` 249 | 250 | And run it with: 251 | 252 | ```sh 253 | docker run -ti ankane/dexter 254 | ``` 255 | 256 | For databases on the host machine, use `host.docker.internal` as the hostname (on Linux, this requires `--add-host=host.docker.internal:host-gateway`). 257 | 258 | ### Homebrew 259 | 260 | With Homebrew, you can use: 261 | 262 | ```sh 263 | brew install dexter 264 | ``` 265 | 266 | ## Future Work 267 | 268 | [Here are some ideas](https://github.com/ankane/dexter/issues/45) 269 | 270 | ## Upgrading 271 | 272 | Run: 273 | 274 | ```sh 275 | gem install pgdexter 276 | ``` 277 | 278 | To use master, run: 279 | 280 | ```sh 281 | gem install specific_install 282 | gem specific_install https://github.com/ankane/dexter.git 283 | ``` 284 | 285 | ## Thanks 286 | 287 | This software wouldn’t be possible without [HypoPG](https://github.com/HypoPG/hypopg), which allows you to create hypothetical indexes, and [pg_query](https://github.com/lfittl/pg_query), which allows you to parse and fingerprint queries. A big thanks to Dalibo and Lukas Fittl respectively. Also, thanks to YugabyteDB for [this article](https://dev.to/yugabyte/explain-from-pgstatstatements-normalized-queries-how-to-always-get-the-generic-plan-in--5cfi) on how to explain normalized queries. 288 | 289 | ## Research 290 | 291 | This is known as the Index Selection Problem (ISP). 
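Rather than solving it exactly, Dexter takes a heuristic approach: it proposes hypothetical single-column and two-column indexes and keeps the ones that reduce estimated query cost enough.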
292 | 293 | ## History 294 | 295 | View the [changelog](https://github.com/ankane/dexter/blob/master/CHANGELOG.md) 296 | 297 | ## Contributing 298 | 299 | Everyone is encouraged to help improve this project. Here are a few ways you can help: 300 | 301 | - [Report bugs](https://github.com/ankane/dexter/issues) 302 | - Fix bugs and [submit pull requests](https://github.com/ankane/dexter/pulls) 303 | - Write, clarify, or fix documentation 304 | - Suggest or add new features 305 | 306 | To get started with development, run: 307 | 308 | ```sh 309 | git clone https://github.com/ankane/dexter.git 310 | cd dexter 311 | bundle install 312 | bundle exec rake install 313 | ``` 314 | 315 | To run tests, use: 316 | 317 | ```sh 318 | createdb dexter_test 319 | bundle exec rake test 320 | ``` 321 | -------------------------------------------------------------------------------- /lib/dexter/indexer.rb: -------------------------------------------------------------------------------- 1 | module Dexter 2 | class Indexer 3 | include Logging 4 | 5 | def initialize(connection:, **options) 6 | @connection = connection 7 | @create = options[:create] 8 | @tablespace = options[:tablespace] 9 | @log_level = options[:log_level] 10 | @exclude_tables = options[:exclude] 11 | @include_tables = Array(options[:include].split(",")) if options[:include] 12 | @log_explain = options[:log_explain] 13 | @analyze = options[:analyze] 14 | @min_cost = options[:min_cost].to_i 15 | @min_cost_savings_pct = options[:min_cost_savings_pct].to_i 16 | @options = options 17 | @server_version_num = @connection.server_version_num 18 | end 19 | 20 | # TODO recheck server version? 21 | def process_queries(queries) 22 | TableResolver.new(@connection, queries, log_level: @log_level).perform 23 | candidate_queries = queries.reject(&:missing_tables) 24 | 25 | tables = determine_tables(candidate_queries) 26 | candidate_queries.each do |query| 27 | query.candidate_tables = query.tables.select { |t| tables.include?(t) }.sort 28 | end 29 | candidate_queries.select! { |q| q.candidate_tables.any? } 30 | 31 | if tables.any? 32 | # analyze tables if needed 33 | # TODO remove @log_level in 0.7.0 34 | analyze_tables(tables) if @analyze || @log_level == "debug2" 35 | 36 | # get initial costs for queries 37 | reset_hypothetical_indexes 38 | calculate_plan(candidate_queries) 39 | candidate_queries.select! { |q| q.initial_cost && q.initial_cost >= @min_cost } 40 | 41 | # find columns 42 | ColumnResolver.new(@connection, candidate_queries, log_level: @log_level).perform 43 | candidate_queries.each do |query| 44 | # no reason to use btree index for json columns 45 | # TODO check type supports btree 46 | query.candidate_columns = query.columns.reject { |c| ["json", "jsonb", "point"].include?(c[:type]) }.sort_by { |c| [c[:table], c[:column]] } 47 | end 48 | candidate_queries.select! { |q| q.candidate_columns.any? } 49 | 50 | # create hypothetical indexes and explain queries 51 | batch_hypothetical_indexes(candidate_queries) 52 | end 53 | 54 | # see if new indexes were used and meet bar 55 | new_indexes = determine_indexes(queries, tables) 56 | 57 | # display new indexes 58 | show_new_indexes(new_indexes) 59 | 60 | # display debug info 61 | show_debug_info(new_indexes, queries) if @log_level.start_with?("debug") 62 | 63 | # create new indexes 64 | IndexCreator.new(@connection, self, new_indexes, @tablespace, !@options[:non_concurrently]).perform if @create && new_indexes.any? 
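# the final argument enables CREATE INDEX CONCURRENTLY; --non-concurrently turns it off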
65 | end 66 | 67 | private 68 | 69 | def reset_hypothetical_indexes 70 | execute("SELECT hypopg_reset()") 71 | end 72 | 73 | def determine_tables(candidate_queries) 74 | # set tables 75 | tables = Set.new(candidate_queries.flat_map(&:tables)) 76 | 77 | # must come after missing tables set 78 | if @include_tables 79 | include_set = Set.new(@include_tables) 80 | tables.keep_if { |t| include_set.include?(t) || include_set.include?(t.split(".")[-1]) } 81 | end 82 | 83 | if @exclude_tables.any? 84 | exclude_set = Set.new(@exclude_tables) 85 | tables.delete_if { |t| exclude_set.include?(t) || exclude_set.include?(t.split(".")[-1]) } 86 | end 87 | 88 | # remove system tables 89 | tables.delete_if { |t| t.start_with?("information_schema.", "pg_catalog.") } 90 | 91 | tables 92 | end 93 | 94 | def analyze_stats(tables) 95 | query = <<~SQL 96 | SELECT 97 | schemaname || '.' || relname AS table, 98 | last_analyze, 99 | last_autoanalyze 100 | FROM 101 | pg_stat_user_tables 102 | WHERE 103 | schemaname || '.' || relname IN (#{tables.size.times.map { |i| "$#{i + 1}" }.join(", ")}) 104 | SQL 105 | execute(query, params: tables) 106 | end 107 | 108 | def analyze_tables(tables) 109 | tables = tables.to_a.sort 110 | 111 | last_analyzed = {} 112 | analyze_stats(tables).each do |stats| 113 | last_analyzed[stats["table"]] = Time.parse(stats["last_analyze"]) if stats["last_analyze"] 114 | end 115 | 116 | tables.each do |table| 117 | la = last_analyzed[table] 118 | 119 | if @log_level == "debug2" 120 | time_str = la ? la.iso8601 : "Unknown" 121 | log "Last analyze: #{table} : #{time_str}" 122 | end 123 | 124 | if @analyze && (!la || la < Time.now - 3600) 125 | statement = "ANALYZE #{@connection.quote_ident(table)}" 126 | log "Running analyze: #{statement}" 127 | execute(statement) 128 | end 129 | end 130 | end 131 | 132 | def calculate_plan(queries) 133 | queries.each do |query| 134 | if @log_explain 135 | puts "Explaining query" 136 | puts 137 | end 138 | begin 139 | plan = self.plan(query.statement) 140 | query.plans << plan if plan && plan["Total Cost"] 141 | rescue PG::Error, JSON::NestingError => e 142 | if @log_explain 143 | log e.message 144 | end 145 | end 146 | puts if @log_explain 147 | end 148 | end 149 | 150 | # process in batches to prevent "hypopg: not more oid available" error 151 | # https://hypopg.readthedocs.io/en/rel1_stable/usage.html#configuration 152 | def batch_hypothetical_indexes(candidate_queries) 153 | batch_count = 0 154 | batch = [] 155 | single_column_indexes = Set.new 156 | multicolumn_indexes = Set.new 157 | 158 | # sort to improve batching 159 | # TODO improve 160 | candidate_queries.sort_by! { |q| q.candidate_columns.map { |c| [c[:table], c[:column]] } } 161 | 162 | candidate_queries.each do |query| 163 | batch << query 164 | 165 | single_column_indexes.merge(query.candidate_columns) 166 | 167 | # TODO for multicolumn indexes, use ordering 168 | columns_by_table = query.candidate_columns.group_by { |c| c[:table] } 169 | columns_by_table.each do |_, columns| 170 | multicolumn_indexes.merge(columns.permutation(2).to_a) 171 | end 172 | 173 | if single_column_indexes.size + multicolumn_indexes.size >= 500 174 | create_hypothetical_indexes(batch, single_column_indexes, multicolumn_indexes, batch_count) 175 | batch_count += 1 176 | batch.clear 177 | single_column_indexes.clear 178 | multicolumn_indexes.clear 179 | end 180 | end 181 | 182 | if batch.any? 
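# flush the final partial batch (fewer than 500 candidate indexes)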
183 | create_hypothetical_indexes(batch, single_column_indexes, multicolumn_indexes, batch_count) 184 | end 185 | end 186 | 187 | def create_candidate_indexes(candidate_indexes, index_mapping) 188 | candidate_indexes.each do |columns| 189 | begin 190 | index_name = create_hypothetical_index(columns[0][:table], columns.map { |c| c[:column] }) 191 | index_mapping[index_name] = columns 192 | rescue PG::UndefinedObject 193 | # data type x has no default operator class for access method "btree" 194 | end 195 | end 196 | rescue PG::InternalError 197 | # hypopg: not more oid available 198 | log colorize("WARNING: Limiting index candidates", :yellow) if @log_level == "debug2" 199 | end 200 | 201 | def create_hypothetical_indexes(queries, single_column_indexes, multicolumn_indexes, batch_count) 202 | index_mapping = {} 203 | reset_hypothetical_indexes 204 | 205 | # check single column indexes 206 | create_candidate_indexes(single_column_indexes.map { |c| [c] }, index_mapping) 207 | calculate_plan(queries) 208 | 209 | # check multicolumn indexes 210 | create_candidate_indexes(multicolumn_indexes, index_mapping) 211 | calculate_plan(queries) 212 | 213 | # save index mapping for analysis 214 | queries.each do |query| 215 | query.index_mapping = index_mapping 216 | end 217 | 218 | # TODO different log level? 219 | log "Batch #{batch_count + 1}: #{queries.size} queries, #{index_mapping.size} hypothetical indexes" if @log_level == "debug2" 220 | end 221 | 222 | def find_indexes(plan) 223 | self.class.find_by_key(plan, "Index Name") 224 | end 225 | 226 | def self.find_by_key(plan, key) 227 | result = [] 228 | queue = [plan] 229 | while queue.any? 230 | node = queue.pop 231 | case node 232 | when Hash 233 | node.each do |k, v| 234 | if k == key 235 | result << v 236 | elsif !v.nil? 237 | queue << v 238 | end 239 | end 240 | when Array 241 | queue.concat(node) 242 | end 243 | end 244 | result 245 | end 246 | 247 | def hypo_indexes_from_plan(index_mapping, plan, index_set) 248 | query_indexes = [] 249 | 250 | find_indexes(plan).uniq.sort.each do |index_name| 251 | columns = index_mapping[index_name] 252 | 253 | if columns 254 | index = { 255 | table: columns[0][:table], 256 | columns: columns.map { |c| c[:column] } 257 | } 258 | 259 | unless index_set.include?([index[:table], index[:columns]]) 260 | query_indexes << index 261 | end 262 | end 263 | end 264 | 265 | query_indexes 266 | end 267 | 268 | def determine_indexes(queries, tables) 269 | new_indexes = {} 270 | 271 | # filter out existing indexes 272 | # this must happen at end of process 273 | # since sometimes hypothetical indexes 274 | # can give lower cost than actual indexes 275 | index_set = Set.new 276 | if tables.any? 277 | indexes(tables).each do |index| 278 | if index["using"] == "btree" 279 | # don't add indexes that are already covered 280 | index_set << [index["table"], index["columns"].first(1)] 281 | index_set << [index["table"], index["columns"].first(2)] 282 | end 283 | end 284 | end 285 | 286 | savings_ratio = 1 - @min_cost_savings_pct / 100.0 287 | 288 | queries.each do |query| 289 | if query.fully_analyzed? 290 | new_cost, new_cost2 = query.costs[1..2] 291 | 292 | cost_savings = new_cost < query.initial_cost * savings_ratio 293 | 294 | # set high bar for multicolumn indexes 295 | cost_savings2 = new_cost > 100 && new_cost2 < new_cost * savings_ratio 296 | 297 | key = cost_savings2 ? 
2 : 1 298 | query_indexes = hypo_indexes_from_plan(query.index_mapping, query.plans[key], index_set) 299 | 300 | # likely a bad suggestion, so try single column 301 | if cost_savings2 && query_indexes.size > 1 302 | query_indexes = hypo_indexes_from_plan(query.index_mapping, query.plans[1], index_set) 303 | cost_savings2 = false 304 | end 305 | 306 | suggest_index = cost_savings || cost_savings2 307 | 308 | cost_savings3 = false 309 | new_cost3 = nil 310 | 311 | # if multiple indexes are found (for either single or multicolumn) 312 | # determine the impact of each individually 313 | # there may be a better single index that we're not considering 314 | # that didn't get picked up by pass1 or pass2 315 | # TODO clean this up 316 | # TODO suggest more than one index from this if savings are there 317 | if suggest_index && query_indexes.size > 1 318 | winning_index = nil 319 | winning_cost = nil 320 | winning_plan = nil 321 | 322 | query_indexes.each do |query_index| 323 | reset_hypothetical_indexes 324 | create_hypothetical_index(query_index[:table], query_index[:columns]) 325 | plan3 = plan(query.statement) 326 | cost3 = plan3["Total Cost"] 327 | 328 | if !winning_cost || cost3 < winning_cost 329 | winning_cost = cost3 330 | winning_index = query_index 331 | winning_plan = plan3 332 | end 333 | end 334 | 335 | query.plans << winning_plan 336 | 337 | # duplicated from above 338 | # TODO DRY 339 | use_winning = 340 | if cost_savings2 341 | new_cost > 100 && winning_cost < new_cost * savings_ratio 342 | else 343 | winning_cost < query.initial_cost * savings_ratio 344 | end 345 | 346 | query_indexes = [winning_index] 347 | new_cost3 = winning_cost 348 | query.pass3_indexes = query_indexes 349 | 350 | if use_winning 351 | cost_savings3 = true 352 | else 353 | suggest_index = false 354 | end 355 | end 356 | 357 | if suggest_index 358 | query_indexes.each do |index| 359 | new_indexes[index] ||= index.dup 360 | (new_indexes[index][:queries] ||= []) << query 361 | end 362 | end 363 | 364 | query.indexes = query_indexes 365 | query.suggest_index = suggest_index 366 | query.new_cost = 367 | if suggest_index 368 | cost_savings3 ? new_cost3 : (cost_savings2 ? new_cost2 : new_cost) 369 | else 370 | query.initial_cost 371 | end 372 | 373 | # TODO optimize 374 | if @log_level.start_with?("debug") 375 | query.pass1_indexes = hypo_indexes_from_plan(query.index_mapping, query.plans[1], index_set) 376 | query.pass2_indexes = hypo_indexes_from_plan(query.index_mapping, query.plans[2], index_set) 377 | end 378 | end 379 | end 380 | 381 | # filter out covered indexes 382 | covered = Set.new 383 | new_indexes.values.each do |index| 384 | if index[:columns].size > 1 385 | covered << [index[:table], index[:columns].first(1)] 386 | end 387 | end 388 | 389 | new_indexes.values.reject { |i| covered.include?([i[:table], i[:columns]]) }.sort_by(&:to_a) 390 | end 391 | 392 | def log_indexes(indexes) 393 | if indexes.any? 394 | indexes.map { |i| "#{i[:table]} (#{i[:columns].join(", ")})" }.join(", ") 395 | else 396 | "None" 397 | end 398 | end 399 | 400 | def show_new_indexes(new_indexes) 401 | if new_indexes.any? 
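# list each suggested index (shown in green on a TTY)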
402 | new_indexes.each do |index| 403 | log colorize("Index found: #{index[:table]} (#{index[:columns].join(", ")})", :green) 404 | end 405 | else 406 | log "No new indexes found" 407 | end 408 | end 409 | 410 | def show_debug_info(new_indexes, queries) 411 | index_queries = new_indexes.flat_map { |i| i[:queries].sort_by(&:fingerprint) } 412 | if @log_level == "debug2" 413 | fingerprints = Set.new(index_queries.map(&:fingerprint)) 414 | index_queries.concat(queries.reject { |q| fingerprints.include?(q.fingerprint) }.sort_by(&:fingerprint)) 415 | end 416 | index_queries.each do |query| 417 | log "-" * 80 418 | log "Query #{query.fingerprint}" 419 | log "Total time: #{(query.total_time / 60000.0).round(1)} min, avg time: #{(query.total_time / query.calls.to_f).round} ms, calls: #{query.calls}" if query.calls > 0 420 | 421 | if query.fingerprint == "unknown" 422 | log "Could not parse query" 423 | elsif query.tables.empty? 424 | log "No tables" 425 | elsif query.missing_tables 426 | log "Tables not present in current database" 427 | elsif query.candidate_tables.empty? 428 | log "No candidate tables for indexes" 429 | elsif !query.initial_cost 430 | log "Could not run explain" 431 | elsif query.initial_cost < @min_cost 432 | log "Low initial cost: #{query.initial_cost}" 433 | elsif query.candidate_columns.empty? 434 | log "No candidate columns for indexes" 435 | elsif query.fully_analyzed? 436 | query_indexes = query.indexes || [] 437 | log "Start: #{query.costs[0]}" 438 | log "Pass1: #{query.costs[1]} : #{log_indexes(query.pass1_indexes || [])}" 439 | log "Pass2: #{query.costs[2]} : #{log_indexes(query.pass2_indexes || [])}" 440 | if query.costs[3] 441 | log "Pass3: #{query.costs[3]} : #{log_indexes(query.pass3_indexes || [])}" 442 | end 443 | log "Final: #{query.new_cost} : #{log_indexes(query.suggest_index ? query_indexes : [])}" 444 | if (query.pass1_indexes.any? || query.pass2_indexes.any?) && !query.suggest_index 445 | log "Need #{@min_cost_savings_pct}% cost savings to suggest index" 446 | end 447 | else 448 | log "Could not run explain" 449 | end 450 | log 451 | log query.statement 452 | log 453 | end 454 | end 455 | 456 | def execute(...) 457 | @connection.execute(...) 458 | end 459 | 460 | def plan(query) 461 | prepared = false 462 | transaction = false 463 | 464 | # try to EXPLAIN normalized queries 465 | # https://dev.to/yugabyte/explain-from-pgstatstatements-normalized-queries-how-to-always-get-the-generic-plan-in--5cfi 466 | normalized = query.include?("$1") 467 | generic_plan = normalized && @server_version_num >= 160000 468 | explain_normalized = normalized && !generic_plan 469 | if explain_normalized 470 | prepared_name = "dexter_prepared" 471 | execute("PREPARE #{prepared_name} AS #{safe_statement(query)}", pretty: false) 472 | prepared = true 473 | params = execute("SELECT array_length(parameter_types, 1) AS params FROM pg_prepared_statements WHERE name = $1", params: [prepared_name]).first["params"].to_i 474 | query = "EXECUTE #{prepared_name}(#{params.times.map { "NULL" }.join(", ")})" 475 | 476 | execute("BEGIN") 477 | transaction = true 478 | 479 | execute("SET LOCAL plan_cache_mode = force_generic_plan") 480 | end 481 | 482 | explain_prefix = generic_plan ? 
"GENERIC_PLAN, " : "" 483 | 484 | # strip semi-colons as another measure of defense 485 | plan = JSON.parse(execute("EXPLAIN (#{explain_prefix}FORMAT JSON) #{safe_statement(query)}", pretty: false, use_exec: generic_plan).first["QUERY PLAN"], max_nesting: 1000).first["Plan"] 486 | 487 | if @log_explain 488 | # Pass format to prevent ANALYZE 489 | puts execute("EXPLAIN (#{explain_prefix}FORMAT TEXT) #{safe_statement(query)}", pretty: false, use_exec: generic_plan).map { |r| r["QUERY PLAN"] }.join("\n") 490 | end 491 | 492 | plan 493 | ensure 494 | if explain_normalized 495 | execute("ROLLBACK") if transaction 496 | execute("DEALLOCATE #{prepared_name}") if prepared 497 | end 498 | end 499 | 500 | def create_hypothetical_index(table, columns) 501 | execute("SELECT * FROM hypopg_create_index('CREATE INDEX ON #{@connection.quote_ident(table)} (#{columns.map { |c| @connection.quote_ident(c) }.join(", ")})')").first["indexname"] 502 | end 503 | 504 | def indexes(tables) 505 | query = <<~SQL 506 | SELECT 507 | schemaname || '.' || t.relname AS table, 508 | ix.relname AS name, 509 | regexp_replace(pg_get_indexdef(i.indexrelid), '^[^\\(]*\\((.*)\\)$', '\\1') AS columns, 510 | regexp_replace(pg_get_indexdef(i.indexrelid), '.* USING ([^ ]*) \\(.*', '\\1') AS using 511 | FROM 512 | pg_index i 513 | INNER JOIN 514 | pg_class t ON t.oid = i.indrelid 515 | INNER JOIN 516 | pg_class ix ON ix.oid = i.indexrelid 517 | LEFT JOIN 518 | pg_stat_user_indexes ui ON ui.indexrelid = i.indexrelid 519 | WHERE 520 | schemaname || '.' || t.relname IN (#{tables.size.times.map { |i| "$#{i + 1}" }.join(", ")}) AND 521 | indisvalid = 't' AND 522 | indexprs IS NULL AND 523 | indpred IS NULL 524 | ORDER BY 525 | 1, 2 526 | SQL 527 | execute(query, params: tables.to_a).map { |v| v["columns"] = v["columns"].sub(") WHERE (", " WHERE ").split(", ").map { |c| unquote(c) }; v } 528 | end 529 | 530 | def unquote(part) 531 | if part && part.start_with?('"') && part.end_with?('"') 532 | part[1..-2] 533 | else 534 | part 535 | end 536 | end 537 | 538 | def safe_statement(statement) 539 | statement.gsub(";", "") 540 | end 541 | end 542 | end 543 | --------------------------------------------------------------------------------