├── tmp
└── .exists
├── .gitignore
├── INSTALL
├── MANIFEST.SKIP
├── MANIFEST
├── README
├── t
├── 00_basic.t
├── 99_yaml.t
├── 00_signature.t
├── 99_cleanup.t
├── 99_pod.t
├── 00_release.t
├── 99_spellcheck.t
├── 02_pgsi.t
└── 99_perlcritic.t
├── META.yml
├── Makefile.PL
├── LICENSE
├── Changes
├── SIGNATURE
├── .perlcriticrc
├── pgsi.html
└── pgsi.pl
/tmp/.exists:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | Makefile
2 | blib/
3 | pgsi_test_database/
4 | pm_to_blib
5 | tmp/
6 | pgsi-*.tar.gz
7 | MYMETA.*
8 | *.tmp
9 | Makefile.old
10 | test.*
--------------------------------------------------------------------------------
/INSTALL:
--------------------------------------------------------------------------------
1 | Install pgsi like any other Perl module:
2 |
3 | perl Makefile.PL
4 | make
5 | make test
6 | make install
7 |
8 | If you find any problems when running the above, please report them!
9 |
10 |
--------------------------------------------------------------------------------
/MANIFEST.SKIP:
--------------------------------------------------------------------------------
1 | ~$
2 | ^#
3 | Makefile$
4 | Makefile\.old$
5 | pm_to_blib$
6 | \.tar\.gz$
7 | ^tmp
8 | .git/
9 | pgsi_test_database
10 | log$
11 | ^RCS
12 | ^blib
13 | pgsi.html
14 | .*\.tmp
15 | MYMETA.*
16 | test.*
17 |
--------------------------------------------------------------------------------
/MANIFEST:
--------------------------------------------------------------------------------
1 | pgsi.pl
2 | README
3 | Changes
4 | Makefile.PL
5 | INSTALL
6 | LICENSE
7 | MANIFEST
8 | MANIFEST.SKIP
9 | META.yml
10 | SIGNATURE
11 | .perlcriticrc
12 | .gitignore
13 | t/00_release.t
14 | t/00_basic.t
15 | t/00_signature.t
16 | t/02_pgsi.t
17 | t/99_cleanup.t
18 | t/99_perlcritic.t
19 | t/99_spellcheck.t
20 | t/99_pod.t
21 | t/99_yaml.t
22 |
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 | pgsi.pl is a Perl utility for parsing and analyzing PostgreSQL logs to produce
2 | wiki-ready system impact reports. It was originally developed for Backcountry.com.
3 |
4 | Documentation is available as POD within the script:
5 |
6 | perldoc pgsi.pl
7 |
8 | More information is at the website:
9 |
10 | http://bucardo.org/wiki/pgsi/
11 |
12 |
13 |
--------------------------------------------------------------------------------
/t/00_basic.t:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # -*-mode:cperl; indent-tabs-mode: nil-*-
3 |
4 | ## Simply test that the main script compiles
5 |
6 | use 5.006;
7 | use strict;
8 | use warnings;
9 | use Test::More tests => 1;
10 | use IO::Handle;
11 | *STDOUT->autoflush(1);
12 | *STDERR->autoflush(1);
13 |
14 | eval {
15 | system "perl -c pgsi.pl 2>/dev/null";
16 | };
17 | is ($@, q{}, 'Program compiled cleanly');
18 |
--------------------------------------------------------------------------------
/t/99_yaml.t:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # -*-mode:cperl; indent-tabs-mode: nil-*-
3 |
4 | ## Test META.yml for YAMLiciousness, requires Test::YAML::Meta
5 |
6 | use 5.006;
7 | use strict;
8 | use warnings;
9 | use Test::More;
10 | use IO::Handle;
11 | *STDOUT->autoflush(1);
12 | *STDERR->autoflush(1);
13 |
14 | plan tests => 2;
15 |
16 | my $V = 0.03;
17 | eval {
18 | require Test::YAML::Meta;
19 | Test::YAML::Meta->import;
20 | };
21 | if ($@) {
22 | SKIP: {
23 | skip ('Skipping Test::YAML::Meta tests: module not found', 2);
24 | }
25 | }
26 | elsif ($Test::YAML::Meta::VERSION < $V) {
27 | SKIP: {
28 | skip ("Skipping Test::YAML::Meta tests: need version $V, but only have $Test::YAML::Meta::VERSION", 2);
29 | }
30 | }
31 | else {
32 | meta_spec_ok ('META.yml', 1.3);
33 | }
34 |
--------------------------------------------------------------------------------
/META.yml:
--------------------------------------------------------------------------------
1 | --- #YAML:1.0
2 | name : pgsi
3 | version : 1.7.2
4 | abstract : Postgres system impact report
5 | author:
6 | - Mark Johnson
7 |
8 | license : bsd
9 | dynamic_config : 1
10 | distribution_type : module
11 |
12 | requires:
13 | IO::Handle : 1.24
14 | build_requires:
15 | Test::Harness : 2.03
16 | Test::More : 0.61
17 | Module::Signature : 0.50
18 |
19 | provides:
20 | pgsi:
21 | file : pgsi.pl
22 | version : 1.7.2
23 |
24 | resources:
25 | homepage : http://bucardo.org/pgsi/index.html
26 | bugtracker : http://bucardo.org/pgsi/bugs.html
27 | repository : http://bucardo.org/pgsi/repo.html
28 | license : http://bucardo.org/pgsi/license.txt
29 | meta-spec:
30 | version : 1.3
31 | url : http://module-build.sourceforge.net/META-spec-v1.3.html
32 | generated_by : emacs
33 |
--------------------------------------------------------------------------------
/t/00_signature.t:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # -*-mode:cperl; indent-tabs-mode: nil-*-
3 |
4 | ## Test that our SIGNATURE file is valid - requires TEST_SIGNATURE env
5 |
6 | use 5.006;
7 | use strict;
8 | use warnings;
9 | use Test::More;
10 | use IO::Handle;
11 | *STDOUT->autoflush(1);
12 | *STDERR->autoflush(1);
13 |
14 | if (!$ENV{TEST_SIGNATURE}) {
15 | plan skip_all => 'Set the environment variable TEST_SIGNATURE to enable this test';
16 | }
17 | plan tests => 1;
18 |
19 | SKIP: {
20 | if (!eval { require Module::Signature; 1 }) {
21 | skip ('Must have Module::Signature to test SIGNATURE file', 1);
22 | }
23 | elsif ( !-e 'SIGNATURE' ) {
24 | fail ('SIGNATURE file was not found');
25 | }
26 | elsif ( ! -s 'SIGNATURE') {
27 | fail ('SIGNATURE file was empty');
28 | }
29 | else {
30 | my $ret = Module::Signature::verify();
31 | if ($ret eq Module::Signature::SIGNATURE_OK()) {
32 | pass ('Valid SIGNATURE file');
33 | }
34 | else {
35 | fail ('Invalid SIGNATURE file');
36 | }
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/Makefile.PL:
--------------------------------------------------------------------------------
1 | use ExtUtils::MakeMaker;
2 | use strict;
3 | use warnings;
4 | use 5.008003;
5 |
6 | my $VERSION = '1.7.2';
7 |
8 | WriteMakefile(
9 | NAME => 'pgsi',
10 | VERSION_FROM => 'pgsi.pl',
11 | PREREQ_PM => {
12 | 'Test::More' => '0.61',
13 | 'Test::Harness' => '2.03',
14 | 'IO::Handle' => '1.24',
15 | },
16 | EXE_FILES => ['pgsi.pl'],
17 | ABSTRACT => 'Postgres system impact report',
18 | AUTHOR => 'Mark Johnson ',
19 | LICENSE => 'BSD',
20 | NO_META => 1,
21 | NEEDS_LINKING => 0,
22 | NORECURS => 1,
23 | PM => { },
24 | clean => { FILES => '*~ tmp/* test.pg.log pgsi.html' },
25 | );
26 |
27 | package MY;
28 |
29 | sub manifypods {
30 | my $after = qq{\t\$(NOECHO) pod2html pgsi.pl > pgsi.html\n};
31 | $after .= qq{\t\$(NOECHO) \$(RM_F) pod*.tmp pm_to_blib\n};
32 | $after .= qq{\t\$(NOECHO) \$(PERL) -pi -e "s///" pgsi.html\n};
33 | $after .= qq{\t\$(NOECHO) \$(ECHO) Created pgsi.html\n};
34 | return shift->SUPER::manifypods(@_) . $after;
35 | }
36 |
37 |
--------------------------------------------------------------------------------
/t/99_cleanup.t:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # -*-mode:cperl; indent-tabs-mode: nil-*-
3 |
4 | ## Cleanup all database objects we may have created
5 | ## Shutdown the test database if we created one
6 |
7 | use 5.006;
8 | use strict;
9 | use warnings;
10 | use IO::Handle;
11 | *STDOUT->autoflush(1);
12 | *STDERR->autoflush(1);
13 | use Test::More tests => 1;
14 |
15 | my $testdir = 'pgsi_test_database';
16 |
17 | if (! -d $testdir) {
18 | pass ("(Cleanup) Test database directory does not exist\n");
19 | exit;
20 | }
21 |
22 | my $pidfile = "$testdir/postmaster.pid";
23 | if (! -e $pidfile) {
24 | pass ("(Cleanup) Test database PID file does not exist\n");
25 | exit;
26 | }
27 |
28 | open my $fh, '<', $pidfile or die qq{Could not open "$pidfile": $!\n};
29 | <$fh> =~ /(\d+)/ or die qq{No PID found in file "$pidfile"\n};
30 | my $pid = $1;
31 | close $fh or die qq{Could not close "$pidfile": $!\n};
32 |
33 | my $count = kill 0 => $pid;
34 | if ($count == 0) {
35 | unlink $pidfile;
36 | pass ("(Cleanup) Test database process not found, removed $pidfile\n");
37 | exit;
38 | }
39 |
40 | diag "Shutting down test database\n";
41 |
42 | kill 15 => $pid;
43 |
44 | pass ("(Cleanup) Test database asked to shutdown with a kill -15\n");
45 |
--------------------------------------------------------------------------------
/t/99_pod.t:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # -*-mode:cperl; indent-tabs-mode: nil-*-
3 |
4 | ## Check our Pod, requires Test::Pod
5 | ## Also done if available: Test::Pod::Coverage
6 | ## Requires TEST_AUTHOR env
7 |
8 | use 5.006;
9 | use strict;
10 | use warnings;
11 | use Test::More;
12 | use IO::Handle;
13 | *STDOUT->autoflush(1);
14 | *STDERR->autoflush(1);
15 |
16 | if (!$ENV{TEST_AUTHOR}) {
17 | plan skip_all => 'Set the environment variable TEST_AUTHOR to enable this test';
18 | }
19 |
20 | plan tests => 2;
21 |
22 | my $PODVERSION = '0.95';
23 | eval {
24 | require Test::Pod;
25 | Test::Pod->import;
26 | };
27 | SKIP: {
28 | if ($@ or $Test::Pod::VERSION < $PODVERSION) {
29 | skip ("Test::Pod $PODVERSION is required", 1);
30 | }
31 | pod_file_ok ('pgsi.pl');
32 | }
33 |
34 | ## We won't require everyone to have this, so silently move on if not found
35 | my $PODCOVERVERSION = '1.04';
36 | eval {
37 | require Test::Pod::Coverage;
38 | Test::Pod::Coverage->import;
39 | };
40 | SKIP: {
41 |
42 | if ($@ or $Test::Pod::Coverage::VERSION < $PODCOVERVERSION) {
43 | skip ("Test::Pod::Coverage $PODCOVERVERSION is required", 1);
44 | }
45 |
46 | my $trusted_names =
47 | [
48 | ];
49 |
50 | my $t='pgsi.pl pod coverage okay';
51 | pod_coverage_ok ('pgsi.pl', {trustme => $trusted_names}, $t);
52 | }
53 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2008, 2009, 2010
2 | Mark Johnson . All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 |
7 | 1. Redistributions of source code must retain the above copyright notice,
8 | this list of conditions and the following disclaimer.
9 | 2. Redistributions in binary form must reproduce the above copyright notice,
10 | this list of conditions and the following disclaimer in the documentation
11 | and/or other materials provided with the distribution.
12 |
13 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED
14 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
15 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
16 | EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
17 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
18 | OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
19 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
20 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
21 | IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
22 | OF SUCH DAMAGE.
23 |
--------------------------------------------------------------------------------
/Changes:
--------------------------------------------------------------------------------
1 | Changes for the Postgres System Impact program pgsi.pl
2 |
3 |
4 | Version 1.7.2
5 |
6 | - Better removal of comments (Joshua Tolley)
7 |
8 |
9 | Version 1.7.1, released June 1, 2011
10 |
11 | - Add "--help" output (Greg Sabino Mullane)
12 |
13 | - Better logic for median values (razvan-panda @ github)
14 |
15 |
16 | Version 1.7.0, released February 28, 2011
17 |
18 | - Special parsing of crowded queries (x=1234AND y=2345) (Greg Sabino Mullane)
19 |
20 | - Add "bare" mode, to handle stderr logs with no log_line_prefix (Joshua Tolley)
21 |
22 | - Add --interval option, for cases in which the log does not say (Joshua Tolley)
23 |
24 |
25 | Version 1.6.0, released January 31, 2011
26 |
27 | - Add in 'csv' as a valid mode (Abraham Ingersoll)
28 |
29 | - Add 'quiet' option (Greg Sabino Mullane)
30 |
31 |
32 | Version 1.5.0, released December 15, 2010
33 |
34 | - Allow for multiple files to be parsed by multiple --file= calls.
35 |
36 | - Always put COPY last
37 |
38 | - Reduce the number of decimal places to two from three.
39 |
40 | - Rename "Avg" to "Mean"
41 |
42 |
43 | Version 1.4.0, released December 9, 2010
44 |
45 | - Support for alternate syslog format
46 |
47 | - Fix bug when last process line was not a STATEMENT
48 |
49 | - Add median
50 |
51 |
52 | Version 1.3.0, released July 9, 2010
53 |
54 | - Handle lines with both duration and statement
55 |
56 | - Show the filename in the output
57 |
58 | - Better handling of SQL comments
59 |
60 | - Better regexes, keyword highlighting
61 |
62 | - Don't die on invalid lines, but warn instead, and only in verbose mode.
63 |
64 | - Do not show negative system impact values or negative durations
65 |
66 |
67 | Version 1.2.0, released January 2, 2010
68 |
69 | - Major code cleanup, add HTML mode
70 |
71 | - Much more formatting of results, with CSS colorings
72 |
73 |
74 | Version 1.1.1, released January 2009
75 |
76 | - First public release
77 |
--------------------------------------------------------------------------------
/SIGNATURE:
--------------------------------------------------------------------------------
1 | This file contains message digests of all files listed in MANIFEST,
2 | signed via the Module::Signature module, version 0.68.
3 |
4 | To verify the content in this distribution, first make sure you have
5 | Module::Signature installed, then type:
6 |
7 | % cpansign -v
8 |
9 | It will check each file's integrity, as well as the signature's
10 | validity. If "==> Signature verified OK! <==" is not displayed,
11 | the distribution may already have been compromised, and you should
12 | not run its Makefile.PL or Build.PL.
13 |
14 | -----BEGIN PGP SIGNED MESSAGE-----
15 | Hash: RIPEMD160
16 |
17 | SHA1 1a95fb0825122ba03a0e923c21befa4005e565eb .gitignore
18 | SHA1 f4c9d744f9805c07f67df245eacfbd7cc6b4e800 .perlcriticrc
19 | SHA1 f58ac79d456f565b7eff4b5e2ac08331ecbe8488 Changes
20 | SHA1 6b8822e6cafae62bafddc2b2466b51f5433b495e INSTALL
21 | SHA1 4e7d5e6453a4a552a1a670e0c5b8b96d2d043cfe LICENSE
22 | SHA1 e9d7f4e3e957eeee05020eb4379a7b8b2f17d797 MANIFEST
23 | SHA1 17e2f87d23167cf4cd34d43c393130e3396e8c5c MANIFEST.SKIP
24 | SHA1 f739b8faa3f8ed43f62573762fcd359579b42b10 META.yml
25 | SHA1 d6382baa4f40cac14856742532574d9f6c3aa918 Makefile.PL
26 | SHA1 d0d3bd82ef735575210c260d64810c2a5402bd39 README
27 | SHA1 1cd2974d1d658d84ff243a6c8a3bd9f2716dc496 pgsi.pl
28 | SHA1 7933b5aa8cc486885ff70b8d31349d8cde05929c t/00_basic.t
29 | SHA1 dc7459ce2f73becd40d7a624ac71c7554efce458 t/00_release.t
30 | SHA1 0ba35bd2edbabe1ab191cd006721f96998f2c732 t/00_signature.t
31 | SHA1 445dcb8fc226c7cece83aedbceb29ef7f17de2d9 t/02_pgsi.t
32 | SHA1 041a8261061e52e37be5ff77fed1b9a799cd5400 t/99_cleanup.t
33 | SHA1 f5a4e98989a41cdfc7f292f174d14849b3b3f8f8 t/99_perlcritic.t
34 | SHA1 58dfe7e3b035ebe0e8f36a236c4c169190ee1295 t/99_pod.t
35 | SHA1 24cda361ace47b0c6fa1def503783fcd6bf3be0a t/99_spellcheck.t
36 | SHA1 9f1900d5a79447a5b594f721b6c52cf0b30b59b4 t/99_yaml.t
37 | -----BEGIN PGP SIGNATURE-----
38 |
39 | iEYEAREDAAYFAk/I5uQACgkQvJuQZxSWSshs2wCdHbOZHBekQsGjKGpOKWCgwZjg
40 | ke8AoI0AkdKT6Il9vGAycN/1SHrv6ZP0
41 | =lU2x
42 | -----END PGP SIGNATURE-----
43 |
--------------------------------------------------------------------------------
/.perlcriticrc:
--------------------------------------------------------------------------------
1 |
2 | ## perlcritic file for pgsi
3 | ## Usage: perlcritic -profile
4 |
5 | verbose = 8
6 | severity = 1
7 |
8 | [-Bangs::ProhibitFlagComments]
9 | [-Bangs::ProhibitNumberedNames]
10 | [-Bangs::ProhibitVagueNames]
11 |
12 | [-BuiltinFunctions::ProhibitBooleanGrep]
13 | [-BuiltinFunctions::ProhibitComplexMappings]
14 | [-BuiltinFunctions::ProhibitReverseSortBlock]
15 |
16 | [-CodeLayout::ProhibitHardTabs]
17 | [-CodeLayout::ProhibitParensWithBuiltins]
18 | [-CodeLayout::ProhibitQuotedWordLists]
19 | [-CodeLayout::RequireTidyCode]
20 | [-CodeLayout::RequireTrailingCommaAtNewline]
21 | [-CodeLayout::RequireUseUTF8]
22 |
23 | [-Compatibility::ProhibitUnixDevNull]
24 |
25 | [-ControlStructures::ProhibitCascadingIfElse]
26 | [-ControlStructures::ProhibitCStyleForLoops]
27 | [-ControlStructures::ProhibitDeepNests]
28 | [-ControlStructures::ProhibitPostfixControls]
29 | [-ControlStructures::ProhibitUnlessBlocks]
30 |
31 | [-Documentation::PodSpelling]
32 | [-Documentation::RequirePodSections]
33 | [-Documentation::RequirePODUseEncodingUTF8]
34 |
35 | [-Editor::RequireEmacsFileVariables]
36 |
37 | [-ErrorHandling::RequireCarping]
38 | [-ErrorHandling::RequireUseOfExceptions]
39 | [-ErrorHandling::RequireCheckingReturnValueOfEval]
40 |
41 | [-InputOutput::ProhibitBacktickOperators]
42 | [-InputOutput::ProhibitInteractiveTest]
43 | [-InputOutput::RequireBracedFileHandleWithPrint]
44 | [-InputOutput::RequireBriefOpen]
45 | [-InputOutput::RequireCheckedSyscalls]
46 |
47 | [-Lax::ProhibitComplexMappings::LinesNotStatements]
48 | [-Lax::ProhibitEmptyQuotes::ExceptAsFallback]
49 |
50 | [-Miscellanea::ProhibitTies]
51 | [-Miscellanea::RequireRcsKeywords]
52 | [-Miscellanea::ProhibitUnrestrictedNoCritic]
53 |
54 | [-Modules::PerlMinimumVersion]
55 | [-Modules::ProhibitExcessMainComplexity]
56 | [-Modules::RequireFilenameMatchesPackage]
57 |
58 | [-NamingConventions::ProhibitAmbiguousNames]
59 | [-NamingConventions::Capitalization]
60 |
61 | [-References::ProhibitDoubleSigils]
62 |
63 | [-RegularExpressions::ProhibitCaptureWithoutTest]
64 | [-RegularExpressions::ProhibitComplexRegexes]
65 | [-RegularExpressions::ProhibitEnumeratedClasses]
66 | [-RegularExpressions::ProhibitEscapedMetacharacters]
67 | [-RegularExpressions::RequireBracesForMultiline]
68 | [-RegularExpressions::RequireDotMatchAnything]
69 | [-RegularExpressions::RequireExtendedFormatting]
70 | [-RegularExpressions::RequireLineBoundaryMatching]
71 |
72 | [-Subroutines::ProhibitQualifiedSubDeclarations]
73 | [-Subroutines::RequireArgUnpacking]
74 |
75 | [-Tics::ProhibitLongLines]
76 |
77 | [-ValuesAndExpressions::ProhibitAccessOfPrivateData]
78 | [-ValuesAndExpressions::ProhibitCommaSeparatedStatements]
79 | [-ValuesAndExpressions::ProhibitEmptyQuotes]
80 | [-ValuesAndExpressions::ProhibitImplicitNewlines]
81 | [-ValuesAndExpressions::ProhibitInterpolationOfLiterals]
82 | [-ValuesAndExpressions::ProhibitMagicNumbers]
83 | [-ValuesAndExpressions::ProhibitMixedBooleanOperators]
84 | [-ValuesAndExpressions::ProhibitNoisyQuotes]
85 | [-ValuesAndExpressions::ProhibitVersionStrings]
86 | [-ValuesAndExpressions::RequireNumberSeparators]
87 | [-ValuesAndExpressions::RequireNumericVersion]
88 | [-ValuesAndExpressions::RestrictLongStrings]
89 |
90 | [-Variables::ProhibitLocalVars]
91 | [-Variables::ProhibitPackageVars]
92 | [-Variables::ProhibitPunctuationVars]
93 | [-Variables::RequireLocalizedPunctuationVars]
94 | [-Variables::RequireNegativeIndices]
95 |
96 |
97 | [-Modules::RequireVersionVar]
98 |
99 | [CodeLayout::ProhibitTrailingWhitespace]
100 | severity = 5
101 |
102 | [Subroutines::ProhibitBuiltinHomonyms]
103 | severity = 5
104 |
--------------------------------------------------------------------------------
/t/00_release.t:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # -*-mode:cperl; indent-tabs-mode: nil-*-
3 |
4 | ## Make sure the version number is consistent in all places
5 | ## Check all files in MANIFEST for tabs and odd characters
6 |
7 | use 5.006;
8 | use strict;
9 | use warnings;
10 | use Data::Dumper;
11 | use Test::More;
12 | use lib 't','.';
13 |
14 | if (! $ENV{RELEASE_TESTING}) {
15 | plan (skip_all => 'Test skipped unless environment variable RELEASE_TESTING is set');
16 | }
17 |
18 | ## Grab all files from the MANIFEST to generate a test count
19 | my $file = 'MANIFEST';
20 | my @mfiles;
21 | open my $mfh, '<', $file or die qq{Could not open "$file": $!\n};
22 | while (<$mfh>) {
23 | next if /^#/;
24 | push @mfiles => $1 if /(\S.+)/o;
25 | }
26 | close $mfh or warn qq{Could not close "$file": $!\n};
27 |
28 | plan tests => 1 + @mfiles;
29 |
30 | my %v;
31 | my $vre = qr{(\d+\.\d+\.\d+\_?\d*)};
32 |
33 | ## Grab version from various files
34 | $file = 'META.yml';
35 | open my $fh, '<', $file or die qq{Could not open "$file": $!\n};
36 | while (<$fh>) {
37 | push @{$v{$file}} => [$1,$.] if /version\s*:\s*$vre/;
38 | }
39 | close $fh or warn qq{Could not close "$file": $!\n};
40 |
41 | $file = 'Makefile.PL';
42 | open $fh, '<', $file or die qq{Could not open "$file": $!\n};
43 | while (<$fh>) {
44 | push @{$v{$file}} => [$1,$.] if /VERSION = '$vre'/;
45 | }
46 | close $fh or warn qq{Could not close "$file": $!\n};
47 |
48 | $file = 'pgsi.pl';
49 | open $fh, '<', $file or die qq{Could not open "$file": $!\n};
50 | while (<$fh>) {
51 | push @{$v{$file}} => [$1,$.] if (/VERSION = '$vre'/ or /refers to version $vre/);
52 | }
53 | close $fh or warn qq{Could not close "$file": $!\n};
54 |
55 | $file = 'pgsi.html';
56 | open $fh, '<', $file or die qq{Could not open "$file": $!\n};
57 | while (<$fh>) {
58 | push @{$v{$file}} => [$1,$.] if /version $vre/;
59 | }
60 | close $fh or warn qq{Could not close "$file": $!\n};
61 |
62 | $file = 'Changes';
63 | open $fh, '<', $file or die qq{Could not open "$file": $!\n};
64 | while (<$fh>) {
65 | push @{$v{$file}} => [$1,$.] and last if /Version $vre/;
66 | }
67 | close $fh or warn qq{Could not close "$file": $!\n};
68 |
69 | my $good = 1;
70 | my $lastver;
71 | for my $filename (keys %v) {
72 | for my $glob (@{$v{$filename}}) {
73 | my ($ver,$line) = @$glob;
74 | if (! defined $lastver) {
75 | $lastver = $ver;
76 | }
77 | elsif ($ver ne $lastver) {
78 | $good = 0;
79 | }
80 | }
81 | }
82 |
83 | if ($good) {
84 | pass ("All version numbers are the same ($lastver)");
85 | }
86 | else {
87 | fail ('All version numbers were not the same!');
88 | for my $filename (sort keys %v) {
89 | for my $glob (@{$v{$filename}}) {
90 | my ($ver,$line) = @$glob;
91 | diag "File: $filename. Line: $line. Version: $ver\n";
92 | }
93 | }
94 | }
95 |
96 | ## Make sure all files in the MANIFEST are "clean": no tabs, no unusual characters
97 |
98 | for my $mfile (@mfiles) {
99 | file_is_clean($mfile);
100 | }
101 |
102 | sub file_is_clean {
103 |
104 | my $filename = shift or die;
105 |
106 | if (!open $fh, '<', $filename) {
107 | fail qq{Could not open "$filename": $!\n};
108 | return;
109 | }
110 | $good = 1;
111 | my $inside_copy = 0;
112 | while (<$fh>) {
113 | if (/^COPY .+ FROM stdin/i) {
114 | $inside_copy = 1;
115 | }
116 | if (/^\\./ and $inside_copy) {
117 | $inside_copy = 0;
118 | }
119 | if (/\t/ and $filename ne 'Makefile.PL' and $filename !~ /\.html$/ and ! $inside_copy) {
120 | diag "Found a tab at line $. of $filename\n";
121 | $good = 0;
122 | }
123 | if (! /^[\S ]*/) {
124 | diag "Invalid character at line $. of $filename: $_\n";
125 | $good = 0; die;
126 | }
127 | }
128 | close $fh or warn qq{Could not close "$filename": $!\n};
129 |
130 | if ($good) {
131 | pass ("The $filename file has no tabs or unusual characters");
132 | }
133 | else {
134 | fail ("The $filename file did not pass inspection!");
135 | }
136 |
137 | }
138 |
139 | exit;
140 |
--------------------------------------------------------------------------------
/t/99_spellcheck.t:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # -*-mode:cperl; indent-tabs-mode: nil-*-
3 |
4 | ## Spellcheck as much as we can
5 | ## Requires TEST_SPELL to be set
6 |
7 | use 5.006;
8 | use strict;
9 | use warnings;
10 | use Test::More;
11 | use IO::Handle;
12 | *STDOUT->autoflush(1);
13 | *STDERR->autoflush(1);
14 |
15 | my (@testfiles, $fh);
16 |
17 | if (!$ENV{TEST_SPELL}) {
18 | plan skip_all => 'Set the environment variable TEST_SPELL to enable this test';
19 | }
20 | elsif (!eval { require Text::SpellChecker; 1 }) {
21 | plan skip_all => 'Could not find Text::SpellChecker';
22 | }
23 | else {
24 | opendir my $dir, 't' or die qq{Could not open directory 't': $!\n};
25 | @testfiles = map { "t/$_" } grep { /^.+\.(t|pl)$/ } readdir $dir;
26 | closedir $dir or die qq{Could not closedir "$dir": $!\n};
27 | plan tests => 4+@testfiles;
28 | }
29 |
30 | my %okword;
31 | my $file = 'Common';
32 | while () {
33 | if (/^## (.+):/) {
34 | $file = $1;
35 | next;
36 | }
37 | next if /^#/ or ! /\w/;
38 | for (split) {
39 | $okword{$file}{$_}++;
40 | }
41 | }
42 |
43 |
44 | sub spellcheck {
45 |
46 | my ($desc, $text, $filename) = @_;
47 | my $check = Text::SpellChecker->new(text => $text);
48 | my %badword;
49 | while (my $word = $check->next_word) {
50 | next if $okword{Common}{$word} or $okword{$filename}{$word};
51 | $badword{$word}++;
52 | }
53 | my $count = keys %badword;
54 | if (! $count) {
55 | pass ("Spell check passed for $desc");
56 | return;
57 | }
58 | fail ("Spell check failed for $desc. Bad words: $count");
59 | for (sort keys %badword) {
60 | diag "$_\n";
61 | }
62 | return;
63 | }
64 |
65 |
66 | ## First, the plain ol' textfiles
67 | for my $file (qw/README Changes/) {
68 | if (!open $fh, '<', $file) {
69 | fail (qq{Could not find the file "$file"!});
70 | }
71 | else {
72 | { local $/; $_ = <$fh>; } ## no critic
73 | close $fh or warn qq{Could not close "$file": $!\n};
74 | spellcheck ($file => $_, $file);
75 | }
76 | }
77 |
78 | ## Now the embedded POD
79 | SKIP: {
80 | if (!eval { require Pod::Spell; 1 }) {
81 | skip ('Need Pod::Spell to test the spelling of embedded POD', 1);
82 | }
83 |
84 | for my $file (qw{pgsi.pl}) {
85 | if (! -e $file) {
86 | fail (qq{Could not find the file "$file"!});
87 | next;
88 | }
89 | my $string = qx{podspell $file};
90 | spellcheck ("POD from $file" => $string, $file);
91 | }
92 | }
93 |
94 | ## Now the comments
95 | SKIP: {
96 | if (!eval { require File::Comments; 1 }) {
97 | skip ('Need File::Comments to test the spelling inside comments', 1+@testfiles);
98 | }
99 |
100 | my $fc = File::Comments->new();
101 |
102 | my @files;
103 | for (sort @testfiles) {
104 | push @files, "$_";
105 | }
106 |
107 |
108 | for my $file (@testfiles, qw{pgsi.pl}) {
109 | ## Tests as well?
110 | if (! -e $file) {
111 | fail (qq{Could not find the file "$file"!});
112 | }
113 | my $string = $fc->comments($file);
114 | if (! $string) {
115 | fail (qq{Could not get comments from file $file});
116 | next;
117 | }
118 | $string = join "\n" => @$string;
119 | $string =~ s/=head1.+//sm;
120 | spellcheck ("comments from $file" => $string, $file);
121 | }
122 |
123 |
124 | }
125 |
126 |
127 | __DATA__
128 | ## These words are okay
129 |
130 | ## Common:
131 |
132 | Backcountry
133 | backend
134 | cardinality
135 | conf
136 | cperl
137 | cwd
138 | durations
139 | env
140 | http
141 | logfile
142 | Mullane
143 | namespace
144 | ol
145 | params
146 | perl
147 | perldoc
148 | pglog
149 | pgsi
150 | Postgres
151 | postgresql
152 | PostgreSQL
153 | regex
154 | Regex
155 | Sabino
156 | SELECTs
157 | Spellcheck
158 | SQL
159 | stdin
160 | stdout
161 | textfiles
162 | UPDATEs
163 | usr
164 | wiki
165 | YAML
166 | YAMLiciousness
167 | yml
168 |
169 | ## pgsi.pl
170 |
171 | arg
172 | csv
173 | Globals
174 | html
175 | Ingersoll
176 | pid
177 | refactor
178 | syslog
179 | Tolley
180 |
181 | ## README
182 |
183 | bucardo
184 |
185 | ## Changes
186 |
187 | colorings
188 | CSS
189 | durations
190 | filename
191 | github
192 | razvan
193 | regexes
194 | stderr
195 |
196 |
--------------------------------------------------------------------------------
/t/02_pgsi.t:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # -*-mode:cperl; indent-tabs-mode: nil-*-
3 |
4 | ## Simply test that the main script compiles and gives a version
5 |
6 | use 5.006;
7 | use strict;
8 | use warnings;
9 | use DBI;
10 | use Data::Dumper;
11 | use IO::Handle;
12 | use POSIX;
13 | *STDOUT->autoflush(1);
14 | *STDERR->autoflush(1);
15 | use Test::More tests => 6;
16 |
17 | use vars qw/$COM $info $count $t/;
18 |
19 | eval {
20 | system "perl -c pgsi.pl 2>/dev/null";
21 | };
22 | is ($@, q{}, 'Program compiled cleanly');
23 |
24 | ## Create a test database as needed
25 | my $testdir = 'pgsi_test_database';
26 | if (! -d $testdir) {
27 | diag "Creating test database cluster in $testdir\n";
28 | $COM = "initdb -D $testdir --locale=C -E UTF8 2>&1";
29 | eval {
30 | $info = qx{$COM};
31 | };
32 | $@ and BAIL_OUT "Failed to initdb: $@\n";
33 | }
34 |
35 | ## Make custom changes to the postgresql.conf
36 | my $file = "$testdir/postgresql.conf";
37 | open my $fh, '+<', $file or die qq{Could not open "$file": $!\n};
38 | my $found = 0;
39 | while (<$fh>) {
40 | if (/PGSI TESTING/) {
41 | $found = 1;
42 | last;
43 | }
44 | }
45 | my $fn = 'pg.%C.log';
46 | if (! $found) {
47 | diag "Configuring postgresql.conf\n";
48 | print $fh <<"EOT"
49 |
50 | ## PGSI TESTING
51 | port = 5555
52 | listen_addresses = ''
53 | max_connections = 5
54 | log_statement = 'all'
55 | log_duration = 'on'
56 | log_line_prefix = '%t %h postgres[%p]: [%l-1] ' ## Simulate syslog entries
57 | log_destination = 'stderr'
58 | log_directory = '.'
59 | log_filename = '$fn'
60 |
61 | EOT
62 | }
63 | close $fh or die qq{Could not close "$file": $!\n};
64 |
65 |
66 | my $pidfile = "$testdir/postmaster.pid";
67 | my $startup = 1;
68 | my $logfile = POSIX::strftime("$testdir/$fn", localtime); ## no critic
69 | if (-e $pidfile) {
70 | open my $fh, '<', $pidfile or die qq{Could not open "$pidfile": $!\n};
71 | <$fh> =~ /(\d+)/ or die qq{No PID found in file "$pidfile"\n};
72 | my $pid = $1;
73 | close $fh or die qq{Could not close "$pidfile": $!\n};
74 | ## Make sure it's still around
75 | $count = kill 0 => $pid;
76 | if ($count != 1) {
77 | warn qq{Server seems to have died, removing file "$pidfile"\n};
78 | unlink $pidfile or die qq{Could not remove file "$pidfile"\n};
79 | }
80 | }
81 | if (! -e $pidfile) {
82 | diag "Starting up test database\n";
83 | $COM = "pg_ctl -D $testdir -l $logfile start";
84 | eval {
85 | $info = qx{$COM};
86 | };
87 | $@ and BAIL_OUT "Failed to start database: $@\n";
88 | {
89 | last if -e $pidfile;
90 | sleep 0.1;
91 | redo;
92 | }
93 | ## Wait for "ready to accept connections"
94 | open my $fh, '<', $logfile or die qq{Could not open "$logfile": $!\n};
95 | seek $fh, -100, 2;
96 | LOOP: {
97 | while (<$fh>) {
98 | last LOOP if /system is ready/;
99 | }
100 | sleep 0.1;
101 | seek $fh, 0, 1;
102 | redo;
103 | }
104 | close $fh or die qq{Could not close "$logfile": $!\n};
105 | }
106 |
107 | ## Start tracking things sent to the logfile.
108 | ## Write a copy, so we only get things since we started up.
109 | my $testlog = 'test.pg.log';
110 | open my $tfh, '>', $testlog or die qq{Could not open "$testlog": $!\n};
111 | open my $lfh, '<', $logfile or die qq{Could not open "$logfile": $!\n};
112 | seek $lfh, 0, 2;
113 |
114 | ## Send a few commands to the backend, then test basic functionality
115 | my $dbh = DBI->connect('dbi:Pg:port=5555;dbname=postgres', '', '', {AutoCommit=>1, RaiseError=>1});
116 |
117 | $dbh->do("SELECT 999");
118 | $dbh->do("SELECT 888");
119 | $dbh->do("SELECT 777");
120 |
121 | $dbh->do("SELECT pg_client_encoding()");
122 |
123 | update_log_copy();
124 |
125 | $info = qx{perl pgsi.pl --file $testlog};
126 |
127 | ## Got the standard header?
128 | $t=q{pgsi returned the expected header when run};
129 | like ($info, qr{Query System Impact}, $t);
130 |
131 | ## Got the proper count?
132 | $t=q{pgsi returned the expected count};
133 | like ($info, qr{^\Q3
}ms, $t);
134 |
135 | $t=q{pgsi returned the expected query};
136 | like ($info, qr{SELECT.+\?}ms, $t);
137 |
138 | $t=q{pgsi returned the expected query};
139 | like ($info, qr{SELECT pg_client_encoding\(\)}ms, $t);
140 |
141 | $t=q{pgsi returned an average duration line};
142 | if ($info =~ qr{^(\d+)\.\d+ ms
}ms) {
143 | pass ($t);
144 | }
145 | else {
146 | fail ($t);
147 | }
148 |
149 | close $tfh or die qq{Could not close "$testlog": $!\n};
150 |
151 | exit;
152 |
153 | sub update_log_copy {
154 |
155 | my $action = shift || 0;
156 |
157 | seek $lfh, 0, 1;
158 | while (<$lfh>) {
159 | print $tfh $_;
160 | }
161 | return;
162 |
163 | } ## end of update_log_copy
164 |
--------------------------------------------------------------------------------
/t/99_perlcritic.t:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # -*-mode:cperl; indent-tabs-mode: nil-*-
3 |
4 | ## Run Perl::Critic against the source code and the tests
5 | ## This is highly customized, so take with a grain of salt
6 | ## Requires TEST_CRITIC to be set
7 |
8 | use 5.006;
9 | use strict;
10 | use warnings;
11 | use Test::More;
12 | use Data::Dumper;
13 | use IO::Handle;
14 | *STDOUT->autoflush(1);
15 | *STDERR->autoflush(1);
16 |
17 | my (@testfiles,%fileslurp,$t);
18 |
19 | if (!$ENV{TEST_CRITIC}) {
20 | plan skip_all => 'Set the environment variable TEST_CRITIC to enable this test';
21 | }
22 | elsif (!eval { require Perl::Critic; 1 }) {
23 | plan skip_all => 'Could not find Perl::Critic';
24 | }
25 | elsif ($Perl::Critic::VERSION < 0.23) {
26 | plan skip_all => 'Perl::Critic must be version 0.23 or higher';
27 | }
28 | else {
29 | $ENV{LANG} = 'C';
30 | opendir my $dir, 't' or die qq{Could not open directory 't': $!\n};
31 | @testfiles = map { "t/$_" } grep { /^.+\.(t|pl)$/ } readdir $dir;
32 | closedir $dir or die qq{Could not closedir "$dir": $!\n};
33 |
34 | my $testmore = 0;
35 | for my $file (@testfiles) {
36 | open my $fh, '<', $file or die qq{Could not open "$file": $!\n};
37 | my $line;
38 | while (defined($line = <$fh>)) {
39 | last if $line =~ /__DATA__/; ## perlcritic.t
40 | for my $func (qw/ok isnt pass fail cmp cmp_ok is_deeply unlike like/) { ## no skip
41 | next if $line !~ /\b$func\b/;
42 | next if $line =~ /$func \w/; ## e.g. 'skip these tests'
43 | next if $line =~ /[\$\%]$func/; ## e.g. $ok %ok
44 | $fileslurp{$file}{$.}{$func} = $line;
45 | $testmore++;
46 | }
47 | }
48 | close $fh or die qq{Could not close "$file": $!\n};
49 | }
50 | plan tests => 3 + @testfiles + $testmore;
51 | }
52 | ok (@testfiles, 'Found files in test directory');
53 |
54 | ## Make sure all Test::More function calls are standardized
55 | for my $file (sort keys %fileslurp) {
56 | for my $linenum (sort {$a <=> $b} keys %{$fileslurp{$file}}) {
57 | for my $func (sort keys %{$fileslurp{$file}{$linenum}}) {
58 | $t=qq{Test::More method "$func" is in standard format inside $file at line $linenum};
59 | ## Must be at start of line (optional whitespace and comment), a space, a paren, and something interesting
60 | like ($fileslurp{$file}{$linenum}{$func}, qr{^\s*#?$func \(['\S]}, $t);
61 | }
62 | }
63 | }
64 |
65 | ## Check some non-test files
66 | my $critic = Perl::Critic->new(-severity => 1);
67 |
68 | for my $filename (qw{pgsi.pl}) {
69 |
70 | if ($ENV{TEST_CRITIC_SKIPNONTEST}) {
71 | pass (qq{Skipping non-test file "$filename"});
72 | next;
73 | }
74 |
75 | -e $filename or die qq{Could not find "$filename"!};
76 | open my $oldstderr, '>&', \*STDERR or die 'Could not dupe STDERR';
77 | close STDERR or die qq{Could not close STDERR: $!};
78 | my @vio = $critic->critique($filename);
79 | open STDERR, '>&', $oldstderr or die 'Could not recreate STDERR'; ## no critic
80 | close $oldstderr or die qq{Could not close STDERR copy: $!};
81 | my $vios = 0;
82 | VIO: for my $v (@vio) {
83 | my $d = $v->description();
84 | (my $policy = $v->policy()) =~ s/Perl::Critic::Policy:://;
85 | my $source = $v->source();
86 |
87 | ## Allow our sql and qw blocks to have tabs:
88 | next if $policy =~ /ProhibitHardTabs/ and ($source =~ /sql = qq/i or $source =~ /qw[\(\/]/);
89 |
90 | $vios++;
91 | my $f = $v->filename();
92 | my $l = $v->location();
93 | my $line = $l->[0];
94 | diag "\nFile: $f (line $line)\n";
95 | diag "Vio: $d\n";
96 | diag "Policy: $policy\n";
97 | diag "Source: $source\n\n";
98 | }
99 | if ($vios) {
100 | fail (qq{Failed Perl::Critic tests for file "$filename": $vios});
101 | }
102 | else {
103 | pass (qq{File "$filename" passed all Perl::Critic tests});
104 | }
105 |
106 | }
107 |
108 | ## Specific exclusions for test scripts:
109 | my %ok =
110 | (yaml => {
111 | sub => 'meta_spec_ok',
112 | },
113 | pod => {
114 | sub => 'pod_file_ok pod_coverage_ok',
115 | },
116 | signature => {
117 | sub => 'verify SIGNATURE_OK',
118 | },
119 | );
120 | for my $f (keys %ok) {
121 | for my $ex (keys %{$ok{$f}}) {
122 | if ($ex eq 'sub') {
123 | for my $foo (split /\s+/ => $ok{$f}{sub}) {
124 | push @{$ok{$f}{OK}} => qr{Subroutine "$foo" (?:is neither|not exported)};
125 | }
126 | }
127 | else {
128 | die "Unknown exception '$ex'\n";
129 | }
130 | }
131 | }
132 |
133 | ## Allow Test::More subroutines
134 | my $tm = join '|' => (qw/skip plan pass fail is ok diag BAIL_OUT/);
135 | my $testmoreok = qr{Subroutine "$tm" is neither};
136 |
137 | ## Create a new critic for the tests
138 | $critic = Perl::Critic->new(-severity => 1);
139 |
140 | my $count = 1;
141 | for my $filename (sort @testfiles) {
142 | -e $filename or die qq{Could not find "$filename"!};
143 | my @vio = $critic->critique($filename);
144 | my $vios = 0;
145 | VIO: for my $v (@vio) {
146 | my $d = $v->description();
147 | (my $policy = $v->policy()) =~ s/Perl::Critic::Policy:://;
148 | my $source = $v->source();
149 | my $f = $v->filename();
150 |
151 | ## Skip common Test::More subroutines:
152 | next if $d =~ $testmoreok;
153 |
154 | ## Skip other specific items:
155 | for my $k (sort keys %ok) {
156 | next unless $f =~ /$k/;
157 | for (@{$ok{$k}{OK}}) {
158 | next VIO if $d =~ $_;
159 | }
160 | }
161 |
162 | ## Skip included file package warning
163 | next if $policy =~ /RequireExplicitPackage/ and $filename =~ /setup/;
164 |
165 | $vios++;
166 | my $l = $v->location();
167 | my $line = $l->[0];
168 | diag "\nFile: $f (line $line)\n";
169 | diag "Vio: $d\n";
170 | diag "Policy: $policy\n";
171 | diag "Source: $source\n\n";
172 | }
173 | if ($vios) {
174 | fail (qq{Failed Perl::Critic tests for file "$filename": $vios});
175 | }
176 | else {
177 | pass (qq{File "$filename" passed all Perl::Critic tests});
178 | }
179 | }
180 |
181 | pass ('Finished Perl::Critic testing');
182 |
183 |
--------------------------------------------------------------------------------
/pgsi.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | pgsi.pl - Produce system impact reports for a PostgreSQL database.
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 | pgsi.pl - Produce system impact reports for a PostgreSQL database.
42 |
43 |
44 |
45 |
46 | This documentation refers to version 1.7.2
47 |
48 |
49 |
50 |
51 | pgsi.pl [options] < pglog_slice.log
52 |
53 | or...
54 | pgsi.pl --file pglog_slice.log [options]
55 |
56 | - Options
57 |
58 | -
59 |
60 | --file
61 | --query-types
62 | --top-10
63 | --all
64 | --pg-version
65 | --offenders
66 |
67 |
68 |
69 |
70 |
71 |
72 | System Impact (SI) is a measure of the overall load a given query imposes on a
73 | server. It is expressed as a percentage of a query's average duration over its
74 | average interval between successive calls. E.g., SI=80 indicates that a given
75 | query is active 80% of the time during the entire log interval. SI=200
76 | indicates the query is running twice at all times on average. Thus, the lower
77 | the SI, the better.
78 | The goal of SI is to identify those queries most likely to cause performance
79 | degradation on the database during heaviest traffic periods. Focusing
80 | exclusively on the least efficient queries can hide relatively fast-running
81 | queries that saturate the system more because they are called far more
82 | frequently. By contrast, focusing only on the most-frequently called queries
83 | will tend to emphasize small, highly optimized queries at the expense of
84 | slightly less popular queries that spend much more of their time between
85 | successive calls in an active state. These are often smaller queries that have
86 | failed to be optimized and punish a system severely under heavy load.
87 | One thing SI does not do is distinguish between high-value queries represented
88 | by extended active states or long durations due to blocking locks. Either
89 | condition is worthy of attention, but determining which is to blame will
90 | require independent investigation.
91 | Queries are canonized with placeholders representing literals or arguments.
92 | Further, IN lists are canonized so that variation from query to query only
93 | in the number of elements in the IN list will not be treated as distinct
94 | queries.
95 | Some examples of the "same" query:
96 |
97 | -
98 |
99 | SELECT col FROM table WHERE code = 'One';
100 | SELECT col FROM table WHERE code = 'Sixty-Three';
101 |
102 | -
103 |
104 | SELECT foo FROM bar WHERE fuzz = $1 AND color IN ('R','G','B');
105 | Select FOO
106 | from bar
107 | WhErE fuzz = '56'
108 | AND color IN ('R', $1);
109 |
110 |
111 | Differences in capitalization and whitespace are irrelevant.
112 |
113 |
114 |
115 | Pass in log data on stdin:
116 |
117 | pgsi.pl < some_log_slice.log
118 | cat some_log_slice.log | pgsi.pl
119 | Or use the --file option:
120 |
121 | pgsi.pl --file=some_log_slice.log
122 | Or read in more than one file at a time:
123 |
124 | pgsi.pl --file=logfile1.log --file=logfile2.log
125 | If more than one file is given, they must be given in chronological order.
126 | Log data must comply with a specific format and must be from contiguous
127 | activity. The code makes the assumption that the overall interval of activity
128 | is the time elapsed between the first and last log entries. If there are
129 | several blocks of logs to analyze, they must be run separately.
130 | Required format is the following in syslog:
131 | YYYY-MM-DDTHH24:MI:SS(-TZ:00)? server postgres[pid]:
132 | This also requires that log_statement is set to 'all' and
133 | that log_duration be set to 'on' in postgresql.conf.
134 | If you are not using syslog, you can simulate the format with the following:
135 | log_line_prefix = '%t %h postgres[%p]: [%l-1] ' ## Simulate syslog for pgsi.
136 |
137 |
138 |
139 |
140 | - --query-types
141 |
142 | -
143 |
Query impact is segregated by types. I.e., all the SELECTs together, all
144 | UPDATEs together, etc. Typically it is assumed that SELECT is the most
145 | interesting (and is by itself the default), but any query type may be analyzed.
146 | Multiples are provided as space- or comma-separated lists.
147 |
148 | pgsi.pl --query-types="select, update, copy, create"
149 | The code will produce a unique report for each type when used with the --all
150 | and/or --top-10 file-pattern options (see below).
151 |
152 | - --top-10, --all
153 |
154 | -
155 |
Supplies a file pattern and optional directory path into which the reports
156 | should be written per --query-type. The pattern is prefixed with the
157 | --query-type and host for this report and placed into the requested directory
158 | (or cwd if no path is present).
159 | --all will list every canonized query encountered, which is likely to
160 | contain a large number of queries of no interest (those with negligible
161 | impact).
162 | --top-10 limits the report to only the 10 entries with the greatest SI.
163 |
164 | pgsi.pl \
165 | --query-types=select,update \
166 | --all=si_reports/monday_10am_1pm.all.txt \
167 | --top-10=si_reports/monday_10am_1pm.t10.txt
168 | This will produce the following reports in si_reports/ for a database running
169 | on server db1:
170 |
171 | SELECT-db1-monday_10am_1pm.all.txt
172 | UPDATE-db1-monday_10am_1pm.all.txt
173 | SELECT-db1-monday_10am_1pm.t10.txt
174 | UPDATE-db1-monday_10am_1pm.t10.txt
175 | If --top-10 is not supplied, then no top 10 report is generated. If --all is
176 | not supplied, then the report(s) print to stdout.
177 |
178 | - --pg-version
179 |
180 | -
181 |
Currently, this might better be described as either "before DETAIL" or "after
182 | DETAIL". The code was written against PG 8.1 originally, but when 8.2 came out
183 | the addition of DETAIL log entries forced a different parser. That unfortunate
184 | timing led to the assumption that log construction would change with each
185 | release. Going forward, --pg-version will be (other than 8.1) the first version
186 | in which this log format was encountered.
187 | --pg-version is only either 8.1 or 8.2 (8.2 is default). It's unknown how far
188 | back in versions the 8.1 format holds, but 8.2 holds for itself and 8.3. So,
189 | unless you're working against logs generated by a PG version less than 8.2, you
190 | do not need to include this option (but it might save you some trouble if a new
191 | format comes at a later version and the default bumps up to the most recent
192 | while you stay on your older version).
193 |
194 | pgsi.pl --pg-version=8.1
195 |
196 | - --offenders
197 |
198 | -
199 |
Number of best and worst queries to included with the report, in terms of
200 | overall duration of execution. Enough log information is listed along with the
201 | duration such that tracking down the original query (not the canonized
202 | version) is straightforward. The offenders list can be very useful for a query
203 | that is causing trouble in a handful of permutations, but most of the time is
204 | behaving well.
205 | The list in conjunction with standard deviation gives an overall indication of
206 | performance volatility.
207 | --offenders=5 produces additional output in the report that looks something
208 | like the following example:
209 |
210 | Best
211 | 1. 2009-01-12T10:11:49-07:00 db1 postgres[4692] -- 4.833 ms
212 | 2. 2009-01-12T10:31:19-07:00 db1 postgres[1937] -- 4.849 ms
213 | 3. 2009-01-12T09:16:20-07:00 db1 postgres[20294] -- 4.864 ms
214 | 4. 2009-01-12T10:16:54-07:00 db1 postgres[20955] -- 4.867 ms
215 | 5. 2009-01-12T10:32:16-07:00 db1 postgres[5010] -- 4.871 ms
216 |
217 | Worst
218 | 1. 2009-01-12T10:00:07-07:00 db1 postgres[2804] -- 2175.650 ms
219 | 2. 2009-01-12T09:30:07-07:00 db1 postgres[2804] -- 2090.914 ms
220 | 3. 2009-01-12T10:00:18-07:00 db1 postgres[2804] -- 2046.608 ms
221 | 4. 2009-01-12T09:30:10-07:00 db1 postgres[2804] -- 1954.604 ms
222 | 5. 2009-01-12T11:20:11-07:00 db1 postgres[2804] -- 1788.576 ms
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 | -
231 |
If queries contain exceptionally long IN lists, the regex that attempts to
232 | flatten them can run into a perl recursion limit. In that event, the query will
233 | keep the placeholders of the IN list, making it unique compared to the same
234 | query with a different cardinality of list params in the same IN. This
235 | deficiency should only surface on IN lists with composite parameters [e.g., IN
236 | ((?,?,...,?),(?,?,...,?),...,(?,?,...,?))]. For scalar IN lists, there should
237 | be no such limit.
238 |
239 |
240 |
241 |
242 |
243 |
244 | Original code:
245 | Mark Johnson (mark@endpoint.com), End Point Corp.
246 | Contributions:
247 | Ethan Rowe (ethan@endpoint.com), End Point Corp.
248 | Greg Sabino Mullane (greg@endpoint.com), End Point Corp.
249 | Daniel Browning (db@endpoint.com), End Point Corp.
250 | Joshua Tolley <josh@endpoint.com>, End Point Corp.
251 | Abraham Ingersoll <abe@abe.us>
252 |
253 |
254 |
255 |
256 | Copyright 2008-2011 Mark Johnson (mark@endpoint.com)
257 | This module is free software; you can redistribute it and/or modify it
258 | under the same terms as Perl itself. See the LICENSE file.
259 |
260 |
261 |
262 |
263 |
--------------------------------------------------------------------------------
/pgsi.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # -*-mode:cperl; indent-tabs-mode: nil-*-
3 |
4 | ## Parse Postgres logs and determine the system impact
5 | ##
6 | ## Usage: pgsi.pl [options] < pglog_slice.log
7 | ##
8 | ## See the POD inside this file for full documentation:
9 | ## perldoc pgsi.pl
10 | ##
11 | ## Mark Johnson
12 |
13 | package PGSI;
14 |
15 | use strict;
16 | use warnings;
17 | use Data::Dumper qw( Dumper );
18 |
19 | use Time::Local qw();
20 | use Getopt::Long;
21 | use IO::Handle;
22 | use 5.008003;
23 |
24 | our $VERSION = '1.7.2';
25 |
26 | *STDOUT->autoflush(1);
27 | *STDERR->autoflush(1);
28 |
29 | my $resolve_called = 0;
30 |
31 | my (
32 | %query,
33 | %canonical_q,
34 | $first_line,
35 | $last_line,
36 | %seen,
37 | );
38 |
39 | my %opt = (
40 | 'top-10' => '',
41 | 'all' => '',
42 | 'query-types' => '',
43 | 'pg-version' => '',
44 | 'offenders' => 0,
45 | 'verbose' => 0,
46 | 'format' => 'html',
47 | 'mode' => 'pid',
48 | 'color' => 1,
49 | 'quiet' => 0,
50 | 'interval' => undef,
51 | );
52 |
53 | my $USAGE = qq{Usage: $0 -f filename [options]\n};
54 |
55 | GetOptions ( ## no critic
56 | \%opt,
57 | (
58 | 'top-10=s',
59 | 'all=s',
60 | 'query-types=s',
61 | 'pg-version=s',
62 | 'offenders=i',
63 | 'version',
64 | 'help',
65 | 'verbose+',
66 | 'file|f=s@',
67 | 'format=s',
68 | 'mode=s',
69 | 'color!',
70 | 'quiet',
71 | 'interval=s',
72 | )
73 | ) or die $USAGE;
74 |
75 | if ($opt{version}) {
76 | print "$0 version $VERSION\n";
77 | exit 0;
78 | }
79 |
80 | if ($opt{help}) {
81 | print $USAGE;
82 | print "Full documentation at: http://bucardo.org/wiki/pgsi\n";
83 | exit 0;
84 | }
85 |
86 | ## Prepare formatting vars based on opt{format}
87 | ## The default is 'html':
88 | my $fmstartbold = q{};
89 | my $fmendbold = q{};
90 | my $fmstartheader1 = q{};
91 | my $fmendheader1 = q{
};
92 | my $fmstartheader2 = q{};
93 | my $fmendheader2 = q{
};
94 | my $fmstartheader3 = q{};
95 | my $fmendheader3 = q{
};
96 | my $fmsep = q{
};
97 | my $fmstartquery = '';
98 | my $fmendquery = '
';
99 | if ($opt{format} eq 'mediawiki') {
100 | $fmstartbold = q{'''};
101 | $fmendbold = q{'''};
102 | $fmstartheader1 = q{==};
103 | $fmendheader1 = q{==};
104 | $fmstartheader2 = q{====};
105 | $fmendheader2 = q{====};
106 | $fmstartheader3 = q{=====};
107 | $fmendheader3 = q{=====};
108 | $fmsep = q{----};
109 | $fmstartquery = ' ';
110 | $fmendquery = '';
111 | }
112 |
113 | if ($opt{format} eq 'tsv') {
114 | $fmstartbold = q{};
115 | $fmendbold = q{};
116 | $fmstartheader1 = q{};
117 | $fmendheader1 = q{};
118 | $fmstartheader2 = q{};
119 | $fmendheader2 = q{};
120 | $fmstartheader3 = q{};
121 | $fmendheader3 = q{};
122 | $fmsep = q{};
123 | $fmstartquery = '';
124 | $fmendquery = '';
125 | }
126 |
127 | my $minwrap1 = 80;
128 | my $minwrap2 = 40; ## WHEN .. THEN
129 |
130 | ## Any keywords after the first
131 | my $indent1 = ' ' x 1;
132 |
133 | ## Wrapping of long lines of a single type
134 | my $indent2 = ' ' x 3;
135 |
136 | ## Secondary wrapping of a single type
137 | my $indent3 = ' ' x 5;
138 |
139 | ## Even more indenting
140 | my $indent4 = ' ' x 7;
141 |
142 | ## Special strings for internal comments
143 | my $STARTCOMMENT = "startpgsicomment";
144 | my $ENDCOMMENT = "endpgsicomment";
145 |
146 | ## We either read from a file or from stdin
147 | my (@fh, $fh);
148 | if ($opt{file}) {
149 | my $x = 0;
150 | my %dupe;
151 | for my $file (@{$opt{file}}) {
152 | if ($dupe{$file}++) {
153 | die "File specified more than once: $file\n";
154 | }
155 | open $fh[$x], '<', $file or die qq{Could not open "$file": $!\n};
156 | $x++;
157 | }
158 | }
159 | else {
160 | push @fh => \*STDIN;
161 | }
162 |
163 | for (@fh) {
164 | $fh = $_;
165 | ## Do the actual parsing. Depends on what kind of log file we have
166 | if ('pid' eq $opt{mode}) {
167 | parse_pid_log();
168 | }
169 | elsif ('syslog' eq $opt{mode}) {
170 | parse_syslog_log();
171 | }
172 | elsif ('csv' eq $opt{mode}) {
173 | parse_csv_log();
174 | }
175 | elsif ('bare' eq $opt{mode}) {
176 | parse_bare_log();
177 | }
178 | else {
179 | die qq{Unknown mode: $opt{mode}\n};
180 | }
181 | }
182 |
183 |
184 | sub parse_csv_log {
185 |
186 | ## Each line of interest, with PIDs as the keys
187 | my %logline;
188 |
189 | ## The last PID we saw. Used to populate multi-line statements correctly.
190 | my $lastpid = 0;
191 |
192 | require Text::CSV_XS;
193 | my $csv = Text::CSV_XS->new({ binary => 1 }) or die;
194 | $csv->column_names(qw(log_time user_name database_name process_id connection_from session_id session_line_num command_tag session_start_time virtual_transaction_id transaction_id error_severity sql_state_code message detail hint internal_query internal_query_pos context query query_pos location application_name));
195 | while (my $line = $csv->getline_hr($fh)) {
196 |
197 | if ($opt{verbose} >= 2) {
198 | warn "Checking line (" . Dumper($line) . ")\n";
199 | }
200 |
201 | my $date = $line->{log_time};
202 | my $pid = $line->{process_id};
203 | my $more = $line->{message};
204 |
205 | ## All we care about is statements and durations
206 |
207 | ## Got a duration? Store it for this PID and move on
208 | if ($more =~ /duration: (\d+\.\d+) ms$/o) {
209 | my $duration = $1;
210 | ## Store this duration, overwriting what is (presumably) -1
211 | $logline{$pid}{duration} = $duration;
212 | next;
213 | }
214 |
215 | ## Got a statement with optional duration
216 | ## Handle the old statement and store the new
217 | if ($more =~ /(?:duration: (\d+\.\d+) ms )?statement:\s+(.+)/o) {
218 |
219 | my ($duration,$statement) = ($1,$2);
220 |
221 | ## Make sure any subsequent multi-line statements know where to go
222 | $lastpid = $pid;
223 |
224 | ## If this PID has something stored, process it first
225 | if (exists $logline{$pid}) {
226 | resolve_pid_statement($logline{$pid});
227 | }
228 |
229 | ## Store and blow away any old value
230 | $logline{$pid} = {
231 | line => $line,
232 | statement => $statement,
233 | duration => -1,
234 | date => $date,
235 | };
236 |
237 | if (defined $duration) {
238 | $logline{$pid}{duration} = $duration;
239 | }
240 |
241 | ## Make sure we have a first and a last line
242 | if (not defined $first_line) {
243 | $first_line = $last_line = $line;
244 | }
245 |
246 | } ## end duration + statement
247 |
248 | } ## end each line
249 |
250 | defined $first_line or die qq{Could not find any matching lines: incorrect format??\n};
251 |
252 | ## Process any PIDS that are left
253 | for my $pid (keys %logline) {
254 | resolve_pid_statement($logline{$pid});
255 | }
256 |
257 | ## Store the last PID seen as the last line
258 | $last_line = $logline{$lastpid}{line};
259 |
260 | return;
261 |
262 | } ## end of parse_csv_log
263 |
264 |
265 | sub parse_pid_log {
266 |
267 | ## Parse a log file in which the pid appears in the log_line_prefix
268 | ## and multi-line statements start with tabs
269 |
270 | ## Each line of interest, with PIDs as the keys
271 | my %logline;
272 |
273 | ## The last PID we saw. Used to populate multi-line statements correctly.
274 | my $lastpid = 0;
275 |
276 | ## We only store multi-line if the previous real line was a log: statement
277 | my $lastwaslog=0;
278 |
279 | while (my $line = <$fh>) {
280 |
281 | if ($opt{verbose} >= 2) {
282 | chomp $line;
283 | warn "Checking line ($line)\n";
284 | }
285 |
286 | ## There are only two possiblities we care about:
287 | ## 1. tab-prefixed line (continuation of a literal)
288 | ## 2. new date-and-pid-prefixed line
289 |
290 | if ($line =~ /^\t(.*)/) {
291 | ## If the last real line was a statement, append this to last PID seen
292 | if ($lastwaslog) {
293 | (my $extra = $1) =~ s/^\s+//;
294 | ## If a comment, treat carefully
295 | $extra =~ s/^(\s*\-\-.+)/$STARTCOMMENT $1 $ENDCOMMENT /;
296 | $logline{$lastpid}{statement} .= " $extra";
297 | }
298 | next;
299 | }
300 |
301 | ## Got a valid PID line?
302 | if ($line =~ /^(\d\d\d\d\-\d\d\-\d\d \d\d:\d\d:\d\d)\D+(\d+)\s*(.+)/o) {
303 |
304 | my ($date,$pid,$more) = ($1,$2,$3);
305 |
306 | ## Example:
307 | ## 2009-12-03 08:12:05 PST 11717 4b17e355.2dc5 127.0.0.1 dbuser dbname LOG: statement: SELECT ...
308 |
309 | ## Reset the last log indicator
310 | $lastwaslog = 0;
311 |
312 | ## All we care about is statements and durations
313 | next if ($more =~ /LOG: (?:duration: (\d+\.\d+) ms )?(?:bind|parse) [^:]+:.*/o);
314 |
315 | ## Got a duration? Store it for this PID and move on
316 | if ($more =~ /LOG: duration: (\d+\.\d+) ms$/o) {
317 | my $duration = $1;
318 | ## Store this duration, overwriting what is (presumably) -1
319 | $logline{$pid}{duration} = $duration;
320 | next;
321 | }
322 |
323 | ## Got a statement with optional duration
324 | ## Handle the old statement and store the new
325 | if ($more =~ /
326 | LOG:\s\s
327 | (?:duration:\s(\d+\.\d+)\sms\s\s)?
328 | ((?:(?:statement)|(?:execute\s[^:]+)))
329 | :(.*)$
330 | /x) {
331 |
332 | my ($duration,$isexecute,$statement) = ($1,$2,$3);
333 |
334 | ## Slurp in any multi-line continuatiouns after this
335 | $lastwaslog = 1;
336 |
337 | ## Make sure any subsequent multi-line statements know where to go
338 | $lastpid = $pid;
339 |
340 | ## If this PID has something stored, process it first
341 | if (exists $logline{$pid}) {
342 | resolve_pid_statement($logline{$pid});
343 | }
344 |
345 | ## Store and blow away any old value
346 | $logline{$pid} = {
347 | line => $line,
348 | statement => $statement,
349 | duration => -1,
350 | date => $date,
351 | };
352 |
353 | if (defined $duration) {
354 | $logline{$pid}{duration} = $duration;
355 | }
356 |
357 | ## Make sure we have a first and a last line
358 | if (not defined $first_line) {
359 | $first_line = $last_line = $line;
360 | }
361 |
362 | } ## end LOG: statement
363 |
364 | next;
365 |
366 | } ## end if valid PID line
367 |
368 | if ($opt{verbose}) {
369 | chomp $line;
370 | warn "Invalid line $.: $line\n";
371 | }
372 |
373 | } ## End while
374 |
375 | defined $first_line or die qq{Could not find any matching lines: incorrect format??\n};
376 |
377 | ## Process any PIDS that are left
378 | for my $pid (keys %logline) {
379 | resolve_pid_statement($logline{$pid});
380 | }
381 |
382 | ## Store the last PID seen as the last line
383 | $last_line = $logline{$lastpid}{line};
384 |
385 | return;
386 |
387 | } ## end of parse_pid_log
388 |
389 | sub parse_bare_log {
390 |
391 | ## Parse a log file in which multi-line statements start with tabs
392 |
393 | ## Each line of interest
394 | my @logline;
395 |
396 | my $more;
397 |
398 | ## We only store multi-line if the previous real line was a log: statement
399 | my $lastwaslog=0;
400 |
401 | while (my $line = <$fh>) {
402 |
403 | if ($opt{verbose} >= 2) {
404 | chomp $line;
405 | warn "Checking line ($line)\n";
406 | }
407 |
408 | ## There are only two possiblities we care about:
409 | ## 1. tab-prefixed line (continuation of a literal)
410 | ## 2. new line
411 |
412 | if ($line =~ /^\t(.*)/) {
413 | ## If the last real line was a statement, append this to last PID seen
414 | if ($lastwaslog) {
415 | (my $extra = $1) =~ s/^\s+//;
416 | ## If a comment, treat carefully
417 | $extra =~ s/^(\s*\-\-.+)/$STARTCOMMENT $1 $ENDCOMMENT /;
418 | $logline[$#logline]->{statement} .= " $extra";
419 | }
420 | next;
421 | }
422 |
423 | ## Lines will contain "parse <.*>", "bind <.*>", "execute <.*>" or "statement", with a duration
424 | if ($line =~ /^LOG: /) {
425 | $more = $line;
426 |
427 | ## Example:
428 | ## 2009-12-03 08:12:05 PST 11717 4b17e355.2dc5 127.0.0.1 dbuser dbname LOG: statement: SELECT ...
429 |
430 | ## Reset the last log indicator
431 | $lastwaslog = 0;
432 |
433 | ## All we care about is statements/executes and durations
434 | next if ($more =~ /LOG: (?:duration: (\d+\.\d+) ms )?(?:bind|parse) [^:]+:.*/o);
435 |
436 | ## Got a duration? Store it for this PID and move on
437 | if ($more =~ /LOG: duration: (\d+\.\d+) ms$/o) {
438 | my $duration = $1;
439 | ## Store this duration, overwriting what is (presumably) -1
440 | $logline[$#logline]->{duration} = $duration;
441 | next;
442 | }
443 |
444 | ## Got a statement with optional duration
445 | ## Handle the old statement and store the new
446 | if ($more =~ /
447 | LOG:\s\s
448 | (?:duration:\s(\d+\.\d+)\sms\s\s)?
449 | ((?:(?:statement)|(?:execute\s[^:]+)))
450 | :(.*)$
451 | /x) {
452 |
453 | my ($duration,$statement, $other) = ($1, $3, $2);
454 |
455 | ## Slurp in any multi-line continuatiouns after this
456 | $lastwaslog = 1;
457 |
458 | ## resolve whatever's already in this line
459 | resolve_pid_statement($logline[$#logline]) if $#logline > -1;
460 | shift @logline;
461 |
462 | ## Store and blow away any old value
463 | push @logline, {
464 | statement => $statement,
465 | duration => $duration,
466 | };
467 |
468 | if (defined $duration) {
469 | $logline[$#logline]->{duration} = $duration;
470 | }
471 |
472 | ## Make sure we have a first and a last line
473 | if (not defined $first_line) {
474 | $first_line = $last_line = $line;
475 | }
476 |
477 | } ## end LOG: statement
478 |
479 | next;
480 |
481 | } ## end if valid def line
482 |
483 | if ($opt{verbose}) {
484 | chomp $line;
485 | warn "Invalid line $.: $line\n";
486 | }
487 |
488 | } ## End while
489 |
490 | defined $first_line or die qq{Could not find any matching lines: incorrect format??\n};
491 |
492 | ## Process any PIDS that are left
493 | for my $line (@logline) {
494 | resolve_pid_statement($line);
495 | }
496 |
497 | ## Store the last PID seen as the last line
498 | $last_line = $logline[$#logline]->{statement};
499 |
500 | return;
501 |
502 | } ## end of parse_def_log
503 |
504 |
505 | ## Globals used by syslog: move or refactor
506 | my ($extract_query_re, $extract_duration_re);
507 |
508 | sub parse_syslog_log {
509 |
510 | # Todo: Move this out
511 |
512 | # Build an or-list for regex extraction of
513 | # the query types of interest.
514 | my $query_types = $opt{'query-types'}
515 | ? join (
516 | '|',
517 | grep { /\w/ && !$seen{$_}++ }
518 | split (
519 | /[,\s]+/,
520 | "\L$opt{'query-types'}"
521 | )
522 | )
523 | : 'select'
524 | ;
525 |
526 | # Partition the top-10 or all file-patterns
527 | # into their path and file-name components
528 | for my $k ( qw/top-10 all/ ) {
529 | local ($_) = delete $opt{$k};
530 | my @v =
531 | map { defined ($_) ? $_ : '' }
532 | m{(.*/)?(.*)};
533 | my @k =
534 | map {"$k-$_"} qw/path file/;
535 | @opt{@k} = @v;
536 | }
537 |
538 | # Base regex to capture the main pieces of
539 | # each log line entry
540 | my $statement_re =
541 | qr{
542 | ^(.+?
543 | postgres\[
544 | (\d+)
545 | \]
546 | ):
547 | \s+
548 | \[
549 | \d+
550 | -
551 | (\d+)
552 | \]
553 | \s
554 | (.*)
555 | }xms;
556 |
557 | # Create the appropriate regex to pull out the actual query depending on the
558 | # format implied by --pg-version. Blesses regex into the appropriate namespace
559 | # to simplify access to routines that deviate between log formats.
560 | $extract_query_re =
561 | make_extractor(
562 | $opt{'pg-version'},
563 | $query_types
564 | );
565 |
566 | # Regex specifically for the concluding duration
567 | # entry after a query finishes.
568 | $extract_duration_re =
569 | qr{
570 | log:
571 | \s+
572 | duration:
573 | \s
574 | (
575 | \d+
576 | [.]
577 | \d{3}
578 | )
579 | }ixms;
580 |
581 | while (my $line = <$fh>) {
582 |
583 | if ($opt{verbose} >= 2) {
584 | chomp $line;
585 | warn "Checking line ($line)\n";
586 | }
587 |
588 | # Lines that don't match the basic format are ignored.
589 | if ($line !~ $statement_re) {
590 | chomp $line;
591 | $opt{verbose} and warn "Line $. did not match: $line\n";
592 | next;
593 | }
594 |
595 | my ($st_id, $pid, $st_seq, $st_frag) = ($1, $2, $3, $4);
596 |
597 | $first_line = $last_line = $line
598 | unless defined $first_line;
599 |
600 | # Allows for blocks of log to be unordered. Assumes earliest found timestamp
601 | # is start time, and latest end time, regardless of the order in which
602 | # they're encountered.
603 | if (defined $line) {
604 | $first_line = $line
605 | if get_timelocal_from_line($line) < get_timelocal_from_line($first_line);
606 | $last_line = $line
607 | if get_timelocal_from_line($line) > get_timelocal_from_line($first_line);
608 | }
609 |
610 | my $arr;
611 |
612 | # Starting a new statement. Close off possible previous one and begin new one.
613 | resolve_syslog_stmt($pid)
614 | if $st_seq == 1;
615 |
616 | # Skip any entries that start the log
617 | # after their first entry.
618 | next unless $arr = $query{$pid}{fragments};
619 |
620 | # "statement_id" is the earliest timestamp/pid
621 | # entry found for the statement in question.
622 | # It should suffice for a human to identify
623 | # the query within the logfiles.
624 | $query{$pid}{statement_id} = $st_id
625 | if $st_seq == 1;
626 |
627 | push (
628 | @$arr,
629 | $st_frag
630 | );
631 |
632 | } # end while
633 |
634 | defined $first_line or die qq{Could not find any matching lines: incorrect format?\n};
635 |
636 | # Go through all entries that haven't been resolved
637 | # (indicated by the presence of elements in the
638 | # fragments array) and add them to the canonical list.
639 | while (my ($pid, $hsh) = each %query) {
640 | next unless exists $hsh->{fragments} and @{ $hsh->{fragments} };
641 | resolve_syslog_stmt($pid);
642 | }
643 |
644 | return;
645 |
646 | } ## end of parse_syslog_log
647 |
648 | # Determine the server and start/end times
649 | # from the oldest and newest log lines.
650 | my ($host, $start_time, $end_time) =
651 | log_meta($first_line, $last_line);
652 |
653 | # Calculate the ms interval of all log activity.
654 | my $log_int_ms = log_interval($start_time, $end_time);
655 |
656 | for my $hsh (values %canonical_q) {
657 |
658 | # Mean runtime.
659 | my $mean = $hsh->{duration} /= $hsh->{count};
660 |
661 | # Average (mean) time between successive calls.
662 | $hsh->{interval} = $opt{interval} || $log_int_ms / $hsh->{count};
663 |
664 | # SI, expressed as a percent. 100 implies the query
665 | # was, on average, constantly running on a single
666 | # instance. If interval is invalid (after all,
667 | # syslog precision is only 1 second), set to -1
668 | $hsh->{sys_impact} =
669 | $hsh->{interval} != 0
670 | ? 100 * $hsh->{duration} / $hsh->{interval}
671 | : -1
672 | ;
673 |
674 | ## No sense in showing negative numbers unless exactly -1
675 | if ($hsh->{sys_impact} < 0 and $hsh->{sys_impact} != -1) {
676 | $hsh->{sys_impact} = 0;
677 | }
678 |
679 | # Determine standard deviation and median. If count <= 1,
680 | # set to -1 to indicate not applicable.
681 | if ($hsh->{count} > 1) {
682 | my $sum = 0;
683 | for my $duration ( @{$hsh->{durations}} ) {
684 | $sum += ($duration - $mean)**2;
685 | }
686 | $hsh->{deviation} = sqrt($sum / ($hsh->{count} - 1));
687 | my @sorted = sort { $a <=> $b } @{$hsh->{durations}};
688 | if (($#sorted + 1) % 2 != 0) {
689 | $hsh->{median} = $sorted[int($#sorted / 2)];
690 | }
691 | else {
692 | $hsh->{median} = ($sorted[int($#sorted / 2)] + $sorted[int($#sorted / 2) + 1]) / 2;
693 | }
694 | }
695 | else {
696 | $hsh->{deviation} = -1;
697 | $hsh->{median} = -1;
698 | }
699 | }
700 |
701 | ## Format each query and return a hash based on query_type
702 | my $out = process_all_queries();
703 |
704 | ## If using HTML format, print a simple head and table of contents
705 | if ($opt{format} eq 'html') {
706 |
707 | print qq{
710 |
711 |
712 | Postgres System Impact Report
713 |
714 | \n};
715 |
716 | if ($opt{color}) {
717 | print qq!
724 | !;
725 | }
726 | print qq{\n\n};
727 |
728 | if ($opt{file}) {
729 | if (! defined $opt{file}[1]) {
730 | print qq{Log file: $opt{file}[0]
\n};
731 | }
732 | else {
733 | print qq{Log files:
\n};
734 | for my $file (@{$opt{file}}) {
735 | print qq{- $file
\n};
736 | }
737 | print qq{
\n};
738 | }
739 | }
740 |
741 | print "\n";
742 |
743 | for my $qtype (
744 | map { $_->[0] }
745 | sort { $a->[1] <=> $b->[1] or $b->[2] <=> $a->[2] }
746 | map { [$_, ($_ eq 'COPY' ? 1 : 0), scalar @{$out->{$_}} ] }
747 | keys %$out) {
748 |
749 | my $count = @{$out->{$qtype}};
750 |
751 | my $safename = "${host}_$qtype";
752 | print qq{- $qtype ($count)
\n};
753 | }
754 | print "
\n";
755 | }
756 | elsif ($opt{format} eq 'tsv') {
757 | # print out the field headers
758 | print join "\t" => qw( qtype query count duration interval deviation sysimpact minimum_threshold
759 | maximum_threshold durations );
760 | print "\n";
761 |
762 | no warnings 'uninitialized';
763 |
764 | while (my ($q,$v) = each %canonical_q) {
765 | print join "\t" => $v->{qtype}, $q,
766 | @{$v}{qw/count duration interval deviation sysimpact minimum_threshold maximum_threshold/},
767 | (join q{,} => @{$v->{durations}}),
768 | ;
769 | print "\n";
770 | }
771 | exit;
772 | }
773 |
774 | for my $qtype (
775 | map { $_->[0] }
776 | sort { $a->[1] <=> $b->[1] or $b->[2] <=> $a->[2] }
777 | map { [$_, ($_ eq 'COPY' ? 1 : 0), scalar @{$out->{$_}} ] }
778 | keys %$out) {
779 |
780 | my $arr = $out->{$qtype};
781 |
782 | my $type_top_ten = $opt{'top-10-file'}
783 | ? "$opt{'top-10-path'}$host-$qtype-$opt{'top-10-file'}"
784 | : '/dev/null'
785 | ;
786 |
787 | my ($all_fh, $type_all);
788 |
789 | if ($opt{'all-file'}) {
790 | $type_all = "$opt{'all-path'}$host-$qtype-$opt{'all-file'}";
791 | open ($all_fh, '>', $type_all)
792 | or die "Can't open '$type_all' to write: $!";
793 | }
794 | else {
795 | $all_fh = \*STDOUT;
796 | }
797 |
798 | open (my $top_ten_fh, '>', $type_top_ten)
799 | or die "Can't open '$type_top_ten' to write: $!";
800 |
801 | # Start off reports with appropriate Wiki naming
802 | # convention. Can automate the posting of reports
803 | # with code that strips first line and uses it
804 | # for Wiki page.
805 |
806 | my $a1 = '';
807 | my $a2 = '';
808 | if ($opt{format} eq 'html') {
809 | my $safename = "${host}_$qtype";
810 | $a1 = qq{};
811 | $a2 = qq{};
812 | my $phost = length $host ? qq{ : $host :} : ':';
813 | print $all_fh qq{${fmstartheader2}${a1}Query System Impact $phost $qtype${a2}${fmendheader2}\n};
814 | }
815 | else {
816 | print $all_fh qq{Query_System_Impact:$host:$qtype\n};
817 | }
818 |
819 | # Top 10 lists are put into templates, assuming
820 | # they will be pulled in to a collection with
821 | # other top 10s, typically for the same host.
822 | print $top_ten_fh <<"EOP";
823 | Template:${host}_SI_Top_10:$qtype
824 | EOP
825 |
826 | for my $fh ($all_fh, $top_ten_fh) {
827 | print $fh "${fmstartheader3}Log activity from $start_time to $end_time${fmendheader3}\n";
828 | }
829 |
830 | print $all_fh join ("$fmsep\n", @$arr);
831 | print $top_ten_fh join ("$fmsep", grep { defined $_ } @$arr[0..9]);
832 |
833 | close ($top_ten_fh) or warn "Error closing '$type_top_ten': $!";
834 | if ($type_all) {
835 | close ($all_fh) or warn "Error closing '$type_all': $!";
836 | }
837 | }
838 |
839 |
840 | if ($opt{format} eq 'html') {
841 | print "\n";
842 | }
843 |
844 | if (! $opt{quiet}) {
845 | warn "Items processed: $resolve_called\n";
846 | }
847 |
848 | exit;
849 |
850 | sub resolve_syslog_stmt {
851 |
852 | my $pid = shift;
853 |
854 | $query{$pid} ||= {};
855 | my $prev = $query{$pid}{fragments};
856 |
857 |
858 | # First time to see this pid. Initialize
859 | # fragments array, and no previous
860 | # statement to resolve.
861 | unless (ref ($prev) eq 'ARRAY') {
862 | $query{$pid}{fragments} = [];
863 | return;
864 | }
865 |
866 | # Now have collected a full query and need to
867 | # canonize and store.
868 | my $full_statement = lc (join (' ', @$prev));
869 |
870 | # Handle SQL comments, carefully
871 | $full_statement =~ s{/[*].*?[*]/}{}msg;
872 |
873 | # Tidy up spaces
874 | $full_statement =~ s/#011/ /g;
875 | $full_statement =~ s/^\s+|\s+$//g;
876 | $full_statement =~ s/\s+/ /g;
877 |
878 | # Special transform for crowded queries
879 | $full_statement =~ s/=\s*(\d+)(and|or) /= $1 $2 /gio;
880 |
881 | # If closing a query, store until we get
882 | # subsequent duration statement
883 | if (my @match_args = $full_statement =~ $extract_query_re) {
884 |
885 | my ($main_query, $query_type) =
886 | $extract_query_re->query_info(@match_args);
887 |
888 | # Clean out arguments
889 | # Quoted string
890 | $main_query =~ s/'(?:''|\\+'?|[^'\\]+)*'/?/g;
891 |
892 | # Numeric no quote, and bind params
893 | $main_query =~ s/(?<=[^\w.])[\$-]?(?:\d*[.])?\d+\b/?/g;
894 |
895 | # Collapsing IN () lists, so queries deviating
896 | # only by 'IN (?,?)' and 'IN (?,?,?)' are logged
897 | # as "the same"
898 | $main_query =~
899 | s{
900 | # Starts IN ...
901 | \s in \s?
902 | # Outermost paren for IN list
903 | [(]
904 | (?:
905 | # Could be list of rows
906 | [(] [?,\s\$]* [)]
907 | |
908 | # or the standard stuff of a scalar list
909 | [?,\s\$]+
910 | )+
911 | # Until we close the full IN list
912 | [)]
913 | }
914 | { in (?+)}xmsg;
915 |
916 | # Remove remaining comments (instances of '--' that didn't indicate
917 | # comments should have been removed already)
918 | $main_query =~ s/--[^\n]+$//gm;
919 |
920 | # Remove blank lines
921 | $main_query =~ s/^\s*\n//gm;
922 |
923 | # Store in temporary statement hashkey,
924 | # along with UPPER type
925 | $query{$pid}{statement} = $main_query;
926 | $query{$pid}{qtype} = "\U$query_type";
927 | }
928 | if (
929 | exists $query{$pid}{statement}
930 | &&
931 | $full_statement =~ $extract_duration_re
932 | )
933 | {
934 | my $duration = $1 || 0;
935 | my $stored = $query{$pid};
936 |
937 | # Add canonical query to count hash
938 | my $hsh =
939 | $canonical_q{ delete $stored->{statement} }
940 | ||= {
941 | count => 0,
942 | duration => 0,
943 | deviation => 0,
944 | qtype => delete $stored->{qtype},
945 | minimum_offenders => [ [ $stored->{statement_id} => $duration ] ],
946 | minimum_threshold => $duration,
947 | maximum_offenders => [ [ $stored->{statement_id} => $duration ] ],
948 | maximum_threshold => $duration,
949 | durations => [],
950 | };
951 |
952 | ++$hsh->{count};
953 | $hsh->{duration} += $duration;
954 | push @{$hsh->{durations}}, $duration;
955 |
956 | # If we're tracking offenders (best/worst queries)
957 | # add them in if the newest one measures as one of the
958 | # best or worst.
959 | if ($opt{offenders}) {
960 | if ($duration > $hsh->{maximum_threshold}) {
961 | my $array = $hsh->{maximum_offenders};
962 | @$array = (
963 | sort { $b->[1] <=> $a->[1] }
964 | @$array,
965 | [ $stored->{statement_id}, $duration ],
966 | );
967 | splice (@$array, $opt{offenders}) if @$array > $opt{offenders};
968 | $hsh->{maximum_threshold} = $array->[-1]->[1];
969 | }
970 |
971 | if ($duration < $hsh->{minimum_threshold}) {
972 | my $array = $hsh->{minimum_offenders};
973 | @$array = (
974 | sort { $a->[1] <=> $b->[1] }
975 | @$array,
976 | [ $stored->{statement_id}, $duration ],
977 | );
978 | splice (@$array, $opt{offenders}) if @$array > $opt{offenders};
979 | $hsh->{maximum_threshold} = $array->[-1]->[1];
980 | }
981 | }
982 | }
983 |
984 | @$prev = ();
985 | $resolve_called++;
986 |
987 | return 1;
988 |
989 | } ## end of resolve_syslog_statement
990 |
991 |
992 | sub resolve_pid_statement {
993 |
994 | my $info = shift or die;
995 |
996 | $resolve_called++;
997 |
998 | my $string = $info->{statement} ? lc $info->{statement} : '';
999 | my $duration = $info->{duration} || 0;
1000 |
1001 | # Handle SQL comments, carefully
1002 | $string =~ s{/[*].*?[*]/}{}msg;
1003 |
1004 | ## Lose the final semi-colon
1005 | $string =~ s/;\s*$//;
1006 |
1007 | # Tidy up spaces
1008 | $string =~ s/#011/ /g;
1009 | $string =~ s/^\s+|\s+$//g;
1010 | $string =~ s/\s+/ /g;
1011 |
1012 | # Special transform for crowded queries
1013 | $string =~ s/=\s*(\d+)(and|or) /= $1 $2 /gio;
1014 |
1015 | my $main_query = $string;
1016 |
1017 | # Clean out arguments
1018 | # Quoted string
1019 | $main_query =~ s/'(?:''|\\+'?|[^'\\]+)*'/?/g;
1020 |
1021 | # Numeric no quote, and bind params
1022 | $main_query =~ s/(?<=[^\w.])[\$-]?(?:\d*[.])?\d+\b/?/g;
1023 |
1024 | # Collapsing IN () lists, so queries deviating
1025 | # only by 'IN (?,?)' and 'IN (?,?,?)' are logged
1026 | # as "the same"
1027 | $main_query =~
1028 | s{
1029 | # Starts IN ...
1030 | \s in \s?
1031 | # Outermost paren for IN list
1032 | [(]
1033 | (?:
1034 | # Could be list of rows
1035 | [(] [?,\s\$]* [)]
1036 | |
1037 | # or the standard stuff of a scalar list
1038 | [?,\s\$]+
1039 | )+
1040 | # Until we close the full IN list
1041 | [)]
1042 | }
1043 | { in (?+)}xmsg;
1044 |
1045 | $main_query =~ s/^begin;//io unless $main_query =~ m{^begin;$}; ## no critic
1046 |
1047 | # Remove remaining comments (instances of '--' that didn't indicate
1048 | # comments should have been removed already)
1049 | $main_query =~ s/--[^\n]+$//gm;
1050 |
1051 | # Remove blank lines
1052 | $main_query =~ s/^\s*\n//gm;
1053 |
1054 | return 0 if $main_query !~ /\w/; ## e.g. a single ;
1055 |
1056 | # Figure out what type of query this is
1057 | if ($main_query !~ m{(\w+)}) {
1058 | warn Dumper $info;
1059 | warn "Could not find the type of query for $main_query from $string\n";
1060 | exit;
1061 | }
1062 | my $query_type = uc $1;
1063 |
1064 | # Add canonical query to count hash
1065 | my $hsh =
1066 | $canonical_q{ $main_query }
1067 | ||= {
1068 | count => 0,
1069 | duration => 0,
1070 | deviation => 0,
1071 | qtype => $query_type,
1072 | minimum_offenders => [ [ -1, $duration ] ],
1073 | minimum_threshold => $duration,
1074 | maximum_offenders => [ [ -1, $duration ] ],
1075 | maximum_threshold => $duration,
1076 | durations => [],
1077 | };
1078 |
1079 | ++$hsh->{count};
1080 | $hsh->{duration} += $duration;
1081 | push @{$hsh->{durations}}, $duration;
1082 |
1083 | # If we're tracking offenders (best/worst queries)
1084 | # add them in if the newest one measures as one of the
1085 | # best or worst.
1086 | if ($opt{offenders}) {
1087 | my $fixme = -1;
1088 | if ($duration > $hsh->{maximum_threshold}) {
1089 | my $array = $hsh->{maximum_offenders};
1090 | @$array = (
1091 | sort { $b->[1] <=> $a->[1] }
1092 | @$array,
1093 | [ $fixme, $duration ],
1094 | );
1095 | splice (@$array, $opt{offenders}) if @$array > $opt{offenders};
1096 | $hsh->{maximum_threshold} = $array->[-1]->[1];
1097 | }
1098 |
1099 | if ($duration < $hsh->{minimum_threshold}) {
1100 | my $array = $hsh->{minimum_offenders};
1101 | @$array = (
1102 | sort { $a->[1] <=> $b->[1] }
1103 | @$array,
1104 | [ $fixme, $duration ],
1105 | );
1106 | splice (@$array, $opt{offenders}) if @$array > $opt{offenders};
1107 | $hsh->{maximum_threshold} = $array->[-1]->[1];
1108 | }
1109 | }
1110 |
1111 | return 1;
1112 |
1113 | } ## end of resolve_pid_statement
1114 |
1115 |
1116 | # Expects first arg as log entry of earliest time
1117 | # and second arg as log entry of latest time
1118 | sub log_interval {
1119 | my ($first, $second) = @_;
1120 | $opt{verbose} and print "First and second lines:\n\t$first\n\t$second\n";
1121 |
1122 | my $first_timelocal = get_timelocal_from_line($first);
1123 | my $second_timelocal = get_timelocal_from_line($second);
1124 | my $interval_in_sec = $second_timelocal - $first_timelocal;
1125 |
1126 | # Full log-slice interval in ms
1127 | return $interval_in_sec * 1000;
1128 | }
1129 |
1130 | sub get_timelocal_from_line {
1131 | my ($line) = @_;
1132 |
1133 | return 0 if $opt{mode} eq 'bare';
1134 |
1135 | my @datetime = get_date_from_line($line);
1136 |
1137 | # timelocal uses 0..11 for months instead of 1..12
1138 | --$datetime[1];
1139 |
1140 | my $int_in_sec =
1141 | Time::Local::timelocal(reverse(@datetime));
1142 |
1143 | return $int_in_sec;
1144 | }
1145 |
1146 | sub get_date_from_line {
1147 | my ($line) = @_;
1148 |
1149 | my ($year, $mon, $day, $hour, $min, $sec);
1150 | if ($line =~ m/^\d{4}/) {
1151 | ($year, $mon, $day, $hour, $min, $sec) =
1152 | $line =~ m{
1153 | (\d{4})
1154 | -
1155 | (\d{1,2})
1156 | -
1157 | (\d{1,2})
1158 | [T ]
1159 | 0?
1160 | (\d{1,2})
1161 | :
1162 | 0?
1163 | (\d{1,2})
1164 | :
1165 | 0?
1166 | (\d{1,2})
1167 | }x;
1168 | }
1169 | else {
1170 | my @localtime = localtime(time);
1171 | $year = $localtime[5] + 1900 ;
1172 | my $mon_name;
1173 | ($mon_name, $day, $hour, $min, $sec) =
1174 | $line =~ m{
1175 | ^(\w{3}) \s+
1176 | (\d{1,2}) \s
1177 | (\d\d) :
1178 | (\d\d) :
1179 | (\d\d)
1180 | }x;
1181 | my %months = (
1182 | Jan => '01',
1183 | Feb => '02',
1184 | Mar => '03',
1185 | Apr => '04',
1186 | May => '05',
1187 | Jun => '06',
1188 | Jul => '07',
1189 | Aug => '08',
1190 | Sep => '09',
1191 | Oct => '10',
1192 | Nov => '11',
1193 | Dec => '12',
1194 | );
1195 | $mon = $months{$mon_name};
1196 | }
1197 |
1198 | return ($year, $mon, $day, $hour, $min, $sec);
1199 | }
1200 |
1201 | sub prettify_query {
1202 |
1203 | local ($_) = shift;
1204 |
1205 | # Perform some basic transformations to try to make the query more readable.
1206 | # It's not perfect, but much better than all one line, all lower case.
1207 |
1208 | # Hide away any comments so they don't get transformed
1209 | my @comment;
1210 | s{($STARTCOMMENT) (.+?) ($ENDCOMMENT)}{push @comment => $2; "$1 comment $3"}ge;
1211 |
1212 | my $keywords = qr{
1213 | select |
1214 | exists |
1215 | distinct |
1216 | deferred |
1217 | show |
1218 | grant |
1219 | revoke |
1220 | commit |
1221 | begin |
1222 | from |
1223 | left |
1224 | right |
1225 | outer |
1226 | inner |
1227 | where |
1228 | (?:
1229 | group |
1230 | order
1231 | ) \s by |
1232 | and |
1233 | or |
1234 | not |
1235 | in |
1236 | between |
1237 | as |
1238 | on |
1239 | using |
1240 | left |
1241 | right |
1242 | vacuum |
1243 | verbose |
1244 | rollback |
1245 | fetch |
1246 | full |
1247 | join |
1248 | limit |
1249 | offset |
1250 | count |
1251 | coalesce |
1252 | max |
1253 | min |
1254 | sum |
1255 | all |
1256 | desc |
1257 | asc |
1258 | union |
1259 | intersect |
1260 | except |
1261 | is |
1262 | null |
1263 | true |
1264 | false |
1265 | case |
1266 | when |
1267 | then |
1268 | else |
1269 | end |
1270 | i? like |
1271 | having |
1272 | insert |
1273 | (?:in)? to |
1274 | update |
1275 | delete |
1276 | set |
1277 | values |
1278 | copy |
1279 | create |
1280 | drop |
1281 | add |
1282 | alter |
1283 | table |
1284 | trigger |
1285 | rule |
1286 | references |
1287 | substring |
1288 | foreign
1289 | \s key |
1290 | (?:
1291 | en |
1292 | dis
1293 | ) able |
1294 | listen |
1295 | notify |
1296 | index}xi;
1297 |
1298 | # Change all known keywords to uppercase
1299 | s{\b($keywords)\b}{\U$1}msg;
1300 |
1301 | s{,CASE}{, CASE}g;
1302 |
1303 | # line break after certain sql keywords
1304 | s{
1305 | (?\?}{<> ?}go;
1341 |
1342 | ## Space out comma items
1343 | s{ *, *}{, }gso;
1344 |
1345 | ## Subselects start a new line
1346 | s{^( +FROM \()}{$1\n}mgo;
1347 |
1348 | # Wrap long SET lines
1349 | s{^(\s*SET .{$minwrap1,}?,\s*)(.+? = .*)}{$1\n$indent2$2}mgo;
1350 |
1351 | # Trim long lines at a comma if starts with:
1352 | # SELECT, GROUP BY, VALUE, INSERT
1353 | s{^(\s*(?:SELECT|GROUP BY|INSERT|VALUES) .{$minwrap1,}?, )(.+)}{$1\n$indent2$2}mgo;
1354 |
1355 | # Wrap long WHERE..AND lines
1356 | s{^(\s*WHERE .{$minwrap1,}?)(\bAND .+)}{$1\n$indent2$2}mgo;
1357 | 1 while $_ =~ s{^(${indent2}AND .{$minwrap1,}?)(\bAND .+)}{$1\n$indent2$2}mgo;
1358 |
1359 | # Much munging of CASE .. WHEN .. END
1360 | s{^\s*(CASE .*?)(\bWHEN .+)}{${indent2}$1\n$indent3$2}mgo;
1361 | # Wrap the rest of the WHENs
1362 | 1 while $_ =~ s{^( +WHEN .+? )(WHEN.+)}{$1\n$indent3$2}mgo;
1363 | # Same for WHEN THEN but indent more, and only over a certain level
1364 | 1 while $_ =~ s{^( +(?:WHEN|THEN) .{$minwrap2,}? )(THEN.+)}{$1\n$indent4$2}mgo;
1365 | # Put END AS on its own line
1366 | s{ +(END AS \w+)}{\n${indent2}$1}go;
1367 | # Wrap stuff beyond the end properly
1368 | s{^(\s+END AS \w+,) *(\w)}{$1\n${indent2}$2}mgo;
1369 |
1370 | ## Trim any hanging non-keyword lines from above
1371 | 1 while $_ =~ s{^(${indent2}(?:[a-z]|NULL|\?).{$minwrap1,}?, )(\w.*)}{$1\n$indent2$2}mgo;
1372 |
1373 | # Wrap if we are doing multiple commands on one line
1374 | s{;\s*([A-Z])}{\n;\n$1}go;
1375 |
1376 | ## Add in any optional CSS to make the formatting even prettier
1377 | if ($opt{color}) {
1378 | ## Table aliases and tables:
1379 | s{((?:FROM|JOIN)\s+(?!position)\w+\s+AS)\s+(\w+)}{$1 $2}gos;
1380 | 1 while $_ =~ s{((?:FROM|JOIN)\s+.+?, \w+\s+AS)\s+(\w+)}{$1 $2}gos;
1381 | s{(FROM|JOIN)\s+(?!position)(\w+)}{$1 $2}go;
1382 | 1 while $_ =~ s{((?:FROM|JOIN)\s+.+?,) (\w+)}{$1 $2}gos;
1383 |
1384 | ## Highlight all keywords
1385 | s{\b($keywords)\b}{$1}go;
1386 |
1387 | ## Highlight semicolons indicating multiple statements
1388 | s{\n;\n}{\n;\n}go;
1389 |
1390 | ## Highlight placeholders (question marks)
1391 | s{(\W)(\?)(\W|\Z)}{$1$2$3}gso;
1392 | }
1393 |
1394 | # Restore any comments
1395 | if (@comment) {
1396 | s{($STARTCOMMENT) comment ($ENDCOMMENT)}{"\n" . (shift @comment) . "\n"}ge;
1397 | }
1398 |
1399 | return $_;
1400 |
1401 | } ## end of prettify_query
1402 |
1403 |
1404 | sub log_meta {
1405 |
1406 | my @lines = @_;
1407 |
1408 | my ($hostname, $start, $end);
1409 |
1410 | # Pull out host, start time, and end time.
1411 | # Assumes start as first arg and end as second.
1412 |
1413 | for my $line (@lines) {
1414 |
1415 | if ($opt{verbose} >= 1) {
1416 | chomp $line;
1417 | warn "Checking meta line ($line)\n";
1418 | }
1419 |
1420 | my $line_regex = qr{FAIL};
1421 | if ('pid' eq $opt{mode}) {
1422 | $line_regex = qr{(.{19}) [A-Z]+ \d+ [^ ]+ [^ ]+ (\w+)};
1423 | }
1424 | elsif ('syslog' eq $opt{mode}) {
1425 | $line_regex = qr{(.{19})(?:[+-]\d+:\d+|\s[A-Z]+)\s( \S+ )};
1426 | }
1427 | elsif ('bare' eq $opt{mode}) {
1428 | $line_regex = qr{(.{19}) [A-Z]+ \d+ [^ ]+ [^ ]+ (\w+)};
1429 | }
1430 | elsif ('csv' eq $opt{mode}) {
1431 | $hostname = $line->{connection_from};
1432 | $end = $line->{log_time};
1433 | $start ||= $end;
1434 | return ($hostname, $start, $end);
1435 | }
1436 | else {
1437 | die qq{Invalid mode: $opt{mode}\n};
1438 | }
1439 |
1440 | if ($line =~ /$line_regex/ms) { ## no critic
1441 | $end = $1; ## no critic
1442 | $hostname ||= $2; ## no critic
1443 | $start ||= $end;
1444 | }
1445 | ## Sometimes we don't have a host
1446 | elsif ($line =~ /(.{19})/) {
1447 | $end = $1;
1448 | $hostname = '';
1449 | $start ||= $end;
1450 | }
1451 | }
1452 |
1453 | defined $start or die qq{Unable to find the starting time\n};
1454 |
1455 | defined $end or die qq{Unable to find the ending time\n};
1456 |
1457 | return ($hostname, $start, $end);
1458 |
1459 | } ## end of log_meta
1460 |
1461 |
1462 | sub process_all_queries {
1463 |
1464 | my %out;
1465 |
1466 | # Sort by SI descending and print out reports.
1467 | for (
1468 | sort {
1469 | $canonical_q{$b}{sys_impact}
1470 | <=>
1471 | $canonical_q{$a}{sys_impact}
1472 | }
1473 | keys %canonical_q
1474 | )
1475 | {
1476 |
1477 | my $hsh = $canonical_q{$_};
1478 |
1479 | my $system_impact;
1480 | if ($hsh->{sys_impact} < 0.001) {
1481 | $system_impact = sprintf '%0.6f', $hsh->{sys_impact};
1482 | }
1483 | else {
1484 | $system_impact = sprintf '%0.2f', $hsh->{sys_impact};
1485 | }
1486 |
1487 | ## No sense in showing negative numbers
1488 | $hsh->{duration} = 0 if $hsh->{duration} < 0;
1489 |
1490 | my $duration = sprintf '%0.2f ms', $hsh->{duration};
1491 | my $count = $hsh->{count};
1492 | my $interval;
1493 | if ($hsh->{interval} > 10000) {
1494 | $interval = sprintf '%d seconds', $hsh->{interval}/1000;
1495 | }
1496 | elsif ($hsh->{interval} < 1000) {
1497 | $interval = sprintf '%0.2f ms', $hsh->{interval};
1498 | }
1499 | else {
1500 | $interval = sprintf '%d ms', $hsh->{interval};
1501 | }
1502 | my $deviation = sprintf '%0.2f ms', $hsh->{deviation};
1503 | my $median = sprintf '%0.2f ms', $hsh->{median};
1504 | if ($count == 1) {
1505 | $deviation = 'N/A';
1506 | $median = 'N/A';
1507 | }
1508 |
1509 | my $arr =
1510 | $out{ $hsh->{qtype} }
1511 | ||= [];
1512 |
1513 | # If user provides positive integer
1514 | # in --offenders, add in the actual
1515 | # durations of the best and worst
1516 | # number of queries requested in
1517 | # report. Entry includes full beginning
1518 | # piece of log entry. Offenders was
1519 | # used since initially it only displayed
1520 | # the worst queries, or worst offenders.
1521 | # Best was just added in for balance.
1522 | my $offenders = '';
1523 | if ($opt{offenders}) {
1524 | $offenders = sprintf(qq{
1525 |
1526 |
1527 | |
1528 | ${fmstartbold}Best$fmendbold
1529 | |
1530 |
1531 | ${fmstartbold}Worst$fmendbold
1532 | |
1533 |
1534 |
1535 | %s |
1536 | %s |
1537 |
1538 |
},
1539 | map { join '', map {
1540 | if ($_->[0] == -1) { "$_->[1] ms" }
1541 | else { "$_->[0] -- $_->[1] ms" }
1542 | } @$_ } @$hsh{
1543 | qw( minimum_offenders maximum_offenders )
1544 | }
1545 | );
1546 | }
1547 |
1548 | my $queries = prettify_query($_) . $offenders;
1549 |
1550 | my $fmshowborder = $opt{format} eq 'html' ? q{border='1'} : '';
1551 |
1552 | push (@$arr, <<"EOP");
1553 |
1554 |
1555 |
1556 | ${fmstartbold}System Impact:$fmendbold
1557 | Mean Duration:
1558 | Median Duration:
1559 | Total Count:
1560 | Mean Interval:
1561 | Std. Deviation:
1562 | |
1563 |
1564 | $fmstartbold$system_impact$fmendbold
1565 | $duration
1566 | $median
1567 | $count
1568 | $interval
1569 | $deviation
1570 | |
1571 |
1572 |
1573 | ${fmstartquery}$queries$fmendquery
1574 | EOP
1575 |
1576 | }
1577 |
1578 | return \%out;
1579 |
1580 | } ## end of process_all_queries
1581 |
1582 |
1583 | sub make_extractor {
1584 | (my $pg_version = shift) =~ s/\W+/_/g;
1585 | $pg_version ||= '8_2';
1586 | my $class = "PG_$pg_version";
1587 | no strict 'refs'; ## no critic
1588 | return bless (&$class(shift), $class);
1589 | }
1590 |
1591 | sub PG_8_1 {
1592 | my $query_types = shift;
1593 |
1594 | # Regex for log format prior to DETAIL
1595 | # entries in 8.2. Not certain how many versions
1596 | # prior to 8.1 for which this will work.
1597 |
1598 | return qr{
1599 | \A
1600 | log:
1601 | \s
1602 | statement:
1603 | \s
1604 | (?:
1605 | execute
1606 | \s
1607 | <[^>]*>
1608 | \s
1609 | (\[)
1610 | prepare:
1611 | \s
1612 | )?
1613 | (
1614 | ($query_types)
1615 | .*?
1616 | )
1617 | (\])?
1618 | \Z
1619 | }xms;
1620 | }
1621 |
1622 | sub PG_8_2 {
1623 | my $query_types = shift;
1624 |
1625 | # Regex for log format after DETAIL
1626 | # entries in 8.2. Works for v's 8.2 and 8.3.
1627 |
1628 | return qr{
1629 | log:
1630 | \s+
1631 | (?:duration: .*)?
1632 | (
1633 | statement |
1634 | execute (?: \s <.*> )
1635 | )
1636 | [^:]* :
1637 | \s
1638 | (
1639 | ($query_types)
1640 | .*
1641 | )
1642 | }ixms;
1643 | }
1644 |
1645 | sub PG_8_1::query_info {
1646 |
1647 | my $self = shift;
1648 | my ($open_sq, $main_query, $query_type, $close_sq) = @_;
1649 |
1650 | # Query may either be direct statement or the EXECUTE
1651 | # of a PREPAREd statement. If EXECUTE variety, the log
1652 | # partitions the query off in sq. brackets, so we tail
1653 | # strip possible space between the query end and the
1654 | # right sq. bracket that used to end the string. Else,
1655 | # the query itself might end in right sq. bracket, so
1656 | # we put it back on if found.
1657 | if ($open_sq) {
1658 | $main_query =~ s/\s+$//;
1659 | }
1660 | else {
1661 | $main_query .= $close_sq || '';
1662 | }
1663 |
1664 | return ($main_query, $query_type);
1665 | }
1666 |
1667 | sub PG_8_2::query_info {
1668 |
1669 | my $self = shift;
1670 | my ($stm_or_exec, $main_query, $query_type) = @_;
1671 |
1672 | # EXECUTEd queries end with a DETAIL segment showing
1673 | # the bound parameters, which is great, except in
1674 | # our case, where we are interested in flattening
1675 | # out arguments.
1676 | $main_query =~ s/ detail:.*?$//
1677 | if $stm_or_exec eq 'execute';
1678 |
1679 | return ($main_query, $query_type);
1680 | }
1681 |
1682 | __END__
1683 |
1684 | =head1 NAME
1685 |
1686 | pgsi.pl - Produce system impact reports for a PostgreSQL database.
1687 |
1688 | =head1 VERSION
1689 |
1690 | This documentation refers to version 1.7.2
1691 |
1692 | =head1 USAGE
1693 |
1694 |
1695 | pgsi.pl [options] < pglog_slice.log
1696 |
1697 | or...
1698 |
1699 | pgsi.pl --file pglog_slice.log [options]
1700 |
1701 | =over 3
1702 |
1703 | =item Options
1704 |
1705 | --format
1706 | --file
1707 | --query-types
1708 | --top-10
1709 | --all
1710 | --pg-version
1711 | --offenders
1712 |
1713 | =back
1714 |
1715 | =head1 DESCRIPTION
1716 |
1717 | System Impact (SI) is a measure of the overall load a given query imposes on a
1718 | server. It is expressed as a percentage of a query's average duration over its
1719 | average interval between successive calls. E.g., SI=80 indicates that a given
1720 | query is active 80% of the time during the entire log interval. SI=200
1721 | indicates the query is running twice at all times on average. Thus, the lower
1722 | the SI, the better.
1723 |
1724 | The goal of SI is to identify those queries most likely to cause performance
1725 | degradation on the database during heaviest traffic periods. Focusing
1726 | exclusively on the least efficient queries can hide relatively fast-running
1727 | queries that saturate the system more because they are called far more
1728 | frequently. By contrast, focusing only on the most-frequently called queries
1729 | will tend to emphasize small, highly optimized queries at the expense of
1730 | slightly less popular queries that spend much more of their time between
1731 | successive calls in an active state. These are often smaller queries that have
1732 | failed to be optimized and punish a system severely under heavy load.
1733 |
1734 | One thing SI does not do is distinguish between high-value queries represented
1735 | by extended active states or long durations due to blocking locks. Either
1736 | condition is worthy of attention, but determining which is to blame will
1737 | require independent investigation.
1738 |
1739 | Queries are canonized with placeholders representing literals or arguments.
1740 | Further, IN lists are canonized so that variation from query to query only
1741 | in the number of elements in the IN list will not be treated as distinct
1742 | queries.
1743 |
1744 | Some examples of the "same" query:
1745 |
1746 | =over 3
1747 |
1748 | =item *
1749 |
1750 | SELECT col FROM table WHERE code = 'One';
1751 | SELECT col FROM table WHERE code = 'Sixty-Three';
1752 |
1753 | =item *
1754 |
1755 | SELECT foo FROM bar WHERE fuzz = $1 AND color IN ('R','G','B');
1756 | Select FOO
1757 | from bar
1758 | WhErE fuzz = '56'
1759 | AND color IN ('R', $1);
1760 |
1761 | =back
1762 |
1763 | Differences in capitalization and whitespace are irrelevant.
1764 |
1765 | =head2 Log Data
1766 |
1767 | Pass in log data on stdin:
1768 |
1769 | pgsi.pl < some_log_slice.log
1770 | cat some_log_slice.log | pgsi.pl
1771 |
1772 | Or use the --file option:
1773 |
1774 | pgsi.pl --file=some_log_slice.log
1775 |
1776 | Or read in more than one file at a time:
1777 |
1778 | pgsi.pl --file=logfile1.log --file=logfile2.log
1779 |
1780 | If more than one file is given, they must be given in chronological order.
1781 |
1782 | Log data must comply with a specific format and must be from contiguous
1783 | activity. The code makes the assumption that the overall interval of activity
1784 | is the time elapsed between the first and last log entries. If there are
1785 | several blocks of logs to analyze, they must be run separately.
1786 |
1787 | Required format is the following in syslog:
1788 |
1789 | YYYY-MM-DDTHH24:MI:SS(-TZ:00)? server postgres[I]:
1790 |
1791 | This also requires that log_statement is set to 'all' and
1792 | that log_duration be set to 'on' in postgresql.conf.
1793 | If you are not using syslog, you can simulate the format with the following:
1794 |
1795 | log_line_prefix = '%t %h postgres[%p]: [%l-1] ' ## Simulate syslog for pgsi.
1796 |
1797 | =head2 Options
1798 |
1799 | =over 4
1800 |
1801 | =item --format
1802 |
1803 | Output format for the resulting data.
1804 |
1805 | pgsi.pl --format=tsv
1806 |
1807 | Valid values here are "html" (default), "mediawiki", and "tsv", each of which
1808 | will generate the report in the corresponding format.
1809 |
1810 | The "tsv" format (tab-separated value) will not generate a full report, but will
1811 | dump the normalized queries only along with the specific metrics into a tab-
1812 | separated file.
1813 |
1814 | =item --query-types
1815 |
1816 | Query impact is segregated by types. I.e., all the SELECTs together, all
1817 | UPDATEs together, etc. Typically it is assumed that SELECT is the most
1818 | interesting (and is by itself the default), but any query type may be analyzed.
1819 | Multiples are provided as space- or comma-separated lists.
1820 |
1821 | pgsi.pl --query-types="select, update, copy, create"
1822 |
1823 | The code will produce a unique report for each type when used with the --all
1824 | and/or --top-10 file-pattern options (see below).
1825 |
1826 | =item --top-10, --all
1827 |
1828 | Supplies a file I and optional directory path into which the reports
1829 | should be written per --query-type. The pattern is prefixed with the
1830 | --query-type and host for this report and placed into the requested directory
1831 | (or cwd if no path is present).
1832 |
1833 | --all will list every canonized query encountered, which is likely to
1834 | contain a large number of queries of no interest (those with negligible
1835 | impact).
1836 |
1837 | --top-10 limits the report to only the 10 entries with the greatest SI.
1838 |
1839 | pgsi.pl \
1840 | --query-types=select,update \
1841 | --all=si_reports/monday_10am_1pm.all.txt \
1842 | --top-10=si_reports/monday_10am_1pm.t10.txt
1843 |
1844 | This will produce the following reports in si_reports/ for a database running
1845 | on server db1:
1846 |
1847 | SELECT-db1-monday_10am_1pm.all.txt
1848 | UPDATE-db1-monday_10am_1pm.all.txt
1849 | SELECT-db1-monday_10am_1pm.t10.txt
1850 | UPDATE-db1-monday_10am_1pm.t10.txt
1851 |
1852 | If --top-10 is not supplied, then no top 10 report is generated. If --all is
1853 | not supplied, then the report(s) print to stdout.
1854 |
1855 | =item --pg-version
1856 |
1857 | Currently, this might better be described as either "before DETAIL" or "after
1858 | DETAIL". The code was written against PG 8.1 originally, but when 8.2 came out
1859 | the addition of DETAIL log entries forced a different parser. That unfortunate
1860 | timing led to the assumption that log construction would change with each
1861 | release. Going forward, --pg-version will be (other than 8.1) the first version
1862 | in which this log format was encountered.
1863 |
1864 | --pg-version is only either 8.1 or 8.2 (8.2 is default). It's unknown how far
1865 | back in versions the 8.1 format holds, but 8.2 holds for itself and 8.3. So,
1866 | unless you're working against logs generated by a PG version less than 8.2, you
1867 | do not need to include this option (but it might save you some trouble if a new
1868 | format comes at a later version and the default bumps up to the most recent
1869 | while you stay on your older version).
1870 |
1871 | pgsi.pl --pg-version=8.1
1872 |
1873 | =item --offenders
1874 |
1875 | Number of best and worst queries to included with the report, in terms of
1876 | overall duration of execution. Enough log information is listed along with the
1877 | duration such that tracking down the original query (not the canonized
1878 | version) is straightforward. The offenders list can be very useful for a query
1879 | that is causing trouble in a handful of permutations, but most of the time is
1880 | behaving well.
1881 |
1882 | The list in conjunction with standard deviation gives an overall indication of
1883 | performance volatility.
1884 |
1885 | --offenders=5 produces additional output in the report that looks something
1886 | like the following example:
1887 |
1888 | Best
1889 | 1. 2009-01-12T10:11:49-07:00 db1 postgres[4692] -- 4.833 ms
1890 | 2. 2009-01-12T10:31:19-07:00 db1 postgres[1937] -- 4.849 ms
1891 | 3. 2009-01-12T09:16:20-07:00 db1 postgres[20294] -- 4.864 ms
1892 | 4. 2009-01-12T10:16:54-07:00 db1 postgres[20955] -- 4.867 ms
1893 | 5. 2009-01-12T10:32:16-07:00 db1 postgres[5010] -- 4.871 ms
1894 |
1895 | Worst
1896 | 1. 2009-01-12T10:00:07-07:00 db1 postgres[2804] -- 2175.650 ms
1897 | 2. 2009-01-12T09:30:07-07:00 db1 postgres[2804] -- 2090.914 ms
1898 | 3. 2009-01-12T10:00:18-07:00 db1 postgres[2804] -- 2046.608 ms
1899 | 4. 2009-01-12T09:30:10-07:00 db1 postgres[2804] -- 1954.604 ms
1900 | 5. 2009-01-12T11:20:11-07:00 db1 postgres[2804] -- 1788.576 ms
1901 |
1902 | =back
1903 |
1904 | =head1 BUGS
1905 |
1906 | =over
1907 |
1908 | =item *
1909 |
1910 | If queries contain exceptionally long IN lists, the regex that attempts to
1911 | flatten them can run into a perl recursion limit. In that event, the query will
1912 | keep the placeholders of the IN list, making it unique compared to the same
1913 | query with a different cardinality of list params in the same IN. This
1914 | deficiency should only surface on IN lists with composite parameters [e.g., IN
1915 | ((?,?,...,?),(?,?,...,?),...,(?,?,...,?))]. For scalar IN lists, there should
1916 | be no such limit.
1917 |
1918 | =back
1919 |
1920 | =head1 AUTHOR
1921 |
1922 | Original code:
1923 | Mark Johnson (mark@endpoint.com), End Point Corp.
1924 |
1925 | Contributions:
1926 | Ethan Rowe (ethan@endpoint.com), End Point Corp.
1927 | Greg Sabino Mullane (greg@endpoint.com), End Point Corp.
1928 | Daniel Browning (db@endpoint.com), End Point Corp.
1929 | Joshua Tolley , End Point Corp.
1930 | Abraham Ingersoll
1931 |
1932 | =head1 LICENSE AND COPYRIGHT
1933 |
1934 | Copyright 2008-2011 Mark Johnson (mark@endpoint.com)
1935 |
1936 | This module is free software; you can redistribute it and/or modify it
1937 | under the same terms as Perl itself. See the LICENSE file.
1938 |
1939 | =cut
1940 |
--------------------------------------------------------------------------------