├── tmp
    └── .exists
├── .gitignore
├── INSTALL
├── MANIFEST.SKIP
├── MANIFEST
├── README
├── t
    ├── 00_basic.t
    ├── 99_yaml.t
    ├── 00_signature.t
    ├── 99_cleanup.t
    ├── 99_pod.t
    ├── 00_release.t
    ├── 99_spellcheck.t
    ├── 02_pgsi.t
    └── 99_perlcritic.t
├── META.yml
├── Makefile.PL
├── LICENSE
├── Changes
├── SIGNATURE
├── .perlcriticrc
├── pgsi.html
└── pgsi.pl


/tmp/.exists:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | Makefile
 2 | blib/
 3 | pgsi_test_database/
 4 | pm_to_blib
 5 | tmp/
 6 | pgsi-*.tar.gz
 7 | MYMETA.*
 8 | *.tmp
 9 | Makefile.old
10 | test.*


--------------------------------------------------------------------------------
/INSTALL:
--------------------------------------------------------------------------------
 1 | Install pgsi like any other Perl module:
 2 | 
 3 | perl Makefile.PL
 4 | make
 5 | make test
 6 | make install
 7 | 
 8 | If you find any problems when running the above, please report them!
 9 | 
10 | 


--------------------------------------------------------------------------------
/MANIFEST.SKIP:
--------------------------------------------------------------------------------
 1 | ~$
 2 | ^#
 3 | Makefile$
 4 | Makefile\.old$
 5 | pm_to_blib$
 6 | \.tar\.gz$
 7 | ^tmp
 8 | .git/
 9 | pgsi_test_database
10 | log$
11 | ^RCS
12 | ^blib
13 | pgsi.html
14 | .*\.tmp
15 | MYMETA.*
16 | test.*
17 | 


--------------------------------------------------------------------------------
/MANIFEST:
--------------------------------------------------------------------------------
 1 | pgsi.pl
 2 | README
 3 | Changes
 4 | Makefile.PL
 5 | INSTALL
 6 | LICENSE
 7 | MANIFEST
 8 | MANIFEST.SKIP
 9 | META.yml
10 | SIGNATURE
11 | .perlcriticrc
12 | .gitignore
13 | t/00_release.t
14 | t/00_basic.t
15 | t/00_signature.t
16 | t/02_pgsi.t
17 | t/99_cleanup.t
18 | t/99_perlcritic.t
19 | t/99_spellcheck.t
20 | t/99_pod.t
21 | t/99_yaml.t
22 | 


--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
 1 | pgsi.pl is a Perl utility for parsing and analyzing PostgreSQL logs to produce
 2 | wiki-ready system impact reports. It was originally developed for Backcountry.com.
 3 | 
 4 | Documentation is available as POD within the script:
 5 | 
 6 |  perldoc pgsi.pl
 7 | 
 8 | More information is at the website:
 9 | 
10 |  http://bucardo.org/wiki/pgsi/
11 | 
12 | 
13 | 


--------------------------------------------------------------------------------
/t/00_basic.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # -*-mode:cperl; indent-tabs-mode: nil-*-
 3 | 
 4 | ## Simply test that the main script compiles
 5 | 
 6 | use 5.006;
 7 | use strict;
 8 | use warnings;
 9 | use Test::More tests => 1;
10 | use IO::Handle;
11 | *STDOUT->autoflush(1);
12 | *STDERR->autoflush(1);
13 | 
14 | eval {
15 |     system "perl -c pgsi.pl 2>/dev/null";
16 | };
17 | is ($@, q{}, 'Program compiled cleanly');
18 | 


--------------------------------------------------------------------------------
/t/99_yaml.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # -*-mode:cperl; indent-tabs-mode: nil-*-
 3 | 
 4 | ## Test META.yml for YAMLiciousness, requires Test::YAML::Meta
 5 | 
 6 | use 5.006;
 7 | use strict;
 8 | use warnings;
 9 | use Test::More;
10 | use IO::Handle;
11 | *STDOUT->autoflush(1);
12 | *STDERR->autoflush(1);
13 | 
14 | plan tests => 2;
15 | 
16 | my $V = 0.03;
17 | eval {
18 |     require Test::YAML::Meta;
19 |     Test::YAML::Meta->import;
20 | };
21 | if ($@) {
22 |     SKIP: {
23 |         skip ('Skipping Test::YAML::Meta tests: module not found', 2);
24 |     }
25 | }
26 | elsif ($Test::YAML::Meta::VERSION < $V) {
27 |     SKIP: {
28 |         skip ("Skipping Test::YAML::Meta tests: need version $V, but only have $Test::YAML::Meta::VERSION", 2);
29 |     }
30 | }
31 | else {
32 |     meta_spec_ok ('META.yml', 1.3);
33 | }
34 | 


--------------------------------------------------------------------------------
/META.yml:
--------------------------------------------------------------------------------
 1 | --- #YAML:1.0
 2 | name                : pgsi
 3 | version             : 1.7.2
 4 | abstract            : Postgres system impact report
 5 | author:              
 6 |   - Mark Johnson <mark@endpoint.com>
 7 | 
 8 | license             : bsd
 9 | dynamic_config      : 1
10 | distribution_type   : module
11 | 
12 | requires:
13 |   IO::Handle        : 1.24
14 | build_requires:
15 |   Test::Harness     : 2.03
16 |   Test::More        : 0.61
17 |   Module::Signature : 0.50
18 | 
19 | provides:
20 |   pgsi:
21 |     file            : pgsi.pl
22 |     version         : 1.7.2
23 | 
24 | resources:
25 |   homepage          : http://bucardo.org/pgsi/index.html
26 |   bugtracker        : http://bucardo.org/pgsi/bugs.html
27 |   repository        : http://bucardo.org/pgsi/repo.html
28 |   license           : http://bucardo.org/pgsi/license.txt
29 | meta-spec:
30 |   version           : 1.3
31 |   url               : http://module-build.sourceforge.net/META-spec-v1.3.html
32 | generated_by        : emacs
33 | 


--------------------------------------------------------------------------------
/t/00_signature.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # -*-mode:cperl; indent-tabs-mode: nil-*-
 3 | 
 4 | ## Test that our SIGNATURE file is valid - requires TEST_SIGNATURE env
 5 | 
 6 | use 5.006;
 7 | use strict;
 8 | use warnings;
 9 | use Test::More;
10 | use IO::Handle;
11 | *STDOUT->autoflush(1);
12 | *STDERR->autoflush(1);
13 | 
14 | if (!$ENV{TEST_SIGNATURE}) {
15 |     plan skip_all => 'Set the environment variable TEST_SIGNATURE to enable this test';
16 | }
17 | plan tests => 1;
18 | 
19 | SKIP: {
20 |     if (!eval { require Module::Signature; 1 }) {
21 |         skip ('Must have Module::Signature to test SIGNATURE file', 1);
22 |     }
23 |     elsif ( !-e 'SIGNATURE' ) {
24 |         fail ('SIGNATURE file was not found');
25 |     }
26 |     elsif ( ! -s 'SIGNATURE') {
27 |         fail ('SIGNATURE file was empty');
28 |     }
29 |     else {
30 |         my $ret = Module::Signature::verify();
31 |         if ($ret eq Module::Signature::SIGNATURE_OK()) {
32 |             pass ('Valid SIGNATURE file');
33 |         }
34 |         else {
35 |             fail ('Invalid SIGNATURE file');
36 |         }
37 |     }
38 | }
39 | 


--------------------------------------------------------------------------------
/Makefile.PL:
--------------------------------------------------------------------------------
 1 | use ExtUtils::MakeMaker;
 2 | use strict;
 3 | use warnings;
 4 | use 5.008003;
 5 | 
 6 | my $VERSION = '1.7.2';
 7 | 
 8 | WriteMakefile(
 9 |     NAME          => 'pgsi',
10 |     VERSION_FROM  => 'pgsi.pl',
11 |     PREREQ_PM     => {
12 |                      'Test::More'          => '0.61',
13 |                      'Test::Harness'       => '2.03',
14 |                      'IO::Handle'          => '1.24',
15 |                   },
16 |     EXE_FILES     => ['pgsi.pl'],
17 |     ABSTRACT      => 'Postgres system impact report',
18 |     AUTHOR        => 'Mark Johnson <mark@endpoint.com>',
19 |     LICENSE       => 'BSD',
20 |     NO_META       => 1,
21 |     NEEDS_LINKING => 0,
22 |     NORECURS      => 1,
23 |     PM            => { },
24 |     clean         => { FILES => '*~ tmp/* test.pg.log pgsi.html' },
25 | );
26 | 
27 | package MY;
28 | 
29 | sub manifypods {
30 |     my $after = qq{\t\$(NOECHO) pod2html pgsi.pl > pgsi.html\n};
31 |     $after .= qq{\t\$(NOECHO) \$(RM_F) pod*.tmp pm_to_blib\n};
32 |     $after .= qq{\t\$(NOECHO) \$(PERL) -pi -e "s/<link.*?>//" pgsi.html\n};
33 |     $after .= qq{\t\$(NOECHO) \$(ECHO) Created pgsi.html\n};
34 |     return shift->SUPER::manifypods(@_) . $after;
35 | }
36 | 
37 | 


--------------------------------------------------------------------------------
/t/99_cleanup.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # -*-mode:cperl; indent-tabs-mode: nil-*-
 3 | 
 4 | ## Cleanup all database objects we may have created
 5 | ## Shutdown the test database if we created one
 6 | 
 7 | use 5.006;
 8 | use strict;
 9 | use warnings;
10 | use IO::Handle;
11 | *STDOUT->autoflush(1);
12 | *STDERR->autoflush(1);
13 | use Test::More tests => 1;
14 | 
15 | my $testdir = 'pgsi_test_database';
16 | 
17 | if (! -d $testdir) {
18 |     pass ("(Cleanup) Test database directory does not exist\n");
19 |     exit;
20 | }
21 | 
22 | my $pidfile = "$testdir/postmaster.pid";
23 | if (! -e $pidfile) {
24 |     pass ("(Cleanup) Test database PID file does not exist\n");
25 |     exit;
26 | }
27 | 
28 | open my $fh, '<', $pidfile or die qq{Could not open "$pidfile": $!\n};
29 | <$fh> =~ /(\d+)/ or die qq{No PID found in file "$pidfile"\n};
30 | my $pid = $1;
31 | close $fh or die qq{Could not close "$pidfile": $!\n};
32 | 
33 | my $count = kill 0 => $pid;
34 | if ($count == 0) {
35 |     unlink $pidfile;
36 |     pass ("(Cleanup) Test database process not found, removed $pidfile\n");
37 |     exit;
38 | }
39 | 
40 | diag "Shutting down test database\n";
41 | 
42 | kill 15 => $pid;
43 | 
44 | pass ("(Cleanup) Test database asked to shutdown with a kill -15\n");
45 | 


--------------------------------------------------------------------------------
/t/99_pod.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # -*-mode:cperl; indent-tabs-mode: nil-*-
 3 | 
 4 | ## Check our Pod, requires Test::Pod
 5 | ## Also done if available: Test::Pod::Coverage
 6 | ## Requires TEST_AUTHOR env
 7 | 
 8 | use 5.006;
 9 | use strict;
10 | use warnings;
11 | use Test::More;
12 | use IO::Handle;
13 | *STDOUT->autoflush(1);
14 | *STDERR->autoflush(1);
15 | 
16 | if (!$ENV{TEST_AUTHOR}) {
17 |     plan skip_all => 'Set the environment variable TEST_AUTHOR to enable this test';
18 | }
19 | 
20 | plan tests => 2;
21 | 
22 | my $PODVERSION = '0.95';
23 | eval {
24 |     require Test::Pod;
25 |     Test::Pod->import;
26 | };
27 | SKIP: {
28 |     if ($@ or $Test::Pod::VERSION < $PODVERSION) {
29 |         skip ("Test::Pod $PODVERSION is required", 1);
30 |     }
31 |     pod_file_ok ('pgsi.pl');
32 | }
33 | 
34 | ## We won't require everyone to have this, so silently move on if not found
35 | my $PODCOVERVERSION = '1.04';
36 | eval {
37 |     require Test::Pod::Coverage;
38 |     Test::Pod::Coverage->import;
39 | };
40 | SKIP: {
41 | 
42 |     if ($@ or $Test::Pod::Coverage::VERSION < $PODCOVERVERSION) {
43 |         skip ("Test::Pod::Coverage $PODCOVERVERSION is required", 1);
44 |     }
45 | 
46 |     my $trusted_names  =
47 |         [
48 |         ];
49 | 
50 |     my $t='pgsi.pl pod coverage okay';
51 |     pod_coverage_ok ('pgsi.pl', {trustme => $trusted_names}, $t);
52 | }
53 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2008, 2009, 2010
 2 |   Mark Johnson <mark@endpoint.com>. All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without 
 5 | modification, are permitted provided that the following conditions are met:
 6 | 
 7 |   1. Redistributions of source code must retain the above copyright notice, 
 8 |      this list of conditions and the following disclaimer.
 9 |   2. Redistributions in binary form must reproduce the above copyright notice, 
10 |      this list of conditions and the following disclaimer in the documentation 
11 |      and/or other materials provided with the distribution.
12 | 
13 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED 
14 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 
15 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO 
16 | EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
17 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT 
18 | OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
19 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
20 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 
21 | IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY 
22 | OF SUCH DAMAGE.
23 | 


--------------------------------------------------------------------------------
/Changes:
--------------------------------------------------------------------------------
 1 | Changes for the Postgres System Impact program pgsi.pl
 2 | 
 3 | 
 4 | Version 1.7.2
 5 | 
 6 |   - Better removal of comments (Joshua Tolley)
 7 | 
 8 | 
 9 | Version 1.7.1, released June 1, 2011
10 | 
11 |   - Add "--help" output (Greg Sabino Mullane)
12 | 
13 |   - Better logic for median values (razvan-panda @ github)
14 | 
15 | 
16 | Version 1.7.0, released February 28, 2011
17 | 
18 |   - Special parsing of crowded queries (x=1234AND y=2345) (Greg Sabino Mullane)
19 | 
20 |   - Add "bare" mode, to handle stderr logs with no log_line_prefix (Joshua Tolley)
21 |     
22 |   - Add --interval option, for cases in which the log does not say (Joshua Tolley)
23 | 
24 | 
25 | Version 1.6.0, released January 31, 2011
26 | 
27 |   - Add in 'csv' as a valid mode (Abraham Ingersoll)
28 | 
29 |   - Add 'quiet' option (Greg Sabino Mullane)
30 | 
31 | 
32 | Version 1.5.0, released December 15, 2010
33 | 
34 |  - Allow for multiple files to be parsed by multiple --file= calls.
35 | 
36 |  - Always put COPY last
37 | 
38 |  - Reduce the number of decimal places to two from three.
39 | 
40 |  - Rename "Avg" to "Mean"
41 | 
42 | 
43 | Version 1.4.0, released December 9, 2010
44 | 
45 |  - Support for alternate syslog format
46 | 
47 |  - Fix bug when last process line was not a STATEMENT
48 | 
49 |  - Add median
50 | 
51 | 
52 | Version 1.3.0, released July 9, 2010
53 | 
54 |  - Handle lines with both duration and statement
55 | 
56 |  - Show the filename in the output
57 | 
58 |  - Better handling of SQL comments
59 | 
60 |  - Better regexes, keyword highlighting
61 | 
62 |  - Don't die on invalid lines, but warn instead, and only in verbose mode.
63 | 
64 |  - Do not show negative system impact values or negative durations
65 | 
66 | 
67 | Version 1.2.0, released January 2, 2010
68 | 
69 |  - Major code cleanup, add HTML mode
70 | 
71 |  - Much more formatting of results, with CSS colorings
72 | 
73 | 
74 | Version 1.1.1, released January 2009
75 | 
76 | - First public release
77 | 


--------------------------------------------------------------------------------
/SIGNATURE:
--------------------------------------------------------------------------------
 1 | This file contains message digests of all files listed in MANIFEST,
 2 | signed via the Module::Signature module, version 0.68.
 3 | 
 4 | To verify the content in this distribution, first make sure you have
 5 | Module::Signature installed, then type:
 6 | 
 7 |     % cpansign -v
 8 | 
 9 | It will check each file's integrity, as well as the signature's
10 | validity.  If "==> Signature verified OK! <==" is not displayed,
11 | the distribution may already have been compromised, and you should
12 | not run its Makefile.PL or Build.PL.
13 | 
14 | -----BEGIN PGP SIGNED MESSAGE-----
15 | Hash: RIPEMD160
16 | 
17 | SHA1 1a95fb0825122ba03a0e923c21befa4005e565eb .gitignore
18 | SHA1 f4c9d744f9805c07f67df245eacfbd7cc6b4e800 .perlcriticrc
19 | SHA1 f58ac79d456f565b7eff4b5e2ac08331ecbe8488 Changes
20 | SHA1 6b8822e6cafae62bafddc2b2466b51f5433b495e INSTALL
21 | SHA1 4e7d5e6453a4a552a1a670e0c5b8b96d2d043cfe LICENSE
22 | SHA1 e9d7f4e3e957eeee05020eb4379a7b8b2f17d797 MANIFEST
23 | SHA1 17e2f87d23167cf4cd34d43c393130e3396e8c5c MANIFEST.SKIP
24 | SHA1 f739b8faa3f8ed43f62573762fcd359579b42b10 META.yml
25 | SHA1 d6382baa4f40cac14856742532574d9f6c3aa918 Makefile.PL
26 | SHA1 d0d3bd82ef735575210c260d64810c2a5402bd39 README
27 | SHA1 1cd2974d1d658d84ff243a6c8a3bd9f2716dc496 pgsi.pl
28 | SHA1 7933b5aa8cc486885ff70b8d31349d8cde05929c t/00_basic.t
29 | SHA1 dc7459ce2f73becd40d7a624ac71c7554efce458 t/00_release.t
30 | SHA1 0ba35bd2edbabe1ab191cd006721f96998f2c732 t/00_signature.t
31 | SHA1 445dcb8fc226c7cece83aedbceb29ef7f17de2d9 t/02_pgsi.t
32 | SHA1 041a8261061e52e37be5ff77fed1b9a799cd5400 t/99_cleanup.t
33 | SHA1 f5a4e98989a41cdfc7f292f174d14849b3b3f8f8 t/99_perlcritic.t
34 | SHA1 58dfe7e3b035ebe0e8f36a236c4c169190ee1295 t/99_pod.t
35 | SHA1 24cda361ace47b0c6fa1def503783fcd6bf3be0a t/99_spellcheck.t
36 | SHA1 9f1900d5a79447a5b594f721b6c52cf0b30b59b4 t/99_yaml.t
37 | -----BEGIN PGP SIGNATURE-----
38 | 
39 | iEYEAREDAAYFAk/I5uQACgkQvJuQZxSWSshs2wCdHbOZHBekQsGjKGpOKWCgwZjg
40 | ke8AoI0AkdKT6Il9vGAycN/1SHrv6ZP0
41 | =lU2x
42 | -----END PGP SIGNATURE-----
43 | 


--------------------------------------------------------------------------------
/.perlcriticrc:
--------------------------------------------------------------------------------
  1 | 
  2 | ## perlcritic file for pgsi
  3 | ## Usage: perlcritic -profile <thisfile> <scriptname>
  4 | 
  5 | verbose = 8
  6 | severity = 1
  7 | 
  8 | [-Bangs::ProhibitFlagComments]
  9 | [-Bangs::ProhibitNumberedNames]
 10 | [-Bangs::ProhibitVagueNames]
 11 | 
 12 | [-BuiltinFunctions::ProhibitBooleanGrep]
 13 | [-BuiltinFunctions::ProhibitComplexMappings]
 14 | [-BuiltinFunctions::ProhibitReverseSortBlock]
 15 | 
 16 | [-CodeLayout::ProhibitHardTabs]
 17 | [-CodeLayout::ProhibitParensWithBuiltins]
 18 | [-CodeLayout::ProhibitQuotedWordLists]
 19 | [-CodeLayout::RequireTidyCode]
 20 | [-CodeLayout::RequireTrailingCommaAtNewline]
 21 | [-CodeLayout::RequireUseUTF8]
 22 | 
 23 | [-Compatibility::ProhibitUnixDevNull]
 24 | 
 25 | [-ControlStructures::ProhibitCascadingIfElse]
 26 | [-ControlStructures::ProhibitCStyleForLoops]
 27 | [-ControlStructures::ProhibitDeepNests]
 28 | [-ControlStructures::ProhibitPostfixControls]
 29 | [-ControlStructures::ProhibitUnlessBlocks]
 30 | 
 31 | [-Documentation::PodSpelling]
 32 | [-Documentation::RequirePodSections]
 33 | [-Documentation::RequirePODUseEncodingUTF8]
 34 | 
 35 | [-Editor::RequireEmacsFileVariables]
 36 | 
 37 | [-ErrorHandling::RequireCarping]
 38 | [-ErrorHandling::RequireUseOfExceptions]
 39 | [-ErrorHandling::RequireCheckingReturnValueOfEval]
 40 | 
 41 | [-InputOutput::ProhibitBacktickOperators]
 42 | [-InputOutput::ProhibitInteractiveTest]
 43 | [-InputOutput::RequireBracedFileHandleWithPrint]
 44 | [-InputOutput::RequireBriefOpen]
 45 | [-InputOutput::RequireCheckedSyscalls]
 46 | 
 47 | [-Lax::ProhibitComplexMappings::LinesNotStatements]
 48 | [-Lax::ProhibitEmptyQuotes::ExceptAsFallback]
 49 | 
 50 | [-Miscellanea::ProhibitTies]
 51 | [-Miscellanea::RequireRcsKeywords]
 52 | [-Miscellanea::ProhibitUnrestrictedNoCritic]
 53 | 
 54 | [-Modules::PerlMinimumVersion]
 55 | [-Modules::ProhibitExcessMainComplexity]
 56 | [-Modules::RequireFilenameMatchesPackage]
 57 | 
 58 | [-NamingConventions::ProhibitAmbiguousNames]
 59 | [-NamingConventions::Capitalization]
 60 | 
 61 | [-References::ProhibitDoubleSigils]
 62 | 
 63 | [-RegularExpressions::ProhibitCaptureWithoutTest]
 64 | [-RegularExpressions::ProhibitComplexRegexes]
 65 | [-RegularExpressions::ProhibitEnumeratedClasses]
 66 | [-RegularExpressions::ProhibitEscapedMetacharacters]
 67 | [-RegularExpressions::RequireBracesForMultiline]
 68 | [-RegularExpressions::RequireDotMatchAnything]
 69 | [-RegularExpressions::RequireExtendedFormatting]
 70 | [-RegularExpressions::RequireLineBoundaryMatching]
 71 | 
 72 | [-Subroutines::ProhibitQualifiedSubDeclarations]
 73 | [-Subroutines::RequireArgUnpacking]
 74 | 
 75 | [-Tics::ProhibitLongLines]
 76 | 
 77 | [-ValuesAndExpressions::ProhibitAccessOfPrivateData]
 78 | [-ValuesAndExpressions::ProhibitCommaSeparatedStatements]
 79 | [-ValuesAndExpressions::ProhibitEmptyQuotes]
 80 | [-ValuesAndExpressions::ProhibitImplicitNewlines]
 81 | [-ValuesAndExpressions::ProhibitInterpolationOfLiterals]
 82 | [-ValuesAndExpressions::ProhibitMagicNumbers]
 83 | [-ValuesAndExpressions::ProhibitMixedBooleanOperators]
 84 | [-ValuesAndExpressions::ProhibitNoisyQuotes]
 85 | [-ValuesAndExpressions::ProhibitVersionStrings]
 86 | [-ValuesAndExpressions::RequireNumberSeparators]
 87 | [-ValuesAndExpressions::RequireNumericVersion]
 88 | [-ValuesAndExpressions::RestrictLongStrings]
 89 | 
 90 | [-Variables::ProhibitLocalVars]
 91 | [-Variables::ProhibitPackageVars]
 92 | [-Variables::ProhibitPunctuationVars]
 93 | [-Variables::RequireLocalizedPunctuationVars]
 94 | [-Variables::RequireNegativeIndices]
 95 | 
 96 | 
 97 | [-Modules::RequireVersionVar]
 98 | 
 99 | [CodeLayout::ProhibitTrailingWhitespace]
100 | severity = 5
101 | 
102 | [Subroutines::ProhibitBuiltinHomonyms]
103 | severity = 5
104 | 


--------------------------------------------------------------------------------
/t/00_release.t:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | # -*-mode:cperl; indent-tabs-mode: nil-*-
  3 | 
  4 | ## Make sure the version number is consistent in all places
  5 | ## Check all files in MANIFEST for tabs and odd characters
  6 | 
  7 | use 5.006;
  8 | use strict;
  9 | use warnings;
 10 | use Data::Dumper;
 11 | use Test::More;
 12 | use lib 't','.';
 13 | 
 14 | if (! $ENV{RELEASE_TESTING}) {
 15 |     plan (skip_all =>  'Test skipped unless environment variable RELEASE_TESTING is set');
 16 | }
 17 | 
 18 | ## Grab all files from the MANIFEST to generate a test count
 19 | my $file = 'MANIFEST';
 20 | my @mfiles;
 21 | open my $mfh, '<', $file or die qq{Could not open "$file": $!\n};
 22 | while (<$mfh>) {
 23 |     next if /^#/;
 24 |     push @mfiles => $1 if /(\S.+)/o;
 25 | }
 26 | close $mfh or warn qq{Could not close "$file": $!\n};
 27 | 
 28 | plan tests => 1 + @mfiles;
 29 | 
 30 | my %v;
 31 | my $vre = qr{(\d+\.\d+\.\d+\_?\d*)};
 32 | 
 33 | ## Grab version from various files
 34 | $file = 'META.yml';
 35 | open my $fh, '<', $file or die qq{Could not open "$file": $!\n};
 36 | while (<$fh>) {
 37 |     push @{$v{$file}} => [$1,$.] if /version\s*:\s*$vre/;
 38 | }
 39 | close $fh or warn qq{Could not close "$file": $!\n};
 40 | 
 41 | $file = 'Makefile.PL';
 42 | open $fh, '<', $file or die qq{Could not open "$file": $!\n};
 43 | while (<$fh>) {
 44 |     push @{$v{$file}} => [$1,$.] if /VERSION = '$vre'/;
 45 | }
 46 | close $fh or warn qq{Could not close "$file": $!\n};
 47 | 
 48 | $file = 'pgsi.pl';
 49 | open $fh, '<', $file or die qq{Could not open "$file": $!\n};
 50 | while (<$fh>) {
 51 |     push @{$v{$file}} => [$1,$.] if (/VERSION = '$vre'/ or /refers to version $vre/);
 52 | }
 53 | close $fh or warn qq{Could not close "$file": $!\n};
 54 | 
 55 | $file = 'pgsi.html';
 56 | open $fh, '<', $file or die qq{Could not open "$file": $!\n};
 57 | while (<$fh>) {
 58 |     push @{$v{$file}} => [$1,$.] if /version $vre/;
 59 | }
 60 | close $fh or warn qq{Could not close "$file": $!\n};
 61 | 
 62 | $file = 'Changes';
 63 | open $fh, '<', $file or die qq{Could not open "$file": $!\n};
 64 | while (<$fh>) {
 65 |     push @{$v{$file}} => [$1,$.] and last if /Version $vre/;
 66 | }
 67 | close $fh or warn qq{Could not close "$file": $!\n};
 68 | 
 69 | my $good = 1;
 70 | my $lastver;
 71 | for my $filename (keys %v) {
 72 |     for my $glob (@{$v{$filename}}) {
 73 |         my ($ver,$line) = @$glob;
 74 |         if (! defined $lastver) {
 75 |             $lastver = $ver;
 76 |         }
 77 |         elsif ($ver ne $lastver) {
 78 |             $good = 0;
 79 |         }
 80 |     }
 81 | }
 82 | 
 83 | if ($good) {
 84 |     pass ("All version numbers are the same ($lastver)");
 85 | }
 86 | else {
 87 |     fail ('All version numbers were not the same!');
 88 |     for my $filename (sort keys %v) {
 89 |         for my $glob (@{$v{$filename}}) {
 90 |             my ($ver,$line) = @$glob;
 91 |             diag "File: $filename. Line: $line. Version: $ver\n";
 92 |         }
 93 |     }
 94 | }
 95 | 
 96 | ## Make sure all files in the MANIFEST are "clean": no tabs, no unusual characters
 97 | 
 98 | for my $mfile (@mfiles) {
 99 |     file_is_clean($mfile);
100 | }
101 | 
102 | sub file_is_clean {
103 | 
104 |     my $filename = shift or die;
105 | 
106 |     if (!open $fh, '<', $filename) {
107 |         fail qq{Could not open "$filename": $!\n};
108 |         return;
109 |     }
110 |     $good = 1;
111 |     my $inside_copy = 0;
112 |     while (<$fh>) {
113 |         if (/^COPY .+ FROM stdin/i) {
114 |             $inside_copy = 1;
115 |         }
116 |         if (/^\\./ and $inside_copy) {
117 |             $inside_copy = 0;
118 |         }
119 |         if (/\t/ and $filename ne 'Makefile.PL' and $filename !~ /\.html$/ and ! $inside_copy) {
120 |             diag "Found a tab at line $. of $filename\n";
121 |             $good = 0;
122 |         }
123 |         if (! /^[\S ]*/) {
124 |             diag "Invalid character at line $. of $filename: $_\n";
125 |             $good = 0; die;
126 |         }
127 |     }
128 |     close $fh or warn qq{Could not close "$filename": $!\n};
129 | 
130 |     if ($good) {
131 |         pass ("The $filename file has no tabs or unusual characters");
132 |     }
133 |     else {
134 |         fail ("The $filename file did not pass inspection!");
135 |     }
136 | 
137 | }
138 | 
139 | exit;
140 | 


--------------------------------------------------------------------------------
/t/99_spellcheck.t:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | # -*-mode:cperl; indent-tabs-mode: nil-*-
  3 | 
  4 | ## Spellcheck as much as we can
  5 | ## Requires TEST_SPELL to be set
  6 | 
  7 | use 5.006;
  8 | use strict;
  9 | use warnings;
 10 | use Test::More;
 11 | use IO::Handle;
 12 | *STDOUT->autoflush(1);
 13 | *STDERR->autoflush(1);
 14 | 
 15 | my (@testfiles, $fh);
 16 | 
 17 | if (!$ENV{TEST_SPELL}) {
 18 |     plan skip_all => 'Set the environment variable TEST_SPELL to enable this test';
 19 | }
 20 | elsif (!eval { require Text::SpellChecker; 1 }) {
 21 |     plan skip_all => 'Could not find Text::SpellChecker';
 22 | }
 23 | else {
 24 |     opendir my $dir, 't' or die qq{Could not open directory 't': $!\n};
 25 |     @testfiles = map { "t/$_" } grep { /^.+\.(t|pl)$/ } readdir $dir;
 26 |     closedir $dir or die qq{Could not closedir "$dir": $!\n};
 27 |     plan tests => 4+@testfiles;
 28 | }
 29 | 
 30 | my %okword;
 31 | my $file = 'Common';
 32 | while (<DATA>) {
 33 |     if (/^## (.+):/) {
 34 |         $file = $1;
 35 |         next;
 36 |     }
 37 |     next if /^#/ or ! /\w/;
 38 |     for (split) {
 39 |         $okword{$file}{$_}++;
 40 |     }
 41 | }
 42 | 
 43 | 
 44 | sub spellcheck {
 45 | 
 46 |     my ($desc, $text, $filename) = @_;
 47 |     my $check = Text::SpellChecker->new(text => $text);
 48 |     my %badword;
 49 |     while (my $word = $check->next_word) {
 50 |         next if $okword{Common}{$word} or $okword{$filename}{$word};
 51 |         $badword{$word}++;
 52 |     }
 53 |     my $count = keys %badword;
 54 |     if (! $count) {
 55 |         pass ("Spell check passed for $desc");
 56 |         return;
 57 |     }
 58 |     fail ("Spell check failed for $desc. Bad words: $count");
 59 |     for (sort keys %badword) {
 60 |         diag "$_\n";
 61 |     }
 62 |     return;
 63 | }
 64 | 
 65 | 
 66 | ## First, the plain ol' textfiles
 67 | for my $file (qw/README Changes/) {
 68 |     if (!open $fh, '<', $file) {
 69 |         fail (qq{Could not find the file "$file"!});
 70 |     }
 71 |     else {
 72 |         { local $/; $_ = <$fh>; } ## no critic
 73 |         close $fh or warn qq{Could not close "$file": $!\n};
 74 |         spellcheck ($file => $_, $file);
 75 |     }
 76 | }
 77 | 
 78 | ## Now the embedded POD
 79 | SKIP: {
 80 |     if (!eval { require Pod::Spell; 1 }) {
 81 |         skip ('Need Pod::Spell to test the spelling of embedded POD', 1);
 82 |     }
 83 | 
 84 |     for my $file (qw{pgsi.pl}) {
 85 |         if (! -e $file) {
 86 |             fail (qq{Could not find the file "$file"!});
 87 |             next;
 88 |         }
 89 |         my $string = qx{podspell $file};
 90 |         spellcheck ("POD from $file" => $string, $file);
 91 |     }
 92 | }
 93 | 
 94 | ## Now the comments
 95 | SKIP: {
 96 |     if (!eval { require File::Comments; 1 }) {
 97 |         skip ('Need File::Comments to test the spelling inside comments', 1+@testfiles);
 98 |     }
 99 | 
100 |     my $fc = File::Comments->new();
101 | 
102 |     my @files;
103 |     for (sort @testfiles) {
104 |         push @files, "$_";
105 |     }
106 | 
107 | 
108 |     for my $file (@testfiles, qw{pgsi.pl}) {
109 |         ## Tests as well?
110 |         if (! -e $file) {
111 |             fail (qq{Could not find the file "$file"!});
112 |         }
113 |         my $string = $fc->comments($file);
114 |         if (! $string) {
115 |             fail (qq{Could not get comments from file $file});
116 |             next;
117 |         }
118 |         $string = join "\n" => @$string;
119 |         $string =~ s/=head1.+//sm;
120 |         spellcheck ("comments from $file" => $string, $file);
121 |     }
122 | 
123 | 
124 | }
125 | 
126 | 
127 | __DATA__
128 | ## These words are okay
129 | 
130 | ## Common:
131 | 
132 | Backcountry
133 | backend
134 | cardinality
135 | conf
136 | cperl
137 | cwd
138 | durations
139 | env
140 | http
141 | logfile
142 | Mullane
143 | namespace
144 | ol
145 | params
146 | perl
147 | perldoc
148 | pglog
149 | pgsi
150 | Postgres
151 | postgresql
152 | PostgreSQL
153 | regex
154 | Regex
155 | Sabino
156 | SELECTs
157 | Spellcheck
158 | SQL
159 | stdin
160 | stdout
161 | textfiles
162 | UPDATEs
163 | usr
164 | wiki
165 | YAML
166 | YAMLiciousness
167 | yml
168 | 
169 | ## pgsi.pl
170 | 
171 | arg
172 | csv
173 | Globals
174 | html
175 | Ingersoll
176 | pid
177 | refactor
178 | syslog
179 | Tolley
180 | 
181 | ## README
182 | 
183 | bucardo
184 | 
185 | ## Changes
186 | 
187 | colorings
188 | CSS
189 | durations
190 | filename
191 | github
192 | razvan
193 | regexes
194 | stderr
195 | 
196 | 


--------------------------------------------------------------------------------
/t/02_pgsi.t:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | # -*-mode:cperl; indent-tabs-mode: nil-*-
  3 | 
  4 | ## Simply test that the main script compiles and gives a version
  5 | 
  6 | use 5.006;
  7 | use strict;
  8 | use warnings;
  9 | use DBI;
 10 | use Data::Dumper;
 11 | use IO::Handle;
 12 | use POSIX;
 13 | *STDOUT->autoflush(1);
 14 | *STDERR->autoflush(1);
 15 | use Test::More tests => 6;
 16 | 
 17 | use vars qw/$COM $info $count $t/;
 18 | 
 19 | eval {
 20 |     system "perl -c pgsi.pl 2>/dev/null";
 21 | };
 22 | is ($@, q{}, 'Program compiled cleanly');
 23 | 
 24 | ## Create a test database as needed
 25 | my $testdir = 'pgsi_test_database';
 26 | if (! -d $testdir) {
 27 |     diag "Creating test database cluster in $testdir\n";
 28 |     $COM = "initdb -D $testdir --locale=C -E UTF8 2>&1";
 29 |     eval {
 30 |         $info = qx{$COM};
 31 |     };
 32 |     $@ and BAIL_OUT "Failed to initdb: $@\n";
 33 | }
 34 | 
 35 | ## Make custom changes to the postgresql.conf
 36 | my $file = "$testdir/postgresql.conf";
 37 | open my $fh, '+<', $file or die qq{Could not open "$file": $!\n};
 38 | my $found = 0;
 39 | while (<$fh>) {
 40 |     if (/PGSI TESTING/) {
 41 |         $found = 1;
 42 |         last;
 43 |     }
 44 | }
 45 | my $fn = 'pg.%C.log';
 46 | if (! $found) {
 47 |     diag "Configuring postgresql.conf\n";
 48 |     print $fh <<"EOT"
 49 | 
 50 | ## PGSI TESTING
 51 | port             = 5555
 52 | listen_addresses = ''
 53 | max_connections  = 5
 54 | log_statement    = 'all'
 55 | log_duration     = 'on'
 56 | log_line_prefix  = '%t %h postgres[%p]: [%l-1] ' ## Simulate syslog entries
 57 | log_destination  = 'stderr'
 58 | log_directory    = '.'
 59 | log_filename     = '$fn'
 60 | 
 61 | EOT
 62 | }
 63 | close $fh or die qq{Could not close "$file": $!\n};
 64 | 
 65 | 
 66 | my $pidfile = "$testdir/postmaster.pid";
 67 | my $startup = 1;
 68 | my $logfile = POSIX::strftime("$testdir/$fn", localtime); ## no critic
 69 | if (-e $pidfile) {
 70 |     open my $fh, '<', $pidfile or die qq{Could not open "$pidfile": $!\n};
 71 |     <$fh> =~ /(\d+)/ or die qq{No PID found in file "$pidfile"\n};
 72 |     my $pid = $1;
 73 |     close $fh or die qq{Could not close "$pidfile": $!\n};
 74 |     ## Make sure it's still around
 75 |     $count = kill 0 => $pid;
 76 |     if ($count != 1) {
 77 |         warn qq{Server seems to have died, removing file "$pidfile"\n};
 78 |         unlink $pidfile or die qq{Could not remove file "$pidfile"\n};
 79 |     }
 80 | }
 81 | if (! -e $pidfile) {
 82 |     diag "Starting up test database\n";
 83 |     $COM = "pg_ctl -D $testdir -l $logfile start";
 84 |     eval {
 85 |         $info = qx{$COM};
 86 |     };
 87 |     $@ and BAIL_OUT "Failed to start database: $@\n";
 88 |     {
 89 |         last if -e $pidfile;
 90 |         sleep 0.1;
 91 |         redo;
 92 |     }
 93 |     ## Wait for "ready to accept connections"
 94 |     open my $fh, '<', $logfile or die qq{Could not open "$logfile": $!\n};
 95 |     seek $fh, -100, 2;
 96 |     LOOP: {
 97 |           while (<$fh>) {
 98 |               last LOOP if /system is ready/;
 99 |           }
100 |           sleep 0.1;
101 |           seek $fh, 0, 1;
102 |           redo;
103 |       }
104 |     close $fh or die qq{Could not close "$logfile": $!\n};
105 | }
106 | 
107 | ## Start tracking things sent to the logfile.
108 | ## Write a copy, so we only get things since we started up.
109 | my $testlog = 'test.pg.log';
110 | open my $tfh, '>', $testlog or die qq{Could not open "$testlog": $!\n};
111 | open my $lfh, '<', $logfile or die qq{Could not open "$logfile": $!\n};
112 | seek $lfh, 0, 2;
113 | 
114 | ## Send a few commands to the backend, then test basic functionality
115 | my $dbh = DBI->connect('dbi:Pg:port=5555;dbname=postgres', '', '', {AutoCommit=>1, RaiseError=>1});
116 | 
117 | $dbh->do("SELECT 999");
118 | $dbh->do("SELECT 888");
119 | $dbh->do("SELECT 777");
120 | 
121 | $dbh->do("SELECT pg_client_encoding()");
122 | 
123 | update_log_copy();
124 | 
125 | $info = qx{perl pgsi.pl --file $testlog};
126 | 
127 | ## Got the standard header?
128 | $t=q{pgsi returned the expected header when run};
129 | like ($info, qr{Query System Impact}, $t);
130 | 
131 | ## Got the proper count?
132 | $t=q{pgsi returned the expected count};
133 | like ($info, qr{^\Q3<br />}ms, $t);
134 | 
135 | $t=q{pgsi returned the expected query};
136 | like ($info, qr{SELECT</span>.+\?}ms, $t);
137 | 
138 | $t=q{pgsi returned the expected query};
139 | like ($info, qr{SELECT</span> pg_client_encoding\(\)}ms, $t);
140 | 
141 | $t=q{pgsi returned an average duration line};
142 | if ($info =~ qr{^(\d+)\.\d+ ms<br />}ms) {
143 |     pass ($t);
144 | }
145 | else {
146 |     fail ($t);
147 | }
148 | 
149 | close $tfh or die qq{Could not close "$testlog": $!\n};
150 | 
151 | exit;
152 | 
153 | sub update_log_copy {
154 | 
155 |     my $action = shift || 0;
156 | 
157 |     seek $lfh, 0, 1;
158 |     while (<$lfh>) {
159 |         print $tfh $_;
160 |     }
161 |     return;
162 | 
163 | } ## end of update_log_copy
164 | 


--------------------------------------------------------------------------------
/t/99_perlcritic.t:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | # -*-mode:cperl; indent-tabs-mode: nil-*-
  3 | 
  4 | ## Run Perl::Critic against the source code and the tests
  5 | ## This is highly customized, so take with a grain of salt
  6 | ## Requires TEST_CRITIC to be set
  7 | 
  8 | use 5.006;
  9 | use strict;
 10 | use warnings;
 11 | use Test::More;
 12 | use Data::Dumper;
 13 | use IO::Handle;
 14 | *STDOUT->autoflush(1);
 15 | *STDERR->autoflush(1);
 16 | 
 17 | my (@testfiles,%fileslurp,$t);
 18 | 
 19 | if (!$ENV{TEST_CRITIC}) {
 20 |     plan skip_all => 'Set the environment variable TEST_CRITIC to enable this test';
 21 | }
 22 | elsif (!eval { require Perl::Critic; 1 }) {
 23 |     plan skip_all => 'Could not find Perl::Critic';
 24 | }
 25 | elsif ($Perl::Critic::VERSION < 0.23) {
 26 |     plan skip_all => 'Perl::Critic must be version 0.23 or higher';
 27 | }
 28 | else {
 29 |     $ENV{LANG} = 'C';
 30 |     opendir my $dir, 't' or die qq{Could not open directory 't': $!\n};
 31 |     @testfiles = map { "t/$_" } grep { /^.+\.(t|pl)$/ } readdir $dir;
 32 |     closedir $dir or die qq{Could not closedir "$dir": $!\n};
 33 | 
 34 |     my $testmore = 0;
 35 |     for my $file (@testfiles) {
 36 |         open my $fh, '<', $file or die qq{Could not open "$file": $!\n};
 37 |         my $line;
 38 |         while (defined($line = <$fh>)) {
 39 |             last if $line =~ /__DATA__/; ## perlcritic.t
 40 |             for my $func (qw/ok isnt pass fail cmp cmp_ok is_deeply unlike like/) { ## no skip
 41 |                 next if $line !~ /\b$func\b/;
 42 |                 next if $line =~ /$func \w/; ## e.g. 'skip these tests'
 43 |                 next if $line =~ /[\$\%]$func/; ## e.g. $ok %ok
 44 |                 $fileslurp{$file}{$.}{$func} = $line;
 45 |                 $testmore++;
 46 |             }
 47 |         }
 48 |         close $fh or die qq{Could not close "$file": $!\n};
 49 |     }
 50 |     plan tests => 3 + @testfiles + $testmore;
 51 | }
 52 | ok (@testfiles, 'Found files in test directory');
 53 | 
 54 | ## Make sure all Test::More function calls are standardized
 55 | for my $file (sort keys %fileslurp) {
 56 |     for my $linenum (sort {$a <=> $b} keys %{$fileslurp{$file}}) {
 57 |         for my $func (sort keys %{$fileslurp{$file}{$linenum}}) {
 58 |             $t=qq{Test::More method "$func" is in standard format inside $file at line $linenum};
 59 |             ## Must be at start of line (optional whitespace and comment), a space, a paren, and something interesting
 60 |             like ($fileslurp{$file}{$linenum}{$func}, qr{^\s*#?$func \(['\S]}, $t);
 61 |         }
 62 |     }
 63 | }
 64 | 
 65 | ## Check some non-test files
 66 | my $critic = Perl::Critic->new(-severity => 1);
 67 | 
 68 | for my $filename (qw{pgsi.pl}) {
 69 | 
 70 |     if ($ENV{TEST_CRITIC_SKIPNONTEST}) {
 71 |         pass (qq{Skipping non-test file "$filename"});
 72 |         next;
 73 |     }
 74 | 
 75 |     -e $filename or die qq{Could not find "$filename"!};
 76 |     open my $oldstderr, '>&', \*STDERR or die 'Could not dupe STDERR';
 77 |     close STDERR or die qq{Could not close STDERR: $!};
 78 |     my @vio = $critic->critique($filename);
 79 |     open STDERR, '>&', $oldstderr or die 'Could not recreate STDERR'; ## no critic
 80 |     close $oldstderr or die qq{Could not close STDERR copy: $!};
 81 |     my $vios = 0;
 82 |   VIO: for my $v (@vio) {
 83 |         my $d = $v->description();
 84 |         (my $policy = $v->policy()) =~ s/Perl::Critic::Policy:://;
 85 |         my $source = $v->source();
 86 | 
 87 |         ## Allow our sql and qw blocks to have tabs:
 88 |         next if $policy =~ /ProhibitHardTabs/ and ($source =~ /sql = qq/i or $source =~ /qw[\(\/]/);
 89 | 
 90 |         $vios++;
 91 |         my $f = $v->filename();
 92 |         my $l = $v->location();
 93 |         my $line = $l->[0];
 94 |         diag "\nFile: $f (line $line)\n";
 95 |         diag "Vio: $d\n";
 96 |         diag "Policy: $policy\n";
 97 |         diag "Source: $source\n\n";
 98 |     }
 99 |     if ($vios) {
100 |         fail (qq{Failed Perl::Critic tests for file "$filename": $vios});
101 |     }
102 |     else {
103 |         pass (qq{File "$filename" passed all Perl::Critic tests});
104 |     }
105 | 
106 | }
107 | 
108 | ## Specific exclusions for test scripts:
109 | my %ok =
110 |     (yaml => {
111 |               sub => 'meta_spec_ok',
112 |               },
113 |      pod => {
114 |              sub => 'pod_file_ok pod_coverage_ok',
115 |              },
116 |      signature => {
117 |              sub => 'verify SIGNATURE_OK',
118 |              },
119 | );
120 | for my $f (keys %ok) {
121 |     for my $ex (keys %{$ok{$f}}) {
122 |         if ($ex eq 'sub') {
123 |             for my $foo (split /\s+/ => $ok{$f}{sub}) {
124 |                 push @{$ok{$f}{OK}} => qr{Subroutine "$foo" (?:is neither|not exported)};
125 |             }
126 |         }
127 |         else {
128 |             die "Unknown exception '$ex'\n";
129 |         }
130 |     }
131 | }
132 | 
133 | ## Allow Test::More subroutines
134 | my $tm = join '|' => (qw/skip plan pass fail is ok diag BAIL_OUT/);
135 | my $testmoreok = qr{Subroutine "$tm" is neither};
136 | 
137 | ## Create a new critic for the tests
138 | $critic = Perl::Critic->new(-severity => 1);
139 | 
140 | my $count = 1;
141 | for my $filename (sort @testfiles) {
142 |     -e $filename or die qq{Could not find "$filename"!};
143 |     my @vio = $critic->critique($filename);
144 |     my $vios = 0;
145 |   VIO: for my $v (@vio) {
146 |         my $d = $v->description();
147 |         (my $policy = $v->policy()) =~ s/Perl::Critic::Policy:://;
148 |         my $source = $v->source();
149 |         my $f = $v->filename();
150 | 
151 |         ## Skip common Test::More subroutines:
152 |         next if $d =~ $testmoreok;
153 | 
154 |         ## Skip other specific items:
155 |         for my $k (sort keys %ok) {
156 |             next unless $f =~ /$k/;
157 |             for (@{$ok{$k}{OK}}) {
158 |                 next VIO if $d =~ $_;
159 |             }
160 |         }
161 | 
162 |         ## Skip included file package warning
163 |         next if $policy =~ /RequireExplicitPackage/ and $filename =~ /setup/;
164 | 
165 |         $vios++;
166 |         my $l = $v->location();
167 |         my $line = $l->[0];
168 |         diag "\nFile: $f (line $line)\n";
169 |         diag "Vio: $d\n";
170 |         diag "Policy: $policy\n";
171 |         diag "Source: $source\n\n";
172 |     }
173 |     if ($vios) {
174 |         fail (qq{Failed Perl::Critic tests for file "$filename": $vios});
175 |     }
176 |     else {
177 |         pass (qq{File "$filename" passed all Perl::Critic tests});
178 |     }
179 | }
180 | 
181 | pass ('Finished Perl::Critic testing');
182 | 
183 | 


--------------------------------------------------------------------------------
/pgsi.html:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" ?>
  2 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 | <html xmlns="http://www.w3.org/1999/xhtml">
  4 | <head>
  5 | <title>pgsi.pl - Produce system impact reports for a PostgreSQL database.</title>
  6 | <meta http-equiv="content-type" content="text/html; charset=utf-8" />
  7 | 
  8 | </head>
  9 | 
 10 | <body style="background-color: white">
 11 | 
 12 | 
 13 | <!-- INDEX BEGIN -->
 14 | <div name="index">
 15 | <p><a name="__index__"></a></p>
 16 | 
 17 | <ul>
 18 | 
 19 | 	<li><a href="#name">NAME</a></li>
 20 | 	<li><a href="#version">VERSION</a></li>
 21 | 	<li><a href="#usage">USAGE</a></li>
 22 | 	<li><a href="#description">DESCRIPTION</a></li>
 23 | 	<ul>
 24 | 
 25 | 		<li><a href="#log_data">Log Data</a></li>
 26 | 		<li><a href="#options">Options</a></li>
 27 | 	</ul>
 28 | 
 29 | 	<li><a href="#bugs">BUGS</a></li>
 30 | 	<li><a href="#author">AUTHOR</a></li>
 31 | 	<li><a href="#license_and_copyright">LICENSE AND COPYRIGHT</a></li>
 32 | </ul>
 33 | 
 34 | <hr name="index" />
 35 | </div>
 36 | <!-- INDEX END -->
 37 | 
 38 | <p>
 39 | </p>
 40 | <h1><a name="name">NAME</a></h1>
 41 | <p>pgsi.pl - Produce system impact reports for a PostgreSQL database.</p>
 42 | <p>
 43 | </p>
 44 | <hr />
 45 | <h1><a name="version">VERSION</a></h1>
 46 | <p>This documentation refers to version 1.7.2</p>
 47 | <p>
 48 | </p>
 49 | <hr />
 50 | <h1><a name="usage">USAGE</a></h1>
 51 | <p>pgsi.pl [options] &lt; pglog_slice.log</p>
 52 | <pre>
 53 |  or...</pre>
 54 | <p>pgsi.pl --file pglog_slice.log [options]</p>
 55 | <dl>
 56 | <dt><strong><a name="options" class="item">Options</a></strong></dt>
 57 | 
 58 | <dd>
 59 | <pre>
 60 |  --file
 61 |  --query-types
 62 |  --top-10
 63 |  --all
 64 |  --pg-version
 65 |  --offenders</pre>
 66 | </dd>
 67 | </dl>
 68 | <p>
 69 | </p>
 70 | <hr />
 71 | <h1><a name="description">DESCRIPTION</a></h1>
 72 | <p>System Impact (SI) is a measure of the overall load a given query imposes on a
 73 | server. It is expressed as a percentage of a query's average duration over its
 74 | average interval between successive calls. E.g., SI=80 indicates that a given
 75 | query is active 80% of the time during the entire log interval. SI=200
 76 | indicates the query is running twice at all times on average. Thus, the lower
 77 | the SI, the better.</p>
 78 | <p>The goal of SI is to identify those queries most likely to cause performance
 79 | degradation on the database during heaviest traffic periods. Focusing
 80 | exclusively on the least efficient queries can hide relatively fast-running
 81 | queries that saturate the system more because they are called far more
 82 | frequently. By contrast, focusing only on the most-frequently called queries
 83 | will tend to emphasize small, highly optimized queries at the expense of
 84 | slightly less popular queries that spend much more of their time between
 85 | successive calls in an active state. These are often smaller queries that have
 86 | failed to be optimized and punish a system severely under heavy load.</p>
 87 | <p>One thing SI does not do is distinguish between high-value queries represented
 88 | by extended active states or long durations due to blocking locks. Either
 89 | condition is worthy of attention, but determining which is to blame will
 90 | require independent investigation.</p>
 91 | <p>Queries are canonized with placeholders representing literals or arguments.
 92 | Further, IN lists are canonized so that variation from query to query only
 93 | in the number of elements in the IN list will not be treated as distinct
 94 | queries.</p>
 95 | <p>Some examples of the &quot;same&quot; query:</p>
 96 | <ul>
 97 | <li>
 98 | <pre>
 99 |  SELECT col FROM table WHERE code = 'One';
100 |  SELECT col FROM table WHERE code = 'Sixty-Three';</pre>
101 | </li>
102 | <li>
103 | <pre>
104 |  SELECT foo FROM bar WHERE fuzz = $1 AND color IN ('R','G','B');
105 |  Select FOO
106 |  from bar
107 |  WhErE fuzz = '56'
108 |      AND color IN ('R', $1);</pre>
109 | </li>
110 | </ul>
111 | <p>Differences in capitalization and whitespace are irrelevant.</p>
112 | <p>
113 | </p>
114 | <h2><a name="log_data">Log Data</a></h2>
115 | <p>Pass in log data on stdin:</p>
116 | <pre>
117 |     pgsi.pl &lt; some_log_slice.log
118 |     cat some_log_slice.log | pgsi.pl</pre>
119 | <p>Or use the --file option:</p>
120 | <pre>
121 |     pgsi.pl --file=some_log_slice.log</pre>
122 | <p>Or read in more than one file at a time:</p>
123 | <pre>
124 |     pgsi.pl --file=logfile1.log --file=logfile2.log</pre>
125 | <p>If more than one file is given, they must be given in chronological order.</p>
126 | <p>Log data must comply with a specific format and must be from contiguous
127 | activity. The code makes the assumption that the overall interval of activity
128 | is the time elapsed between the first and last log entries. If there are
129 | several blocks of logs to analyze, they must be run separately.</p>
130 | <p>Required format is the following in syslog:</p>
131 | <p>YYYY-MM-DDTHH24:MI:SS(-TZ:00)? server postgres[<em>pid</em>]:</p>
132 | <p>This also requires that log_statement is set to 'all' and 
133 | that log_duration be set to 'on' in postgresql.conf.
134 | If you are not using syslog, you can simulate the format with the following:</p>
135 | <p>log_line_prefix  = '%t %h postgres[%p]: [%l-1] ' ## Simulate syslog for pgsi.</p>
136 | <p>
137 | </p>
138 | <h2><a name="options">Options</a></h2>
139 | <dl>
140 | <dt><strong><a name="query_types" class="item">--query-types</a></strong></dt>
141 | 
142 | <dd>
143 | <p>Query impact is segregated by types. I.e., all the SELECTs together, all
144 | UPDATEs together, etc. Typically it is assumed that SELECT is the most
145 | interesting (and is by itself the default), but any query type may be analyzed.
146 | Multiples are provided as space- or comma-separated lists.</p>
147 | <pre>
148 |     pgsi.pl --query-types=&quot;select, update, copy, create&quot;</pre>
149 | <p>The code will produce a unique report for each type when used with the --all
150 | and/or --top-10 file-pattern options (see below).</p>
151 | </dd>
152 | <dt><strong><a name="top_10_all" class="item">--top-10, --all</a></strong></dt>
153 | 
154 | <dd>
155 | <p>Supplies a file <em>pattern</em> and optional directory path into which the reports
156 | should be written per --query-type. The pattern is prefixed with the
157 | --query-type and host for this report and placed into the requested directory
158 | (or cwd if no path is present).</p>
159 | <p>--all will list every canonized query encountered, which is likely to
160 | contain a large number of queries of no interest (those with negligible
161 | impact).</p>
162 | <p>--top-10 limits the report to only the 10 entries with the greatest SI.</p>
163 | <pre>
164 |     pgsi.pl \
165 |         --query-types=select,update \
166 |         --all=si_reports/monday_10am_1pm.all.txt \
167 |         --top-10=si_reports/monday_10am_1pm.t10.txt</pre>
168 | <p>This will produce the following reports in si_reports/ for a database running
169 | on server db1:</p>
170 | <pre>
171 |     SELECT-db1-monday_10am_1pm.all.txt
172 |     UPDATE-db1-monday_10am_1pm.all.txt
173 |     SELECT-db1-monday_10am_1pm.t10.txt
174 |     UPDATE-db1-monday_10am_1pm.t10.txt</pre>
175 | <p>If --top-10 is not supplied, then no top 10 report is generated. If --all is
176 | not supplied, then the report(s) print to stdout.</p>
177 | </dd>
178 | <dt><strong><a name="pg_version" class="item">--pg-version</a></strong></dt>
179 | 
180 | <dd>
181 | <p>Currently, this might better be described as either &quot;before DETAIL&quot; or &quot;after
182 | DETAIL&quot;. The code was written against PG 8.1 originally, but when 8.2 came out
183 | the addition of DETAIL log entries forced a different parser. That unfortunate
184 | timing led to the assumption that log construction would change with each
185 | release. Going forward, --pg-version will be (other than 8.1) the first version
186 | in which this log format was encountered.</p>
187 | <p>--pg-version is only either 8.1 or 8.2 (8.2 is default). It's unknown how far
188 | back in versions the 8.1 format holds, but 8.2 holds for itself and 8.3. So,
189 | unless you're working against logs generated by a PG version less than 8.2, you
190 | do not need to include this option (but it might save you some trouble if a new
191 | format comes at a later version and the default bumps up to the most recent
192 | while you stay on your older version).</p>
193 | <pre>
194 |     pgsi.pl --pg-version=8.1</pre>
195 | </dd>
196 | <dt><strong><a name="offenders" class="item">--offenders</a></strong></dt>
197 | 
198 | <dd>
199 | <p>Number of best and worst queries to included with the report, in terms of
200 | overall duration of execution. Enough log information is listed along with the
201 | duration such that tracking down the original query (not the canonized
202 | version) is straightforward. The offenders list can be very useful for a query
203 | that is causing trouble in a handful of permutations, but most of the time is
204 | behaving well.</p>
205 | <p>The list in conjunction with standard deviation gives an overall indication of
206 | performance volatility.</p>
207 | <p>--offenders=5 produces additional output in the report that looks something
208 | like the following example:</p>
209 | <pre>
210 |  Best
211 |    1. 2009-01-12T10:11:49-07:00 db1 postgres[4692] -- 4.833 ms
212 |    2. 2009-01-12T10:31:19-07:00 db1 postgres[1937] -- 4.849 ms
213 |    3. 2009-01-12T09:16:20-07:00 db1 postgres[20294] -- 4.864 ms
214 |    4. 2009-01-12T10:16:54-07:00 db1 postgres[20955] -- 4.867 ms
215 |    5. 2009-01-12T10:32:16-07:00 db1 postgres[5010] -- 4.871 ms</pre>
216 | <pre>
217 |  Worst
218 |    1. 2009-01-12T10:00:07-07:00 db1 postgres[2804] -- 2175.650 ms
219 |    2. 2009-01-12T09:30:07-07:00 db1 postgres[2804] -- 2090.914 ms
220 |    3. 2009-01-12T10:00:18-07:00 db1 postgres[2804] -- 2046.608 ms
221 |    4. 2009-01-12T09:30:10-07:00 db1 postgres[2804] -- 1954.604 ms
222 |    5. 2009-01-12T11:20:11-07:00 db1 postgres[2804] -- 1788.576 ms</pre>
223 | </dd>
224 | </dl>
225 | <p>
226 | </p>
227 | <hr />
228 | <h1><a name="bugs">BUGS</a></h1>
229 | <ul>
230 | <li>
231 | <p>If queries contain exceptionally long IN lists, the regex that attempts to
232 | flatten them can run into a perl recursion limit. In that event, the query will
233 | keep the placeholders of the IN list, making it unique compared to the same
234 | query with a different cardinality of list params in the same IN. This
235 | deficiency should only surface on IN lists with composite parameters [e.g., IN
236 | ((?,?,...,?),(?,?,...,?),...,(?,?,...,?))]. For scalar IN lists, there should
237 | be no such limit.</p>
238 | </li>
239 | </ul>
240 | <p>
241 | </p>
242 | <hr />
243 | <h1><a name="author">AUTHOR</a></h1>
244 | <p>Original code:
245 |     Mark Johnson (<a href="mailto:mark@endpoint.com">mark@endpoint.com</a>), End Point Corp.</p>
246 | <p>Contributions:
247 |     Ethan Rowe (<a href="mailto:ethan@endpoint.com">ethan@endpoint.com</a>), End Point Corp.
248 |     Greg Sabino Mullane (<a href="mailto:greg@endpoint.com">greg@endpoint.com</a>), End Point Corp.
249 |     Daniel Browning (<a href="mailto:db@endpoint.com">db@endpoint.com</a>), End Point Corp.
250 |     Joshua Tolley &lt;<a href="mailto:josh@endpoint.com">josh@endpoint.com</a>&gt;, End Point Corp.
251 |     Abraham Ingersoll &lt;<a href="mailto:abe@abe.us">abe@abe.us</a>&gt;</p>
252 | <p>
253 | </p>
254 | <hr />
255 | <h1><a name="license_and_copyright">LICENSE AND COPYRIGHT</a></h1>
256 | <p>Copyright 2008-2011 Mark Johnson (<a href="mailto:mark@endpoint.com">mark@endpoint.com</a>)</p>
257 | <p>This module is free software; you can redistribute it and/or modify it
258 | under the same terms as Perl itself. See the LICENSE file.</p>
259 | 
260 | </body>
261 | 
262 | </html>
263 | 


--------------------------------------------------------------------------------
/pgsi.pl:
--------------------------------------------------------------------------------
   1 | #!/usr/bin/env perl
   2 | # -*-mode:cperl; indent-tabs-mode: nil-*-
   3 | 
   4 | ## Parse Postgres logs and determine the system impact
   5 | ##
   6 | ## Usage: pgsi.pl [options] < pglog_slice.log
   7 | ##
   8 | ## See the POD inside this file for full documentation:
   9 | ## perldoc pgsi.pl
  10 | ##
  11 | ## Mark Johnson <mark@endpoint.com>
  12 | 
  13 | package PGSI;
  14 | 
  15 | use strict;
  16 | use warnings;
  17 | use Data::Dumper qw( Dumper );
  18 | 
  19 | use Time::Local  qw();
  20 | use Getopt::Long;
  21 | use IO::Handle;
  22 | use 5.008003;
  23 | 
  24 | our $VERSION = '1.7.2';
  25 | 
  26 | *STDOUT->autoflush(1);
  27 | *STDERR->autoflush(1);
  28 | 
  29 | my $resolve_called = 0;
  30 | 
  31 | my (
  32 |     %query,
  33 |     %canonical_q,
  34 |     $first_line,
  35 |     $last_line,
  36 |     %seen,
  37 | );
  38 | 
  39 | my %opt = (
  40 |     'top-10'      => '',
  41 |     'all'         => '',
  42 |     'query-types' => '',
  43 |     'pg-version'  => '',
  44 |     'offenders'   => 0,
  45 |     'verbose'     => 0,
  46 |     'format'      => 'html',
  47 |     'mode'        => 'pid',
  48 |     'color'       => 1,
  49 |     'quiet'       => 0,
  50 |     'interval'    => undef,
  51 | );
  52 | 
  53 | my $USAGE = qq{Usage: $0 -f filename [options]\n};
  54 | 
  55 | GetOptions ( ## no critic
  56 |     \%opt,
  57 |     (
  58 |         'top-10=s',
  59 |         'all=s',
  60 |         'query-types=s',
  61 |         'pg-version=s',
  62 |         'offenders=i',
  63 |         'version',
  64 |         'help',
  65 |         'verbose+',
  66 |         'file|f=s@',
  67 |         'format=s',
  68 |         'mode=s',
  69 |         'color!',
  70 |         'quiet',
  71 |         'interval=s',
  72 |     )
  73 | ) or die $USAGE;
  74 | 
  75 | if ($opt{version}) {
  76 |     print "$0 version $VERSION\n";
  77 |     exit 0;
  78 | }
  79 | 
  80 | if ($opt{help}) {
  81 |     print $USAGE;
  82 |     print "Full documentation at: http://bucardo.org/wiki/pgsi\n";
  83 |     exit 0;
  84 | }
  85 | 
  86 | ## Prepare formatting vars based on opt{format}
  87 | ## The default is 'html':
  88 | my $fmstartbold = q{<b>};
  89 | my $fmendbold = q{</b>};
  90 | my $fmstartheader1 = q{<h1>};
  91 | my $fmendheader1 = q{</h1>};
  92 | my $fmstartheader2 = q{<h2>};
  93 | my $fmendheader2 = q{</h2>};
  94 | my $fmstartheader3 = q{<h3>};
  95 | my $fmendheader3 = q{</h3>};
  96 | my $fmsep = q{<hr />};
  97 | my $fmstartquery = '<pre>';
  98 | my $fmendquery = '</pre>';
  99 | if ($opt{format} eq 'mediawiki') {
 100 |     $fmstartbold    = q{'''};
 101 |     $fmendbold      = q{'''};
 102 |     $fmstartheader1 = q{==};
 103 |     $fmendheader1   = q{==};
 104 |     $fmstartheader2 = q{====};
 105 |     $fmendheader2   = q{====};
 106 |     $fmstartheader3 = q{=====};
 107 |     $fmendheader3   = q{=====};
 108 |     $fmsep          = q{----};
 109 |     $fmstartquery   = '  ';
 110 |     $fmendquery     = '';
 111 | }
 112 | 
 113 | if ($opt{format} eq 'tsv') {
 114 |     $fmstartbold    = q{};
 115 |     $fmendbold      = q{};
 116 |     $fmstartheader1 = q{};
 117 |     $fmendheader1   = q{};
 118 |     $fmstartheader2 = q{};
 119 |     $fmendheader2   = q{};
 120 |     $fmstartheader3 = q{};
 121 |     $fmendheader3   = q{};
 122 |     $fmsep          = q{};
 123 |     $fmstartquery   = '';
 124 |     $fmendquery     = '';
 125 | }
 126 | 
 127 | my $minwrap1 = 80;
 128 | my $minwrap2 = 40; ## WHEN .. THEN
 129 | 
 130 | ## Any keywords after the first
 131 | my $indent1 = ' ' x 1;
 132 | 
 133 | ## Wrapping of long lines of a single type
 134 | my $indent2 = ' ' x 3;
 135 | 
 136 | ## Secondary wrapping of a single type
 137 | my $indent3 = ' ' x 5;
 138 | 
 139 | ## Even more indenting
 140 | my $indent4 = ' ' x 7;
 141 | 
 142 | ## Special strings for internal comments
 143 | my $STARTCOMMENT = "startpgsicomment";
 144 | my $ENDCOMMENT = "endpgsicomment";
 145 | 
 146 | ## We either read from a file or from stdin
 147 | my (@fh, $fh);
 148 | if ($opt{file}) {
 149 |     my $x = 0;
 150 |     my %dupe;
 151 |     for my $file (@{$opt{file}}) {
 152 |         if ($dupe{$file}++) {
 153 |             die "File specified more than once: $file\n";
 154 |         }
 155 |         open $fh[$x], '<', $file or die qq{Could not open "$file": $!\n};
 156 |         $x++;
 157 |     }
 158 | }
 159 | else {
 160 |     push @fh => \*STDIN;
 161 | }
 162 | 
 163 | for (@fh) {
 164 |     $fh = $_;
 165 |     ## Do the actual parsing. Depends on what kind of log file we have
 166 |     if ('pid' eq $opt{mode}) {
 167 |         parse_pid_log();
 168 |     }
 169 |     elsif ('syslog' eq $opt{mode}) {
 170 |         parse_syslog_log();
 171 |     }
 172 |     elsif ('csv' eq $opt{mode}) {
 173 |         parse_csv_log();
 174 |     }
 175 |     elsif ('bare' eq $opt{mode}) {
 176 |         parse_bare_log();
 177 |     }
 178 |     else {
 179 |         die qq{Unknown mode: $opt{mode}\n};
 180 |     }
 181 | }
 182 | 
 183 | 
 184 | sub parse_csv_log {
 185 | 
 186 |     ## Each line of interest, with PIDs as the keys
 187 |     my %logline;
 188 | 
 189 |     ## The last PID we saw. Used to populate multi-line statements correctly.
 190 |     my $lastpid = 0;
 191 | 
 192 |     require Text::CSV_XS;
 193 |     my $csv = Text::CSV_XS->new({ binary => 1 }) or die;
 194 |     $csv->column_names(qw(log_time user_name database_name process_id connection_from session_id session_line_num command_tag session_start_time virtual_transaction_id transaction_id error_severity sql_state_code message detail hint internal_query internal_query_pos context query query_pos location application_name));
 195 |     while (my $line = $csv->getline_hr($fh)) {
 196 | 
 197 |         if ($opt{verbose} >= 2) {
 198 |             warn "Checking line (" . Dumper($line) . ")\n";
 199 |         }
 200 | 
 201 |         my $date = $line->{log_time};
 202 |         my $pid = $line->{process_id};
 203 |         my $more = $line->{message};
 204 | 
 205 |         ## All we care about is statements and durations
 206 | 
 207 |         ## Got a duration? Store it for this PID and move on
 208 |         if ($more =~ /duration: (\d+\.\d+) ms$/o) {
 209 |             my $duration = $1;
 210 |             ## Store this duration, overwriting what is (presumably) -1
 211 |             $logline{$pid}{duration} = $duration;
 212 |             next;
 213 |         }
 214 | 
 215 |         ## Got a statement with optional duration
 216 |         ## Handle the old statement and store the new
 217 |         if ($more =~ /(?:duration: (\d+\.\d+) ms  )?statement:\s+(.+)/o) {
 218 | 
 219 |             my ($duration,$statement) = ($1,$2);
 220 | 
 221 |             ## Make sure any subsequent multi-line statements know where to go
 222 |             $lastpid = $pid;
 223 | 
 224 |             ## If this PID has something stored, process it first
 225 |             if (exists $logline{$pid}) {
 226 |                  resolve_pid_statement($logline{$pid});
 227 |             }
 228 | 
 229 |             ## Store and blow away any old value
 230 |             $logline{$pid} = {
 231 |                 line      => $line,
 232 |                 statement => $statement,
 233 |                 duration  => -1,
 234 |                 date      => $date,
 235 |             };
 236 | 
 237 |             if (defined $duration) {
 238 |                 $logline{$pid}{duration} = $duration;
 239 |             }
 240 | 
 241 |             ## Make sure we have a first and a last line
 242 |             if (not defined $first_line) {
 243 |                 $first_line = $last_line = $line;
 244 |             }
 245 | 
 246 |         } ## end duration + statement
 247 | 
 248 |     } ## end each line
 249 | 
 250 |     defined $first_line or die qq{Could not find any matching lines: incorrect format??\n};
 251 | 
 252 |     ## Process any PIDS that are left
 253 |     for my $pid (keys %logline) {
 254 |         resolve_pid_statement($logline{$pid});
 255 |     }
 256 | 
 257 |     ## Store the last PID seen as the last line
 258 |     $last_line = $logline{$lastpid}{line};
 259 | 
 260 |     return;
 261 | 
 262 | } ## end of parse_csv_log
 263 | 
 264 | 
 265 | sub parse_pid_log {
 266 | 
 267 |     ## Parse a log file in which the pid appears in the log_line_prefix
 268 |     ## and multi-line statements start with tabs
 269 | 
 270 |     ## Each line of interest, with PIDs as the keys
 271 |     my %logline;
 272 | 
 273 |     ## The last PID we saw. Used to populate multi-line statements correctly.
 274 |     my $lastpid = 0;
 275 | 
 276 |     ## We only store multi-line if the previous real line was a log: statement
 277 |     my $lastwaslog=0;
 278 | 
 279 |     while (my $line = <$fh>) {
 280 | 
 281 |         if ($opt{verbose} >= 2) {
 282 |             chomp $line;
 283 |             warn "Checking line ($line)\n";
 284 |         }
 285 | 
 286 |         ## There are only two possiblities we care about:
 287 |         ## 1. tab-prefixed line (continuation of a literal)
 288 |         ## 2. new date-and-pid-prefixed line
 289 | 
 290 |         if ($line =~ /^\t(.*)/) {
 291 |             ## If the last real line was a statement, append this to last PID seen
 292 |             if ($lastwaslog) {
 293 |                 (my $extra = $1) =~ s/^\s+//;
 294 |                 ## If a comment, treat carefully
 295 |                 $extra =~ s/^(\s*\-\-.+)/$STARTCOMMENT $1 $ENDCOMMENT /;
 296 |                 $logline{$lastpid}{statement} .= " $extra";
 297 |             }
 298 |             next;
 299 |         }
 300 | 
 301 |         ## Got a valid PID line?
 302 |         if ($line =~ /^(\d\d\d\d\-\d\d\-\d\d \d\d:\d\d:\d\d)\D+(\d+)\s*(.+)/o) {
 303 | 
 304 |             my ($date,$pid,$more) = ($1,$2,$3);
 305 | 
 306 |             ## Example:
 307 |             ## 2009-12-03 08:12:05 PST 11717 4b17e355.2dc5 127.0.0.1 dbuser dbname LOG:  statement: SELECT ...
 308 | 
 309 |             ## Reset the last log indicator
 310 |             $lastwaslog = 0;
 311 | 
 312 |             ## All we care about is statements and durations
 313 |             next if ($more =~ /LOG:  (?:duration: (\d+\.\d+) ms  )?(?:bind|parse) [^:]+:.*/o);
 314 | 
 315 |             ## Got a duration? Store it for this PID and move on
 316 |             if ($more =~ /LOG:  duration: (\d+\.\d+) ms$/o) {
 317 |                 my $duration = $1;
 318 |                 ## Store this duration, overwriting what is (presumably) -1
 319 |                 $logline{$pid}{duration} = $duration;
 320 |                 next;
 321 |             }
 322 | 
 323 |             ## Got a statement with optional duration
 324 |             ## Handle the old statement and store the new
 325 |             if ($more =~ /
 326 |                             LOG:\s\s
 327 |                             (?:duration:\s(\d+\.\d+)\sms\s\s)?
 328 |                             ((?:(?:statement)|(?:execute\s[^:]+)))
 329 |                             :(.*)$
 330 |                          /x) {
 331 | 
 332 |                 my ($duration,$isexecute,$statement) = ($1,$2,$3);
 333 | 
 334 |                 ## Slurp in any multi-line continuatiouns after this
 335 |                 $lastwaslog = 1;
 336 | 
 337 |                 ## Make sure any subsequent multi-line statements know where to go
 338 |                 $lastpid = $pid;
 339 | 
 340 |                 ## If this PID has something stored, process it first
 341 |                 if (exists $logline{$pid}) {
 342 |                     resolve_pid_statement($logline{$pid});
 343 |                 }
 344 | 
 345 |                 ## Store and blow away any old value
 346 |                 $logline{$pid} = {
 347 |                     line      => $line,
 348 |                     statement => $statement,
 349 |                     duration  => -1,
 350 |                     date      => $date,
 351 |                 };
 352 | 
 353 |                 if (defined $duration) {
 354 |                     $logline{$pid}{duration} = $duration;
 355 |                 }
 356 | 
 357 |                 ## Make sure we have a first and a last line
 358 |                 if (not defined $first_line) {
 359 |                     $first_line = $last_line = $line;
 360 |                 }
 361 | 
 362 |             } ## end LOG: statement
 363 | 
 364 |             next;
 365 | 
 366 |         } ## end if valid PID line
 367 | 
 368 |         if ($opt{verbose}) {
 369 |             chomp $line;
 370 |             warn "Invalid line $.: $line\n";
 371 |         }
 372 | 
 373 |     } ## End while
 374 | 
 375 |     defined $first_line or die qq{Could not find any matching lines: incorrect format??\n};
 376 | 
 377 |     ## Process any PIDS that are left
 378 |     for my $pid (keys %logline) {
 379 |         resolve_pid_statement($logline{$pid});
 380 |     }
 381 | 
 382 |     ## Store the last PID seen as the last line
 383 |     $last_line = $logline{$lastpid}{line};
 384 | 
 385 |     return;
 386 | 
 387 | } ## end of parse_pid_log
 388 | 
 389 | sub parse_bare_log {
 390 | 
 391 |     ## Parse a log file in which multi-line statements start with tabs
 392 | 
 393 |     ## Each line of interest
 394 |     my @logline;
 395 | 
 396 |     my $more;
 397 | 
 398 |     ## We only store multi-line if the previous real line was a log: statement
 399 |     my $lastwaslog=0;
 400 | 
 401 |     while (my $line = <$fh>) {
 402 | 
 403 |         if ($opt{verbose} >= 2) {
 404 |             chomp $line;
 405 |             warn "Checking line ($line)\n";
 406 |         }
 407 | 
 408 |         ## There are only two possiblities we care about:
 409 |         ## 1. tab-prefixed line (continuation of a literal)
 410 |         ## 2. new line
 411 | 
 412 |         if ($line =~ /^\t(.*)/) {
 413 |             ## If the last real line was a statement, append this to last PID seen
 414 |             if ($lastwaslog) {
 415 |                 (my $extra = $1) =~ s/^\s+//;
 416 |                 ## If a comment, treat carefully
 417 |                 $extra =~ s/^(\s*\-\-.+)/$STARTCOMMENT $1 $ENDCOMMENT /;
 418 |                 $logline[$#logline]->{statement} .= " $extra";
 419 |             }
 420 |             next;
 421 |         }
 422 | 
 423 |         ## Lines will contain "parse <.*>", "bind <.*>", "execute <.*>" or "statement", with a duration
 424 |         if ($line =~ /^LOG: /) {
 425 |             $more = $line;
 426 | 
 427 |             ## Example:
 428 |             ## 2009-12-03 08:12:05 PST 11717 4b17e355.2dc5 127.0.0.1 dbuser dbname LOG:  statement: SELECT ...
 429 | 
 430 |             ## Reset the last log indicator
 431 |             $lastwaslog = 0;
 432 | 
 433 |             ## All we care about is statements/executes and durations
 434 |             next if ($more =~ /LOG:  (?:duration: (\d+\.\d+) ms  )?(?:bind|parse) [^:]+:.*/o);
 435 | 
 436 |             ## Got a duration? Store it for this PID and move on
 437 |             if ($more =~ /LOG:  duration: (\d+\.\d+) ms$/o) {
 438 |                 my $duration = $1;
 439 |                 ## Store this duration, overwriting what is (presumably) -1
 440 |                 $logline[$#logline]->{duration} = $duration;
 441 |                 next;
 442 |             }
 443 | 
 444 |             ## Got a statement with optional duration
 445 |             ## Handle the old statement and store the new
 446 |             if ($more =~ /
 447 |                             LOG:\s\s
 448 |                             (?:duration:\s(\d+\.\d+)\sms\s\s)?
 449 |                             ((?:(?:statement)|(?:execute\s[^:]+)))
 450 |                             :(.*)$
 451 |                          /x) {
 452 | 
 453 |                 my ($duration,$statement, $other) = ($1, $3, $2);
 454 | 
 455 |                 ## Slurp in any multi-line continuatiouns after this
 456 |                 $lastwaslog = 1;
 457 | 
 458 |                 ## resolve whatever's already in this line
 459 |                 resolve_pid_statement($logline[$#logline]) if $#logline > -1;
 460 |                 shift @logline;
 461 | 
 462 |                 ## Store and blow away any old value
 463 |                 push @logline, {
 464 |                     statement => $statement,
 465 |                     duration => $duration,
 466 |                 };
 467 | 
 468 |                 if (defined $duration) {
 469 |                     $logline[$#logline]->{duration} = $duration;
 470 |                 }
 471 | 
 472 |                 ## Make sure we have a first and a last line
 473 |                 if (not defined $first_line) {
 474 |                     $first_line = $last_line = $line;
 475 |                 }
 476 | 
 477 |             } ## end LOG: statement
 478 | 
 479 |             next;
 480 | 
 481 |         } ## end if valid def line
 482 | 
 483 |         if ($opt{verbose}) {
 484 |             chomp $line;
 485 |             warn "Invalid line $.: $line\n";
 486 |         }
 487 | 
 488 |     } ## End while
 489 | 
 490 |     defined $first_line or die qq{Could not find any matching lines: incorrect format??\n};
 491 | 
 492 |     ## Process any PIDS that are left
 493 |     for my $line (@logline) {
 494 |         resolve_pid_statement($line);
 495 |     }
 496 | 
 497 |     ## Store the last PID seen as the last line
 498 |     $last_line = $logline[$#logline]->{statement};
 499 | 
 500 |     return;
 501 | 
 502 | } ## end of parse_def_log
 503 | 
 504 | 
 505 | ## Globals used by syslog: move or refactor
 506 | my ($extract_query_re, $extract_duration_re);
 507 | 
 508 | sub parse_syslog_log {
 509 | 
 510 |     # Todo: Move this out
 511 | 
 512 |     # Build an or-list for regex extraction of
 513 |     # the query types of interest.
 514 |     my $query_types = $opt{'query-types'}
 515 |         ? join (
 516 |             '|',
 517 |             grep { /\w/ && !$seen{$_}++ }
 518 |                 split (
 519 |                     /[,\s]+/,
 520 |                     "\L$opt{'query-types'}"
 521 |             )
 522 |         )
 523 |             : 'select'
 524 |     ;
 525 | 
 526 |     # Partition the top-10 or all file-patterns
 527 |     # into their path and file-name components
 528 |     for my $k ( qw/top-10 all/ ) {
 529 |         local ($_) = delete $opt{$k};
 530 |         my @v =
 531 |             map { defined ($_) ? $_ : '' }
 532 |                 m{(.*/)?(.*)};
 533 |         my @k =
 534 |             map {"$k-$_"} qw/path file/;
 535 |         @opt{@k} = @v;
 536 |     }
 537 | 
 538 |     # Base regex to capture the main pieces of
 539 |     # each log line entry
 540 |     my $statement_re =
 541 |         qr{
 542 |               ^(.+?
 543 |                   postgres\[
 544 |                   (\d+)
 545 |                   \]
 546 |               ):
 547 |               \s+
 548 |               \[
 549 |               \d+
 550 |               -
 551 |               (\d+)
 552 |               \]
 553 |               \s
 554 |               (.*)
 555 |       }xms;
 556 | 
 557 |     # Create the appropriate regex to pull out the actual query depending on the
 558 |     # format implied by --pg-version. Blesses regex into the appropriate namespace
 559 |     # to simplify access to routines that deviate between log formats.
 560 |     $extract_query_re =
 561 |         make_extractor(
 562 |             $opt{'pg-version'},
 563 |             $query_types
 564 |     );
 565 | 
 566 |     # Regex specifically for the concluding duration
 567 |     # entry after a query finishes.
 568 |     $extract_duration_re =
 569 |     qr{
 570 |         log:
 571 |         \s+
 572 |         duration:
 573 |         \s
 574 |         (
 575 |             \d+
 576 |             [.]
 577 |             \d{3}
 578 |         )
 579 |     }ixms;
 580 | 
 581 |     while (my $line = <$fh>) {
 582 | 
 583 |         if ($opt{verbose} >= 2) {
 584 |             chomp $line;
 585 |             warn "Checking line ($line)\n";
 586 |         }
 587 | 
 588 |         # Lines that don't match the basic format are ignored.
 589 |         if ($line !~ $statement_re) {
 590 |             chomp $line;
 591 |             $opt{verbose} and warn "Line $. did not match: $line\n";
 592 |             next;
 593 |         }
 594 | 
 595 |         my ($st_id, $pid, $st_seq, $st_frag) = ($1, $2, $3, $4);
 596 | 
 597 |         $first_line = $last_line = $line
 598 |             unless defined $first_line;
 599 | 
 600 |         # Allows for blocks of log to be unordered. Assumes earliest found timestamp 
 601 |         # is start time, and latest end time, regardless of the order in which 
 602 |         # they're encountered.
 603 |         if (defined $line) {
 604 |             $first_line = $line
 605 |                 if get_timelocal_from_line($line) < get_timelocal_from_line($first_line);
 606 |             $last_line = $line
 607 |                 if get_timelocal_from_line($line) > get_timelocal_from_line($first_line);
 608 |         }
 609 | 
 610 |         my $arr;
 611 | 
 612 |         # Starting a new statement. Close off possible previous one and begin new one.
 613 |         resolve_syslog_stmt($pid)
 614 |             if $st_seq == 1;
 615 | 
 616 |         # Skip any entries that start the log
 617 |         # after their first entry.
 618 |         next unless $arr = $query{$pid}{fragments};
 619 | 
 620 |         # "statement_id" is the earliest timestamp/pid
 621 |         # entry found for the statement in question.
 622 |         # It should suffice for a human to identify
 623 |         # the query within the logfiles.
 624 |         $query{$pid}{statement_id} = $st_id
 625 |             if $st_seq == 1;
 626 | 
 627 |         push (
 628 |             @$arr,
 629 |             $st_frag
 630 |         );
 631 | 
 632 |     } # end while
 633 | 
 634 |     defined $first_line or die qq{Could not find any matching lines: incorrect format?\n};
 635 | 
 636 |     # Go through all entries that haven't been resolved
 637 |     # (indicated by the presence of elements in the
 638 |     # fragments array) and add them to the canonical list.
 639 |     while (my ($pid, $hsh) = each %query) {
 640 |         next unless exists $hsh->{fragments} and @{ $hsh->{fragments} };
 641 |         resolve_syslog_stmt($pid);
 642 |     }
 643 | 
 644 |     return;
 645 | 
 646 | } ## end of parse_syslog_log
 647 | 
 648 | # Determine the server and start/end times
 649 | # from the oldest and newest log lines.
 650 | my ($host, $start_time, $end_time) =
 651 |     log_meta($first_line, $last_line);
 652 | 
 653 | # Calculate the ms interval of all log activity.
 654 | my $log_int_ms = log_interval($start_time, $end_time);
 655 | 
 656 | for my $hsh (values %canonical_q) {
 657 | 
 658 |     # Mean runtime.
 659 |     my $mean = $hsh->{duration} /= $hsh->{count};
 660 | 
 661 |     # Average (mean) time between successive calls.
 662 |     $hsh->{interval} = $opt{interval} || $log_int_ms / $hsh->{count};
 663 | 
 664 |     # SI, expressed as a percent. 100 implies the query
 665 |     # was, on average, constantly running on a single
 666 |     # instance. If interval is invalid (after all,
 667 |     # syslog precision is only 1 second), set to -1
 668 |     $hsh->{sys_impact} =
 669 |         $hsh->{interval} != 0
 670 |             ? 100 * $hsh->{duration} / $hsh->{interval}
 671 |             : -1
 672 |     ;
 673 | 
 674 |     ## No sense in showing negative numbers unless exactly -1
 675 |     if ($hsh->{sys_impact} < 0 and $hsh->{sys_impact} != -1) {
 676 |         $hsh->{sys_impact} = 0;
 677 |     }
 678 | 
 679 |     # Determine standard deviation and median. If count <= 1,
 680 |     # set to -1 to indicate not applicable.
 681 |     if ($hsh->{count} > 1) {
 682 |         my $sum = 0;
 683 |         for my $duration ( @{$hsh->{durations}} ) {
 684 |             $sum += ($duration - $mean)**2;
 685 |         }
 686 |         $hsh->{deviation} = sqrt($sum / ($hsh->{count} - 1));
 687 |         my @sorted = sort { $a <=> $b } @{$hsh->{durations}};
 688 |         if (($#sorted + 1) % 2 != 0) {
 689 |             $hsh->{median} = $sorted[int($#sorted / 2)];
 690 |         }
 691 |         else {
 692 |             $hsh->{median} = ($sorted[int($#sorted / 2)] + $sorted[int($#sorted / 2) + 1]) / 2;
 693 |         }
 694 |     }
 695 |     else {
 696 |         $hsh->{deviation} = -1;
 697 |         $hsh->{median} = -1;
 698 |     }
 699 | }
 700 | 
 701 | ## Format each query and return a hash based on query_type
 702 | my $out = process_all_queries();
 703 | 
 704 | ## If using HTML format, print a simple head and table of contents
 705 | if ($opt{format} eq 'html') {
 706 | 
 707 |     print qq{<!DOCTYPE html
 708 | PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
 709 | "http://www.w3.org/TR/xhtml11/DTD/xhtml1-transitional.dtd">
 710 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US" lang="en-US">
 711 | <head>
 712 | <title>Postgres System Impact Report</title>
 713 | <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
 714 | \n};
 715 | 
 716 |     if ($opt{color}) {
 717 |         print qq!<style type="text/css">
 718 | span.prealias    { color: red;    font-weight: bolder;                           }
 719 | span.pretable    { color: purple; font-weight: bolder;                           }
 720 | span.prekeyword  { color: blue;   font-weight: bolder;                           }
 721 | span.presemi     { color: white;  font-weight: normal; background-color: purple; }
 722 | span.prequestion { color: red;    font-weight: normal                            }
 723 | </style>
 724 | !;
 725 |     }
 726 |     print qq{</head>\n<body>\n};
 727 | 
 728 |     if ($opt{file}) {
 729 |         if (! defined $opt{file}[1]) {
 730 |             print qq{<p>Log file: $opt{file}[0]</p>\n};
 731 |         }
 732 |         else {
 733 |             print qq{<p>Log files:<ul>\n};
 734 |             for my $file (@{$opt{file}}) {
 735 |                 print qq{<li>$file</li>\n};
 736 |             }
 737 |             print qq{</ul></p>\n};
 738 |         }
 739 |     }
 740 | 
 741 |     print "<ul>\n";
 742 | 
 743 |     for my $qtype (
 744 |                    map  { $_->[0] }
 745 |                    sort { $a->[1] <=> $b->[1] or $b->[2] <=> $a->[2] }
 746 |                    map  { [$_, ($_ eq 'COPY' ? 1 : 0), scalar @{$out->{$_}} ] }
 747 |                    keys %$out) {
 748 | 
 749 |         my $count = @{$out->{$qtype}};
 750 | 
 751 |         my $safename = "${host}_$qtype";
 752 |         print qq{<li><a href="#$safename">$qtype</a> ($count)</li>\n};
 753 |     }
 754 |     print "</ul>\n";
 755 | }
 756 | elsif ($opt{format} eq 'tsv') {
 757 |     # print out the field headers
 758 |     print join "\t" => qw( qtype query count duration interval deviation sysimpact minimum_threshold
 759 |                     maximum_threshold durations );
 760 |     print "\n";
 761 | 
 762 |     no warnings 'uninitialized';
 763 | 
 764 |     while (my ($q,$v) = each %canonical_q) {
 765 |         print join "\t" => $v->{qtype}, $q,
 766 |             @{$v}{qw/count duration interval deviation sysimpact minimum_threshold maximum_threshold/},
 767 |             (join q{,} => @{$v->{durations}}),
 768 |         ;
 769 |         print "\n";
 770 |     }
 771 |     exit;
 772 | }
 773 | 
 774 | for my $qtype (
 775 |                map  { $_->[0] }
 776 |                sort { $a->[1] <=> $b->[1] or $b->[2] <=> $a->[2] }
 777 |                map  { [$_, ($_ eq 'COPY' ? 1 : 0), scalar @{$out->{$_}} ] }
 778 |                keys %$out) {
 779 | 
 780 |     my $arr = $out->{$qtype};
 781 | 
 782 |     my $type_top_ten = $opt{'top-10-file'}
 783 |         ? "$opt{'top-10-path'}$host-$qtype-$opt{'top-10-file'}"
 784 |         : '/dev/null'
 785 |     ;
 786 | 
 787 |     my ($all_fh, $type_all);
 788 | 
 789 |     if ($opt{'all-file'}) {
 790 |         $type_all = "$opt{'all-path'}$host-$qtype-$opt{'all-file'}";
 791 |         open ($all_fh, '>', $type_all)
 792 |             or die "Can't open '$type_all' to write: $!";
 793 |     }
 794 |     else {
 795 |         $all_fh = \*STDOUT;
 796 |     }
 797 | 
 798 |     open (my $top_ten_fh, '>', $type_top_ten)
 799 |         or die "Can't open '$type_top_ten' to write: $!";
 800 | 
 801 |     # Start off reports with appropriate Wiki naming
 802 |     # convention. Can automate the posting of reports
 803 |     # with code that strips first line and uses it
 804 |     # for Wiki page.
 805 | 
 806 |     my $a1 = '';
 807 |     my $a2 = '';
 808 |     if ($opt{format} eq 'html') {
 809 |         my $safename = "${host}_$qtype";
 810 |         $a1 = qq{<a name="$safename">};
 811 |         $a2 = qq{</a>};
 812 |         my $phost = length $host ? qq{ : $host :} : ':';
 813 |         print $all_fh qq{${fmstartheader2}${a1}Query System Impact $phost $qtype${a2}${fmendheader2}\n};
 814 |     }
 815 |     else {
 816 |         print $all_fh qq{Query_System_Impact:$host:$qtype\n};
 817 |     }
 818 | 
 819 |     # Top 10 lists are put into templates, assuming
 820 |     # they will be pulled in to a collection with
 821 |     # other top 10s, typically for the same host.
 822 |     print $top_ten_fh <<"EOP";
 823 | Template:${host}_SI_Top_10:$qtype
 824 | EOP
 825 | 
 826 |     for my $fh ($all_fh, $top_ten_fh) {
 827 |         print $fh "${fmstartheader3}Log activity from $start_time to $end_time${fmendheader3}\n";
 828 |     }
 829 | 
 830 |     print $all_fh join ("$fmsep\n", @$arr);
 831 |     print $top_ten_fh join ("$fmsep", grep { defined $_ } @$arr[0..9]);
 832 | 
 833 |     close ($top_ten_fh) or warn "Error closing '$type_top_ten': $!";
 834 |     if ($type_all) {
 835 |         close ($all_fh) or warn "Error closing '$type_all': $!";
 836 |     }
 837 | }
 838 | 
 839 | 
 840 | if ($opt{format} eq 'html') {
 841 |     print "</body></html>\n";
 842 | }
 843 | 
 844 | if (! $opt{quiet}) {
 845 |     warn "Items processed: $resolve_called\n";
 846 | }
 847 | 
 848 | exit;
 849 | 
 850 | sub resolve_syslog_stmt {
 851 | 
 852 |     my $pid = shift;
 853 | 
 854 |     $query{$pid} ||= {};
 855 |     my $prev = $query{$pid}{fragments};
 856 | 
 857 | 
 858 |     # First time to see this pid. Initialize
 859 |     # fragments array, and no previous
 860 |     # statement to resolve.
 861 |     unless (ref ($prev) eq 'ARRAY') {
 862 |         $query{$pid}{fragments} = [];
 863 |         return;
 864 |     }
 865 | 
 866 |     # Now have collected a full query and need to
 867 |     # canonize and store.
 868 |     my $full_statement = lc (join (' ', @$prev));
 869 | 
 870 |     # Handle SQL comments, carefully
 871 |     $full_statement =~ s{/[*].*?[*]/}{}msg;
 872 | 
 873 |     # Tidy up spaces
 874 |     $full_statement =~ s/#011/ /g;
 875 |     $full_statement =~ s/^\s+|\s+$//g;
 876 |     $full_statement =~ s/\s+/ /g;
 877 | 
 878 |     # Special transform for crowded queries
 879 |     $full_statement =~ s/=\s*(\d+)(and|or) /= $1 $2 /gio;
 880 | 
 881 |     # If closing a query, store until we get
 882 |     # subsequent duration statement
 883 |     if (my @match_args = $full_statement =~ $extract_query_re) {
 884 | 
 885 |         my ($main_query, $query_type) =
 886 |             $extract_query_re->query_info(@match_args);
 887 | 
 888 |         # Clean out arguments
 889 |         # Quoted string
 890 |         $main_query =~ s/'(?:''|\\+'?|[^'\\]+)*'/?/g;
 891 | 
 892 |         # Numeric no quote, and bind params
 893 |         $main_query =~ s/(?<=[^\w.])[\$-]?(?:\d*[.])?\d+\b/?/g;
 894 | 
 895 |         # Collapsing IN () lists, so queries deviating
 896 |         # only by 'IN (?,?)' and 'IN (?,?,?)' are logged
 897 |         # as "the same"
 898 |         $main_query =~
 899 |             s{
 900 |                 # Starts IN ...
 901 |                 \s in \s?
 902 |                 # Outermost paren for IN list
 903 |                 [(]
 904 |                     (?:
 905 |                         # Could be list of rows
 906 |                         [(] [?,\s\$]* [)]
 907 |                             |
 908 |                         # or the standard stuff of a scalar list
 909 |                         [?,\s\$]+
 910 |                     )+
 911 |                 # Until we close the full IN list
 912 |                 [)]
 913 |             }
 914 |             { in (?+)}xmsg;
 915 | 
 916 |         # Remove remaining comments (instances of '--' that didn't indicate
 917 |         # comments should have been removed already)
 918 |         $main_query =~ s/--[^\n]+$//gm;
 919 |         
 920 |         # Remove blank lines
 921 |         $main_query =~ s/^\s*\n//gm;
 922 | 
 923 |         # Store in temporary statement hashkey,
 924 |         # along with UPPER type
 925 |         $query{$pid}{statement} = $main_query;
 926 |         $query{$pid}{qtype} = "\U$query_type";
 927 |     }
 928 |     if (
 929 |             exists $query{$pid}{statement}
 930 |             &&
 931 |             $full_statement =~ $extract_duration_re
 932 |         )
 933 |     {
 934 |         my $duration = $1 || 0;
 935 |         my $stored = $query{$pid};
 936 | 
 937 |         # Add canonical query to count hash
 938 |         my $hsh =
 939 |             $canonical_q{ delete $stored->{statement} }
 940 |                 ||= {
 941 |                         count => 0,
 942 |                         duration => 0,
 943 |                         deviation => 0,
 944 |                         qtype => delete $stored->{qtype},
 945 |                         minimum_offenders => [ [ $stored->{statement_id} => $duration ] ],
 946 |                         minimum_threshold => $duration,
 947 |                         maximum_offenders => [ [ $stored->{statement_id} => $duration ] ],
 948 |                         maximum_threshold => $duration,
 949 |                         durations => [],
 950 |                     };
 951 | 
 952 |         ++$hsh->{count};
 953 |         $hsh->{duration} += $duration;
 954 |         push @{$hsh->{durations}}, $duration;
 955 | 
 956 |         # If we're tracking offenders (best/worst queries)
 957 |         # add them in if the newest one measures as one of the
 958 |         # best or worst.
 959 |         if ($opt{offenders}) {
 960 |             if ($duration > $hsh->{maximum_threshold}) {
 961 |                 my $array = $hsh->{maximum_offenders};
 962 |                 @$array = (
 963 |                     sort { $b->[1] <=> $a->[1] }
 964 |                     @$array,
 965 |                     [ $stored->{statement_id}, $duration ],
 966 |                 );
 967 |                 splice (@$array, $opt{offenders}) if @$array > $opt{offenders};
 968 |                 $hsh->{maximum_threshold} = $array->[-1]->[1];
 969 |             }
 970 | 
 971 |             if ($duration < $hsh->{minimum_threshold}) {
 972 |                 my $array = $hsh->{minimum_offenders};
 973 |                 @$array = (
 974 |                     sort { $a->[1] <=> $b->[1] }
 975 |                     @$array,
 976 |                     [ $stored->{statement_id}, $duration ],
 977 |                 );
 978 |                 splice (@$array, $opt{offenders}) if @$array > $opt{offenders};
 979 |                 $hsh->{maximum_threshold} = $array->[-1]->[1];
 980 |             }
 981 |         }
 982 |     }
 983 | 
 984 |     @$prev = ();
 985 |     $resolve_called++;
 986 | 
 987 |     return 1;
 988 | 
 989 | } ## end of resolve_syslog_statement
 990 | 
 991 | 
 992 | sub resolve_pid_statement {
 993 | 
 994 |     my $info = shift or die;
 995 | 
 996 |     $resolve_called++;
 997 | 
 998 |     my $string = $info->{statement} ? lc $info->{statement} : '';
 999 |     my $duration = $info->{duration} || 0;
1000 | 
1001 |     # Handle SQL comments, carefully
1002 |     $string =~ s{/[*].*?[*]/}{}msg;
1003 | 
1004 |     ## Lose the final semi-colon
1005 |     $string =~ s/;\s*$//;
1006 | 
1007 |     # Tidy up spaces
1008 |     $string =~ s/#011/ /g;
1009 |     $string =~ s/^\s+|\s+$//g;
1010 |     $string =~ s/\s+/ /g;
1011 | 
1012 |     # Special transform for crowded queries
1013 |     $string =~ s/=\s*(\d+)(and|or) /= $1 $2 /gio;
1014 | 
1015 |     my $main_query = $string;
1016 | 
1017 |     # Clean out arguments
1018 |     # Quoted string
1019 |     $main_query =~ s/'(?:''|\\+'?|[^'\\]+)*'/?/g;
1020 | 
1021 |     # Numeric no quote, and bind params
1022 |     $main_query =~ s/(?<=[^\w.])[\$-]?(?:\d*[.])?\d+\b/?/g;
1023 | 
1024 |     # Collapsing IN () lists, so queries deviating
1025 |     # only by 'IN (?,?)' and 'IN (?,?,?)' are logged
1026 |     # as "the same"
1027 |     $main_query =~
1028 |         s{
1029 |                 # Starts IN ...
1030 |                 \s in \s?
1031 |                 # Outermost paren for IN list
1032 |                 [(]
1033 |                     (?:
1034 |                         # Could be list of rows
1035 |                         [(] [?,\s\$]* [)]
1036 |                             |
1037 |                         # or the standard stuff of a scalar list
1038 |                         [?,\s\$]+
1039 |                     )+
1040 |                 # Until we close the full IN list
1041 |                 [)]
1042 |             }
1043 |             { in (?+)}xmsg;
1044 | 
1045 |     $main_query =~ s/^begin;//io unless $main_query =~ m{^begin;$}; ## no critic
1046 | 
1047 |     # Remove remaining comments (instances of '--' that didn't indicate
1048 |     # comments should have been removed already)
1049 |     $main_query =~ s/--[^\n]+$//gm;
1050 |     
1051 |     # Remove blank lines
1052 |     $main_query =~ s/^\s*\n//gm;
1053 | 
1054 |     return 0 if $main_query !~ /\w/; ## e.g. a single ;
1055 | 
1056 |     # Figure out what type of query this is
1057 |     if ($main_query !~ m{(\w+)}) {
1058 |         warn Dumper $info;
1059 |         warn "Could not find the type of query for $main_query from $string\n";
1060 |         exit;
1061 |     }
1062 |     my $query_type = uc $1;
1063 | 
1064 |     # Add canonical query to count hash
1065 |     my $hsh =
1066 |         $canonical_q{ $main_query }
1067 |             ||= {
1068 |                 count             => 0,
1069 |                 duration          => 0,
1070 |                 deviation         => 0,
1071 |                 qtype             => $query_type,
1072 |                 minimum_offenders => [ [ -1, $duration ] ],
1073 |                 minimum_threshold => $duration,
1074 |                 maximum_offenders => [ [ -1, $duration ] ],
1075 |                 maximum_threshold => $duration,
1076 |                 durations         => [],
1077 |             };
1078 | 
1079 |     ++$hsh->{count};
1080 |     $hsh->{duration} += $duration;
1081 |     push @{$hsh->{durations}}, $duration;
1082 | 
1083 |     # If we're tracking offenders (best/worst queries)
1084 |     # add them in if the newest one measures as one of the
1085 |     # best or worst.
1086 |     if ($opt{offenders}) {
1087 |         my $fixme = -1;
1088 |         if ($duration > $hsh->{maximum_threshold}) {
1089 |             my $array = $hsh->{maximum_offenders};
1090 |             @$array = (
1091 |                 sort { $b->[1] <=> $a->[1] }
1092 |                     @$array,
1093 |                 [ $fixme, $duration ],
1094 |         );
1095 |             splice (@$array, $opt{offenders}) if @$array > $opt{offenders};
1096 |             $hsh->{maximum_threshold} = $array->[-1]->[1];
1097 |         }
1098 | 
1099 |         if ($duration < $hsh->{minimum_threshold}) {
1100 |             my $array = $hsh->{minimum_offenders};
1101 |             @$array = (
1102 |                 sort { $a->[1] <=> $b->[1] }
1103 |                     @$array,
1104 |                 [ $fixme, $duration ],
1105 |         );
1106 |             splice (@$array, $opt{offenders}) if @$array > $opt{offenders};
1107 |             $hsh->{maximum_threshold} = $array->[-1]->[1];
1108 |         }
1109 |     }
1110 | 
1111 |     return 1;
1112 | 
1113 | } ## end of resolve_pid_statement
1114 | 
1115 | 
1116 | # Expects first arg as log entry of earliest time
1117 | # and second arg as log entry of latest time
1118 | sub log_interval {
1119 |     my ($first, $second) = @_;
1120 |     $opt{verbose} and print "First and second lines:\n\t$first\n\t$second\n";
1121 | 
1122 |     my $first_timelocal = get_timelocal_from_line($first);
1123 |     my $second_timelocal = get_timelocal_from_line($second);
1124 |     my $interval_in_sec = $second_timelocal - $first_timelocal;
1125 | 
1126 |     # Full log-slice interval in ms
1127 |     return $interval_in_sec * 1000;
1128 | }
1129 | 
1130 | sub get_timelocal_from_line {
1131 |     my ($line) = @_;
1132 | 
1133 |     return 0 if $opt{mode} eq 'bare';
1134 | 
1135 |     my @datetime = get_date_from_line($line);
1136 | 
1137 |     # timelocal uses 0..11 for months instead of 1..12
1138 |     --$datetime[1];
1139 | 
1140 |     my $int_in_sec =
1141 |         Time::Local::timelocal(reverse(@datetime));
1142 | 
1143 |     return $int_in_sec;
1144 | }
1145 | 
1146 | sub get_date_from_line {
1147 |     my ($line) = @_;
1148 | 
1149 |     my ($year, $mon, $day, $hour, $min, $sec);
1150 |     if ($line =~ m/^\d{4}/) {
1151 |         ($year, $mon, $day, $hour, $min, $sec) =
1152 |             $line =~ m{
1153 |             (\d{4})
1154 |             -
1155 |             (\d{1,2})
1156 |             -
1157 |             (\d{1,2})
1158 |             [T ]
1159 |             0?
1160 |             (\d{1,2})
1161 |             :
1162 |             0?
1163 |             (\d{1,2})
1164 |             :
1165 |             0?
1166 |             (\d{1,2})
1167 |         }x;
1168 |     }
1169 |     else {
1170 |         my @localtime = localtime(time);
1171 |         $year = $localtime[5] + 1900 ;
1172 |         my $mon_name;
1173 |         ($mon_name, $day, $hour, $min, $sec) =
1174 |             $line =~ m{
1175 |                 ^(\w{3})   \s+
1176 |                 (\d{1,2})  \s
1177 |                 (\d\d)     :
1178 |                 (\d\d)     :
1179 |                 (\d\d)
1180 |             }x;
1181 |         my %months = (
1182 |             Jan => '01',
1183 |             Feb => '02',
1184 |             Mar => '03',
1185 |             Apr => '04',
1186 |             May => '05',
1187 |             Jun => '06',
1188 |             Jul => '07',
1189 |             Aug => '08',
1190 |             Sep => '09',
1191 |             Oct => '10',
1192 |             Nov => '11',
1193 |             Dec => '12',
1194 |         );
1195 |         $mon = $months{$mon_name};
1196 |     }
1197 | 
1198 |     return ($year, $mon, $day, $hour, $min, $sec);
1199 | }
1200 | 
1201 | sub prettify_query {
1202 | 
1203 |     local ($_) = shift;
1204 | 
1205 |     # Perform some basic transformations to try to make the query more readable.
1206 |     # It's not perfect, but much better than all one line, all lower case.
1207 | 
1208 |     # Hide away any comments so they don't get transformed
1209 |     my @comment;
1210 |     s{($STARTCOMMENT) (.+?) ($ENDCOMMENT)}{push @comment => $2; "$1 comment $3"}ge;
1211 | 
1212 |     my $keywords = qr{
1213 |             select     |
1214 |             exists     |
1215 |             distinct   |
1216 |             deferred   |
1217 |             show       |
1218 |             grant      |
1219 |             revoke     |
1220 |             commit     |
1221 |             begin      |
1222 |             from       |
1223 |             left       |
1224 |             right      |
1225 |             outer      |
1226 |             inner      |
1227 |             where      |
1228 |             (?:
1229 |                 group  |
1230 |                 order
1231 |             ) \s by    |
1232 |             and        |
1233 |             or         |
1234 |             not        |
1235 |             in         |
1236 |             between    |
1237 |             as         |
1238 |             on         |
1239 |             using      |
1240 |             left       |
1241 |             right      |
1242 |             vacuum     |
1243 |             verbose    |
1244 |             rollback   |
1245 |             fetch      |
1246 |             full       |
1247 |             join       |
1248 |             limit      |
1249 |             offset     |
1250 |             count      |
1251 |             coalesce   |
1252 |             max        |
1253 |             min        |
1254 |             sum        |
1255 |             all        |
1256 |             desc       |
1257 |             asc        |
1258 |             union      |
1259 |             intersect  |
1260 |             except     |
1261 |             is         |
1262 |             null       |
1263 |             true       |
1264 |             false      |
1265 |             case       |
1266 |             when       |
1267 |             then       |
1268 |             else       |
1269 |             end        |
1270 |             i? like    |
1271 |             having     |
1272 |             insert     |
1273 |             (?:in)? to |
1274 |             update     |
1275 |             delete     |
1276 |             set        |
1277 |             values     |
1278 |             copy       |
1279 |             create     |
1280 |             drop       |
1281 |             add        |
1282 |             alter      |
1283 |             table      |
1284 |             trigger    |
1285 |             rule       |
1286 |             references |
1287 |             substring  |
1288 |             foreign
1289 |             \s key     |
1290 |             (?:
1291 |                 en |
1292 |                 dis
1293 |             ) able     |
1294 |             listen     |
1295 |             notify     |
1296 |             index}xi;
1297 | 
1298 |     # Change all known keywords to uppercase
1299 |     s{\b($keywords)\b}{\U$1}msg;
1300 | 
1301 |     s{,CASE}{, CASE}g;
1302 | 
1303 |     # line break after certain sql keywords
1304 |     s{
1305 |         (?<!DELETE)
1306 |         \s
1307 |         (
1308 |             SELECT    |
1309 |             (?: INNER \s JOIN) |
1310 |             CASE      |
1311 |             (?:FROM(?!\s\?)(?!\sposition))|
1312 |             (?:
1313 |                 (?: LEFT | RIGHT | FULL )
1314 |                 (?: \s OUTER )?
1315 |                 \s
1316 |             )?
1317 |             JOIN      |
1318 |             WHERE     |
1319 |             (?:
1320 |                 GROUP |
1321 |                 ORDER
1322 |             )\s BY    |
1323 |             LIMIT     |
1324 |             OFFSET    |
1325 |             UNION     |
1326 |             INTERSECT |
1327 |             EXCEPT    |
1328 |             SET       |
1329 |             VALUES    |
1330 |             ADD       |
1331 |             DROP      |
1332 |             REFERENCES
1333 |         )
1334 |     }
1335 |     {\n${indent1}$1}xmsg;
1336 | 
1337 |     ## Straighten out multiple question marks into a consistent style
1338 |     s{\?\s*,\s*\?}{?, ?}go;
1339 |     s{\s*\=\s*\?}{ = ?}go;
1340 |     s{<>\?}{<> ?}go;
1341 | 
1342 |     ## Space out comma items
1343 |     s{ *, *}{, }gso;
1344 | 
1345 |     ## Subselects start a new line
1346 |     s{^( +FROM \()}{$1\n}mgo;
1347 | 
1348 |     # Wrap long SET lines
1349 |     s{^(\s*SET .{$minwrap1,}?,\s*)(.+? = .*)}{$1\n$indent2$2}mgo;
1350 | 
1351 |     # Trim long lines at a comma if starts with:
1352 |     # SELECT, GROUP BY, VALUE, INSERT
1353 |     s{^(\s*(?:SELECT|GROUP BY|INSERT|VALUES) .{$minwrap1,}?, )(.+)}{$1\n$indent2$2}mgo;
1354 | 
1355 |     # Wrap long WHERE..AND lines
1356 |     s{^(\s*WHERE .{$minwrap1,}?)(\bAND .+)}{$1\n$indent2$2}mgo;
1357 |     1 while $_ =~ s{^(${indent2}AND .{$minwrap1,}?)(\bAND .+)}{$1\n$indent2$2}mgo;
1358 | 
1359 |     # Much munging of CASE .. WHEN .. END
1360 |     s{^\s*(CASE .*?)(\bWHEN .+)}{${indent2}$1\n$indent3$2}mgo;
1361 |     # Wrap the rest of the WHENs
1362 |     1 while $_ =~ s{^( +WHEN .+? )(WHEN.+)}{$1\n$indent3$2}mgo;
1363 |     # Same for WHEN THEN but indent more, and only over a certain level
1364 |     1 while $_ =~ s{^( +(?:WHEN|THEN) .{$minwrap2,}? )(THEN.+)}{$1\n$indent4$2}mgo;
1365 |     # Put END AS on its own line
1366 |     s{ +(END AS \w+)}{\n${indent2}$1}go;
1367 |     # Wrap stuff beyond the end properly
1368 |     s{^(\s+END AS \w+,) *(\w)}{$1\n${indent2}$2}mgo;
1369 | 
1370 |     ## Trim any hanging non-keyword lines from above
1371 |     1 while $_ =~ s{^(${indent2}(?:[a-z]|NULL|\?).{$minwrap1,}?, )(\w.*)}{$1\n$indent2$2}mgo;
1372 | 
1373 |     # Wrap if we are doing multiple commands on one line
1374 |     s{;\s*([A-Z])}{\n;\n$1}go;
1375 | 
1376 |     ## Add in any optional CSS to make the formatting even prettier
1377 |     if ($opt{color}) {
1378 |         ## Table aliases and tables:
1379 |         s{((?:FROM|JOIN)\s+(?!position)\w+\s+AS)\s+(\w+)}{$1 <span class="prealias">$2</span>}gos;
1380 |         1 while $_ =~ s{((?:FROM|JOIN)\s+.+?, \w+\s+AS)\s+(\w+)}{$1 <span class="prealias">$2</span>}gos;
1381 |         s{(FROM|JOIN)\s+(?!position)(\w+)}{$1 <span class="pretable">$2</span>}go;
1382 |         1 while $_ =~ s{((?:FROM|JOIN)\s+.+?,) (\w+)}{$1 <span class="pretable">$2</span>}gos;
1383 | 
1384 |         ## Highlight all keywords
1385 |         s{\b($keywords)\b}{<span class="prekeyword">$1</span>}go;
1386 | 
1387 |         ## Highlight semicolons indicating multiple statements
1388 |         s{\n;\n}{\n<span class="presemi">;</span>\n}go;
1389 | 
1390 |         ## Highlight placeholders (question marks)
1391 |         s{(\W)(\?)(\W|\Z)}{$1<span class="prequestion">$2</span>$3}gso;
1392 |     }
1393 | 
1394 |     # Restore any comments
1395 |     if (@comment) {
1396 |         s{($STARTCOMMENT) comment ($ENDCOMMENT)}{"\n" . (shift @comment) . "\n"}ge;
1397 |     }
1398 | 
1399 |     return $_;
1400 | 
1401 | } ## end of prettify_query
1402 | 
1403 | 
1404 | sub log_meta {
1405 | 
1406 |     my @lines = @_;
1407 | 
1408 |     my ($hostname, $start, $end);
1409 | 
1410 |     # Pull out host, start time, and end time.
1411 |     # Assumes start as first arg and end as second.
1412 | 
1413 |     for my $line (@lines) {
1414 | 
1415 |         if ($opt{verbose} >= 1) {
1416 |             chomp $line;
1417 |             warn "Checking meta line ($line)\n";
1418 |         }
1419 | 
1420 |         my $line_regex = qr{FAIL};
1421 |         if ('pid' eq $opt{mode}) {
1422 |             $line_regex = qr{(.{19}) [A-Z]+ \d+ [^ ]+ [^ ]+ (\w+)};
1423 |         }
1424 |         elsif ('syslog' eq $opt{mode}) {
1425 |             $line_regex = qr{(.{19})(?:[+-]\d+:\d+|\s[A-Z]+)\s( \S+ )};
1426 |         }
1427 |         elsif ('bare' eq $opt{mode}) {
1428 |             $line_regex = qr{(.{19}) [A-Z]+ \d+ [^ ]+ [^ ]+ (\w+)};
1429 |         }
1430 |         elsif ('csv' eq $opt{mode}) {
1431 |             $hostname = $line->{connection_from};
1432 |             $end = $line->{log_time};
1433 |             $start ||= $end;
1434 |             return ($hostname, $start, $end);
1435 |         }
1436 |         else {
1437 |             die qq{Invalid mode: $opt{mode}\n};
1438 |         }
1439 | 
1440 |         if ($line =~ /$line_regex/ms) { ## no critic
1441 |             $end = $1; ## no critic
1442 |             $hostname ||= $2; ## no critic
1443 |             $start ||= $end;
1444 |         }
1445 |         ## Sometimes we don't have a host
1446 |         elsif ($line =~ /(.{19})/) {
1447 |             $end = $1;
1448 |             $hostname = '';
1449 |             $start ||= $end;
1450 |         }
1451 |     }
1452 | 
1453 |     defined $start or die qq{Unable to find the starting time\n};
1454 | 
1455 |     defined $end or die qq{Unable to find the ending time\n};
1456 | 
1457 |     return ($hostname, $start, $end);
1458 | 
1459 | } ## end of log_meta
1460 | 
1461 | 
1462 | sub process_all_queries {
1463 | 
1464 |     my %out;
1465 | 
1466 |     # Sort by SI descending and print out reports.
1467 |     for (
1468 |         sort {
1469 |             $canonical_q{$b}{sys_impact}
1470 |             <=>
1471 |             $canonical_q{$a}{sys_impact}
1472 |             }
1473 |         keys %canonical_q
1474 |     )
1475 |         {
1476 | 
1477 |             my $hsh = $canonical_q{$_};
1478 | 
1479 |             my $system_impact;
1480 |             if ($hsh->{sys_impact} < 0.001) {
1481 |                 $system_impact = sprintf '%0.6f', $hsh->{sys_impact};
1482 |             }
1483 |             else {
1484 |                 $system_impact = sprintf '%0.2f', $hsh->{sys_impact};
1485 |             }
1486 | 
1487 |             ## No sense in showing negative numbers
1488 |             $hsh->{duration} = 0 if $hsh->{duration} < 0;
1489 | 
1490 |             my $duration = sprintf '%0.2f ms', $hsh->{duration};
1491 |             my $count = $hsh->{count};
1492 |             my $interval;
1493 |             if ($hsh->{interval} > 10000) {
1494 |                 $interval = sprintf '%d seconds', $hsh->{interval}/1000;
1495 |             }
1496 |             elsif ($hsh->{interval} < 1000) {
1497 |                 $interval = sprintf '%0.2f ms', $hsh->{interval};
1498 |             }
1499 |             else {
1500 |                 $interval = sprintf '%d ms', $hsh->{interval};
1501 |             }
1502 |             my $deviation = sprintf '%0.2f ms', $hsh->{deviation};
1503 |             my $median = sprintf '%0.2f ms', $hsh->{median};
1504 |             if ($count == 1) {
1505 |                 $deviation = 'N/A';
1506 |                 $median = 'N/A';
1507 |             }
1508 | 
1509 |             my $arr =
1510 |                 $out{ $hsh->{qtype} }
1511 |                     ||= [];
1512 | 
1513 |             # If user provides positive integer
1514 |             # in --offenders, add in the actual
1515 |             # durations of the best and worst
1516 |             # number of queries requested in
1517 |             # report. Entry includes full beginning
1518 |             # piece of log entry. Offenders was
1519 |             # used since initially it only displayed
1520 |             # the worst queries, or worst offenders.
1521 |             # Best was just added in for balance.
1522 |             my $offenders = '';
1523 |             if ($opt{offenders}) {
1524 |                 $offenders = sprintf(qq{
1525 | <table>
1526 |   <tr>
1527 |     <td align="center">
1528 |       ${fmstartbold}Best$fmendbold
1529 |     </td>
1530 |     <td align="center">
1531 |       ${fmstartbold}Worst$fmendbold
1532 |     </td>
1533 |   </tr>
1534 |   <tr>
1535 |     <td align="left"><ol>%s</ol></td>
1536 |     <td align="left"><ol>%s</ol></td>
1537 |   </tr>
1538 | </table>},
1539 |                     map { join '', map {
1540 |                         if ($_->[0] == -1) { "<li>$_->[1] ms</li>" }
1541 |                         else { "<li>$_->[0] -- $_->[1] ms</li>" }
1542 |                     } @$_ } @$hsh{
1543 |                         qw( minimum_offenders maximum_offenders )
1544 |                     }
1545 |              );
1546 |             }
1547 | 
1548 |             my $queries = prettify_query($_) . $offenders;
1549 | 
1550 |             my $fmshowborder = $opt{format} eq 'html' ? q{border='1'} : '';
1551 | 
1552 |             push (@$arr, <<"EOP");
1553 | <table $fmshowborder>
1554 | <tr>
1555 | <td align="right">
1556 | ${fmstartbold}System Impact:$fmendbold<br />
1557 | Mean Duration:<br />
1558 | Median Duration:<br />
1559 | Total Count:<br />
1560 | Mean Interval:<br />
1561 | Std. Deviation:
1562 | </td>
1563 | <td>
1564 | $fmstartbold$system_impact$fmendbold<br />
1565 | $duration<br />
1566 | $median<br />
1567 | $count<br />
1568 | $interval<br />
1569 | $deviation
1570 | </td>
1571 | </tr>
1572 | </table>
1573 | ${fmstartquery}$queries$fmendquery
1574 | EOP
1575 | 
1576 |         }
1577 | 
1578 |     return \%out;
1579 | 
1580 | } ## end of process_all_queries
1581 | 
1582 | 
1583 | sub make_extractor {
1584 |     (my $pg_version = shift) =~ s/\W+/_/g;
1585 |     $pg_version ||= '8_2';
1586 |     my $class = "PG_$pg_version";
1587 |     no strict 'refs'; ## no critic
1588 |     return bless (&$class(shift), $class);
1589 | }
1590 | 
1591 | sub PG_8_1 {
1592 |     my $query_types = shift;
1593 | 
1594 |     # Regex for log format prior to DETAIL
1595 |     # entries in 8.2. Not certain how many versions
1596 |     # prior to 8.1 for which this will work.
1597 | 
1598 |     return qr{
1599 |         \A
1600 |         log:
1601 |         \s
1602 |         statement:
1603 |         \s
1604 |         (?:
1605 |             execute
1606 |             \s
1607 |             <[^>]*>
1608 |             \s
1609 |             (\[)
1610 |             prepare:
1611 |             \s
1612 |         )?
1613 |         (
1614 |             ($query_types)
1615 |             .*?
1616 |         )
1617 |         (\])?
1618 |         \Z
1619 |     }xms;
1620 | }
1621 | 
1622 | sub PG_8_2 {
1623 |     my $query_types = shift;
1624 | 
1625 |     # Regex for log format after DETAIL
1626 |     # entries in 8.2. Works for v's 8.2 and 8.3.
1627 | 
1628 |     return qr{
1629 |         log:
1630 |         \s+
1631 |         (?:duration: .*)?
1632 |         (
1633 |             statement |
1634 |             execute (?: \s <.*> )
1635 |         )
1636 |         [^:]* :
1637 |         \s
1638 |         (
1639 |             ($query_types)
1640 |             .*
1641 |         )
1642 |     }ixms;
1643 | }
1644 | 
1645 | sub PG_8_1::query_info {
1646 | 
1647 |     my $self = shift;
1648 |     my ($open_sq, $main_query, $query_type, $close_sq) = @_;
1649 | 
1650 |     # Query may either be direct statement or the EXECUTE
1651 |     # of a PREPAREd statement. If EXECUTE variety, the log
1652 |     # partitions the query off in sq. brackets, so we tail
1653 |     # strip possible space between the query end and the
1654 |     # right sq. bracket that used to end the string. Else,
1655 |     # the query itself might end in right sq. bracket, so
1656 |     # we put it back on if found.
1657 |     if ($open_sq) {
1658 |         $main_query =~ s/\s+$//;
1659 |     }
1660 |     else {
1661 |         $main_query .= $close_sq || '';
1662 |     }
1663 | 
1664 |     return ($main_query, $query_type);
1665 | }
1666 | 
1667 | sub PG_8_2::query_info {
1668 | 
1669 |     my $self = shift;
1670 |     my ($stm_or_exec, $main_query, $query_type) = @_;
1671 | 
1672 |     # EXECUTEd queries end with a DETAIL segment showing
1673 |     # the bound parameters, which is great, except in
1674 |     # our case, where we are interested in flattening
1675 |     # out arguments.
1676 |     $main_query =~ s/ detail:.*?$//
1677 |         if $stm_or_exec eq 'execute';
1678 | 
1679 |     return ($main_query, $query_type);
1680 | }
1681 | 
1682 | __END__
1683 | 
1684 | =head1 NAME
1685 | 
1686 | pgsi.pl - Produce system impact reports for a PostgreSQL database.
1687 | 
1688 | =head1 VERSION
1689 | 
1690 | This documentation refers to version 1.7.2
1691 | 
1692 | =head1 USAGE
1693 | 
1694 | 
1695 | pgsi.pl [options] < pglog_slice.log
1696 | 
1697 |  or...
1698 | 
1699 | pgsi.pl --file pglog_slice.log [options]
1700 | 
1701 | =over 3
1702 | 
1703 | =item Options
1704 | 
1705 |  --format
1706 |  --file
1707 |  --query-types
1708 |  --top-10
1709 |  --all
1710 |  --pg-version
1711 |  --offenders
1712 | 
1713 | =back
1714 | 
1715 | =head1 DESCRIPTION
1716 | 
1717 | System Impact (SI) is a measure of the overall load a given query imposes on a
1718 | server. It is expressed as a percentage of a query's average duration over its
1719 | average interval between successive calls. E.g., SI=80 indicates that a given
1720 | query is active 80% of the time during the entire log interval. SI=200
1721 | indicates the query is running twice at all times on average. Thus, the lower
1722 | the SI, the better.
1723 | 
1724 | The goal of SI is to identify those queries most likely to cause performance
1725 | degradation on the database during heaviest traffic periods. Focusing
1726 | exclusively on the least efficient queries can hide relatively fast-running
1727 | queries that saturate the system more because they are called far more
1728 | frequently. By contrast, focusing only on the most-frequently called queries
1729 | will tend to emphasize small, highly optimized queries at the expense of
1730 | slightly less popular queries that spend much more of their time between
1731 | successive calls in an active state. These are often smaller queries that have
1732 | failed to be optimized and punish a system severely under heavy load.
1733 | 
1734 | One thing SI does not do is distinguish between high-value queries represented
1735 | by extended active states or long durations due to blocking locks. Either
1736 | condition is worthy of attention, but determining which is to blame will
1737 | require independent investigation.
1738 | 
1739 | Queries are canonized with placeholders representing literals or arguments.
1740 | Further, IN lists are canonized so that variation from query to query only
1741 | in the number of elements in the IN list will not be treated as distinct
1742 | queries.
1743 | 
1744 | Some examples of the "same" query:
1745 | 
1746 | =over 3
1747 | 
1748 | =item *
1749 | 
1750 |  SELECT col FROM table WHERE code = 'One';
1751 |  SELECT col FROM table WHERE code = 'Sixty-Three';
1752 | 
1753 | =item *
1754 | 
1755 |  SELECT foo FROM bar WHERE fuzz = $1 AND color IN ('R','G','B');
1756 |  Select FOO
1757 |  from bar
1758 |  WhErE fuzz = '56'
1759 |      AND color IN ('R', $1);
1760 | 
1761 | =back
1762 | 
1763 | Differences in capitalization and whitespace are irrelevant.
1764 | 
1765 | =head2 Log Data
1766 | 
1767 | Pass in log data on stdin:
1768 | 
1769 |     pgsi.pl < some_log_slice.log
1770 |     cat some_log_slice.log | pgsi.pl
1771 | 
1772 | Or use the --file option:
1773 | 
1774 |     pgsi.pl --file=some_log_slice.log
1775 | 
1776 | Or read in more than one file at a time:
1777 | 
1778 |     pgsi.pl --file=logfile1.log --file=logfile2.log
1779 | 
1780 | If more than one file is given, they must be given in chronological order.
1781 | 
1782 | Log data must comply with a specific format and must be from contiguous
1783 | activity. The code makes the assumption that the overall interval of activity
1784 | is the time elapsed between the first and last log entries. If there are
1785 | several blocks of logs to analyze, they must be run separately.
1786 | 
1787 | Required format is the following in syslog:
1788 | 
1789 | YYYY-MM-DDTHH24:MI:SS(-TZ:00)? server postgres[I<pid>]:
1790 | 
1791 | This also requires that log_statement is set to 'all' and 
1792 | that log_duration be set to 'on' in postgresql.conf.
1793 | If you are not using syslog, you can simulate the format with the following:
1794 | 
1795 | log_line_prefix  = '%t %h postgres[%p]: [%l-1] ' ## Simulate syslog for pgsi.
1796 | 
1797 | =head2 Options
1798 | 
1799 | =over 4
1800 | 
1801 | =item --format
1802 | 
1803 | Output format for the resulting data.
1804 | 
1805 |     pgsi.pl --format=tsv
1806 | 
1807 | Valid values here are "html" (default), "mediawiki", and "tsv", each of which
1808 | will generate the report in the corresponding format.
1809 | 
1810 | The "tsv" format (tab-separated value) will not generate a full report, but will
1811 | dump the normalized queries only along with the specific metrics into a tab-
1812 | separated file.
1813 | 
1814 | =item --query-types
1815 | 
1816 | Query impact is segregated by types. I.e., all the SELECTs together, all
1817 | UPDATEs together, etc. Typically it is assumed that SELECT is the most
1818 | interesting (and is by itself the default), but any query type may be analyzed.
1819 | Multiples are provided as space- or comma-separated lists.
1820 | 
1821 |     pgsi.pl --query-types="select, update, copy, create"
1822 | 
1823 | The code will produce a unique report for each type when used with the --all
1824 | and/or --top-10 file-pattern options (see below).
1825 | 
1826 | =item --top-10, --all
1827 | 
1828 | Supplies a file I<pattern> and optional directory path into which the reports
1829 | should be written per --query-type. The pattern is prefixed with the
1830 | --query-type and host for this report and placed into the requested directory
1831 | (or cwd if no path is present).
1832 | 
1833 | --all will list every canonized query encountered, which is likely to
1834 | contain a large number of queries of no interest (those with negligible
1835 | impact).
1836 | 
1837 | --top-10 limits the report to only the 10 entries with the greatest SI.
1838 | 
1839 |     pgsi.pl \
1840 |         --query-types=select,update \
1841 |         --all=si_reports/monday_10am_1pm.all.txt \
1842 |         --top-10=si_reports/monday_10am_1pm.t10.txt
1843 | 
1844 | This will produce the following reports in si_reports/ for a database running
1845 | on server db1:
1846 | 
1847 |     SELECT-db1-monday_10am_1pm.all.txt
1848 |     UPDATE-db1-monday_10am_1pm.all.txt
1849 |     SELECT-db1-monday_10am_1pm.t10.txt
1850 |     UPDATE-db1-monday_10am_1pm.t10.txt
1851 | 
1852 | If --top-10 is not supplied, then no top 10 report is generated. If --all is
1853 | not supplied, then the report(s) print to stdout.
1854 | 
1855 | =item --pg-version
1856 | 
1857 | Currently, this might better be described as either "before DETAIL" or "after
1858 | DETAIL". The code was written against PG 8.1 originally, but when 8.2 came out
1859 | the addition of DETAIL log entries forced a different parser. That unfortunate
1860 | timing led to the assumption that log construction would change with each
1861 | release. Going forward, --pg-version will be (other than 8.1) the first version
1862 | in which this log format was encountered.
1863 | 
1864 | --pg-version is only either 8.1 or 8.2 (8.2 is default). It's unknown how far
1865 | back in versions the 8.1 format holds, but 8.2 holds for itself and 8.3. So,
1866 | unless you're working against logs generated by a PG version less than 8.2, you
1867 | do not need to include this option (but it might save you some trouble if a new
1868 | format comes at a later version and the default bumps up to the most recent
1869 | while you stay on your older version).
1870 | 
1871 |     pgsi.pl --pg-version=8.1
1872 | 
1873 | =item --offenders
1874 | 
1875 | Number of best and worst queries to included with the report, in terms of
1876 | overall duration of execution. Enough log information is listed along with the
1877 | duration such that tracking down the original query (not the canonized
1878 | version) is straightforward. The offenders list can be very useful for a query
1879 | that is causing trouble in a handful of permutations, but most of the time is
1880 | behaving well.
1881 | 
1882 | The list in conjunction with standard deviation gives an overall indication of
1883 | performance volatility.
1884 | 
1885 | --offenders=5 produces additional output in the report that looks something
1886 | like the following example:
1887 | 
1888 |  Best
1889 |    1. 2009-01-12T10:11:49-07:00 db1 postgres[4692] -- 4.833 ms
1890 |    2. 2009-01-12T10:31:19-07:00 db1 postgres[1937] -- 4.849 ms
1891 |    3. 2009-01-12T09:16:20-07:00 db1 postgres[20294] -- 4.864 ms
1892 |    4. 2009-01-12T10:16:54-07:00 db1 postgres[20955] -- 4.867 ms
1893 |    5. 2009-01-12T10:32:16-07:00 db1 postgres[5010] -- 4.871 ms
1894 | 
1895 |  Worst
1896 |    1. 2009-01-12T10:00:07-07:00 db1 postgres[2804] -- 2175.650 ms
1897 |    2. 2009-01-12T09:30:07-07:00 db1 postgres[2804] -- 2090.914 ms
1898 |    3. 2009-01-12T10:00:18-07:00 db1 postgres[2804] -- 2046.608 ms
1899 |    4. 2009-01-12T09:30:10-07:00 db1 postgres[2804] -- 1954.604 ms
1900 |    5. 2009-01-12T11:20:11-07:00 db1 postgres[2804] -- 1788.576 ms
1901 | 
1902 | =back
1903 | 
1904 | =head1 BUGS
1905 | 
1906 | =over
1907 | 
1908 | =item *
1909 | 
1910 | If queries contain exceptionally long IN lists, the regex that attempts to
1911 | flatten them can run into a perl recursion limit. In that event, the query will
1912 | keep the placeholders of the IN list, making it unique compared to the same
1913 | query with a different cardinality of list params in the same IN. This
1914 | deficiency should only surface on IN lists with composite parameters [e.g., IN
1915 | ((?,?,...,?),(?,?,...,?),...,(?,?,...,?))]. For scalar IN lists, there should
1916 | be no such limit.
1917 | 
1918 | =back
1919 | 
1920 | =head1 AUTHOR
1921 | 
1922 | Original code:
1923 |     Mark Johnson (mark@endpoint.com), End Point Corp.
1924 | 
1925 | Contributions:
1926 |     Ethan Rowe (ethan@endpoint.com), End Point Corp.
1927 |     Greg Sabino Mullane (greg@endpoint.com), End Point Corp.
1928 |     Daniel Browning (db@endpoint.com), End Point Corp.
1929 |     Joshua Tolley <josh@endpoint.com>, End Point Corp.
1930 |     Abraham Ingersoll <abe@abe.us>
1931 | 
1932 | =head1 LICENSE AND COPYRIGHT
1933 | 
1934 | Copyright 2008-2011 Mark Johnson (mark@endpoint.com)
1935 | 
1936 | This module is free software; you can redistribute it and/or modify it
1937 | under the same terms as Perl itself. See the LICENSE file.
1938 | 
1939 | =cut
1940 | 


--------------------------------------------------------------------------------