├── package
    ├── __init__.py
    ├── which.py
    └── rdspgbadger.py
├── .stickler.yml
├── MANIFEST.in
├── requirements.txt
├── .gitignore
├── .editorconfig
├── .circleci
    └── config.yml
├── LICENSE
├── setup.py
└── README.rst


/package/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.stickler.yml:
--------------------------------------------------------------------------------
1 | linters:
2 |   flake8: {  }
3 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include requirements.txt
2 | include *.rst
3 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | boto3==1.8.1
2 | botocore==1.11.1
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Compiled python modules.
 2 | *.pyc
 3 | __pycache__
 4 | 
 5 | # Setuptools distribution folder.
 6 | /dist/
 7 | 
 8 | # Python egg metadata, regenerated from source files by setuptools.
 9 | /*.egg-info
10 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | # EditorConfig is awesome: http://EditorConfig.org
 2 | 
 3 | # top-most EditorConfig file
 4 | root = true
 5 | 
 6 | [*]
 7 | charset = utf-8
 8 | indent_style = space
 9 | indent_size = 4
10 | end_of_line = lf
11 | insert_final_newline = true
12 | trim_trailing_whitespace = true
13 | insert_final_newline = true
14 | 


--------------------------------------------------------------------------------
/.circleci/config.yml:
--------------------------------------------------------------------------------
 1 | jobs:
 2 |   Build Error:
 3 |     docker:
 4 |     - image: bash:4.4.19
 5 |     steps:
 6 |     - run:
 7 |         name: Config Processing Error (Don't rerun)
 8 |         command: "# No configuration was found in your project. Please refer to https://circleci.com/docs/2.0/ to get started with your configuration.\n# \n# -------\n# Warning: This configuration was auto-generated to show you the message above.\n# Don't rerun this job. Rerunning will have no effect.\nfalse"
 9 | workflows:
10 |   version: 2
11 |   Build Error:
12 |     jobs:
13 |     - Build Error
14 | version: 2
15 | 
16 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2016 François Pietka
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | from setuptools import setup
 5 | 
 6 | 
 7 | setup(name='rdspgbadger',
 8 |       version='1.2.3',
 9 |       description=("Fetch logs from RDS postgres instance and use them with "
10 |                    "pgbadger to generate a report."),
11 |       url='http://github.com/fpietka/rds-pgbadger',
12 |       author='François Pietka',
13 |       author_email='francois@pietka.fr',
14 |       license='MIT',
15 |       packages=['package'],
16 |       install_requires=['boto3>=1.4.0'],
17 |       entry_points={
18 |           'console_scripts': [
19 |               'rds-pgbadger = package.rdspgbadger:main'
20 |           ],
21 |       },
22 |       long_description=open('README.rst').read(),
23 |       classifiers=[
24 |           'Intended Audience :: Developers',
25 |           'License :: OSI Approved :: MIT License',
26 |           'Environment :: Console',
27 |           'Programming Language :: Python',
28 |           'Programming Language :: Python :: 2.6',
29 |           'Programming Language :: Python :: 2.7',
30 |           'Programming Language :: Python :: 3',
31 |           'Programming Language :: Python :: 3.3',
32 |           'Programming Language :: Python :: 3.4',
33 |           'Programming Language :: Python :: 3.5',
34 |           'Programming Language :: Python :: 3.6'
35 |       ],
36 |       zip_safe=True)
37 | 


--------------------------------------------------------------------------------
/package/which.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | 
 4 | 
 5 | def which(cmd, mode=os.F_OK | os.X_OK, path=None):
 6 |     """Given a command, mode, and a PATH string, return the path which
 7 |     conforms to the given mode on the PATH, or None if there is no such
 8 |     file.
 9 | 
10 |     `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result
11 |     of os.environ.get("PATH"), or can be overridden with a custom search
12 |     path.
13 | 
14 |     """
15 |     # Check that a given file can be accessed with the correct mode.
16 |     # Additionally check that `file` is not a directory, as on Windows
17 |     # directories pass the os.access check.
18 |     def _access_check(fn, mode):
19 |         return (os.path.exists(fn) and
20 |                 os.access(fn, mode) and not
21 |                 os.path.isdir(fn))
22 | 
23 |     # If we're given a path with a directory part, look it up directly rather
24 |     # than referring to PATH directories. This includes checking relative to
25 |     # the current directory, e.g. ./script
26 |     if os.path.dirname(cmd):
27 |         if _access_check(cmd, mode):
28 |             return cmd
29 |         return None
30 | 
31 |     if path is None:
32 |         path = os.environ.get("PATH", os.defpath)
33 |     if not path:
34 |         return None
35 |     path = path.split(os.pathsep)
36 | 
37 |     if sys.platform == "win32":
38 |         # The current directory takes precedence on Windows.
39 |         if os.curdir not in path:
40 |             path.insert(0, os.curdir)
41 | 
42 |         # PATHEXT is necessary to check on Windows.
43 |         pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
44 |         # See if the given file matches any of the expected path extensions.
45 |         # This will allow us to short circuit when given "python.exe".
46 |         # If it does match, only test that one, otherwise we have to try
47 |         # others.
48 |         if any(cmd.lower().endswith(ext.lower()) for ext in pathext):
49 |             files = [cmd]
50 |         else:
51 |             files = [cmd + ext for ext in pathext]
52 |     else:
53 |         # On other platforms you don't have things like PATHEXT to tell you
54 |         # what file suffixes are executable, so just pass on cmd as-is.
55 |         files = [cmd]
56 | 
57 |     seen = set()
58 |     for folder in path:
59 |         normdir = os.path.normcase(folder)
60 |         if normdir not in seen:
61 |             seen.add(normdir)
62 |             for thefile in files:
63 |                 name = os.path.join(folder, thefile)
64 |                 if _access_check(name, mode):
65 |                     return name
66 |     return None
67 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | .. image:: https://api.codacy.com/project/badge/Grade/902dd72b33df408b8d1274890cd805db
 2 |    :target: https://www.codacy.com/project/fpietka/rds-pgbadger/dashboard?utm_source=github.com&utm_medium=referral&utm_content=fpietka/rds-pgbadger&utm_campaign=Badge_Grade_Dashboard
 3 |    :alt: Grade
 4 | .. image:: https://img.shields.io/pypi/v/rdspgbadger.svg
 5 |    :target: https://pypi.python.org/pypi/rdspgbadger
 6 |    :alt: Version
 7 | .. image:: https://img.shields.io/pypi/pyversions/rdspgbadger.svg
 8 |    :target: https://pypi.python.org/pypi/rdspgbadger
 9 |    :alt: Python versions supported
10 | .. image:: https://img.shields.io/pypi/l/rdspgbadger.svg
11 |    :target: https://pypi.python.org/pypi/rdspgbadger
12 |    :alt: License
13 | 
14 | ============
15 | RDS-pgBadger
16 | ============
17 | 
18 | Fetches RDS log files and analyzes them with pgBadger_.
19 | 
20 | Prerequisites
21 | -------------
22 | 
23 | Make sure your credentials are set in the ``~/.aws/credentials`` file.
24 | Also, you can set a region in the ``~/.aws/config`` file, so passing region option to the script is not needed.
25 | Last but not least, make sure you have ``pgbadger`` installed and reachable from your ``$PATH``.
26 | 
27 | Parameter group
28 | ---------------
29 | 
30 | You will have to configure your database parameter group.
31 | 
32 | First of all, ensure ``log_min_duration_statement`` is set to ``0`` or higher, else you won't have anything to be parsed.
33 | 
34 | Then you must enable some other parameters to get more information in the logs.
35 | 
36 | +-----------------------------+-------+
37 | | Parameter                   | Value |
38 | +=============================+=======+
39 | | log_checkpoints             | 1     |
40 | +-----------------------------+-------+
41 | | log_connections             | 1     |
42 | +-----------------------------+-------+
43 | | log_disconnections          | 1     |
44 | +-----------------------------+-------+
45 | | log_lock_waits              | 1     |
46 | +-----------------------------+-------+
47 | | log_temp_files              | 0     |
48 | +-----------------------------+-------+
49 | | log_autovacuum_min_duration | 0     |
50 | +-----------------------------+-------+
51 | 
52 | Also make sure ``lc_messages`` is either at engine default or set to ``C``.
53 | 
54 | For further details, please refer to Dalibo's pgBadger_ documentation.
55 | 
56 | Installation
57 | ------------
58 | 
59 | You can install it using ``pip``::
60 | 
61 |  $ pip install rdspgbadger
62 | 
63 | Usage
64 | -----
65 | 
66 | To build a ``pgbadger`` report, just run the following (replacing ``instanceid`` by your instance ID)::
67 | 
68 |  $ rds-pgbadger instanceid
69 | 
70 | Options
71 | -------
72 | 
73 | Only the Instance ID is mandatory, but there are also other options you can use:
74 | 
75 | * -d, --date : by default the script downloads all the available logs. By specifying a date in the format ``YYYY-MM-DD``, you can then download only that day's logs.
76 | * -r, --region : by default the script uses the region specified in your AWS config file. If none, or if you wish to change it, you can use this option to do so.
77 | * -o, --output : by default the script outputs log files and reports to the ``out`` folder. This option allows you to change it.
78 | * -n, --no-process : download log file(s), but do not process them with pgBadger.
79 | * -X, --pgbadger-args : command-line arguments to pass to pgBadger
80 | * --assume-role : by specifying a role you can use STS to assume it, which is useful for cross-account access without having to set up the ``.config`` file. Format ``arn:aws:iam::<account_id>:role/<role_name>``
81 | 
82 | Known issue
83 | -----------
84 | 
85 | In spite of the great work of askainet_, the AWS API seems to be too unstable, and sometimes the download of big log files can
86 | fail. In such a case, retrying a few minutes later seems to work.
87 | 
88 | see `pull request 10`_
89 | 
90 | Contribute
91 | ----------
92 | 
93 | For any request, feel free to make a pull request or file an issue on Github_.
94 | 
95 | .. _pgBadger: http://dalibo.github.io/pgbadger/
96 | .. _Github: https://github.com/fpietka/rds-pgbadger
97 | .. _askainet: https://github.com/askainet
98 | .. _pull request 10: https://github.com/fpietka/rds-pgbadger/pull/10
99 | 


--------------------------------------------------------------------------------
/package/rdspgbadger.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- encoding: utf-8 -*-
  3 | 
  4 | """
  5 | Fetch logs from RDS postgres instance and use them with pgbadger to generate a
  6 | report.
  7 | """
  8 | 
  9 | import os
 10 | import errno
 11 | import boto3
 12 | from botocore.exceptions import (ClientError,
 13 |                                  EndpointConnectionError,
 14 |                                  NoRegionError,
 15 |                                  NoCredentialsError,
 16 |                                  PartialCredentialsError)
 17 | import argparse
 18 | from datetime import datetime
 19 | try:
 20 |     from shutil import which
 21 | except ImportError:
 22 |     from which import which
 23 | 
 24 | import subprocess
 25 | 
 26 | import logging
 27 | 
 28 | __version__ = "1.2.3"
 29 | 
 30 | 
 31 | def valid_date(s):
 32 |     try:
 33 |         return datetime.strptime(s, "%Y-%m-%d").strftime("%Y-%m-%d")
 34 |     except ValueError:
 35 |         msg = "Not a valid date: '{0}'.".format(s)
 36 |         raise argparse.ArgumentTypeError(msg)
 37 | 
 38 | 
 39 | parser = argparse.ArgumentParser(description=__doc__,
 40 |                                  formatter_class=argparse.RawTextHelpFormatter)
 41 | 
 42 | parser.add_argument('instance', help="RDS instance identifier")
 43 | parser.add_argument('--version', action='version',
 44 |                     version='%(prog)s {version}'.format(version=__version__))
 45 | 
 46 | parser.add_argument('-v', '--verbose', help="increase output verbosity",
 47 |                     action='store_true')
 48 | parser.add_argument('-d', '--date', help="get logs for given YYYY-MM-DD date",
 49 |                     type=valid_date)
 50 | parser.add_argument('--assume-role', help="AWS STS AssumeRole")
 51 | parser.add_argument('-r', '--region', help="AWS region")
 52 | parser.add_argument('-o', '--output', help="Output folder for logs and report",
 53 |                     default='out')
 54 | parser.add_argument('-n', '--no-process', help="Only download logs",
 55 |                     action='store_true')
 56 | parser.add_argument('-X', '--pgbadger-args', help="pgbadger arguments",
 57 |                     default='')
 58 | parser.add_argument('-f', '--format', help="Format of the report",
 59 |                     choices=['text', 'html', 'bin', 'json', 'tsung'],
 60 |                     default='html')
 61 | 
 62 | logger = logging.getLogger("rds-pgbadger")
 63 | 
 64 | 
 65 | def define_logger(verbose=False):
 66 |     logger = logging.getLogger("rds-pgbadger")
 67 |     if verbose:
 68 |         logger.setLevel(logging.DEBUG)
 69 |     else:
 70 |         logger.setLevel(logging.INFO)
 71 |     logFormatter = logging.Formatter("%(asctime)s :: %(levelname)s :: "
 72 |                                      "%(message)s")
 73 |     consoleHandler = logging.StreamHandler()
 74 |     consoleHandler.setFormatter(logFormatter)
 75 |     logger.addHandler(consoleHandler)
 76 | 
 77 | 
 78 | def get_all_logs(dbinstance_id, output,
 79 |                  date=None, region=None, assume_role=None):
 80 | 
 81 |     boto_args = {}
 82 |     if region:
 83 |         boto_args['region_name'] = region
 84 | 
 85 |     if assume_role:
 86 |         sts_client = boto3.client('sts')
 87 |         assumedRoleObject = sts_client.assume_role(
 88 |                 RoleArn=assume_role,
 89 |                 RoleSessionName="RDSPGBadgerSession1"
 90 |         )
 91 | 
 92 |         credentials = assumedRoleObject['Credentials']
 93 |         boto_args['aws_access_key_id'] = credentials['AccessKeyId']
 94 |         boto_args['aws_secret_access_key'] = credentials['SecretAccessKey']
 95 |         boto_args['aws_session_token'] = credentials['SessionToken']
 96 |         logger.info('STS Assumed role %s', assume_role)
 97 | 
 98 |     client = boto3.client("rds", **boto_args)
 99 |     paginator = client.get_paginator("describe_db_log_files")
100 |     response_iterator = paginator.paginate(
101 |         DBInstanceIdentifier=dbinstance_id,
102 |         FilenameContains="postgresql.log"
103 |     )
104 | 
105 |     for response in response_iterator:
106 |         for log in (name for name in response.get("DescribeDBLogFiles")
107 |                     if not date or date in name["LogFileName"]):
108 |             filename = "{}/{}".format(output, log["LogFileName"])
109 |             logger.info("Downloading file %s", filename)
110 |             try:
111 |                 os.remove(filename)
112 |             except OSError:
113 |                 pass
114 |             write_log(client, dbinstance_id, filename, log["LogFileName"])
115 | 
116 | 
def write_log(client, dbinstance_id, filename, logfilename):
    """Download one RDS log file portion by portion into `filename`.

    The log is fetched through `client.download_db_log_file_portion()` and
    appended to `filename`, whose parent folder is created when missing.
    Portions are requested 10000 lines at a time. When a portion ends with
    the " [Your log message was truncated]" marker it is discarded and
    requested again with half as many lines. The download stops once an
    empty portion is received with no additional data pending.

    Arguments:
        client: object exposing `download_db_log_file_portion`.
        dbinstance_id: RDS instance identifier.
        filename: local destination path (opened in append mode).
        logfilename: name of the log file on the RDS side.

    Raises:
        Exception: when a truncated portion contains no complete line, so
            that shrinking the request further cannot help.
    """
    initial_max_number_of_lines = 10000
    truncated_string = " [Your log message was truncated]"
    slice_length = len(truncated_string) + 1

    def _fetch(marker, number_of_lines):
        # Request the portion that starts at `marker`.
        return client.download_db_log_file_portion(
            DBInstanceIdentifier=dbinstance_id,
            LogFileName=logfilename,
            Marker=marker,
            NumberOfLines=number_of_lines
        )

    marker = "0"
    max_number_of_lines = initial_max_number_of_lines
    # First request is made before touching the disk, so a failing call
    # does not leave an empty file or folder behind.
    response = _fetch(marker, max_number_of_lines)

    # A bare file name has an empty dirname, which os.makedirs() rejects.
    directory = os.path.dirname(filename)
    if directory and not os.path.exists(directory):
        try:
            os.makedirs(directory)
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise

    with open(filename, "a") as logfile:
        while True:
            # A response without any data is handled as an empty portion.
            data = response.get("LogFileData") or ""

            if truncated_string in data[-slice_length:]:
                downloaded_lines = data.count("\n")
                if downloaded_lines == 0:
                    raise Exception(
                        "No line was downloaded in last portion!")
                # Keep the marker and retry the same portion, smaller.
                max_number_of_lines = max(downloaded_lines // 2, 1)
                logger.info("Log truncated, retrying portion with "
                            "NumberOfLines = %s", max_number_of_lines)
            else:
                max_number_of_lines = initial_max_number_of_lines
                marker = response.get("Marker", marker)
                logfile.write(data)
                if (not data.rstrip("\n") and
                        not response.get("AdditionalDataPending")):
                    break

            response = _fetch(marker, max_number_of_lines)
165 | 
166 | 
def main():
    """Command-line entry point: download the logs, then run pgbadger.

    Parses the command line, configures logging and downloads the log
    files with get_all_logs(). Unless --no-process is given, pgbadger is
    then run on the downloaded files to build "report.<format>" in the
    output folder.

    pgbadger is only looked up when a report is requested, and the lookup
    happens before the download so a missing binary is reported early.

    Raises:
        SystemExit: with a non-zero status when pgbadger is required but
            not found, when the AWS calls fail, or when pgbadger itself
            exits with a non-zero status.
    """
    args = parser.parse_args()
    define_logger(args.verbose)

    if args.date:
        logger.info("Getting logs from %s", args.date)
    else:
        logger.info("Getting all logs")

    pgbadger = None
    if not args.no_process:
        pgbadger = which("pgbadger")
        if not pgbadger:
            logger.error("pgbadger not found")
            raise SystemExit(1)
        logger.debug("pgbadger found")

    # SystemExit is raised directly: the exit() helper is injected by the
    # site module and is not guaranteed to exist.
    try:
        get_all_logs(
            args.instance,
            args.output,
            date=args.date,
            region=args.region,
            assume_role=args.assume_role
        )
    except (EndpointConnectionError, ClientError) as e:
        logger.error(e)
        raise SystemExit(1)
    except NoRegionError:
        logger.error("No region provided")
        raise SystemExit(1)
    except NoCredentialsError:
        logger.error("Missing credentials")
        raise SystemExit(1)
    except PartialCredentialsError:
        logger.error("Partial credentials, please check your credentials file")
        raise SystemExit(1)

    if args.no_process:
        logger.info("File(s) downloaded. Not processing with PG Badger.")
        return

    logger.info("Generating PG Badger report.")
    # The command goes through the shell on purpose: it relies on the
    # shell to expand the *.log.* pattern and to split --pgbadger-args.
    command = ("{} -p \"%t:%r:%u@%d:[%p]:\" {} -o {}/report.{} "
               "{}/error/*.log.* ".format(pgbadger,
                                          args.pgbadger_args,
                                          args.output,
                                          args.format,
                                          args.output))
    logger.debug("Command: %s", command)
    status = subprocess.call(command, shell=True)
    if status != 0:
        logger.error("pgbadger exited with status %s", status)
        raise SystemExit(status if status > 0 else 1)
    logger.info("Done")


if __name__ == '__main__':
    main()
219 | 


--------------------------------------------------------------------------------