├── .gitignore
├── .idea
│   └── inspectionProfiles
│       └── Project_Default.xml
├── .travis.yml
├── CONTRIBUTORS.md
├── LICENSE
├── MANIFEST.in
├── README.rst
├── _config.yml
├── awsdbrparser
│   ├── __init__.py
│   ├── cli.py
│   ├── config.py
│   ├── data
│   │   ├── dbr_doctype_es2x.json
│   │   └── dbr_doctype_es6x.json
│   ├── parser.py
│   └── utils.py
├── job.sh
├── requirements
│   └── base.txt
├── setup.cfg
├── setup.py
├── tests
│   ├── __init__.py
│   └── test_cli.py
└── tox.ini
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.__pycache__
3 |
4 | dist/
5 | build/
6 | *.egg-info/
7 |
8 | .cache/
9 | .tox/
10 | .coverage
11 |
12 | .idea/
13 | *.iml
14 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: false
2 | language: python
3 | python:
4 | - "2.7"
5 | - "3.6"
6 | install: pip install tox-travis
7 | script: tox
8 |
--------------------------------------------------------------------------------
/CONTRIBUTORS.md:
--------------------------------------------------------------------------------
1 |
2 | # Contributors
3 |
4 | People who have contributed code and ideas to this project.
5 |
6 | Rafael M. Koike `koiker@amazon.com`
7 | Henri Yandell `hyandell`
8 | Daniel Gonçalves `daniel@base4.com.br`
9 | Mathieu Guillaume `mg@nuxeo.com`
10 |
11 | Thank you guys!
12 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 | include README.rst
3 | include requirements/base.txt
4 | include awsdbrparser/data/*.json
5 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | .. image:: https://travis-ci.org/awslabs/aws-detailed-billing-parser.svg?branch=master
2 | :target: https://travis-ci.org/awslabs/aws-detailed-billing-parser
3 |
4 |
5 | AWS DBR parser
6 | ==============
7 |
8 | Author: Rafael M. Koike
9 |
10 | AWS ProServe
11 |
 12 | This script was created to support automatic parsing of Detailed Billing
 13 | Records (DBR) into JSON format and to send these documents directly to
 14 | Elasticsearch or save them in a JSON file. It’s based on `AWS boto3`_,
 15 | the `Elasticsearch`_ Python API and the `click`_ CLI creation kit.
16 |
17 | Installation Instructions
 18 | -------------------------
19 |
 20 | This project isn’t on PyPI yet, so you will need to install it from
 21 | this repository, either directly with ``pip`` or by cloning it and
 22 | running the ``setup.py`` script. For the requirements, see the
 23 | ``requirements/base.txt`` file.
 24 | For example:
25 |
 26 | - This option installs only the dbrparser; you can still use the
 27 |   ``job.sh`` script from the repository to schedule the process as a cron job:
28 |
29 | ::
30 |
31 | $ pip install git+https://github.com/awslabs/aws-detailed-billing-parser.git
32 |
 33 | - This option clones the repository to your instance and installs
 34 |   from source:
35 |
36 | .. code:: bash
37 |
38 | $ git clone https://github.com/awslabs/aws-detailed-billing-parser.git
39 | $ cd aws-detailed-billing-parser
40 | $ python setup.py install
41 |
42 | Executing
43 | ---------
44 |
45 | Once installed run ``dbrparser`` CLI with ``--help`` option:
46 |
47 | .. code:: bash
48 |
49 | $ dbrparser --help
50 |
51 | Running Tests
52 | -------------
53 |
 54 | Tests still need to be written, but we have already introduced
 55 | `py.test`_ and `tox`_ for test run automation and `flake8`_ to check
 56 | code quality and style. There are nice stubs for testing the CLI
 57 | command line. All you have to do is install **tox** and run ``tox`` on
 58 | the command line.
59 |
60 | TODO (Features to incorporate in the dbrparser)
61 | -----------------------------------------------
62 |
63 | - Unzip (Extract the DBR from zip file);
64 | - S3 (Copy the source file from S3 bucket to local folder to process);
 65 | - To be compatible with **AWS Lambda** the parser must run in at most
 66 |   5 minutes, and depending on the size of the file this won’t be
 67 |   possible, so we will probably need a new option, say ``--max-rows``,
 68 |   so that every Lambda invocation processes at most ``10000`` rows, for
 69 |   example. This should make Lambda runs predictable enough to finish in
 70 |   the correct timeframe;
71 | - Write more tests.
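The ``--max-rows`` idea above can be sketched as chunked reading, so that each invocation touches a bounded number of rows. This is a minimal illustration, not project code; the names ``parse_in_chunks``, ``process_rows`` and the ``max_rows`` parameter are assumptions:

```python
import itertools

def process_rows(rows):
    # Stand-in for the real per-row DBR parsing; here we only count rows.
    return len(rows)

def parse_in_chunks(path, max_rows=10000):
    """Read at most max_rows lines per chunk so a single run stays bounded."""
    processed = 0
    with open(path) as f:
        while True:
            chunk = list(itertools.islice(f, max_rows))
            if not chunk:
                break
            processed += process_rows(chunk)
    return processed
```

Each Lambda invocation would then process one chunk (persisting a file offset between calls), instead of the whole DBR file at once.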
72 |
73 | Changes
74 | -------
75 | Version 0.6.0 - 2019-03-26
76 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
77 |
 78 | - Bugfix: the last commits broke compatibility with Elasticsearch 2.3.
 79 |   This version adds compatibility with both ES 2.x and 6.x using two different document types.
 80 |   To use the solution with ES 6.x you must add ``--es6`` on the command line, and the index name will be just "billing".
 81 |   The default is ES 2.x, to keep backward compatibility (the index name will be: billing--)
82 |
83 |
84 | Version 0.5.4 - 2017-08-29
85 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
86 |
 87 | - Bugfix: RI and Spot coverage was returning incorrect results on Python 2.7 because / defaults to integer division there
88 |
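The root cause of the 0.5.4 fix above is that ``/`` between two integers performs integer division on Python 2.7. A minimal illustration of one portable pattern (the coverage numbers here are made up):

```python
reserved_hours = 150
total_hours = 600

# On Python 2.7, 150 / 600 evaluates to 0 because both operands are ints;
# converting one operand to float yields the intended ratio on both
# Python 2 and Python 3.
ri_coverage = float(reserved_hours) / total_hours  # 0.25
```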
89 | Version 0.5.3 - 2017-08-28
90 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
91 |
 92 | - Refactoring: the analytics() function has been refactored to reduce memory usage and avoid problems with huge DBR files
93 |
94 | Version 0.5.2 - 2017-08-24
95 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
96 |
 97 | - BugFix: Python 3 deprecated the ``encoding`` parameter in the json library, and some
 98 |   calls still passed it. Removing the encoding parameter solved the incompatibility
99 |
100 |
101 | Version 0.5.1 - 2017-01-05
102 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
103 |
104 | - BugFix: Some running instances don't have the UsageType in the expected colon-separated format,
105 |   so we need to check whether the UsageType contains ':' and return the InstanceType or N/A
106 |
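The check described above can be illustrated with a small helper. This is a sketch, not the project's actual function (the real logic lives in the parser's pre-processing step), and the sample UsageType value is just one common DBR form:

```python
def instance_type(usage_type):
    # A UsageType such as 'BoxUsage:m4.large' carries the instance type
    # after the colon; records without ':' have no instance type.
    if ':' in usage_type:
        return usage_type.split(':', 1)[1]
    return 'N/A'
```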
107 |
108 | Version 0.5.0 - 2016-10-11
109 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
110 |
111 | - Included a Dynamic Template so that new document fields are Not Analyzed
112 | - Included support for AWS Signed V4 requests. If you are running the
113 |   program from an EC2 instance or from a computer that has the AWS CLI
114 |   installed and configured with the correct credentials, you just need to
115 |   include the ``--awsauth`` parameter
116 | - Changed the ``split_keys`` function to ``pre_process`` and included extra
117 |   information based on the UsageType field. Now you have:
118 |
119 | - UsageItem with the options:
120 |
121 | - On-Demand
122 | - Reserved Instance
123 | - Spot Instance
124 |
125 | - InstanceType with only the instance name extracted from the
126 | UsageType
127 |
128 | Version 0.4.1 - 2016-08-31
129 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
130 |
131 | - Changed requirements to support newer versions of boto3 (some other
132 |   software needs version 1.3.1 or higher, and dbrparser was conflicting
133 |   with it)
134 |
135 | Version 0.4.1 - 2016-05-11
136 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
137 |
138 | - Bugfix of timeout when sending by bulk (Increased to 30 seconds)
139 |
140 | Version 0.4.0 - 2016-03-27
141 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
142 |
143 | - Project was completely restructured in order to create a proper
144 | Python package called ``awsdbrparser`` and the CLI name
145 | ``dbrparser``;
146 | - In the CLI side, ``argparse`` was dropped in favor of Armin
147 | Ronacher’s ``click``, allowing a better and easier CLI
148 | implementation;
149 | - Introduced option ``--quiet`` for those who intend to schedule DBR
150 |   parsing via cron jobs, for example;
151 | - Introduced option ``--fail-fast``, which will stop parsing execution in
152 |   case of an unexpected parse error or other component error;
153 | - Dropped our own progress bar implementation in favor of click’s
154 |   progress bar, which includes a nice ETA (estimated time of
155 |   accomplishment) calculation;
156 | - When used as a library, parser execution can be parametrized through
157 | ``awsdbrparser.config.Config`` class instance.
158 | - Entire code was reviewed to match PEP8 compliance (with a few
159 |   exceptions) through ``flake8``.
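The library usage mentioned in the list above can be sketched as follows; the input filename is hypothetical, while ``Config``, ``OUTPUT_TO_FILE`` and ``parser.parse`` mirror how ``cli.py`` wires them together:

```python
from awsdbrparser import parser
from awsdbrparser.config import Config, OUTPUT_TO_FILE

config = Config()
config.input_filename = 'dbr-2016-03.csv'  # hypothetical input DBR file
config.output_type = OUTPUT_TO_FILE        # write the parsed JSON to a file
parser.parse(config, verbose=True)
```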
160 |
161 | Version 0.3 - 2016-02-12
162 | ~~~~~~~~~~~~~~~~~~~~~~~~
163 |
164 | - Added incremental updates with the ``--check`` parameter (now you can
165 |   update the same file to the index without needing to delete the index
166 |   and reprocess the entire file again);
167 | - Compatible with Elasticsearch 2.1 and above (Removed the
168 | ``_timestamp`` from mapping that has been deprecated from 2.0 and
169 | above);
170 | - Included elapsed time to evaluate the time to process the file.
171 |
172 | Version 0.2 - 2015-10-26
173 | ~~~~~~~~~~~~~~~~~~~~~~~~
174 |
175 | - Filtering of control messages (stops the error at the end of processing);
176 | - Verbose output of the processing;
177 | - Progress bar;
178 | - Output options (to file or directly to Elasticsearch);
179 | - Elasticsearch mapping.
180 |
181 | Version 0.1 - 2015-10-17
182 | ~~~~~~~~~~~~~~~~~~~~~~~~
183 |
184 | - Initial version.
185 |
186 | .. _AWS boto3: https://aws.amazon.com/pt/sdk-for-python/
187 | .. _Elasticsearch: https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/
188 | .. _click: http://click.pocoo.org/
189 | .. _py.test: http://pytest.org/
190 | .. _tox: https://testrun.org/tox/latest/
191 | .. _flake8: https://gitlab.com/pycqa/flake8
192 |
--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-midnight
--------------------------------------------------------------------------------
/awsdbrparser/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # awsdbrparser/__init__.py
4 | #
5 | # Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
6 | #
7 | # Licensed under the Apache License, Version 2.0 (the "License");
8 | # you may not use this file except in compliance with the License.
9 | # You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | #
19 |
20 | __author__ = "Rafael M. Koike"
21 | __email__ = "koiker@amazon.com"
22 | __date__ = "2019-03-26"
23 | __version__ = '0.6.0'
24 |
--------------------------------------------------------------------------------
/awsdbrparser/cli.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # awsdbrparser/cli.py
4 | #
5 | # Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
6 | #
7 | # Licensed under the Apache License, Version 2.0 (the "License");
8 | # you may not use this file except in compliance with the License.
9 | # You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | #
19 | import datetime
20 | import os
21 | import sys
22 | import time
23 |
24 | import click
25 |
26 | from . import parser
27 | from .config import BULK_SIZE
28 | from .config import Config
29 | from .config import ES_TIMEOUT
30 | from .config import OUTPUT_OPTIONS
31 | from .config import OUTPUT_TO_FILE
32 | from .config import PROCESS_BY_LINE
33 | from .config import PROCESS_OPTIONS
34 | from .config import DEFAULT_ES2
35 | from .utils import ClickEchoWrapper
36 | from .utils import display_banner
37 | from .utils import hints_for
38 | from .utils import values_of
39 |
40 | configure = click.make_pass_decorator(Config, ensure=True)
41 |
42 |
43 | @click.command()
44 | @click.option('-i', '--input', metavar='FILE', help='Input file (expected to be a CSV file).')
45 | @click.option('-o', '--output', metavar='FILE', help='Output file (will generate a JSON file).')
46 | @click.option('-e', '--es-host', metavar='HOST', help='Elasticsearch host name or IP address.')
47 | @click.option('-p', '--es-port', type=int, metavar='PORT', help='Elasticsearch port number.')
48 | @click.option('-to', '--es-timeout', type=int, default=ES_TIMEOUT, metavar='TIMEOUT',
49 | help='Elasticsearch connection Timeout.')
50 | @click.option('-ei', '--es-index', metavar='INDEX', help='Elasticsearch index prefix.')
51 | @click.option('-bi', '--analytics', is_flag=True, default=False,
 52 | help='Execute analytics on the file to generate extra information.')
53 | @click.option('-a', '--account-id', help='AWS Account-ID.')
54 | @click.option('-y', '--year', type=int, help='Year for the index (defaults to current year).')
55 | @click.option('-m', '--month', type=int, help='Month for the index (defaults to current month).')
56 | @click.option('-t', '--output-type', default=OUTPUT_TO_FILE,
57 | type=click.Choice(values_of(OUTPUT_OPTIONS)),
58 | help='Output type ({}, default is {}).'.format(hints_for(OUTPUT_OPTIONS), OUTPUT_TO_FILE))
59 | @click.option('-d', '--csv-delimiter', help='CSV delimiter (default is comma).')
60 | @click.option('--delete-index', is_flag=True, default=False,
61 | help='Delete current index before processing (default is keep).')
62 | @click.option('-bm', '--process-mode', default=PROCESS_BY_LINE,
63 | type=click.Choice(values_of(PROCESS_OPTIONS)),
64 | help='Send DBR line-by-line or in bulk ({}, bulk mode implies sending '
65 | 'data to an Elasticsearch instance).'.format(hints_for(PROCESS_OPTIONS)))
66 | @click.option('-bs', '--bulk-size', default=BULK_SIZE, metavar='BS',
 67 | help='Define the size of the bulk to send (see --process-mode option).')
68 | @click.option('-u', '--update', is_flag=True, default=False,
 69 | help='Update existing documents in Elasticsearch index before adding (should be used with --check flag).')
70 | @click.option('-c', '--check', is_flag=True, default=False,
 71 | help='Check if current record exists in Elasticsearch before adding a '
 72 | 'new one (this option will be ignored in bulk processing).')
73 | @click.option('--awsauth', is_flag=True, default=False,
74 | help='Access the Elasticsearch with AWS Signed V4 Requests')
 75 | @click.option('--es2/--es6', default=DEFAULT_ES2, help='Define the document type to be ingested. Default is Elasticsearch 2.x.')
76 | @click.option('-v', '--version', is_flag=True, default=False, help='Display version number and exit.')
77 | @click.option('-q', '--quiet', is_flag=True, default=False, help='Runs as silently as possible.')
78 | @click.option('--fail-fast', is_flag=True, default=False, help='Stop parsing on first index error.')
79 | @click.option('--debug', is_flag=True, default=False, help='Print extra data even in quiet mode.')
80 | @configure
81 | def main(config, *args, **kwargs):
82 | """AWS - Detailed Billing Records parser"""
83 |
84 | quiet = kwargs.pop('quiet')
85 | version = kwargs.pop('version')
86 |
87 | echo = ClickEchoWrapper(quiet=quiet)
88 | display_banner(echo=echo)
89 |
90 | if version:
91 | return
92 |
93 |
94 | # tweak kwargs for expected config object attributes
95 | kwargs['input_filename'] = kwargs.pop('input', config.input_filename)
96 | kwargs['output_filename'] = kwargs.pop('output', config.output_filename)
97 | kwargs['es_year'] = kwargs.pop('year', config.es_year)
98 | kwargs['es_month'] = kwargs.pop('month', config.es_month)
99 |
100 | config.update_from(**kwargs)
101 |
102 | if not os.path.isfile(config.input_filename):
103 | sys.exit('Input file not found: {}'.format(config.input_filename))
104 |
105 | start = time.time()
106 | parser.parse(config, verbose=(not quiet))
107 |
108 | elapsed_time = time.time() - start
109 | echo('Elapsed time: {}'.format(datetime.timedelta(seconds=elapsed_time)))
110 |
--------------------------------------------------------------------------------
/awsdbrparser/config.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # awsdbrparser/config.py
4 | #
5 | # Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
6 | #
7 | # Licensed under the Apache License, Version 2.0 (the "License");
8 | # you may not use this file except in compliance with the License.
9 | # You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | #
19 |
20 | import os
21 | import json
22 |
23 | from datetime import datetime
24 |
25 | OUTPUT_TO_FILE = '1'
26 | OUTPUT_TO_ELASTICSEARCH = '2'
27 |
28 | OUTPUT_OPTIONS = (
29 | (OUTPUT_TO_FILE, 'Output to File'),
30 | (OUTPUT_TO_ELASTICSEARCH, 'Output to Elasticsearch'),)
31 |
32 | PROCESS_BY_LINE = '1'
33 | PROCESS_BY_BULK = '2'
34 | PROCESS_BI_ONLY = '3'
35 |
36 | PROCESS_OPTIONS = (
37 | (PROCESS_BY_LINE, 'Process by Line'),
38 | (PROCESS_BY_BULK, 'Process in Bulk'),
39 | (PROCESS_BI_ONLY, 'Process BI Only'))
40 |
41 | BULK_SIZE = 1000
42 | ES_TIMEOUT = 30
43 |
44 | DEFAULT_ES2 = True
45 | DATA_PATH = 'data'
46 | DOCTYPE_FILES = {
47 | '2': 'dbr_doctype_es2x.json',
48 | '6': 'dbr_doctype_es6x.json'
49 | }
50 |
51 | """
52 | DBR document properties for actual document type.
53 | See :attr:`Config.es_doctype` and :attr:`Config.mapping` for details.
54 | """
55 |
56 |
57 | class Config(object):
58 | def __init__(self):
59 | today = datetime.today()
60 |
61 | # elasticsearch default values
62 | self.es_host = 'search-name-hash.region.es.amazonaws.com'
63 | self.es_port = 80
64 | self.es_index = 'billing'
65 | self.es_doctype = 'billing'
66 | self.es_year = today.year
67 | self.es_month = today.month
68 | self.es_timestamp = 'UsageStartDate' # fieldname that will be replaced by Timestamp
69 | self.es_timeout = ES_TIMEOUT
70 |
71 | # aws account id
72 | self.account_id = '01234567890'
73 |
 74 | # encoding (this is the default encoding for most files, but if the
 75 | # customer uses latin/spanish characters you may need to change it to:
 76 | # self.encoding = 'iso-8859-1')
77 | self.encoding = 'utf-8'
78 |
79 | # update flag (if True update existing documents in Elasticsearch index;
80 | # defaults to False for performance reasons)
81 | self.update = False
82 |
83 | # check flag (check if current record exists before add new -- for
84 | # incremental updates)
85 | self.check = False
86 |
87 | # Use AWS Signed requests to access the Elasticsearch
88 | self.awsauth = False
89 |
90 | # Run Business Intelligence on the line items
91 | self.analytics = False
92 |
93 | # Time to wait for the analytics process. Default is 30 minutes
94 | self.analytics_timeout = 30
95 |
96 | # Run Business Intelligence Only
97 | self.bi_only = False
98 |
 99 | # delete index flag indicates whether the current elasticsearch
100 | # index should be kept or deleted
101 | self.delete_index = False
102 |
103 | # debug flag (will force print some extra data even in quiet mode)
104 | self.debug = False
105 |
106 | # fail fast flag (if True stop parsing on first index error)
107 | self.fail_fast = False
108 |
109 | # input and output filenames
110 | self._input_filename = None
111 | self._output_filename = None
112 |
113 | # other defaults
114 | self.csv_delimiter = ','
115 | self._output_type = OUTPUT_TO_FILE
116 | self._bulk_mode = PROCESS_BY_LINE
117 | self.bulk_size = BULK_SIZE
118 | self.bulk_msg = {
119 | "RecordType": [
120 | "StatementTotal",
121 | "InvoiceTotal",
122 | "Rounding",
123 | "AccountTotal"]}
124 | self._es2 = False
125 | self._doctype = None
126 |
127 | @property
128 | def mapping(self):
129 | return {self.es_doctype: self.doctype}
130 |
131 | @property
132 | def output_type(self):
133 | return self._output_type
134 |
135 | @output_type.setter
136 | def output_type(self, value):
137 | if value not in (v for v, s in OUTPUT_OPTIONS):
138 | raise ValueError('Invalid output type value: {!r}'.format(value))
139 | self._output_type = value
140 |
141 | @property
142 | def output_to_file(self):
143 | return self.output_type == OUTPUT_TO_FILE
144 |
145 | @property
146 | def output_to_elasticsearch(self):
147 | return self.output_type == OUTPUT_TO_ELASTICSEARCH
148 |
149 | @property
150 | def process_mode(self):
151 | return self._bulk_mode
152 |
153 | @process_mode.setter
154 | def process_mode(self, value):
155 | if value not in (v for v, s in PROCESS_OPTIONS):
156 | raise ValueError('Invalid bulk mode value: {!r}'.format(value))
157 | self._bulk_mode = value
158 |
159 | @property
160 | def input_filename(self):
161 | return self._input_filename or self._sugest_filename('.csv')
162 |
163 | @input_filename.setter
164 | def input_filename(self, value):
165 | self._input_filename = value
166 |
167 | @property
168 | def output_filename(self):
169 | return self._output_filename or self._sugest_filename('.json')
170 |
171 |
172 | @property
173 | def doctype(self):
174 | return self._doctype
175 |
176 | @doctype.setter
177 | def doctype(self, version):
178 |         if version not in ('2', '6'):
179 |             raise ValueError('Invalid document type version: {!r}'.format(version))
180 |
181 | try:
182 | filename = os.path.join(os.path.dirname(__file__), DATA_PATH, DOCTYPE_FILES[version])
183 | self._doctype = json.load(open(filename))
184 | except IOError:
185 |             print('Unable to load the Elasticsearch doctype mapping')
186 | raise
187 |
188 | @property
189 | def es2(self):
190 | return self._es2
191 |
192 | @es2.setter
193 | def es2(self, is_es2x):
194 | self._es2 = is_es2x
195 | if self._es2:
196 | version = '2'
197 | else:
198 | version = '6'
199 | self.doctype = version
200 |
201 | @property
202 | def index_name(self):
203 | if self.es2:
204 |             # if using Elasticsearch 2.x the index name is composed of prefix-year-month
205 | return '{}-{:d}-{:02d}'.format(self.es_index, self.es_year, self.es_month)
206 | else:
207 | # if using Elasticsearch 6.x the index is just the prefix
208 | return self.es_index
209 |
210 |
211 | @output_filename.setter
212 | def output_filename(self, value):
213 | self._output_filename = value
214 |
215 | def update_from(self, **kwargs):
216 | for attr, value in kwargs.items():
217 | if value is None:
218 | # simply ignore None values
219 | continue
220 | if hasattr(self, attr):
221 | setattr(self, attr, value)
222 | else:
223 | raise AttributeError('{!r} object has no attribute {!r}'.format(
224 | self.__class__.__name__, attr))
225 |
226 | def _sugest_filename(self, extension):
227 | return '{}-aws-billing-detailed-line-items-with-' \
228 | 'resources-and-tags-{:04d}-{:02d}{}'.format(self.account_id, self.es_year, self.es_month, extension)
229 |
--------------------------------------------------------------------------------
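The `_sugest_filename` helper in `config.py` above encodes the standard DBR file naming convention. A minimal standalone sketch of that convention (the `dbr_filename` function below is illustrative, not part of the package):

```python
# Hypothetical standalone sketch of the DBR file naming convention used by
# Config._sugest_filename: account id, zero-padded year/month, extension.
def dbr_filename(account_id, year, month, extension):
    return '{}-aws-billing-detailed-line-items-with-' \
           'resources-and-tags-{:04d}-{:02d}{}'.format(account_id, year, month, extension)

print(dbr_filename('123456789012', 2016, 3, '.csv'))
# 123456789012-aws-billing-detailed-line-items-with-resources-and-tags-2016-03.csv
```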
/awsdbrparser/data/dbr_doctype_es2x.json:
--------------------------------------------------------------------------------
1 | {
2 | "properties": {
3 | "LinkedAccountId": {"type": "string"},
4 | "InvoiceID": {"type": "string", "index": "not_analyzed"},
5 | "RecordType": {"type": "string"},
6 | "RecordId": {"type": "string", "index": "not_analyzed"},
7 | "UsageType": {"type": "string", "index": "not_analyzed"},
8 | "UsageEndDate": {"type": "date", "format": "YYYY-MM-dd HH:mm:ss"},
9 | "ItemDescription": {"type": "string", "index": "not_analyzed"},
10 | "ProductName": {"type": "string", "index": "not_analyzed"},
11 | "RateId": {"type": "string"},
12 | "Rate": {"type": "float"},
13 | "AvailabilityZone": {"type": "string", "index": "not_analyzed"},
14 | "PricingPlanId": {"type": "string", "index": "not_analyzed"},
15 | "ResourceId": {"type": "string", "index": "not_analyzed"},
16 | "Cost": {"type": "float"},
17 | "PayerAccountId": {"type": "string", "index": "not_analyzed"},
18 | "SubscriptionId": {"type": "string", "index": "not_analyzed"},
19 | "UsageQuantity": {"type": "float"},
20 | "Operation": {"type": "string"},
21 | "ReservedInstance": {"type": "string", "index": "not_analyzed"},
22 | "UsageStartDate": {"type": "date", "format": "YYYY-MM-dd HH:mm:ss"},
23 | "BlendedCost": {"type": "float"},
24 | "BlendedRate": {"type": "float"},
25 | "UnBlendedCost": {"type": "float"},
26 | "UnBlendedRate": {"type": "float"}
27 | }, "dynamic_templates": [
28 | {
29 | "notanalyzed": {
30 | "match": "*",
31 | "match_mapping_type": "string",
32 | "mapping": {
33 | "type": "string",
34 | "index": "not_analyzed"
35 | }
36 | }
37 | }
38 | ]
39 | }
--------------------------------------------------------------------------------
/awsdbrparser/data/dbr_doctype_es6x.json:
--------------------------------------------------------------------------------
1 | {
2 | "properties": {
3 | "LinkedAccountId": {"type": "keyword"},
4 | "InvoiceID": {"type": "keyword"},
5 | "RecordType": {"type": "keyword"},
6 | "RecordId": {"type": "keyword"},
7 | "UsageType": {"type": "keyword"},
8 | "UsageEndDate": {"type": "date", "format": "YYYY-MM-dd HH:mm:ss"},
9 | "ItemDescription": {"type": "keyword"},
10 | "ProductName": {"type": "keyword"},
11 | "RateId": {"type": "keyword"},
12 | "Rate": {"type": "float"},
13 | "AvailabilityZone": {"type": "keyword"},
14 | "PricingPlanId": {"type": "keyword"},
15 | "ResourceId": {"type": "keyword"},
16 | "Cost": {"type": "float"},
17 | "PayerAccountId": {"type": "keyword"},
18 | "SubscriptionId": {"type": "keyword"},
19 | "UsageQuantity": {"type": "float"},
20 | "Operation": {"type": "keyword"},
21 | "ReservedInstance": {"type": "keyword"},
22 | "UsageStartDate": {"type": "date", "format": "YYYY-MM-dd HH:mm:ss"},
23 | "BlendedCost": {"type": "float"},
24 | "BlendedRate": {"type": "float"},
25 | "UnBlendedCost": {"type": "float"},
26 | "UnBlendedRate": {"type": "float"}
27 | }, "dynamic_templates": [
28 | {
29 | "notanalyzed": {
30 | "match": "*",
31 | "match_mapping_type": "string",
32 | "mapping": {
33 | "type": "string",
34 | "index": "not_analyzed"
35 | }
36 | }
37 | }
38 | ]
39 | }
--------------------------------------------------------------------------------
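The two mapping files above differ mainly in how non-analyzed strings are expressed: ES 2.x uses `{"type": "string", "index": "not_analyzed"}` while ES 6.x uses `{"type": "keyword"}`. A hypothetical converter illustrating that correspondence (not part of the package):

```python
# Hypothetical helper showing the ES 2.x -> 6.x property translation seen
# in these mapping files: 'string' + 'not_analyzed' becomes 'keyword'.
def to_es6(prop):
    if prop.get('type') == 'string':
        if prop.get('index') == 'not_analyzed':
            return {'type': 'keyword'}
        return {'type': 'text'}
    return dict(prop)  # float, date, etc. are unchanged

print(to_es6({'type': 'string', 'index': 'not_analyzed'}))  # {'type': 'keyword'}
print(to_es6({'type': 'float'}))                            # {'type': 'float'}
```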
/awsdbrparser/parser.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # awsdbrparser/parser.py
4 | #
5 | # Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
6 | #
7 | # Licensed under the Apache License, Version 2.0 (the "License");
8 | # you may not use this file except in compliance with the License.
9 | # You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | #
19 | from __future__ import print_function
20 |
21 | import collections
22 | import csv
23 | import json
24 | import threading
25 | import time
26 |
27 | import boto3
28 | import click
29 | from elasticsearch import Elasticsearch, RequestsHttpConnection, helpers
30 | from requests_aws4auth import AWS4Auth
31 |
32 | from . import utils
33 | from .config import PROCESS_BY_BULK, PROCESS_BY_LINE, PROCESS_BI_ONLY
34 |
35 | Summary = collections.namedtuple('Summary', 'added skipped updated control_messages')
36 | """
37 | Holds the summary of documents processed by the parser.
38 | """
39 |
40 |
41 | class ParserError(Exception):
42 | pass
43 |
44 |
45 | def analytics(config, echo):
46 | """
47 |     Generate extra information in Elasticsearch by analyzing the line
48 |     items of the input file.
49 |     :param config: an instance of :class:`~awsdbrparser.config.Config`.
50 |     :param echo: a callable used to display progress messages.
51 | """
52 |
53 |     # Open the input file again so analytics can run in parallel with parsing
54 | file_in = open(config.input_filename, 'r')
55 | awsauth = None
56 | if config.awsauth:
57 | session = boto3.Session()
58 | credentials = session.get_credentials()
59 | if credentials:
60 | region = session.region_name
61 | awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, 'es',
62 | session_token=credentials.token)
63 |
64 | es = Elasticsearch([{'host': config.es_host, 'port': config.es_port}], timeout=config.es_timeout, http_auth=awsauth,
65 | connection_class=RequestsHttpConnection)
66 | es.indices.create(config.index_name, ignore=400)
67 | es.indices.create(config.es_doctype, ignore=400)
68 |
69 | csv_file = csv.DictReader(file_in, delimiter=config.csv_delimiter)
70 | analytics_daytime = dict()
71 | analytics_day_only = dict()
72 | for recno, json_row in enumerate(csv_file):
73 | # Pre-Process the row to append extra information
74 | json_row = utils.pre_process(json_row)
75 | if is_control_message(json_row, config):
76 | # Skip this line
77 | continue
78 | elif json_row.get('ProductName') == 'Amazon Elastic Compute Cloud' and 'RunInstances' in json_row.get(
79 | 'Operation') and json_row.get('UsageItem'):
80 | # Get the day time ('2016-03-01 01:00:00')
81 | daytime = json_row.get('UsageStartDate')
82 | # the day only '2016-03-01'
83 | day = json_row.get('UsageStartDate').split(' ')[0]
84 | # Add the day time to the dict
85 | analytics_daytime.setdefault(daytime, {"Count": 0, "Cost": 0.00, "RI": 0, "Spot": 0, "Unblended": 0.00})
86 | # Increment the count of total instances
87 | analytics_daytime[daytime]["Count"] += 1
88 | analytics_daytime[daytime]["Unblended"] += float(json_row.get('UnBlendedCost', 0.00))
89 | analytics_daytime[daytime]["Cost"] += float(json_row.get('Cost', 0.00))
90 |
91 | # Add the day only to the dict
92 | analytics_day_only.setdefault(day, {"Count": 0, "RI": 0, "Spot": 0, "Min": None, "Max": None})
93 | analytics_day_only[day]["Count"] += 1
94 | # Increment the count of RI or Spot if the instance is one or other
95 | if json_row.get('UsageItem') == 'Reserved Instance':
96 | analytics_day_only[day]["RI"] += 1
97 | analytics_daytime[daytime]["RI"] += 1
98 | elif json_row.get('UsageItem') == 'Spot Instance':
99 | analytics_day_only[day]["Spot"] += 1
100 | analytics_daytime[daytime]["Spot"] += 1
101 |
102 |     # Some DBR files have Cost (single account) and some have (Un)BlendedCost (consolidated account).
103 |     # In that case we process both, but one will be zero, so we need to check.
104 |     # TODO: use a single variable and a flag to output Cost or Unblended
105 | if config.es2:
106 | index_name = config.index_name
107 | else:
108 | index_name = 'ec2_per_usd'
109 | if not es.indices.exists(index=index_name):
110 | es.indices.create(index_name, ignore=400, body={
111 | "mappings": {
112 | "ec2_per_usd": {
113 | "properties": {
114 | "UsageStartDate" : {"type": "date", "format": "YYYY-MM-dd HH:mm:ss"}
115 | }
116 | }
117 | }
118 | })
119 | for k, v in analytics_daytime.items():
120 | result_cost = 1.0 / (v.get('Cost') / v.get('Count')) if v.get('Cost') else 0.00
121 | result_unblended = 1.0 / (v.get('Unblended') / v.get('Count')) if v.get('Unblended') else 0.0
122 | response = es.index(index=index_name, doc_type='ec2_per_usd',
123 | body={'UsageStartDate': k,
124 | 'EPU_Cost': result_cost,
125 | 'EPU_UnBlended': result_unblended})
126 | if not response.get('created'):
127 | echo('[!] Unable to send document to ES!')
128 |
129 | # Elasticity
130 | #
131 | # The calculation is 1 - min / max EC2 instances per day
132 | # The number of EC2 instances has been calculated previously
133 | #
134 | if config.es2:
135 | index_name = config.index_name
136 | else:
137 | index_name = 'elasticity'
138 | if not es.indices.exists(index=index_name):
139 | es.indices.create(index_name, ignore=400, body={
140 | "mappings": {
141 | "elasticity": {
142 | "properties": {
143 | "UsageStartDate" : {"type": "date", "format": "YYYY-MM-dd HH:mm:ss"}
144 | }
145 | }
146 | }
147 | })
148 | for k, v in analytics_day_only.items():
149 | ec2_min = min(value["Count"] - value["RI"] for key, value in analytics_daytime.items() if k in key)
150 | ec2_max = max(value["Count"] - value["RI"] for key, value in analytics_daytime.items() if k in key)
151 | if ec2_max:
152 | elasticity = 1.0 - float(ec2_min) / float(ec2_max)
153 | else:
154 | elasticity = 1.0
155 |
156 | ri_coverage = float(analytics_day_only[k]["RI"]) / float(analytics_day_only[k]["Count"])
157 | spot_coverage = float(analytics_day_only[k]["Spot"]) / float(analytics_day_only[k]["Count"])
158 |
159 |
160 | response = es.index(index=index_name, doc_type='elasticity',
161 | body={'UsageStartDate': k + ' 12:00:00',
162 | 'Elasticity': elasticity,
163 | 'ReservedInstanceCoverage': ri_coverage,
164 | 'SpotCoverage': spot_coverage})
165 |
166 | if not response.get('created'):
167 | echo('[!] Unable to send document to ES!')
168 |
169 | file_in.close()
170 | # Finished Processing
171 | return
172 |
173 |
174 | def parse(config, verbose=False):
175 | """
176 |
177 | :param verbose:
178 | :param config: An instance of :class:`~awsdbrparser.config.Config` class,
179 | used for parsing parametrization.
180 |
181 | :rtype: Summary
182 | """
183 | echo = utils.ClickEchoWrapper(quiet=(not verbose))
184 |
185 |
186 | echo('Opening input file: {}'.format(config.input_filename))
187 | file_in = open(config.input_filename, 'r')
188 |
189 | if config.output_to_file:
190 | echo('Opening output file: {}'.format(config.output_filename))
191 | file_out = open(config.output_filename, 'w')
192 |
193 | elif config.output_to_elasticsearch:
194 | echo('Sending DBR to Elasticsearch host: {}:{}'.format(config.es_host, config.es_port))
195 | awsauth = None
196 | if config.awsauth:
197 | session = boto3.Session()
198 | credentials = session.get_credentials()
199 | if credentials:
200 | region = session.region_name
201 | awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, 'es',
202 | session_token=credentials.token)
203 |
204 | es = Elasticsearch([{'host': config.es_host, 'port': config.es_port}], timeout=config.es_timeout,
205 | http_auth=awsauth, connection_class=RequestsHttpConnection)
206 | if config.delete_index:
207 | echo('Deleting current index: {}'.format(config.index_name))
208 | es.indices.delete(config.index_name, ignore=404)
209 | es.indices.create(config.index_name, ignore=400)
210 | es.indices.put_mapping(index=config.index_name, doc_type=config.es_doctype, body=config.mapping)
211 |
212 | if verbose:
213 | progressbar = click.progressbar
214 |
215 | # calculate number of rows in input file in preparation to display a progress bar
216 | record_count = sum(1 for _ in file_in) - 1
217 | file_in.seek(0) # reset file descriptor
218 |
219 | echo("Input file has {} record(s)".format(record_count))
220 |
221 | if config.process_mode == PROCESS_BY_BULK:
222 | echo('Processing in BULK MODE, size: {}'.format(config.bulk_size))
223 | elif config.process_mode == PROCESS_BY_LINE:
224 | echo('Processing in LINE MODE')
225 | elif config.process_mode == PROCESS_BI_ONLY:
226 | if config.analytics:
227 | echo('Processing BI Only')
228 | else:
229 |             echo("The -bi parameter is not set. Nothing to do.")
230 | else:
231 | # uses a 100% bug-free progressbar, guaranteed :-)
232 | progressbar = utils.null_progressbar
233 | record_count = 0
234 |
235 | # If BI is enabled, create a thread and start running
236 | analytics_start = time.time()
237 | if config.analytics:
238 | echo('Starting the BI Analytics Thread')
239 | thread = threading.Thread(target=analytics, args=(config, echo,))
240 | thread.start()
241 |
242 | added = skipped = updated = control = 0
243 |
244 | if config.process_mode == PROCESS_BY_BULK:
245 | with progressbar(length=record_count) as pbar:
246 | # If you wish to sort the records by UsageStartDate before send to
247 | # ES just uncomment the 2 lines below and comment the third line
248 | # reader = csv.DictReader(file_in, delimiter=config.csv_delimiter)
249 | # csv_file = sorted(reader, key=lambda line: line["UsageStartDate"]+line["UsageEndDate"])
250 | csv_file = csv.DictReader(file_in, delimiter=config.csv_delimiter)
251 |
252 | def documents():
253 | for json_row in csv_file:
254 | if not is_control_message(json_row, config):
255 | if config.debug:
256 | print(json.dumps( # do not use 'echo()' here
257 | utils.pre_process(json_row)))
258 | yield json.dumps(utils.pre_process(json_row))
259 | pbar.update(1)
260 |
261 | for recno, (success, result) in enumerate(helpers.streaming_bulk(es, documents(),
262 | index=config.index_name,
263 | doc_type=config.es_doctype,
264 | chunk_size=config.bulk_size)):
265 | # integer, the record number (0-based)
266 | # bool
267 | # a dictionary like this one:
268 | #
269 | # {
270 | # 'create': {
271 | # 'status': 201,
272 | # '_type': 'billing',
273 | # '_shards': {
274 | # 'successful': 1,
275 | # 'failed': 0,
276 | # 'total': 2
277 | # },
278 | # '_index': 'billing-2015-12',
279 | # '_version': 1,
280 | # '_id': u'AVOmiEdSF_o3S6_4Qeur'
281 | # }
282 | # }
283 | #
284 | if not success:
285 | message = 'Failed to index record {:d} with result: {!r}'.format(recno, result)
286 | if config.fail_fast:
287 | raise ParserError(message)
288 | else:
289 | echo(message, err=True)
290 | else:
291 | added += 1
292 |
293 | elif config.process_mode == PROCESS_BY_LINE:
294 | with progressbar(length=record_count) as pbar:
295 | csv_file = csv.DictReader(file_in, delimiter=config.csv_delimiter)
296 | for recno, json_row in enumerate(csv_file):
297 | if is_control_message(json_row, config):
298 | control += 1
299 | else:
300 | if config.debug:
301 | print(json.dumps( # do not use 'echo()' here
302 | utils.pre_process(json_row),
303 | ensure_ascii=False))
304 |
305 | if config.output_to_file:
306 | file_out.write(
307 | json.dumps(utils.pre_process(json_row), ensure_ascii=False))
308 | file_out.write('\n')
309 | added += 1
310 |
311 | elif config.output_to_elasticsearch:
312 | if config.check:
313 | # FIXME: the way it was, `search_exists` will not suffice, since we'll need the document _id for the update operation; # noqa
314 | # FIXME: use `es.search` with the following sample body: `{'query': {'match': {'RecordId': '43347302922535274380046564'}}}`; # noqa
315 | # SEE: https://elasticsearch-py.readthedocs.org/en/master/api.html#elasticsearch.Elasticsearch.search; # noqa
316 | response = es.search_exists(index=config.es_doctype, doc_type=config.es_doctype,
317 | q='RecordId:{}'.format(json_row['RecordId']))
318 | if response:
319 | if config.update:
320 | # TODO: requires _id from the existing document
321 | # FIXME: requires use of `es.search` method instead of `es.search_exists`
322 | # SEE: https://elasticsearch-py.readthedocs.org/en/master/api.html#elasticsearch.Elasticsearch.update; # noqa
323 | skipped += 1
324 | else:
325 | skipped += 1
326 | else:
327 | response = es.index(index=config.es_doctype, doc_type=config.es_doctype,
328 | body=body_dump(json_row, config))
329 | if not es_index_successful(response):
330 | message = 'Failed to index record {:d} with result {!r}'.format(recno, response)
331 | if config.fail_fast:
332 | raise ParserError(message)
333 | else:
334 | echo(message, err=True)
335 | else:
336 | added += 1
337 | else:
338 | response = es.index(index=config.es_doctype, doc_type=config.es_doctype,
339 | body=body_dump(json_row, config))
340 | if not es_index_successful(response):
341 | message = 'Failed to index record {:d} with result {!r}'.format(recno, response)
342 | if config.fail_fast:
343 | raise ParserError(message)
344 | else:
345 | echo(message, err=True)
346 | else:
347 | added += 1
348 |
349 | pbar.update(1)
350 | elif config.process_mode == PROCESS_BI_ONLY and config.analytics:
351 | echo('Processing Analytics Only')
352 | while thread.is_alive():
353 | # Wait for a timeout
354 | analytics_now = time.time()
355 |             if analytics_now - analytics_start > config.analytics_timeout * 60:
356 |                 echo('Analytics processing timed out; exiting.')
357 | break
358 | time.sleep(5)
359 |
360 | else:
361 | echo('Nothing to do!')
362 |
363 | file_in.close()
364 |
365 | if config.output_to_file:
366 | file_out.close()
367 |
368 | echo('Finished processing!')
369 | echo('')
370 |
371 |     # the first line is the header, so it is skipped by the counts below
372 | echo('Summary of documents processed...')
373 | echo(' Added: {}'.format(added))
374 | echo(' Skipped: {}'.format(skipped))
375 | echo(' Updated: {}'.format(updated))
376 | echo('Control messages: {}'.format(control))
377 | echo('')
378 |
379 | return Summary(added, skipped, updated, control)
380 |
381 |
382 | def is_control_message(record, config):
383 | # record dict
384 | # an instance of `awsdbrparser.config.Config`
385 | # data = json.dumps(record, ensure_ascii=False)
386 | return utils.bulk_data(record, config.bulk_msg)
387 |
388 |
389 | def body_dump(record, config):
390 | # record dict
391 | # an instance of `awsdbrparser.config.Config`
392 | body = json.dumps(utils.pre_process(record), ensure_ascii=False)
393 | return body
394 |
395 |
396 | def es_index_successful(response):
397 | """
398 | Test if an Elasticsearch client ``index`` method response indicates a
399 | successful index operation. The response parameter should be a dictionary
400 | with following keys:
401 |
402 | .. sourcecode:: python
403 |
404 | {
405 | '_shards': {
406 | 'total': 2,
407 | 'failed': 0,
408 | 'successful': 1
409 | },
410 | '_index': u'billing-2015-12',
411 | '_type': u'billing',
412 | '_id': u'AVOmKFXgF_o3S6_4PkP1',
413 | '_version': 1,
414 | 'created': True
415 | }
416 |
417 |     According to the Elasticsearch Index API, an index operation is
419 |     successful when ``successful`` is at least 1.
420 |
421 | :rtype: bool
422 | """
423 | return response.get('_shards', {}).get('successful', 0) >= 1
424 |
--------------------------------------------------------------------------------
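The elasticity and coverage figures computed in `analytics()` above reduce to simple ratios: elasticity is 1 minus the min/max ratio of non-reserved EC2 instance counts per day, and RI/Spot coverage are fractions of the day's instance count. A standalone restatement of those formulas (function names here are illustrative, not part of the package):

```python
# Standalone restatement of the analytics() formulas.
def elasticity(ec2_min, ec2_max):
    # 1 - min/max of non-reserved EC2 instances per day; 1.0 when max is zero
    return 1.0 - float(ec2_min) / float(ec2_max) if ec2_max else 1.0

def coverage(covered, total):
    # fraction of the day's instances that are Reserved (or Spot)
    return float(covered) / float(total)

print(elasticity(2, 8))  # 0.75
print(coverage(3, 10))   # 0.3
```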
/awsdbrparser/utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # awsdbrparser/utils.py
4 | #
5 | # Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
6 | #
7 | # Licensed under the Apache License, Version 2.0 (the "License");
8 | # you may not use this file except in compliance with the License.
9 | # You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | #
19 | import contextlib
20 |
21 | import click
22 |
23 | from . import __version__
24 |
25 |
26 | def pre_process(json_dict):
27 | """
28 |     Find JSON keys like '{"key:subkey": "value"}' and replace them
29 |     with '{"key": {"subkey": "value"}}'.
30 |
31 |     For EC2 usage line items, evaluate whether the instance is:
32 | * On-Demand
33 | * Reserved Instance
34 | * Spot
35 | The result is included in the field: UsageItem
36 |
37 | The instance size is included in the new field: InstanceType
38 |
39 | :param dict json_dict:
40 | :returns: json dict
41 | :rtype: dict
42 | """
43 | temp_json = dict()
44 | for key, value in json_dict.items():
45 | if ':' in key:
46 | # This key has COLON, let's try to split this key in key/subkey
47 | new_key, subkey = key.split(':', 1)
48 | temp_json.setdefault(new_key, {}).setdefault(subkey, value)
49 | else:
50 | temp_json.setdefault(key, value)
51 |
52 | temp_json['UsageItem'] = ''
53 |
54 |     if temp_json.get('ProductName') == 'Amazon Elastic Compute Cloud' and 'RunInstances' in temp_json.get('Operation', ''):
55 |         # Some line items contain strings like: "RunInstances:002".
56 | if temp_json.get('ReservedInstance', '') == 'Y':
57 | temp_json['UsageItem'] = 'Reserved Instance'
58 |
59 | elif 'BoxUsage' in temp_json.get('UsageType', ' '):
60 | # If this LineItem is a EC2 instance running we include 'EC2-Running'
61 | temp_json['UsageItem'] = 'On-Demand'
62 |
63 | elif 'SpotUsage' in temp_json.get('UsageType', ' '):
64 | temp_json['UsageItem'] = 'Spot Instance'
65 |
66 |         if ':' in temp_json.get('UsageType', ''):
67 | temp_json['InstanceType'] = temp_json.get('UsageType').split(':')[1]
68 | else:
69 | temp_json['InstanceType'] = 'N/A'
70 |
71 | return temp_json
72 |
73 |
74 | def bulk_data(json_string, bulk):
75 | """
76 |     Check if the record contains bulk data/control messages. The patterns to
77 |     check are given in the format ``{key: [values]}``.
78 |     Return True if a key/value match is found, False otherwise.
79 |
80 | :param dict json_string:
81 | :param dict bulk:
82 | :returns: True if found a control message and False if not.
83 | :rtype: bool
84 | """
85 | for key, value in bulk.items():
86 | if key in json_string.keys():
87 | for line in value:
88 | if json_string.get(key) == line:
89 | return True
90 | return False
91 |
92 |
93 | def values_of(choices):
94 | """
95 | Returns a tuple of values from choices options represented as a tuple of
96 | tuples (value, label). For example:
97 |
98 | .. sourcecode:: python
99 |
100 | >>> values_of((
101 | ... ('1', 'One'),
102 | ... ('2', 'Two'),))
103 | ('1', '2')
104 |
105 | :rtype: tuple
106 | """
107 | return tuple([value for value, label in choices])
108 |
109 |
110 | def hints_for(choices):
111 | """
112 | Build a hint string from choices options represented as a tuple of tuples.
113 | For example:
114 |
115 | .. sourcecode:: python
116 |
117 | >>> hints_for((
118 | ... ('1', 'One'),
119 | ... ('2', 'Two'),))
120 | '1=One, 2=Two'
121 |
122 | :rtype: str
123 | """
124 | return ', '.join(['{}={}'.format(value, label) for value, label in choices])
125 |
126 |
127 | def display_banner(echo=None):
128 | echo = echo or click.echo
129 | echo(" ___ _____ ___ ___ ___ ___ ")
130 | echo(" /_\ \ / / __| \| _ ) _ \ _ \__ _ _ _ ___ ___ _ _ ")
131 | echo(" / _ \ \/\/ /\__ \ |) | _ \ / _/ _` | '_(_- -_) '_|")
132 | echo("/_/ \_\_/\_/ |___/___/|___/_|_\_| \__,_|_| /__/\___|_| ")
133 | echo("AWS - Detailed Billing Records parser, version {}\n".format(__version__))
134 |
135 |
136 | @contextlib.contextmanager
137 | def null_progressbar(*arg, **kwargs):
138 | yield
139 |
140 |
141 | class ClickEchoWrapper(object):
142 | def __init__(self, quiet=False):
143 | self._quiet = quiet
144 |
145 | def __call__(self, *args, **kwargs):
146 | if self._quiet:
147 | return
148 | click.echo(*args, **kwargs)
149 |
--------------------------------------------------------------------------------
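`pre_process` above nests colon-separated CSV column names (typically user tags like `user:Project`) into sub-dictionaries. A simplified, self-contained restatement of just that step (the `split_colon_keys` name is hypothetical):

```python
# Simplified restatement of the key-splitting step in utils.pre_process:
# a column name like 'user:Project' becomes a nested {'user': {'Project': ...}}.
def split_colon_keys(row):
    out = {}
    for key, value in row.items():
        if ':' in key:
            new_key, subkey = key.split(':', 1)
            out.setdefault(new_key, {}).setdefault(subkey, value)
        else:
            out.setdefault(key, value)
    return out

row = {'RecordId': '42', 'user:Project': 'alpha', 'user:Team': 'bi'}
print(split_colon_keys(row))
# {'RecordId': '42', 'user': {'Project': 'alpha', 'Team': 'bi'}}
```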
/job.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | function usage
4 | {
5 | echo "usage: job [[-pm] | [-h]]"
6 | echo "Parameters:"
7 | echo "-pm | --previous-month = process the previous month from current date"
8 | }
9 |
10 | BUCKET='s3://bucket-123456'
11 | ACCOUNT='123456789012'
12 | YEAR=$(date +%Y)
13 | MONTH=$(date +%m)
14 | LOCAL_FOLDER='/mnt/jobs'
15 |
16 | ES_HOST='elastic-search-host.endpoint.name'
17 | ES_PORT=80
18 |
19 | # Process input parameters
20 | while [ "$1" != "" ]; do
21 | case $1 in
22 | -pm | --previous-month )echo "Processing previous month!"
23 | MONTH=$(date --date='-1 month' +%m)
24 | YEAR=$(date --date='-1 month' +%Y)
25 | ;;
26 | -h | --help ) usage
27 | exit
28 | ;;
29 | esac
30 | shift
31 | done
32 |
33 | DBR_FILE=$ACCOUNT-aws-billing-detailed-line-items-with-resources-and-tags-$YEAR-$MONTH.csv
34 | ZIP_FILE=$DBR_FILE.zip
35 |
36 | # Change to the local working folder
37 | cd $LOCAL_FOLDER
38 |
39 | # Copy the file from bucket to local folder
40 | aws s3 cp $BUCKET/$ZIP_FILE .
41 |
42 | # Extract the zipped file
43 | unzip -o $ZIP_FILE
44 |
45 | # Process the file with dbrparser
46 | dbrparser -i $DBR_FILE -e $ES_HOST -p $ES_PORT -t 2 -bm 2 -y $YEAR -m $MONTH --delete-index -bi
47 |
48 | # Remove processed file
49 | rm $DBR_FILE
50 | rm $ZIP_FILE
51 |
52 | echo 'Finished processing...'
53 |
--------------------------------------------------------------------------------
/requirements/base.txt:
--------------------------------------------------------------------------------
1 | click>=6.3
2 | boto3>=1.9.2
3 | elasticsearch>=6.0.0,<7.0.0
4 | Unidecode>=0.04.19
5 | requests-aws4auth>=0.9.0
6 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [wheel]
2 | universal = 1
3 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # setup.py
4 | #
5 | # Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
6 | #
7 | # Licensed under the Apache License, Version 2.0 (the "License");
8 | # you may not use this file except in compliance with the License.
9 | # You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | #
19 |
20 | """
21 | Parse DBR (Detailed Billing Records) and send resulting data direct to
22 | Elasticsearch or dump formatted as JSON.
23 | """
24 |
25 | import io
26 | import os
27 | import re
28 |
29 | from setuptools import find_packages, setup
30 |
31 |
32 | def read(*filenames, **kwargs):
33 | encoding = kwargs.get('encoding', 'utf-8')
34 | sep = kwargs.get('sep', os.linesep)
35 | buf = []
36 | for filename in filenames:
37 | with io.open(filename, encoding=encoding) as f:
38 | buf.append(f.read())
39 | return sep.join(buf)
40 |
41 |
42 | def read_version():
43 | content = read(os.path.join(
44 | os.path.dirname(__file__), 'awsdbrparser', '__init__.py'))
45 | return re.search(r"__version__ = '([^']+)'", content).group(1)
46 |
47 |
48 | def read_requirements():
49 | content = read(os.path.join('requirements', 'base.txt'))
50 | return [line for line in content.split(os.linesep)
51 | if not line.strip().startswith('#')]
52 |
53 |
54 | setup(
55 | name='awsdbrparser',
56 | version=read_version(),
57 | url='http://github.com/awslabs/aws-detailed-billing-parser',
58 | license='Apache Software License',
59 | author='Rafael M. Koike',
60 | author_email='koiker@amazon.com',
61 | description='Parse DBR and send to Elasticsearch or dumps to JSON',
62 | long_description=read('README.rst'),
63 | packages=find_packages(exclude=['tests']),
64 | package_data={'awsdbrparser': ['data/*.json']},
65 | include_package_data=True,
66 | zip_safe=False,
67 | platforms='any',
68 | install_requires=read_requirements(),
69 | entry_points={
70 | 'console_scripts': [
71 | 'dbrparser = awsdbrparser.cli:main',
72 | ],
73 | },
74 | classifiers=[
75 | 'Development Status :: 4 - Beta',
76 | 'Environment :: Console',
77 | 'Environment :: Other Environment',
78 | 'Intended Audience :: Developers',
79 | 'Intended Audience :: Information Technology',
80 | 'License :: OSI Approved :: Apache Software License',
81 | 'Operating System :: OS Independent',
82 | 'Programming Language :: Python',
83 | 'Programming Language :: Python :: 2',
84 | 'Programming Language :: Python :: 3',
85 | 'Topic :: Internet',
86 | 'Topic :: Software Development :: Libraries :: Python Modules',
87 | 'Topic :: Utilities',
88 | ]
89 | )
90 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # tests/__init__.py
4 | #
5 | # Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
6 | #
7 | # Licensed under the Apache License, Version 2.0 (the "License");
8 | # you may not use this file except in compliance with the License.
9 | # You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | #
19 |
--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # tests/test_cli.py
4 | #
5 | # Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
6 | #
7 | # Licensed under the Apache License, Version 2.0 (the "License");
8 | # you may not use this file except in compliance with the License.
9 | # You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | #
19 | import pytest
20 |
21 | from click.testing import CliRunner
22 |
23 | from awsdbrparser import cli
24 |
25 |
26 | @pytest.fixture
27 | def runner():
28 | return CliRunner()
29 |
30 |
31 | def test_cli(runner):
32 | result = runner.invoke(cli.main)
33 | assert result.exit_code != 0  # should raise IOError for missing default input file
34 |
35 |
36 | def test_cli_with_option(runner):
37 | result = runner.invoke(cli.main, ['--version'])
38 | assert not result.exception
39 | assert result.exit_code == 0
40 | assert 'AWS - Detailed Billing Records parser' in result.output
41 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist=py27, py35, py36, flake8
3 |
4 | [flake8]
5 | ignore = E128, E126
6 |
7 | [testenv]
8 | commands=py.test --cov awsdbrparser {posargs}
9 | deps=
10 | pytest
11 | pytest-cov
12 |
13 | [testenv:flake8]
14 | basepython = python2.7
15 | deps =
16 | flake8
17 | commands =
18 | flake8 awsdbrparser tests --max-line-length=120
19 |
--------------------------------------------------------------------------------