├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── hippolyte ├── __init__.py ├── aws_utils.py ├── config_util.py ├── dynamodb_backup.py ├── dynamodb_booster.py ├── monitor.py ├── multiple.template ├── pipeline_scheduler.py ├── pipeline_translator.py ├── project_config.py └── utils.py ├── requirements-dev.txt ├── requirements.txt ├── serverless.yml └── tests ├── __init__.py ├── resources └── test_backup_metadata.json ├── test.py ├── test_dynamodb_backup.py ├── test_dynamodb_booster.py ├── test_monitor.py └── test_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *.cover 46 | .hypothesis/ 47 | 48 | 49 | # Serverless directories 50 | .serverless 51 | .requirements 52 | 53 | # Pycharm 54 | .idea/ 55 | 56 | # Node.js 57 | node_modules -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "2.7" 5 | 6 | env: 7 | - AWS_DEFAULT_REGION=us-east-1 TRAVIS_NODE_VERSION=6.11.0 SERVERLESS_VERSION=1.16.1 SERVERLESS_PYTHONR_VERSION=2.3.3 8 | 9 | install: 10 | - rm -rf ~/.nvm && git clone https://github.com/creationix/nvm.git ~/.nvm && (cd ~/.nvm && git checkout `git describe --abbrev=0 --tags`) && source ~/.nvm/nvm.sh && nvm install $TRAVIS_NODE_VERSION 11 | - npm install serverless@$SERVERLESS_VERSION -g 12 | - npm install --save serverless-python-requirements@$SERVERLESS_PYTHONR_VERSION 13 | - pip install -r requirements-dev.txt 14 | 15 | script: 16 | - python tests/test.py 17 | - serverless deploy --region $AWS_DEFAULT_REGION --stage dev --email test@test.com --noDeploy 18 | - serverless deploy --region $AWS_DEFAULT_REGION --stage prod --email test@test.com --noDeploy 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Hippolyte [![Build Status](https://travis-ci.org/ocadotechnology/hippolyte.svg?branch=master)](https://travis-ci.org/ocadotechnology/hippolyte) [![Gitter](https://img.shields.io/gitter/room/TechnologyAdvice/Stardust.svg)](https://gitter.im/ocado-hippolyte) 2 | _Project Discontinued: AWS released [native DynamoDB backups](https://aws.amazon.com/blogs/aws/new-for-amazon-dynamodb-global-tables-and-on-demand-backup/)._ 3 | 4 | Hippolyte is an at-scale, point-in-time backup solution for DynamoDB. It is designed to handle frequent, recurring backups of large numbers of tables, scale read throughput, and batch backup jobs together over multiple EMR clusters. 5 | 6 | ## Deployment 7 | Hippolyte is deployed with the [Serverless Framework](https://serverless.com/). We have tested it with Node.js 6.11 / Serverless 1.16.1 / serverless-python-requirements 2.3.3; these can be installed with `npm`. To start, run: 8 | ``` 9 | npm install serverless@1.16.1 10 | npm install --save serverless-python-requirements@2.3.3 11 | ``` 12 | To configure the project for your Amazon accounts, update `hippolyte/project_config.py` with the details of the account in which you intend to run the backup process. You will also need AWS credentials for creating all the dependent resources: 13 | * Lambda function 14 | * CloudWatch scheduled events 15 | * SNS topic 16 | 17 | To deploy the stack run 18 | `serverless deploy --region <region> --stage <stage> --email <email>` 19 | You can update `serverless.yml` to associate your credentials with stages if you intend to deploy multiple instances of the service. The email setting is optional and uses SNS to alert the provided address about any failed pipelines or tables. 20 | 21 | ## Motivation 22 | Since DynamoDB is a fully managed service and supports cross-region replication, you may wonder why you need to back up data in the first place. If you're running production applications on AWS then you probably already have a lot of confidence in the durability of data in services like DynamoDB or S3. 23 | 24 | Our motivation for building this was to protect against application or user error. No matter how durable Amazon's services are, they won't protect you from unintended updates and deletes. Historical snapshots of data and state also provide additional value, allowing you to restore to a separate table and compare against live data. 25 | 26 | ## Design 27 | We've chosen [Amazon Data Pipeline](https://aws.amazon.com/datapipeline/) as the tool to create, manage and run our backup tasks. Data Pipeline helps with orchestration, automatic retries for failed jobs and the potential to make use of SNS notifications for successful or failed EMR tasks. 28 | 29 | We also use AWS Lambda to schedule and monitor backup jobs. This function is responsible for dynamically generating Data Pipeline templates based on configuration and discovered tables, and for modifying table throughputs to reduce the duration of the backup job.
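Under the hood, the scheduling Lambda drives each backup through three Data Pipeline API calls: create a pipeline, upload the rendered definition, then activate it. The snippet below is a minimal, illustrative sketch of that lifecycle using boto3; the real logic lives in `hippolyte/aws_utils.py` and `hippolyte/pipeline_scheduler.py`, and the pipeline name, log bucket and lone `Default` object shown here are placeholders for the objects rendered from `multiple.template`.
```
import boto3
from uuid import uuid4

# Illustrative only: Hippolyte renders the full object list (DynamoDBDataNode,
# EmrActivity, S3DataNode, EmrCluster) from multiple.template; this sketch just
# shows the create -> put definition -> activate sequence with a placeholder
# Default object, pipeline name and log bucket.
client = boto3.client('datapipeline')

pipeline_id = client.create_pipeline(
    name='dynamodb-backup-example',   # the real code appends a timestamp
    uniqueId=str(uuid4())
)['pipelineId']

pipeline_objects = [
    {
        'id': 'Default',
        'name': 'Default',
        'fields': [
            {'key': 'scheduleType', 'stringValue': 'ONDEMAND'},
            {'key': 'role', 'stringValue': 'DataPipelineDefaultRole'},
            {'key': 'resourceRole', 'stringValue': 'DataPipelineDefaultResourceRole'},
            {'key': 'pipelineLogUri', 'stringValue': 's3://example-pipeline-log-bucket/'},
        ],
    },
    # ...per-table DynamoDBDataNode / EmrActivity / S3DataNode objects go here...
]

client.put_pipeline_definition(pipelineId=pipeline_id, pipelineObjects=pipeline_objects)
client.activate_pipeline(pipelineId=pipeline_id)
```
The monitoring invocation later lists and describes pipelines to find finished runs and deletes them, mirroring `DataPipelineUtil` in `hippolyte/aws_utils.py`.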
30 | 31 | ## Scaling 32 | Part of the job of our scheduling Lambda function is to assign DynamoDB tables as optimally as possible to the individual EMR clusters that will be created. Since new tables may be created each day and table sizes may grow significantly, this optimisation is performed each night during the scheduling step. By default, each data pipeline supports only 100 objects; this means each pipeline can hold at most 32 tables, because each table requires 3 Data Pipeline objects: 33 | 34 | * DynamoDBDataNode 35 | * EmrActivity 36 | * S3DataNode 37 | 38 | Beyond these, 2 further objects are needed: the pipeline configuration and an EmrCluster node, giving 32 * 3 + 2 = 98. In addition to this hard limit we also want every backup to run between 12:00 AM and 7:00 AM. We can work out how long each pipeline will take to complete by starting with some static values: 39 | 40 | * EMR cluster bootstrap (10 min) 41 | * EMR activity bootstrap (1 min) 42 | 43 | From there we calculate how long each table will take to back up with the following formula: 44 | 45 | $$$ 46 | Duration = \frac{Size}{RCU * ConsumedPercentage * 4096\ bytes/second} 47 | $$$ 48 | 49 | Where _Size_ is the table size in bytes, _RCU_ is the provisioned Read Capacity Units for a given table and _ConsumedPercentage_ is the proportion of this capacity the backup job will use. Since each EMR cluster runs its backup jobs sequentially and we have limits on both the number of tables and the length of time, we can pack each pipeline with tables until one of those two constraints is met. 50 | 51 | Additionally, some tables are too large to be backed up in a timely manner with their provisioned read capacity. Here we derive the ratio between the expected backup duration and the desired one, and increase the read capacity units by this ratio. We can also increase the percentage of provisioned throughput we consume while preserving the original amount needed by the application. Since we pay for clusters and capacity by the hour, it is rarely worth reducing the total expected duration below an hour. 52 | 53 | ## Restore 54 | The restore process is also performed with Data Pipeline. The target table needs to be created manually and has to have the same: 55 | 56 | * partition key 57 | * sort key 58 | * secondary indices 59 | 60 | as the original table. We also recommend setting the write capacity to 1000 for the duration of the restore. 61 | 62 | 63 | Note the subnet ID of your EMR-Subnet, as you'll need it later. 64 | Go to the Data Pipeline web console and create a new pipeline. In our example, the values would be: 65 | 66 | * Name: restore-test 67 | * Source: Build using a template (Import DynamoDB backup data from S3) 68 | * Input S3 folder: s3://hippolyte-eu-west-1-prod-backups/table_name/2017-02-22-00-10-39/ 69 | * Target DynamoDB table name: table_name 70 | * DynamoDB write throughput ratio: 1 (use full speed, as we are the only users now) 71 | * Region of DynamoDB table: eu-west-1 72 | * Schedule: Run (on pipeline activation) 73 | * Logging: Enabled (s3://hippolyte-eu-west-1-prod-backups/logs/) 74 | * IAM Roles: Default 75 | 76 | Do not click Activate yet, as at the time of writing that default template is missing some mandatory parameters. Instead click Edit in Architect.
78 | Now click EmrCluster on diagram and go to Add an optional field… find terminateAfter and set it to a value, high above estimated restore duration, like 3 Days. Add in the same place Subnet Id and set it to EMR-Subnet. Save it and click Activate. 79 | 80 | ## Need help with the setup? 81 | Please pm me: romek.rjm@gmail.com 82 | -------------------------------------------------------------------------------- /hippolyte/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "roman.subik" 2 | -------------------------------------------------------------------------------- /hippolyte/aws_utils.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import time 3 | import json 4 | from uuid import uuid4 5 | import boto3 6 | from botocore.exceptions import ClientError 7 | from retrying import retry 8 | import hippolyte.pipeline_translator as pipeline_translator 9 | from hippolyte.utils import chunks 10 | 11 | 12 | def retry_if_throttling_error(exception): 13 | if isinstance(exception, ClientError): 14 | return 'Throttling' in exception.message or 'limit exceeded' in exception.message 15 | 16 | return False 17 | 18 | 19 | class DataPipelineUtil(object): 20 | def __init__(self): 21 | self.client = boto3.client('datapipeline') 22 | 23 | @retry(retry_on_exception=retry_if_throttling_error, 24 | wait_exponential_multiplier=1000, 25 | stop_max_attempt_number=5) 26 | def create_pipeline(self, name=None): 27 | if not name: 28 | name = "dynamodb-backup-" + datetime.now().strftime("%Y-%m-%d-%H-%M-%S") 29 | 30 | return self.client.create_pipeline( 31 | name=name, 32 | uniqueId=str(uuid4()), 33 | description="Used to do automatic DynamoDB backups.", 34 | tags=[ 35 | { 36 | "key": "app", 37 | "value": "hippolyte-datapipeline" 38 | } 39 | ] 40 | ) 41 | 42 | @retry(retry_on_exception=retry_if_throttling_error, 43 | wait_exponential_multiplier=1000, 44 | stop_max_attempt_number=5) 45 | def put_pipeline_definition(self, pipeline_id, definition): 46 | return self.client.put_pipeline_definition( 47 | pipelineId=pipeline_id, 48 | pipelineObjects=pipeline_translator.definition_to_api_objects(definition), 49 | parameterObjects=pipeline_translator.definition_to_api_parameters(definition), 50 | parameterValues=pipeline_translator.definition_to_parameter_values(definition) 51 | ) 52 | 53 | @retry(retry_on_exception=retry_if_throttling_error, 54 | wait_exponential_multiplier=1000, 55 | stop_max_attempt_number=5) 56 | def activate_pipeline(self, pipeline_id, definition): 57 | return self.client.activate_pipeline( 58 | pipelineId=pipeline_id, 59 | parameterValues=pipeline_translator.definition_to_parameter_values(definition) 60 | ) 61 | 62 | @retry(retry_on_exception=retry_if_throttling_error, 63 | wait_exponential_multiplier=1000, 64 | stop_max_attempt_number=5) 65 | def list_pipelines(self): 66 | pipelines = [] 67 | paginator = self.client.get_paginator('list_pipelines') 68 | page_iterator = paginator.paginate(PaginationConfig={ 69 | 'MaxItems': 1000 70 | }) 71 | 72 | for page in page_iterator: 73 | pipelines += page.get("pipelineIdList", []) 74 | 75 | return pipelines 76 | 77 | @retry(retry_on_exception=retry_if_throttling_error, 78 | wait_exponential_multiplier=1000, 79 | stop_max_attempt_number=5) 80 | def describe_pipelines(self): 81 | pipeline_list = self.list_pipelines() 82 | pipeline_ids = map(lambda x: x['id'], pipeline_list) 83 | pipeline_ids_chunked = list(chunks(pipeline_ids, 25)) 84 | 
pipeline_descriptions = [] 85 | 86 | for pipeline_id in pipeline_ids_chunked: 87 | descriptions = self.client.describe_pipelines(pipelineIds=pipeline_id) 88 | pipeline_descriptions += descriptions['pipelineDescriptionList'] 89 | 90 | return pipeline_descriptions 91 | 92 | @retry(retry_on_exception=retry_if_throttling_error, 93 | wait_exponential_multiplier=1000, 94 | stop_max_attempt_number=5) 95 | def delete_pipeline(self, pipeline_id): 96 | self.client.delete_pipeline(pipelineId=pipeline_id) 97 | 98 | 99 | class DynamoDBUtil(object): 100 | def __init__(self): 101 | self.client = boto3.client('dynamodb') 102 | 103 | @retry(retry_on_exception=retry_if_throttling_error, 104 | wait_exponential_multiplier=1000, 105 | stop_max_attempt_number=5) 106 | def list_tables(self): 107 | tables = [] 108 | paginator = self.client.get_paginator('list_tables') 109 | page_iterator = paginator.paginate(PaginationConfig={ 110 | 'MaxItems': 10000, 111 | 'PageSize': 100 112 | }) 113 | 114 | for page in page_iterator: 115 | tables += page.get("TableNames", []) 116 | 117 | return tables 118 | 119 | @retry(retry_on_exception=retry_if_throttling_error, 120 | wait_exponential_multiplier=1000, 121 | stop_max_attempt_number=5) 122 | def describe_table(self, table_name): 123 | return self.client.describe_table(TableName=table_name) 124 | 125 | def describe_tables(self, table_names): 126 | table_descriptions = [] 127 | 128 | for table_name in table_names: 129 | table_descriptions.append(self.describe_table(table_name)) 130 | 131 | return table_descriptions 132 | 133 | @retry(retry_on_exception=retry_if_throttling_error, 134 | wait_exponential_multiplier=1000, 135 | stop_max_attempt_number=5) 136 | def describe_limits(self): 137 | return self.client.describe_limits() 138 | 139 | @retry(retry_on_exception=retry_if_throttling_error, 140 | wait_exponential_multiplier=1000, 141 | stop_max_attempt_number=5) 142 | def batch_write_items(self, table_name, items): 143 | table = self.client.Table(TableName=table_name) 144 | 145 | with table.batch_writer() as batch: 146 | for item in items: 147 | batch.put_item(Item=item) 148 | 149 | @retry(retry_on_exception=retry_if_throttling_error, 150 | wait_exponential_multiplier=1000, 151 | stop_max_attempt_number=5) 152 | def update_item(self, table_name, key, update_expression, expression_attribute_values): 153 | table = self.client.Table(TableName=table_name) 154 | table.update_item(Key=key, UpdateExpression=update_expression, 155 | ExpressionAttributeValues=expression_attribute_values) 156 | 157 | @retry(retry_on_exception=retry_if_throttling_error, 158 | wait_exponential_multiplier=1000, 159 | stop_max_attempt_number=5) 160 | def change_capacity_units(self, table_name, new_read_throughput=None, new_write_throughput=None): 161 | table_description = self.describe_table(table_name).get('Table', {}) 162 | 163 | throughput, requires_update = self._get_adjusted_throughput(table_description, 164 | new_read_throughput, new_write_throughput) 165 | 166 | if requires_update: 167 | self.client.update_table(TableName=table_name, ProvisionedThroughput=throughput) 168 | 169 | def _get_adjusted_throughput(self, table_description, new_read_throughput, new_write_throughput): 170 | current_throughput = table_description.get('ProvisionedThroughput') 171 | current_read_throughput = current_throughput.get('ReadCapacityUnits') 172 | current_write_throughput = current_throughput.get('WriteCapacityUnits') 173 | 174 | throughput = { 175 | 'ReadCapacityUnits': current_read_throughput, 176 | 'WriteCapacityUnits': 
current_write_throughput 177 | } 178 | 179 | requires_update = False 180 | 181 | if new_read_throughput and (current_read_throughput != new_read_throughput): 182 | throughput['ReadCapacityUnits'] = new_read_throughput 183 | requires_update = True 184 | 185 | if new_write_throughput and (current_write_throughput != new_write_throughput): 186 | throughput['WriteCapacityUnits'] = new_write_throughput 187 | requires_update = True 188 | 189 | return throughput, requires_update 190 | 191 | 192 | class S3Util(object): 193 | def __init__(self): 194 | self.client = boto3.client('s3') 195 | 196 | @retry(retry_on_exception=retry_if_throttling_error, 197 | wait_exponential_multiplier=1000, 198 | stop_max_attempt_number=5) 199 | def put_json(self, bucket, key, json_file): 200 | body = json.dumps(json_file, default=lambda o: str(o), sort_keys=True, indent=4) 201 | self.client.put_object(Bucket=bucket, Key=key, Body=body) 202 | 203 | @retry(retry_on_exception=retry_if_throttling_error, 204 | wait_exponential_multiplier=1000, 205 | stop_max_attempt_number=5) 206 | def get_json(self, bucket, key): 207 | obj = self.client.get_object(Bucket=bucket, Key=key) 208 | return json.loads(obj.get('Body').read().decode('utf-8')) 209 | 210 | @retry(retry_on_exception=retry_if_throttling_error, 211 | wait_exponential_multiplier=1000, 212 | stop_max_attempt_number=5) 213 | def list_objects(self, bucket, prefix): 214 | paginator = self.client.get_paginator('list_objects') 215 | contents = [] 216 | response = None 217 | 218 | for page in paginator.paginate(Bucket=bucket, Prefix=prefix): 219 | response = page 220 | contents += page.get('Contents', []) 221 | 222 | if response: 223 | response['Contents'] = contents 224 | 225 | return response 226 | 227 | @retry(retry_on_exception=retry_if_throttling_error, 228 | wait_exponential_multiplier=1000, 229 | stop_max_attempt_number=5) 230 | def object_exists(self, bucket, key): 231 | try: 232 | self.client.get_object(Bucket=bucket, Key=key) 233 | except ClientError as ce: 234 | if ce.response['Error']['Code'] == "404": 235 | return False 236 | 237 | return True 238 | 239 | 240 | class ApplicationAutoScalingUtil(object): 241 | def __init__(self): 242 | self.client = self._init_client() 243 | 244 | def _init_client(self): 245 | return boto3.client('application-autoscaling') 246 | 247 | @retry(retry_on_exception=retry_if_throttling_error, 248 | wait_exponential_multiplier=1000, 249 | stop_max_attempt_number=5) 250 | def describe_scalable_targets(self, service_namespace): 251 | paginator = self.client.get_paginator('describe_scalable_targets') 252 | targets = [] 253 | response = None 254 | 255 | for page in paginator.paginate(ServiceNamespace=service_namespace): 256 | response = page 257 | targets += page.get('ScalableTargets', []) 258 | time.sleep(3) 259 | 260 | if response: 261 | response['ScalableTargets'] = targets 262 | 263 | return response 264 | 265 | @retry(retry_on_exception=retry_if_throttling_error, 266 | wait_exponential_multiplier=1000, 267 | stop_max_attempt_number=5) 268 | def describe_scaling_policies(self, service_namespace): 269 | paginator = self.client.get_paginator('describe_scaling_policies') 270 | policies = [] 271 | response = None 272 | 273 | for page in paginator.paginate(ServiceNamespace=service_namespace): 274 | response = page 275 | policies += page.get('ScalingPolicies', []) 276 | 277 | if response: 278 | response['ScalingPolicies'] = policies 279 | 280 | return response 281 | 282 | @retry(retry_on_exception=retry_if_throttling_error, 283 | 
wait_exponential_multiplier=1000, 284 | stop_max_attempt_number=5) 285 | def delete_scaling_policy(self, policy_name, service_namespace, resource_id, scalable_dimension): 286 | self.client.delete_scaling_policy(PolicyName=policy_name, 287 | ServiceNamespace=service_namespace, 288 | ResourceId=resource_id, 289 | ScalableDimension=scalable_dimension) 290 | 291 | @retry(retry_on_exception=retry_if_throttling_error, 292 | wait_exponential_multiplier=1000, 293 | stop_max_attempt_number=5) 294 | def deregister_scalable_target(self, service_namespace, resource_id, scalable_dimension): 295 | self.client.deregister_scalable_target(ServiceNamespace=service_namespace, 296 | ResourceId=resource_id, 297 | ScalableDimension=scalable_dimension) 298 | 299 | @retry(retry_on_exception=retry_if_throttling_error, 300 | wait_exponential_multiplier=1000, 301 | stop_max_attempt_number=5) 302 | def put_scaling_policy(self, policy_name, service_namespace, resource_id, scalable_dimension, policy_type, 303 | target_scaling_policy_configuration): 304 | self.client.put_scaling_policy(PolicyName=policy_name, 305 | ServiceNamespace=service_namespace, 306 | ResourceId=resource_id, 307 | ScalableDimension=scalable_dimension, 308 | PolicyType=policy_type, 309 | TargetTrackingScalingPolicyConfiguration=target_scaling_policy_configuration) 310 | 311 | @retry(retry_on_exception=retry_if_throttling_error, 312 | wait_exponential_multiplier=1000, 313 | stop_max_attempt_number=5) 314 | def register_scalable_target(self, service_namespace, resource_id, scalable_dimension, 315 | min_capacity, max_capacity, role_arn): 316 | self.client.register_scalable_target(ServiceNamespace=service_namespace, 317 | ResourceId=resource_id, 318 | ScalableDimension=scalable_dimension, 319 | MinCapacity=min_capacity, 320 | MaxCapacity=max_capacity, 321 | RoleARN=role_arn) 322 | 323 | 324 | class SnsUtil(object): 325 | def __init__(self): 326 | self.client = boto3.client('sns') 327 | 328 | @retry(retry_on_exception=retry_if_throttling_error, 329 | wait_exponential_multiplier=1000, 330 | stop_max_attempt_number=5) 331 | def publish(self, sns_topic, subject, message): 332 | self.client.publish( 333 | TopicArn=sns_topic, 334 | Message=message, 335 | Subject=subject 336 | ) 337 | -------------------------------------------------------------------------------- /hippolyte/config_util.py: -------------------------------------------------------------------------------- 1 | __author__ = "roman.subik" 2 | 3 | from hippolyte.aws_utils import S3Util, DataPipelineUtil 4 | from hippolyte.utils import get_date_suffix 5 | import logging 6 | 7 | COMMON_PREFIX = 'backup_metadata' 8 | DONE_STATES = ["CANCELED", "CASCADE_FAILED", "FAILED", "FINISHED", "INACTIVE", "PAUSED", "SKIPPED", "TIMEDOUT"] 9 | 10 | logger = logging.getLogger() 11 | logger.setLevel(logging.INFO) 12 | 13 | 14 | class ConfigUtil(object): 15 | def __init__(self): 16 | self.s3_util = S3Util() 17 | self.data_pipeline_util = DataPipelineUtil() 18 | 19 | def save_configuration(self, pipeline_definitions, backup_bucket, table_descriptions, 20 | scaling_policies, scalable_targets): 21 | self.s3_util.put_json( 22 | backup_bucket, self._get_metadata_file_name(), 23 | { 24 | "Tables": table_descriptions, 25 | "Pipelines": pipeline_definitions, 26 | "ScalingPolicies": scaling_policies, 27 | "ScalableTargets": scalable_targets 28 | } 29 | ) 30 | 31 | def load_configuration(self, backup_bucket): 32 | contents = self.s3_util.list_objects( 33 | backup_bucket, COMMON_PREFIX 34 | ).get("Contents", []) 35 | 36 | 
contents = sorted(contents, key=lambda x: x['LastModified'], reverse=True) 37 | 38 | if contents: 39 | return self.s3_util.get_json( 40 | backup_bucket, contents[0].get('Key') 41 | ) 42 | else: 43 | return 44 | 45 | def _get_metadata_file_name(self): 46 | return '{}-{}'.format(COMMON_PREFIX, get_date_suffix()) 47 | 48 | def list_backed_up_tables(self, pipelines, backup_bucket): 49 | finished_pipelines = self.list_finished_pipelines(backup_bucket, pipelines) 50 | backed_up_tables = [] 51 | 52 | for pipeline in pipelines: 53 | if pipeline['pipeline_id'] in finished_pipelines: 54 | backed_up_tables += pipeline['backed_up_tables'] 55 | 56 | return backed_up_tables 57 | 58 | def list_finished_pipelines(self, backup_bucket=None, backup_pipelines=None): 59 | if not backup_pipelines: 60 | last_configuration = self.load_configuration(backup_bucket) 61 | 62 | if last_configuration: 63 | backup_pipelines = last_configuration['Pipelines'] 64 | 65 | if not backup_pipelines: 66 | logger.error("Couldn't find any backed up tables. Has your backup ran?") 67 | return [] 68 | 69 | backup_pipeline_names = map(lambda x: x['pipeline_id'], backup_pipelines) 70 | pipelines = self.data_pipeline_util.describe_pipelines() 71 | finished_pipelines = [] 72 | 73 | for pipeline in pipelines: 74 | fields = pipeline["fields"] 75 | pipeline_id = pipeline["pipelineId"] 76 | 77 | logger.info("Checking pipeline {}".format(str(pipeline_id))) 78 | 79 | if pipeline_id not in backup_pipeline_names: 80 | continue 81 | 82 | for field in fields: 83 | if field["key"] != "@pipelineState": 84 | continue 85 | if field["stringValue"] in DONE_STATES: 86 | logger.info("Pipeline {} state is in DONE_STATES.".format(str(pipeline_id))) 87 | logger.debug(str(pipeline)) 88 | finished_pipelines.append(pipeline_id) 89 | 90 | return finished_pipelines 91 | -------------------------------------------------------------------------------- /hippolyte/dynamodb_backup.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import logging 3 | import re 4 | 5 | from botocore.exceptions import ClientError 6 | from hippolyte.aws_utils import DataPipelineUtil, DynamoDBUtil 7 | from hippolyte.config_util import ConfigUtil 8 | from hippolyte.monitor import Monitor 9 | from hippolyte.pipeline_scheduler import Scheduler 10 | from hippolyte.dynamodb_booster import DynamoDbBooster 11 | from hippolyte.utils import MAX_DURATION_SINGLE_PIPELINE, INITIAL_READ_THROUGHPUT_PERCENT, list_tables_in_definition 12 | from hippolyte.project_config import ACCOUNT_CONFIGS 13 | 14 | logger = logging.getLogger() 15 | logger.setLevel(logging.INFO) 16 | 17 | 18 | def _extract_from_arn(arn, position): 19 | """ 20 | Helper Function to extract part of an ARN 21 | 22 | :param arn: Arn to extract from 23 | :param position: Position in Arn of interest 24 | :return: String containing value at requested position 25 | """ 26 | 27 | return re.findall("(.*?):", arn)[position] 28 | 29 | 30 | def get_table_descriptions(exclude_from_backup, always_backup): 31 | """ 32 | Decides which tables should be backed up, based on their names. 
33 | :param exclude_from_backup: list of regexp., matching tables will be skipped from backup 34 | :param always_backup: those tables will always be backed up, despite exclude_from_backup matching 35 | :return: list of table names to backup 36 | """ 37 | dynamo_db_util = DynamoDBUtil() 38 | table_names = dynamo_db_util.list_tables() 39 | tables_filtered = set() 40 | patterns = map(lambda x: re.compile(x), exclude_from_backup) 41 | 42 | for table_name in table_names: 43 | if table_name in always_backup or _not_excluded(table_name, patterns): 44 | tables_filtered.add(table_name) 45 | 46 | return dynamo_db_util.describe_tables(tables_filtered) 47 | 48 | 49 | def _not_excluded(table_name, patterns): 50 | should_be_added = True 51 | 52 | for pattern in patterns: 53 | if pattern.match(table_name): 54 | should_be_added = False 55 | break 56 | 57 | return should_be_added 58 | 59 | 60 | def get_account(context): 61 | return _extract_from_arn(context.invoked_function_arn, 4) 62 | 63 | 64 | def get_sns_endpoint(context): 65 | region = _extract_from_arn(context.invoked_function_arn, 3) 66 | return 'arn:aws:sns:{}:{}:hippolyte-backup-monitoringbackup'.format(region, get_account(context)) 67 | 68 | 69 | def detect_action(event): 70 | resources = event.get("resources", []) 71 | 72 | for resource in resources: 73 | if resource.endswith('monitor-dynamodb-backup'): 74 | return monitor 75 | 76 | return backup 77 | 78 | 79 | def backup(**kwargs): 80 | logger.info("Performing full DynamoDB backup task.") 81 | logger.info("Building pipeline definitions") 82 | scheduler = Scheduler(kwargs['table_descriptions'], 'multiple.template', kwargs['emr_subnet'], 83 | kwargs['region'], kwargs['backup_bucket'], kwargs['log_bucket']) 84 | pipeline_definitions = scheduler.build_pipeline_definitions() 85 | 86 | logger.info("Creating pipelines.") 87 | pipeline_descriptions = [] 88 | for definition in pipeline_definitions: 89 | created = True 90 | 91 | try: 92 | response = kwargs['pipeline_util'].create_pipeline() 93 | except ClientError as e: 94 | if e.message == 'LimitExceededException': 95 | logger.warn("Can't create more pipelines, as account limit exceeded. Details: {}" 96 | .format(e.message)) 97 | 98 | created = False 99 | logger.warn("Can't create more pipelines. 
Details: {}".format(e.message)) 100 | 101 | if created: 102 | pipeline_descriptions.append( 103 | { 104 | 'pipeline_id': response.get("pipelineId"), 105 | 'backed_up_tables': list_tables_in_definition(definition), 106 | 'definition': definition 107 | } 108 | ) 109 | 110 | logger.info("Updating throughputs, to meet Time Point Objective.") 111 | kwargs['dynamodb_booster'].boost_throughput(pipeline_descriptions, MAX_DURATION_SINGLE_PIPELINE) 112 | 113 | for description in pipeline_descriptions: 114 | pipeline_id = description["pipeline_id"] 115 | pipeline_definition = description["definition"] 116 | 117 | logger.info("Deploying pipeline definition to {}".format(pipeline_id)) 118 | kwargs['pipeline_util'].put_pipeline_definition(pipeline_id, pipeline_definition) 119 | 120 | logger.info("Activating pipeline: {}".format(pipeline_id)) 121 | kwargs['pipeline_util'].activate_pipeline(pipeline_id, pipeline_definition) 122 | 123 | logger.info("Finished dynamo db backup.") 124 | 125 | 126 | def monitor(**kwargs): 127 | logger.info("Performing monitoring only this time.") 128 | logger.info("Restoring original throughputs.") 129 | kwargs['dynamodb_booster'].restore_throughput() 130 | 131 | finished_pipelines = ConfigUtil().list_finished_pipelines(kwargs['backup_bucket']) 132 | for pipeline_id in finished_pipelines: 133 | logger.info("Deleting finished pipeline: {}".format(pipeline_id)) 134 | kwargs['pipeline_util'].delete_pipeline(pipeline_id) 135 | 136 | logger.info("Looking for failed backups.") 137 | monitor = Monitor(kwargs['account'], kwargs['log_bucket'], kwargs['backup_bucket'], kwargs['sns_endpoint']) 138 | monitor.notify_about_failures(finished_pipelines) 139 | 140 | 141 | def lambda_handler(event, context): 142 | account_id = get_account(context) 143 | 144 | if account_id not in ACCOUNT_CONFIGS: 145 | logger.error("Couldn't find configuration for {} in project_config.py.".format(account_id)) 146 | return 147 | 148 | account_config = ACCOUNT_CONFIGS[account_id] 149 | exclude_from_backup = account_config.get('exclude_from_backup', []) 150 | always_backup = account_config.get('always_backup', []) 151 | 152 | logger.info("Describing tables in the account.") 153 | table_descriptions = get_table_descriptions(exclude_from_backup, always_backup) 154 | 155 | action = detect_action(event) 156 | action(**{ 157 | 'table_descriptions': table_descriptions, 158 | 'pipeline_util': DataPipelineUtil(), 159 | 'dynamodb_booster': DynamoDbBooster(table_descriptions, 160 | account_config['backup_bucket'], 161 | INITIAL_READ_THROUGHPUT_PERCENT), 162 | 'account': account_id, 163 | 'log_bucket': account_config['log_bucket'], 164 | 'sns_endpoint': get_sns_endpoint(context), 165 | 'backup_bucket': account_config['backup_bucket'], 166 | 'emr_subnet': account_config['emr_subnet'], 167 | 'region': _extract_from_arn(context.invoked_function_arn, 3) 168 | }) 169 | 170 | # Uncomment to test monitor phase: 171 | # class Context(object): 172 | # def __init__(self): 173 | # self.invoked_function_arn = "a:b:c:eu-west-1:274670120741:e" 174 | # 175 | # lambda_handler({'resources': ['monitor-dynamodb-backup']}, Context()) 176 | 177 | # Uncomment to test backup phase 178 | # lambda_handler({}, Context()) 179 | -------------------------------------------------------------------------------- /hippolyte/dynamodb_booster.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import logging 3 | from botocore.exceptions import ClientError 4 | from 
hippolyte.aws_utils import ApplicationAutoScalingUtil, DataPipelineUtil, DynamoDBUtil 5 | from hippolyte.config_util import ConfigUtil 6 | from hippolyte.utils import ACTIVITY_BOOTSTRAP_TIME, EMR_BOOTSTRAP_TIME, MAX_DURATION_SEC, \ 7 | MAX_ALLOWED_PROVISIONED_READ_THROUGHPUT, INITIAL_READ_THROUGHPUT_PERCENT, \ 8 | estimate_backup_duration, compute_required_throughput, get_first_element_in_the_list_with 9 | 10 | logger = logging.getLogger() 11 | logger.setLevel(logging.INFO) 12 | 13 | 14 | class DynamoDbBooster(object): 15 | def __init__(self, table_descriptions, backup_bucket, read_throughput_percent): 16 | self.table_descriptions = table_descriptions 17 | self.backup_bucket = backup_bucket 18 | self.read_throughput_percent = read_throughput_percent 19 | self.dynamo_db_util = DynamoDBUtil() 20 | self.config_util = ConfigUtil() 21 | self.data_pipeline_util = DataPipelineUtil() 22 | self.application_auto_scaling_util = ApplicationAutoScalingUtil() 23 | 24 | def boost_throughput(self, pipeline_descriptions, desired_backup_duration): 25 | scaling_policies = self.list_dynamodb_scaling_policies() 26 | scalable_targets = self.list_dynamodb_scalable_targets() 27 | self.config_util.save_configuration(pipeline_descriptions, self.backup_bucket, self.table_descriptions, 28 | scaling_policies, scalable_targets) 29 | self.disable_auto_scaling(scaling_policies, scalable_targets) 30 | 31 | limits = self.dynamo_db_util.describe_limits() 32 | total_increase = 0 33 | 34 | pipeline_definitions = map(lambda x: x['definition'], pipeline_descriptions) 35 | 36 | for nodes in pipeline_definitions: 37 | total_increase += self._boost_single_pipeline(nodes.get('objects'), desired_backup_duration, limits) 38 | 39 | logger.info("Total throughput increase: {}".format(total_increase)) 40 | 41 | def restore_throughput(self): 42 | last_configuration = self.config_util.load_configuration(self.backup_bucket) 43 | 44 | if not last_configuration: 45 | logger.error("Couldn't find configuration file. 
Stopping throughput restore process.") 46 | return 47 | 48 | self._restore_all_tables(last_configuration) 49 | 50 | self.reenable_auto_scaling(last_configuration) 51 | 52 | def _restore_all_tables(self, last_configuration): 53 | pipelines = last_configuration['Pipelines'] 54 | tables = last_configuration['Tables'] 55 | 56 | backed_up_tables = self.config_util.list_backed_up_tables(pipelines, self.backup_bucket) 57 | previous_table_state = filter(lambda x: 'TableArn' in x['Table'], tables) 58 | current_table_state = filter(lambda x: 'TableArn' in x['Table'], self.table_descriptions) 59 | 60 | logger.debug("Previous table state: {}".format(str(previous_table_state))) 61 | logger.debug("Current table state: {}".format(str(current_table_state))) 62 | 63 | for previous_state in previous_table_state: 64 | previous_name, previous_throughput = self._get_name_and_capacity(previous_state) 65 | 66 | if previous_name not in backed_up_tables: 67 | continue 68 | 69 | for current_state in current_table_state: 70 | current_name, current_throughput = self._get_name_and_capacity(current_state) 71 | logger.debug("current_name:{}, current_throughput:{}, previous_name:{}, previous_throughput:{}" 72 | .format(current_name, current_throughput, previous_name, previous_throughput)) 73 | 74 | if current_name == previous_name and current_throughput != previous_throughput: 75 | logger.info("Decreasing throughput of {} from {} to {}.".format( 76 | current_name, current_throughput, previous_throughput)) 77 | 78 | try: 79 | self.dynamo_db_util.change_capacity_units(current_name, previous_throughput) 80 | except ClientError as e: 81 | if 'decreased' in e.message: 82 | logger.error("Can't decrease throughput of {}, max number of decreases for 24h reached." 83 | .format(current_name)) 84 | else: 85 | logger.error("Can't decrease throughput of {}, reason: ".format(e.message)) 86 | 87 | def _boost_single_pipeline(self, nodes, desired_backup_duration, limits): 88 | dynamo_db_nodes = filter(lambda x: 'tableName' in x, nodes) 89 | bootstrap_duration = EMR_BOOTSTRAP_TIME + ACTIVITY_BOOTSTRAP_TIME * len(dynamo_db_nodes) 90 | max_backup_duration = MAX_DURATION_SEC - bootstrap_duration 91 | total_backup_duration = 0 92 | table_durations = [] 93 | total_increase = 0 94 | 95 | for node in dynamo_db_nodes: 96 | table_description = filter(lambda x: x.get('Table', {}).get('TableName') == node['tableName'], 97 | self.table_descriptions)[0] 98 | table_size = table_description.get('Table', {}).get('TableSizeBytes') 99 | read_capacity_units = table_description.get('Table', {}).get('ProvisionedThroughput', {}) \ 100 | .get('ReadCapacityUnits', {}) 101 | duration = estimate_backup_duration(self.read_throughput_percent, table_size, read_capacity_units) 102 | table_durations.append((node, table_description, read_capacity_units, duration)) 103 | total_backup_duration += duration 104 | 105 | if total_backup_duration <= max_backup_duration: 106 | return total_increase 107 | 108 | for node, description, read_capacity_units, duration in table_durations: 109 | target_duration = float(duration) * desired_backup_duration / total_backup_duration 110 | new_read_capacity_units, new_throughput_percent = compute_required_throughput( 111 | duration, target_duration, read_capacity_units, INITIAL_READ_THROUGHPUT_PERCENT) 112 | 113 | read_limit = min(MAX_ALLOWED_PROVISIONED_READ_THROUGHPUT, limits['TableMaxReadCapacityUnits']) 114 | 115 | if new_read_capacity_units > read_limit: 116 | logger.error("Can't meet RTO for {} as max table read capacity limit is {}, 
conntact aws support, " 117 | "to increase it. ".format(node['tableName'], read_limit)) 118 | new_read_capacity_units = read_limit 119 | 120 | logger.info("Increasing throughput of {} from {} to {}.".format( 121 | node['tableName'], read_capacity_units, new_read_capacity_units)) 122 | node['readThroughputPercent'] = str(new_throughput_percent) 123 | 124 | try: 125 | self.dynamo_db_util.change_capacity_units(node['tableName'], new_read_capacity_units) 126 | except ClientError as e: 127 | if e.message == 'LimitExceededException': 128 | logger.error("Can't meet RTO for {} as max account read capacity limit exceeded. Details: {}" 129 | .format(node['tableName'], e.message)) 130 | else: 131 | logger.error("Failed to increase table {} read capacity limit. Details: {}" 132 | .format(node['tableName'], e.message)) 133 | 134 | new_read_capacity_units = read_capacity_units 135 | 136 | total_increase += new_read_capacity_units - read_capacity_units 137 | 138 | return total_increase 139 | 140 | def _get_name_and_capacity(self, state): 141 | table = state.get('Table', {}) 142 | name = table.get('TableName', '') 143 | throughput = table.get('ProvisionedThroughput', {}).get('ReadCapacityUnits') 144 | 145 | return name, throughput 146 | 147 | def disable_auto_scaling(self, scaling_policies, scalable_targets): 148 | logger.info("Disabling autoscaling on backed up tables, for backup duration.") 149 | 150 | for table in self.table_descriptions: 151 | table_name = table.get('Table', {}).get('TableName') 152 | resource_id = "table/{}".format(table_name) 153 | 154 | read_scaling_policy = get_first_element_in_the_list_with(scaling_policies, 'ResourceId', resource_id) 155 | 156 | if read_scaling_policy: 157 | logger.info("Removing scaling policy: {}".format(read_scaling_policy['PolicyName'])) 158 | 159 | try: 160 | self.application_auto_scaling_util. \ 161 | delete_scaling_policy(read_scaling_policy['PolicyName'], "dynamodb", 162 | resource_id, "dynamodb:table:ReadCapacityUnits") 163 | except ClientError as e: 164 | if 'No scaling policy found for service namespace' in e.message: 165 | logger.warn("Can't delete scaling policy for: {}, as it does not exist".format(table_name)) 166 | else: 167 | logger.warn( 168 | "Can't delete scaling policy for: {}, error: {}".format(table_name, e.message)) 169 | 170 | read_scalable_target = get_first_element_in_the_list_with(scalable_targets, 'ResourceId', resource_id) 171 | 172 | if read_scalable_target: 173 | logger.info("Removing scalable target for: {}".format(resource_id)) 174 | try: 175 | self.application_auto_scaling_util. 
\ 176 | deregister_scalable_target("dynamodb", resource_id, "dynamodb:table:ReadCapacityUnits") 177 | except ClientError as e: 178 | if 'No scalable target found for service namespace' in e.message: 179 | logger.warn("Can't delete scalable target for: {}, as it does not exist".format(table_name)) 180 | else: 181 | logger.warn( 182 | "Can't delete scalable target for: {}, error: {}".format(table_name, e.message)) 183 | 184 | def reenable_auto_scaling(self, last_configuration): 185 | logger.info("Reenabling autoscaling tables after backup.") 186 | scalable_targets = last_configuration['ScalableTargets'] 187 | scaling_policies = last_configuration['ScalingPolicies'] 188 | 189 | for target in scalable_targets: 190 | logger.info("Adding scalable target for: {}".format(target['ResourceId'])) 191 | 192 | try: 193 | self.application_auto_scaling_util.register_scalable_target(target['ServiceNamespace'], 194 | target['ResourceId'], 195 | target['ScalableDimension'], 196 | target['MinCapacity'], 197 | target['MaxCapacity'], 198 | target['RoleARN']) 199 | except ClientError as e: 200 | if 'table does not exist' in e.message: 201 | logger.warn("Can't restore scalable target for: {}, table was deleted".format(target['ResourceId'])) 202 | else: 203 | logger.warn( 204 | "Can't restore scalable target for: {}, error: {}".format(target['ResourceId'], e.message)) 205 | 206 | for policy in scaling_policies: 207 | logger.info("Adding scaling policy: {}".format(policy['PolicyName'])) 208 | 209 | try: 210 | self.application_auto_scaling_util.put_scaling_policy(policy['PolicyName'], 211 | policy['ServiceNamespace'], 212 | policy['ResourceId'], 213 | policy['ScalableDimension'], 214 | policy['PolicyType'], 215 | policy[ 216 | 'TargetTrackingScalingPolicyConfiguration']) 217 | except ClientError as e: 218 | if 'table does not exist' in e.message: 219 | logger.warn("Can't restore scaling policy for: {}, table was deleted".format(target['ResourceId'])) 220 | else: 221 | logger.warn( 222 | "Can't restore scaling policy for: {}, error: {}".format(target['ResourceId'], e.message)) 223 | 224 | def list_dynamodb_scalable_targets(self): 225 | targets = self.application_auto_scaling_util \ 226 | .describe_scalable_targets("dynamodb").get('ScalableTargets', []) 227 | return self._only_return_rcu_dimension(targets) 228 | 229 | def list_dynamodb_scaling_policies(self): 230 | policies = self.application_auto_scaling_util \ 231 | .describe_scaling_policies("dynamodb").get('ScalingPolicies', []) 232 | return self._only_return_rcu_dimension(policies) 233 | 234 | def _only_return_rcu_dimension(self, _list): 235 | return filter(lambda x: x.get('ScalableDimension') == 'dynamodb:table:ReadCapacityUnits', _list) 236 | -------------------------------------------------------------------------------- /hippolyte/monitor.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from datetime import datetime 3 | import logging 4 | from hippolyte.aws_utils import S3Util, SnsUtil 5 | from hippolyte.config_util import ConfigUtil 6 | from hippolyte.utils import TIME_IN_BETWEEN_BACKUPS 7 | 8 | logger = logging.getLogger() 9 | logger.setLevel(logging.INFO) 10 | 11 | 12 | class Monitor(object): 13 | def __init__(self, account, log_bucket, backup_bucket, sns_endpoint): 14 | self.account = account 15 | self.log_bucket = log_bucket 16 | self.backup_bucket = backup_bucket 17 | self.sns_endpoint = sns_endpoint 18 | self.config_util = ConfigUtil() 19 | self.s3_util = S3Util() 20 | 
self.sns_util = SnsUtil() 21 | 22 | def notify_about_failures(self, pipelines): 23 | configuration = self.config_util.load_configuration(self.backup_bucket) 24 | 25 | if not configuration: 26 | logger.info("Couldn't find configuration file. Stopping throughput restore process, sending email.") 27 | 28 | email_body = all_failed_backup_email_template.format( 29 | account=self.account, 30 | log_bucket = self.log_bucket 31 | ) 32 | self.send_notification_email(email_body) 33 | 34 | pipeline_failed_tables = {} 35 | 36 | for pipeline_id in pipelines: 37 | finished_pipeline = filter(lambda x: x['pipeline_id'] == pipeline_id, configuration['Pipelines']) 38 | failed_tables = [] 39 | 40 | if finished_pipeline: 41 | failed_tables = self.extract_failed_tables(finished_pipeline[0]) 42 | 43 | if failed_tables: 44 | pipeline_failed_tables[finished_pipeline[0]['pipeline_id']] = failed_tables 45 | 46 | if pipeline_failed_tables: 47 | logger.info('Some tables were not backed up properly: {}'.format(str(pipeline_failed_tables))) 48 | logger.info('Sending sns notification about failures.') 49 | 50 | email_body = failed_table_backup_email_template.format( 51 | account=self.account, 52 | description=create_description(pipeline_failed_tables), 53 | log_bucket = self.log_bucket 54 | ) 55 | self.send_notification_email(email_body) 56 | 57 | def extract_failed_tables(self, pipeline): 58 | objects = pipeline.get('definition', {'objects': []}).get('objects', []) 59 | s3_attributes = filter(lambda x: 'directoryPath' in x, objects) 60 | failed_tables = [] 61 | 62 | for s3_attribute in s3_attributes: 63 | 64 | protocol, _, bucket, table_name, timestamp = s3_attribute['directoryPath'].split('/') 65 | 66 | backup_archive = self.s3_util.list_objects( 67 | bucket, table_name 68 | ).get("Contents", []) 69 | 70 | backup_archive = sorted(backup_archive, key=lambda x: x['LastModified'], reverse=True) 71 | 72 | if not backup_archive: 73 | failed_tables.append(table_name) 74 | continue 75 | 76 | success_flag = get_first_success_flag(backup_archive) 77 | 78 | if not success_flag: 79 | failed_tables.append(table_name) 80 | continue 81 | 82 | if not is_backup_from_current_batch(success_flag): 83 | failed_tables.append(table_name) 84 | continue 85 | 86 | return failed_tables 87 | 88 | def send_notification_email(self, email_body): 89 | email_subject = email_subject_template.format(account=self.account) 90 | self.sns_util.publish(self.sns_endpoint, email_subject, email_body) 91 | 92 | 93 | def get_first_success_flag(backup_dir_contents): 94 | for content in backup_dir_contents: 95 | if content['Key'].endswith('_SUCCESS'): 96 | return content 97 | 98 | return None 99 | 100 | 101 | failed_table_backup_email_template = """ 102 | Hello 103 | 104 | You have been notified, as some of tables in {account} account were not backed up in last 24h. 105 | Please find details below: 106 | 107 | Pipeline Id: Failed tables 108 | 109 | {description} 110 | 111 | Please check logs in: {log_bucket} for details. 112 | 113 | Best regards, 114 | Hippolyte 115 | """ 116 | 117 | email_subject_template = "Failed to backup DynamoDB tables in {account} account." 118 | 119 | all_failed_backup_email_template = """ 120 | Hello 121 | 122 | You have been notified, as DynamoDB backup failed completely in {account}. 123 | I couldn't even find a backup_metadata* file in {log_bucket}. 
124 | 125 | Best regards, 126 | Hippolyte 127 | """ 128 | 129 | 130 | def is_backup_from_current_batch(backup_dir): 131 | last_modified = backup_dir['LastModified'] 132 | return (datetime.now(tz=last_modified.tzinfo) - last_modified).total_seconds() <= TIME_IN_BETWEEN_BACKUPS 133 | 134 | 135 | def create_description(pipeline_failed_tables): 136 | table = "" 137 | for pipeline_id in pipeline_failed_tables: 138 | table += "{}: {}\n".format(pipeline_id, ",".join(pipeline_failed_tables[pipeline_id])) 139 | 140 | return table 141 | -------------------------------------------------------------------------------- /hippolyte/multiple.template: -------------------------------------------------------------------------------- 1 | { 2 | "objects": [ 3 | { 4 | "subnetId": "{{subnetId}}", 5 | "bootstrapAction": "s3://{{region}}.elasticmapreduce/bootstrap-actions/configure-hadoop, {{clusterMemory}}", 6 | "name": "EmrClusterForBackup", 7 | "coreInstanceCount": "{{coreInstanceCount}}", 8 | "coreInstanceType": "{{coreInstanceType}}", 9 | "amiVersion": "3.9.0", 10 | "masterInstanceType": "{{masterInstanceType}}", 11 | "id": "EmrClusterForBackup", 12 | "region": "{{region}}", 13 | "type": "EmrCluster", 14 | "terminateAfter": "{{terminateAfter}}" 15 | }, 16 | { 17 | "failureAndRerunMode": "CASCADE", 18 | "resourceRole": "DataPipelineDefaultResourceRole", 19 | "role": "DataPipelineDefaultRole", 20 | "pipelineLogUri": "s3://{{s3PipelineLogBucket}}/", 21 | "scheduleType": "ONDEMAND", 22 | "name": "Default", 23 | "id": "Default" 24 | }, 25 | {{#backups}} 26 | { 27 | "readThroughputPercent": "{{dbSourceTableReadThroughputPercent}}", 28 | "name": "{{dbSourceTableName}}", 29 | "id": "{{dbSourceTableId}}", 30 | "type": "DynamoDBDataNode", 31 | "tableName": "{{dynamoDBTableName}}" 32 | }, 33 | { 34 | "output": { 35 | "ref": "{{s3BackupLocationId}}" 36 | }, 37 | "input": { 38 | "ref": "{{dbSourceTableId}}" 39 | }, 40 | "maximumRetries": "{{tableBackupActivityMaximumRetries}}", 41 | "name": "{{tableBackupActivityName}}", 42 | "step": "s3://dynamodb-emr-{{region}}/emr-ddb-storage-handler/2.1.0/emr-ddb-2.1.0.jar,org.apache.hadoop.dynamodb.tools.DynamoDbExport,#{output.directoryPath},#{input.tableName},#{input.readThroughputPercent}", 43 | "id": "{{tableBackupActivityId}}", 44 | "runsOn": { 45 | "ref": "EmrClusterForBackup" 46 | }, 47 | "type": "EmrActivity", 48 | "resizeClusterBeforeRunning": "false" 49 | }, 50 | { 51 | "directoryPath": "s3://{{s3BackupBucket}}/{{dynamoDBTableName}}/#{format(@scheduledStartTime, 'YYYY-MM-dd-HH-mm-ss')}", 52 | "name": "{{s3BackupLocationName}}", 53 | "id": "{{s3BackupLocationId}}", 54 | "type": "S3DataNode" 55 | }{{#comma}},{{/comma}} 56 | {{/backups}} 57 | ], 58 | "parameters": [ 59 | ], 60 | "values": { 61 | } 62 | } -------------------------------------------------------------------------------- /hippolyte/pipeline_scheduler.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import json 4 | import logging 5 | import math 6 | import os 7 | import pystache 8 | 9 | from hippolyte.utils import EMR_BOOTSTRAP_TIME, MAX_DURATION_SEC, ACTIVITY_BOOTSTRAP_TIME, \ 10 | MAX_TABLES_PER_PIPELINE, INITIAL_READ_THROUGHPUT_PERCENT, estimate_backup_duration, get_date_suffix 11 | 12 | logger = logging.getLogger() 13 | logger.setLevel(logging.INFO) 14 | 15 | 16 | CLUSTER_CONFIGS = [ 17 | { 18 | 'masterInstanceType': 'm1.medium', 19 | 'coreInstanceType': 'm1.medium', 20 | 'coreInstanceCount': 1, 21 | 'clusterMemory': 
'--yarn-key-value,yarn.nodemanager.resource.memory-mb=2048,' 22 | '--yarn-key-value,yarn.scheduler.maximum-allocation-mb=2048,' 23 | '--yarn-key-value,yarn.scheduler.minimum-allocation-mb=256,' 24 | '--yarn-key-value,yarn.app.mapreduce.am.resource.mb=1024,' 25 | '--mapred-key-value,mapreduce.map.memory.mb=768,' 26 | '--mapred-key-value,mapreduce.map.java.opts=-Xmx512M,' 27 | '--mapred-key-value,mapreduce.reduce.memory.mb=1024,' 28 | '--mapred-key-value,mapreduce.reduce.java.opts=-Xmx768m,' 29 | '--mapred-key-value,mapreduce.map.speculative=false', 30 | 'maxTotalDynamoDbSizeBytes': 597688320 # 570MB 31 | }, 32 | { 33 | 'masterInstanceType': 'm3.xlarge', 34 | 'coreInstanceType': 'm3.xlarge', 35 | 'coreInstanceCount': 1, 36 | 'clusterMemory': '--yarn-key-value,yarn.nodemanager.resource.memory-mb=11520,' 37 | '--yarn-key-value,yarn.scheduler.maximum-allocation-mb=11520,' 38 | '--yarn-key-value,yarn.scheduler.minimum-allocation-mb=1440,' 39 | '--yarn-key-value,yarn.app.mapreduce.am.resource.mb=2880,' 40 | '--mapred-key-value,mapreduce.map.memory.mb=5760,' 41 | '--mapred-key-value,mapreduce.map.java.opts=-Xmx4608M,' 42 | '--mapred-key-value,mapreduce.reduce.memory.mb=2880,' 43 | '--mapred-key-value,mapreduce.reduce.java.opts=-Xmx2304m,' 44 | '--mapred-key-value,mapreduce.map.speculative=false', 45 | 'maxTotalDynamoDbSizeBytes': 1099511627776000 # 1PB 46 | } 47 | ] 48 | 49 | 50 | class Scheduler(object): 51 | def __init__(self, table_descriptions, template_file, subnet_id, region, 52 | s3_backup_bucket, s3_pipeline_log_bucket, max_retries=2, 53 | read_throughput_percent=INITIAL_READ_THROUGHPUT_PERCENT): 54 | """ 55 | :param table_descriptions: descriptions, as returned from DynamoDBUtil.describe_tables() 56 | :param template_file: path to template file 57 | :param read_throughput_percent: how much read throughput should be used for backing up, ex. 0.5 - 50% 58 | :param subnet_id: EMR subnet 59 | :param region: 60 | :param s3_backup_bucket: S3 location, where backup files go 61 | :param s3_pipeline_log_bucket: S3 location, where pipeline logs go 62 | :param max_retries: how many times to retry pipeline execution on error, before giving up 63 | :return: 64 | """ 65 | self.table_descriptions = table_descriptions 66 | self.template_file = template_file 67 | self.read_throughput_percent = read_throughput_percent 68 | self.subnet_id = subnet_id 69 | self.region = region 70 | self.s3_backup_bucket = s3_backup_bucket 71 | self.s3_pipeline_log_bucket = s3_pipeline_log_bucket 72 | self.max_retries = max_retries 73 | self.s3_log_location = '{}/logs/{}'.format(s3_pipeline_log_bucket, get_date_suffix()) 74 | self.terminate_after = int(math.ceil(MAX_DURATION_SEC / 3600.0)) + 1 75 | 76 | def build_pipeline_definitions(self): 77 | """ 78 | Creates list of pipeline definitions, which could be use to populate data pipelines. 79 | Does it by combining template with parameter list. 80 | :return: list of pipeline definitions 81 | """ 82 | data_pipelines = [] 83 | 84 | template = self.read_template() 85 | dp_parameters = self.build_parameters() 86 | 87 | for parameters in dp_parameters: 88 | data_pipelines.append(json.loads(pystache.render(template, parameters))) 89 | 90 | return data_pipelines 91 | 92 | def read_template(self): 93 | template_file = os.path.join(os.path.dirname(__file__), self.template_file) 94 | with open(template_file, "r") as f: 95 | return f.read() 96 | 97 | def build_parameters(self): 98 | """ 99 | Builds list of parameters, describing dynamo db backup process on a single data pipeline. 
100 |         Performs scheduling by deciding which tables to assign to each pipeline, so that all pipelines
101 |         finish in roughly the same amount of time.
102 |         :return: list of parameter dicts, one per data pipeline
103 |         """
104 |         data_pipeline_parameters = []
105 |         total_duration = EMR_BOOTSTRAP_TIME
106 |         backups = []
107 |         table_counter = 0
108 |         table_index = 0
109 |         total_table_size = 0
110 |         table_backup_durations = self.build_table_backup_durations()
111 | 
112 |         for table_name, backup_duration, table_size_bytes in table_backup_durations:
113 |             total_duration += backup_duration
114 | 
115 |             backups.append(self.create_backup_parameters(table_counter, table_name))
116 |             table_counter += 1
117 |             total_table_size += table_size_bytes
118 | 
119 |             if not self.should_add_more_tables(table_index, total_duration,
120 |                                                table_backup_durations, backups):
121 |                 backups = self.normalize_backup_parameters(backups)
122 | 
123 |                 data_pipeline_parameters.append(self.create_pipeline_parameters(backups, total_table_size))
124 | 
125 |                 logger.info('Total estimated duration of pipeline execution: {}'.format(total_duration))
126 | 
127 |                 total_duration = EMR_BOOTSTRAP_TIME
128 |                 backups = []
129 |                 table_counter = 0
130 |                 total_table_size = 0
131 | 
132 |             table_index += 1
133 | 
134 |         return data_pipeline_parameters
135 | 
136 |     def build_table_backup_durations(self):
137 |         """
138 |         Assigns an estimated backup duration to every DynamoDB table description in the account.
139 |         Tables with 0 size will not be backed up.
140 |         :return: list of (table name, estimated duration, size in bytes) tuples, sorted by ascending estimated backup duration
141 |         """
142 |         table_backup_duration = []
143 |         for description in self.table_descriptions:
144 |             table = description['Table']
145 |             duration = self.estimate_duration(table)
146 | 
147 |             if table['TableSizeBytes']:
148 |                 table_backup_duration.append((table['TableName'], duration, table['TableSizeBytes']))
149 |             else:
150 |                 logger.info("Skipping {} as it appears to be empty.".format(table["TableName"]))
151 | 
152 |         return sorted(table_backup_duration, key=lambda x: x[1])
153 | 
154 |     def create_pipeline_parameters(self, backups, total_table_size):
155 |         """
156 |         :param backups: list of elements, as returned from create_backup_parameters
157 |         :param total_table_size: total size in bytes of all tables assigned to this pipeline
158 |         :return: dict of parameters used to render a single pipeline definition (Default node, EmrClusterForBackup node and the per-table backups)
159 |         """
160 |         cluster_config = None
161 |         # Pick the smallest cluster configuration that can handle the combined table size.
162 |         for config in CLUSTER_CONFIGS:
163 |             if total_table_size < config['maxTotalDynamoDbSizeBytes']:
164 |                 cluster_config = config
165 |                 break
166 | 
167 |         return {
168 |             'subnetId': '{}'.format(self.subnet_id),
169 |             'coreInstanceCount': cluster_config['coreInstanceCount'],
170 |             'coreInstanceType': cluster_config['coreInstanceType'],
171 |             'masterInstanceType': cluster_config['masterInstanceType'],
172 |             'clusterMemory': cluster_config['clusterMemory'],
173 |             'region': '{}'.format(self.region),
174 |             'terminateAfter': '{} Hour'.format(self.terminate_after),
175 |             's3BackupBucket': '{}'.format(self.s3_backup_bucket),
176 |             's3PipelineLogBucket': '{}'.format(self.s3_log_location),
177 |             'backups': backups
178 |         }
179 | 
180 |     def create_backup_parameters(self, table_counter, table_name):
181 |         """
182 |         :param table_counter: index of the table within this pipeline, used to derive unique node names and ids
183 |         :param table_name: name of the DynamoDB table to back up
184 |         :return: dict of parameters needed for backing up a single DynamoDB table.
185 | """ 186 | return {'dbSourceTableReadThroughputPercent': '{}'.format(self.read_throughput_percent), 187 | 'dbSourceTableName': 'DDBSourceTable{}'.format(table_counter), 188 | 'dbSourceTableId': 'DDBSourceTable{}'.format(table_counter), 189 | 'dynamoDBTableName': table_name, 190 | 's3BackupLocationId': 'S3BackupLocation{}'.format(table_counter), 191 | 's3BackupLocationName': 'S3BackupLocation{}'.format(table_counter), 192 | 'tableBackupActivityMaximumRetries': '{}'.format(self.max_retries), 193 | 'tableBackupActivityName': 'TableBackupActivity{}'.format(table_counter), 194 | 'tableBackupActivityId': 'TableBackupActivity{}'.format(table_counter), 195 | 'region': '{}'.format(self.region), 196 | 'comma': True} 197 | 198 | def normalize_backup_parameters(self, backups): 199 | if backups: 200 | backups[-1]['comma'] = False 201 | 202 | return backups 203 | 204 | def should_add_more_tables(self, table_index, total_duration, table_backup_durations, backups): 205 | """ 206 | Checks whether or not more tables should be backed up on current data pipeline 207 | :param table_index: 208 | :param total_duration: 209 | :param table_backup_durations: 210 | :param backups: 211 | :return: 212 | """ 213 | add_more_tables = True 214 | 215 | if table_index + 1 < len(table_backup_durations): 216 | if total_duration + table_backup_durations[table_index + 1][1] >= MAX_DURATION_SEC: 217 | add_more_tables = False 218 | 219 | if len(backups) >= MAX_TABLES_PER_PIPELINE: 220 | add_more_tables = False 221 | else: 222 | add_more_tables = False 223 | 224 | return add_more_tables 225 | 226 | def estimate_duration(self, data): 227 | """ 228 | Gives rough estimate, on how long backing up dynamo db table will take. 229 | :param data: dynamic dynamo db table definition 230 | :return: Estimated time in seconds. 231 | """ 232 | table_size_bytes = data.get('TableSizeBytes', 0) 233 | read_capacity_units = data['ProvisionedThroughput']['ReadCapacityUnits'] 234 | 235 | return estimate_backup_duration(self.read_throughput_percent, table_size_bytes, 236 | read_capacity_units) + ACTIVITY_BOOTSTRAP_TIME 237 | -------------------------------------------------------------------------------- /hippolyte/pipeline_translator.py: -------------------------------------------------------------------------------- 1 | import json 2 | from copy import deepcopy 3 | 4 | # Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"). You 7 | # may not use this file except in compliance with the License. A copy of 8 | # the License is located at 9 | # 10 | # http://aws.amazon.com/apache2.0/ 11 | # 12 | # or in the "license" file accompanying this file. This file is 13 | # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF 14 | # ANY KIND, either express or implied. See the License for the specific 15 | # language governing permissions and limitations under the License. 16 | 17 | 18 | class PipelineDefinitionError(Exception): 19 | def __init__(self, msg): 20 | full_msg = ( 21 | "Error in pipeline definition: %s\n" % msg) 22 | super(PipelineDefinitionError, self).__init__(full_msg) 23 | self.msg = msg 24 | 25 | 26 | def definition_to_api_objects(definition): 27 | definition_copy = deepcopy(definition) 28 | if 'objects' not in definition_copy: 29 | raise PipelineDefinitionError('Missing "objects" key') 30 | api_elements = [] 31 | # To convert to the structure expected by the service, 32 | # we convert the existing structure to a list of dictionaries. 
33 | # Each dictionary has a 'fields', 'id', and 'name' key. 34 | for element in definition_copy['objects']: 35 | try: 36 | element_id = element.pop('id') 37 | except KeyError: 38 | raise PipelineDefinitionError('Missing "id" key of element: %s' % 39 | json.dumps(element)) 40 | api_object = {'id': element_id} 41 | # If a name is provided, then we use that for the name, 42 | # otherwise the id is used for the name. 43 | name = element.pop('name', element_id) 44 | api_object['name'] = name 45 | # Now we need the field list. Each element in the field list is a dict 46 | # with a 'key', 'stringValue'|'refValue' 47 | fields = [] 48 | for key, value in sorted(element.items()): 49 | fields.extend(_parse_each_field(key, value)) 50 | api_object['fields'] = fields 51 | api_elements.append(api_object) 52 | return api_elements 53 | 54 | 55 | def definition_to_api_parameters(definition): 56 | definition_copy = deepcopy(definition) 57 | if 'parameters' not in definition_copy: 58 | return None 59 | parameter_objects = [] 60 | for element in definition_copy['parameters']: 61 | try: 62 | parameter_id = element.pop('id') 63 | except KeyError: 64 | raise PipelineDefinitionError('Missing "id" key of parameter: %s' % 65 | json.dumps(element)) 66 | parameter_object = {'id': parameter_id} 67 | # Now we need the attribute list. Each element in the attribute list 68 | # is a dict with a 'key', 'stringValue' 69 | attributes = [] 70 | for key, value in sorted(element.items()): 71 | attributes.extend(_parse_each_field(key, value)) 72 | parameter_object['attributes'] = attributes 73 | parameter_objects.append(parameter_object) 74 | return parameter_objects 75 | 76 | 77 | def definition_to_parameter_values(definition): 78 | definition_copy = deepcopy(definition) 79 | if 'values' not in definition_copy: 80 | return None 81 | parameter_values = [] 82 | for key in definition_copy['values']: 83 | parameter_values.extend( 84 | _convert_single_parameter_value(key, definition_copy['values'][key])) 85 | 86 | return parameter_values 87 | 88 | 89 | def _parse_each_field(key, value): 90 | values = [] 91 | if isinstance(value, list): 92 | for item in value: 93 | values.append(_convert_single_field(key, item)) 94 | else: 95 | values.append(_convert_single_field(key, value)) 96 | return values 97 | 98 | 99 | def _convert_single_field(key, value): 100 | field = {'key': key} 101 | if isinstance(value, dict) and list(value.keys()) == ['ref']: 102 | field['refValue'] = value['ref'] 103 | else: 104 | field['stringValue'] = value 105 | return field 106 | 107 | 108 | def _convert_single_parameter_value(key, values): 109 | parameter_values = [] 110 | if isinstance(values, list): 111 | for each_value in values: 112 | parameter_value = {'id': key, 'stringValue': each_value} 113 | parameter_values.append(parameter_value) 114 | else: 115 | parameter_value = {'id': key, 'stringValue': values} 116 | parameter_values.append(parameter_value) 117 | return parameter_values 118 | -------------------------------------------------------------------------------- /hippolyte/project_config.py: -------------------------------------------------------------------------------- 1 | ACCOUNT_CONFIGS = { 2 | '123456789100': { 3 | 'name': 'example-account', 4 | 'emr_subnet': 'example-subnet-id', 5 | 'log_bucket': 'hippolyte-eu-west-1-prod-backups', 6 | 'backup_bucket': 'hippolyte-eu-west-1-prod-backups', 7 | 'exclude_from_backup': [ 8 | 'example-table-*' 9 | ], 10 | 'always_backup': [ 11 | 'this-is-not-an-example-table-1' 12 | ] 13 | } 14 | } 15 | 
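Note on ACCOUNT_CONFIGS: the exclude_from_backup and always_backup lists above are consumed by hippolyte/dynamodb_backup.py (get_table_descriptions), which is not reproduced in this dump. Judging by tests/test_dynamodb_backup.py, the exclusions are applied as regular expressions and always_backup overrides any exclusion. The snippet below is a minimal sketch of that selection logic under those assumptions; select_tables_for_backup is a hypothetical helper name, not part of the codebase.

import re


def select_tables_for_backup(table_names, exclude_from_backup, always_backup):
    """Keep a table if it is explicitly whitelisted, or if it matches no exclusion pattern."""
    exclude_patterns = [re.compile(pattern) for pattern in exclude_from_backup]

    return [name for name in table_names
            if name in always_backup
            or not any(pattern.match(name) for pattern in exclude_patterns)]


# Mirrors the expectations in tests/test_dynamodb_backup.py: '.*flux.*' excludes
# 'prd-shd-euw1-flux-table1', while a table listed in always_backup is kept even
# when its name matches an exclusion pattern.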
-------------------------------------------------------------------------------- /hippolyte/utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | MAX_TABLES_PER_PIPELINE = 32 4 | READ_BLOCK_SIZE_BYTES = 4096 5 | MAX_ALLOWED_PROVISIONED_READ_THROUGHPUT = 1000 6 | MAX_MSG_BULK_READ = 100 7 | MAX_BULK_READ_SIZE_BYTES = 16777216 8 | MAX_DURATION_SEC = 12 * 3600 9 | MAX_DURATION_SINGLE_PIPELINE = 3300 #less than 1h 10 | ACTIVITY_BOOTSTRAP_TIME = 60 11 | EMR_BOOTSTRAP_TIME = 600 12 | INITIAL_READ_THROUGHPUT_PERCENT = 0.5 13 | TIME_IN_BETWEEN_BACKUPS = 86400 14 | 15 | 16 | def estimate_backup_duration(read_throughput_percent, table_size_bytes, read_capacity_units): 17 | """ 18 | Gives rough estimate, on how long backing up dynamo db table will take. 19 | :param table_size_bytes: 20 | :param read_capacity_units 21 | :return: Estimated time in seconds. 22 | """ 23 | read_bytes_per_second = read_capacity_units * read_throughput_percent * READ_BLOCK_SIZE_BYTES 24 | 25 | return table_size_bytes / read_bytes_per_second 26 | 27 | 28 | def compute_required_throughput(estimated_duration, target_duration, read_capacity_units, read_throughput_percent): 29 | """ 30 | :param estimated_duration: estimated duration using current: read_capacity_units, read_throughput_percent 31 | :param target_duration: how long should backup take 32 | :param read_capacity_units: current provisioned read capacity 33 | :param read_throughput_percent: current backup read throughput as % of total read throughput 34 | :return: new read throughput with new read throughput percent 35 | """ 36 | ratio = estimated_duration / float(target_duration) 37 | new_read_capacity_units = read_capacity_units * (ratio + 1) 38 | new_read_throughput_percent = float(read_capacity_units) / new_read_capacity_units 39 | 40 | new_read_throughput_percent = 1 - max(new_read_throughput_percent, 0.01) 41 | 42 | return int(round(new_read_capacity_units)), round(new_read_throughput_percent, 2) 43 | 44 | 45 | def get_date_suffix(): 46 | return datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S") 47 | 48 | 49 | def chunks(l, n): 50 | for i in range(0, len(l), n): 51 | yield l[i:i + n] 52 | 53 | 54 | def list_tables_in_definition(pipeline_definition): 55 | nodes = pipeline_definition.get('objects') 56 | table_nodes = filter(lambda x: 'tableName' in x, nodes) 57 | 58 | return map(lambda x: x['tableName'], table_nodes) 59 | 60 | 61 | def get_first_element_in_the_list_with(l, key, value): 62 | element = filter(lambda x: x[key] == value, l) 63 | 64 | if element: 65 | return element[0] 66 | 67 | return None 68 | 69 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | boto3==1.4.4 2 | pystache==0.5.4 3 | retrying==1.3.3 4 | mock==2.0.0 5 | moto==1.0.1 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pystache==0.5.4 2 | retrying==1.3.3 -------------------------------------------------------------------------------- /serverless.yml: -------------------------------------------------------------------------------- 1 | service: hippolyte 2 | 3 | provider: 4 | name: aws 5 | runtime: python2.7 6 | role: LambdaRole 7 | 8 | # If you have multiple accounts then it's recommended to use multiple profiles to store AWS credentials 9 | # The 'custom' 
sections for the stage/profiles key value map. 10 | region: ${opt:region, self:custom.defaultRegion} 11 | stage: ${opt:stage, self:custom.defaultStage} 12 | profile: ${self:custom.profiles.${self:provider.stage}} 13 | 14 | plugins: 15 | - serverless-python-requirements 16 | 17 | custom: 18 | defaultStage: dev 19 | defaultRegion: us-east-1 20 | profiles: 21 | # These should correspond to the account credentails stored in ~/.aws/credentials 22 | dev: devProfile 23 | prod: prodProfile 24 | 25 | 26 | functions: 27 | backup: 28 | handler: hippolyte.dynamodb_backup.lambda_handler 29 | events: 30 | # The lambda function depends on the names of these events to determine in which mode to run 31 | - schedule: 32 | name: hippolyte-${self:provider.stage}-backup-event 33 | rate: cron(10 0 * * ? *) 34 | - schedule: 35 | name: hippolyte-${self:provider.stage}-monitor-dynamodb-backup 36 | rate: cron(15 1-10 * * ? *) 37 | 38 | resources: 39 | Resources: 40 | LambdaRole: 41 | Type: "AWS::IAM::Role" 42 | Properties: 43 | AssumeRolePolicyDocument: 44 | Version: "2012-10-17" 45 | Statement: 46 | - Effect: Allow 47 | Principal: 48 | Service: 49 | - lambda.amazonaws.com 50 | Action: "sts:AssumeRole" 51 | ManagedPolicyArns: 52 | - "arn:aws:iam::aws:policy/AmazonDynamoDBFullAccesswithDataPipeline" 53 | Policies: 54 | - PolicyName: LambdaPolicy 55 | PolicyDocument: 56 | Version: "2012-10-17" 57 | Statement: 58 | - Effect: Allow 59 | Action: 60 | - logs:CreateLogGroup 61 | - logs:CreateLogStream 62 | - logs:PutLogEvents 63 | Resource: 64 | - Fn::Sub: "arn:aws:logs:${self:provider.region}:*:log-group:/aws/lambda/*:*:*" 65 | - Effect: "Allow" 66 | Action: 67 | - "sns:Publish" 68 | Resource: {"Ref": "EmailNotificationTopic"} 69 | BackupBucket: 70 | Type: "AWS::S3::Bucket" 71 | Properties: 72 | BucketName: hippolyte-${self:provider.region}-${self:provider.stage}-backups 73 | EmailNotificationTopic: 74 | Type: "AWS::SNS::Topic" 75 | Properties: 76 | TopicName: hippolyte-backup-monitoring 77 | Subscription: 78 | - Protocol: email 79 | Endpoint: ${opt:email, self:custom.defaultEmail} 80 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ocadotechnology/hippolyte/a8f95f7a7de7a8499c89029106941ee0ea3a62d7/tests/__init__.py -------------------------------------------------------------------------------- /tests/resources/test_backup_metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "Pipelines": [ 3 | { 4 | "backed_up_tables": [ 5 | "prd-shd-euw1-scotty_audit-actions" 6 | ], 7 | "definition": { 8 | "objects": [ 9 | { 10 | "amiVersion": "3.9.0", 11 | "bootstrapAction": "s3://eu-west-1.elasticmapreduce/bootstrap-actions/configure-hadoop, --yarn-key-value,yarn.nodemanager.resource.memory-mb=11520,--yarn-key-value,yarn.scheduler.maximum-allocation-mb=11520,--yarn-key-value,yarn.scheduler.minimum-allocation-mb=1440,--yarn-key-value,yarn.app.mapreduce.am.resource.mb=2880,--mapred-key-value,mapreduce.map.memory.mb=5760,--mapred-key-value,mapreduce.map.java.opts=-Xmx4608M,--mapred-key-value,mapreduce.reduce.memory.mb=2880,--mapred-key-value,mapreduce.reduce.java.opts=-Xmx2304m,--mapred-key-value,mapreduce.map.speculative=false", 12 | "coreInstanceCount": "1", 13 | "coreInstanceType": "m3.xlarge", 14 | "id": "EmrClusterForBackup", 15 | "masterInstanceType": "m3.xlarge", 16 | "name": "EmrClusterForBackup", 17 | 
"region": "eu-west-1", 18 | "subnetId": "subnet-9f2395c6", 19 | "terminateAfter": "5 Hour", 20 | "type": "EmrCluster" 21 | }, 22 | { 23 | "failureAndRerunMode": "CASCADE", 24 | "id": "Default", 25 | "name": "Default", 26 | "pipelineLogUri": "s3://euw1-dynamodb-backups-prd-480503113116/logs/2017-06-22-13-53-48/", 27 | "resourceRole": "DataPipelineDefaultResourceRole", 28 | "role": "DataPipelineDefaultRole", 29 | "scheduleType": "ONDEMAND" 30 | }, 31 | { 32 | "id": "DDBSourceTable5", 33 | "name": "DDBSourceTable5", 34 | "readThroughputPercent": "0.5", 35 | "tableName": "prd-shd-euw1-scotty_audit-actions", 36 | "type": "DynamoDBDataNode" 37 | }, 38 | { 39 | "id": "TableBackupActivity5", 40 | "input": { 41 | "ref": "DDBSourceTable5" 42 | }, 43 | "maximumRetries": "2", 44 | "name": "TableBackupActivity5", 45 | "output": { 46 | "ref": "S3BackupLocation5" 47 | }, 48 | "resizeClusterBeforeRunning": "false", 49 | "runsOn": { 50 | "ref": "EmrClusterForBackup" 51 | }, 52 | "step": "s3://dynamodb-emr-eu-west-1/emr-ddb-storage-handler/2.1.0/emr-ddb-2.1.0.jar,org.apache.hadoop.dynamodb.tools.DynamoDbExport,#{output.directoryPath},#{input.tableName},#{input.readThroughputPercent}", 53 | "type": "EmrActivity" 54 | }, 55 | { 56 | "directoryPath": "s3://euw1-dynamodb-backups-prd-480503113116/prd-shd-euw1-scotty_audit-actions/#{format(@scheduledStartTime, 'YYYY-MM-dd-HH-mm-ss')}", 57 | "id": "S3BackupLocation5", 58 | "name": "S3BackupLocation5", 59 | "type": "S3DataNode" 60 | } 61 | ], 62 | "parameters": [], 63 | "values": {} 64 | }, 65 | "pipeline_id": "df-0024453ANW0OBWGL7YE" 66 | } 67 | ], 68 | "ScalableTargets": [ 69 | { 70 | "CreationTime": "2017-06-26 15:21:59.485000+01:00", 71 | "MaxCapacity": 20, 72 | "MinCapacity": 1, 73 | "ResourceId": "table/prd-shd-euw1-scotty_audit-actions", 74 | "RoleARN": "arn:aws:iam::480503113116:role/service-role/DynamoDBAutoscaleRole", 75 | "ScalableDimension": "dynamodb:table:ReadCapacityUnits", 76 | "ServiceNamespace": "dynamodb" 77 | } 78 | ], 79 | "ScalingPolicies": [ 80 | { 81 | "Alarms": [ 82 | { 83 | "AlarmARN": "arn:aws:cloudwatch:eu-west-1:480503113116:alarm:TargetTracking-table/prd-shd-euw1-scotty_audit-actions-AlarmHigh-13a0215d-5b1f-4ddc-ad7e-5d58ba296a66", 84 | "AlarmName": "TargetTracking-table/prd-shd-euw1-scotty_audit-actions-AlarmHigh-13a0215d-5b1f-4ddc-ad7e-5d58ba296a66" 85 | }, 86 | { 87 | "AlarmARN": "arn:aws:cloudwatch:eu-west-1:480503113116:alarm:TargetTracking-table/prd-shd-euw1-scotty_audit-actions-AlarmLow-ea7fbdd6-1aa4-4e9c-9ab9-66d8d7183e22", 88 | "AlarmName": "TargetTracking-table/prd-shd-euw1-scotty_audit-actions-AlarmLow-ea7fbdd6-1aa4-4e9c-9ab9-66d8d7183e22" 89 | }, 90 | { 91 | "AlarmARN": "arn:aws:cloudwatch:eu-west-1:480503113116:alarm:TargetTracking-table/prd-shd-euw1-scotty_audit-actions-ProvisionedCapacityHigh-3acc8c06-62cd-4c1e-95e4-6f22a911a2ef", 92 | "AlarmName": "TargetTracking-table/prd-shd-euw1-scotty_audit-actions-ProvisionedCapacityHigh-3acc8c06-62cd-4c1e-95e4-6f22a911a2ef" 93 | }, 94 | { 95 | "AlarmARN": "arn:aws:cloudwatch:eu-west-1:480503113116:alarm:TargetTracking-table/prd-shd-euw1-scotty_audit-actions-ProvisionedCapacityLow-a8f80034-8f75-4860-b2a3-2fada5cce22d", 96 | "AlarmName": "TargetTracking-table/prd-shd-euw1-scotty_audit-actions-ProvisionedCapacityLow-a8f80034-8f75-4860-b2a3-2fada5cce22d" 97 | } 98 | ], 99 | "CreationTime": "2017-06-26 15:21:59.571000+01:00", 100 | "PolicyARN": 
"arn:aws:autoscaling:eu-west-1:480503113116:scalingPolicy:ddb81ffd-483d-4b37-8e38-1440e5d7d37d:resource/dynamodb/table/prd-shd-euw1-scotty_audit-actions:policyName/DynamoDBReadCapacityUtilization:table/prd-shd-euw1-scotty_audit-actions", 101 | "PolicyName": "DynamoDBReadCapacityUtilization:table/prd-shd-euw1-scotty_audit-actions", 102 | "PolicyType": "TargetTrackingScaling", 103 | "ResourceId": "table/prd-shd-euw1-scotty_audit-actions", 104 | "ScalableDimension": "dynamodb:table:ReadCapacityUnits", 105 | "ServiceNamespace": "dynamodb", 106 | "TargetTrackingScalingPolicyConfiguration": { 107 | "PredefinedMetricSpecification": { 108 | "PredefinedMetricType": "DynamoDBReadCapacityUtilization" 109 | }, 110 | "TargetValue": 70.0 111 | } 112 | } 113 | ], 114 | "Tables": [ 115 | { 116 | "ResponseMetadata": { 117 | "HTTPHeaders": { 118 | "content-length": "1202", 119 | "content-type": "application/x-amz-json-1.0", 120 | "date": "Thu, 22 Jun 2017 13:53:47 GMT", 121 | "x-amz-crc32": "3477012252", 122 | "x-amzn-requestid": "O02UK4SOAUB6CP49O5US1K1O63VV4KQNSO5AEMVJF66Q9ASUAAJG" 123 | }, 124 | "HTTPStatusCode": 200, 125 | "RequestId": "O02UK4SOAUB6CP49O5US1K1O63VV4KQNSO5AEMVJF66Q9ASUAAJG" 126 | }, 127 | "Table": { 128 | "AttributeDefinitions": [ 129 | { 130 | "AttributeName": "id", 131 | "AttributeType": "S" 132 | }, 133 | { 134 | "AttributeName": "startDate", 135 | "AttributeType": "S" 136 | }, 137 | { 138 | "AttributeName": "startTime", 139 | "AttributeType": "S" 140 | } 141 | ], 142 | "CreationDateTime": "2015-06-15 16:12:43.471000+00:00", 143 | "GlobalSecondaryIndexes": [ 144 | { 145 | "IndexArn": "arn:aws:dynamodb:eu-west-1:480503113116:table/prd-shd-euw1-scotty_audit-actions/index/StartDateIndex", 146 | "IndexName": "StartDateIndex", 147 | "IndexSizeBytes": 4010436256, 148 | "IndexStatus": "ACTIVE", 149 | "ItemCount": 2091475, 150 | "KeySchema": [ 151 | { 152 | "AttributeName": "startDate", 153 | "KeyType": "HASH" 154 | }, 155 | { 156 | "AttributeName": "startTime", 157 | "KeyType": "RANGE" 158 | } 159 | ], 160 | "Projection": { 161 | "ProjectionType": "ALL" 162 | }, 163 | "ProvisionedThroughput": { 164 | "NumberOfDecreasesToday": 0, 165 | "ReadCapacityUnits": 100, 166 | "WriteCapacityUnits": 50 167 | } 168 | } 169 | ], 170 | "ItemCount": 2091475, 171 | "KeySchema": [ 172 | { 173 | "AttributeName": "id", 174 | "KeyType": "HASH" 175 | } 176 | ], 177 | "ProvisionedThroughput": { 178 | "LastDecreaseDateTime": "2017-06-21 09:37:33.736000+00:00", 179 | "LastIncreaseDateTime": "2017-04-26 00:10:44.103000+00:00", 180 | "NumberOfDecreasesToday": 0, 181 | "ReadCapacityUnits": 350, 182 | "WriteCapacityUnits": 50 183 | }, 184 | "TableArn": "arn:aws:dynamodb:eu-west-1:480503113116:table/prd-shd-euw1-scotty_audit-actions", 185 | "TableName": "prd-shd-euw1-scotty_audit-actions", 186 | "TableSizeBytes": 4010436256, 187 | "TableStatus": "ACTIVE" 188 | } 189 | } 190 | ] 191 | } -------------------------------------------------------------------------------- /tests/test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | 4 | 5 | if __name__ == '__main__': 6 | runner = unittest.TextTestRunner() 7 | suite = unittest.TestLoader().discover('.') 8 | exit_code = not runner.run(suite).wasSuccessful() 9 | sys.exit(exit_code) 10 | -------------------------------------------------------------------------------- /tests/test_dynamodb_backup.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import 
boto3 3 | import json 4 | import sys 5 | import os 6 | from moto import mock_dynamodb2 7 | 8 | sys.path.append(os.path.join(os.getcwd() + '/../code')) 9 | 10 | from hippolyte.dynamodb_backup import get_table_descriptions 11 | from test_utils import create_test_table, load_backup_metadata 12 | 13 | 14 | class TestDynamoDbBackup(unittest.TestCase): 15 | @mock_dynamodb2 16 | def test_get_table_descriptions(self): 17 | dynamodb_client = boto3.client('dynamodb', region_name='eu-west-1') 18 | 19 | backup_metadata = load_backup_metadata() 20 | table_descriptions = json.loads(backup_metadata)['Tables'] 21 | 22 | always_backup = ['prd-mol-euw1-fluxcapacitor-fluxcapacitor-alertdefinition', 23 | 'prd-mol-euw1-fluxcapacitor-fluxcapacitor-alertdefinitionhistory', 24 | 'prd-mol-euw1-fluxcapacitor-fluxcapacitor-dashboard', 25 | 'prd-mol-euw1-fluxcapacitor-fluxcapacitor-eventtype', 26 | 'prd-mol-euw1-fluxcapacitor-fluxcapacitor-fluxcontext', 27 | 'prd-mol-euw1-fluxcapacitor-fluxcapacitor-kinesisstream', 28 | 'prd-mol-euw1-fluxcapacitor-fluxcapacitor-metricdefinition', 29 | 'prd-mol-euw1-fluxcapacitor-fluxcapacitor-metricdefinitionhistory', 30 | 'prd-mol-euw1-fluxcapacitor-fluxcapacitor-pagerdutyServiceIntegration', 31 | 'prd-mol-euw1-fluxcapacitor-fluxcapacitor-snssubscription', 32 | 'prd-mol-euw1-fluxcapacitor-fluxcapacitor-system', 33 | 'prd-mol-euw1-fluxcapacitor-fluxcapacitor-systemaccessrule', 34 | 'prd-mol-euw1-fluxcapacitor-fluxcapacitor-useraccessrule' 35 | ] 36 | 37 | for table_name in always_backup + [ 38 | 'prd-cymes-euw1-commsflux--alertdefinition', 39 | 'prd-cymes-euw1-fluxcapacitor-prd-cymes-euw1-fluxcapacitor-alert', 40 | 'prd-cymes-euw1-fluxcapacitor-prd-cymes-euw1-storepick-flux-events-192.168.1.39-kinesis-consumer', 41 | 'prd-shd-euw1-flux-table1', 42 | 'prd-shd-euw1-smth-flux', 43 | 'prd-mol-euw1-storepickreporting-ContainerReport-snapshots', 44 | 'prd-mol-euw1-will-be-backed-up', 45 | 'prd-mol-euw1-will-be-backed-up-snapshots' 46 | ]: 47 | create_test_table(dynamodb_client, table_name, table_descriptions[0]['Table']) 48 | 49 | exclude_from_backup = [ 50 | '.*flux.*', 51 | '.*storepickreporting-.*-snapshots' 52 | ] 53 | 54 | table_descriptions = get_table_descriptions(exclude_from_backup, always_backup) 55 | included_tables = map(lambda x: x['Table']['TableName'], table_descriptions) 56 | included_tables.sort() 57 | 58 | expected_tables = always_backup + [ 59 | 'prd-mol-euw1-will-be-backed-up', 60 | 'prd-mol-euw1-will-be-backed-up-snapshots' 61 | ] 62 | expected_tables.sort() 63 | 64 | self.assertListEqual(included_tables, expected_tables) 65 | -------------------------------------------------------------------------------- /tests/test_dynamodb_booster.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import boto3 3 | import json 4 | import sys 5 | import os 6 | from botocore.exceptions import ClientError 7 | from moto import mock_s3, mock_datapipeline, mock_dynamodb2 8 | from mock import patch, Mock 9 | 10 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 11 | 12 | import hippolyte.dynamodb_booster 13 | import hippolyte.aws_utils 14 | from test_utils import create_test_table, load_backup_metadata 15 | 16 | TABLE_NAME = 'prd-shd-euw1-scotty_audit-actions' 17 | 18 | 19 | def create_backup_metadata(s3_client, bucket, key, body): 20 | s3_client.create_bucket(Bucket=bucket) 21 | s3_client.put_object(Bucket=bucket, Key=key, Body=body) 22 | 23 | 24 | def get_old_rcu_and_boost(table_descriptions, 
new_read_capacity): 25 | old = table_descriptions[0]['Table']['ProvisionedThroughput']['ReadCapacityUnits'] 26 | table_descriptions[0]['Table']['ProvisionedThroughput']['ReadCapacityUnits'] = new_read_capacity 27 | 28 | return old 29 | 30 | 31 | class FakeApplicationAutoscalingClient(): 32 | def __init__(self): 33 | self.scalable_targets = [] 34 | self.scaling_policies = [] 35 | 36 | def get_paginator(self, paginator_name): 37 | _list = [] 38 | _key = '' 39 | if paginator_name == 'describe_scalable_targets': 40 | _list = self.scalable_targets 41 | _key = 'ScalableTargets' 42 | elif paginator_name == 'describe_scaling_policies': 43 | _list = self.scaling_policies 44 | _key = 'ScalingPolicies' 45 | 46 | paginator = Mock() 47 | paginator.paginate = Mock(return_value=[{ 48 | _key: _list 49 | }]) 50 | 51 | return paginator 52 | 53 | def delete_scaling_policy(self, PolicyName, ServiceNamespace, ResourceId, ScalableDimension): 54 | before_delete = len(self.scaling_policies) 55 | self.scaling_policies = filter( 56 | lambda x: x['PolicyName'] != PolicyName or x['ScalableDimension'] != ScalableDimension, 57 | self.scaling_policies) 58 | 59 | if len(self.scaling_policies) == before_delete: 60 | raise ClientError( 61 | { 62 | 'Error': 63 | { 64 | 'Code': 'ObjectNotFoundException', 65 | 'Message': 'No scaling policy found for service namespace: dynamodb, resource ID: {},' 66 | ' scalable dimension: {}: ObjectNotFoundException'.format(ResourceId, 67 | ScalableDimension) 68 | } 69 | }, 70 | 'DeleteScalingPolicy' 71 | ) 72 | 73 | def deregister_scalable_target(self, ServiceNamespace, ResourceId, ScalableDimension): 74 | before_delete = len(self.scalable_targets) 75 | self.scalable_targets = filter( 76 | lambda x: x['ResourceId'] != ResourceId or x['ScalableDimension'] != ScalableDimension, 77 | self.scalable_targets) 78 | 79 | if len(self.scalable_targets) == before_delete: 80 | raise ClientError( 81 | { 82 | 'Error': 83 | { 84 | 'Code': 'ObjectNotFoundException', 85 | 'Message': 'No scalable target found for service namespace: dynamodb, resource ID: {},' 86 | ' scalable dimension: {}: ObjectNotFoundException'.format(ResourceId, 87 | ScalableDimension) 88 | } 89 | }, 90 | 'DeregisterScalableTarget' 91 | ) 92 | 93 | def put_scaling_policy(self, PolicyName, ServiceNamespace, ResourceId, ScalableDimension, 94 | PolicyType, TargetTrackingScalingPolicyConfiguration): 95 | self.scaling_policies.append({ 96 | "PolicyName": PolicyName, 97 | "ServiceNamespace": ServiceNamespace, 98 | "ResourceId": ResourceId, 99 | "ScalableDimension": ScalableDimension, 100 | "PolicyType": PolicyType, 101 | "TargetTrackingScalingPolicyConfiguration": TargetTrackingScalingPolicyConfiguration 102 | }) 103 | 104 | def register_scalable_target(self, ServiceNamespace, ResourceId, ScalableDimension, 105 | MinCapacity, MaxCapacity, RoleARN): 106 | self.scalable_targets.append({ 107 | "ServiceNamespace": ServiceNamespace, 108 | "ResourceId": ResourceId, 109 | "ScalableDimension": ScalableDimension, 110 | "MinCapacity": MinCapacity, 111 | "MaxCapacity": MaxCapacity, 112 | "RoleARN": RoleARN 113 | }) 114 | 115 | 116 | class TestDynamoDbBooster(unittest.TestCase): 117 | @mock_dynamodb2 118 | @mock_datapipeline 119 | @mock_s3 120 | @patch("hippolyte.config_util.ConfigUtil.list_backed_up_tables", return_value=TABLE_NAME) 121 | @patch("hippolyte.aws_utils.ApplicationAutoScalingUtil._init_client", 122 | return_value=FakeApplicationAutoscalingClient()) 123 | def test_restore_throughput(self, config_mock, autoscaling_mock): 124 | dynamodb_client = 
boto3.client('dynamodb', region_name='eu-west-1') 125 | s3_client = boto3.client('s3') 126 | 127 | backup_metadata = load_backup_metadata() 128 | table_descriptions = json.loads(backup_metadata)['Tables'] 129 | create_test_table(dynamodb_client, TABLE_NAME, table_descriptions[0]['Table']) 130 | old_rcu = get_old_rcu_and_boost(table_descriptions, 1000) 131 | 132 | bucket = 'euw1-dynamodb-backups-prd-480503113116' 133 | booster = hippolyte.dynamodb_booster.DynamoDbBooster(table_descriptions, bucket, 0.5) 134 | create_backup_metadata(s3_client, bucket, 'backup_metadata-2099-06-06-00-00-01', backup_metadata) 135 | 136 | booster.restore_throughput() 137 | 138 | table = dynamodb_client.describe_table(TableName=TABLE_NAME) 139 | self.assertEqual(table['Table']['ProvisionedThroughput']['ReadCapacityUnits'], old_rcu) 140 | 141 | @mock_dynamodb2 142 | @mock_datapipeline 143 | @mock_s3 144 | @patch("hippolyte.aws_utils.ApplicationAutoScalingUtil._init_client", 145 | return_value=FakeApplicationAutoscalingClient()) 146 | def test_autoscaling_support(self, autoscaling_mock): 147 | backup_metadata = load_backup_metadata() 148 | backup_metadata_dict = json.loads(backup_metadata) 149 | table_descriptions = backup_metadata_dict['Tables'] 150 | scaling_policies = backup_metadata_dict['ScalingPolicies'] 151 | scalable_targets = backup_metadata_dict['ScalableTargets'] 152 | 153 | booster = hippolyte.dynamodb_booster.DynamoDbBooster(table_descriptions, 'foo', 0.5) 154 | autoscaling_util = booster.application_auto_scaling_util 155 | 156 | for policy in scaling_policies: 157 | autoscaling_util.put_scaling_policy(policy['PolicyName'], 158 | policy['ServiceNamespace'], 159 | policy['ResourceId'], 160 | policy['ScalableDimension'], 161 | policy['PolicyType'], 162 | policy['TargetTrackingScalingPolicyConfiguration']) 163 | 164 | for target in scalable_targets: 165 | autoscaling_util.register_scalable_target(target['ServiceNamespace'], 166 | target['ResourceId'], 167 | target['ScalableDimension'], 168 | target['MinCapacity'], 169 | target['MaxCapacity'], 170 | target['RoleARN']) 171 | 172 | scaling_policies_before = autoscaling_util.describe_scaling_policies("dynamodb").get('ScalingPolicies') 173 | scalable_targets_before = autoscaling_util.describe_scalable_targets("dynamodb").get('ScalableTargets') 174 | 175 | booster.disable_auto_scaling(scaling_policies, scalable_targets) 176 | 177 | self.assertFalse(autoscaling_util.describe_scaling_policies("dynamodb").get('ScalingPolicies')) 178 | self.assertFalse(autoscaling_util.describe_scalable_targets("dynamodb").get('ScalableTargets')) 179 | 180 | booster.reenable_auto_scaling(backup_metadata_dict) 181 | 182 | scaling_policies_after = autoscaling_util.describe_scaling_policies("dynamodb").get('ScalingPolicies') 183 | scalable_targets_after = autoscaling_util.describe_scalable_targets("dynamodb").get('ScalableTargets') 184 | 185 | self.assertListEqual(scaling_policies_before, scaling_policies_after) 186 | self.assertListEqual(scalable_targets_before, scalable_targets_after) 187 | 188 | @mock_dynamodb2 189 | @mock_datapipeline 190 | @mock_s3 191 | @patch('hippolyte.dynamodb_booster.logger') 192 | @patch("hippolyte.aws_utils.ApplicationAutoScalingUtil._init_client", 193 | return_value=FakeApplicationAutoscalingClient()) 194 | def test_disable_autoscaling_warns_on_missing_resources(self, logger_mock, autoscaling_mock): 195 | backup_metadata = load_backup_metadata() 196 | backup_metadata_dict = json.loads(backup_metadata) 197 | table_descriptions = 
backup_metadata_dict['Tables'] 198 | scaling_policies = backup_metadata_dict['ScalingPolicies'] 199 | scalable_targets = backup_metadata_dict['ScalableTargets'] 200 | 201 | hippolyte.dynamodb_booster.logger = logger_mock 202 | booster = hippolyte.dynamodb_booster.DynamoDbBooster(table_descriptions, 'foo', 0.5) 203 | autoscaling_util = booster.application_auto_scaling_util 204 | 205 | for policy in scaling_policies: 206 | autoscaling_util.put_scaling_policy(policy['PolicyName'], 207 | policy['ServiceNamespace'], 208 | policy['ResourceId'], 209 | 'NonExisting', 210 | policy['PolicyType'], 211 | policy['TargetTrackingScalingPolicyConfiguration']) 212 | 213 | for target in scalable_targets: 214 | autoscaling_util.register_scalable_target(target['ServiceNamespace'], 215 | target['ResourceId'], 216 | 'NonExisting', 217 | target['MinCapacity'], 218 | target['MaxCapacity'], 219 | target['RoleARN']) 220 | 221 | booster.disable_auto_scaling(scaling_policies, scalable_targets) 222 | self.assertEqual(logger_mock.warn.call_count, 2) 223 | -------------------------------------------------------------------------------- /tests/test_monitor.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import boto3 3 | import sys 4 | import os 5 | from moto import mock_s3, mock_datapipeline, mock_sns 6 | from datetime import datetime, timedelta 7 | 8 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 9 | 10 | from hippolyte.monitor import Monitor, is_backup_from_current_batch 11 | 12 | 13 | class TestESMonitor(unittest.TestCase): 14 | @mock_sns 15 | @mock_datapipeline 16 | @mock_s3 17 | def test_success_on_large_number_of_backup_files(self): 18 | bucket = 'euw1-dynamodb-backups-prd-480503113116' 19 | log_bucket = 'euw1-infrastructure_logs-prd-480503113116' 20 | s3 = boto3.client('s3', region_name='eu-west-1') 21 | s3.create_bucket(Bucket=bucket) 22 | 23 | for day in range(1, 30): 24 | key = 'prd-shd-euw1-scotty_audit-events/2099-05-{}-00-10-38/'.format(str(day).zfill(2)) 25 | for file_name in range(0, 100): 26 | s3.put_object(Bucket=bucket, Key=key + str(file_name), Body='') 27 | 28 | s3.put_object(Bucket=bucket, Key=key + '_SUCCESS', Body='') 29 | 30 | dummy_pipeline = { 31 | "definition": { 32 | "objects": [ 33 | { 34 | "directoryPath": "s3://euw1-dynamodb-backups-prd-480503113116/prd-shd-euw1-scotty_audit-events/#{format(@scheduledStartTime, 'YYYY-MM-dd-HH-mm-ss')}" 35 | } 36 | ] 37 | } 38 | } 39 | 40 | monitor = Monitor('480503113116', log_bucket, bucket, 'dummy_sns') 41 | failed_tables = monitor.extract_failed_tables(dummy_pipeline) 42 | 43 | self.assertFalse(failed_tables) 44 | 45 | def test_is_backup_from_current_batch_success(self): 46 | last_modified = datetime.utcnow() 47 | self.assertTrue(is_backup_from_current_batch({'LastModified': last_modified})) 48 | 49 | def test_is_backup_from_current_batch_failure(self): 50 | last_modified = datetime.utcnow() - timedelta(hours=26) 51 | self.assertFalse(is_backup_from_current_batch({'LastModified': last_modified})) -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from copy import deepcopy 3 | 4 | 5 | def create_test_table(dynamodb_client, table_name, table): 6 | _table = deepcopy(table) 7 | del _table['ProvisionedThroughput']["LastIncreaseDateTime"] 8 | del _table['ProvisionedThroughput']["LastDecreaseDateTime"] 9 | del 
_table['ProvisionedThroughput']["NumberOfDecreasesToday"]
10 | 
11 |     dynamodb_client.create_table(TableName=table_name,
12 |                                  AttributeDefinitions=_table['AttributeDefinitions'],
13 |                                  KeySchema=_table['KeySchema'],
14 |                                  ProvisionedThroughput=_table['ProvisionedThroughput']
15 |                                  )
16 | 
17 | 
18 | def load_backup_metadata():
19 |     metadata_file = os.path.join(os.path.dirname(__file__), 'resources/test_backup_metadata.json')
20 |     with open(metadata_file) as f:
21 |         return f.read()
--------------------------------------------------------------------------------
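Usage note: the two helpers in tests/test_utils.py are meant to be combined with moto, as tests/test_dynamodb_backup.py and tests/test_dynamodb_booster.py do. The sketch below shows that minimal pattern; the test class and table name are illustrative only, not part of the test suite.

import json
import unittest

import boto3
from moto import mock_dynamodb2

from test_utils import create_test_table, load_backup_metadata


class TestBackupFixture(unittest.TestCase):
    @mock_dynamodb2
    def test_table_created_from_fixture(self):
        dynamodb_client = boto3.client('dynamodb', region_name='eu-west-1')

        # The fixture file carries full describe_table payloads under 'Tables'.
        table_description = json.loads(load_backup_metadata())['Tables'][0]['Table']
        create_test_table(dynamodb_client, 'example-table', table_description)

        table = dynamodb_client.describe_table(TableName='example-table')
        self.assertEqual(table['Table']['TableName'], 'example-table')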