├── source
│   ├── tests
│   │   ├── __init__.py
│   │   ├── test_notify.py
│   │   ├── test_cw.py
│   │   ├── test_efs.py
│   │   ├── test_ddb.py
│   │   ├── test_asg.py
│   │   ├── test_events.py
│   │   └── test_ssm.py
│   ├── requirements.txt
│   ├── lib
│   │   ├── __init__.py
│   │   ├── asg.py
│   │   ├── efs.py
│   │   ├── dynamodb.py
│   │   ├── logger.py
│   │   ├── ssm.py
│   │   ├── notify.py
│   │   ├── cloudwatch.py
│   │   ├── events.py
│   │   └── ssm.sh
│   ├── setup.cfg
│   ├── scripts
│   │   ├── pip_install_dependencies.py
│   │   ├── efs-restore-fpsync.sh
│   │   ├── efs-ec2-backup.sh
│   │   ├── efs-backup-fpsync.sh
│   │   └── efs-ec2-restore.sh
│   ├── setup.py
│   ├── solution-helper.py
│   └── orchestrator.py
├── .gitignore
├── deployment
│   ├── run-unit-tests.sh
│   ├── build-s3-dist.sh
│   ├── efs-to-efs-restore.template
│   └── efs-to-efs-backup.template
├── CODE_OF_CONDUCT.md
├── NOTICE.txt
├── CHANGELOG.md
├── CONTRIBUTING.md
├── README.md
└── LICENSE.txt

/source/tests/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Lalit G.'
--------------------------------------------------------------------------------
/source/requirements.txt:
--------------------------------------------------------------------------------
1 | boto3>=1.9.180
2 | mock>=2.0.0
3 | moto==1.3.8
4 | pytest>=3.1.3
5 | pytest-mock>=1.6.2
6 | pytest-runner>=2.11.1
7 | uuid>=1.30
--------------------------------------------------------------------------------
/source/lib/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Lalit G.'
2 |
3 | __all__ = ["events", "dynamodb", "ssm", "notify", "logger", "efs", "asg", "cloudwatch"]
4 |
5 | __version__ = "1.0.0"
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | .DS_Store
3 | source/scratch/
4 | .idea/
5 | *.jpg
6 | *.pyc
7 | .eggs
8 | efs_backup_solution.egg-info
9 | .cache
10 | **/global-s3-assets
11 | **/regional-s3-assets
12 | **/open-source
--------------------------------------------------------------------------------
/deployment/run-unit-tests.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | echo 'Installing dependencies using pip'
3 | echo 'python source/scripts/pip_install_dependencies.py'
4 | python source/scripts/pip_install_dependencies.py
5 | echo 'cd source && pytest tests && cd -'
6 | cd source && pytest tests && cd -
7 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | ## Code of Conduct
2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
4 | opensource-codeofconduct@amazon.com with any additional questions or comments.
5 | -------------------------------------------------------------------------------- /source/setup.cfg: -------------------------------------------------------------------------------- 1 | [aliases] 2 | test=pytest 3 | 4 | [tool:pytest] 5 | #Standard (info level) (Passed/Failed per test function) 6 | addopts = -v 7 | 8 | #Verbose (debug level) 9 | #addopts = -sv 10 | 11 | #Standard (info level) ('Dots' per test function) (Dots = number of tests per function) 12 | #addopts = -rsx 13 | 14 | #Succinct Output (q = quiet) 15 | #addopts = -rsxX -q 16 | -------------------------------------------------------------------------------- /source/scripts/pip_install_dependencies.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import subprocess 5 | 6 | def install_dependencies(function_path): 7 | """get dependencies in requirements.txt 8 | 9 | """ 10 | function_path = os.path.normpath(function_path) 11 | requirements_path = os.path.join(function_path, 'requirements.txt') 12 | if os.path.isfile(requirements_path): 13 | try: 14 | subprocess.call(["pip", "install", "-r", requirements_path, "--upgrade"]) 15 | except Exception as e: 16 | print("Error: %s" % (e)) 17 | 18 | if __name__ == "__main__": 19 | if 'scripts' not in os.getcwd(): 20 | os.chdir('./source/scripts') 21 | # package files in this directory 22 | function_path = '../../source' 23 | install_dependencies(function_path) 24 | -------------------------------------------------------------------------------- /source/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import unicode_literals 3 | from setuptools import setup, find_packages 4 | 5 | #Use "source/scripts/pip_install_dependencies.py" to install dependencies 6 | 7 | tests_requires = [ 8 | 'pytest-mock == 1.6.2', 9 | 'pytest-runner == 2.11.1', 10 | 'pytest == 3.2.1' 11 | ] 12 | 13 | setup( 14 | name='efs-backup-solution', 15 | version='1.4.1', 16 | description='AWS EFS to AWS EFS backup', 17 | author='Lalit G.', 18 | url='https://github.com/awslabs/aws-efs-backup', 19 | packages=find_packages(exclude=("tests", "tests.*")), 20 | license="Amazon", 21 | zip_safe=False, 22 | test_suite="tests", 23 | tests_require=tests_requires, 24 | setup_requires=['pytest-runner'], 25 | classifiers=[ 26 | "Programming Language :: Python :: 3.8" 27 | ], 28 | ) -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | EFS to EFS Backup Solution 2 | Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | 16 | ********************** 17 | THIRD PARTY COMPONENTS 18 | ********************** 19 | This software includes third party software subject to the following copyrights: 20 | 21 | fpart under the Berkeley Software Distribution (BSD) license 22 | mock under the Berkeley Software Distribution (BSD) License 23 | moto under the Apache Software License (Apache) 24 | pytest under the Massachusetts Institute of Technology (MIT) License 25 | pytest-mock under the Massachusetts Institute of Technology (MIT) License 26 | pytest-runner under the Massachusetts Institute of Technology (MIT) License 27 | boto3 under the Apache License Version 2.0 -------------------------------------------------------------------------------- /source/tests/test_notify.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import boto3 4 | from uuid import uuid4 5 | from moto import mock_sns 6 | from decimal import Decimal 7 | from unittest import TestCase 8 | from unittest import TestLoader 9 | from unittest import TextTestRunner 10 | from lib.notify import Notify 11 | from lib.logger import Logger 12 | 13 | log_level = 'info' 14 | logger = Logger(loglevel=log_level) 15 | 16 | class NotifyTest(TestCase): 17 | def setUp(self): 18 | self.notify = Notify(logger) 19 | 20 | @mock_sns 21 | def test_customer_notify(self): 22 | conn = boto3.client('sns', region_name='us-east-1') 23 | conn.create_topic(Name="dummy-topic") 24 | response = conn.list_topics() 25 | topic_arn = response["Topics"][0]['TopicArn'] 26 | 27 | message = {'key_string1': '2017-7-6', 28 | 'key_string2': '12345', 29 | 'decimal': Decimal('1') 30 | } 31 | response = self.notify.customer(topic_arn, message) 32 | self.assertTrue(response['ResponseMetadata']['HTTPStatusCode'] == 200) 33 | 34 | 35 | def test_backend_metrics(self): 36 | uuid = str(uuid4()) 37 | solution_id = 'SO_unit_test' 38 | customer_uuid = uuid 39 | logger.info("UUID: " + customer_uuid) 40 | data = {'key_string1': '2017-7-6', 41 | 'key_string2': '12345', 42 | 'decimal': Decimal('1') 43 | } 44 | url = 'https://oszclq8tyh.execute-api.us-east-1.amazonaws.com/prod/generic' 45 | response = self.notify.metrics(solution_id, customer_uuid, data, url) 46 | self.assertTrue(response == 200) 47 | 48 | if __name__ == '__main__' and __package__ is None: 49 | suite = TestLoader().loadTestsFromTestCase(NotifyTest) 50 | TextTestRunner(verbosity=2).run(suite) 51 | -------------------------------------------------------------------------------- /source/tests/test_cw.py: -------------------------------------------------------------------------------- 1 | from lib.cloudwatch import CloudWatchMetric 2 | from lib.logger import Logger 3 | from decimal import Decimal 4 | 5 | log_level = 'critical' 6 | logger = Logger(loglevel=log_level) 7 | cw = CloudWatchMetric(logger) 8 | 9 | 10 | efs_metrics_response = {'SrcBurstCreditBalance': Decimal('23'), 'SrcPermittedThroughput': Decimal('10')} 11 | 12 | def test_cw_returns_dict(mocker): 13 | mocker.patch.object(cw, 'efs_cw_metrics') 14 | cw.efs_cw_metrics.return_value = efs_metrics_response 15 | response = cw.efs_cw_metrics('fake_efs_id', 'src') 16 | assert type(response) == dict 17 | 18 | def test_check_efs_metrics(mocker): 19 | mocker.patch.object(cw, 'efs_cw_metrics') 20 | cw.efs_cw_metrics.return_value = efs_metrics_response 21 | cw.efs_cw_metrics('fake_efs_id', 'src') 22 | for key, value in efs_metrics_response.items(): 23 | assert type(value) == Decimal 24 | 25 | s3_metric_response = 
{'Datapoints': [
26 |     {'Average': 1839341.0, 'Unit': 'Bytes'},
27 |     {'Average': 1839341.0, 'Unit': 'Bytes'}],
28 |     'ResponseMetadata': {'RetryAttempts': 0, 'HTTPStatusCode': 200,
29 |                          'RequestId': '6547',
30 |                          'HTTPHeaders': {'x-amzn-requestid': '6547','content-length': '649', 'content-type': 'text/xml'}},
31 |     'Label': 'BucketSizeBytes'}
32 |
33 | def test_s3_returns_dict(mocker):
34 |     mocker.patch.object(cw, 's3_cw_metrics')
35 |     cw.s3_cw_metrics.return_value = s3_metric_response
36 |     response = cw.s3_cw_metrics('fake_bucket_name')
37 |     assert type(response) == dict
38 |
39 | def test_check_s3_metrics(mocker):
40 |     mocker.patch.object(cw, 's3_cw_metrics')
41 |     cw.s3_cw_metrics.return_value = s3_metric_response
42 |     cw.s3_cw_metrics('fake_bucket_name')
43 |     value = s3_metric_response['Datapoints'][-1]['Average']
44 |     assert type(value) == float
--------------------------------------------------------------------------------
/source/tests/test_efs.py:
--------------------------------------------------------------------------------
1 | from lib.logger import Logger
2 | from lib.efs import EFS
3 | log_level = 'critical'
4 | logger = Logger(loglevel=log_level)
5 | efs = EFS(logger)
6 |
7 | response = {
8 |     'ResponseMetadata':
9 |         {
10 |             'RetryAttempts': 0, 'HTTPStatusCode': 200, 'RequestId': '7feeb7b6-760e-11e7-81f2-d7792aa8bdb2',
11 |             'HTTPHeaders': {
12 |                 'x-amzn-requestid': '7feeb7b6-760e-11e7-81f2-d7792aa8bdb2',
13 |                 'date': 'Mon, 31 Jul 2017 16:37:10 GMT', 'content-length': '375',
14 |                 'content-type': 'application/json'
15 |             }
16 |         },
17 |     u'FileSystems': [
18 |         {
19 |             u'SizeInBytes': {
20 |                 u'Value': 99
21 |             },
22 |             u'Name': u'gen-purpose-src',
23 |             u'CreationToken': u'console-ea5e8735-901f-44a1-87c7-53d45ad666ba',
24 |             u'PerformanceMode': u'generalPurpose',
25 |             u'FileSystemId': u'fs-7c9e1835',
26 |             u'NumberOfMountTargets': 5,
27 |             u'LifeCycleState': u'available',
28 |             u'OwnerId': u'36'
29 |         }
30 |     ]
31 | }
32 |
33 |
34 | def test_efs_size(mocker):
35 |     mocker.patch.object(efs, 'size')
36 |     efs.size.return_value = response
37 |     efs.size('mock-efs-id')
38 |     assert response['FileSystems'][0]['SizeInBytes']['Value'] == 99
39 |
40 |
41 | def test_performance_mode(mocker):
42 |     mocker.patch.object(efs, 'performance_mode')
43 |     efs.performance_mode.return_value = response
44 |     efs.performance_mode('mock-efs-id')
45 |     assert response['FileSystems'][0]['PerformanceMode'] == 'generalPurpose'
46 |
47 |
48 | def test_size_method_exception():
49 |     response = efs.size('mock-efs-id')
50 |     assert response == 'unhandled exception'
51 |
52 |
53 | def test_performance_mode_method_exception():
54 |     response = efs.performance_mode('mock-efs-id')
55 |     assert response == 'unhandled exception'
56 |
57 |
58 |
59 |
--------------------------------------------------------------------------------
/source/tests/test_ddb.py:
--------------------------------------------------------------------------------
1 | from __future__ import unicode_literals
2 |
3 | import boto3
4 | from moto import mock_dynamodb2
5 | from lib.logger import Logger
6 | from lib.dynamodb import DDB
7 |
8 | log_level = 'critical'
9 | logger = Logger(loglevel=log_level)
10 |
11 | def create_test_table():
12 |     client = boto3.client('dynamodb', region_name='us-east-1')
13 |     client.create_table(TableName='mock-table', KeySchema=[
14 |         {'AttributeName': 'primary_key', 'KeyType': 'HASH'}
15 |     ],
16 |         AttributeDefinitions=[
17 |             {'AttributeName': 'primary_key', 'AttributeType': 'S'}
18 |         ],
19 |         ProvisionedThroughput={
20 |             'ReadCapacityUnits': 10,
21 |
'WriteCapacityUnits': 10, 22 | }) 23 | return client.describe_table(TableName='mock-table') 24 | 25 | @mock_dynamodb2 26 | def test_item_add_and_describe_and_update(): 27 | table_desc = create_test_table() 28 | table_name = table_desc['Table']['TableName'] 29 | ddb = DDB(logger, table_name) 30 | 31 | item = { 32 | "primary_key": "Test1234", 33 | "key2" : 1234 34 | } 35 | response = ddb.write_item(item) 36 | 37 | returned_item = ddb.read_item('primary_key', 'Test1234') 38 | 39 | assert dict(returned_item) == item 40 | 41 | @mock_dynamodb2 42 | def test_write_item_exception(): 43 | table_desc = create_test_table() 44 | table_name = table_desc['Table']['TableName'] 45 | ddb = DDB(logger, table_name) 46 | 47 | item = { 48 | "invalid_key": "Test1234", 49 | "key2" : 1234 50 | } 51 | item = {} 52 | response = ddb.write_item(item) 53 | 54 | assert response == 'unhandled exception put' 55 | 56 | @mock_dynamodb2 57 | def test_read_item_exception(): 58 | table_desc = create_test_table() 59 | table_name = table_desc['Table']['TableName'] 60 | ddb = DDB(logger, table_name) 61 | 62 | item = { 63 | "primary_key": "Test1234", 64 | "key2" : 1234 65 | } 66 | item = {} 67 | ddb.write_item(item) 68 | 69 | response = ddb.read_item('invalid_key', 'Test1234') 70 | 71 | assert response == 'unhandled exception get' 72 | -------------------------------------------------------------------------------- /source/lib/asg.py: -------------------------------------------------------------------------------- 1 | ################################################################################### 2 | # Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. # 3 | # # 4 | # Licensed under the Apache License, Version 2.0 (the "License"). # 5 | # You may not use this file except in compliance with the License. # 6 | # A copy of the License is located at # 7 | # # 8 | # http://www.apache.org/licenses/LICENSE-2.0 # 9 | # # 10 | # or in the "license" file accompanying this file. This file is distributed # 11 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # 12 | # express or implied. See the License for the specific language governing # 13 | # permissions and limitations under the License. # 14 | ################################################################################### 15 | 16 | import boto3 17 | 18 | asg_client = boto3.client('autoscaling') 19 | 20 | class AutoScaling(object): 21 | def __init__(self, logger, asg_name): 22 | self.logger = logger 23 | self.asg_name = asg_name 24 | 25 | # update ASG desired capacity 26 | def update_asg(self, action): 27 | try: 28 | global desired_capacity 29 | if action == 'start_instance': 30 | desired_capacity = 1 31 | elif action == 'stop_instance': 32 | desired_capacity = 0 33 | self.logger.info("Changing desired capacity to {}".format(desired_capacity)) 34 | response = asg_client.update_auto_scaling_group( 35 | AutoScalingGroupName=self.asg_name, 36 | DesiredCapacity=desired_capacity 37 | ) 38 | return response 39 | except Exception as e: 40 | self.logger.error("unhandled exception: AutoScaling_start_instance", exc_info=1) 41 | return 'unhandled exception' 42 | -------------------------------------------------------------------------------- /source/lib/efs.py: -------------------------------------------------------------------------------- 1 | ################################################################################### 2 | # Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# 3 | # # 4 | # Licensed under the Apache License, Version 2.0 (the "License"). # 5 | # You may not use this file except in compliance with the License. # 6 | # A copy of the License is located at # 7 | # # 8 | # http://www.apache.org/licenses/LICENSE-2.0 # 9 | # # 10 | # or in the "license" file accompanying this file. This file is distributed # 11 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # 12 | # express or implied. See the License for the specific language governing # 13 | # permissions and limitations under the License. # 14 | ################################################################################### 15 | 16 | import boto3 17 | 18 | 19 | class EFS(object): 20 | 21 | def __init__(self, logger): 22 | self.logger = logger 23 | 24 | def size(self, efs_id): 25 | # Check the EFS size 26 | try: 27 | client = boto3.client('efs') 28 | self.logger.debug('Checking EFS Size') 29 | response = client.describe_file_systems( 30 | MaxItems=2, 31 | FileSystemId=efs_id 32 | ) 33 | return (response['FileSystems'][0]['SizeInBytes']['Value']) 34 | except Exception as e: 35 | self.logger.error("unhandled exception: EFS_size", exc_info=1) 36 | return 'unhandled exception' 37 | 38 | def performance_mode(self, efs_id): 39 | # Check the EFS performance mode 40 | try: 41 | client = boto3.client('efs') 42 | self.logger.debug('Checking EFS Performance Mode') 43 | response = client.describe_file_systems( 44 | MaxItems=2, 45 | FileSystemId=efs_id 46 | ) 47 | return (response['FileSystems'][0]['PerformanceMode']) 48 | except Exception as e: 49 | self.logger.error("unhandled exception: EFS_performance_mode", exc_info=1) 50 | return 'unhandled exception' 51 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 5 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
6 | 7 | ## [1.5] - 2019-12-20 8 | ### Updated 9 | - Update Node.JS runtime from 8.10 to 12 10 | - Update Python runtime from 3.7 to 3.8 11 | - Update software license 12 | - Update ```README.md``` 13 | - Update ```build-s3-dist.sh``` script 14 | - Update ```efs-to-backup.template``` Auto Scaling launch configuration userdata and Lambda function code ```S3Bucket``` and ```S3Key``` 15 | - Update ```efs-to-restore.template``` Auto Scaling launch configuration userdata and Lambda function code ```S3Bucket``` and ```S3Key``` 16 | - Update ```solution-helper.py``` to remove ```pycfn_custom_resource``` and add send response function 17 | 18 | ### Added 19 | - Add ```CHANGELOG.md``` 20 | - Add sending anonymous metrics whiling creating, updating, and deleting the solution 21 | 22 | ### Removed 23 | - Remove ```lambda-build.py``` 24 | - Remove ```pycfn_custom_resource``` 25 | 26 | ## [1.4] - 2019-07-31 27 | ### Added 28 | - Add security group to outputs in backup and restore templates 29 | 30 | ### Updated 31 | - Upgrade Python code from version 2.7 to 3.7 32 | - Update unit tests 33 | - Making encryption/access control changes to log bucket to be consistent with best practices 34 | - Where applicable, add constraints for parameters to make them required 35 | 36 | ## [1.3] - 2018-07-18 37 | ### Added 38 | - Parallel operations to improve removal of old backups 39 | - Parallel operations to improve creation of hardlinks for backups 40 | - Drop down options for backup window selection 41 | 42 | ### Updated 43 | - Improve backup notifications 44 | - Update Node.JS runtime to 8.10 45 | - DynamoDB Read/Write provisioned capacity units reduced to 2 46 | - Instance size selection defaults to c5.xlarge 47 | 48 | ## [1.2.0] - 2018-05-09 49 | ### Added 50 | - Add support for restoring sub directory from the backup 51 | 52 | ### Updated 53 | - Fix timeout issue with custom lambda resource fetching latest AMI 54 | - Fix false notification when efs mount targets not mounted 55 | - Backup window provided in form of drop down menu to avoid input errors 56 | - Parallelized removal of snapshot in ec2-backup-fpsync.sh 57 | - Improved overall backup and restore experience 58 | 59 | ### Removed 60 | - Remove duplicate line in efs-to-efs-backup.template 61 | 62 | ## [1.0.0] - 2017-09-05 63 | ### Added 64 | - AWS EFS-to-EFS Backup Solution release -------------------------------------------------------------------------------- /source/lib/dynamodb.py: -------------------------------------------------------------------------------- 1 | ################################################################################### 2 | # Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. # 3 | # # 4 | # Licensed under the Apache License, Version 2.0 (the "License"). # 5 | # You may not use this file except in compliance with the License. # 6 | # A copy of the License is located at # 7 | # # 8 | # http://www.apache.org/licenses/LICENSE-2.0 # 9 | # # 10 | # or in the "license" file accompanying this file. This file is distributed # 11 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # 12 | # express or implied. See the License for the specific language governing # 13 | # permissions and limitations under the License. 
# 14 | ################################################################################### 15 | 16 | import boto3 17 | from json import dumps, JSONEncoder 18 | from decimal import Decimal 19 | 20 | dynamodb_client = boto3.resource('dynamodb') 21 | 22 | class DecimalEncoder(JSONEncoder): 23 | def default(self, o): 24 | if isinstance(o, Decimal): 25 | if o % 1 > 0: 26 | return float(o) 27 | else: 28 | return int(o) 29 | return super(DecimalEncoder, self).default(o) 30 | 31 | 32 | class DDB(object): 33 | def __init__(self, logger, table_name): 34 | self.logger = logger 35 | self.table_name = table_name 36 | self.table = dynamodb_client.Table(self.table_name) 37 | 38 | # DDB API call to get an item 39 | def read_item(self, key, value): 40 | try: 41 | response = self.table.get_item( 42 | Key={ 43 | key: value 44 | } 45 | ) 46 | item = response['Item'] 47 | self.logger.info('DynamoDB Item') 48 | self.logger.info(dumps(item, indent=4, cls=DecimalEncoder)) 49 | return item 50 | except Exception as e: 51 | self.logger.error("unhandled exception: DDB_read_item", exc_info=1) 52 | return 'unhandled exception get' 53 | 54 | # DDB API call to put an item 55 | def write_item(self, item): 56 | try: 57 | response = self.table.put_item( 58 | Item=item 59 | ) 60 | return response 61 | except Exception as e: 62 | self.logger.error("unhandled exception: DDB_write_item", exc_info=1) 63 | return 'unhandled exception put' -------------------------------------------------------------------------------- /source/scripts/efs-restore-fpsync.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # input arguments 4 | source=$1 #source_ip:/prefix 5 | backup=$2 #backup_ip:/ 6 | interval=$3 7 | backupNum=$4 8 | efsid=$5 9 | subdir=$6 10 | s3bucket=$7 11 | 12 | # prepare system for fpsync 13 | echo "-- $(date -u +%FT%T) -- sudo yum -y update" 14 | sudo yum -y update 15 | echo "-- $(date -u +%FT%T) -- sudo yum -y install nfs-utils" 16 | sudo yum -y install nfs-utils 17 | echo "-- $(date -u +%FT%T) -- sudo yum -y groupinstall 'Development Tools'" 18 | sudo yum -y groupinstall "Development Tools" 19 | echo '-- $(date -u +%FT%T) -- wget https://github.com/martymac/fpart/archive/fpart-1.0.0.zip' 20 | wget https://github.com/martymac/fpart/archive/fpart-1.0.0.zip 21 | unzip fpart-1.0.0.zip 22 | cd fpart-fpart-1.0.0/ 23 | autoreconf -i 24 | ./configure 25 | make 26 | sudo make install 27 | 28 | # Adding PATH 29 | PATH=$PATH:/usr/local/bin 30 | 31 | 32 | _thread_count=$(($(nproc --all) * 16)) 33 | 34 | # 12/28/2018 - EFS-21432 - EFS mount best practices 35 | echo '-- $(date -u +%FT%T) -- sudo mkdir /mnt/source' 36 | sudo mkdir /mnt/source 37 | echo '-- $(date -u +%FT%T) -- sudo mkdir /mnt/backups' 38 | sudo mkdir /mnt/backups 39 | echo "-- $(date -u +%FT%T) -- sudo mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,timeo=600,retrans=2,hard,_netdev,noresvport $source /mnt/source" 40 | sudo mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,timeo=600,retrans=2,hard,_netdev,noresvport $source /mnt/source 41 | echo "-- $(date -u +%FT%T) -- sudo mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,timeo=600,retrans=2,hard,_netdev,noresvport $backup /mnt/backups" 42 | sudo mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,timeo=600,retrans=2,hard,_netdev,noresvport $backup /mnt/backups 43 | 44 | if [ ! sudo test -d /mnt/backups/$efsid/$interval.$backupNum/ ]; then 45 | echo "EFS Backup $efsid/$interval.$backupNum does not exist!" 
46 | exit 1 47 | fi 48 | 49 | # running fpsync in reverse direction to restore 50 | echo "fpsync_start:$(date -u +%FT%T)" 51 | echo "-- $(date -u +%FT%T) -- sudo \"PATH=$PATH\" /usr/local/bin/fpsync -n $_thread_count -v -o \"-a --stats --numeric-ids --log-file=/tmp/efs-restore.log\" /mnt/backups/$efsid/$interval.$backupNum$subdir /mnt/source/" 52 | sudo "PATH=$PATH" /usr/local/bin/fpsync -n $_thread_count -v -o "-a --stats --numeric-ids --log-file=/tmp/efs-restore.log" /mnt/backups/$efsid/$interval.$backupNum$subdir /mnt/source/ 53 | fpsyncStatus=$? 54 | echo "fpsync_stop:$(date -u +%FT%T)" 55 | 56 | echo "rsync_delete_start:$(date -u +%FT%T)" 57 | echo "-- $(date -u +%FT%T) -- sudo rsync -r --delete --existing --ignore-existing --ignore-errors --log-file=/tmp/efs-restore-rsync.log /mnt/backups/$efsid/$interval.$backupNum$subdir /mnt/source/" 58 | sudo rsync -r --delete --existing --ignore-existing --ignore-errors --log-file=/tmp/efs-restore-rsync.log /mnt/backups/$efsid/$interval.$backupNum$subdir /mnt/source/ 59 | echo "rsync_delete_stop:$(date -u +%FT%T)" 60 | 61 | exit $fpsyncStatus 62 | -------------------------------------------------------------------------------- /source/lib/logger.py: -------------------------------------------------------------------------------- 1 | ################################################################################### 2 | # Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. # 3 | # # 4 | # Licensed under the Apache License, Version 2.0 (the "License"). # 5 | # You may not use this file except in compliance with the License. # 6 | # A copy of the License is located at # 7 | # # 8 | # http://www.apache.org/licenses/LICENSE-2.0 # 9 | # # 10 | # or in the "license" file accompanying this file. This file is distributed # 11 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # 12 | # express or implied. See the License for the specific language governing # 13 | # permissions and limitations under the License. 
# 14 | ################################################################################### 15 | 16 | import json 17 | import logging 18 | 19 | class Logger(object): 20 | 21 | def __init__(self, loglevel='warning'): 22 | """Initializes logging""" 23 | self.config(loglevel=loglevel) 24 | return 25 | 26 | def config(self, loglevel='warning'): 27 | loglevel = logging.getLevelName(loglevel.upper()) 28 | mainlogger = logging.getLogger() 29 | mainlogger.setLevel(loglevel) 30 | 31 | logfmt = '{"time_stamp": "%(asctime)s", "log_level": "%(levelname)s", "log_message": %(message)s}\n' 32 | if len(mainlogger.handlers) == 0: 33 | mainlogger.addHandler(logging.StreamHandler()) 34 | mainlogger.handlers[0].setFormatter(logging.Formatter(logfmt)) 35 | self.log = logging.LoggerAdapter(mainlogger, {}) 36 | 37 | def _format(self, message): 38 | """formats log message in json 39 | 40 | Args: 41 | message (str): log message, can be a dict, list, string, or json blob 42 | """ 43 | 44 | try: 45 | message = json.loads(message) 46 | except Exception: 47 | pass 48 | try: 49 | return json.dumps({ 50 | "message": message 51 | }) 52 | except Exception: 53 | return json.dumps({ 54 | "message": str(message) 55 | }) 56 | 57 | def debug(self, message, **kwargs): 58 | """wrapper for logging.debug call""" 59 | self.log.debug(self._format(message), **kwargs) 60 | 61 | def info(self, message, **kwargs): 62 | # type: (object, object) -> object 63 | """wrapper for logging.info call""" 64 | self.log.info(self._format(message), **kwargs) 65 | 66 | def warning(self, message, **kwargs): 67 | """wrapper for logging.warning call""" 68 | self.log.warning(self._format(message), **kwargs) 69 | 70 | def error(self, message, **kwargs): 71 | """wrapper for logging.error call""" 72 | self.log.error(self._format(message), **kwargs) 73 | 74 | def critical(self, message, **kwargs): 75 | """wrapper for logging.critical call""" 76 | self.log.critical(self._format(message), **kwargs) 77 | -------------------------------------------------------------------------------- /source/lib/ssm.py: -------------------------------------------------------------------------------- 1 | ################################################################################### 2 | # Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. # 3 | # # 4 | # Licensed under the Apache License, Version 2.0 (the "License"). # 5 | # You may not use this file except in compliance with the License. # 6 | # A copy of the License is located at # 7 | # # 8 | # http://www.apache.org/licenses/LICENSE-2.0 # 9 | # # 10 | # or in the "license" file accompanying this file. This file is distributed # 11 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # 12 | # express or implied. See the License for the specific language governing # 13 | # permissions and limitations under the License. 
# 14 | ################################################################################### 15 | 16 | import boto3 17 | import os 18 | from datetime import date, datetime 19 | 20 | from json import dumps, JSONEncoder 21 | 22 | ssm_client = boto3.client("ssm") 23 | 24 | class DateTimeEncoder(JSONEncoder): 25 | def default(self, o): 26 | if isinstance(o, (datetime, date)): 27 | serial = o.isoformat() 28 | return serial 29 | raise TypeError("Type %s not serializable" % type(o)) 30 | 31 | class SimpleSystemsManager(object): 32 | def __init__(self, logger): 33 | self.logger = logger 34 | 35 | # reading ssm.sh to send for run-command 36 | def create_command(self, replace_dict): 37 | try: 38 | lines=[] 39 | src_dir = os.path.dirname(os.path.abspath(__file__)) 40 | self.logger.debug('Abs path: {}'.format(src_dir)) 41 | f = 'ssm.sh' 42 | with open(os.path.join(src_dir, f)) as file: 43 | for line in file: 44 | for src, target in replace_dict.items(): 45 | line = line.replace(src, target) 46 | lines.append(line) 47 | return lines 48 | except Exception as e: 49 | self.logger.error("unhandled exception: SimpleSystemsManager_create_command", exc_info=1) 50 | 51 | # sending run-command 52 | def send_command(self, instance_id, document_name, replace_dict): 53 | try: 54 | bucket_name = replace_dict.get('${_s3bucket}') 55 | self.logger.debug('SSM Bucket Name: {}'.format(bucket_name)) 56 | response = ssm_client.send_command( 57 | InstanceIds=[instance_id], 58 | DocumentName=document_name, 59 | TimeoutSeconds=120, 60 | OutputS3BucketName=bucket_name, 61 | OutputS3KeyPrefix='ssm-logs', 62 | Parameters={"commands": self.create_command(replace_dict)}, 63 | Comment='EFS Backup Solution: Performs cleanup, ' 64 | 'upload logs files to S3, updates DDB and lifecycle hook. ' 65 | ) 66 | self.logger.debug(dumps(response, indent=4, cls=DateTimeEncoder)) 67 | return response 68 | except Exception as e: 69 | self.logger.error("unhandled exception: SimpleSystemsManager_send_command", exc_info=1) 70 | return 'unhandled exception' 71 | -------------------------------------------------------------------------------- /source/scripts/efs-ec2-backup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #======================================================================== 4 | # 5 | # master script to run efs-backup 6 | # fetches EFS mount IPs 7 | # runs efs-backup scripts 8 | # uploads logs to S3 9 | # updates status on DynamoDB 10 | # 11 | #======================================================================== 12 | # author: aws-solutions-builder@ 13 | 14 | 15 | clear 16 | echo "This is the master script to perform efs backup" 17 | sleep 2 18 | 19 | _source_efs=$1 ## {type:string, description:source efs id} 20 | _destination_efs=$2 ## {type:string, description:destination efs id} 21 | _interval=$3 ## {type:string, description:interval for backup daily/weekly/monthly} 22 | _retain=$4 ## {type:number, description:number of copies to retain} 23 | _folder_label=$5 ## {type:string, description:backup folder identifier} 24 | _backup_prefix=$6 ## {type:string, description:backup source prefix} 25 | 26 | echo "## input from user ##" 27 | echo "_source_efs: ${_source_efs}" 28 | echo "_destination_efs: ${_destination_efs}" 29 | echo "_interval: ${_interval}" 30 | echo "_retain: ${_retain}" 31 | echo "_folder_label: ${_folder_label}" 32 | echo "_backup_prefix: ${_backup_prefix}" 33 | 34 | # 35 | # get region and instance-id from instance meta-data 36 | # 37 | _az=$(curl -s 
http://169.254.169.254/latest/meta-data/placement/availability-zone/) 38 | _region=${_az::-1} 39 | echo "region is ${_region}" 40 | _instance_id=$(curl -s http://169.254.169.254/latest/meta-data/instance-id) 41 | echo "instance-id is ${_instance_id}" 42 | 43 | # 44 | # getting source/destination efs mount ip 45 | # parameters : [_source_efs, _destination_efs, _region] 46 | # 47 | 48 | echo "-- $(date -u +%FT%T) -- resolving source efs address ${_source_efs}.efs.${_region}.amazonaws.com" 49 | until dig ${_source_efs}.efs.${_region}.amazonaws.com +short 50 | do 51 | sleep 1 52 | done 53 | _src_mount_ip=$(dig ${_source_efs}.efs.${_region}.amazonaws.com +short) 54 | echo "-- $(date -u +%FT%T) -- src mount ip: ${_src_mount_ip}" 55 | 56 | echo "-- $(date -u +%FT%T) -- resolving backup efs address ${_destination_efs}.efs.${_region}.amazonaws.com" 57 | until dig ${_destination_efs}.efs.${_region}.amazonaws.com +short 58 | do 59 | sleep 1 60 | done 61 | _dst_mount_ip=$(dig ${_destination_efs}.efs.${_region}.amazonaws.com +short) 62 | echo "-- $(date -u +%FT%T) -- dst mount ip: ${_dst_mount_ip}" 63 | 64 | if [ -z "${_src_mount_ip}" ] || [ -z "${_dst_mount_ip}" ]; then 65 | echo "-- $(date -u +%FT%T) -- ERROR:efs_mount_ip_not_found" 66 | echo "-- $(date -u +%FT%T) -- Either or both mount IPs not found, skipping EFS backup script. Please verify if the EC2 instance was launched in the same AZ as the EFS systems." 67 | else 68 | # 69 | # running EFS backup script 70 | # parameters : [_src_mount_ip, _dst_mount_ip, _interval, _retain, _folder_label, _backup_window] 71 | # 72 | echo "-- $(date -u +%FT%T) -- running EFS backup script" 73 | /home/ec2-user/efs-backup-fpsync.sh ${_src_mount_ip}:${_backup_prefix} ${_dst_mount_ip}:/ ${_interval} ${_retain} ${_folder_label} ${_region} ${_instance_id} 74 | fi 75 | 76 | 77 | # 78 | # changing auto scaling capacity 79 | # parameters : [_asg_name] 80 | # 81 | echo "-- $(date -u +%FT%T) -- Backup script finished before the backup window, stopping the ec2 instance." 82 | _asg_name=$(aws ec2 describe-tags --region ${_region} --filters "Name=resource-id,Values=${_instance_id}" --query 'Tags[?Key==`aws:autoscaling:groupName`]'.Value --output text) 83 | aws autoscaling set-desired-capacity --region ${_region} --auto-scaling-group-name ${_asg_name} --desired-capacity 0 84 | -------------------------------------------------------------------------------- /source/lib/notify.py: -------------------------------------------------------------------------------- 1 | ################################################################################### 2 | # Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. # 3 | # # 4 | # Licensed under the Apache License, Version 2.0 (the "License"). # 5 | # You may not use this file except in compliance with the License. # 6 | # A copy of the License is located at # 7 | # # 8 | # http://www.apache.org/licenses/LICENSE-2.0 # 9 | # # 10 | # or in the "license" file accompanying this file. This file is distributed # 11 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # 12 | # express or implied. See the License for the specific language governing # 13 | # permissions and limitations under the License. 
# 14 | ################################################################################### 15 | 16 | import json 17 | from datetime import datetime 18 | from urllib.request import Request 19 | from urllib.request import urlopen 20 | import boto3 21 | from decimal import Decimal 22 | 23 | 24 | class DecimalEncoder(json.JSONEncoder): 25 | def default(self, o): 26 | if isinstance(o, Decimal): 27 | if o % 1 > 0: 28 | return float(o) 29 | else: 30 | return int(o) 31 | return super(DecimalEncoder, self).default(o) 32 | 33 | 34 | class Notify(object): 35 | def __init__(self, logger): 36 | self.logger = logger 37 | 38 | # API call to notify the customer 39 | def customer(self, arn, message): 40 | try: 41 | sns_client = boto3.resource('sns') 42 | topic = sns_client.Topic(arn) 43 | response = topic.publish( 44 | Subject='EFS Backup Status', 45 | Message=json.dumps(message, indent=4, cls=DecimalEncoder, sort_keys=True), 46 | ) 47 | self.logger.info('SNS Publish Response') 48 | self.logger.info(response) 49 | return response 50 | except Exception as e: 51 | self.logger.error("unhandled exception: Notify_customer", exc_info=1) 52 | 53 | # Send anonymous metrics 54 | def metrics(self, solution_id, uuid, data, url): 55 | try: 56 | time_stamp = {'TimeStamp': str(datetime.utcnow().isoformat())} 57 | params = {'Solution': solution_id, 58 | 'UUID': uuid, 59 | 'Data': data} 60 | metrics = dict(time_stamp, **params) 61 | json_data = json.dumps(metrics, indent=4, cls=DecimalEncoder, sort_keys=True) 62 | json_data_utf8 = json_data.encode('utf-8') 63 | headers = { 64 | 'content-type': 'application/json; charset=utf-8', 65 | 'content-length': len(json_data_utf8) 66 | } 67 | req = Request(url, json_data_utf8, headers) 68 | rsp = urlopen(req) 69 | content = rsp.read() 70 | rsp_code = rsp.getcode() 71 | self.logger.info('Response Code: {}'.format(rsp_code)) 72 | self.logger.debug('Response Code: {}'.format(content)) 73 | return rsp_code 74 | except Exception as e: 75 | self.logger.error("unhandled exception: Notify_metrics", exc_info=1) 76 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check [existing open](https://github.com/awslabs/iot-device-simulator/issues), or [recently closed](https://github.com/awslabs/iot-device-simulator/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. 
Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *master* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels ((enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/awslabs/iot-device-simulator/labels/help%20wanted) issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](https://github.com/awslabs/iot-device-simulator/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | 61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. 
62 | -------------------------------------------------------------------------------- /source/tests/test_asg.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | from moto import mock_autoscaling 3 | from moto import mock_ec2 4 | from lib.logger import Logger 5 | from lib.asg import AutoScaling 6 | 7 | log_level = 'critical' 8 | logger = Logger(loglevel=log_level) 9 | asg = AutoScaling(logger, 'test_asg') 10 | 11 | def create_test_subnet(): 12 | ec2 = boto3.resource('ec2', region_name='us-east-1') 13 | vpc = list(ec2.vpcs.all())[0] 14 | subnet = ec2.create_subnet( 15 | VpcId=vpc.id, 16 | CidrBlock='10.11.1.0/24', 17 | AvailabilityZone='us-east-1a') 18 | return subnet 19 | 20 | def get_test_image_id(): 21 | ec2_client = boto3.client('ec2', region_name='us-east-1') 22 | ec2_images = ec2_client.describe_images() 23 | return ec2_images['Images'][0]['ImageId'] 24 | 25 | @mock_autoscaling 26 | @mock_ec2 27 | def test_start_instance(): 28 | subnet = create_test_subnet() 29 | 30 | client = boto3.client('autoscaling', region_name='us-east-1') 31 | _ = client.create_launch_configuration( 32 | LaunchConfigurationName='test_launch_configuration', 33 | ImageId=get_test_image_id() 34 | ) 35 | _ = client.create_auto_scaling_group( 36 | AutoScalingGroupName='test_asg', 37 | LaunchConfigurationName='test_launch_configuration', 38 | MinSize=0, 39 | MaxSize=20, 40 | DesiredCapacity=0, 41 | VPCZoneIdentifier=subnet.id 42 | ) 43 | 44 | response = client.describe_auto_scaling_groups( 45 | AutoScalingGroupNames=["test_asg"] 46 | ) 47 | 48 | assert response['AutoScalingGroups'][0]['DesiredCapacity'] == 0 49 | 50 | # Start Backup - changes the desired capacity to 1 51 | asg.update_asg('start_instance') 52 | 53 | response = client.describe_auto_scaling_groups( 54 | AutoScalingGroupNames=["test_asg"] 55 | ) 56 | 57 | assert response['AutoScalingGroups'][0]['DesiredCapacity'] == 1 58 | 59 | @mock_autoscaling 60 | @mock_ec2 61 | def test_stop_instance(): 62 | subnet = create_test_subnet() 63 | 64 | client = boto3.client('autoscaling', region_name='us-east-1') 65 | _ = client.create_launch_configuration( 66 | LaunchConfigurationName='test_launch_configuration', 67 | ImageId=get_test_image_id() 68 | ) 69 | _ = client.create_auto_scaling_group( 70 | AutoScalingGroupName='test_asg', 71 | LaunchConfigurationName='test_launch_configuration', 72 | MinSize=0, 73 | MaxSize=20, 74 | DesiredCapacity=1, 75 | VPCZoneIdentifier=subnet.id 76 | ) 77 | 78 | response = client.describe_auto_scaling_groups( 79 | AutoScalingGroupNames=["test_asg"] 80 | ) 81 | 82 | assert response['AutoScalingGroups'][0]['DesiredCapacity'] == 1 83 | 84 | # Start Backup - changes the desired capacity to 0 85 | asg.update_asg('stop_instance') 86 | 87 | response = client.describe_auto_scaling_groups( 88 | AutoScalingGroupNames=["test_asg"] 89 | ) 90 | 91 | assert response['AutoScalingGroups'][0]['DesiredCapacity'] == 0 92 | 93 | @mock_autoscaling 94 | @mock_ec2 95 | def test_exception(): 96 | subnet = create_test_subnet() 97 | 98 | client = boto3.client('autoscaling', region_name='us-east-1') 99 | _ = client.create_launch_configuration( 100 | LaunchConfigurationName='test_launch_configuration', 101 | ImageId=get_test_image_id() 102 | ) 103 | _ = client.create_auto_scaling_group( 104 | AutoScalingGroupName='test_asg', 105 | LaunchConfigurationName='test_launch_configuration', 106 | MinSize=0, 107 | MaxSize=20, 108 | DesiredCapacity=1, 109 | VPCZoneIdentifier=subnet.id 110 | ) 111 | 112 | response = 
client.describe_auto_scaling_groups( 113 | AutoScalingGroupNames=["test_asg"] 114 | ) 115 | # Instantiate class with invalid ASG Name, the function should catch the exception 116 | asg = AutoScaling(logger, 'test_asg_invalid') 117 | response = asg.update_asg('invalid') 118 | assert response == 'unhandled exception' 119 | -------------------------------------------------------------------------------- /source/lib/cloudwatch.py: -------------------------------------------------------------------------------- 1 | ################################################################################### 2 | # Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. # 3 | # # 4 | # Licensed under the Apache License, Version 2.0 (the "License"). # 5 | # You may not use this file except in compliance with the License. # 6 | # A copy of the License is located at # 7 | # # 8 | # http://www.apache.org/licenses/LICENSE-2.0 # 9 | # # 10 | # or in the "license" file accompanying this file. This file is distributed # 11 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # 12 | # express or implied. See the License for the specific language governing # 13 | # permissions and limitations under the License. # 14 | ################################################################################### 15 | 16 | from datetime import datetime, timedelta 17 | import boto3 18 | from decimal import Decimal 19 | 20 | 21 | class CloudWatchMetric(object): 22 | def __init__(self, logger): 23 | self.logger = logger 24 | 25 | # CloudWatch API call to get EFS metrics 26 | def efs_cw_metrics(self, efs_id, name): 27 | try: 28 | cw_client = boto3.client('cloudwatch') 29 | cw_metrics = {} 30 | metrics = { 31 | 'BurstCreditBalance': 'Average', 32 | 'PermittedThroughput': 'Average' 33 | } 34 | now = datetime.utcnow() 35 | start_time = now - timedelta(seconds=300) 36 | end_time = min(now, start_time + timedelta(seconds=3600)) # 5 min window 37 | for metric in metrics: 38 | data = cw_client.get_metric_statistics( 39 | Namespace='AWS/EFS', 40 | MetricName=metric, 41 | Dimensions=[{ 42 | 'Name': 'FileSystemId', 43 | 'Value': efs_id}], 44 | Period=300, 45 | StartTime=start_time, 46 | EndTime=end_time, 47 | Statistics=[metrics[metric]])['Datapoints'] 48 | for d in data: 49 | key = name + metric 50 | value = Decimal(d[metrics[metric]]) 51 | cw_metrics[key] = value 52 | return cw_metrics 53 | except Exception as e: 54 | self.logger.error("unhandled exception: CloudWatchMetric_efs_cw_metrics", exc_info=1) 55 | 56 | # CloudWatch API call to get S3 metrics 57 | def s3_cw_metrics(self, bucket_name): 58 | try: 59 | cw_client = boto3.client('cloudwatch') 60 | response = cw_client.get_metric_statistics( 61 | Namespace="AWS/S3", 62 | MetricName="BucketSizeBytes", 63 | Dimensions=[ 64 | { 65 | "Name": "BucketName", 66 | "Value": bucket_name 67 | }, 68 | { 69 | "Name": "StorageType", 70 | "Value": "StandardStorage" 71 | } 72 | ], 73 | StartTime=datetime.now() - timedelta(days=1), 74 | EndTime=datetime.now(), 75 | Period=300, 76 | Statistics=['Average'] 77 | ) 78 | if not response['Datapoints']: 79 | self.logger.debug("S3 bucket size is zero. 
This is an empty bucket.") 80 | return '0' 81 | else: 82 | bucket_size_bytes = response['Datapoints'][-1]['Average'] 83 | return Decimal(bucket_size_bytes) 84 | except Exception as e: 85 | self.logger.error("unhandled exception: CloudWatchMetric_s3_cw_metrics", exc_info=1) 86 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## AWS EFS-to-EFS Backup Solution 2 | 3 | ### Description 4 | The EFS-to-EFS backup solution leverages Amazon CloudWatch and AWS Lambda to 5 | automatically create incremental backups of an Amazon Elastic File System (EFS) file system on a customer- 6 | defined schedule. The solution is easy to deploy and provides automated backups for data 7 | recovery and protection. For example, an organization can use this backup solution in a 8 | production environment to automatically create backups of their file system(s) on daily basis, 9 | and keep only a specified number of backups. For customers who do not have a mechanism 10 | for backing up their Amazon EFS file systems, this solution provides an easy way to improve 11 | data protection and recoverability. 12 | 13 | ### Architectural Workflow 14 | • The orchestrator lambda function is first invoked by CW event (start backup) schedule defined by the customer. The lambda function creates a 'Stop Backup' CWE event and add the orchestrator (itself) lambda function as the target. It also updates desired capacity of the autoscaling group (ASG) to 1 (one). Auto Scaling Group (ASG) launches an EC2 instance that mounts the source and target EFS and backup the primary EFS. 15 | 16 | • The orchestrator lambda function writes backup metadata to the DDB table with backup id as the primary key. 17 | 18 | • Fifteen minutes before the backup window defined by the customer, the 'Stop' CWE invokes orchestrator lambda to change the desired capacity of ASG to 0 (zero). 19 | 20 | • The lifecycle hook CWE is triggered by ASG event (EC2_Instance_Terminating). This CWE invokes the orchestrator lambda function that use ‘AWS-RunShellScript’ document name to make send_command api call to the SSM service. 21 | 22 | • During the lifecycle hook event, the EC2 instance will stop/cleanup rsync process gracefully and update the DDB table with the KPIs, upload logs to the S3 bucket. 23 | 24 | • The EC2 successful termination trigger another lifecycle hook event. This event triggers the orchestrator lambda function to send the anonymous metrics, notify customer if complete backup was not done. 25 | 26 | ### Setup 27 | 28 | #### Run Unit Tests (pytest) 29 | *Note: Use **sudo** if necessary to install python dependencies* 30 | 31 | ```bash 32 | $ bash deployment/run-unit-tests.sh 33 | ``` 34 | *** 35 | 36 | #### Build S3 Assets 37 | 38 | * Configure the build paraemters. 39 | ```bash 40 | export EFS_BACKUP_PATH=`pwd` 41 | export DIST_OUTPUT_BUCKET=my-bucket-name # bucket where customized code will reside 42 | export VERSION=my-version # version number for the customized code 43 | export SOLUTION_NAME=efs-backup # solution name for the customized code 44 | ``` 45 | _Note:_ You would have to create an S3 bucket with the prefix 'my-bucket-name-' as whole Lambda functions are going to get the source codes from the 'my-bucket-name-' bucket; aws_region is where you are deployting the customized solution (e.g. us-east-1, us-east-2, etc.). 
46 |
47 | * Build the customized solution
48 | ```bash
49 | cd $EFS_BACKUP_PATH/deployment
50 | chmod +x ./build-s3-dist.sh
51 | ./build-s3-dist.sh $DIST_OUTPUT_BUCKET $SOLUTION_NAME $VERSION
52 | ```
53 |
54 | * Deploy the source code to an Amazon S3 bucket in your account. _Note:_ You must have the AWS Command Line Interface installed and must create the Amazon S3 bucket in your account prior to copying the source code.
55 | ```bash
56 | export AWS_REGION=us-east-1 # the AWS Region where you are going to deploy the solution in your account
57 | export AWS_PROFILE=default # the AWS Command Line Interface profile
58 |
59 | aws s3 cp $EFS_BACKUP_PATH/deployment/global-s3-assets/ s3://$DIST_OUTPUT_BUCKET-$AWS_REGION/$SOLUTION_NAME/$VERSION/ --recursive --acl bucket-owner-full-control --profile $AWS_PROFILE
60 | aws s3 cp $EFS_BACKUP_PATH/deployment/regional-s3-assets/ s3://$DIST_OUTPUT_BUCKET-$AWS_REGION/$SOLUTION_NAME/$VERSION/ --recursive --acl bucket-owner-full-control --profile $AWS_PROFILE
61 | ```
62 |
63 | ## Deploying the customized solution
64 | * Get the links to the efs-to-efs-backup.template and efs-to-efs-restore.template files uploaded to your Amazon S3 bucket.
65 | * Deploy the EFS Backup solution to your account by launching new AWS CloudFormation stacks using the links to the efs-to-efs-backup.template and efs-to-efs-restore.template files.
66 |
67 | ## Collection of operational metrics
68 | This solution collects anonymous operational metrics to help AWS improve the quality and features of the solution. For more information, including how to disable this capability, please see the [implementation guide](https://docs.aws.amazon.com/solutions/latest/amazon-virtual-andon/collection-of-operational-metrics.html).
69 |
70 | ***
71 |
72 | Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
73 |
74 | Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License is located at
75 |
76 | http://www.apache.org/licenses/LICENSE-2.0
77 |
78 | or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
79 |
--------------------------------------------------------------------------------
/source/solution-helper.py:
--------------------------------------------------------------------------------
1 | ###################################################################################
2 | # Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.    #
3 | #                                                                                 #
4 | # Licensed under the Apache License, Version 2.0 (the "License").                 #
5 | # You may not use this file except in compliance with the License.                #
6 | # A copy of the License is located at                                             #
7 | #                                                                                 #
8 | #     http://www.apache.org/licenses/LICENSE-2.0                                  #
9 | #                                                                                 #
10 | # or in the "license" file accompanying this file. This file is distributed      #
11 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either      #
12 | # express or implied. See the License for the specific language governing        #
13 | # permissions and limitations under the License.
# 14 | ################################################################################### 15 | 16 | import logging 17 | import uuid 18 | import json 19 | import boto3 20 | 21 | from urllib import request 22 | from datetime import datetime 23 | 24 | log = logging.getLogger() 25 | log.setLevel(logging.INFO) 26 | 27 | # Send anonymous metric function 28 | def send_anonymous_metric(solution_id, solution_version, solution_uuid, region, event_type, request_type): 29 | now = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3] 30 | metric_url = 'https://metrics.awssolutionsbuilder.com/generic' 31 | response_body = json.dumps({ 32 | "Solution": solution_id, 33 | "UUID": solution_uuid, 34 | "TimeStamp": now, 35 | "Data": { 36 | "Launch": now, 37 | "Region": region, 38 | "Version": solution_version, 39 | "EventType": event_type, 40 | "RequestType": request_type 41 | } 42 | }) 43 | log.info('Metric Body: {}'.format(response_body)) 44 | 45 | try: 46 | data = response_body.encode('utf-8') 47 | req = request.Request(metric_url, data=data) 48 | req.add_header('Content-Type', 'application/json') 49 | req.add_header('Content-Length', len(response_body)) 50 | response = request.urlopen(req) 51 | 52 | log.info('Status code: {}'.format(response.getcode())) 53 | log.info('Status message: {}'.format(response.msg)) 54 | except Exception as e: 55 | log.error('Error occurred while sending metric: {}'.format(json.dumps(response_body))) 56 | log.error('Error: {}'.format(e)) 57 | 58 | # Send response function 59 | def send_response(event, context, response_status, response_data): 60 | try: 61 | response_body = json.dumps({ 62 | "Status": response_status, 63 | "Reason": 'See the details in CloudWatch Log Stream: {}'.format(context.log_stream_name), 64 | "PhysicalResourceId": context.log_stream_name, 65 | "StackId": event['StackId'], 66 | "RequestId": event['RequestId'], 67 | "LogicalResourceId": event['LogicalResourceId'], 68 | "Data": response_data 69 | }) 70 | 71 | log.info('Response URL: {}'.format(event['ResponseURL'])) 72 | log.info('Response Body: {}'.format(response_body)) 73 | 74 | data = response_body.encode('utf-8') 75 | req = request.Request(event['ResponseURL'], data=data, method='PUT') 76 | req.add_header('Content-Type', '') 77 | req.add_header('Content-Length', len(response_body)) 78 | response = request.urlopen(req) 79 | 80 | log.info('Status code: {}'.format(response.getcode())) 81 | log.info('Status message: {}'.format(response.msg)) 82 | except Exception as e: 83 | log.error('Custom resource send_response error: {}'.format(e)) 84 | 85 | def lambda_handler(event, context): 86 | log.info('Received event: {}'.format(json.dumps(event))) 87 | response_data = { 88 | "Message": "No action is needed." 
89 | } 90 | properties = event['ResourceProperties'] 91 | 92 | try: 93 | if event['RequestType'] in ['Create', 'Update']: 94 | response_data = { 95 | "UUID": str(uuid.uuid4()) 96 | } 97 | 98 | if event['ResourceType'] == 'Custom::SendAnonymousMetrics' and properties['SendAnonymousMetrics'] == 'Yes': 99 | solution_id = properties['SolutionId'] 100 | solution_version = properties['SolutionVersion'] 101 | solution_uuid = properties['SolutionUuid'] 102 | region = properties['Region'] 103 | event_type = properties['EventType'] 104 | send_anonymous_metric(solution_id, solution_version, solution_uuid, region, event_type, event['RequestType']) 105 | response_data = { 106 | "Message": "Sent anonymous metric" 107 | } 108 | 109 | send_response(event, context, 'SUCCESS', response_data) 110 | except Exception as e: 111 | log.error('Error: {}'.format(e)) 112 | response_data = { 113 | 'Error': e 114 | } 115 | send_response(event, context, 'FAILED', response_data) 116 | -------------------------------------------------------------------------------- /source/tests/test_events.py: -------------------------------------------------------------------------------- 1 | from lib.logger import Logger 2 | from lib.events import CloudWatchEvent 3 | from json import loads 4 | log_level = 'critical' 5 | logger = Logger(loglevel=log_level) 6 | cwe = CloudWatchEvent(logger, 'uid') 7 | 8 | describe_target_response = {'Targets': [{'Input': '{"action": "stop", "backup_id": "4a1e7e2f", "mode": "backup"}', 'Id': 'terminate_event_orchestrator', 'Arn': 'arn:aws:lambda:us-east-1:1234:function:test-cwe-events'}], 'ResponseMetadata': {'RetryAttempts': 0, 'HTTPStatusCode': 200, 'RequestId': '8a0', 'HTTPHeaders': {'x-amzn-requestid': '8a0', 'content-length': '206', 'content-type': 'application/x-amz-json-1.1'}}} 9 | 10 | create_event_response = {'RuleArn': 'arn:aws:events:us-east-1:1234:rule/stop_backup_9aef9d39', 'ResponseMetadata': {'RetryAttempts': 0, 'HTTPStatusCode': 200, 'RequestId': 'e44', 'HTTPHeaders': {'x-amzn-requestid': 'e44', 'content-length': '77', 'content-type': 'application/x-amz-json-1.1'}}} 11 | 12 | add_target_response = {'FailedEntries': [], 'ResponseMetadata': {'RetryAttempts': 0, 'HTTPStatusCode': 200, 'RequestId': 'e46', 'HTTPHeaders': {'x-amzn-requestid': 'e46', 'content-length': '41', 'content-type': 'application/x-amz-json-1.1'}}, 'FailedEntryCount': 0} 13 | 14 | delete_event_response = {'ResponseMetadata': {'RetryAttempts': 0, 'HTTPStatusCode': 200, 'RequestId': '3a1', 'HTTPHeaders': {'x-amzn-requestid': '3a1', 'date': 'Mon, 14 Aug 2017 20:21:59 GMT', 'content-length': '0', 'content-type': 'application/x-amz-json-1.1'}}} 15 | 16 | remove_target_response = {'FailedEntries': [], 'ResponseMetadata': {'RetryAttempts': 0, 'HTTPStatusCode': 200, 'RequestId': '3a0', 'HTTPHeaders': {'x-amzn-requestid': '3a0', 'date': 'Mon, 14 Aug 2017 20:21:59 GMT', 'content-length': '41', 'content-type': 'application/x-amz-json-1.1'}}, 'FailedEntryCount': 0} 17 | 18 | get_lambda_arn_response = {'Code': {'RepositoryType': 'S3'}, 'Configuration': {'TracingConfig': {'Mode': 'PassThrough'}, 'Version': '$LATEST', 'FunctionName': 'test-cwe-events', 'VpcConfig': {'SubnetIds': [], 'SecurityGroupIds': []}, 'MemorySize': 128, 'CodeSize': 316, 'FunctionArn': 'arn:aws:lambda:us-east-1:1234:function:test-cwe-events', 'Environment': {'Variables': {'test1': 'value1', 'test3': 'value3', 'test2': 'value2', 'test5': 'value5', 'test4': 'value4'}}, 'Handler': 'lambda_function.lambda_handler', 'Role': 
'arn:aws:iam::1234:role/lambda_basic_execution', 'Timeout': 3, 'Runtime': 'python3.8'}, 'ResponseMetadata': {'RetryAttempts': 0, 'HTTPStatusCode': 200, 'RequestId': 'e45', 'HTTPHeaders': {'x-amzn-requestid': 'e45', 'content-length': '1885', 'content-type': 'application/json', 'connection': 'keep-alive'}}} 19 | 20 | add_permission_response = {'Statement': '{"Sid":"stop_backup_event","Effect":"Allow","Principal":{"Service":"events.amazonaws.com"},"Action":"lambda:InvokeFunction","Resource":"arn:aws:lambda:us-east-1:1234:function:test-cwe-events","Condition":{"ArnLike":{"AWS:SourceArn":"arn:aws:events:us-east-1:1234:rule/stop_backup_9aef9d39"}}}', 'ResponseMetadata': {'RetryAttempts': 0, 'HTTPStatusCode': 201, 'RequestId': '7f7', 'HTTPHeaders': {'x-amzn-requestid': '7f7', 'content-length': '354', 'content-type': 'application/json', 'connection': 'keep-alive'}}} 21 | 22 | remove_permission_response = {'ResponseMetadata': {'RetryAttempts': 0, 'HTTPStatusCode': 204, 'RequestId': '21c', 'HTTPHeaders': {'x-amzn-requestid': '21c', 'connection': 'keep-alive', 'content-type': 'application/json'}}} 23 | 24 | 25 | def test_describe_target(mocker): 26 | mocker.patch.object(cwe, 'describe_target') 27 | cwe.describe_target.return_value = describe_target_response 28 | response = cwe.describe_target() 29 | assert loads(response['Targets'][-1]['Input'])['backup_id'] == '4a1e7e2f' 30 | 31 | 32 | def test_create_event(mocker): 33 | mocker.patch.object(cwe, 'create_event') 34 | cwe.create_event.return_value = create_event_response 35 | response = cwe.create_event('60', 'arn') 36 | assert response['RuleArn'] == 'arn:aws:events:us-east-1:1234:rule/stop_backup_9aef9d39' 37 | 38 | 39 | def test_add_target(mocker): 40 | mocker.patch.object(cwe, 'add_target') 41 | cwe.add_target.return_value = add_target_response 42 | dictionary = { 43 | 'mode': 'backup', 44 | 'action': 'stop', 45 | 'backup_id': 'backup_id' 46 | } 47 | response = cwe.add_target('mock_function_name', dictionary) 48 | assert response['ResponseMetadata']['HTTPStatusCode'] == 200 49 | 50 | 51 | def test_delete_event(mocker): 52 | mocker.patch.object(cwe, 'delete_event') 53 | cwe.delete_event.return_value = delete_event_response 54 | response = cwe.delete_event() 55 | assert response['ResponseMetadata']['HTTPStatusCode'] == 200 56 | 57 | 58 | def test_remove_target(mocker): 59 | mocker.patch.object(cwe, 'remove_target') 60 | cwe.remove_target.return_value = remove_target_response 61 | response = cwe.remove_target() 62 | assert response['ResponseMetadata']['HTTPStatusCode'] == 200 63 | 64 | 65 | def test_get_lambda_arn(mocker): 66 | mocker.patch.object(cwe, 'get_lambda_arn') 67 | cwe.get_lambda_arn.return_value = get_lambda_arn_response 68 | response = cwe.get_lambda_arn('mock_function_name') 69 | assert response['Configuration']['FunctionArn'] == 'arn:aws:lambda:us-east-1:1234:function:test-cwe-events' 70 | 71 | 72 | def test_add_permission(mocker): 73 | mocker.patch.object(cwe, 'add_permission') 74 | cwe.add_permission.return_value = add_permission_response 75 | response = cwe.add_permission('mock_function_name', 'mock_rule_arn') 76 | assert loads(response['Statement'])['Action'] == 'lambda:InvokeFunction' 77 | 78 | 79 | def test_remove_permission(mocker): 80 | mocker.patch.object(cwe, 'remove_permission') 81 | cwe.remove_permission.return_value = remove_permission_response 82 | response = cwe.remove_permission('mock_function_name') 83 | assert response['ResponseMetadata']['HTTPStatusCode'] == 204 84 | 
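A possible companion test (a sketch, not part of the original suite) that exercises the real parsing logic in `CloudWatchEvent.describe_target` by patching the module-level `cwe_client` defined in `lib/events.py` (shown later in this repository) rather than stubbing the wrapper method itself; it reuses the module-level `cwe` instance and the `describe_target_response` payload defined above, and the test name is illustrative:
```python
# Sketch: patch the module-level boto3 Events client in lib.events so that
# CloudWatchEvent.describe_target() runs its real parsing against the canned
# describe_target_response payload defined above in this test module.
def test_describe_target_parses_backup_id(mocker):
    stub_client = mocker.patch('lib.events.cwe_client')
    stub_client.list_targets_by_rule.return_value = describe_target_response

    # The real method reads the last target's JSON 'Input' field and returns backup_id.
    assert cwe.describe_target() == '4a1e7e2f'
    stub_client.list_targets_by_rule.assert_called_once_with(Rule=cwe.rule_name)
```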
-------------------------------------------------------------------------------- /deployment/build-s3-dist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This assumes all of the OS-level configuration has been completed and the git repo has already been cloned 4 | # 5 | # This script should be run from the repo's deployment directory 6 | # cd deployment 7 | # ./build-s3-dist.sh source-bucket-base-name trademarked-solution-name version-code 8 | # 9 | # Parameters: 10 | # - source-bucket-base-name: Name for the S3 bucket location where the template will source the Lambda 11 | # code from. The template will append '-[region_name]' to this bucket name. 12 | # For example: ./build-s3-dist.sh solutions my-solution v1.0.0 13 | # The template will then expect the source code to be located in the solutions-[region_name] bucket 14 | # 15 | # - trademarked-solution-name: name of the solution for consistency 16 | # 17 | # - version-code: version of the package 18 | 19 | # Check to see if input has been provided: 20 | if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then 21 | echo "Please provide the base source bucket name, trademark approved solution name and version where the lambda code will eventually reside." 22 | echo "For example: ./build-s3-dist.sh solutions trademarked-solution-name v1.0.0" 23 | exit 1 24 | fi 25 | 26 | deployment_dir="$PWD" 27 | template_dist_dir="$deployment_dir/global-s3-assets" 28 | build_dist_dir="$deployment_dir/regional-s3-assets" 29 | source_dir="$deployment_dir/../source" 30 | 31 | echo "------------------------------------------------------------------------------" 32 | echo "[Init] Clean old dist folders" 33 | echo "------------------------------------------------------------------------------" 34 | echo "rm -rf $template_dist_dir" 35 | rm -rf $template_dist_dir 36 | echo "mkdir -p $template_dist_dir" 37 | mkdir -p $template_dist_dir 38 | echo "rm -rf $build_dist_dir" 39 | rm -rf $build_dist_dir 40 | echo "mkdir -p $build_dist_dir" 41 | mkdir -p $build_dist_dir 42 | 43 | echo "------------------------------------------------------------------------------" 44 | echo "[Packing] Templates" 45 | echo "------------------------------------------------------------------------------" 46 | # CloudFormation template creation 47 | echo "cp -f $deployment_dir/efs*.template $template_dist_dir" 48 | cp -f $deployment_dir/efs*.template $template_dist_dir 49 | 50 | if [[ "$OSTYPE" == "darwin"* ]]; then 51 | # Mac OS 52 | echo "Updating code source bucket in the template with $1" 53 | replace="s/%%BUCKET_NAME%%/$1/g" 54 | echo "sed -i '' -e $replace $template_dist_dir/efs-to-efs-backup.template" 55 | sed -i '' -e $replace $template_dist_dir/efs-to-efs-backup.template 56 | echo "sed -i '' -e $replace $template_dist_dir/efs-to-efs-restore.template" 57 | sed -i '' -e $replace $template_dist_dir/efs-to-efs-restore.template 58 | 59 | echo "Updating solution name in the template with $2" 60 | replace="s/%%SOLUTION_NAME%%/$2/g" 61 | echo "sed -i '' -e $replace $template_dist_dir/efs-to-efs-backup.template" 62 | sed -i '' -e $replace $template_dist_dir/efs-to-efs-backup.template 63 | echo "sed -i '' -e $replace $template_dist_dir/efs-to-efs-restore.template" 64 | sed -i '' -e $replace $template_dist_dir/efs-to-efs-restore.template 65 | 66 | echo "Updating version number in the template with $3" 67 | replace="s/%%VERSION%%/$3/g" 68 | echo "sed -i '' -e $replace $template_dist_dir/efs-to-efs-backup.template" 69 | sed -i '' -e
$replace $template_dist_dir/efs-to-efs-backup.template 70 | echo "sed -i '' -e $replace $template_dist_dir/efs-to-efs-restore.template" 71 | sed -i '' -e $replace $template_dist_dir/efs-to-efs-restore.template 72 | else 73 | # Other linux 74 | echo "Updating code source bucket in the template with $1" 75 | replace="s/%%BUCKET_NAME%%/$1/g" 76 | echo "sed -i -e $replace $template_dist_dir/efs-to-efs-backup.template" 77 | sed -i -e $replace $template_dist_dir/efs-to-efs-backup.template 78 | echo "sed -i -e $replace $template_dist_dir/efs-to-efs-restore.template" 79 | sed -i -e $replace $template_dist_dir/efs-to-efs-restore.template 80 | 81 | echo "Updating solution name in the template with $2" 82 | replace="s/%%SOLUTION_NAME%%/$2/g" 83 | echo "sed -i -e $replace $template_dist_dir/efs-to-efs-backup.template" 84 | sed -i -e $replace $template_dist_dir/efs-to-efs-backup.template 85 | echo "sed -i -e $replace $template_dist_dir/efs-to-efs-restore.template" 86 | sed -i -e $replace $template_dist_dir/efs-to-efs-restore.template 87 | 88 | echo "Updating version number in the template with $3" 89 | replace="s/%%VERSION%%/$3/g" 90 | echo "sed -i -e $replace $template_dist_dir/efs-to-efs-backup.template" 91 | sed -i -e $replace $template_dist_dir/efs-to-efs-backup.template 92 | echo "sed -i -e $replace $template_dist_dir/efs-to-efs-restore.template" 93 | sed -i -e $replace $template_dist_dir/efs-to-efs-restore.template 94 | fi 95 | 96 | echo "------------------------------------------------------------------------------" 97 | echo "[Packing] Lambda functions and scripts" 98 | echo "------------------------------------------------------------------------------" 99 | # Create zip file for AWS Lambda function 100 | echo "cd $source_dir" 101 | cd $source_dir 102 | echo "zip -q -r9 $build_dist_dir/efs_to_efs_backup.zip * -x setup.* tests/\* requirements.txt scripts/\*" 103 | zip -q -r9 $build_dist_dir/efs_to_efs_backup.zip * -x setup.* tests/\* requirements.txt scripts/\* 104 | 105 | # Copying shell scripts from source/scripts' 106 | echo "cp $source_dir/scripts/efs-* $build_dist_dir" 107 | cp $source_dir/scripts/efs-* $build_dist_dir 108 | 109 | echo 'Download the AMI ID lookup package from S3' 110 | echo "curl --connect-timeout 5 --speed-time 5 --retry 10 https://s3.amazonaws.com/cloudformation-examples/lambda/amilookup.zip -o $build_dist_dir/amilookup.zip" 111 | curl --connect-timeout 5 --speed-time 5 --retry 10 https://s3.amazonaws.com/cloudformation-examples/lambda/amilookup.zip -o $build_dist_dir/amilookup.zip -------------------------------------------------------------------------------- /source/scripts/efs-backup-fpsync.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Example would be to run this script as follows: 3 | # Once a day; retain last 31 days 4 | # efs-backup.sh $src $dst daily 31 efs-12345 5 | # Once a week; retain 4 weeks of backup 6 | # efs-backup.sh $src $dst weekly 7 efs-12345 7 | # Once a month; retain 3 months of backups 8 | # efs-backup.sh $src $dst monthly 3 efs-12345 9 | # 10 | # Snapshots will look like: 11 | # $dst/$efsid/hourly.0-3; daily.0-30; weekly.0-3; monthly.0-2 12 | 13 | 14 | # input arguments 15 | source=$1 #source_ip:/prefix 16 | destination=$2 #destination_ip:/ 17 | interval=$3 18 | retain=$4 19 | efsid=$5 20 | region=$6 21 | instance_id=$7 22 | 23 | echo "## input from user ##" 24 | echo "source: ${source}" 25 | echo "destination: ${destination}" 26 | echo "interval: ${interval}" 27 | echo "retain: 
${retain}" 28 | echo "efsid: ${efsid}" 29 | 30 | # prepare system for fpsync 31 | echo "-- $(date -u +%FT%T) -- sudo yum -y update" 32 | sudo yum -y update 33 | echo "-- $(date -u +%FT%T) -- sudo yum -y install nfs-utils" 34 | sudo yum -y install nfs-utils 35 | 36 | echo "-- $(date -u +%FT%T) -- sudo mkdir /backup" 37 | sudo mkdir /backup 38 | echo "-- $(date -u +%FT%T) -- sudo mkdir /mnt/backups" 39 | sudo mkdir /mnt/backups 40 | 41 | # 12/28/2018 - EFS-21432 - EFS mount best practices 42 | echo "-- $(date -u +%FT%T) -- sudo mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,timeo=600,retrans=2,hard,_netdev,noresvport $source /backup" 43 | sudo mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,timeo=600,retrans=2,hard,_netdev,noresvport $source /backup 44 | mount_src_status=$? 45 | echo "mount status for source efs: ${mount_src_status}" 46 | 47 | echo "-- $(date -u +%FT%T) -- sudo mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,timeo=600,retrans=2,hard,_netdev,noresvport $destination /mnt/backups" 48 | sudo mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,timeo=600,retrans=2,hard,_netdev,noresvport $destination /mnt/backups 49 | mount_backup_status=$? 50 | echo "mount status for backup efs: ${mount_backup_status}" 51 | 52 | # if efs mount fails exit workflow 53 | if [ ${mount_src_status} != '0' ] || [ ${mount_backup_status} != '0' ]; then 54 | echo "-- $(date -u +%FT%T) -- ERROR:efs_not_mounted" 55 | exit $? 56 | fi 57 | 58 | echo "-- $(date -u +%FT%T) -- sudo yum -y install parallel" 59 | sudo yum -y install parallel 60 | echo "-- $(date -u +%FT%T) -- sudo yum -y install --enablerepo=epel tree" 61 | sudo yum -y install --enablerepo=epel tree 62 | echo "-- $(date -u +%FT%T) -- sudo yum -y groupinstall 'Development Tools'" 63 | sudo yum -y groupinstall "Development Tools" 64 | echo '-- $(date -u +%FT%T) -- wget https://github.com/martymac/fpart/archive/fpart-1.0.0.zip' 65 | wget https://github.com/martymac/fpart/archive/fpart-1.0.0.zip 66 | unzip fpart-1.0.0.zip 67 | cd fpart-fpart-1.0.0/ 68 | autoreconf -i 69 | ./configure 70 | make 71 | sudo make install 72 | 73 | # Adding PATH 74 | PATH=$PATH:/usr/local/bin 75 | 76 | _thread_count=$(($(nproc --all) * 16)) 77 | 78 | # we need to decrement retain because we start counting with 0 and we need to remove the oldest backup 79 | echo "remove_snapshot_start:$(date -u +%FT%T)" 80 | let "retain=$retain-1" 81 | if sudo test -d /mnt/backups/$efsid/$interval.$retain; then 82 | echo "-- $(date -u +%FT%T) -- sudo tree /mnt/backups/$efsid/$interval.$retain -dfi | parallel --no-notice -j $_thread_count sudo rm {} -r &>/dev/null" 83 | sudo tree /mnt/backups/$efsid/$interval.$retain -dfi | parallel --will-cite -j $_thread_count sudo rm {} -r &>/dev/null 84 | echo "-- $(date -u +%FT%T) -- sudo rm /mnt/backups/$efsid/$interval.$retain -r &>/dev/null" 85 | sudo rm /mnt/backups/$efsid/$interval.$retain -r &>/dev/null 86 | echo "rm status: $?" 
87 | fi 88 | echo "remove_snapshot_stop:$(date -u +%FT%T)" 89 | 90 | # rotate all previous backups (except the first one), up one level 91 | for x in `seq $retain -1 2`; do 92 | if sudo test -d /mnt/backups/$efsid/$interval.$[$x-1]; then 93 | echo "-- $(date -u +%FT%T) -- sudo mv /mnt/backups/$efsid/$interval.$[$x-1] /mnt/backups/$efsid/$interval.$x" 94 | sudo mv /mnt/backups/$efsid/$interval.$[$x-1] /mnt/backups/$efsid/$interval.$x 95 | fi 96 | done 97 | 98 | echo "create_snapshot_start:$(date -u +%FT%T)" 99 | # copy first backup with hard links, then replace first backup with new backup 100 | if sudo test -d /mnt/backups/$efsid/$interval.0 ; then 101 | echo "-- $(date -u +%FT%T) -- sudo \"PATH=$PATH\" /usr/local/bin/fpsync -n $_thread_count -o \"-a -v --link-dest=../`basename /mnt/backups/$efsid/$interval.0`\" /mnt/backups/$efsid/$interval.0 /mnt/backups/$efsid/$interval.1" 102 | sudo "PATH=$PATH" /usr/local/bin/fpsync -n $_thread_count -o "-a -v --link-dest=../`basename /mnt/backups/$efsid/$interval.0`" /mnt/backups/$efsid/$interval.0 /mnt/backups/$efsid/$interval.1 103 | fi 104 | echo "create_snapshot_stop:$(date -u +%FT%T)" 105 | 106 | if [ ! -d /mnt/backups/$efsid ]; then 107 | echo "-- $(date -u +%FT%T) -- sudo mkdir -p /mnt/backups/$efsid" 108 | sudo mkdir -p /mnt/backups/$efsid 109 | echo "-- $(date -u +%FT%T) -- sudo chmod 700 /mnt/backups/$efsid" 110 | sudo chmod 700 /mnt/backups/$efsid 111 | fi 112 | 113 | echo "-- $(date -u +%FT%T) -- sudo rm /tmp/efs-backup.log" 114 | sudo rm /tmp/efs-backup.log 115 | 116 | # start fpsync process 117 | echo "Stating backup....." 118 | echo "-- $(date -u +%FT%T) -- sudo \"PATH=$PATH\" /usr/local/bin/fpsync -n $_thread_count -o \"-a --stats --numeric-ids --log-file=/tmp/efs-backup.log\" /backup/ /mnt/backups/$efsid/$interval.0/" 119 | sudo "PATH=$PATH" /usr/local/bin/fpsync -n $_thread_count -v -o "-a --stats --numeric-ids --log-file=/tmp/efs-backup.log" /backup/ /mnt/backups/$efsid/$interval.0/ &>/tmp/efs-fpsync.log 120 | fpsyncStatus=$? 121 | echo "fpsyncStatus:$fpsyncStatus" 122 | 123 | # removing files from target efs which are not in source 124 | echo "rsync_delete_start:$(date -u +%FT%T)" 125 | echo "-- $(date -u +%FT%T) -- sudo rsync -r --delete --existing --ignore-existing --ignore-errors --log-file=/tmp/efs-backup-rsync.log /backup/ /mnt/backups/$efsid/$interval.0/" 126 | sudo rsync -r --delete --existing --ignore-existing --ignore-errors --log-file=/tmp/efs-backup-rsync.log /backup/ /mnt/backups/$efsid/$interval.0/ 127 | rsyncDeleteStatus=$? 128 | echo "rsyncDeleteStatus:$rsyncDeleteStatus" 129 | echo "rsync_delete_stop:$(date -u +%FT%T)" 130 | echo "-- $(date -u +%FT%T) -- sudo touch /mnt/backups/$efsid/$interval.0/" 131 | sudo touch /mnt/backups/$efsid/$interval.0/ 132 | 133 | exit $fpsyncStatus 134 | -------------------------------------------------------------------------------- /source/lib/events.py: -------------------------------------------------------------------------------- 1 | ################################################################################### 2 | # Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. # 3 | # # 4 | # Licensed under the Apache License, Version 2.0 (the "License"). # 5 | # You may not use this file except in compliance with the License. # 6 | # A copy of the License is located at # 7 | # # 8 | # http://www.apache.org/licenses/LICENSE-2.0 # 9 | # # 10 | # or in the "license" file accompanying this file. 
This file is distributed # 11 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # 12 | # express or implied. See the License for the specific language governing # 13 | # permissions and limitations under the License. # 14 | ################################################################################### 15 | 16 | import boto3 17 | from json import dumps, loads, JSONEncoder 18 | from datetime import datetime, date 19 | 20 | cwe_client = boto3.client('events') 21 | lambda_client = boto3.client('lambda') 22 | 23 | 24 | class DateTimeEncoder(JSONEncoder): 25 | def default(self, o): 26 | if isinstance(o, (datetime, date)): 27 | serial = o.isoformat() 28 | return serial 29 | raise TypeError("Type %s not serializable" % type(o)) 30 | 31 | 32 | class CloudWatchEvent(object): 33 | def __init__(self, logger, uid): 34 | self.logger = logger 35 | self.rule_name = 'stop_backup' + '_' + uid 36 | self.target_id = 'terminate_event_orchestrator' 37 | 38 | # API call to get backup id from the stop CWE 39 | def describe_target(self): 40 | try: 41 | response = cwe_client.list_targets_by_rule( 42 | Rule=self.rule_name 43 | ) 44 | return loads(response['Targets'][-1]['Input'])['backup_id'] 45 | except Exception as e: 46 | self.logger.error("unhandled exception: CloudWatchEvent_describe_target", exc_info=1) 47 | 48 | # API call to create the CWE that stops the backup 49 | def create_event(self, minutes): 50 | try: 51 | time_to_run_ssm_command = 15 # minutes 52 | time = int(minutes) - time_to_run_ssm_command # subtracting time from the backup window 53 | backup_window = 'rate(' + str(time) + ' minutes)' 54 | response = cwe_client.put_rule( 55 | Name=self.rule_name, 56 | ScheduleExpression=backup_window, 57 | State='ENABLED', 58 | Description='EFS Backup Solution: CloudWatch Event created by Lambda to stop the backup instance', 59 | ) 60 | return response['RuleArn'] 61 | except Exception as e: 62 | self.logger.error("unhandled exception: CloudWatchEvent_create_event", exc_info=1) 63 | 64 | # API call to obtain name of the lambda function 65 | def get_lambda_arn(self, lambda_function_name): 66 | try: 67 | response = lambda_client.get_function( 68 | FunctionName=lambda_function_name 69 | ) 70 | return response['Configuration']['FunctionArn'] 71 | except Exception as e: 72 | self.logger.error("unhandled exception: CloudWatchEvent_get_lambda_arn", exc_info=1) 73 | 74 | # CloudWatch API call to add the Orchestrator lambda function as the target 75 | def add_target(self, lambda_function_name, event): 76 | try: 77 | json_event = dumps(event, indent=4, cls=DateTimeEncoder) 78 | response = cwe_client.put_targets( 79 | Rule=self.rule_name, 80 | Targets=[ 81 | { 82 | 'Id': self.target_id, 83 | 'Arn': self.get_lambda_arn(lambda_function_name), 84 | 'Input': json_event 85 | }, 86 | ] 87 | ) 88 | return response 89 | except Exception as e: 90 | self.logger.error("unhandled exception: CloudWatchEvent_add_target", exc_info=1) 91 | 92 | # Lambda API call to add the permission for CWE to invoke Orchestrator lambda function 93 | def add_permission(self, lambda_function_name, event_rule_arn): 94 | try: 95 | response = lambda_client.add_permission( 96 | FunctionName=lambda_function_name, 97 | StatementId='stop_backup_event', 98 | Action='lambda:InvokeFunction', 99 | Principal='events.amazonaws.com', 100 | SourceArn=event_rule_arn 101 | ) 102 | return response 103 | except Exception as e: 104 | self.logger.error("unhandled exception: CloudWatchEvent_add_permission", exc_info=1) 105 | 106 | # API call to 
delete the CWE that stops the backup 107 | def delete_event(self): 108 | try: 109 | response = cwe_client.delete_rule( 110 | Name=self.rule_name 111 | ) 112 | return response 113 | except Exception as e: 114 | self.logger.error("unhandled exception: CloudWatchEvent_delete_event", exc_info=1) 115 | 116 | # CloudWatch API call to delete the Orchestrator lambda function as the target 117 | def remove_target(self): 118 | try: 119 | response = cwe_client.remove_targets( 120 | Rule=self.rule_name, 121 | Ids=[ 122 | self.target_id 123 | ] 124 | ) 125 | return response 126 | except Exception as e: 127 | self.logger.error("unhandled exception: CloudWatchEvent_remove_target", exc_info=1) 128 | 129 | # Lambda API call to remove the permission for CWE to invoke Orchestrator lambda function 130 | def remove_permission(self, lambda_function_name): 131 | try: 132 | response = lambda_client.remove_permission( 133 | FunctionName=lambda_function_name, 134 | StatementId='stop_backup_event' 135 | ) 136 | return response 137 | except Exception as e: 138 | self.logger.error("unhandled exception: CloudWatchEvent_remove_permission", exc_info=1) 139 | -------------------------------------------------------------------------------- /source/scripts/efs-ec2-restore.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #======================================================================== 4 | # 5 | # master script to run efs-restore-fpsync 6 | # fetches EFS mount IPs 7 | # runs efs-restore scripts 8 | # uploads logs to S3 9 | # updates status on DynamoDB 10 | # 11 | #======================================================================== 12 | # author: aws-solutions-builder@ 13 | 14 | 15 | clear 16 | echo "This is the master script to perform efs restore" 17 | sleep 2 18 | 19 | _source_efs=$1 ## {type:string, description:source efs id} 20 | _backup_efs=$2 ## {type:string, description:backup efs id} 21 | _interval=$3 ## {type:string, description:interval for backup daily/weekly/monthly} 22 | _backup_num=$4 ## {type:number, description:backup number to restore} 23 | _folder_label=$5 ## {type:string, description:backup identifier} 24 | _restore_prefix=$6 ## {type:string, description:source prefix where files will be restored} 25 | _restore_sub_dir=$7 ## {type:string, description:sub directory that you want to restore} 26 | _s3bucket=$8 ## {type:string, description:s3 bucket to publish logs} 27 | _sns_topic=$9 ## {type:string, description:sns topic arn for restore notifications} 28 | 29 | echo "## input from user ##" 30 | echo "_source_efs: ${_source_efs}" 31 | echo "_backup_efs: ${_backup_efs}" 32 | echo "_interval: ${_interval}" 33 | echo "_backup_num: ${_backup_num}" 34 | echo "_folder_label: ${_folder_label}" 35 | echo "_restore_prefix: ${_restore_prefix}" 36 | echo "_restore_sub_directory: ${_restore_sub_dir}" 37 | echo "_s3bucket: ${_s3bucket}" 38 | echo "_sns_topic: ${_sns_topic}" 39 | 40 | # 41 | # get region from instance meta-data 42 | # 43 | _az=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone/) 44 | _region=${_az::-1} 45 | echo "region is ${_region}" 46 | _instance_id=$(curl -s http://169.254.169.254/latest/meta-data/instance-id) 47 | echo "instance-id is ${_instance_id}" 48 | _instance_type=$(curl -s http://169.254.169.254/latest/meta-data/instance-type/) 49 | echo "instance-type is ${_instance_type}" 50 | 51 | # 52 | # getting source/backup efs mount ip 53 | # parameters : [_source_efs, _region] 54 | # 55 | echo "-- 
$(date -u +%FT%T) -- resolving source efs address ${_source_efs}.efs.${_region}.amazonaws.com" 56 | until dig ${_source_efs}.efs.${_region}.amazonaws.com +short 57 | do 58 | sleep 1 59 | done 60 | _src_mount_ip=$(dig ${_source_efs}.efs.${_region}.amazonaws.com +short) 61 | echo "-- $(date -u +%FT%T) -- src mount ip: ${_src_mount_ip}" 62 | 63 | echo "-- $(date -u +%FT%T) -- resolving backup efs address ${_backup_efs}.efs.${_region}.amazonaws.com" 64 | until dig ${_backup_efs}.efs.${_region}.amazonaws.com +short 65 | do 66 | sleep 1 67 | done 68 | _backup_mount_ip=$(dig ${_backup_efs}.efs.${_region}.amazonaws.com +short) 69 | echo "-- $(date -u +%FT%T) -- backup mount ip: ${_backup_mount_ip}" 70 | 71 | if [ -z "${_src_mount_ip}" ] || [ -z "${_backup_mount_ip}" ]; then 72 | echo "-- $(date -u +%FT%T) -- ERROR:efs_mount_ip_not_found" 73 | echo "-- $(date -u +%FT%T) -- Either or both mount IPs not found, skipping EFS restore script. Please verify if the EC2 instance was launched in the same AZ as the EFS systems." 74 | echo "-- $(date -u +%FT%T) -- Notify customer of failure" 75 | aws sns publish --region ${_region} \ 76 | --topic-arn ${_sns_topic} \ 77 | --message '{ 78 | SourceEFS:'${_source_efs}', 79 | BackupEFS:'${_backup_efs}', 80 | Interval:'${_interval}', 81 | BackupNum:'${_backup_num}', 82 | FolderLabel:'${_folder_label}', 83 | RestorePrefix:'${_restore_prefix}', 84 | LogBucket:'${_s3bucket}', 85 | RestoreStatus:Unable to find the mount IP address of either source or backup EFS. Please verify if the EC2 instance was launched in the same AZ as the EFS systems. Terminating instance. 86 | }' 87 | else 88 | # 89 | # running efs restore script 90 | # parameters : [_src_mount_ip, _backup_mount_ip, _interval, _retain, _folder_label, _backup_window] 91 | # 92 | echo "-- $(date -u +%FT%T) -- running efs restore script" 93 | _restore_start_time=$(date -u +%FT%T) 94 | # _timeout_val=$(((${_backup_window}-1)*60)) # timeout 1 minute less than given window -> timeout in SSM 95 | # timeout --preserve-status --signal=2 ${_timeout_val} ./efs-backup-fpsync.sh ${_src_mount_ip}:/ ${_backup_mount_ip}:/ ${_interval} ${_retain} ${_folder_label} 96 | /home/ec2-user/efs-restore-fpsync.sh ${_src_mount_ip}:${_restore_prefix} ${_backup_mount_ip}:/ ${_interval} ${_backup_num} ${_folder_label} ${_restore_sub_dir} ${_s3bucket} 97 | restoreStatus=$? 98 | _restore_stop_time=$(date -u +%FT%T) 99 | echo "-- $(date -u +%FT%T) -- fpsync finished with status: $restoreStatus" 100 | 101 | # 102 | # uploading efs restore logs to s3 103 | # parameters : [s3bucket, efsid] 104 | # 105 | echo "-- $(date -u +%FT%T) -- upload efs restore fpsync logs to S3 bucket" 106 | aws s3 cp /tmp/efs-restore.log s3://${_s3bucket}/efs-restore-logs/${_folder_label}-${_interval}.${_backup_num}-restore-fpsync-`date +%Y%m%d-%H%M`.log 107 | echo "upload restore fpsync logs to S3, status: $?" 108 | echo "-- $(date -u +%FT%T) -- upload efs restore rsync logs to S3 bucket" 109 | aws s3 cp /tmp/efs-restore-rsync.log s3://${_s3bucket}/efs-restore-logs/${_folder_label}-${_interval}.${_backup_num}-restore-rsync-delete-`date +%Y%m%d-%H%M`.log 110 | echo "upload restore rsync logs to S3, status: $?" 
111 | 112 | # 113 | # calculating restored data and reporting to backend metric 114 | # parameters : [_nofs, _nfst, _tfs, _ttfs] 115 | # 116 | _nofs=$(cat /tmp/efs-restore.log | grep 'Number of files' | awk '{nofs += $7} END {print nofs}') 117 | echo "Number of files: ${_nofs}" 118 | 119 | _nfst=$(cat /tmp/efs-restore.log | grep 'Number of files transferred' | awk '{nfst += $8} END {print nfst}') 120 | echo "Number of files transferred: ${_nfst}" 121 | 122 | _tfs=$(cat /tmp/efs-restore.log | grep 'Total file size' | awk '{tfs += $7} END {print tfs}') 123 | echo "Total file size: ${_tfs}" 124 | 125 | _ttfs=$(cat /tmp/efs-restore.log | grep 'Total transferred file size' | awk '{ttfs += $8} END {print ttfs}') 126 | echo "Total transferred file size: ${_ttfs}" 127 | 128 | # timestamps for (fpsync) and (rsync --delete) file operations 129 | _fpsync_start=$(cat /var/log/cloud-init-output.log | grep 'fpsync_start' | cut -d: -f2-) 130 | echo "fpsync start time: ${_fpsync_start}" 131 | _fpsync_stop=$(cat /var/log/cloud-init-output.log | grep 'fpsync_stop' | cut -d: -f2-) 132 | echo "fpsync start time: ${_fpsync_stop}" 133 | _rsync_delete_start=$(cat /var/log/cloud-init-output.log | grep 'rsync_delete_start' | cut -d: -f2-) 134 | echo "rsync delete start: ${_rsync_delete_start}" 135 | _rsync_delete_stop=$(cat /var/log/cloud-init-output.log | grep 'rsync_delete_stop' | cut -d: -f2-) 136 | echo "rsync delete stop: ${_rsync_delete_stop}" 137 | 138 | 139 | _rtime=$(date -u +"%Y-%m-%dT%H:%M:%SZ") 140 | _headers="Content-Type: application/json" 141 | _url="https://metrics.awssolutionsbuilder.com/generic" 142 | _uuid=$(uuidgen) 143 | echo "_metric={\"Interval\":\"${_interval}\",\"BackupNum\":\"${_backup_num}\",\"FolderLabel\":\"${_folder_label}\",\"NumberOfFiles\":\"${_nofs}\",\"NumberOfFilesTransferred\":\"${_nfst}\",\"TotalFileSize\":\"${_tfs}\",\"TotalTransferredFileSize\":\"${_ttfs}\",\"RestoreStartTime\":\"${_restore_start_time}\",\"RestoreStopTime\":\"${_restore_stop_time}\",\"InstanceType\":\"${_instance_type}\",\"Region\":\"${_region}\"}" 144 | _metric="{\"Interval\":\"${_interval}\",\"BackupNum\":\"${_backup_num}\",\"FolderLabel\":\"${_folder_label}\",\"NumberOfFiles\":\"${_nofs}\",\"NumberOfFilesTransferred\":\"${_nfst}\",\"TotalFileSize\":\"${_tfs}\",\"TotalTransferredFileSize\":\"${_ttfs}\",\"RestoreStartTime\":\"${_restore_start_time}\",\"RestoreStopTime\":\"${_restore_stop_time}\",\"InstanceType\":\"${_instance_type}\",\"Region\":\"${_region}\"}" 145 | curl -H "${_headers}" -X POST -d '{"TimeStamp":'${_rtime}',"UUID":'${_uuid}',"Solution":"SO0031R","Data":'${_metric}'}' ${_url} 146 | echo "-- $(date -u +%FT%T) -- post metric status: $?" 
147 | 148 | # 149 | # notify customer with restore status 150 | # parameters : [_sns_topic, _source_efs, _backup_efs, _interval, _backup_num, _folder_label, _restore_prefix, _s3bucket] 151 | # 152 | if [ "${restoreStatus}" == "0" ]; then 153 | echo "-- $(date -u +%FT%T) -- notify customer of success" 154 | aws sns publish --region ${_region} \ 155 | --topic-arn ${_sns_topic} \ 156 | --message '{ 157 | SourceEFS:'${_source_efs}', 158 | BackupEFS:'${_backup_efs}', 159 | Interval:'${_interval}', 160 | BackupNum:'${_backup_num}', 161 | FolderLabel:'${_folder_label}', 162 | RestorePrefix:'${_restore_prefix}', 163 | RestoreSubDirectory:'${_restore_sub_dir}', 164 | LogBucket:'${_s3bucket}', 165 | RestoreStartTime:'${_restore_start_time}', 166 | RestoreStopTime:'${_restore_stop_time}', 167 | RestoreStatus:Success 168 | }' 169 | else 170 | echo "-- $(date -u +%FT%T) -- notify customer of failure" 171 | aws sns publish --region ${_region} \ 172 | --topic-arn ${_sns_topic} \ 173 | --message '{ 174 | SourceEFS:'${_source_efs}', 175 | BackupEFS:'${_backup_efs}', 176 | Interval:'${_interval}', 177 | BackupNum:'${_backup_num}', 178 | FolderLabel:'${_folder_label}', 179 | RestorePrefix:'${_restore_prefix}', 180 | RestoreSubDirectory:'${_restore_sub_dir}', 181 | LogBucket:'${_s3bucket}', 182 | RestoreStartTime:'${_restore_start_time}', 183 | RestoreStopTime:'${_restore_stop_time}', 184 | RestoreStatus:Fail 185 | }' 186 | fi 187 | echo "send notification to customer, status: $?" 188 | fi 189 | 190 | # 191 | # uploading cloud init logs to s3 192 | # parameters : [_s3bucket, _folder_label] 193 | # 194 | aws s3 cp /var/log/cloud-init-output.log s3://${_s3bucket}/ec2-logs/${_folder_label}-${_interval}.${_backup_num}-restore-`date +%Y%m%d-%H%M`.log 195 | echo "-- $(date -u +%FT%T) -- upload ec2 cloud init logs to S3, status: $?" 196 | 197 | # 198 | # changing auto scaling capacity 199 | # parameters : [_asg_name, _instance_id, _region] 200 | # 201 | _asg_name=$(aws ec2 describe-tags --region ${_region} --filters "Name=resource-id,Values=${_instance_id}" --query 'Tags[?Key==`aws:autoscaling:groupName`]'.Value --output text) 202 | aws autoscaling set-desired-capacity --region ${_region} --auto-scaling-group-name ${_asg_name} --desired-capacity 0 203 | echo "-- $(date -u +%FT%T) -- autoscaling desired capacity changed, status: $?" 204 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 
23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS -------------------------------------------------------------------------------- /source/tests/test_ssm.py: -------------------------------------------------------------------------------- 1 | from lib.logger import Logger 2 | from lib.ssm import SimpleSystemsManager 3 | log_level = 'critical' 4 | logger = Logger(loglevel=log_level) 5 | ssm = SimpleSystemsManager(logger) 6 | 7 | ssm_send_command_response = {"Command": {"Comment": "EFS Backup Solution: Performs cleanup, upload logs files to S3, updates DDB and lifecycle hook. 
", "Status": "Pending", "MaxErrors": "0", 8 | "Parameters": {"commands": 9 | ['#!/bin/bash\n', '#========================================================================\n', '#\n', '# ec2 ssm script\n', '# stops fpsync process\n', '# uploads logs to S3\n', '# updates status on DynamoDB\n', '# completes lifecycle hook\n', '#\n', '#========================================================================\n', '# author: aws-solutions-builder@\n', '\n', '_az=$(curl http://169.254.169.254/latest/meta-data/placement/availability-zone/)\n', '_region=${_az::-1}\n', '_instance_id=$(curl http://169.254.169.254/latest/meta-data/instance-id)\n', '_hook_result="CONTINUE"\n', '\n', '#\n', '# uploading cloud-init and fpsync log to s3 before stopping fpsync process\n', '# parameters : [_s3bucket, _folder_label]\n', '#\n', 'echo "-- $(date -u +%FT%T) -- uploading cloud init logs"\n', 'aws s3 cp /var/log/cloud-init-output.log s3://S3_BUCKET/ec2-logs/efs-backup-backup-`date +%Y%m%d-%H%M`.log\n', 'echo "-- $(date -u +%FT%T) -- upload ec2 cloud init logs to S3, status: $?"\n', 'echo "-- $(date -u +%FT%T) -- uploading backup (fpsync) logs"\n', 'aws s3 cp /tmp/efs-backup.log s3://S3_BUCKET/efs-backup-logs/efs-backup-backup-fpsync-`date +%Y%m%d-%H%M`.log\n', 'echo "-- $(date -u +%FT%T) -- upload backup fpsync logs to S3 status: $?"\n', '\n', '#\n', '# kill fpsync process with SIGINT, wait until background processes complete\n', '# parameters : [_fpsync_pid]\n', '#\n', "_fpsync_pid=$(head -1 /tmp/efs-fpsync.log | awk '{print $4}' | awk -F '-' '{print $2}')\n", 'echo "-- $(date -u +%FT%T) -- fpsync foreground process-id: $_fpsync_pid"\n', '\n', 'sudo kill -SIGINT $_fpsync_pid\n', 'echo "-- $(date -u +%FT%T) -- kill fpsync pid status: $?"\n', '\n', 'if sudo test -e /tmp/efs-fpsync.log; then\n', ' echo "-- $(date -u +%FT%T) -- killing child rsync processes, may take up to 15 minutes"\n', ' _to1=$((SECONDS+900))\n', " until tail -n 2 /tmp/efs-fpsync.log | grep -Po '\\d+(?=%)'\n", ' do\n', ' # timeout after 900 SECONDS\n', ' if [ $SECONDS -gt $_to1 ]; then\n', ' break\n', ' fi\n', ' done\n', " _backup_percentage=$(tail -n 2 /tmp/efs-fpsync.log | grep -Po '\\d+(?=%)')\n", ' echo "-- $(date -u +%FT%T) -- exiting loop"\n', 'else\n', ' echo "-- $(date -u +%FT%T) -- /tmp/efs-fpsync.log file does not exist"\n', 'fi\n', '\n', '#\n', '# updating dynamo db with backup meta-data\n', '# parameters : [_nofs, _nfst, _tfs, _ttfs]\n', '#\n', "_nofs=$(cat /tmp/efs-backup.log | grep 'Number of files' | awk '{nofs += $7} END {print nofs}')\n", 'echo "-- $(date -u +%FT%T) -- Number of files: $_nofs"\n', '\n', "_nfst=$(cat /tmp/efs-backup.log | grep 'Number of files transferred' | awk '{nfst += $8} END {print nfst}')\n", 'echo "-- $(date -u +%FT%T) -- Number of files transferred: $_nfst"\n', '\n', "_tfs=$(cat /tmp/efs-backup.log | grep 'Total file size' | awk '{tfs += $7} END {print tfs}')\n", 'echo "-- $(date -u +%FT%T) -- Total file size: $_tfs"\n', '\n', "_ttfs=$(cat /tmp/efs-backup.log | grep 'Total transferred file size' | awk '{ttfs += $8} END {print ttfs}')\n", 'echo "-- $(date -u +%FT%T) -- Total transferred file size: $_ttfs"\n', '\n', '#\n', '# removing files from target efs which are not in source\n', '# parameters : [_folder_label, _interval]\n', '#\n', 'echo "rsync_delete_start:$(date -u +%FT%T)"\n', '_rsync_delete_start=$(date -u +%FT%T)\n', 'echo "-- $(date -u +%FT%T) -- sudo rsync -r --delete --existing --ignore-existing --ignore-errors --log-file=/tmp/efs-backup-rsync.log /backup/ 
/mnt/backups/efs-backup/BACKUP_INTERVAL.0/"\n', 'sudo rsync -r --delete --existing --ignore-existing --ignore-errors --log-file=/tmp/efs-backup-rsync.log /backup/ /mnt/backups/efs-backup/BACKUP_INTERVAL.0/\n', 'echo "rsync delete status: $?"\n', 'echo "rsync_delete_stop:$(date -u +%FT%T)"\n', '_rsync_delete_stop=$(date -u +%FT%T)\n', 'echo "-- $(date -u +%FT%T) -- sudo touch /mnt/backups/efs-backup/BACKUP_INTERVAL.0/"\n', 'sudo touch /mnt/backups/efs-backup/BACKUP_INTERVAL.0/\n', '\n', '_finish_time=$(date -u +%FT%T)\n', 'echo "-- $(date -u +%FT%T) -- backup finish time: $_finish_time"\n', '\n', '#\n', '# uploading backup (rsync delete) log to s3\n', '# parameters : [_s3bucket, _folder_label]\n', '#\n', 'echo "-- $(date -u +%FT%T) -- uploading backup (rsync delete) logs"\n', 'aws s3 cp /tmp/efs-backup-rsync.log s3://S3_BUCKET/efs-backup-logs/efs-backup-backup-rsync-delete-`date +%Y%m%d-%H%M`.log\n', 'echo "-- $(date -u +%FT%T) -- upload rsync delete logs to S3 status: $?"\n', '\n', '# timestamps for (rm -rf) and (cp -al) file operations\n', "_rm_start=$(cat /var/log/cloud-init-output.log | grep 'remove_snapshot_start' | cut -d: -f2-)\n", "_rm_stop=$(cat /var/log/cloud-init-output.log | grep 'remove_snapshot_stop' | cut -d: -f2-)\n", "_hl_start=$(cat /var/log/cloud-init-output.log | grep 'create_snapshot_start' | cut -d: -f2-)\n", "_hl_stop=$(cat /var/log/cloud-init-output.log | grep 'create_snapshot_stop' | cut -d: -f2-)\n", "_err_61=$(cat /var/log/cloud-init-output.log | grep 'efs_mount_ip_not_found' | cut -d: -f4)\n", '\n', '#\n', '# getting burst credit balance from Source EFS\n', '# parameters : [_source_efs]\n', '#\n', "_mtime1=$(date --date '30 minutes ago' +%FT%T)\n", '_mtime2=$(date -u +%FT%T)\n', "_src_efs_credit_balance=$(aws cloudwatch get-metric-statistics --namespace AWS/EFS --region $_region --metric-name BurstCreditBalance --period 300 --statistics Average --dimensions Name=FileSystemId,Value=fake-efs-id --start-time $_mtime1 --end-time $_mtime2 --query Datapoints[0].['Average'] --output text)\n", 'echo "-- $(date -u +%FT%T) -- source efs BurstCreditBalance after backup: $_src_efs_credit_balance"\n', '\n', '#\n', '# update Dynamo DB Table with backup status\n', '# parameters : [_ddb_table_name, _backup_id, _backup_percentage, _region]\n', '#\n', 'if [ "$_err_61" == "efs_mount_ip_not_found" ]; then\n', ' echo "-- $(date -u +%FT%T) -- backup unsuccessful (id: B_ID)"\n', ' aws dynamodb update-item --table-name DDB_TABLE_NAME --key \'{"BackupId":{"S":"\'B_ID\'"}}\' --update-expression "SET BackupStatus = :q, BackupStopTime = :t" --expression-attribute-values \'{":q": {"S":"Unsuccessful"}, ":t": {"S":"\'$_finish_time\'"}}\' --region $_region\n', 'else\n', ' if [ "$_backup_percentage" == "100" ]; then\n', ' echo "-- $(date -u +%FT%T) -- backup completed successfully (id: B_ID)"\n', ' aws dynamodb update-item --table-name DDB_TABLE_NAME --key \'{"BackupId":{"S":"\'B_ID\'"}}\' --update-expression "SET BackupStatus = :q, NumberOfFiles = :n1, NumberOfFilesTransferred = :n2, TotalFileSize = :f1, TotalTransferredFileSize = :f2, BackupStopTime = :t, RemoveSnapshotStartTime = :rm1, RemoveSnapshotStopTime = :rm2, CreateHardlinksStartTime = :hl1, CreateHardlinksStopTime = :hl2, RsyncDeleteStartTime = :rd1, RsyncDeleteStopTime = :rd2, SourceBurstCreditBalancePostBackup = :cb1" --expression-attribute-values \'{":q": {"S":"Success"}, ":n1": {"N":"\'$_nofs\'"}, ":n2": {"N":"\'$_nfst\'"}, ":f1": {"N":"\'$_tfs\'"}, ":f2": {"N":"\'$_ttfs\'"}, ":t": {"S":"\'$_finish_time\'"}, ":rm1": 
{"S":"\'$_rm_start\'"}, ":rm2": {"S":"\'$_rm_stop\'"}, ":hl1": {"S":"\'$_hl_start\'"}, ":hl2": {"S":"\'$_hl_stop\'"}, ":rd1": {"S":"\'$_rsync_delete_start\'"}, ":rd2": {"S":"\'$_rsync_delete_stop\'"}, ":cb1": {"N":"\'$_src_efs_credit_balance\'"}}\' --region $_region\n', ' echo "-- $(date -u +%FT%T) -- dynamo db update status: $?"\n', ' else\n', ' echo "-- $(date -u +%FT%T) -- backup incomplete (id: B_ID)"\n', ' aws dynamodb update-item --table-name DDB_TABLE_NAME --key \'{"BackupId":{"S":"\'B_ID\'"}}\' --update-expression "SET BackupStatus = :q, NumberOfFiles = :n1, NumberOfFilesTransferred = :n2, TotalFileSize = :f1, TotalTransferredFileSize = :f2, BackupStopTime = :t, RemoveSnapshotStartTime = :rm1, RemoveSnapshotStopTime = :rm2, CreateHardlinksStartTime = :hl1, CreateHardlinksStopTime = :hl2, RsyncDeleteStartTime = :rd1, RsyncDeleteStopTime = :rd2, SourceBurstCreditBalancePostBackup = :cb1" --expression-attribute-values \'{":q": {"S":"Incomplete"}, ":n1": {"N":"\'$_nofs\'"}, ":n2": {"N":"\'$_nfst\'"}, ":f1": {"N":"\'$_tfs\'"}, ":f2": {"N":"\'$_ttfs\'"}, ":t": {"S":"\'$_finish_time\'"}, ":rm1": {"S":"\'$_rm_start\'"}, ":rm2": {"S":"\'$_rm_stop\'"}, ":hl1": {"S":"\'$_hl_start\'"}, ":hl2": {"S":"\'$_hl_stop\'"}, ":rd1": {"S":"\'$_rsync_delete_start\'"}, ":rd2": {"S":"\'$_rsync_delete_stop\'"}, ":cb1": {"N":"\'$_src_efs_credit_balance\'"}}\' --region $_region\n', ' echo "-- $(date -u +%FT%T) -- dynamo db update status: $?"\n', ' fi\n', 'fi\n', '\n', '#\n', '# update lifecycle hook with completion\n', '# parameters : [_lifecycle_hookname, _autoscaling_grp_name, _hook_result, _instance_id, _region]\n', '#\n', 'echo "-- $(date -u +%FT%T) -- updating lifecycle hook"\n', 'aws autoscaling complete-lifecycle-action --lifecycle-hook-name HOOK_NAME --auto-scaling-group-name ASG_NAME --lifecycle-action-result $_hook_result --instance-id $_instance_id --region $_region\n', 'echo "-- $(date -u +%FT%T) -- lifecycle hook update status: $?"\n'] 10 | }, "ExpiresAfter": "2017-08-15T18:59:11.748000-04:00", "ServiceRole": "", "DocumentName": "AWS-RunShellScript", "TargetCount": 1, "OutputS3BucketName": "", "NotificationConfig": {"NotificationArn": "", "NotificationEvents": [], "NotificationType": ""}, "CompletedCount": 0, "StatusDetails": "Pending", "ErrorCount": 0, "OutputS3KeyPrefix": "", "InstanceIds": ["i-05820148d33df4e76"], "MaxConcurrency": "50", "Targets": [], "RequestedDateTime": "2017-08-15T17:57:11.748000-04:00", "CommandId": "0d47c8fd-9dff-41c8-a7dd-991f551d0596"}, "ResponseMetadata": {"RetryAttempts": 0, "HTTPStatusCode": 200, "RequestId": "b0dcf461-8204-11e7-aaf7-c1d3afb664b8", "HTTPHeaders": {"x-amzn-requestid": "b0dcf461-8204-11e7-aaf7-c1d3afb664b8", "date": "Tue, 15 Aug 2017 21:57:11 GMT", "content-length": "5042", "content-type": "application/x-amz-json-1.1"}}} 11 | 12 | replace_dict = {} 13 | replace_dict['${_s3bucket}'] = 'S3_BUCKET' 14 | replace_dict['${_interval}'] = 'BACKUP_INTERVAL' 15 | replace_dict['${_ddb_table_name}'] = 'DDB_TABLE_NAME' 16 | replace_dict['${_backup_id}'] = 'B_ID' 17 | replace_dict['${_autoscaling_grp_name}'] = 'ASG_NAME' 18 | replace_dict['${_lifecycle_hookname}'] = 'HOOK_NAME' 19 | replace_dict['${_folder_label}'] = 'efs-backup' 20 | replace_dict['${_source_efs}'] = 'fake-efs-id' 21 | 22 | # Dynamically replace the ssm.sh script with replace_dict values 23 | def test_ssm_create_command(): 24 | response = ssm.create_command(replace_dict) 25 | for line in response: 26 | if '${_' in line and '{_az' not in line: 27 | status = 'fail' 28 | else: 29 | status = 'pass' 30 
| logger.debug(status + ' on line >> ' + line) 31 | assert status == 'pass' 32 | return response 33 | 34 | def test_ssm_send_command(mocker): 35 | mocker.patch.object(ssm, 'send_command') 36 | ssm.send_command.return_value = ssm_send_command_response 37 | ssm.send_command('instance_id', 'AWS-RunShellScript', replace_dict) 38 | # Fixed Mock response 39 | x = ssm_send_command_response['Command']['Parameters']['commands'] 40 | # Calling create_command function 41 | y = test_ssm_create_command() 42 | return 43 | # assert x == y 44 | -------------------------------------------------------------------------------- /source/lib/ssm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #======================================================================== 3 | # 4 | # ec2 ssm script 5 | # stops fpsync process 6 | # uploads logs to S3 7 | # updates status on DynamoDB 8 | # completes lifecycle hook 9 | # 10 | #======================================================================== 11 | # author: aws-solutions-builder@ 12 | 13 | _az=$(curl http://169.254.169.254/latest/meta-data/placement/availability-zone/) 14 | _region=${_az::-1} 15 | _instance_id=$(curl http://169.254.169.254/latest/meta-data/instance-id) 16 | _hook_result="CONTINUE" 17 | 18 | # 19 | # uploading cloud-init and fpsync log to s3 before stopping fpsync process 20 | # parameters : [_s3bucket, _folder_label] 21 | # 22 | echo "-- $(date -u +%FT%T) -- uploading cloud init logs" 23 | _ec2log=$(date +%Y%m%d-%H%M) 24 | aws s3 cp /var/log/cloud-init-output.log s3://${_s3bucket}/ec2-logs/${_folder_label}-backup-$_ec2log.log 25 | echo "-- $(date -u +%FT%T) -- upload ec2 cloud init logs to S3, status: $?" 26 | _log_location=https://s3.amazonaws.com/${_s3bucket}/ec2-logs/${_folder_label}-backup-$_ec2log.log 27 | 28 | # find if efs mounted successfully 29 | _err_61=$(cat /var/log/cloud-init-output.log | grep 'efs_mount_ip_not_found' | cut -d: -f4) 30 | _err_62=$(cat /var/log/cloud-init-output.log | grep 'efs_not_mounted' | cut -d: -f4) 31 | 32 | if [ "$_err_61" != "efs_mount_ip_not_found" ] && [ "$_err_62" != "efs_not_mounted" ] ; then 33 | 34 | echo "-- $(date -u +%FT%T) -- uploading backup (fpsync) logs" 35 | aws s3 cp /tmp/efs-backup.log s3://${_s3bucket}/efs-backup-logs/${_folder_label}-backup-fpsync-`date +%Y%m%d-%H%M`.log 36 | echo "-- $(date -u +%FT%T) -- upload backup fpsync logs to S3 status: $?" 37 | 38 | # uploading backup (rsync delete) log to s3 39 | echo "-- $(date -u +%FT%T) -- uploading backup (rsync delete) logs" 40 | aws s3 cp /tmp/efs-backup-rsync.log s3://${_s3bucket}/efs-backup-logs/${_folder_label}-backup-rsync-delete-`date +%Y%m%d-%H%M`.log 41 | echo "-- $(date -u +%FT%T) -- upload rsync delete logs to S3 status: $?" 42 | 43 | # 44 | # kill fpsync process with SIGINT, wait until background processes complete 45 | # parameters : [_fpsync_pid] 46 | # 47 | _fpsync_pid=$(head -1 /tmp/efs-fpsync.log | awk '{print $4}' | awk -F '-' '{print $2}') 48 | echo "-- $(date -u +%FT%T) -- fpsync foreground process-id: $_fpsync_pid" 49 | 50 | sudo kill -SIGTERM $_fpsync_pid 51 | echo "-- $(date -u +%FT%T) -- kill with SIGTERM, status: $?" 
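#
# check for fpsync output log; upload it to S3 if present, otherwise mark the
# backup as "fpsync failed" in DynamoDB, complete the lifecycle hook and exit
# parameters : [_s3bucket, _ddb_table_name, _backup_id, _lifecycle_hookname, _autoscaling_grp_name]
#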
52 | 53 | if sudo test -e /tmp/efs-fpsync.log; then 54 | echo "-- $(date -u +%FT%T) -- /tmp/efs-fpsync.log exists" 55 | echo "-- $(date -u +%FT%T) -- uploading fpsync output logs" 56 | aws s3 cp /tmp/efs-fpsync.log s3://${_s3bucket}/efs-backup-logs/${_folder_label}-fpsync-output-`date +%Y%m%d-%H%M`.log 57 | echo "-- $(date -u +%FT%T) -- upload fpsync output logs to S3 status: $?" 58 | else 59 | echo "-- $(date -u +%FT%T) -- /tmp/efs-fpsync.log file does not exist" 60 | # this means cp -al did not complete or fpsync process did not initiate 61 | _finish_time=$(date -u +%FT%T) 62 | echo "-- $(date -u +%FT%T) -- workflow finish time: $_finish_time" 63 | echo "-- $(date -u +%FT%T) -- backup unsuccessful (id: ${_backup_id})" 64 | aws dynamodb update-item --table-name ${_ddb_table_name} --key '{"BackupId":{"S":"'${_backup_id}'"}}' --update-expression "SET BackupStatus = :q, BackupStopTime = :t, EC2Logs = :log" --expression-attribute-values '{":q": {"S":"fpsync failed"}, ":t": {"S":"'$_finish_time'"}, ":log": {"S":"'$_log_location'"}}' --region $_region 65 | # udpate lifecycle hook and exit ssm script 66 | echo "-- $(date -u +%FT%T) -- updating lifecycle hook" 67 | aws autoscaling complete-lifecycle-action --lifecycle-hook-name ${_lifecycle_hookname} --auto-scaling-group-name ${_autoscaling_grp_name} --lifecycle-action-result $_hook_result --instance-id $_instance_id --region $_region 68 | echo "-- $(date -u +%FT%T) -- lifecycle hook update status: $?" 69 | exit $? 70 | fi 71 | 72 | # 73 | # updating dynamo db with backup meta-data 74 | # parameters : [_nofs, _nfst, _tfs, _ttfs] 75 | # 76 | _nofs=$(cat /tmp/efs-backup.log | grep 'Number of files' | awk '{nofs += $7} END {print nofs}') 77 | if [[ -z $_nofs ]] ; then 78 | _nofs=0 79 | fi 80 | echo "-- $(date -u +%FT%T) -- Number of files: $_nofs" 81 | 82 | _nfst=$(cat /tmp/efs-backup.log | grep 'Number of files transferred' | awk '{nfst += $8} END {print nfst}') 83 | if [[ -z $_nfst ]] ; then 84 | _nfst=0 85 | fi 86 | echo "-- $(date -u +%FT%T) -- Number of files transferred: $_nfst" 87 | 88 | _tfs=$(cat /tmp/efs-backup.log | grep 'Total file size' | awk '{tfs += $7} END {print tfs}') 89 | if [[ -z $_tfs ]] ; then 90 | _tfs=0 91 | fi 92 | echo "-- $(date -u +%FT%T) -- Total file size: $_tfs" 93 | 94 | _ttfs=$(cat /tmp/efs-backup.log | grep 'Total transferred file size' | awk '{ttfs += $8} END {print ttfs}') 95 | if [[ -z $_ttfs ]] ; then 96 | _ttfs=0 97 | fi 98 | echo "-- $(date -u +%FT%T) -- Total transferred file size: $_ttfs" 99 | 100 | # timestamps for (rm -rf) and (hardlink) file operations 101 | _rm_start=$(cat /var/log/cloud-init-output.log | grep 'remove_snapshot_start' | cut -d: -f2-) 102 | _rm_stop=$(cat /var/log/cloud-init-output.log | grep 'remove_snapshot_stop' | cut -d: -f2-) 103 | _hl_start=$(cat /var/log/cloud-init-output.log | grep 'create_snapshot_start' | cut -d: -f2-) 104 | _hl_stop=$(cat /var/log/cloud-init-output.log | grep 'create_snapshot_stop' | cut -d: -f2-) 105 | 106 | # 107 | # getting burst credit balance from Source EFS 108 | # parameters : [_source_efs] 109 | # 110 | _mtime1=$(date --date '30 minutes ago' +%FT%T) 111 | _mtime2=$(date -u +%FT%T) 112 | _src_efs_credit_balance=$(aws cloudwatch get-metric-statistics --namespace AWS/EFS --region $_region --metric-name BurstCreditBalance --period 300 --statistics Average --dimensions Name=FileSystemId,Value=${_source_efs} --start-time $_mtime1 --end-time $_mtime2 --query Datapoints[0].['Average'] --output text) 113 | echo "-- $(date -u +%FT%T) -- source efs 
BurstCreditBalance after backup: $_src_efs_credit_balance" 114 | if [[ -z $_src_efs_credit_balance ]] ; then 115 | _src_efs_credit_balance=0 116 | fi 117 | 118 | # getting fpsync and rsync status 119 | fpsyncStatus=$(cat /var/log/cloud-init-output.log | grep 'fpsyncStatus' | cut -d: -f2-) 120 | echo "fpsyncStatus: $fpsyncStatus" 121 | rsyncDeleteStatus=$(cat /var/log/cloud-init-output.log | grep 'rsyncDeleteStatus' | cut -d: -f2-) 122 | echo "rsync delete status: $rsyncDeleteStatus" 123 | 124 | _finish_time=$(date -u +%FT%T) 125 | echo "-- $(date -u +%FT%T) -- backup finish time: $_finish_time" 126 | 127 | # update Dynamo DB Table with backup status 128 | if [ "$fpsyncStatus" == "0" ] && [ "$rsyncDeleteStatus" == "0" ] ; then 129 | echo "-- $(date -u +%FT%T) -- backup completed successfully (id: ${_backup_id})" 130 | _rsync_delete_start=$(cat /var/log/cloud-init-output.log | grep 'rsync_delete_start' | cut -d: -f2-) 131 | _rsync_delete_stop=$(cat /var/log/cloud-init-output.log | grep 'rsync_delete_stop' | cut -d: -f2-) 132 | aws dynamodb update-item --table-name ${_ddb_table_name} --key '{"BackupId":{"S":"'${_backup_id}'"}}' --update-expression "SET BackupStatus = :q, NumberOfFiles = :n1, NumberOfFilesTransferred = :n2, TotalFileSize = :f1, TotalTransferredFileSize = :f2, BackupStopTime = :t, RemoveSnapshotStartTime = :rm1, RemoveSnapshotStopTime = :rm2, CreateHardlinksStartTime = :hl1, CreateHardlinksStopTime = :hl2, RsyncDeleteStartTime = :rd1, RsyncDeleteStopTime = :rd2, SourceBurstCreditBalancePostBackup = :cb1, EC2Logs = :log" --expression-attribute-values '{":q": {"S":"Success"}, ":n1": {"N":"'$_nofs'"}, ":n2": {"N":"'$_nfst'"}, ":f1": {"N":"'$_tfs'"}, ":f2": {"N":"'$_ttfs'"}, ":t": {"S":"'$_finish_time'"}, ":rm1": {"S":"'$_rm_start'"}, ":rm2": {"S":"'$_rm_stop'"}, ":hl1": {"S":"'$_hl_start'"}, ":hl2": {"S":"'$_hl_stop'"}, ":rd1": {"S":"'$_rsync_delete_start'"}, ":rd2": {"S":"'$_rsync_delete_stop'"}, ":cb1": {"N":"'$_src_efs_credit_balance'"}, ":log": {"S":"'$_log_location'"}}' --region $_region 133 | echo "-- $(date -u +%FT%T) -- dynamo db update status: $?" 134 | elif [ "$fpsyncStatus" == "0" ] && [ "$rsyncDeleteStatus" != "0" ] ; then 135 | echo "-- $(date -u +%FT%T) -- rsync delete incomplete (id: ${_backup_id})" 136 | aws dynamodb update-item --table-name ${_ddb_table_name} --key '{"BackupId":{"S":"'${_backup_id}'"}}' --update-expression "SET BackupStatus = :q, NumberOfFiles = :n1, NumberOfFilesTransferred = :n2, TotalFileSize = :f1, TotalTransferredFileSize = :f2, BackupStopTime = :t, RemoveSnapshotStartTime = :rm1, RemoveSnapshotStopTime = :rm2, CreateHardlinksStartTime = :hl1, CreateHardlinksStopTime = :hl2, SourceBurstCreditBalancePostBackup = :cb1, EC2Logs = :log" --expression-attribute-values '{":q": {"S":"Rsync Delete Incomplete"}, ":n1": {"N":"'$_nofs'"}, ":n2": {"N":"'$_nfst'"}, ":f1": {"N":"'$_tfs'"}, ":f2": {"N":"'$_ttfs'"}, ":t": {"S":"'$_finish_time'"}, ":rm1": {"S":"'$_rm_start'"}, ":rm2": {"S":"'$_rm_stop'"}, ":hl1": {"S":"'$_hl_start'"}, ":hl2": {"S":"'$_hl_stop'"}, ":cb1": {"N":"'$_src_efs_credit_balance'"}, ":log": {"S":"'$_log_location'"}}' --region $_region 137 | echo "-- $(date -u +%FT%T) -- dynamo db update status: $?" 
138 | else 139 | echo "-- $(date -u +%FT%T) -- backup incomplete (id: ${_backup_id})" 140 | aws dynamodb update-item --table-name ${_ddb_table_name} --key '{"BackupId":{"S":"'${_backup_id}'"}}' --update-expression "SET BackupStatus = :q, NumberOfFiles = :n1, NumberOfFilesTransferred = :n2, TotalFileSize = :f1, TotalTransferredFileSize = :f2, BackupStopTime = :t, RemoveSnapshotStartTime = :rm1, RemoveSnapshotStopTime = :rm2, CreateHardlinksStartTime = :hl1, CreateHardlinksStopTime = :hl2, SourceBurstCreditBalancePostBackup = :cb1, EC2Logs = :log" --expression-attribute-values '{":q": {"S":"Incomplete"}, ":n1": {"N":"'$_nofs'"}, ":n2": {"N":"'$_nfst'"}, ":f1": {"N":"'$_tfs'"}, ":f2": {"N":"'$_ttfs'"}, ":t": {"S":"'$_finish_time'"}, ":rm1": {"S":"'$_rm_start'"}, ":rm2": {"S":"'$_rm_stop'"}, ":hl1": {"S":"'$_hl_start'"}, ":hl2": {"S":"'$_hl_stop'"}, ":cb1": {"N":"'$_src_efs_credit_balance'"}, ":log": {"S":"'$_log_location'"}}' --region $_region 141 | echo "-- $(date -u +%FT%T) -- dynamo db update status: $?" 142 | fi 143 | 144 | fi 145 | 146 | # update Dynamo DB Table with backup status 147 | if [ "$_err_61" == "efs_mount_ip_not_found" ] || [ "$_err_62" == "efs_not_mounted" ] ; then 148 | _finish_time=$(date -u +%FT%T) 149 | echo "-- $(date -u +%FT%T) -- workflow finish time: $_finish_time" 150 | echo "-- $(date -u +%FT%T) -- backup unsuccessful (id: ${_backup_id})" 151 | aws dynamodb update-item --table-name ${_ddb_table_name} --key '{"BackupId":{"S":"'${_backup_id}'"}}' --update-expression "SET BackupStatus = :q, BackupStopTime = :t, EC2Logs = :log" --expression-attribute-values '{":q": {"S":"Unsuccessful"}, ":t": {"S":"'$_finish_time'"}, ":log": {"S":"'$_log_location'"}}' --region $_region 152 | fi 153 | 154 | # 155 | # update lifecycle hook with completion 156 | # parameters : [_lifecycle_hookname, _autoscaling_grp_name, _hook_result, _instance_id, _region] 157 | # 158 | echo "-- $(date -u +%FT%T) -- updating lifecycle hook" 159 | aws autoscaling complete-lifecycle-action --lifecycle-hook-name ${_lifecycle_hookname} --auto-scaling-group-name ${_autoscaling_grp_name} --lifecycle-action-result $_hook_result --instance-id $_instance_id --region $_region 160 | echo "-- $(date -u +%FT%T) -- lifecycle hook update status: $?" 161 | -------------------------------------------------------------------------------- /source/orchestrator.py: -------------------------------------------------------------------------------- 1 | ################################################################################### 2 | # Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. # 3 | # # 4 | # Licensed under the Apache License, Version 2.0 (the "License"). # 5 | # You may not use this file except in compliance with the License. # 6 | # A copy of the License is located at # 7 | # # 8 | # http://www.apache.org/licenses/LICENSE-2.0 # 9 | # # 10 | # or in the "license" file accompanying this file. This file is distributed # 11 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # 12 | # express or implied. See the License for the specific language governing # 13 | # permissions and limitations under the License. 
# 14 | ################################################################################### 15 | 16 | #!/bin/python 17 | 18 | from lib.dynamodb import DDB 19 | from lib.efs import EFS 20 | from lib.notify import Notify 21 | from lib.cloudwatch import CloudWatchMetric 22 | from lib.events import CloudWatchEvent 23 | from lib.logger import Logger 24 | from lib.ssm import SimpleSystemsManager 25 | from lib.asg import AutoScaling 26 | from uuid import uuid4 27 | from datetime import datetime, timedelta 28 | import os 29 | 30 | log_level = 'info' 31 | logger = Logger(loglevel=log_level) 32 | 33 | 34 | # Instantiate dictionaries 35 | ddb_item = {} 36 | replace_dict = {} 37 | notification_message = {} 38 | 39 | 40 | # Global Variables 41 | ddb_table_name = os.environ['table_name'] 42 | backup_id = str(uuid4())[:8] 43 | sns_topic_arn = os.environ['topic_arn'] 44 | source_efs_id = os.environ['source_efs'] 45 | destination_efs_id = os.environ['destination_efs'] 46 | s3_bucket = os.environ['s3_bucket'] 47 | backup_window_period = os.environ['backup_window_period'] 48 | folder_label = os.environ['folder_label'] 49 | retain_period = os.environ['backup_retention_copies'] 50 | lambda_function_name = os.environ['AWS_LAMBDA_FUNCTION_NAME'] 51 | region = os.environ['AWS_REGION'] 52 | customer_uuid = os.environ['uuid'] 53 | backup_prefix = os.environ['backup_prefix'] 54 | instance_type = os.environ['instance_type'] 55 | item_time_to_live_days = 90 56 | send_data = os.environ['send_anonymous_data'] 57 | notify_customer = os.environ['notification_on_success'] 58 | interval_tag = os.environ['interval_tag'] 59 | destination_efs_mode = os.environ['efs_mode'] 60 | backup_asg = os.environ['autoscaling_group_name'] 61 | 62 | solution_id = 'SO0031' 63 | metrics_url = 'https://metrics.awssolutionsbuilder.com/generic' 64 | 65 | 66 | # Time conversion to epoch for DDB TTL Attribute 67 | def set_item_time_to_live(): 68 | expire_time = datetime.now() + timedelta(days=item_time_to_live_days) # Default is 30 days 69 | ttl = expire_time.strftime('%s') 70 | return ttl 71 | 72 | 73 | # Custom event constant that invokes lambda function to stop the backup 74 | def terminate_event(): 75 | terminate_instance_event = { 76 | 'mode': 'backup', 77 | 'action': 'stop', 78 | 'backup_id': backup_id, 79 | 'time_stamp': datetime.now() 80 | } 81 | return terminate_instance_event 82 | 83 | 84 | # Condition to check if STOP event was triggered during CloudWatch Event creation 85 | def validate_stop_event(e): 86 | now = datetime.now() 87 | time_stamp = e.get('time_stamp') 88 | fixed_time = datetime.strptime(time_stamp, '%Y-%m-%dT%H:%M:%S.%f') + timedelta(minutes=10) 89 | if now > fixed_time: 90 | return True 91 | else: 92 | return False 93 | 94 | # Main Lambda function to catch different CW Rules 95 | def lambda_handler(event, context): 96 | logger.debug('Lambda Event') 97 | logger.debug(event) 98 | 99 | # Event triggered when AutoScaling updated from 1 to 0 desired capacity 100 | if event.get('detail', {}).get('LifecycleTransition') == 'autoscaling:EC2_INSTANCE_TERMINATING': 101 | logger.info("ASG Event: Instance Terminating") 102 | # Instantiate Custom classes 103 | cwe = CloudWatchEvent(logger, customer_uuid[:8]) 104 | ssm = SimpleSystemsManager(logger) 105 | 106 | b_id = cwe.describe_target() # Retrieve unique backup id from cwe target 107 | hook_name = event['detail']['LifecycleHookName'] 108 | asg_name = event['detail']['AutoScalingGroupName'] 109 | instance_id = event['detail']['EC2InstanceId'] 110 | document_name = "AWS-RunShellScript" 
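# A minimal, hypothetical sketch of what lib/ssm.py's create_command presumably
# does with the placeholder dictionary assembled below: substitute each '${_*}'
# key into the raw lines of lib/ssm.sh, leaving shell-derived variables such as
# ${_az} untouched (this is the behaviour test_ssm_create_command checks for):
#
#     import functools
#
#     def create_command(replacements):
#         with open('lib/ssm.sh') as script:
#             lines = script.readlines()
#         return [
#             functools.reduce(lambda text, kv: text.replace(*kv),
#                              replacements.items(), line)
#             for line in lines
#         ]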
111 | replace_dict.update({'${_s3bucket}': s3_bucket}) 112 | replace_dict.update({'${_interval}': interval_tag}) 113 | replace_dict.update({'${_ddb_table_name}': ddb_table_name}) 114 | replace_dict.update({'${_backup_id}': b_id}) 115 | replace_dict.update({'${_autoscaling_grp_name}': asg_name}) 116 | replace_dict.update({'${_lifecycle_hookname}': hook_name}) 117 | replace_dict.update({'${_folder_label}': folder_label}) 118 | replace_dict.update({'${_source_efs}': source_efs_id}) 119 | 120 | # Send message to SSM 121 | ssm.send_command(instance_id, document_name, replace_dict) 122 | 123 | # Event triggered when lifecycle hook on instance completes 124 | elif event.get('detail-type') == 'EC2 Instance Terminate Successful': 125 | logger.info("ASG Event: EC2 Instance Terminate Successful") 126 | # Instantiate Custom classes 127 | cwe = CloudWatchEvent(logger, customer_uuid[:8]) 128 | notify = Notify(logger) 129 | ddb = DDB(logger, ddb_table_name) 130 | 131 | b_id = cwe.describe_target() 132 | data = ddb.read_item('BackupId', b_id) 133 | 134 | # Create SNS notification message 135 | if data is not None: 136 | if data.get('BackupStatus') == 'Incomplete': 137 | data.update({'Message': 'The EFS backup was incomplete. Either backup window expired before full backup or fpsync process was not completed.'}) 138 | notify.customer(sns_topic_arn, data) 139 | elif data.get('BackupStatus') == 'Unsuccessful': 140 | data.update({'Message': 'The EFS backup was unsuccessful. ' 141 | 'The EC2 instance was unable to find the mount IP OR mount EFS'}) 142 | notify.customer(sns_topic_arn, data) 143 | elif data.get('BackupStatus') == "fpsync failed": 144 | data.update({'Message': 'fpsync process in backup script failed or did not start'}) 145 | notify.customer(sns_topic_arn, data) 146 | elif data.get('BackupStatus') == "Rsync Delete Incomplete": 147 | data.update({'Message': 'rsync --delete process could not complete'}) 148 | notify.customer(sns_topic_arn, data) 149 | elif data.get('BackupStatus') is None: 150 | data.update({'Message': 'The SSM script could not update the DynamoDB table with backup status, ' 151 | 'please check the logs in the S3 bucket for the details.'}) 152 | data.update({'BackupStatus': 'Unknown'}) 153 | notify.customer(sns_topic_arn, data) 154 | else: 155 | if notify_customer.lower() == 'yes': 156 | data.update({'Message': 'The EFS was backed up successfully'}) 157 | notify.customer(sns_topic_arn, data) 158 | else: 159 | notification_message.update({'Message': 'Could not find the backup id: {} in the DDB table'.format(b_id)}) 160 | notify.customer(sns_topic_arn, notification_message) 161 | 162 | # Send anonymous notification 163 | if send_data.lower() == 'yes': 164 | customer_data = ['SourceEfsId', 'DestinationEfsId', 'BackupPrefix', 'ExpireItem', 'Message'] 165 | for key in customer_data: 166 | data.pop(key, None) 167 | data.update({'Region': region}) 168 | notify.metrics(solution_id, customer_uuid, data, metrics_url) 169 | 170 | # Delete CWE 171 | cwe.remove_target() 172 | cwe.delete_event() 173 | cwe.remove_permission(lambda_function_name) 174 | 175 | else: 176 | # Event to Start Backup 177 | if event.get('mode') == 'backup' and event.get('action') == 'start': 178 | logger.info("Starting Backup") 179 | # Instantiate Custom classes 180 | ddb = DDB(logger, ddb_table_name) 181 | cw = CloudWatchMetric(logger) 182 | efs = EFS(logger) 183 | cwe = CloudWatchEvent(logger, customer_uuid[:8]) 184 | asg = AutoScaling(logger, backup_asg) 185 | 186 | # Change the ASG desired capacity 187 | 
asg.update_asg('start_instance') 188 | 189 | # Creating DDB Item (dict) 190 | efs_cw_metrics = cw.efs_cw_metrics(source_efs_id, 'Source') 191 | ddb_item.update({'BackupId': backup_id}) 192 | ddb_item.update({'BackupWindow': backup_window_period}) 193 | ddb_item.update({'IntervalTag': interval_tag}) 194 | ddb_item.update({'RetainPeriod': retain_period}) 195 | ddb_item.update({'ExpireItem': set_item_time_to_live()}) 196 | ddb_item.update({'S3BucketSize': cw.s3_cw_metrics(s3_bucket)}) 197 | ddb_item.update({'SourceEfsSize': efs.size(source_efs_id)}) 198 | ddb_item.update({'SourceEfsId': source_efs_id}) 199 | ddb_item.update({'DestinationEfsId': destination_efs_id}) 200 | ddb_item.update({'BackupPrefix': backup_prefix}) 201 | ddb_item.update({'DestinationEfsSize': efs.size(destination_efs_id)}) 202 | ddb_item.update({'SourcePerformanceMode': efs.performance_mode(source_efs_id)}) 203 | ddb_item.update({'InstanceType': instance_type}) 204 | ddb_item.update({'DestinationPerformanceMode': destination_efs_mode}) 205 | ddb_item.update({'BackupStartTime': (datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S"))}) 206 | item = dict(ddb_item, **efs_cw_metrics) # Merging two dictionaries 207 | 208 | # Put DDB item 209 | ddb.write_item(item) 210 | 211 | # Create CWE to update desired capacity in ASG 212 | event_rule_arn = cwe.create_event(backup_window_period) 213 | cwe.add_target(lambda_function_name, terminate_event()) 214 | cwe.add_permission(lambda_function_name, event_rule_arn) 215 | 216 | # Event to Stop Backup 217 | elif event.get('mode') == 'backup' and event.get('action') == 'stop' and \ 218 | validate_stop_event(event): 219 | logger.info("Stopping Backup") 220 | # Instantiate Custom classes 221 | asg = AutoScaling(logger, backup_asg) 222 | 223 | # Change the ASG desired capacity 224 | asg.update_asg('stop_instance') 225 | 226 | else: 227 | if not validate_stop_event(event): 228 | # If stop event triggered lambda during CloudWatch event creation, it should be ignored 229 | logger.info('Ignoring STOP backup event occurring within 10 minutes of the START backup event.') 230 | else: 231 | logger.error('Invalid Event. No action taken.') 232 | -------------------------------------------------------------------------------- /deployment/efs-to-efs-restore.template: -------------------------------------------------------------------------------- 1 | # EFS Backup Solution 2 | # 3 | # template for efs-backup-solution 'Restore' 4 | # **DO NOT DELETE** 5 | # 6 | # author: aws-solutions-builder@ 7 | 8 | AWSTemplateFormatVersion: '2010-09-09' 9 | 10 | Description: (SO0031R) - EFS-to-EFS Backup Solution template. 
Version %%VERSION%% 11 | 12 | Parameters: 13 | # Source EFS on which restore will be performed 14 | SrcEFS: 15 | Description: Source EFS Id 16 | Type: String 17 | AllowedPattern: .+ 18 | ConstraintDescription: cannot be blank 19 | 20 | # Backup EFS from where restore will be performed 21 | DstEFS: 22 | Description: Backup EFS Id 23 | Type: String 24 | AllowedPattern: .+ 25 | ConstraintDescription: cannot be blank 26 | 27 | # Interval tag which you want to restore 28 | IntervalTag: 29 | Description: Interval label for backup which you want to restore 30 | Type: String 31 | Default: daily 32 | AllowedValues: 33 | - daily 34 | - weekly 35 | - monthly 36 | 37 | # Backup Number that you want to restore 38 | BackupNum: 39 | Description: Backup number you want to restore, 0 being most recent 40 | Type: Number 41 | Default: 0 42 | 43 | # Folder identifier for backup copy to be restored 44 | FolderLabel: 45 | Description: Folder on destination efs where backups reside 46 | Type: String 47 | Default: efs-backup 48 | 49 | # Source EFS Prefix where you want files to be restored 50 | RestorePrefix: 51 | Description: Source prefix for restore 52 | Type: String 53 | Default: '/' 54 | AllowedPattern: .+ 55 | ConstraintDescription: cannot be blank 56 | 57 | # Sub directory that you want to restore 58 | RestoreSubDir: 59 | Description: Sub directory for restore, eg. /dir_x/; must have trailing '/'. Leave default if you want entire backup to be restored. 60 | Type: String 61 | Default: '/' 62 | AllowedPattern: (.+)*/ 63 | ConstraintDescription: must have trailing '/' 64 | 65 | # VPC where the source/destination EFS resides 66 | VpcId: 67 | Description: VPC where the source/destination EFS mount targets reside 68 | Type: AWS::EC2::VPC::Id 69 | AllowedPattern: .+ 70 | ConstraintDescription: cannot be blank 71 | 72 | # List of SubnetIDs for EC2, must be same AZ as of EFS Mount Targets (Choose 2) 73 | Subnets: 74 | Description: List of SubnetIDs for EC2, must be same AZ as of EFS Mount Targets (Choose 2). Must specify subnets in different AZs. 75 | Type: List 76 | AllowedPattern: .+ 77 | ConstraintDescription: cannot be blank 78 | 79 | SecurityGroupId: 80 | Description: The ID of an existing EC2 SecurityGroup in your Virtual Private Cloud (VPC), which should provide access to your existing EFS 81 | Type: AWS::EC2::SecurityGroup::Id 82 | AllowedPattern: .+ 83 | ConstraintDescription: cannot be blank 84 | 85 | # Bucket where restore logs will be saved 86 | RestoreLogBucket: 87 | Description: Bucket to store restore logs (use the same bucket as Backup) 88 | Type: String 89 | AllowedPattern: .+ 90 | ConstraintDescription: cannot be blank 91 | 92 | # Email for restore notifications 93 | Email: 94 | Description: Email for restore notifications 95 | Type: String 96 | AllowedPattern: .+ 97 | ConstraintDescription: cannot be blank 98 | 99 | # CW Dashboard 100 | Dashboard: 101 | Description: Do you want dashboard for your metrics? 
102 | Type: String 103 | AllowedValues: 104 | - "Yes" 105 | - "No" 106 | Default: "Yes" 107 | 108 | Metadata: 109 | AWS::CloudFormation::Interface: 110 | ParameterGroups: 111 | - Label: 112 | default: Restore Configuration 113 | Parameters: 114 | - SrcEFS 115 | - DstEFS 116 | - IntervalTag 117 | - BackupNum 118 | - FolderLabel 119 | - RestorePrefix 120 | - RestoreSubDir 121 | - RestoreLogBucket 122 | - Label: 123 | default: EC2 Configuration 124 | Parameters: 125 | - VpcId 126 | - Subnets 127 | - SecurityGroupId 128 | - Label: 129 | default: Notification & Dashboard 130 | Parameters: 131 | - Email 132 | - Dashboard 133 | ParameterLabels: 134 | IntervalTag: 135 | default: Interval Label 136 | DstEFS: 137 | default: Backup EFS 138 | Subnets: 139 | default: Subnet IDs 140 | SrcEFS: 141 | default: Source EFS 142 | BackupNum: 143 | default: Backup Number 144 | FolderLabel: 145 | default: Folder Label 146 | RestorePrefix: 147 | default: Restore Prefix 148 | RestoreSubDir: 149 | default: Restore Subdirectory 150 | VpcId: 151 | default: VPC ID 152 | SecurityGroupId: 153 | default: Security Group ID 154 | RestoreLogBucket: 155 | default: Restore Log Bucket 156 | 157 | Mappings: 158 | Map: 159 | send-data: {"SendAnonymousData": "Yes"} 160 | c5.xlarge: {"Arch":"HVM64"} 161 | us-east-1: {"InstanceSize":"c5.xlarge"} 162 | us-east-2: {"InstanceSize":"c5.xlarge"} 163 | us-west-1: {"InstanceSize":"c5.xlarge"} 164 | us-west-2: {"InstanceSize":"c5.xlarge"} 165 | ca-central-1: {"InstanceSize":"c5.xlarge"} 166 | eu-west-1: {"InstanceSize":"c5.xlarge"} 167 | eu-central-1: {"InstanceSize":"c5.xlarge"} 168 | eu-west-2: {"InstanceSize":"c5.xlarge"} 169 | ap-southeast-1: {"InstanceSize":"c5.xlarge"} 170 | ap-southeast-2: {"InstanceSize":"c5.xlarge"} 171 | ap-northeast-1: {"InstanceSize":"c5.xlarge"} 172 | ap-northeast-2: {"InstanceSize":"c5.xlarge"} 173 | ap-south-1: {"InstanceSize":"c5.xlarge"} 174 | sa-east-1: {"InstanceSize":"c5.xlarge"} 175 | SourceCode: 176 | General: 177 | S3Bucket: "%%BUCKET_NAME%%" 178 | KeyPrefix: "%%SOLUTION_NAME%%/%%VERSION%%" 179 | 180 | Conditions: 181 | DashboardOpt: !Equals [ !Ref Dashboard, "Yes" ] 182 | 183 | Resources: 184 | # 185 | # EFS resources 186 | # [EFSSecurityGroup, EFSIngressRule, RestoreInstanceLaunchConfig, RestoreAutoScalingGroup] 187 | # 188 | EFSSecurityGroup: 189 | Type: AWS::EC2::SecurityGroup 190 | Metadata: 191 | cfn_nag: 192 | rules_to_suppress: 193 | - id: F1000 194 | reason: "allowing all egress traffic" 195 | Properties: 196 | VpcId: !Sub ${VpcId} 197 | GroupDescription: !Sub SG for EFS backup solution ${AWS::StackName} 198 | 199 | EFSIngressRule: 200 | Type: AWS::EC2::SecurityGroupIngress 201 | Metadata: 202 | cfn_nag: 203 | rules_to_suppress: 204 | - id: W36 205 | reason: "adding description causes replace during CFN update, causing a stack creation error" 206 | - id: W42 207 | reason: Allowing ICMP within the same security group only 208 | Properties: 209 | FromPort: -1 210 | GroupId: !Sub ${EFSSecurityGroup} 211 | IpProtocol: -1 212 | SourceSecurityGroupId: !Sub ${EFSSecurityGroup} 213 | ToPort: -1 214 | 215 | RestoreInstanceLaunchConfig: 216 | Type: AWS::AutoScaling::LaunchConfiguration 217 | Properties: 218 | ImageId: !GetAtt AMIInfo.Id 219 | SecurityGroups: 220 | - !Sub ${EFSSecurityGroup} 221 | - !Ref SecurityGroupId 222 | InstanceType: !FindInMap [Map, !Ref "AWS::Region", "InstanceSize"] 223 | IamInstanceProfile: !Sub ${InstanceProfile} 224 | UserData: 225 | # download and run efs-restore script 226 | # 12/28/2018 - EFS-21432 - adding retries for 
downloads 227 | Fn::Base64: !Sub 228 | - | 229 | #!/bin/bash 230 | # V4488716 - 08/03/2018 - Support custom DHCP option 231 | # https://github.com/awslabs/efs-backup/issues/1 232 | cat < 111 | AllowedPattern: .+ 112 | ConstraintDescription: cannot be blank 113 | 114 | # Email for notifications 115 | Email: 116 | Description: Email for backup notifications 117 | Type: String 118 | AllowedPattern: .+ 119 | ConstraintDescription: cannot be blank 120 | 121 | # CW Dashboard 122 | Dashboard: 123 | Description: Do you want dashoard for your metrics? 124 | Type: String 125 | AllowedValues: 126 | - "Yes" 127 | - "No" 128 | Default: "Yes" 129 | 130 | # EFS Encryption 131 | EFSEncryption: 132 | Description: Do you want backup EFS to be encrypted? 133 | Type: String 134 | AllowedValues: 135 | - "Yes" 136 | - "No" 137 | Default: "Yes" 138 | 139 | Metadata: 140 | AWS::CloudFormation::Interface: 141 | ParameterGroups: 142 | - Label: 143 | default: Backup Configuration 144 | Parameters: 145 | - SrcEFS 146 | - IntervalTag 147 | - Retain 148 | - FolderLabel 149 | - BackupWindow 150 | - BackupSchedule 151 | - BackupPrefix 152 | - EFSMode 153 | - EFSEncryption 154 | - Label: 155 | default: EC2 Configuration 156 | Parameters: 157 | - VpcId 158 | - Subnets 159 | - Label: 160 | default: Notification & Dashboard 161 | Parameters: 162 | - SuccessNotification 163 | - Email 164 | - Dashboard 165 | ParameterLabels: 166 | IntervalTag: 167 | default: Interval Label 168 | Subnets: 169 | default: Subnet IDs 170 | SrcEFS: 171 | default: Source EFS 172 | FolderLabel: 173 | default: Folder Label 174 | BackupWindow: 175 | default: Backup Window 176 | BackupSchedule: 177 | default: Backup Schedule 178 | BackupPrefix: 179 | default: Backup Prefix 180 | EFSMode: 181 | default: EFS Mode 182 | SuccessNotification: 183 | default: Success Notification 184 | VpcId: 185 | default: VPC ID 186 | EFSEncryption: 187 | default: EFS Encryption 188 | 189 | Mappings: 190 | Map: 191 | encryption: {"Yes": "true", "No": "false"} 192 | send-data: {"SendAnonymousData": "Yes"} 193 | c5.xlarge: {"Arch":"HVM64"} 194 | us-east-1: {"InstanceSize":"c5.xlarge"} 195 | us-east-2: {"InstanceSize":"c5.xlarge"} 196 | us-west-1: {"InstanceSize":"c5.xlarge"} 197 | us-west-2: {"InstanceSize":"c5.xlarge"} 198 | ca-central-1: {"InstanceSize":"c5.xlarge"} 199 | eu-west-1: {"InstanceSize":"c5.xlarge"} 200 | eu-central-1: {"InstanceSize":"c5.xlarge"} 201 | eu-west-2: {"InstanceSize":"c5.xlarge"} 202 | ap-southeast-1: {"InstanceSize":"c5.xlarge"} 203 | ap-southeast-2: {"InstanceSize":"c5.xlarge"} 204 | ap-northeast-1: {"InstanceSize":"c5.xlarge"} 205 | ap-northeast-2: {"InstanceSize":"c5.xlarge"} 206 | ap-south-1: {"InstanceSize":"c5.xlarge"} 207 | sa-east-1: {"InstanceSize":"c5.xlarge"} 208 | SourceCode: 209 | General: 210 | S3Bucket: "%%BUCKET_NAME%%" 211 | KeyPrefix: "%%SOLUTION_NAME%%/%%VERSION%%" 212 | 213 | Conditions: 214 | DashboardOpt: !Equals [ !Ref Dashboard, "Yes" ] 215 | 216 | Resources: 217 | # 218 | # EFS resources 219 | # [EFSSecurityGroup, EFSIngressRule, DstEFS, MountTarget0, MountTarget1] 220 | # 221 | EFSSecurityGroup: 222 | Type: AWS::EC2::SecurityGroup 223 | Metadata: 224 | cfn_nag: 225 | rules_to_suppress: 226 | - id: F1000 227 | reason: "allowing all egress traffic" 228 | Properties: 229 | VpcId: !Sub ${VpcId} 230 | GroupDescription: !Sub SG for EFS backup solution ${AWS::StackName} 231 | 232 | EFSIngressRule: 233 | Type: AWS::EC2::SecurityGroupIngress 234 | Metadata: 235 | cfn_nag: 236 | rules_to_suppress: 237 | - id: W36 238 | reason: 
"adding description causes replace during CFN update, causing a stack creation error" 239 | - id: W42 240 | reason: Allowing ICMP within the same security group only 241 | Properties: 242 | FromPort: -1 243 | GroupId: !Sub ${EFSSecurityGroup} 244 | IpProtocol: -1 245 | SourceSecurityGroupId: !Sub ${EFSSecurityGroup} 246 | ToPort: -1 247 | 248 | DstEFS: 249 | Type: AWS::EFS::FileSystem 250 | DeletionPolicy: Retain 251 | Properties: 252 | FileSystemTags: 253 | - Key: Name 254 | Value: !Sub efs-backup-${AWS::StackName} 255 | PerformanceMode: !Sub ${EFSMode} 256 | Encrypted: !FindInMap [Map, encryption, !Ref EFSEncryption] 257 | 258 | MountTarget0: 259 | Type: AWS::EFS::MountTarget 260 | Properties: 261 | FileSystemId: !Sub ${DstEFS} 262 | SubnetId: !Select [ 0, !Ref Subnets ] 263 | SecurityGroups: 264 | - !Sub ${EFSSecurityGroup} 265 | 266 | MountTarget1: 267 | Type: AWS::EFS::MountTarget 268 | Properties: 269 | FileSystemId: !Sub ${DstEFS} 270 | SubnetId: !Select [ 1, !Ref Subnets ] 271 | SecurityGroups: 272 | - !Sub ${EFSSecurityGroup} 273 | 274 | # 275 | # EC2 resources 276 | # [BackupInstanceLaunchConfig, EFSAutoScalingGroup, LifecycleHook] 277 | # 278 | BackupInstanceLaunchConfig: 279 | Type: AWS::AutoScaling::LaunchConfiguration 280 | Properties: 281 | ImageId: !GetAtt AMIInfo.Id 282 | SecurityGroups: 283 | - !Sub ${EFSSecurityGroup} 284 | InstanceType: !FindInMap [Map, !Ref "AWS::Region", "InstanceSize"] 285 | IamInstanceProfile: !Sub ${InstanceProfile} 286 | UserData: 287 | # download and run efs-backup script 288 | # 12/28/2018 - EFS-21432 - adding retries for downloads 289 | Fn::Base64: !Sub 290 | - | 291 | #!/bin/bash 292 | # sudo yum install amazon-ssm-agent -y 293 | sudo yum install -y https://s3.amazonaws.com/ec2-downloads-windows/SSMAgent/latest/linux_amd64/amazon-ssm-agent.rpm 294 | sudo start amazon-ssm-agent 295 | 296 | # SIM:V4488716 - 08/03/2018 - Support custom DHCP option 297 | # https://github.com/awslabs/efs-backup/issues/1 298 | cat <