├── requirements.txt ├── setup.cfg ├── fixtures ├── sample_describe_cluster_response.json ├── sample_ec2_event.json ├── sample_ecs_event.json └── sample_ecs_task_event.json ├── sequence.puml ├── LICENSE ├── .gitignore ├── README.md ├── lambda_function.py └── test_lambda_function.py /requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.4.4 2 | pytest==3.1.1 3 | pytest-cov==2.5.1 4 | pytest-mock==1.6.0 5 | pytest-socket==0.1.0 6 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = .git,.direnv/ 3 | max-line-length = 160 4 | 5 | [coverage:run] 6 | omit = 7 | .git/* 8 | .direnv/* 9 | test_*.py 10 | 11 | [coverage:report] 12 | fail_under = 100 13 | -------------------------------------------------------------------------------- /fixtures/sample_describe_cluster_response.json: -------------------------------------------------------------------------------- 1 | { 2 | "clusters": [ 3 | { 4 | "activeServicesCount": 2, 5 | "clusterArn": "arn:aws:ecs:us-east-1:123456789012:cluster/cluster1", 6 | "clusterName": "cluster1", 7 | "pendingTasksCount": 0, 8 | "registeredContainerInstancesCount": 3, 9 | "runningTasksCount": 9, 10 | "status": "ACTIVE" 11 | } 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /fixtures/sample_ec2_event.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0", 3 | "id": "6a7e8feb-b491-4cf7-a9f1-bf3703467718", 4 | "detail-type": "EC2 Instance State-change Notification", 5 | "source": "aws.ec2", 6 | "account": "111122223333", 7 | "time": "2015-12-22T18:43:48Z", 8 | "region": "us-east-1", 9 | "resources": [ 10 | "arn:aws:ec2:us-east-1:123456789012:instance/i-12345678" 11 | ], 12 | "detail": { 13 | "instance-id": "i-12345678", 14 | "state": 
"terminated" 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /sequence.puml: -------------------------------------------------------------------------------- 1 | @startuml 2 | 3 | participant ECS 4 | participant CloudWatch 5 | participant Lambda 6 | 7 | ECS -> CloudWatch: State Change 8 | activate CloudWatch 9 | CloudWatch -> Lambda: Event payload 10 | deactivate CloudWatch 11 | Lambda -> ECS: describe_services 12 | activate ECS 13 | alt service not in cluster 14 | ECS -> Lambda: 'failures': [MISSING] 15 | note right: exit 16 | else service exists in cluster 17 | ECS -> Lambda: desiredCount 18 | deactivate ECS 19 | activate Lambda 20 | Lambda -> ECS: describe_clusters 21 | activate ECS 22 | ECS -> Lambda: registered_instances 23 | deactivate ECS 24 | Lambda -> Lambda: inspect desiredCount 25 | note left: exit if equal 26 | Lambda -> ECS: set desired = registered 27 | activate ECS 28 | ECS -> Lambda: OK 29 | deactivate ECS 30 | deactivate Lambda 31 | end 32 | 33 | @enduml 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Mike Fiedler 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /fixtures/sample_ecs_event.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0", 3 | "id": "fa0433a4-37ca-4211-be67-ae189eef9447", 4 | "detail-type": "ECS Container Instance State Change", 5 | "source": "aws.ecs", 6 | "account": "123456789012", 7 | "time": "2016-09-15T22:19:18Z", 8 | "region": "us-east-1", 9 | "resources": [ 10 | "arn:aws:ecs:us-east-1:123456789012:container-instance/320a34dc-1864-40bd-85f6-1f0963bf1943" 11 | ], 12 | "detail": { 13 | "agentConnected": true, 14 | "clusterArn": "arn:aws:ecs:us-east-1:123456789012:cluster/cluster1", 15 | "containerInstanceArn": "arn:aws:ecs:us-east-1:123456789012:container-instance/320a34dc-1864-40bd-85f6-1f0963bf1943", 16 | "pendingTasksCount": 0, 17 | "registeredResources": [ 18 | { 19 | "name": "CPU", 20 | "type": "INTEGER", 21 | "integerValue": 1000 22 | }, 23 | { 24 | "name": "MEMORY", 25 | "type": "INTEGER", 26 | "integerValue": 2000 27 | } 28 | ], 29 | "remainingResources": [ 30 | { 31 | "name": "CPU", 32 | "type": "INTEGER", 33 | "integerValue": 1000 34 | }, 35 | { 36 | "name": "MEMORY", 37 | "type": "INTEGER", 38 | "integerValue": 2000 39 | } 40 | ], 41 | "runningTasksCount": 0, 42 | "status": "ACTIVE", 43 | "version": 1, 44 | "versionInfo": {}, 45 | "updatedAt": "2016-09-15T21:36:17.779Z" 46 | } 47 | } 48 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *,cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # dotenv 80 | .env 81 | 82 | # virtualenv 83 | .venv/ 84 | venv/ 85 | ENV/ 86 | 87 | 88 | # .direnv 89 | .envrc 90 | .direnv/ 91 | 92 | # Spyder project settings 93 | .spyderproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | -------------------------------------------------------------------------------- /fixtures/sample_ecs_task_event.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0", 3 | "id": "9bcdac79-b31f-4d3d-9410-fbd727c29fab", 4 | 
"detail-type": "ECS Task State Change", 5 | "source": "aws.ecs", 6 | "account": "111122223333", 7 | "time": "2016-12-06T16:41:06Z", 8 | "region": "us-east-1", 9 | "resources": [ 10 | "arn:aws:ecs:us-east-1:111122223333:task/b99d40b3-5176-4f71-9a52-9dbd6f1cebef" 11 | ], 12 | "detail": { 13 | "clusterArn": "arn:aws:ecs:us-east-1:111122223333:cluster/default", 14 | "containerInstanceArn": "arn:aws:ecs:us-east-1:111122223333:container-instance/b54a2a04-046f-4331-9d74-3f6d7f6ca315", 15 | "containers": [ 16 | { 17 | "containerArn": "arn:aws:ecs:us-east-1:111122223333:container/3305bea1-bd16-4217-803d-3e0482170a17", 18 | "exitCode": 0, 19 | "lastStatus": "STOPPED", 20 | "name": "xray", 21 | "taskArn": "arn:aws:ecs:us-east-1:111122223333:task/b99d40b3-5176-4f71-9a52-9dbd6f1cebef" 22 | } 23 | ], 24 | "createdAt": "2016-12-06T16:41:05.702Z", 25 | "desiredStatus": "RUNNING", 26 | "lastStatus": "RUNNING", 27 | "overrides": { 28 | "containerOverrides": [ 29 | { 30 | "name": "xray" 31 | } 32 | ] 33 | }, 34 | "startedAt": "2016-12-06T16:41:06.8Z", 35 | "startedBy": "ecs-svc/9223370556150183303", 36 | "updatedAt": "2016-12-06T16:41:06.975Z", 37 | "taskArn": "arn:aws:ecs:us-east-1:111122223333:task/b99d40b3-5176-4f71-9a52-9dbd6f1cebef", 38 | "taskDefinitionArn": "arn:aws:ecs:us-east-1:111122223333:task-definition/xray:2", 39 | "version": 4 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ecs-host-service-scale 2 | 3 | A Lambda function to ensure an ECS Service is set to the correct Desired Count for a One-Task-Per-Host placement value for any cluster that runs the Service. 
4 | 5 | ## Requirements 6 | 7 | - `boto3` (included in AWS Lambda functions, no need for packaged deployment) 8 | - `ECS_SERVICE_ARN` environment variable 9 | - IAM Role/Policy access setup 10 | 11 | ## Flow 12 | 13 | ![UML Sequence Flow](http://uml.mvnsearch.org/gist/00347bf8cfd22ac011f0e8f1bfa12359) 14 | 15 | ## Deployment 16 | 17 | The function is meant to be deployed as a non-VPC Lambda function. It will probably work in a VPC environment, given an Internet Gateway and proper permissions, but it simply does not need access to any in-VPC resources, only AWS API calls. 18 | 19 | - IAM Policy & IAM Role allowing: `ecs:Describe*` and `ecs:UpdateService` 20 | - CloudWatch Event Rule for: `{"source": ["aws.ecs"], "detail-type": ["ECS Container Instance State Change"]}` 21 | - Code from `lambda_function.py` 22 | - The ECS Service ARN environment variable 23 | 24 | ## Testing 25 | 26 | - Install all requirements via `pip install -r requirements.txt` 27 | - Execute tests via `pytest` 28 | 29 | See [py.test docs](http://doc.pytest.org/) and [botocore Stubber](http://botocore.readthedocs.io/en/latest/reference/stubber.html) reference for more. 30 | 31 | ## Contributing 32 | 33 | 1. [Fork it](https://github.com/miketheman/ecs-host-service-scale/fork) 34 | 2. Create your feature branch (`git checkout -b my-new-feature`) 35 | 3. Test your changes with `pytest --cov` - the tests currently cover the code 100% - don't lower that number! 36 | 4. Commit your changes (`git commit -am 'Add some feature'`) 37 | 5. Push to the branch (`git push origin my-new-feature`) 38 | 6. 
def ecs_client():
    """Create and return a boto3 client for the ECS API."""
    return boto3.client("ecs")


def adjust_service_desired_count(ecs_client, cluster, service):
    """
    Make a service's desired task count equal its cluster's instance count.

    Reads the service's current ``desiredCount`` and the cluster's
    ``registeredContainerInstancesCount``; when the two differ, the service
    is updated so that ``desiredCount`` equals the instance count.

    :param ecs_client: client object exposing ``describe_services``,
        ``describe_clusters`` and ``update_service``.
    :param cluster: cluster identifier, passed to the ECS calls unchanged.
    :param service: service identifier, passed to the ECS calls unchanged.
    :returns: the ``update_service`` response when an update was issued,
        otherwise ``None`` (service or cluster not found, service inactive,
        or the counts already match).
    """
    found_services = ecs_client.describe_services(
        cluster=cluster, services=[service]
    )["services"]

    if not found_services:
        print("SKIP: Service not found in cluster {}".format(cluster))
        return None

    current_service = found_services[0]

    # A deleted service can still be described but can no longer be updated;
    # skip it instead of letting update_service fail.
    if current_service.get("status") == "INACTIVE":
        print("SKIP: Service is INACTIVE in cluster {}".format(cluster))
        return None

    desired_task_count = current_service["desiredCount"]

    found_clusters = ecs_client.describe_clusters(clusters=[cluster])["clusters"]

    # An unknown cluster comes back as an empty list, so guard before
    # indexing rather than crashing with IndexError.
    if not found_clusters:
        print("SKIP: Cluster {} not found".format(cluster))
        return None

    registered_instances = found_clusters[0]["registeredContainerInstancesCount"]

    if desired_task_count == registered_instances:
        # Do nothing: the service already runs one task per instance.
        print("SKIP: Cluster {} has {} desired tasks for {} registered instances.".format(
            cluster, desired_task_count, registered_instances
        ))
        return None

    print("Adjusting cluster '{}' to run {} tasks of service '{}'".format(
        cluster, registered_instances, service
    ))
    response = ecs_client.update_service(
        cluster=cluster,
        service=service,
        desiredCount=registered_instances,
    )

    print(response)
    return response


def lambda_handler(event, context):
    """
    Lambda entry point: validate the event and rescale the configured service.

    :param event: event payload as a dict; must have ``source`` equal to
        ``aws.ecs``.
    :param context: Lambda context object; unused.
    :returns: ``None`` in every case.
    :raises ValueError: when no event is given, when the event source is not
        ``aws.ecs``, when the ``ECS_SERVICE_ARN`` environment variable is
        unset or empty, or when a matching event carries no
        ``detail.clusterArn``.
    """
    if not event:
        raise ValueError("No event provided.")

    # .get() so that a payload without the key is rejected with the same
    # ValueError as a payload with the wrong value, not a bare KeyError.
    if event.get("source") != "aws.ecs":
        raise ValueError("Function only supports input from events with a source type of: aws.ecs")

    service = os.getenv('ECS_SERVICE_ARN')
    if not service:
        raise ValueError("Need to set `ECS_SERVICE_ARN` env var to serviceArn.")

    # Determine if this event is one that we care about
    if event.get("detail-type") != "ECS Container Instance State Change":
        print("SKIP: Function operates only on ECS Container Instance State Change events.")
        return

    # Valid event, and one we are interested in
    cluster = (event.get("detail") or {}).get("clusterArn")
    if not cluster:
        raise ValueError("Event detail does not contain a `clusterArn`.")

    adjust_service_desired_count(ecs_client(), cluster, service)
    print("DONE")
def test_no_service_env_var(ecs_event, monkeypatch):
    """A matching ECS event is rejected when `ECS_SERVICE_ARN` is not set."""
    # Remove any value inherited from the shell or CI so the outcome does not
    # depend on the ambient environment.
    monkeypatch.delenv('ECS_SERVICE_ARN', raising=False)
    with pytest.raises(ValueError):
        lambda_function.lambda_handler(ecs_event, None)


def test_event_no_match(ecs_task_event, mocker, monkeypatch):
    """An ECS event of a different detail-type never reaches the adjuster."""
    monkeypatch.setenv('ECS_SERVICE_ARN', AGENT_SERVICE_ARN)
    adjuster = mocker.patch.object(lambda_function, 'adjust_service_desired_count')
    lambda_function.lambda_handler(ecs_task_event, None)
    assert adjuster.call_count == 0


def test_event_matches(ecs_event, mocker, monkeypatch):
    """A container-instance event triggers exactly one adjustment."""
    monkeypatch.setenv('ECS_SERVICE_ARN', AGENT_SERVICE_ARN)
    adjuster = mocker.patch.object(lambda_function, 'adjust_service_desired_count')
    lambda_function.lambda_handler(ecs_event, None)
    assert adjuster.call_count == 1
    # The first positional argument is the ECS client; the remaining two must
    # be the cluster taken from the event and the service taken from the env.
    assert adjuster.call_args[0][1:] == (CLUSTER_ARN, AGENT_SERVICE_ARN)


def tests_skip_when_service_not_in_cluster():
    """Nothing is updated when the service is not found in the cluster."""
    ecs = lambda_function.ecs_client()
    stubber = Stubber(ecs)

    describe_services_response = {
        'services': [],
    }
    expected_params = {'cluster': 'cluster1', 'services': [AGENT_SERVICE_ARN]}
    stubber.add_response('describe_services', describe_services_response, expected_params)

    with stubber:
        response = lambda_function.adjust_service_desired_count(ecs, 'cluster1', AGENT_SERVICE_ARN)
        assert response is None

    # Fail if any queued response was never requested.
    stubber.assert_no_pending_responses()


def test_adjusts_service_when_mismatch(cluster_response):
    """The service is updated to the cluster's registered instance count."""
    ecs = lambda_function.ecs_client()
    stubber = Stubber(ecs)

    describe_services_response = {
        'services': [
            {
                'serviceArn': AGENT_SERVICE_ARN,
                'serviceName': 'AgentService',
                'clusterArn': CLUSTER_ARN,
                'desiredCount': 2,
            }
        ]
    }
    expected_params = {'cluster': 'cluster1', 'services': [AGENT_SERVICE_ARN]}
    stubber.add_response('describe_services', describe_services_response, expected_params)

    expected_params = {'clusters': ['cluster1']}
    stubber.add_response('describe_clusters', cluster_response, expected_params)

    update_service_response = {
        'service': {
            'serviceArn': AGENT_SERVICE_ARN,
            'serviceName': 'AgentService',
            'clusterArn': CLUSTER_ARN,
            'desiredCount': 3,
        }
    }
    expected_params = {'cluster': 'cluster1', 'desiredCount': 3, 'service': AGENT_SERVICE_ARN}
    stubber.add_response('update_service', update_service_response, expected_params)

    with stubber:
        response = lambda_function.adjust_service_desired_count(ecs, 'cluster1', AGENT_SERVICE_ARN)
        assert response == update_service_response

    # Fail if any queued response (e.g. update_service) was never requested.
    stubber.assert_no_pending_responses()


def test_adjusts_nothing_when_equal(cluster_response):
    """No update is issued when the desired count already matches."""
    ecs = lambda_function.ecs_client()
    stubber = Stubber(ecs)

    describe_services_response = {
        'services': [
            {
                'serviceArn': AGENT_SERVICE_ARN,
                'serviceName': 'AgentService',
                'clusterArn': CLUSTER_ARN,
                'desiredCount': 3,
            }
        ]
    }
    expected_params = {'cluster': 'cluster1', 'services': [AGENT_SERVICE_ARN]}
    stubber.add_response('describe_services', describe_services_response, expected_params)

    expected_params = {'clusters': ['cluster1']}
    stubber.add_response('describe_clusters', cluster_response, expected_params)

    with stubber:
        response = lambda_function.adjust_service_desired_count(ecs, 'cluster1', AGENT_SERVICE_ARN)
        assert response is None

    # Fail if any queued response was never requested.
    stubber.assert_no_pending_responses()