├── images
    ├── jenkins-ecs-rollback.jpg
    ├── ecs-blue-green-deployment.gif
    ├── ecs-cluster-spot-instances.png
    ├── ecs-cluster-instance-draining.png
    ├── ecs-cluster-as-desired-capacity.png
    ├── ecs-cluster-cloudformation-exports.png
    └── ecs-cluster-cloudwatch-schedulable-containers.png
├── nginx-redirect
    ├── nginx-default.conf
    ├── Dockerfile
    └── start.sh
├── README.md
├── LICENSE
├── ECS-Batch-Jobs.template
├── ECS-Blue-Green.md
├── ECS-Service-Canary.template
├── ECS-Web-Cluster.md
├── ECS-Service.template
├── ECS-Batch-Cluster.template
└── ECS-Web-Cluster.template


/images/jenkins-ecs-rollback.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MoveInc/ecs-cloudformation-templates/HEAD/images/jenkins-ecs-rollback.jpg


--------------------------------------------------------------------------------
/images/ecs-blue-green-deployment.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MoveInc/ecs-cloudformation-templates/HEAD/images/ecs-blue-green-deployment.gif


--------------------------------------------------------------------------------
/images/ecs-cluster-spot-instances.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MoveInc/ecs-cloudformation-templates/HEAD/images/ecs-cluster-spot-instances.png


--------------------------------------------------------------------------------
/images/ecs-cluster-instance-draining.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MoveInc/ecs-cloudformation-templates/HEAD/images/ecs-cluster-instance-draining.png


--------------------------------------------------------------------------------
/images/ecs-cluster-as-desired-capacity.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MoveInc/ecs-cloudformation-templates/HEAD/images/ecs-cluster-as-desired-capacity.png


--------------------------------------------------------------------------------
/images/ecs-cluster-cloudformation-exports.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MoveInc/ecs-cloudformation-templates/HEAD/images/ecs-cluster-cloudformation-exports.png


--------------------------------------------------------------------------------
/nginx-redirect/nginx-default.conf:
--------------------------------------------------------------------------------
1 | server {  # Answers every request on port 80 with a permanent redirect.
2 |     listen       80;
3 |     server_name  localhost;
4 | 
5 |     return 301 __REDIRECT_URL__$request_uri;  # start.sh replaces the placeholder with the REDIRECT_URL environment variable; the original request URI is appended.
6 | }
7 | 


--------------------------------------------------------------------------------
/images/ecs-cluster-cloudwatch-schedulable-containers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MoveInc/ecs-cloudformation-templates/HEAD/images/ecs-cluster-cloudwatch-schedulable-containers.png


--------------------------------------------------------------------------------
/nginx-redirect/Dockerfile:
--------------------------------------------------------------------------------
1 | # Minimal nginx image that redirects every request to REDIRECT_URL (see start.sh).
2 | FROM nginx:alpine
3 | EXPOSE 80
4 | # COPY, not ADD: plain local files need no archive extraction or URL fetching.
5 | COPY start.sh /usr/local/bin/
6 | COPY nginx-default.conf /etc/nginx/conf.d/default.conf
7 | # start.sh fills in the redirect target, then execs nginx in the foreground.
8 | CMD ["/usr/local/bin/start.sh"]
9 | 


--------------------------------------------------------------------------------
/nginx-redirect/start.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env sh
 2 | #
 3 | # Container entrypoint: substitutes REDIRECT_URL into the nginx config and
 4 | # then runs nginx in the foreground. Exits 1 when REDIRECT_URL is unset or empty.
 5 | 
 6 | set -e
 7 | set -x
 8 | 
 9 | # POSIX -z test: "==" inside [ ] is not portable to strict sh implementations.
10 | if [ -z "${REDIRECT_URL}" ] ; then
11 | 	echo "Error: REDIRECT_URL environment variable is not set."
12 | 	exit 1
13 | fi
14 | 
15 | # Escape the characters that are special in a sed replacement (&, \ and the |
16 | # delimiter) so URLs such as ones with "&" in a query string are inserted verbatim.
17 | escaped_url=$(printf '%s\n' "${REDIRECT_URL}" | sed 's/[&|\\]/\\&/g')
18 | sed -i "s|__REDIRECT_URL__|${escaped_url}|" /etc/nginx/conf.d/default.conf
19 | 
20 | # exec replaces this shell with nginx instead of running it as a child process.
21 | exec nginx -g 'daemon off;'
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | This repository contains some CloudFormation templates for Amazon's Elastic Container Service (ECS).
 2 | 
 3 | ## Templates for web workloads
 4 | 
 5 | - [ECS cluster template](ECS-Web-Cluster.template) and
 6 |   [accompanying article](ECS-Web-Cluster.md) that describes the features available in the template.
 7 | - [ECS service template](ECS-Service.template),
 8 |   [ECS service canary template](ECS-Service-Canary.template) and
 9 |   [accompanying article](ECS-Blue-Green.md).
10 | 
11 | ## Templates for batch workloads
12 | 
13 | - [ECS cluster template](ECS-Batch-Cluster.template)
14 | - [ECS batch jobs template](ECS-Batch-Jobs.template)
15 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | Copyright © 2018 Move, Inc.
 3 | 
 4 | Permission is hereby granted, free of charge, to any person obtaining a copy
 5 | of this software and associated documentation files (the “Software”), to deal
 6 | in the Software without restriction, including without limitation the rights
 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the Software is
 9 | furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | 


--------------------------------------------------------------------------------
/ECS-Batch-Jobs.template:
--------------------------------------------------------------------------------
 1 | # This sets up one or more CloudWatch event rules that send a message to an SQS queue with
 2 | # an ECS task to start. This will only work with ECS clusters that are deployed using the
 3 | # template ECS-Batch-Cluster.template. See the top of that template for a description that
 4 | # describes the overall architecture.
 5 | 
 6 | Description: {{ envName }} ECS scheduled tasks.
 7 | 
 8 | Resources:
 9 |   TaskDefinition:  # ECS task definition for the batch job.
10 |     Type: AWS::ECS::TaskDefinition
11 |     Properties:  # Everything except TaskRoleArn comes from the taskDefinitionYaml Mustache variable.
12 |       TaskRoleArn: !Ref TaskRole  # Role created by the TaskRole resource in this template.
13 | {{{ taskDefinitionYaml }}}
14 | 
15 |   TaskRole:  # IAM role referenced by TaskDefinition as the task role.
16 |     Type: AWS::IAM::Role
17 |     Properties:
18 |       RoleName: !Sub ecs-${AWS::StackName}-task-role
19 |       Path: /
20 |       AssumeRolePolicyDocument:
21 |         Version: "2012-10-17"
22 |         Statement:
23 |           - Effect: Allow
24 |             Principal:
25 |               Service:
26 |                 - ecs-tasks.amazonaws.com  # Only ECS tasks may assume this role.
27 |             Action:
28 |               - sts:AssumeRole
29 |       ManagedPolicyArns:
30 |         - arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceEventsRole  # NOTE(review): this managed policy is normally attached to an events role; confirm the task itself needs it.
31 |       Policies:
32 |         - PolicyName: !Sub ecs-service-${AWS::StackName}
33 |           PolicyDocument:  # Document body is injected from the iamTaskPolicy Mustache variable.
34 | {{{ iamTaskPolicy }}}
35 | 
36 | {{#scheduledTaskRules}}
37 |   ScheduledTask{{name}}:  # One CloudWatch Events rule is emitted per scheduledTaskRules entry.
38 |     Type: AWS::Events::Rule
39 |     Properties: 
40 |       Description: "{{^description}}ecs scheduled task{{/description}}{{#description}}{{.}}{{/description}}"  # Falls back to a generic description when none is given.
41 |       Name: !Sub {{^name}}${AWS::StackName}-{{ random_hex }}{{/name }}{{#name}}{{.}}{{/name}}-rule  # Stack name plus random_hex is used when the entry has no name.
42 |       State: {{^state}}ENABLED{{/state}}{{#state}}{{.}}{{/state}}  # Defaults to ENABLED.
43 |       ScheduleExpression: {{ scheduleExpression }}
44 |       Targets:
45 |         - Id: !Sub ${AWS::StackName}-target
46 |           Arn:
47 |             'Fn::ImportValue': "{{ ecsCluster }}-start-tasks-queue-arn"  # The rule target is the start-tasks SQS queue, imported by export name.
48 |           Input: !Sub |
49 |             {
50 |               {{#containerOverrides}}"containerOverrides": {{#to_json}}{{{containerOverrides}}}{{/to_json}},{{/containerOverrides}}
51 |               "taskDefinition": "${TaskDefinition}",
52 |               "jobName": "{{name}}"
53 |             }
54 | 
55 | {{/scheduledTaskRules}}
56 | 
57 | Outputs:  # Stack outputs: the task definition, the target cluster name, and the start-tasks queue ARN and URL.
58 |   TaskDefinition:
59 |     Description: ECS task definition ARN
60 |     Value: !Ref TaskDefinition
61 | 
62 |   ECSCluster:
63 |     Description: The name of the ECS cluster that the jobs run in.
64 |     Value: "{{ ecsCluster }}"
65 | 
66 |   StartTasksQueueArn:
67 |     Description: SQS queue ARN for submitting jobs to start.
68 |     Value:
69 |       'Fn::ImportValue': "{{ ecsCluster }}-start-tasks-queue-arn"  # Presumably exported by the ECS-Batch-Cluster stack; confirm against that template.
70 | 
71 |   StartTasksQueueUrl:
72 |     Description: SQS queue URL for submitting jobs to start.
73 |     Value:
74 |       'Fn::ImportValue': "{{ ecsCluster }}-start-tasks-queue-url"  # Presumably exported by the ECS-Batch-Cluster stack; confirm against that template.
75 | 


--------------------------------------------------------------------------------
/ECS-Blue-Green.md:
--------------------------------------------------------------------------------
  1 | # Blue/Green Deployments using AWS ECS, Including Canary and Rollback Support
  2 | 
  3 | This article describes how Move performs blue/green deployments using
  4 | [Amazon’s Elastic Container Service (ECS)](https://aws.amazon.com/ecs/) without changing the
  5 | application’s DNS entry. Canary containers are used to slowly introduce a new application
  6 | version in production. The blue/green deployments allow us to fully roll back a bad deployment
  7 | in under a minute.
  8 | 
  9 | This is a follow up to the article [A Better ECS](https://techblog.realtor.com/a-better-ecs/) that
 10 | goes into detail about how our ECS clusters are configured at Move.
 11 | 
 12 | ## Canary Deployments
 13 | 
 14 | The first part of our application deployment process in ECS is to introduce a single canary
 15 | container with the new application version and wire it into the existing ECS service. A separate
 16 | CloudFormation stack is created for this canary container and the container is registered with
 17 | the existing ALB target group so that a subset of the traffic will go to this canary container.
 18 | 
 19 | Unfortunately, the ALB at this time does not support sending a certain percentage of traffic to
 20 | different containers. In order to overcome this limitation, we introduce a new container which
 21 | will receive a percentage of traffic based on the total number of containers.  For instance, if
 22 | your application currently has 9 containers with the previous application version, and 1 canary
 23 | container with the new application version, then the canary will receive approximately 10% of
 24 | the traffic.
 25 | 
 26 | While the canary is running, we’d like to ensure that requests from clients which are accessing
 27 | the canary do not cross over to containers that are still running the prior application version.
 28 | In order to mitigate this, the ECS service in our CloudFormation template is set up with
 29 | stickiness turned on via the stickiness.enabled target group attribute on the
 30 | AWS::ElasticLoadBalancingV2::TargetGroup CloudFormation resource.
 31 | 
 32 | ## Blue / Green Deployments
 33 | 
 34 | After the canary container has been live for some period of time (typically 5 minutes for most
 35 | applications), our deployment pipeline then proceeds with a full blue/green deployment.
 36 | 
 37 | At a high level, a DNS entry points to a single Application Load Balancer (ALB) and the DNS entry
 38 | does not change between deployments. For each application, there will be two services in ECS
 39 | called myapp-blue and myapp-green. Host based routing is used on the ALB to control how the
 40 | traffic is routed. A new deployment goes out to the inactive service, automated smoke tests are
 41 | performed against that deployment, and if successful, the host routes in the ALB are updated so
 42 | that the traffic is routed to the newly deployed service. If a rollback needs to occur, then the
 43 | host routes at the ALB can be updated without making any changes to the ECS services. Here is a
 44 | detailed walkthrough of this process:
 45 | 
 46 | ### Initial State
 47 | 
 48 | ECS Service Name     | Version | Hostname                | ALB Priority | Remarks
 49 | ---------------------|---------|-------------------------|--------------|--------
 50 | myapp-blue           | v1      | myapp.move.com          |          100 | myapp.move.com currently points here and is serving all traffic to end users.
 51 | myapp-green          | None    |                         |          200 | ECS service does not exist yet.
 52 | 
 53 | ### Deploy v2 to green service
 54 | 
 55 | ECS Service Name     | Version | Hostname                | ALB Priority | Remarks
 56 | ---------------------|---------|-------------------------|--------------|--------
 57 | myapp-blue           | v1      | myapp.move.com          |          100 | Customer traffic is served by this service.
 58 | myapp-green          | **v2**  | myapp-inactive.move.com |          200 | **v2 is deployed here. Automated smoke tests are performed against this service using the hostname myapp-inactive.move.com and the deployment will stop if any tests fail.**
 59 | 
 60 | ### Update hostname of green service
 61 | 
 62 | ECS Service Name     | Version | Hostname                | ALB Priority | Remarks
 63 | ---------------------|---------|-------------------------|--------------|--------
 64 | myapp-blue           | v1      | myapp.move.com          |          100 | Customer traffic is served by this service.
 65 | myapp-green          | v2      | **myapp.move.com**      |          200 | **Both ECS services now have the same hostname. The blue side will continue to receive all customer traffic since it has a lower ALB priority.**
 66 | 
 67 | ### Mark blue ECS service as inactive
 68 | 
 69 | ECS Service Name     | Version | Hostname                    | ALB Priority | Remarks
 70 | ---------------------|---------|-----------------------------|--------------|--------
 71 | myapp-blue           | v1      | **myapp-inactive.move.com** |          100 | **Service is now inactive.**
 72 | myapp-green          | v2      | myapp.move.com              |          200 | **All customer traffic is now routed to this service.**
 73 | 
 74 | Our Jenkins pipeline will keep the old application version around for a few hours and will then
 75 | automatically remove the ECS service for the previous application version if a rollback is not
 76 | requested during that time period.
 77 | 
 78 | | ![](images/ecs-blue-green-deployment.gif?raw=1) |
 79 | |:--:|
 80 | | *The blue/green deployment process.* |
 81 | 
 82 | Our Jenkins pipeline will keep the old application version around for a few hours and will
 83 | automatically remove the ECS service and associated resources for the previous application version
 84 | if a rollback is not requested during that time period.
 85 | 
 86 | ## Rolling back a bad deployment
 87 | 
 88 | Rolling back a deployment simply requires swapping the hostnames for the two ECS services. After a
 89 | rollback occurs, the inactive service is still available with the inactive hostname and a developer
 90 | now has a low-stress environment in production to troubleshoot the error.
 91 | 
 92 | | ![](images/jenkins-ecs-rollback.jpg?raw=1) |
 93 | |:--:|
 94 | | *We have the ability to roll back to a previous application version wired into our Jenkins pipeline.* |
 95 | 
 96 | # CloudFormation templates
 97 | 
 98 | Our CloudFormation templates for ECS
 99 | [are available on GitHub](https://github.com/MoveInc/ecs-cloudformation-templates). The ECS service
100 | template is available in the file
101 | [ECS-Service.template](https://github.com/MoveInc/ecs-cloudformation-templates/blob/HEAD/ECS-Service.template)
102 | and the canary support is in the file
103 | [ECS-Service-Canary.template](https://github.com/MoveInc/ecs-cloudformation-templates/blob/HEAD/ECS-Service-Canary.template).
104 | 


--------------------------------------------------------------------------------
/ECS-Service-Canary.template:
--------------------------------------------------------------------------------
  1 | # CloudFormation template for ECS canary deployments. This template creates a new ECS service
  2 | # with a single task for the canary deployment. It then looks up the ECS service that is
  3 | # currently live (via the prevDeployStackName mustache variable), grabs the ALB target group
  4 | # ARN, and wires the new canary task into that target group. CloudFormation doesn't natively
  5 | # support wiring a task into the other target group so the Lambda function
  6 | # TargetGroupRegistrationFunction makes the relevant API calls.
  7 | #
  8 | # This template uses Mustache (or one of its derivatives, like Pystache) to inject the following
  9 | # values:
 10 | #
 11 | # - ecsCluster - The name of the ECS cluster to deploy the service into.
 12 | #
 13 | # - envName  - The name of the ECS canary service. This is only used for the CloudFormation
 14 | #              template description.
 15 | #
 16 | # - iamRoleArn - Optional. An existing IAM role ARN that will be used as the ECS task role.
 17 | #                If you specify this, then do not specify iamTaskPolicy.
 18 | #
 19 | # - iamTaskPolicy - Optional. Create a new IAM role for the ECS task role. This is the policy
 20 | #                   document for the AWS::IAM::Role CloudFormation resource.
 21 | #
 22 | # - prevDeployStackName - The CloudFormation stack name for the existing ECS blue / green
 23 | #                         service that the canary will be wired into. This template assumes
 24 | #                         that the CloudFormation stack exports a variable named
 25 | #                         ${STACKNAME}-tg with the ARN of the target group. The provided
 26 | #                         ECS blue/green service template contains the correct export.
 27 | #
 28 | # - taskDefinitionYaml - The AWS::ECS::TaskDefinition CloudFormation configuration. This is
 29 | #                        embedded within Move's projects in the deployment/Dockerrun.aws.yml
 30 | #                        file.
 31 | #                        https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-ecs-taskdefinition.html
 32 | 
 33 | Description: {{ envName }} canary ECS service.
 34 | 
 35 | Resources:
 36 |   Service:  # Canary ECS service that runs a single task.
 37 |     Type: AWS::ECS::Service
 38 |     {{#iamTaskPolicy}}
 39 |     DependsOn:  # Only emitted when this template creates the task role.
 40 |       - TaskRole
 41 |     {{/iamTaskPolicy}}
 42 |     Properties:  # No load balancer is configured here; TargetGroupRegistration wires the task into the live target group.
 43 |       ServiceName: !Ref AWS::StackName  # The service is named after the CloudFormation stack.
 44 |       Cluster: {{ ecsCluster }}
 45 |       DesiredCount: 1
 46 |       TaskDefinition: !Ref TaskDefinition
 47 | 
 48 |   TaskDefinition:  # Task definition for the canary; most properties come from taskDefinitionYaml.
 49 |     Type: AWS::ECS::TaskDefinition
 50 |     Properties:
 51 |       {{#iamRoleArn}}
 52 |       TaskRoleArn: {{ iamRoleArn }}  # Existing role supplied by the caller.
 53 |       {{/iamRoleArn}}
 54 |       {{#iamTaskPolicy}}
 55 |       TaskRoleArn: !Ref TaskRole  # Role created by this template; supplying both variables would emit this key twice.
 56 |       {{/iamTaskPolicy}}
 57 | {{{ taskDefinitionYaml }}}
 58 | 
 59 | {{#iamTaskPolicy}}
 60 |   TaskRole:  # Task role, created only when the iamTaskPolicy variable is supplied.
 61 |     Type: AWS::IAM::Role
 62 |     Properties:
 63 |       RoleName: !Sub svc-${AWS::StackName}-role
 64 |       Path: /
 65 |       AssumeRolePolicyDocument:
 66 |         Version: "2012-10-17"
 67 |         Statement:
 68 |           - Effect: Allow
 69 |             Principal:
 70 |               Service:
 71 |                 - ecs-tasks.amazonaws.com  # Only ECS tasks may assume this role.
 72 |             Action:
 73 |               - sts:AssumeRole
 74 |       Policies:
 75 |         - PolicyName: !Sub svc-${AWS::StackName}
 76 |           PolicyDocument:  # Document body is injected from the iamTaskPolicy Mustache variable.
 77 | {{{ iamTaskPolicy }}}
 78 | {{/iamTaskPolicy}}
 79 | 
 80 |   TargetGroupRegistrationFunction:  # Custom-resource handler that registers/deregisters the canary task with the live target group.
 81 |     Type: AWS::Lambda::Function
 82 |     Properties:
 83 |       Handler: index.lambda_handler
 84 |       Role: !GetAtt LambdaExecutionRole.Arn
 85 |       Runtime: python3.12  # python3.6 has reached end of support and can no longer be deployed.
 86 |       Timeout: 60
 87 |       Code:
 88 |         ZipFile: |
 89 |           import time
 90 |           import boto3
 91 |           import cfnresponse
 92 | 
 93 |           ECS_BOTO = boto3.client('ecs')
 94 |           ELBV2_BOTO = boto3.client('elbv2')
 95 | 
 96 |           def lookup_canary_instance(cluster, canary_svc_arn):
 97 |               """Return (EC2 instance ID, host port) of the canary service's first task."""
 98 |               resp = ECS_BOTO.list_tasks(cluster=cluster, serviceName=canary_svc_arn)
 99 |               resp = ECS_BOTO.describe_tasks(cluster=cluster, tasks=[resp['taskArns'][0]])
100 |               task = resp['tasks'][0]
101 |               host_port = task['containers'][0]['networkBindings'][0]['hostPort']
102 |               resp = ECS_BOTO.describe_container_instances(
103 |                   cluster=cluster, containerInstances=[task['containerInstanceArn']])
104 |               return (resp['containerInstances'][0]['ec2InstanceId'], host_port)
105 | 
106 |           def update_registration(event):
107 |               """Apply the request to the target group and return the response data."""
108 |               props = event['ResourceProperties']
109 |               (ec2_instance_id, host_port) = lookup_canary_instance(props['EcsCluster'],
110 |                                                                     props['CanaryServiceArn'])
111 |               params = {'TargetGroupArn': props['TargetGroupArn'],
112 |                         'Targets': [{'Id': ec2_instance_id, 'Port': host_port}]}
113 |               if event['RequestType'] == 'Delete':
114 |                   print('Deregistering target %s' % (params))
115 |                   ELBV2_BOTO.deregister_targets(**params)
116 |                   time.sleep(5) # wait before deleting ECS service
117 |               elif event['RequestType'] == 'Create':
118 |                   print('Registering target %s' % (params))
119 |                   time.sleep(5) # wait for ECS service to come up before registration.
120 |                   ELBV2_BOTO.register_targets(**params)
121 |               else:
122 |                   print('Performing no action for stack update.')
123 |               return {'EC2InstanceID': ec2_instance_id, 'HostPort': host_port}
124 | 
125 |           def lambda_handler(event, context):
126 |               # Always reply: an unhandled exception would leave the stack waiting on a timeout.
127 |               try:
128 |                   ret = update_registration(event)
129 |               except Exception as exc:
130 |                   print('Target group registration failed: %r' % (exc,))
131 |                   cfnresponse.send(event, context, cfnresponse.FAILED, {})
132 |                   return
133 |               cfnresponse.send(event, context, cfnresponse.SUCCESS, ret)
134 | 
135 |   LambdaExecutionRole:  # Execution role referenced by TargetGroupRegistrationFunction.
136 |     Type: AWS::IAM::Role
137 |     Properties:
138 |       RoleName: !Sub lambda-${AWS::StackName}-role
139 |       Path: /
140 |       AssumeRolePolicyDocument:
141 |         Version: "2012-10-17"
142 |         Statement:
143 |           - Effect: Allow
144 |             Principal:
145 |               Service:
146 |                 - lambda.amazonaws.com  # Only the Lambda service may assume this role.
147 |             Action:
148 |               - sts:AssumeRole
149 |       Policies:
150 |         - PolicyName: !Sub svc-${AWS::StackName}
151 |           PolicyDocument:
152 |             Version: "2012-10-17"  # Quoted so YAML parsers keep it a string rather than a date.
153 |             Statement:
154 |               - Effect: Allow
155 |                 Action:  # ECS lookups, target (de)registration and CloudWatch Logs output used by the function.
156 |                   - ecs:DescribeContainerInstances
157 |                   - ecs:DescribeTasks
158 |                   - ecs:ListTasks
159 |                   - elasticloadbalancing:RegisterTargets
160 |                   - elasticloadbalancing:DeregisterTargets
161 |                   - logs:CreateLogGroup
162 |                   - logs:CreateLogStream
163 |                   - logs:PutLogEvents
164 |                 Resource: "*"
165 | 
166 |   TargetGroupRegistration:  # Custom resource that invokes TargetGroupRegistrationFunction with the properties below.
167 |     Type: AWS::CloudFormation::CustomResource
168 |     DependsOn:  # Ordered after the service so it exists when the function looks up the canary task.
169 |       - Service
170 |       - TaskDefinition
171 |       {{#iamTaskPolicy}}
172 |       - TaskRole
173 |       {{/iamTaskPolicy}}
174 |     Properties:
175 |       ServiceToken: !GetAtt TargetGroupRegistrationFunction.Arn
176 |       EcsCluster: "{{ ecsCluster }}"
177 |       CanaryServiceArn: !Ref Service
178 |       TargetGroupArn:
179 |         'Fn::ImportValue': '{{ prevDeployStackName }}-tg'  # Target group ARN exported by the live blue/green service stack.
180 | 
181 | Outputs:  # Both values are attributes returned by the TargetGroupRegistration custom resource.
182 |   EC2InstanceID:
183 |     Description: Canary EC2 instance ID
184 |     Value: !GetAtt TargetGroupRegistration.EC2InstanceID
185 | 
186 |   HostPort:
187 |     Description: Canary EC2 host port
188 |     Value: !GetAtt TargetGroupRegistration.HostPort
189 | 


--------------------------------------------------------------------------------
/ECS-Web-Cluster.md:
--------------------------------------------------------------------------------
  1 | # A better ECS
  2 | 
  3 | As more application services migrate to the AWS cloud, a pattern quickly emerges in which EC2
  4 | resources are considerably underutilized. While a wide array of EC2 instance types and autoscaling
  5 | options help to match the consumed infrastructure with current demand, many services still make
  6 | little use of the available memory, CPU, or bandwidth. In order to make better use of available
  7 | resources, AWS provides Elastic Container Service (ECS), which enables multiple services to run
  8 | on a single set of EC2 instances.
  9 | 
 10 | Developers moving onto ECS will most likely encounter difficulties getting the instance
 11 | autoscaling to operate as expected. This article describes how we were able to improve the
 12 | instance autoscaling and save money by running our Dev and QA EC2 instances on spot instances,
 13 | and covers several other management improvements and best practices for managing the cluster.
 14 | 
 15 | 
 16 | ## Instance autoscaling that works
 17 | 
 18 | Anyone who has run multiple applications inside a single ECS cluster has most likely
 19 | encountered this error:
 20 | 
 21 |     service XXX was unable to place a task because no container instance met all of its
 22 |     requirements
 23 | 
 24 | The desired instance count of the EC2 autoscaling group would be below the maximum instance
 25 | count but the ECS scheduler is not aware of this. ECS provides CloudWatch metrics about the
 26 | overall CPU and memory reservation inside the cluster, however ECS currently does not
 27 | provide metrics about the number of pending tasks. Setting the scale up and scale down policy
 28 | based on multiple CloudWatch metrics can be problematic since there can be conflicts if one
 29 | metric says to scale up but the other metric says to scale down.
 30 | 
 31 | The method described at http://garbe.io/blog/2017/04/12/a-better-solution-to-ecs-autoscaling/
 32 | provides a solution to this problem. That blog post provides a Lambda function that publishes
 33 | a custom CloudWatch metric called SchedulableContainers. The Lambda function needs to know
 34 | the largest CPU and memory reservation that can be requested inside your cluster so that it can
 35 | calculate how many of the largest containers can be started. The instance autoscaling is
 36 | configured to only use this metric. In essence, this means that the cluster will always have
 37 | available capacity for one additional instance of the largest task.
 38 | 
 39 | For large applications, the ECS instance and service autoscaling had to be tightly coupled in
 40 | the past. We were initially getting around some of the ECS autoscaling issues by running our
 41 | ECS clusters a little larger than they needed to be. We are now able to run some of our ECS
 42 | clusters at 80-90% reservation capacity with no issues.
 43 | 
 44 | The Lambda function is included inline in the CloudFormation template.
 45 | 
 46 | | ![](images/ecs-cluster-as-desired-capacity.png?raw=1) |
 47 | |:--:|
 48 | | *An ECS service was started with 500 idle containers. The number of EC2 instances in the cluster automatically scaled up from 2 to 8 to handle running that many containers. Once everything was stable, the ECS service was manually removed from the cluster and the number of instances automatically scaled back down to 2. The cluster can be configured to scale up more aggressively if needed.* |
 49 | 
 50 | | ![](images/ecs-cluster-cloudwatch-schedulable-containers.png?raw=1) |
 51 | |:--:|
 52 | | *The SchedulableContainers CloudWatch metric that corresponds to the instance autoscaling graph from above. Notice that the number of SchedulableContainers goes up to 53 once the ECS service with 500 containers was removed and that is what triggers the instance autoscaling to slowly remove instances.* |
 53 | 
 54 | 
 55 | ## Scaling down the cluster without affecting end users
 56 | 
 57 | When an ECS cluster scales down, your applications will likely see intermittent 50X errors
 58 | from the ALB when an instance is taken out of service. This is caused by AWS AutoScaling
 59 | not being aware of the ECS containers running on the instance that is terminated, so the
 60 | instance is shutting down while it is currently serving traffic. Ideally, the instance
 61 | should stop receiving traffic prior to shutting down.
 62 | 
 63 | AWS AutoScaling supports
 64 | [lifecycle hooks](https://docs.aws.amazon.com/autoscaling/ec2/userguide/lifecycle-hooks.html)
 65 | to notify a Lambda function when an instance is about to be terminated. AWS Support recommends
 66 | the Lambda function at
 67 | https://aws.amazon.com/blogs/compute/how-to-automate-container-instance-draining-in-amazon-ecs/
 68 | to gracefully drain the ECS tasks before the instance is terminated. The version provided
 69 | by AWS has several issues and a rewritten version is provided inline in the ECS cluster template
 70 | with the following changes:
 71 | 
 72 | - The AWS code can post messages to the wrong SNS topic when retrying. It looks for the first
 73 |   SNS topic in the account that has a lambda function subscribed to it and posts the retry message
 74 |   to that topic.
 75 | - The AWS code does not do any kind of pagination against the ECS API when reading the list of
 76 |   EC2 instances. So if it couldn't find the instance ID that was about to be terminated on the
 77 |   first page, then the instance was not set to DRAINING and the end users would see 50X
 78 |   messages when the operation timed out and autoscaling killed the instance.
 79 | - The retry logic did not put in any kind of delay in place when retrying. The Lambda function
 80 |   would be invoked about 5-10 times a second, and each Lambda function invocation would probably
 81 |   make close to a dozen AWS API calls. A 5 second delay between each retry was introduced.
 82 | - There was a large amount of unused code and variables in the AWS implementation.
 83 | - Converted the code from Python 2 to 3.
 84 | - Previously, the old Lambda function was included as a separate 8.1 MB ZIP file that needed
 85 |   to be stored at S3 and managed separately from the rest of your ECS cluster. Python code
 86 |   in AWS Lambda no longer needs to bundle all of its dependencies. With all of the refactoring
 87 |   above, the new Python code is small enough that it is embedded directly in the CloudFormation
 88 |   template to reduce external dependencies. This will make it easy to make changes to this code
 89 |   on a branch and test it against a single ECS cluster.
 90 | 
 91 | Other container schedulers, such as Kubernetes, will have the same issue and the same approach
 92 | can be used to drain pods.
 93 | 
 94 | | ![](images/ecs-cluster-instance-draining.png?raw=1) |
 95 | |:--:|
 96 | | *The cluster has an improved autodraining Lambda that integrates with AWS AutoScaling to drain containers during scale-down events which will avoid any unexpected 50X errors returned to the end users.* |
 97 | 
 98 | 
 99 | ## Spot instances in Dev and QA environments
100 | 
101 | EC2 supports spot instances that allow you to bid on excess computing capacity that is available at
102 | AWS. This typically saves between 70-90% off of the posted on-demand price. However, AWS can
103 | terminate the spot instances at any time with only a two-minute termination notice given.
104 | 
105 | To reduce our AWS costs, we run our Dev and QA environments on spot instances when the spot bid
106 | price is low. Since the bid price may be too high for several hours or more, we needed a way to fall
107 | back to using on-demand instances when the bid price is too high. The
108 | [Autospotting](https://github.com/cristim/autospotting) Lambda will automatically replace the
109 | expensive on-demand instances with spot instances of equal size or larger when the bid price is low.
110 | If one or more spot instances are terminated (such as due to a high bid price), then EC2
111 | AutoScaling will start new on-demand instance(s). These on-demand instances will eventually be
112 | replaced with spot instances once the bid price goes back down. Autospotting also tries to use a
113 | diverse set of instance types to avoid issues with all of the spot instances suddenly going away.
114 | 
115 | A script listens on each EC2 instance for the two-minute
116 | [spot instance termination notification](https://aws.amazon.com/blogs/aws/new-ec2-spot-instance-termination-notices/)
117 | from the EC2 metadata service. When an instance is scheduled to be terminated, the container
118 | instance state is automatically set to DRAINING so that the existing containers can gracefully
119 | drain.
120 | 
121 | We have plans to run a small subset of our production webservers on spot instances with the
122 | help of Autospotting after more testing is completed.
123 | 
124 | | ![](images/ecs-cluster-spot-instances.png?raw=1) |
125 | |:--:|
126 | | *The instances inside this ECS cluster are currently running on spot instances inside our QA environment with the help of the Autospotting Lambda. The cluster is currently configured to use r4.xlarge on-demand instances and autospotting will give a diversified set of spot instance types of similar size to avoid any issues with all of the instances going away when the bid price suddenly increases. When an instance is terminated, a new on-demand instance is automatically started by AWS AutoScaling.* |
127 | 
128 | 
129 | ## cfn-init and forcing new EC2 instances
130 | 
131 | You can use [AWS::CloudFormation::Init](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-init.html)
132 | to manage resources on the underlying EC2 instances. However, sometimes there are situations where
133 | a file is changed, and services may need to be restarted. For instance, maybe a service is no
134 | longer needed. Now you need to test the create and update code paths, which adds more administrative
135 | overhead. In keeping with the "*cattle, not pets*" philosophy of infrastructure, we put a version
136 | number in the autoscaling launch configuration user data script, and increment that number to
137 | force new EC2 instances.
138 | 
139 |     ECSLaunchConfiguration:
140 |       Type: AWS::AutoScaling::LaunchConfiguration
141 |       Properties:
142 |         UserData:
143 |           "Fn::Base64": !Sub |
144 |             #!/bin/bash
145 |             # Increment version number below to force new instances in the cluster.
146 |             # Version: 1
147 | 
148 | With this change, we now only need to test the code path that creates new EC2 instances.
149 | 
150 | 
151 | ## Logging drivers
152 | 
153 | The ECS logging driver is configured so that the
154 | [Splunk](https://www.splunk.com/blog/2016/07/13/docker-amazon-ecs-splunk-how-they-now-all-seamlessly-work-together.html),
155 | [CloudWatch logs](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/using_cloudwatch_logs.html), and
156 | json-file log drivers are available to containers. It is up to each application's container
157 | definition(s) to configure the appropriate logging driver. For example, the Splunk logging
158 | driver can be configured on the
159 | [ECS task definition](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-ecs-taskdefinition.html)
160 | like so:
161 | 
162 |     TaskDefinition:
163 |       Type: AWS::ECS::TaskDefinition
164 |       Properties:
165 |         ContainerDefinitions:
166 |           - Name: my-app-container
167 |             LogConfiguration:
168 |               LogDriver: splunk
169 |               Options:
170 |                 splunk-token: my-apps-token
171 |                 splunk-url: https://splunk-url.local
172 |                 splunk-source: docker
173 |                 splunk-sourcetype: my-apps-env-name
174 |                 splunk-format: json
175 |                 splunk-verify-connection: false
176 | 
177 | 
178 | ## IAM roles
179 | 
180 | [Task-based IAM roles](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-iam-roles.html)
181 | are implemented so that the cluster doesn't need to run with the permissions of all applications
182 | running inside it.
183 | 
184 | The ECS cluster itself needs some IAM roles configured for its proper operation and the provided
185 | CloudFormation template uses the AWS-managed IAM roles when available so that the clusters
186 | automatically get the required IAM permissions as new AWS features are made available in the
187 | future.
188 | 
189 |     ECSRole:
190 |       Type: AWS::IAM::Role
191 |       Properties:
192 |         ManagedPolicyArns:
193 |           - arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role
194 | 
195 | 
196 | ## CloudFormation exports
197 | 
198 | After your ECS cluster is set up, you will need to know some duplicate information such as VPC
199 | IDs, load balancer information, etc. when setting up your ECS services. We use
200 | [CloudFormation exports](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/using-cfn-stack-exports.html)
201 | so that the service can look up all of this information from the ECS cluster CloudFormation
202 | stack. When setting up a new ECS service via CloudFormation, we only need to know 1) the AWS
203 | region, 2) the CloudFormation stack name that has our ECS cluster, and 3) which shared load
204 | balancer to attach to (internet-facing or internal). The ECS service can look up the VPC
205 | that the cluster is in with the CloudFormation snippet
206 | `'Fn::ImportValue': "cluster-stack-name-VPC"`. This reduces the number of parameters that
207 | our ECS services need to have.
208 | 
209 | | ![](images/ecs-cluster-cloudformation-exports.png?raw=1) |
210 | |:--:|
211 | | *The `Fn::ImportValue` function can be used from other CloudFormation stacks to import these values*. |
212 | 
213 | 
214 | ## Tagging compliance
215 | 
216 | All taggable AWS resources at Move must have the `owner`, `product`, `component`, and
217 | `environment` tags present. We use the equivalent of `aws cloudformation create-stack --tags ...`
218 | to provision our CloudFormation stacks so that all taggable AWS resources will get the proper
219 | tags. There are two exceptions in the ECS cluster template:
220 | 
221 | - The EC2 AutoScaling group will get the tags, however `PropagateAtLaunch: true` will not be set
222 |   so the EC2 instances that are started will not get the proper tags. These four tags are
223 |   explicitly configured on the AutoScaling group so that the EC2 instances are tagged properly.
224 | - The EBS volumes associated with the EC2 instances do not inherit the tags of the EC2 instance.
225 |   On startup, each EC2 instance takes care of adding the appropriate tags to its EBS volumes.
226 | 
227 | 
228 | ## Application Load Balancers (ALBs)
229 | 
230 | The ECS cluster template allows you to create an internet-facing and an internal load balancer
231 | to allow easily running multiple applications inside the same cluster. One or both of the load
232 | balancers can be disabled via CloudFormation parameters if desired. Be aware that the ALB
233 | [currently has a limit of 100 listener rules per load balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-limits.html).
234 | 
235 | A dedicated S3 bucket is created for the cluster to store the ALB access logs.
236 | 
237 | 
238 | ## Start a task on each ECS instance
239 | 
240 | ECS currently does not have the ability to start a task on each instance inside the cluster.
241 | To work around this, each EC2 instance has the ability to start a task that will run only
242 | on the current instance.
243 | 
244 | 
245 | ## CloudFormation Template
246 | 
247 | By following these best practices and techniques, ECS can significantly lower infrastructure
248 | costs and simplify scaling, deployment, and management concerns. A fully functional CloudFormation
249 | template which implements all of these best practices can be
250 | [downloaded here](ECS-Web-Cluster.template).
251 | 
252 | The next article in this series will describe how we are doing blue/green deployments with canary
253 | containers inside ECS.
254 | 


--------------------------------------------------------------------------------
/ECS-Service.template:
--------------------------------------------------------------------------------
  1 | # CloudFormation template for ECS blue / green deployments. This template uses Mustache (or one of
  2 | # its derivatives, like Pystache) to inject the following values:
  3 | #
  4 | # - ecsCluster - The name of the ECS cluster to deploy the service into.
  5 | #
  6 | # - ecsDeployMaxPercent - Optional. The MaximumPercent field.
  7 | #                         https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-ecs-service-deploymentconfiguration.html
  8 | #
  9 | # - ecsDeployMinHealthyPercent - Optional. The MinimumHealthyPercent field.
 10 | #                                https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-ecs-service-deploymentconfiguration.html
 11 | #
 12 | # - ecsDistinctInstancePlacementConstraint - Optional. Whether or not to enable a distinctInstance
 13 | #                                            placement constraint.
 14 | #                                            https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-ecs-service-placementconstraints-placementconstraint.html
 15 | #
 16 | # - ecsSpreadAzPlacementStrategy - Optional. Whether or not to spread containers across AZs
 17 | #                                  as a placement strategy.
 18 | #                                  https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-ecs-service-placementstrategies-placementstrategy.html
 19 | #
 20 | # - ecsSpreadInstanceIdPlacementStrategy - Optional. Whether or not to spread containers across
 21 | #                                          instances as a placement strategy.
 22 | #                                          https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-ecs-service-placementstrategies-placementstrategy.html
 23 | #
 24 | # - ecsServiceScaleDownAdjustment
 25 | # - ecsServiceScaleDownCompOp
 26 | # - ecsServiceScaleDownEvalPeriods
 27 | # - ecsServiceScaleDownMetric
 28 | # - ecsServiceScaleDownNamespace
 29 | # - ecsServiceScaleDownStatistic
 30 | # - ecsServiceScaleDownThreshold
 31 | # - ecsServiceScaleUpAdjustment
 32 | # - ecsServiceScaleUpCompOp
 33 | # - ecsServiceScaleUpEvalPeriods
 34 | # - ecsServiceScaleUpMetric
 35 | # - ecsServiceScaleUpNamespace
 36 | # - ecsServiceScaleUpStatistic
 37 | # - ecsServiceScaleUpThreshold - Optional. Autoscaling properties for the
 38 | #                                AWS::ApplicationAutoScaling::ScalableTarget and
 39 | #                                AWS::ApplicationAutoScaling::ScalingPolicy.
 40 | #
 41 | # - envName  - The name of the ECS canary service. This is only used for the CloudFormation
 42 | #              template description.
 43 | #
 44 | # - healthCheckHttpCode - The expected HTTP status code from the health check endpoint.
 45 | #                         https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-elasticloadbalancingv2-targetgroup.html
 46 | #
 47 | # - healthCheckUrl - The relative path of the health check URL.
 48 | #                         https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-elasticloadbalancingv2-targetgroup.html
 49 | #
 50 | # - httpListenerExport
 51 | # - httpsListenerExport - The ARN of the HTTP/HTTPS listeners on the ALB. This is typically the
 52 | #                         CloudFormation export ${ECS_CLUSTER}-${ALB_SCHEME}-http-listener,
 53 | #                         where ALB_SCHEME is either internal or internet_facing. See the
 54 | #                         ECS Cluster CloudFormation template in this repo for the export.
 55 | #                         https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-elasticloadbalancingv2-listener.html
 56 | #
 57 | # - iamRoleArn - Optional. An existing IAM role ARN that will be used as the ECS task role.
 58 | #                If you specify this, then do not specify iamTaskPolicy.
 59 | #
 60 | # - iamTaskPolicy - Optional. Create a new IAM role for the ECS task role. This is the policy
 61 | #                   document for the AWS::IAM::Role CloudFormation resource.
 62 | #
 63 | # - mainContainerName
 64 | # - mainContainerPort - The container name and container port that will receive traffic from the ALB
 65 | #                       https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-ecs-service-loadbalancers.html
 66 | #
 67 | # - minCapacity
 68 | # - maxCapacity - The min/max capacity for the ECS service application autoscaling group.
 69 | #                 https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-applicationautoscaling-scalabletarget.html
 70 | #
 71 | # - prevDesiredCount - The desired count for the ECS service. Typically, when we do a blue / green
 72 | #                      deployment, the deployment pipeline gets the desired count of the previous service.
 73 | #
 74 | # - targetGroupDeregistrationDelay - The target group deregistration delay.
 75 | #                                    https://docs.aws.amazon.com/elasticloadbalancing/latest/APIReference/API_TargetGroupAttribute.html
 76 | #
 77 | # - targetGroupLbCookieDurationSecs
 78 | # - targetGroupStickinessEnabled - Whether or not session stickiness is enabled at the ALB and how
 79 | #                                  long the session tokens should live.
 80 | #                                  https://docs.aws.amazon.com/elasticloadbalancing/latest/APIReference/API_TargetGroupAttribute.html
 81 | #
 82 | # - taskDefinitionYaml - The AWS::ECS::TaskDefinition CloudFormation configuration. This is
 83 | #                        embedded within Move's projects in the deployment/Dockerrun.aws.yml
 84 | #                        file.
 85 | #                        https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-ecs-taskdefinition.html
 86 | 
 87 | 
 88 | Description: {{ envName }} ECS service.
 89 | 
 90 | Parameters:  # CloudFormation stack parameters (separate from the Mustache values listed in the header).
 91 |   ServiceName:  # Used as the ServiceName property of the ECS service resource.
 92 |     Type: String
 93 |     Description: ECS service name
 94 | 
 95 |   Hostname:  # Host header matched by HttpListenerRule / HttpsListenerRule.
 96 |     Type: String
 97 |     Description: Hostname 1 for the ALB
 98 | 
 99 |   Hostname2:  # Leaving the default makes the HasHostname2 condition false.
100 |     Type: String
101 |     Description: Optional hostname 2 for the ALB.
102 |     Default: "unused.internal"
103 | 
104 |   Hostname3:  # Leaving the default makes the HasHostname3 condition false.
105 |     Type: String
106 |     Description: Optional hostname 3 for the ALB.
107 |     Default: "unused.internal"
108 | 
109 |   AlbPriority:  # Priority of the listener rules for Hostname (same value on the HTTP and HTTPS rule).
110 |     Type: Number
111 |     Description: ALB priority 1
112 | 
113 |   AlbPriority2:  # Only referenced by the rules guarded by HasHostname2.
114 |     Type: Number
115 |     Description: Optional ALB priority 2. Requires Hostname2 to not be empty
116 |     Default: -1
117 | 
118 |   AlbPriority3:  # Only referenced by the rules guarded by HasHostname3.
119 |     Type: Number
120 |     Description: Optional ALB priority 3. Requires Hostname3 to not be empty
121 |     Default: -1
122 | 
123 | Conditions:
124 |   # True when Hostname2 was overridden, i.e. it is not the "unused.internal" default.
125 |   HasHostname2: !Not [!Equals [!Ref Hostname2, "unused.internal"]]
126 |   # Same check for the optional third hostname.
127 |   HasHostname3: !Not [!Equals [!Ref Hostname3, "unused.internal"]]
130 | 
131 | Resources:
132 |   Service:
    |     # The ECS service that runs TaskDefinition and registers its containers with TargetGroup.
    |     #
    |     # The HTTP / HTTPS listener rules are only rendered when their listener export is
    |     # supplied, so the matching DependsOn entries are rendered under the same Mustache
    |     # sections. Otherwise DependsOn would name a resource that is not in the template.
133 |     Type: AWS::ECS::Service
134 |     DependsOn:
135 |       - EcsServiceRole
    | {{#httpListenerExport}}
136 |       - HttpListenerRule
    | {{/httpListenerExport}}
    | {{#httpsListenerExport}}
137 |       - HttpsListenerRule
    | {{/httpsListenerExport}}
138 |       - TargetGroup
139 | {{#iamTaskPolicy}}
140 |       - TaskRole
141 | {{/iamTaskPolicy}}
142 |     Properties:
143 |       ServiceName: !Ref ServiceName
144 |       Cluster: {{ ecsCluster }}
145 |       Role: !Ref EcsServiceRole
146 |       DesiredCount: {{ prevDesiredCount }}
147 |       TaskDefinition: !Ref TaskDefinition
    |       # Note: MaximumPercent is only rendered when ecsDeployMinHealthyPercent is also set.
148 | {{#ecsDeployMinHealthyPercent}}
149 |       DeploymentConfiguration:
150 | {{#ecsDeployMaxPercent}}
151 |         MaximumPercent: {{ ecsDeployMaxPercent }}
152 | {{/ecsDeployMaxPercent}}
153 |         MinimumHealthyPercent: {{ecsDeployMinHealthyPercent}}
154 | {{/ecsDeployMinHealthyPercent}}
155 | {{#ecsDistinctInstancePlacementConstraint}}
156 |       PlacementConstraints:
157 |         - Type: distinctInstance
158 | {{/ecsDistinctInstancePlacementConstraint}}
    |       # The PlacementStrategies key is only rendered when at least one spread strategy is
    |       # enabled; an empty key would render as a null value, which CloudFormation rejects.
    |       # Mustache has no "or", so the instanceId-only case uses an inverted section.
159 | {{#ecsSpreadAzPlacementStrategy}}
160 |       PlacementStrategies:
161 |         - Type: spread
162 |           Field: attribute:ecs.availability-zone
163 | {{#ecsSpreadInstanceIdPlacementStrategy}}
164 |         - Type: spread
165 |           Field: instanceId
166 | {{/ecsSpreadInstanceIdPlacementStrategy}}
167 | {{/ecsSpreadAzPlacementStrategy}}
    | {{^ecsSpreadAzPlacementStrategy}}
    | {{#ecsSpreadInstanceIdPlacementStrategy}}
    |       PlacementStrategies:
    |         - Type: spread
    |           Field: instanceId
    | {{/ecsSpreadInstanceIdPlacementStrategy}}
    | {{/ecsSpreadAzPlacementStrategy}}
168 |       LoadBalancers:
169 |         - ContainerName: {{ mainContainerName }}
170 |           ContainerPort: {{ mainContainerPort }}
171 |           TargetGroupArn: !Ref TargetGroup
172 | 
173 | {{#ecsServiceScaleUpMetric}}
174 |   ServiceScalingTarget:
    |     # Registers the service's DesiredCount as a scalable target, bounded by
    |     # minCapacity / maxCapacity, using AutoscalingRole.
175 |     Type: AWS::ApplicationAutoScaling::ScalableTarget
176 |     DependsOn: Service
177 |     Properties:
178 |       ServiceNamespace: ecs
179 |       ScalableDimension: ecs:service:DesiredCount
180 |       ResourceId: !Sub "service/{{ ecsCluster }}/${Service.Name}"
181 |       RoleARN: !GetAtt AutoscalingRole.Arn
182 |       MinCapacity: {{ minCapacity }}
183 |       MaxCapacity: {{ maxCapacity }}
184 | 
185 |   AutoscalingRole:
    |     # Role assumed by application-autoscaling.amazonaws.com; ServiceScalingTarget passes
    |     # its ARN as RoleARN.
186 |     Type: AWS::IAM::Role
187 |     Properties:
188 |       AssumeRolePolicyDocument:
    |         # Explicit policy language version, consistent with the other IAM roles in this template.
    |         Version: "2012-10-17"
189 |         Statement:
190 |         - Effect: Allow
191 |           Principal:
192 |             Service:
193 |               - application-autoscaling.amazonaws.com
194 |           Action:
195 |             - sts:AssumeRole
196 |       Path: /
197 |       ManagedPolicyArns:
198 |         - arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceAutoscaleRole
199 |       Policies:
    |         # Inline policy granted in addition to the AWS-managed policy above.
200 |         - PolicyName: service-autoscaling
201 |           PolicyDocument:
202 |             Version: "2012-10-17"
203 |             Statement:
204 |               - Effect: Allow
205 |                 Action:
206 |                   - application-autoscaling:*
207 |                   - cloudwatch:DescribeAlarms
208 |                   - cloudwatch:PutMetricAlarm
209 |                 Resource:
210 |                   - "*"
211 | 
212 |   ServiceScaleUpPolicy:
    |     # Step-scaling policy invoked by the ScaleUp alarm; changes the task count by
    |     # ecsServiceScaleUpAdjustment.
213 |     Type: AWS::ApplicationAutoScaling::ScalingPolicy
214 |     Properties:
215 |       PolicyName: !Sub "${AWS::StackName}-scale-up-policy"
216 |       PolicyType: StepScaling
217 |       ScalingTargetId: !Ref ServiceScalingTarget
218 |       StepScalingPolicyConfiguration:
219 |         AdjustmentType: ChangeInCapacity
220 |         MetricAggregationType: Maximum
221 |         Cooldown: 300
222 |         StepAdjustments:
223 |           - ScalingAdjustment: {{ ecsServiceScaleUpAdjustment }}
224 |             MetricIntervalLowerBound: 0
225 | 
226 |   ServiceScaleDownPolicy:
    |     # Step-scaling policy invoked by the ScaleDown alarm; changes the task count by
    |     # ecsServiceScaleDownAdjustment.
227 |     Type: AWS::ApplicationAutoScaling::ScalingPolicy
228 |     Properties:
229 |       PolicyName: !Sub "${AWS::StackName}-scale-down-policy"
230 |       PolicyType: StepScaling
231 |       ScalingTargetId: !Ref ServiceScalingTarget
232 |       StepScalingPolicyConfiguration:
233 |         AdjustmentType: ChangeInCapacity
234 |         MetricAggregationType: Maximum
235 |         Cooldown: 300
236 |         StepAdjustments:
237 |           - ScalingAdjustment: {{ ecsServiceScaleDownAdjustment }}
238 |             MetricIntervalUpperBound: 0
239 | 
240 |   ScaleUp:
    |     # CloudWatch alarm on the configured scale-up metric for this cluster/service pair;
    |     # its alarm action is ServiceScaleUpPolicy.
241 |     Type: AWS::CloudWatch::Alarm
242 |     Properties:
243 |       AlarmDescription: Alarm if {{ ecsServiceScaleUpMetric }} is too high.
244 |       Namespace: {{ ecsServiceScaleUpNamespace }}
245 |       MetricName: {{ ecsServiceScaleUpMetric }}
246 |       Dimensions:
247 |         - Name: ClusterName
248 |           Value: {{ ecsCluster }}
249 |         - Name: ServiceName
250 |           Value: !GetAtt Service.Name
251 |       Statistic: {{ ecsServiceScaleUpStatistic }}
252 |       Period: '60'
253 |       EvaluationPeriods: '{{ ecsServiceScaleUpEvalPeriods }}'
254 |       ComparisonOperator: {{ ecsServiceScaleUpCompOp }}
255 |       Threshold: '{{ ecsServiceScaleUpThreshold }}'
256 |       AlarmActions: [!Ref ServiceScaleUpPolicy]
257 | 
258 |   ScaleDown:
    |     # CloudWatch alarm on the configured scale-down metric for this cluster/service pair;
    |     # its alarm action is ServiceScaleDownPolicy.
259 |     Type: AWS::CloudWatch::Alarm
260 |     Properties:
261 |       AlarmDescription: Alarm if {{ ecsServiceScaleDownMetric }} is too low.
262 |       Namespace: {{ ecsServiceScaleDownNamespace }}
263 |       MetricName: {{ ecsServiceScaleDownMetric }}
264 |       Dimensions:
265 |         - Name: ClusterName
266 |           Value: {{ ecsCluster }}
267 |         - Name: ServiceName
268 |           Value: !GetAtt Service.Name
269 |       Statistic: {{ ecsServiceScaleDownStatistic }}
270 |       Period: '60'
271 |       EvaluationPeriods: '{{ ecsServiceScaleDownEvalPeriods }}'
272 |       ComparisonOperator: {{ ecsServiceScaleDownCompOp }}
273 |       Threshold: '{{ ecsServiceScaleDownThreshold }}'
274 |       AlarmActions: [!Ref ServiceScaleDownPolicy]
275 | {{/ecsServiceScaleUpMetric}}
276 | 
277 |   TaskDefinition:
    |     # ECS task definition. The body comes from taskDefinitionYaml, which is injected
    |     # unescaped and is expected to already be indented as children of Properties.
    |     #
    |     # Task role selection: an existing role (iamRoleArn) takes precedence over the
    |     # generated TaskRole, so TaskRoleArn is never rendered twice when both iamRoleArn
    |     # and iamTaskPolicy are supplied (a duplicate key would make the YAML ambiguous).
278 |     Type: AWS::ECS::TaskDefinition
279 |     Properties:
280 |       {{#iamRoleArn}}
281 |       TaskRoleArn: {{ iamRoleArn }}
282 |       {{/iamRoleArn}}
    |       {{^iamRoleArn}}
283 |       {{#iamTaskPolicy}}
284 |       TaskRoleArn: !Ref TaskRole
285 |       {{/iamTaskPolicy}}
    |       {{/iamRoleArn}}
286 | {{{ taskDefinitionYaml }}}
287 | 
288 |   TargetGroup:
    |     # ALB target group for the service, created in the VPC exported by the cluster stack
    |     # as "<ecsCluster>-VPC". The listener rules forward to it and the ECS service
    |     # registers its containers with it.
289 |     Type: AWS::ElasticLoadBalancingV2::TargetGroup
290 |     Properties:
291 |       VpcId:
292 |         'Fn::ImportValue': "{{ ecsCluster}}-VPC"
293 |       Port: 80
294 |       Protocol: HTTP
295 |       Matcher:
296 |         HttpCode: {{ healthCheckHttpCode }}
297 |       HealthCheckIntervalSeconds: 30
    |       # Triple mustache: render the path without HTML escaping, so characters such as
    |       # "&" in a query string (or "/" under mustache.js) are not turned into entities.
298 |       HealthCheckPath: {{{ healthCheckUrl }}}
299 |       HealthCheckProtocol: HTTP
300 |       HealthCheckTimeoutSeconds: 5
301 |       HealthyThresholdCount: 2
302 |       UnhealthyThresholdCount: 5
303 |       TargetGroupAttributes:
304 |         - Key: deregistration_delay.timeout_seconds
305 |           Value: {{ targetGroupDeregistrationDelay }}
306 |         - Key: stickiness.enabled
307 |           Value: {{ targetGroupStickinessEnabled }}
308 |         - Key: stickiness.lb_cookie.duration_seconds
309 |           Value: {{ targetGroupLbCookieDurationSecs }}
310 | 
311 | {{#httpListenerExport}}
312 |   HttpListenerRule:
    |     # Forwards HTTP requests whose Host header matches Hostname to TargetGroup.
313 |     Type: AWS::ElasticLoadBalancingV2::ListenerRule
314 |     Properties:
315 |       ListenerArn: !ImportValue "{{ httpListenerExport }}"
316 |       Priority: !Ref AlbPriority
317 |       Conditions:
318 |         - Field: host-header
319 |           Values: [!Ref Hostname]
320 |       Actions:
321 |         - Type: forward
322 |           TargetGroupArn: !Ref TargetGroup
325 | 
326 |   HttpListenerRule2:
    |     # Same rule for the optional second hostname; only created when HasHostname2 is true.
327 |     Type: AWS::ElasticLoadBalancingV2::ListenerRule
328 |     Condition: HasHostname2
329 |     Properties:
330 |       ListenerArn: !ImportValue "{{ httpListenerExport }}"
331 |       Priority: !Ref AlbPriority2
332 |       Conditions:
333 |         - Field: host-header
334 |           Values: [!Ref Hostname2]
335 |       Actions:
336 |         - Type: forward
337 |           TargetGroupArn: !Ref TargetGroup
340 | 
341 |   HttpListenerRule3:
    |     # Same rule for the optional third hostname; only created when HasHostname3 is true.
342 |     Type: AWS::ElasticLoadBalancingV2::ListenerRule
343 |     Condition: HasHostname3
344 |     Properties:
345 |       ListenerArn: !ImportValue "{{ httpListenerExport }}"
346 |       Priority: !Ref AlbPriority3
347 |       Conditions:
348 |         - Field: host-header
349 |           Values: [!Ref Hostname3]
350 |       Actions:
351 |         - Type: forward
352 |           TargetGroupArn: !Ref TargetGroup
355 | {{/httpListenerExport}}
356 | 
357 | {{#httpsListenerExport}}
358 |   HttpsListenerRule:
    |     # Forwards HTTPS requests whose Host header matches Hostname to TargetGroup.
359 |     Type: AWS::ElasticLoadBalancingV2::ListenerRule
360 |     Properties:
361 |       ListenerArn: !ImportValue "{{ httpsListenerExport }}"
362 |       Priority: !Ref AlbPriority
363 |       Conditions:
364 |         - Field: host-header
365 |           Values: [!Ref Hostname]
366 |       Actions:
367 |         - Type: forward
368 |           TargetGroupArn: !Ref TargetGroup
371 | 
372 |   HttpsListenerRule2:
    |     # Same rule for the optional second hostname; only created when HasHostname2 is true.
373 |     Type: AWS::ElasticLoadBalancingV2::ListenerRule
374 |     Condition: HasHostname2
375 |     Properties:
376 |       ListenerArn: !ImportValue "{{ httpsListenerExport }}"
377 |       Priority: !Ref AlbPriority2
378 |       Conditions:
379 |         - Field: host-header
380 |           Values: [!Ref Hostname2]
381 |       Actions:
382 |         - Type: forward
383 |           TargetGroupArn: !Ref TargetGroup
386 | 
387 |   HttpsListenerRule3:
    |     # Same rule for the optional third hostname; only created when HasHostname3 is true.
388 |     Type: AWS::ElasticLoadBalancingV2::ListenerRule
389 |     Condition: HasHostname3
390 |     Properties:
391 |       ListenerArn: !ImportValue "{{ httpsListenerExport }}"
392 |       Priority: !Ref AlbPriority3
393 |       Conditions:
394 |         - Field: host-header
395 |           Values: [!Ref Hostname3]
396 |       Actions:
397 |         - Type: forward
398 |           TargetGroupArn: !Ref TargetGroup
401 | {{/httpsListenerExport}}
402 | 
403 |   # Service role assumed by ECS (ecs.amazonaws.com) so that the service can register and
404 |   # deregister its containers with the Application Load Balancer (ALB). Based on the default in:
405 |   # http://docs.aws.amazon.com/AmazonECS/latest/developerguide/service_IAM_role.html
406 |   EcsServiceRole:
407 |     Type: AWS::IAM::Role
408 |     Properties:
409 |       RoleName: !Sub "svc-${AWS::StackName}"
410 |       Path: /
411 |       ManagedPolicyArns:
412 |         - arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceRole
413 |       AssumeRolePolicyDocument:
414 |         Version: "2012-10-17"
415 |         Statement:
416 |           - Effect: Allow
417 |             Action:
418 |               - sts:AssumeRole
419 |             Principal:
420 |               Service:
421 |                 - ecs.amazonaws.com
422 | 
423 | {{#iamTaskPolicy}}
424 |   TaskRole:  # Task role rendered only when iamTaskPolicy is supplied; TaskDefinition references it as TaskRoleArn.
425 |     Type: AWS::IAM::Role
426 |     Properties:
427 |       RoleName: !Sub svc-${AWS::StackName}-role  # Explicit name derived from the stack name.
428 |       Path: /
429 |       AssumeRolePolicyDocument:  # Trust policy: only ecs-tasks.amazonaws.com may assume this role.
430 |         Version: "2012-10-17"
431 |         Statement:
432 |           - Effect: Allow
433 |             Principal:
434 |               Service:
435 |                 - ecs-tasks.amazonaws.com
436 |             Action:
437 |               - sts:AssumeRole
438 |       Policies:
439 |         - PolicyName: !Sub svc-${AWS::StackName}
440 |           PolicyDocument:  # iamTaskPolicy is injected unescaped below; presumably it must arrive pre-indented (TODO confirm with the caller).
441 | {{{ iamTaskPolicy }}}
442 | {{/iamTaskPolicy}}
443 | 
444 | Outputs:
    |   # Exports the target group ARN under "<stack name>-tg" so other stacks can import it.
445 |   TargetGroupArn:
446 |     Description: Target Group ARN
447 |     Value: !Ref TargetGroup
448 |     Export:
449 |       Name:
    |         Fn::Sub: "${AWS::StackName}-tg"
450 | 


--------------------------------------------------------------------------------
/ECS-Batch-Cluster.template:
--------------------------------------------------------------------------------
  1 | # CloudFormation template for an ECS cluster that runs scheduled batch jobs.
  2 | #
  3 | # ECS currently does not have good integration with EC2 to properly scale up a cluster when
  4 | # there is not enough capacity. When this occurs, the ECS run-task API call will fail with no
  5 | # metrics posted to CloudWatch and the ECS event stream about this issue and there is no good
  6 | # public-facing API available from AWS to detect this situation. For the web-based ECS clusters,
  7 | # we've been running the clusters with enough room to start at least one copy of the largest
  8 | # container that will run. We've periodically encountered issues when running large numbers of
  9 | # batch jobs that some will periodically fail to start due to insufficient capacity in the cluster
 10 | # when several jobs are started within a short period of time.
 11 | #
 12 | # For the long-running jobs, we initially tried to use the autodraining Lambda in
 13 | # ECS-Web-Cluster.template to allow them to complete, however several problems were encountered.
 14 | # The autodraining Lambda would prevent premature instance termination for hours if needed, however
 15 | # that would block all other EC2 autoscaling actions. There may be instances that go idle in that
 16 | # time period, or new jobs may want to start and won't be able to due to insufficient capacity in
 17 | # the cluster.
 18 | #
 19 | # This template takes a different approach to scaling the cluster since we have full control over
 20 | # how the ECS tasks are started in the cluster, and it is acceptable for batch jobs to wait a few
 21 | # minutes to provision additional EC2 instances. An EC2 autoscaling group is created, however no
 22 | # alarms, or scale up/down policies are created for that group. Two Lambda functions are included
 23 | # in this template that are used to scale the cluster up and down as needed.
 24 | #
 25 | # CloudWatch event rules are used for starting the scheduled tasks via the CloudFormation template
 26 | # ECS-Batch-Jobs.template. The CloudWatch event rules have the ability to directly start an ECS
 27 | # task, however these API calls will fail if the cluster does not have sufficient capacity to
 28 | # start those tasks, and no retries are attempted. To work around this limitation, the CloudWatch
 29 | # event rules are configured to post to a SQS queue and the StartTasksFunction Lambda function
 30 | # below is subscribed to that queue. The Lambda function attempts to start the task, and if it
 31 | # fails due to insufficient capacity in the cluster, then the desired number in the autoscaling
 32 | # group is incremented by one, and the Lambda function returns an error so that the message will
 33 | # be redelivered every minute for up to a default of 20 minutes to retry the operation. Any
 34 | # failures beyond that will be delivered to a dead-letter queue.
 35 | #
 36 | # When containers are started, they are placed using the binpack placement strategy so that
 37 | # instances will be packed as tight as possible and to encourage idle instances in the cluster to
 38 | # simplify the scale down operation. The TerminateInstancesFunction Lambda function below is
 39 | # executed every three minutes to scale down the cluster, and it checks for the following
 40 | # conditions:
 41 | #
 42 | #  - If an ECS instance is already set to DRAINING, and there are no running tasks on the instance,
 43 | #    then terminate the underlying EC2 instance.
 44 | #  - Set any instances that have an old launch configuration to DRAINING. This situation typically
 45 | #    occurs when the AMI is updated, instance type is changed, new SSH keypair, etc.
 46 | #  - Any ECS instances that currently have no tasks running and have not had any tasks finish for
 47 | #    the last TerminateInstancesAfterIdleSecs seconds will be set to DRAINING.
 48 | #
 49 | # Be aware that the ECS API is eventually consistent, so terminating instances is done in a
 50 | # two-pass manner to eliminate the race condition of possibly shutting down an instance that
 51 | # is currently running a task. The first call of the terminate lambda will set the instance to
 52 | # DRAINING, and the second call will terminate the instance.
 53 | #
 54 | # The terminate Lambda will look for EC2 instances that were started by autoscaling, but failed to
 55 | # register with ECS within a 10 minute period. Without this check, the cluster will get into a state
 56 | # where the StartTask Lambda thinks that autoscaling activity is currently in progress and no
 57 | # additional instances will be started until the bad instance is terminated.
 58 | #
 59 | # It should be noted that this cluster template has the ability to scale down to zero instances on
 60 | # its own when no tasks are running.
 61 | #
 62 | #
 63 | # Scheduled tasks tab in the ECS console
 64 | # --------------------------------------
 65 | #
 66 | # Since the CloudWatch event rules are configured to deliver a message to SQS, instead of directly
 67 | # starting an ECS task, be aware that the 'Scheduled Tasks' tab on the ECS cluster will not
 68 | # show any scheduled tasks. You can view the scheduled tasks in the Event Rules page in CloudWatch.
 69 | #
 70 | #
 71 | # SQS message format
 72 | # ------------------
 73 | #
 74 | # To have a process start a task in the ECS cluster outside of the CloudWatch event rules, add a
 75 | # message to the StartTasksQueue with the following format:
 76 | #
 77 | #     {
 78 | #       "jobName": "COSMETIC_JOB_NAME_SHOWN_IN_STARTED_BY_FIELD",
 79 | #       "taskDefinition": "TASK_DEFINITION_ARN",
 80 | #       "containerOverrides": {
 81 | #         "name": "CONTAINER_NAME_IN_DOCKERRUN_AWS_YML",
 82 | #         "command": [ "command", "arg1", "arg2", "arg3" ],
 83 | #         "environment": [
 84 | #           {
 85 | #             "name": "SOME_ENV_VAR",
 86 | #             "value": "SOME_VALUE"
 87 | #           }
 88 | #         ],
 89 | #         "memory": 750
 90 | #       }
 91 | #     }
 92 | #
 93 | # The 'jobName' / 'taskDefinition' parameters are required and 'containerOverrides' is optional.
 94 | #
 95 | # The SQS queue information is available as a CloudFormation export in this template and in the
 96 | # template ECS-Batch-Jobs.template so that your application can easily look up this information
 97 | # via the CloudFormation API.
 98 | #
 99 | #
100 | # Troubleshooting job launch errors
101 | # ---------------------------------
102 | #
103 | # Go to the Lambda console, pull up the StartTasksFunction for your ECS cluster, click on the
104 | # Monitoring tab, then the View logs in CloudWatch button, and finally click on the Log Streams
105 | # for the time period of interest to see the following sequence appear in the logs for tasks that
106 | # need to scale up the cluster:
107 | #
108 | #     Failed to start JOB_JSON: FAILURES
109 | #     Set desired count of AUTOSCALING_GROUP to XX for JOB_JSON
110 | #     ... (~1 minute later)
111 | #     Failed to start JOB_JSON: FAILURES
112 | #     Instance is starting (asg=XX, ecs=XX-1) for JOB_JSON
113 | #     ...
114 | #     Started JOB_JSON
115 | #
116 | # The Lambda function will retry up to the number of times specified by the
117 | # StartTasksQueueMaxReceiveCount parameter below and then the message is dropped into a dead-letter
118 | # queue.
119 | 
120 | Description: ECS batch job cluster for running scheduled tasks.
121 | 
122 | Parameters:
123 |   InstanceType:
124 |     Description: The type of instances to use in the ECS cluster. See https://www.ec2instances.info/
125 |     Type: String
126 | 
127 |   MaxClusterSize:
128 |     Description: Maximum number of instances in the ECS cluster.
129 |     Type: Number
130 | 
131 |   VPC:
132 |     Description: VPC this ECS cluster will be deployed to.
133 |     Type: AWS::EC2::VPC::Id
134 | 
135 |   AmiId:
136 |     Description: AMI ID for the EC2 instances.
137 |     Type: String
138 | 
139 |   KeyName:
140 |     Description: Name of an existing EC2 KeyPair to enable SSH access to the ECS instances.
141 |     Type: AWS::EC2::KeyPair::KeyName
142 | 
143 |   AdditonalEC2SecurityGroups:
144 |     Description: Optional comma separated list of additional security groups to add to the EC2
145 |                  instances.
146 |     Type: String
147 |     Default: ""
148 | 
149 |   EbsVolumeSize:
150 |     Description: Size of the EBS volumes on each instance.
151 |     Type: String
152 | 
153 |   DockerVolumeSize:
154 |     Description: Size of the Docker volumes on each instance.
155 |     Type: String
156 |     Default: 50
157 | 
158 |   ContainerDeviceManagerSize:
159 |     Description: The amount of disk space to allocate in device manager to each running container.
160 |                  Currently ECS does not allow configuring this on a per-container basis.
161 |     Type: String
162 |     Default: "10"
163 | 
164 |   VpcInternalSubnets:
165 |     Description: Subnets that the EC2 instances will be placed in.
166 |     Type: List<AWS::EC2::Subnet::Id>
167 | 
168 |   OwnerTag:
169 |     Description: Owner tag for the EC2 instances and EBS volumes.
170 |     Type: String
171 | 
172 |   ProductTag:
173 |     Description: Product tag for the EC2 instances and EBS volumes.
174 |     Type: String
175 | 
176 |   ComponentTag:
177 |     Description: Component tag for the EC2 instances and EBS volumes.
178 |     Type: String
179 | 
180 |   EnvironmentTag:
181 |     Description: Environment tag for the EC2 instances and EBS volumes.
182 |     Type: String
183 |     AllowedValues: [dev, qa, prod]
184 |     ConstraintDescription: Please choose either dev, qa or prod
185 | 
186 |   ChaosMonkey:
187 |     Description: The value of the chaos_monkey tag for Netflix's Chaos Monkey.
188 |     Type: String
189 |     AllowedValues: [true, false]
190 |     Default: true
191 | 
192 |   NewRelicLicenseKey:
193 |     Description: New Relic license key for the infrastructure monitoring. Leave the value blank
194 |                  to disable New Relic Infrastructure monitoring.
195 |     Type: String
196 |     Default: ""
197 | 
198 |   TerminateInstancesScheduleExpression:
199 |     Description: The schedule expression for when to run the Lambda that terminates idle EC2
200 |                  instances.
201 |     Type: String
202 |     Default: "rate(3 minutes)"
203 | 
204 |   TerminateInstancesAfterIdleSecs:
205 |     Description: The number of seconds that an instance is idle before it is drained from ECS
206 |                  and eventually terminated.
207 |     Type: Number
208 |     Default: 600
209 | 
210 |   StartTasksQueueMaxReceiveCount:
211 |     Description: The maximum number of times that jobs that fail to launch will be retried. Retries
212 |                  typically occur every minute.
213 |     Type: Number
214 |     Default: 20
215 | 
216 | Conditions:
217 |   HasAdditonalEC2SecurityGroups: !Not [ !Equals [!Ref AdditonalEC2SecurityGroups, '']]
218 | 
219 | Resources:
220 |   ECSCluster:
221 |     Type: AWS::ECS::Cluster
222 |     Properties:
223 |       ClusterName: !Ref AWS::StackName
224 | 
225 |   ECSAutoScalingGroup:
226 |     Type: AWS::AutoScaling::AutoScalingGroup
227 |     Properties:
228 |       VPCZoneIdentifier: !Ref VpcInternalSubnets
229 |       LaunchConfigurationName: !Ref ECSLaunchConfiguration
230 |       MinSize: 0
231 |       MaxSize: !Ref MaxClusterSize
232 |       MetricsCollection:
233 |         - Granularity: 1Minute
234 |       Tags:
235 |         - Key: Name
236 |           Value: !Ref AWS::StackName
237 |           PropagateAtLaunch: true
238 |         - Key: owner
239 |           Value: !Ref OwnerTag
240 |           PropagateAtLaunch: true
241 |         - Key: product
242 |           Value: !Ref ProductTag
243 |           PropagateAtLaunch: true
244 |         - Key: component
245 |           Value: !Ref ComponentTag
246 |           PropagateAtLaunch: true
247 |         - Key: environment
248 |           Value: !Ref EnvironmentTag
249 |           PropagateAtLaunch: true
250 |         - Key: chaos_monkey
251 |           Value: !Ref ChaosMonkey
252 |           PropagateAtLaunch: true
253 | 
254 |   ECSHostSecurityGroup:
255 |     Type: AWS::EC2::SecurityGroup
256 |     Metadata:
257 |       cfn_nag:
258 |         rules_to_suppress:
259 |           - id: W5
260 |             reason: "Allow all outbound network traffic."
261 |           - id: W9
262 |             reason: "Allow direct access to the ECS services from the internal network."
263 |     Properties:
264 |       VpcId: !Ref VPC
265 |       GroupDescription: Allow access from the internal network.
266 |       SecurityGroupIngress:
267 |         - CidrIp: 10.0.0.0/8
268 |           IpProtocol: -1
269 |         - CidrIp: 192.168.0.0/16
270 |           IpProtocol: -1
271 |       SecurityGroupEgress:
272 |         - CidrIp: 0.0.0.0/0
273 |           IpProtocol: -1
274 | 
275 |   ECSLaunchConfiguration:
276 |     Type: AWS::AutoScaling::LaunchConfiguration
277 |     Properties:
278 |       ImageId: !Ref AmiId
279 |       InstanceType: !Ref InstanceType
280 |       SecurityGroups:
281 |         'Fn::If':
282 |           - HasAdditonalEC2SecurityGroups
283 |           - !Split [',', !Join [',', [!Ref AdditonalEC2SecurityGroups, !Ref ECSHostSecurityGroup]]]
284 |           - !Split [',', !Ref ECSHostSecurityGroup]
285 |       IamInstanceProfile: !Ref ECSInstanceProfile
286 |       KeyName: !Ref 'KeyName'
287 |       BlockDeviceMappings:
288 |         - DeviceName: "/dev/xvda"
289 |           Ebs:
290 |             VolumeSize: !Ref EbsVolumeSize
291 |             VolumeType: gp2
292 |         - DeviceName: "/dev/xvdcz"
293 |           Ebs:
294 |             VolumeSize: !Ref DockerVolumeSize
295 |             VolumeType: gp2
296 | 
297 |       UserData:
298 |         "Fn::Base64": !Sub |
299 |           #!/bin/bash
300 | 
301 |           # Increment version number below to force new instances in the cluster.
302 |           # This is intentionally not a CloudFormation parameter.
303 |           # Version: 1
304 | 
305 |           /opt/aws/bin/cfn-init -v --region ${AWS::Region} --stack ${AWS::StackName} \
306 |                         --resource ECSLaunchConfiguration
307 |           /opt/aws/bin/cfn-signal -e $? --region ${AWS::Region} --stack ${AWS::StackName} \
308 |                         --resource ECSAutoScalingGroup
309 | 
310 |           if [ "${NewRelicLicenseKey}" != "" ] ; then
311 |               echo "license_key: ${NewRelicLicenseKey}" > /etc/newrelic-infra.yml
312 |               initctl start newrelic-infra
313 |           fi
314 | 
315 |           vgextend docker /dev/xvdcz
316 |           lvextend -L+${DockerVolumeSize}G /dev/docker/docker-pool
317 | 
318 |     Metadata:
319 |       AWS::CloudFormation::Init:
320 |         config:
321 |           commands:
322 |             01_add_ebs_tags:
323 |               command: /usr/local/bin/create-ebs-tags.py
324 |             02_docker_storage_opt:
325 |               command: !Sub echo 'OPTIONS="$OPTIONS --storage-opt dm.basesize=${ContainerDeviceManagerSize}G"' >> /etc/sysconfig/docker
326 | 
327 |           files:
328 |             "/etc/ecs/ecs.config":
329 |               mode: "000644"
330 |               owner: root
331 |               group: root
332 |               content: !Sub |
333 |                ECS_CLUSTER=${ECSCluster}
334 |                ECS_AVAILABLE_LOGGING_DRIVERS=["splunk","awslogs","json-file"]
335 | 
336 |             "/usr/local/bin/create-ebs-tags.py":
337 |               mode: "000755"
338 |               owner: root
339 |               group: root
340 |               content: !Sub |
341 |                 #!/usr/bin/env python3
342 |                 # Copies the stack's tags onto every EBS volume attached to this instance.
343 |                 import urllib.request  # a bare "import urllib" does not load the request submodule
344 |                 import boto3
345 | 
346 |                 def tag_ebs_volumes():
347 |                     client = boto3.client('ec2', region_name='${AWS::Region}')
348 |                     url = 'http://169.254.169.254/latest/meta-data/instance-id'
349 |                     instance_id = urllib.request.urlopen(url).read().decode('UTF-8')
350 |                     response = client.describe_volumes(Filters=[{'Name': 'attachment.instance-id',
351 |                                                                  'Values': [instance_id]}])
352 |                     for volume in response['Volumes']:
353 |                         print('Tagging %s' % (volume['VolumeId']))
354 |                         client.create_tags(Resources=[volume['VolumeId']],
355 |                                            Tags=[{'Key': 'Name', 'Value': '${AWS::StackName}'},
356 |                                                  {'Key': 'product', 'Value': '${ProductTag}'},
357 |                                                  {'Key': 'component', 'Value': '${ComponentTag}'},
358 |                                                  {'Key': 'owner', 'Value': '${OwnerTag}'},
359 |                                                  {'Key': 'environment', 'Value': '${EnvironmentTag}'}])
360 | 
361 |                 if __name__ == '__main__':
362 |                     tag_ebs_volumes()
363 | 
364 |             "/etc/cfn/cfn-hup.conf":
365 |               mode: "000400"
366 |               owner: root
367 |               group: root
368 |               content: !Sub |
369 |                 [main]
370 |                 stack=${AWS::StackId}
371 |                 region=${AWS::Region}
372 | 
373 |             "/etc/cfn/hooks.d/cfn-auto-reloader.conf":
374 |               content: !Sub |
375 |                 [cfn-auto-reloader-hook]
376 |                 triggers=post.update
377 |                 path=Resources.ECSLaunchConfiguration.Metadata.AWS::CloudFormation::Init
378 |                 action=/opt/aws/bin/cfn-init -v --region ${AWS::Region} --stack ${AWS::StackName} --resource ECSLaunchConfiguration
379 | 
380 |           services:
381 |             sysvinit:
382 |               cfn-hup:
383 |                 enabled: true
384 |                 ensureRunning: true
385 |                 files:
386 |                   - /etc/cfn/cfn-hup.conf
387 |                   - /etc/cfn/hooks.d/cfn-auto-reloader.conf
388 | 
389 |   ECSRole:
390 |     Type: AWS::IAM::Role
391 |     Properties:
392 |       Path: /
393 |       RoleName: !Sub ${AWS::StackName}-ECSRole-${AWS::Region}
394 |       AssumeRolePolicyDocument:
395 |         Version: 2012-10-17
396 |         Statement:
397 |           - Effect: Allow
398 |             Principal:
399 |               Service:
400 |                 - ec2.amazonaws.com
401 |             Action:
402 |               - sts:AssumeRole
403 |       ManagedPolicyArns:
404 |         - arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role
405 |       Policies:
406 |         - PolicyName: !Sub ecs-service-${AWS::StackName}-cfn-init
407 |           PolicyDocument:
408 |             Version: 2012-10-17
409 |             Statement:
410 |               - Effect: Allow
411 |                 Action:
412 |                   - cloudformation:DescribeStackResource
413 |                   - cloudformation:SignalResource
414 |                 Resource: "*"
415 |         - PolicyName: !Sub ecs-service-${AWS::StackName}-ebs-tags
416 |           PolicyDocument:
417 |             Version: 2012-10-17
418 |             Statement:
419 |               - Effect: Allow
420 |                 Action:
421 |                   - ec2:DescribeVolumes
422 |                   - ec2:CreateTags
423 |                 Resource: "*"
424 |         - PolicyName: !Sub ecs-service-${AWS::StackName}-logging
425 |           PolicyDocument:
426 |             Version: 2012-10-17
427 |             Statement:
428 |               - Effect: Allow
429 |                 Action:
430 |                   - logs:CreateLogGroup
431 |                   - logs:CreateLogStream
432 |                   - logs:PutLogEvents
433 |                   - logs:DescribeLogGroups
434 |                   - logs:DescribeLogStreams
435 |                 Resource: "*"
436 |         - PolicyName: !Sub ecs-service-${AWS::StackName}-mon-put-data
437 |           PolicyDocument:
438 |             Version: 2012-10-17
439 |             Statement:
440 |               - Effect: Allow
441 |                 Action:
442 |                   - cloudwatch:PutMetricData
443 |                 Resource: "*"
444 | 
445 |   ECSInstanceProfile:
446 |     Type: AWS::IAM::InstanceProfile
447 |     Properties:
448 |       Path: /
449 |       Roles:
450 |         - !Ref ECSRole
451 | 
452 |   StartTasksFunction:
453 |     Type: AWS::Lambda::Function
454 |     Properties:
455 |       Description: Start ECS tasks and scale out the number of EC2 instances if necessary.
456 |       Handler: index.lambda_handler
457 |       Role: !GetAtt StartTasksLambdaRole.Arn
458 |       Runtime: python3.12  # python3.6 is retired; Lambda rejects creating functions on it
459 |       MemorySize: 128
460 |       Timeout: 60
461 |       Code:
462 |         ZipFile: !Sub |
463 |           import json
464 |           import boto3
465 |           import botocore.exceptions
466 | 
467 |           QUEUE = '${StartTasksQueue}'
468 |           CLUSTER = '${ECSCluster}'
469 |           ASG_NAME = '${ECSAutoScalingGroup}'
470 | 
471 |           AS = boto3.client('autoscaling', region_name='${AWS::Region}')
472 |           ECS = boto3.client('ecs', region_name='${AWS::Region}')
473 | 
474 |           def scale_cluster(task):
475 |               # Adds one instance unless a launch is already pending. Always raises so SQS redelivers.
476 |               res = ECS.describe_clusters(clusters=[CLUSTER])
477 |               ecs_instances = res['clusters'][0]['registeredContainerInstancesCount']
478 |               res = AS.describe_auto_scaling_groups(AutoScalingGroupNames=[ASG_NAME])
479 |               desired = res['AutoScalingGroups'][0]['DesiredCapacity']
480 |               if desired > ecs_instances:
481 |                   raise Exception('Instance is starting (asg=%d, ecs=%d) for %s' %
482 |                                   (desired, ecs_instances, task))
483 | 
484 |               desired += 1
485 |               AS.set_desired_capacity(AutoScalingGroupName=ASG_NAME,
486 |                                       DesiredCapacity=desired,
487 |                                       HonorCooldown=False)
488 |               raise Exception('Set desired count of %s to %s for %s' % (ASG_NAME, desired, task))
489 | 
490 |           def run_task(task):
491 |               # Starts the task; on a placement failure scales out and raises so the message is retried.
492 |               try:
493 |                   res = ECS.run_task(**task)
494 |                   if res['failures']:
495 |                       print('Failed to start %s: %s' % (task, res['failures']))
496 |                       scale_cluster(task)
497 |               except botocore.exceptions.ClientError as excp:
498 |                   message = excp.response['Error']['Message']
499 |                   if 'No Container Instances' in message:
500 |                       scale_cluster(task)
501 |                   raise
502 |               print('Started %s' % (task))
503 | 
504 |           def lambda_handler(event, context):
505 |               for record in event['Records']:
506 |                   body = json.loads(record['body'])
507 |                   overrides = {}
508 |                   if 'containerOverrides' in body:
509 |                       # RunTask needs a list, but the documented message format carries one object.
510 |                       items = body['containerOverrides']
511 |                       overrides['containerOverrides'] = items if isinstance(items, list) else [items]
512 |                   run_task({'cluster': CLUSTER,
513 |                             'taskDefinition': body['taskDefinition'],
514 |                             'overrides': overrides,
515 |                             'startedBy': body['jobName'][0:35],
516 |                             'placementStrategy': [{'type': 'binpack', 'field': 'memory'}]})
517 | 
518 | 
519 |   StartTasksLambdaRole:
520 |     Type: AWS::IAM::Role
521 |     Properties:
522 |       Policies:
523 |         - PolicyName: !Sub ${AWS::StackName}-asg
524 |           PolicyDocument:
525 |             Version: 2012-10-17
526 |             Statement:
527 |               - Effect: Allow
528 |                 Action:
529 |                   - autoscaling:DescribeAutoScalingGroups
530 |                   - autoscaling:SetDesiredCapacity
531 |                 Resource: "*"
532 |         - PolicyName: !Sub ${AWS::StackName}-ecs
533 |           PolicyDocument:
534 |             Version: 2012-10-17
535 |             Statement:
536 |               - Effect: Allow
537 |                 Action:
538 |                   - ecs:DescribeClusters
539 |                   - ecs:RunTask
540 |                 Resource: "*"
541 |         - PolicyName: !Sub ${AWS::StackName}-logs
542 |           PolicyDocument:
543 |             Version: 2012-10-17
544 |             Statement:
545 |               - Action:
546 |                   - logs:CreateLogGroup
547 |                   - logs:CreateLogStream
548 |                   - logs:PutLogEvents
549 |                   - logs:DescribeLogGroups
550 |                   - logs:DescribeLogStreams
551 |                 Effect: Allow
552 |                 Resource: "*"
553 |         - PolicyName: !Sub ${AWS::StackName}-sqs
554 |           PolicyDocument:
555 |             Version: 2012-10-17
556 |             Statement:
557 |               - Effect: Allow
558 |                 Action:
559 |                   - sqs:ChangeMessageVisibility
560 |                   - sqs:DeleteMessage
561 |                   - sqs:GetQueueAttributes
562 |                   - sqs:ReceiveMessage
563 |                 Resource:
564 |                   - !GetAtt StartTasksQueue.Arn
565 |                   - !GetAtt StartTasksDeadLetterQueue.Arn
566 |       AssumeRolePolicyDocument:
567 |         Version: 2012-10-17
568 |         Statement:
569 |         - Effect: Allow
570 |           Principal:
571 |             Service:
572 |               - lambda.amazonaws.com
573 |           Action:
574 |             - sts:AssumeRole
575 | 
576 |   StartTasksQueue:
577 |     Type: AWS::SQS::Queue
578 |     Properties:
579 |       DelaySeconds: 0
580 |       MessageRetentionPeriod: 3600
581 |       VisibilityTimeout: 60
582 |       RedrivePolicy:
583 |         deadLetterTargetArn: !GetAtt StartTasksDeadLetterQueue.Arn
584 |         maxReceiveCount: !Ref StartTasksQueueMaxReceiveCount
585 | 
586 |   StartTasksDeadLetterQueue:
587 |     Type: AWS::SQS::Queue
588 |     Properties:
589 |       MessageRetentionPeriod: 1209600
590 | 
591 |   StartTasksQueuePolicy:
592 |     Type: AWS::SQS::QueuePolicy
593 |     Properties:
594 |       Queues:
595 |         - !Ref StartTasksQueue
596 |       PolicyDocument:
597 |         Id: !Sub ${AWS::StackName}-start-task-sqs-policy
598 |         Version: "2012-10-17"
599 |         Statement:
600 |           - Effect: "Allow"
601 |             Principal:
602 |               AWS: "*"
603 |             Action:
604 |               - "sqs:SendMessage"
605 |             Resource: !GetAtt StartTasksQueue.Arn
606 |             Condition:
607 |               ArnEquals:
608 |                 aws:SourceArn: !Sub "arn:aws:*:${AWS::Region}:${AWS::AccountId}:*"
609 | 
610 |   StartTasksFunctionEventSourceMapping:
611 |     Type: AWS::Lambda::EventSourceMapping
612 |     Properties:
613 |       BatchSize: 1
614 |       Enabled: true
615 |       EventSourceArn: !GetAtt StartTasksQueue.Arn
616 |       FunctionName: !GetAtt StartTasksFunction.Arn
617 | 
618 |   TerminateInstancesFunction:
619 |     Type: AWS::Lambda::Function
620 |     Properties:
621 |       Description: Drain and terminate inactive EC2 instances.
622 |       Handler: index.lambda_handler
623 |       Role: !GetAtt TerminateInstancesRole.Arn
624 |       Runtime: python3.12  # python3.6 is retired; Lambda rejects creating functions on it
625 |       MemorySize: 128
626 |       Timeout: 60
627 |       Code:
628 |         ZipFile: !Sub |
629 |           import datetime
630 |           import dateutil.tz
631 |           import boto3
632 | 
633 |           CLUSTER = '${ECSCluster}'
634 |           ASG_NAME = '${ECSAutoScalingGroup}'
635 | 
636 |           def aws(svc):
637 |               return boto3.client(svc, region_name='${AWS::Region}')
638 | 
639 |           AS = aws('autoscaling')
640 |           EC2 = aws('ec2')
641 |           ECS = aws('ecs')
642 | 
643 |           def get_instance_timeout():
644 |               # Cut-off time; anything older than TerminateInstancesAfterIdleSecs is idle or stuck.
645 |               idle = datetime.timedelta(seconds=${TerminateInstancesAfterIdleSecs})
646 |               return datetime.datetime.now(dateutil.tz.tzlocal()) - idle
647 | 
648 |           def get_last_activity(instance):
649 |               # Latest of the registration time and the stop times of the instance's tasks.
650 |               ret = instance['registeredAt']
651 |               paginator = ECS.get_paginator('list_tasks')
652 |               for list_resp in paginator.paginate(cluster=CLUSTER,
653 |                                                   containerInstance=instance['containerInstanceArn'],
654 |                                                   desiredStatus='STOPPED'):
655 |                   if not list_resp['taskArns']:
656 |                       break
657 | 
658 |                   task_resp = ECS.describe_tasks(cluster=CLUSTER, tasks=list_resp['taskArns'])
659 |                   for task in task_resp['tasks']:
660 |                       # stoppedAt is absent until the task has fully stopped; fall back to stoppingAt.
661 |                       ret = max(ret, task.get('stoppedAt', task.get('stoppingAt', ret)))
662 | 
663 |               return ret
664 | 
665 |           def terminate_ec2_instance(instance_id):
666 |               print('Terminating %s' % (instance_id))
667 |               AS.terminate_instance_in_auto_scaling_group(InstanceId=instance_id,
668 |                                                           ShouldDecrementDesiredCapacity=True)
669 | 
670 |           def drain_ecs_instances(all_arns):
671 |               for arn in all_arns:
672 |                   print('Draining %s' % (arn))
673 |                   ECS.update_container_instances_state(cluster=CLUSTER,
674 |                                                        containerInstances=[arn],
675 |                                                        status='DRAINING')
676 | 
677 |           def drain_ec2_instances(instance_ids):
678 |               res = ECS.list_container_instances(cluster=CLUSTER,
679 |                                                  filter='ec2InstanceId in [%s]' % (','.join(instance_ids)))
680 |               drain_ecs_instances(res['containerInstanceArns'])  # no-op when nothing matched
681 | 
682 |           def process_ecs_instances():
683 |               # Terminates drained, task-free instances and drains idle ones. Returns the EC2 ids in ECS.
684 |               ec2_instance_ids = set()
685 |               paginator = ECS.get_paginator('list_container_instances')
686 |               for list_resp in paginator.paginate(cluster=CLUSTER):
687 |                   arns = list_resp['containerInstanceArns']
688 |                   if not arns:
689 |                       continue
690 |                   res = ECS.describe_container_instances(cluster=CLUSTER,
691 |                                                          containerInstances=arns)
692 |                   for instance in res['containerInstances']:
693 |                       ec2_instance_ids.add(instance['ec2InstanceId'])
694 | 
695 |                       if instance['runningTasksCount'] != 0:
696 |                           continue
697 | 
698 |                       if instance['status'] == 'DRAINING':
699 |                           terminate_ec2_instance(instance['ec2InstanceId'])
700 |                       elif get_last_activity(instance) < get_instance_timeout():
701 |                           drain_ecs_instances([instance['containerInstanceArn']])
702 | 
703 |               return ec2_instance_ids
704 | 
705 |           def terminate_bad_ec2_instances(instance_ids):
706 |               # Instances can be spread over several reservations, so scan every one of them.
707 |               for reservation in EC2.describe_instances(InstanceIds=instance_ids)['Reservations']:
708 |                   for instance in reservation['Instances']:
709 |                       if instance['LaunchTime'] < get_instance_timeout():
710 |                           print('Instance %s failed to register with ECS' % (instance['InstanceId']))
711 |                           terminate_ec2_instance(instance['InstanceId'])
712 | 
713 |           def drain_or_terminate_instances(ec2_instances_in_ecs):
714 |               res = AS.describe_auto_scaling_groups(AutoScalingGroupNames=[ASG_NAME])
715 |               if not res['AutoScalingGroups']:
716 |                   return
717 | 
718 |               instance_to_drain = []
719 |               possibly_bad = []
720 |               for ec2 in res['AutoScalingGroups'][0]['Instances']:
721 |                   if ec2['InstanceId'] not in ec2_instances_in_ecs:
722 |                       possibly_bad.append(ec2['InstanceId'])
723 |                   if 'LaunchConfigurationName' not in ec2:  # treated as an outdated launch configuration
724 |                       instance_to_drain.append(ec2['InstanceId'])
725 | 
726 |               if instance_to_drain:
727 |                   drain_ec2_instances(instance_to_drain)
728 | 
729 |               if possibly_bad:
730 |                   terminate_bad_ec2_instances(possibly_bad)
731 | 
732 |           def lambda_handler(event, context):
733 |               ec2_instances_in_ecs = process_ecs_instances()
734 |               drain_or_terminate_instances(ec2_instances_in_ecs)
735 | 
736 |   TerminateInstancesRole:
737 |     Type: AWS::IAM::Role
738 |     Properties:
739 |       Policies:
740 |         - PolicyName: !Sub ${AWS::StackName}-asg
741 |           PolicyDocument:
742 |             Version: 2012-10-17
743 |             Statement:
744 |               - Action:
745 |                   - autoscaling:DescribeAutoScalingGroups
746 |                   - autoscaling:TerminateInstanceInAutoScalingGroup
747 |                 Effect: Allow
748 |                 Resource: "*"
749 |         - PolicyName: !Sub ${AWS::StackName}-ec2
750 |           PolicyDocument:
751 |             Version: 2012-10-17
752 |             Statement:
753 |               - Action:
754 |                   - ec2:DescribeInstances
755 |                 Effect: Allow
756 |                 Resource: "*"
757 |         - PolicyName: !Sub ${AWS::StackName}-ecs
758 |           PolicyDocument:
759 |             Version: 2012-10-17
760 |             Statement:
761 |               - Action:
762 |                   - ecs:DescribeContainerInstances
763 |                   - ecs:DescribeTasks
764 |                   - ecs:ListContainerInstances
765 |                   - ecs:ListTasks
766 |                   - ecs:UpdateContainerInstancesState
767 |                 Effect: Allow
768 |                 Resource: "*"
769 |         - PolicyName: !Sub ${AWS::StackName}-logs
770 |           PolicyDocument:
771 |             Version: 2012-10-17
772 |             Statement:
773 |               - Action:
774 |                   - logs:CreateLogGroup
775 |                   - logs:CreateLogStream
776 |                   - logs:PutLogEvents
777 |                   - logs:DescribeLogGroups
778 |                   - logs:DescribeLogStreams
779 |                 Effect: Allow
780 |                 Resource: "*"
781 |       AssumeRolePolicyDocument:
782 |         Version: 2012-10-17
783 |         Statement:
784 |           - Effect: Allow
785 |             Principal:
786 |               Service:
787 |                 - lambda.amazonaws.com
788 |             Action:
789 |               - sts:AssumeRole
790 | 
791 |   TerminateInstancesRule:
792 |     Type: AWS::Events::Rule
793 |     Properties:
794 |       ScheduleExpression: !Ref TerminateInstancesScheduleExpression
795 |       Targets:
796 |         - Id: !Sub ${AWS::StackName}-terminator
797 |           Arn: !GetAtt TerminateInstancesFunction.Arn
798 | 
799 |   TerminateInstancesInvokePermission:
800 |     Type: AWS::Lambda::Permission
801 |     Properties:
802 |       FunctionName: !GetAtt TerminateInstancesFunction.Arn
803 |       Action: lambda:InvokeFunction
804 |       Principal: events.amazonaws.com
805 |       SourceArn: !GetAtt TerminateInstancesRule.Arn
806 | 
807 | Outputs:  # Every output below is also exported under a name prefixed with this stack's name.
808 |   VPC:
809 |     Description: VPC ID
810 |     Value: !Ref VPC
811 |     Export:
812 |       Name: !Sub ${AWS::StackName}-VPC
813 | 
814 |   ECSHostSecurityGroup:
815 |     Description: Allow access from the internal network.
816 |     Value: !Ref ECSHostSecurityGroup
817 |     Export:
818 |       Name: !Sub ${AWS::StackName}-ecs-host-security-group
819 | 
820 |   ECSAutoScalingGroupName:
821 |     Description: Autoscaling group for the cluster.
822 |     Value: !Ref ECSAutoScalingGroup
823 |     Export:
824 |       Name: !Sub ${AWS::StackName}-auto-scaling-group
825 | 
826 |   StartTasksQueueUrl:
827 |     Description: SQS queue URL for submitting jobs to start.
828 |     Value: !Ref StartTasksQueue
829 |     Export:
830 |       Name: !Sub ${AWS::StackName}-start-tasks-queue-url
831 | 
832 |   StartTasksQueueArn:
833 |     Description: SQS queue ARN for submitting jobs to start.
834 |     Value: !GetAtt StartTasksQueue.Arn
835 |     Export:
836 |       Name: !Sub ${AWS::StackName}-start-tasks-queue-arn
837 | 
838 |   StartTasksDeadLetterQueueUrl:
839 |     Description: SQS dead letter queue URL for jobs that failed to launch.
840 |     Value: !Ref StartTasksDeadLetterQueue
841 |     Export:
842 |       Name: !Sub ${AWS::StackName}-start-tasks-dead-queue-url
843 | 
844 |   StartTasksDeadLetterQueueArn:
845 |     Description: SQS dead letter queue ARN for jobs that failed to launch.
846 |     Value: !GetAtt StartTasksDeadLetterQueue.Arn
847 |     Export:
848 |       Name: !Sub ${AWS::StackName}-start-tasks-dead-queue-arn
849 | 


--------------------------------------------------------------------------------
/ECS-Web-Cluster.template:
--------------------------------------------------------------------------------
   1 | Description: >
   2 |   Basic ECS cluster with an auto scaling group, internet-facing, and internal
   3 |   application load balancers.
   4 | 
   5 | Parameters:
   6 |   InstanceType:
   7 |     Description: The type of instances to use in the ECS cluster. See https://www.ec2instances.info/
   8 |     Type: String
   9 | 
  10 |   MinClusterSize:
  11 |     Description: Minimum number of instances in the ECS cluster.
  12 |     Type: Number
  13 | 
  14 |   MaxClusterSize:
  15 |     Description: Maximum number of instances in the ECS cluster.
  16 |     Type: Number
  17 | 
  18 |   MaxBatchSize:
  19 |     Description: Maximum number of instances to resize in the ECS cluster.
  20 |     Type: Number
  21 |     Default: 1
  22 | 
  23 |   LargestContainerCpuReservation:
  24 |     Description: The CPU reservation for the largest container that will run in the cluster.
  25 |                  LargestContainerCpuReservation and LargestContainerMemoryReservation are used to
  26 |                  publish a custom CloudWatch metric called SchedulableContainers that is used by
  27 |                  instance autoscaling.
  28 |     Type: Number
  29 |     Default: 1024
  30 | 
  31 |   LargestContainerMemoryReservation:
  32 |     Description: The memory reservation for the largest container that will run in the cluster.
  33 |                  LargestContainerCpuReservation and LargestContainerMemoryReservation are used to
  34 |                  publish a custom CloudWatch metric called SchedulableContainers that is used by
  35 |                  instance autoscaling.
  36 |     Type: Number
  37 |     Default: 4096
  38 | 
  39 |   ClusterScaleUpAdjustment:
  40 |     Description: The scaling adjustment during a scale up event. ClusterScaleUpAdjustmentType
  41 |                  determines the unit.
  42 |     Type: String
  43 | 
  44 |   ClusterScaleUpAdjustmentType:
  45 |     Description: The scale up adjustment type.
  46 |     Type: String
  47 |     AllowedValues: [ChangeInCapacity, PercentChangeInCapacity]
  48 |     Default: ChangeInCapacity
  49 | 
  50 |   ClusterScaleUpCooldown:
  51 |     Description: The amount of time, in seconds, after a scale up activity completes before any
  52 |                  further trigger-related scaling activities can start.
  53 |     Type: String
  54 | 
  55 |   ClusterScaleUpMins:
  56 |     Description: The number of minutes that the SchedulableContainers metric is below
  57 |                  ClusterScaleUpThreshold before scaling up the instances.
  58 |     Type: Number
  59 | 
  60 |   ClusterScaleUpThreshold:
  61 |     Description: The number of SchedulableContainers before scaling the cluster up.
  62 |     Type: Number
  63 | 
  64 |   ClusterScaleDownAdjustment:
  65 |     Description: The scaling adjustment during a scale down event. ClusterScaleDownAdjustmentType
  66 |                  determines the unit.
  67 |     Type: String
  68 | 
  69 |   ClusterScaleDownAdjustmentType:
  70 |     Description: The scale down adjustment type.
  71 |     Type: String
  72 |     AllowedValues: [ChangeInCapacity, PercentChangeInCapacity]
  73 |     Default: ChangeInCapacity
  74 | 
  75 |   ClusterScaleDownCooldown:
  76 |     Description: The amount of time, in seconds, after a scale down activity completes before any
  77 |                  further trigger-related scaling activities can start.
  78 |     Type: String
  79 | 
  80 |   ClusterScaleDownMins:
  81 |     Description: The number of minutes that the SchedulableContainers metric is above ClusterScaleDownThreshold before scaling down the instances.
  82 |     Type: Number
  83 | 
  84 |   ClusterScaleDownThreshold:
  85 |     Description: The number of SchedulableContainers before scaling the cluster down.
  86 |     Type: Number
  87 | 
  88 |   VPC:
  89 |     Description: VPC this ECS cluster will be deployed to.
  90 |     Type: AWS::EC2::VPC::Id
  91 | 
  92 |   AmiId:
  93 |     Description: AMI ID for the EC2 instances.
  94 |     Type: String
  95 | 
  96 |   KeyName:
  97 |     Description: Name of an existing EC2 KeyPair to enable SSH access to the ECS instances.
  98 |     Type: AWS::EC2::KeyPair::KeyName
  99 | 
 100 |   AdditonalEC2SecurityGroups:
 101 |     Description: Optional comma separated list of additional security groups to add to the EC2
 102 |                  instances.
 103 |     Type: String
 104 |     Default: ""
 105 | 
 106 |   InternetFacingELBSecurityGroups:
 107 |     Description: Optional comma separated list of internet facing security groups to add to the LoadBalancer to make it restricted.
 108 |     Type: String
 109 |     Default: ""
 110 | 
 111 |   EbsVolumeSize:
 112 |     Description: Size of the EBS volumes on each instance.
 113 |     Type: String
 114 | 
 115 |   DockerVolumeSize:
 116 |     Description: Size of the Docker volumes on each instance.
 117 |     Type: String
 118 |     Default: 50
 119 | 
 120 |   CreateInternetFacingLoadBalancer:
 121 |     Description: Whether or not to create the internet-facing load balancer.
 122 |     Type: String
 123 |     AllowedValues: [true, false]
 124 |     Default: true
 125 | 
 126 |   VpcInternetFacingSubnets:
 127 |     Description: Subnets that the internet-facing load balancers will use. This is not used if
 128 |                  the CreateInternetFacingLoadBalancer parameter is false.
 129 |     Type: List<AWS::EC2::Subnet::Id>
 130 | 
 131 |   CreateInternalLoadBalancer:
 132 |     Description: Whether or not to create the internal load balancer.
 133 |     Type: String
 134 |     AllowedValues: [true, false]
 135 |     Default: true
 136 | 
 137 |   VpcInternalSubnets:
 138 |     Description: Subnets that the internal load balancers (if enabled by CreateInternalLoadBalancer)
 139 |                  and EC2 instances will be placed in.
 140 |     Type: List<AWS::EC2::Subnet::Id>
 141 | 
 142 |   SslCertificateId:
 143 |     Description: SSL Certificate ID for the HTTPS load balancer(s).
 144 |     Type: String
 145 | 
 146 |   OwnerTag:
 147 |     Description: Owner tag for the EC2 instances and EBS volumes.
 148 |     Type: String
 149 | 
 150 |   ProductTag:
 151 |     Description: Product tag for the EC2 instances and EBS volumes.
 152 |     Type: String
 153 | 
 154 |   ComponentTag:
 155 |     Description: Component tag for the EC2 instances and EBS volumes.
 156 |     Type: String
 157 | 
 158 |   EnvironmentTag:
 159 |     Description: Environment tag for the EC2 instances and EBS volumes.
 160 |     Type: String
 161 |     AllowedValues: [dev, qa, prod]
 162 |     ConstraintDescription: Please choose either dev, qa or prod
 163 | 
 164 |   SpotEnabled:
 165 |     Description: Set to true if you want to use autospotting to manage spot instances.
 166 |     Type: String
 167 | 
 168 |   MinOnDemandPercentage:
 169 |     Description: Minimum percentage of on-demand instances in the cluster. This is only used
 170 |                  if the SpotEnabled parameter is true. This tag is used by autospotting.
 171 |     Type: String
 172 | 
 173 |   ChaosMonkey:
 174 |     Description: The value of the chaos_monkey tag for Netflix's Chaos Monkey.
 175 |     Type: String
 176 |     AllowedValues: [true, false]
 177 |     Default: true
 178 | 
 179 |   NewRelicLicenseKey:
 180 |     Description: New Relic license key for the infrastructure monitoring.
 181 |     Type: String
 182 | 
 183 |   Task1ToStartOnAllInstances:
 184 |     Description: Task definition 1 to start on all EC2 instances
 185 |     Type: String
 186 |     Default: ""
 187 | 
 188 |   LogsS3BucketName:
 189 |     Description: The optional name of the S3 bucket that the ALB access logs will be sent to.
 190 |                  Leave blank if you want CloudFormation to pick a name for you based on the name
 191 |                  of your ECS cluster. You typically want to set this if you are deploying a
 192 |                  cluster to multiple regions with the same name, because the default naming scheme
 193 |                  currently doesn't include the region name and S3 bucket names must be globally
 194 |                  unique.
 195 |     Type: String
 196 |     Default: ""
 197 | 
 198 |   PurgeS3LogsOnStackDelete:
 199 |     Description: Set this to true if this is a test cluster and you want the logs S3 bucket to be
 200 |                  automatically purged so that the CloudFormation stack delete operation does not
 201 |                  fail.
 202 |     Type: String
 203 |     AllowedValues: [true, false]
 204 |     Default: false
 205 | 
 206 |   DefaultHttpRedirect:
 207 |     Description: The URL that the user will be redirected to when the requested hostname is not
 208 |                  configured in the ALB. If this property is not specified, then the user will see
 209 |                  a HTTP 503 error. This requires DefaultHttpRedirectImage to be set.
 210 |     Type: String
 211 |     Default: ""
 212 | 
 213 |   DefaultHttpRedirectImage:
 214 |     Description: Docker image that provides the HTTP redirect service. See nginx-redirect directory
 215 |                  for an example.
 216 |     Type: String
 217 |     Default: ""
 218 | 
 219 |   ASGTerminateTimeout:
 220 |     Description: The number of seconds to wait for existing ECS tasks to drain before forcefully
 221 |                  terminating the EC2 instance. This occurs when upgrading to a newer version of an
 222 |                  AMI, or when the cluster is scaled down.
 223 |     Type: Number
 224 |     Default: 900
 225 | 
 226 |   ContainerDeviceManagerSize:
 227 |     Description: The amount of disk space to allocate in device manager to each running container.
 228 |                  Currently ECS does not allow configuring this on a per-container basis.
 229 |     Type: String
 230 |     Default: "10"
 231 | 
 232 | Conditions:
 233 |   HasAdditonalEC2SecurityGroups: !Not [ !Equals [!Ref AdditonalEC2SecurityGroups, '']]
 234 |   HasInternetFacingELBSecurityGroups: !Not [ !Equals [!Ref InternetFacingELBSecurityGroups, '']]
 235 |   HasInternalLoadBalancer: !Equals [!Ref CreateInternalLoadBalancer, true]
 236 |   HasInternetFacingLoadBalancer: !Equals [!Ref CreateInternetFacingLoadBalancer, true]
 237 |   HasLoadBalancerResources: !Or [Condition: HasInternalLoadBalancer,
 238 |                                  Condition: HasInternetFacingLoadBalancer]
 239 |   HasPurgeS3LogsOnStackDelete: !And [!Equals [!Ref PurgeS3LogsOnStackDelete, true],
 240 |                                      Condition: HasLoadBalancerResources]
 241 |   HasLogsS3BucketName: !Not [ !Equals [!Ref LogsS3BucketName, '']]
 242 |   HasDefaultHttpRedirect: !And [!Not [ !Equals [!Ref DefaultHttpRedirect, '']],
 243 |                                 Condition: HasLoadBalancerResources]
 244 | 
 245 | Resources:
 246 |   ECSCluster:
 247 |     Type: AWS::ECS::Cluster
 248 |     Properties:
 249 |       ClusterName: !Ref AWS::StackName
 250 | 
 251 |   ECSAutoScalingGroup:
 252 |     Type: AWS::AutoScaling::AutoScalingGroup
 253 |     Properties:
 254 |       VPCZoneIdentifier: !Ref VpcInternalSubnets
 255 |       LaunchConfigurationName: !Ref ECSLaunchConfiguration
 256 |       MinSize: !Ref MinClusterSize
 257 |       MaxSize: !Ref MaxClusterSize
 258 |       MetricsCollection:
 259 |         - Granularity: 1Minute
 260 |       NotificationConfigurations:
 261 |       - TopicARN: !Ref ASGTerminateSNSTopic
 262 |         NotificationTypes:
 263 |         - autoscaling:EC2_INSTANCE_TERMINATE
 264 |       Tags:
 265 |         - Key: Name
 266 |           Value: !Ref AWS::StackName
 267 |           PropagateAtLaunch: true
 268 |         - Key: owner
 269 |           Value: !Ref OwnerTag
 270 |           PropagateAtLaunch: true
 271 |         - Key: product
 272 |           Value: !Ref ProductTag
 273 |           PropagateAtLaunch: true
 274 |         - Key: component
 275 |           Value: !Ref ComponentTag
 276 |           PropagateAtLaunch: true
 277 |         - Key: environment
 278 |           Value: !Ref EnvironmentTag
 279 |           PropagateAtLaunch: true
 280 |         - Key: spot-enabled
 281 |           Value: !Ref SpotEnabled
 282 |           PropagateAtLaunch: false
 283 |         - Key: autospotting_min_on_demand_percentage
 284 |           Value: !Ref MinOnDemandPercentage
 285 |           PropagateAtLaunch: false
 286 |         - Key: chaos_monkey
 287 |           Value: !Ref ChaosMonkey
 288 |           PropagateAtLaunch: true
 289 |     CreationPolicy:
 290 |       ResourceSignal:
 291 |         Timeout: PT15M
 292 |     UpdatePolicy:
 293 |       AutoScalingRollingUpdate:
 294 |         MinInstancesInService: 1
 295 |         MaxBatchSize: !Ref MaxBatchSize
 296 |         PauseTime: PT15M
 297 |         WaitOnResourceSignals: true
 298 | 
 299 |   ScaleUpPolicy:
 300 |     Type: AWS::AutoScaling::ScalingPolicy
 301 |     Properties:
 302 |       AdjustmentType: !Ref ClusterScaleUpAdjustmentType
 303 |       PolicyType: SimpleScaling
 304 |       Cooldown: !Ref ClusterScaleUpCooldown
 305 |       AutoScalingGroupName:
 306 |         Ref: ECSAutoScalingGroup
 307 |       ScalingAdjustment: !Ref ClusterScaleUpAdjustment
 308 | 
 309 |   ScaleDownPolicy:
 310 |     Type: AWS::AutoScaling::ScalingPolicy
 311 |     Properties:
 312 |       AdjustmentType: !Ref ClusterScaleDownAdjustmentType
 313 |       PolicyType: SimpleScaling
 314 |       Cooldown: !Ref ClusterScaleDownCooldown
 315 |       AutoScalingGroupName:
 316 |         Ref: ECSAutoScalingGroup
 317 |       ScalingAdjustment: !Ref ClusterScaleDownAdjustment
 318 | 
 319 |   SchedulableContainersLow:
 320 |     Type: AWS::CloudWatch::Alarm
 321 |     Properties:
 322 |       EvaluationPeriods: !Ref ClusterScaleUpMins
 323 |       Statistic: Average
 324 |       Threshold: !Ref ClusterScaleUpThreshold
 325 |       AlarmDescription: Scale up if the SchedulableContainers metric is low
 326 |       Period: '60'
 327 |       AlarmActions:
 328 |       - Ref: ScaleUpPolicy
 329 |       Namespace: AWS/ECS
 330 |       Dimensions:
 331 |       - Name: ClusterName
 332 |         Value:
 333 |           Ref: ECSCluster
 334 |       ComparisonOperator: LessThanThreshold
 335 |       MetricName: SchedulableContainers
 336 | 
 337 |   SchedulableContainersHigh:
 338 |     Type: AWS::CloudWatch::Alarm
 339 |     Properties:
 340 |       EvaluationPeriods: !Ref ClusterScaleDownMins
 341 |       Statistic: Maximum
 342 |       Threshold: !Ref ClusterScaleDownThreshold
 343 |       AlarmDescription: Scale down if the SchedulableContainers metric is high
 344 |       Period: '60'
 345 |       AlarmActions:
 346 |       - Ref: ScaleDownPolicy
 347 |       Namespace: AWS/ECS
 348 |       Dimensions:
 349 |       - Name: ClusterName
 350 |         Value:
 351 |           Ref: ECSCluster
 352 |       ComparisonOperator: GreaterThanThreshold
 353 |       MetricName: SchedulableContainers
 354 | 
 355 |   ELBSecurityGroup:
 356 |     Type: AWS::EC2::SecurityGroup
 357 |     Metadata:
 358 |       cfn_nag:
 359 |         rules_to_suppress:
 360 |           - id: W2
 361 |             reason: "This is a public facing ELB and all traffic should be permitted."
 362 |           - id: W5
 363 |             reason: "This is a public facing ELB and all traffic should be permitted."
 364 |           - id: W9
 365 |             reason: "This is a public facing ELB and all traffic should be permitted."
 366 |     Properties:
 367 |       VpcId: !Ref VPC
 368 |       GroupDescription: Allow access to the EC2 instances from the ELB
 369 |       SecurityGroupEgress:
 370 |         - CidrIp: 0.0.0.0/0
 371 |           IpProtocol: -1
 372 | 
 373 |   ECSHostSecurityGroup:
 374 |     Type: AWS::EC2::SecurityGroup
 375 |     Metadata:
 376 |       cfn_nag:
 377 |         rules_to_suppress:
 378 |           - id: W5
 379 |             reason: "Allow all outbound network traffic."
 380 |           - id: W9
 381 |             reason: "Allow direct access to the ECS services from the internal network."
 382 |     Properties:
 383 |       VpcId: !Ref VPC
 384 |       GroupDescription: Allow access from the ELB and internet network.
 385 |       SecurityGroupIngress:
 386 |         - SourceSecurityGroupId: !Ref ELBSecurityGroup
 387 |           IpProtocol: -1
 388 |         - CidrIp: 10.0.0.0/8
 389 |           IpProtocol: -1
 390 |         - CidrIp: 192.168.0.0/16
 391 |           IpProtocol: -1
 392 |       SecurityGroupEgress:
 393 |         - CidrIp: 0.0.0.0/0
 394 |           IpProtocol: -1
 395 | 
 396 |   ECSLaunchConfiguration:
 397 |     Type: AWS::AutoScaling::LaunchConfiguration
 398 |     Properties:
 399 |       ImageId: !Ref AmiId
 400 |       InstanceType: !Ref InstanceType
 401 |       SecurityGroups:
 402 |         'Fn::If':
 403 |           - HasAdditonalEC2SecurityGroups
 404 |           - !Split [',', !Join [',', [!Ref AdditonalEC2SecurityGroups, !Ref ECSHostSecurityGroup]]]
 405 |           - !Split [',', !Ref ECSHostSecurityGroup]
 406 |       IamInstanceProfile: !Ref ECSInstanceProfile
 407 |       KeyName: !Ref 'KeyName'
 408 |       BlockDeviceMappings:
 409 |         - DeviceName: "/dev/xvda"
 410 |           Ebs:
 411 |             VolumeSize: !Ref EbsVolumeSize
 412 |             VolumeType: gp2
 413 |         - DeviceName: "/dev/xvdcz"
 414 |           Ebs:
 415 |             VolumeSize: !Ref DockerVolumeSize
 416 |             VolumeType: gp2
 417 | 
 418 |       UserData:
 419 |         "Fn::Base64": !Sub |
 420 |           #!/bin/bash
 421 | 
 422 |           # Increment version number below to force new instances in the cluster.
 423 |           # This is intentionally not a CloudFormation parameter.
 424 |           # Version: 1
 425 | 
 426 |           yum install -y python36 python36-pip
 427 |           pip-3.6 install boto3
 428 | 
 429 |           /opt/aws/bin/cfn-init -v --region ${AWS::Region} --stack ${AWS::StackName} \
 430 |                         --resource ECSLaunchConfiguration
 431 |           /opt/aws/bin/cfn-signal -e $? --region ${AWS::Region} --stack ${AWS::StackName} \
 432 |                         --resource ECSAutoScalingGroup
 433 | 
 434 |           yum update newrelic-infra -y
 435 |           echo "license_key: ${NewRelicLicenseKey}" > /etc/newrelic-infra.yml
 436 |           initctl start newrelic-infra
 437 | 
 438 |           vgextend docker /dev/xvdcz
 439 |           lvextend -L+${DockerVolumeSize}G /dev/docker/docker-pool
 440 | 
 441 |     Metadata:
 442 |       AWS::CloudFormation::Init:
 443 |         config:
 444 |           commands:
 445 |             01_add_ebs_tags:
 446 |               command: /usr/local/bin/create-ebs-tags.py
 447 |             02_docker_storage_opt:
 448 |               command: !Sub echo 'OPTIONS="$OPTIONS --storage-opt dm.basesize=${ContainerDeviceManagerSize}G"' >> /etc/sysconfig/docker
 449 | 
 450 |           files:
 451 |             "/etc/ecs/ecs.config":
 452 |               mode: "000644"
 453 |               owner: root
 454 |               group: root
 455 |               content: !Sub |
 456 |                ECS_CLUSTER=${ECSCluster}
 457 |                ECS_AVAILABLE_LOGGING_DRIVERS=["splunk","awslogs","json-file"]
 458 | 
 459 |             "/etc/init/spot-watcher.conf":
 460 |               mode: "000644"
 461 |               owner: root
 462 |               group: root
 463 |               content: !Sub |
 464 |                 description "Set instance to draining when spot instance is about to be terminated."
 465 |                 start on stopped rc RUNLEVEL=[345]
 466 |                 exec /usr/local/bin/spot-watcher.py
 467 | 
 468 |             "/usr/local/bin/spot-watcher.py":  # Polls for a spot termination notice, then drains this instance.
 469 |               mode: "000755"
 470 |               owner: root
 471 |               group: root
 472 |               content: !Sub |
 473 |                 #!/usr/bin/env python3
 474 | 
 475 |                 import json
 476 |                 import time
 477 |                 import urllib.request  # "import urllib" alone does not load the request submodule
 478 |                 import boto3
 479 | 
 480 |                 CLUSTER = '${AWS::StackName}'
 481 |                 REGION = '${AWS::Region}'
 482 | 
 483 |                 def spot_watcher():  # Block until termination is scheduled, then set this instance to DRAINING.
 484 |                     while not is_scheduled_for_termination():
 485 |                         time.sleep(5)
 486 | 
 487 |                     url = 'http://localhost:51678/v1/metadata'  # local endpoint that reports ContainerInstanceArn
 488 |                     body = urllib.request.urlopen(url).read().decode('UTF-8')
 489 |                     instance_arn = json.loads(body)['ContainerInstanceArn']
 490 | 
 491 |                     client = boto3.client('ecs', region_name=REGION)
 492 |                     print('Setting containers on instance %s to draining.' % (instance_arn))
 493 |                     client.update_container_instances_state(cluster=CLUSTER,
 494 |                                                             containerInstances=[instance_arn],
 495 |                                                             status='DRAINING')
 496 | 
 497 |                 def is_scheduled_for_termination():  # True once the termination-time URL can be fetched.
 498 |                     url = 'http://169.254.169.254/latest/meta-data/spot/termination-time'
 499 |                     try:
 500 |                         urllib.request.urlopen(url).read()
 501 |                         return True
 502 |                     except Exception:  # any failure to fetch is treated as "not scheduled"
 503 |                         return False
 504 | 
 505 |                 if __name__ == '__main__':
 506 |                     spot_watcher()
 507 | 
 508 |             "/etc/init/task1.conf":
 509 |               mode: "000644"
 510 |               owner: root
 511 |               group: root
 512 |               content: !Sub |
 513 |                 description "Task 1"
 514 |                 start on started ecs
 515 |                 script
 516 |                   # Wait for ECS agent to start
 517 |                   sleep 5
 518 |                   /usr/local/bin/start-ecs-task.py "${Task1ToStartOnAllInstances}"
 519 |                 end script
 520 |                 respawn limit 10 10
 521 | 
 522 |             "/usr/local/bin/start-ecs-task.py":  # Starts the given task definition on this container instance.
 523 |               mode: "000755"
 524 |               owner: root
 525 |               group: root
 526 |               content: !Sub |
 527 |                 #!/usr/bin/env python3
 528 | 
 529 |                 import json
 530 |                 import sys
 531 |                 import urllib.request  # "import urllib" alone does not load the request submodule
 532 |                 import boto3
 533 | 
 534 |                 CLUSTER = '${AWS::StackName}'
 535 |                 REGION = '${AWS::Region}'
 536 | 
 537 |                 def start_task(task_arn):  # task_arn is passed through as the taskDefinition argument.
 538 |                     url = 'http://localhost:51678/v1/metadata'  # local endpoint that reports ContainerInstanceArn
 539 |                     body = urllib.request.urlopen(url).read().decode('UTF-8')
 540 |                     instance_arn = json.loads(body)['ContainerInstanceArn']
 541 | 
 542 |                     client = boto3.client('ecs', region_name=REGION)
 543 |                     print('Starting task %s on instance %s.' % (task_arn, instance_arn))
 544 |                     client.start_task(cluster=CLUSTER,
 545 |                                       taskDefinition=task_arn,
 546 |                                       containerInstances=[instance_arn])
 547 | 
 548 |                 if __name__ == '__main__':
 549 |                     if len(sys.argv) == 1 or sys.argv[1] == '' or sys.argv[1] == '-':  # no task given: nothing to do
 550 |                         sys.exit(0)
 551 | 
 552 |                     start_task(sys.argv[1])
 553 | 
 554 |             "/usr/local/bin/create-ebs-tags.py":  # Tags every EBS volume attached to this instance.
 555 |               mode: "000755"
 556 |               owner: root
 557 |               group: root
 558 |               content: !Sub |
 559 |                 #!/usr/bin/env python3
 560 | 
 561 |                 import urllib.request  # "import urllib" alone does not load the request submodule
 562 |                 import boto3
 563 | 
 564 |                 def tag_ebs_volumes():  # Look up this instance's volumes and apply the stack's tags to each.
 565 |                     client = boto3.client('ec2', region_name='${AWS::Region}')
 566 |                     url = 'http://169.254.169.254/latest/meta-data/instance-id'
 567 |                     instance_id = urllib.request.urlopen(url).read().decode('UTF-8')
 568 |                     response = client.describe_volumes(Filters=[{'Name': 'attachment.instance-id',
 569 |                                                                  'Values': [instance_id]}])
 570 |                     for volume in response['Volumes']:
 571 |                         print('Tagging %s' % (volume['VolumeId']))
 572 |                         client.create_tags(Resources=[volume['VolumeId']],
 573 |                                            Tags=[{'Key': 'Name', 'Value': '${AWS::StackName}'},
 574 |                                                  {'Key': 'product', 'Value': '${ProductTag}'},
 575 |                                                  {'Key': 'component', 'Value': '${ComponentTag}'},
 576 |                                                  {'Key': 'owner', 'Value': '${OwnerTag}'},
 577 |                                                  {'Key': 'environment', 'Value': '${EnvironmentTag}'}])
 578 | 
 579 |                 if __name__ == '__main__':
 580 |                     tag_ebs_volumes()
 581 | 
 582 |             "/etc/cfn/cfn-hup.conf":
 583 |               mode: "000400"
 584 |               owner: root
 585 |               group: root
 586 |               content: !Sub |
 587 |                 [main]
 588 |                 stack=${AWS::StackId}
 589 |                 region=${AWS::Region}
 590 | 
 591 |             "/etc/cfn/hooks.d/cfn-auto-reloader.conf":
 592 |               content: !Sub |
 593 |                 [cfn-auto-reloader-hook]
 594 |                 triggers=post.update
 595 |                 path=Resources.ECSLaunchConfiguration.Metadata.AWS::CloudFormation::Init
 596 |                 action=/opt/aws/bin/cfn-init -v --region ${AWS::Region} --stack ${AWS::StackName} --resource ECSLaunchConfiguration
 597 | 
 598 |           services:
 599 |             sysvinit:
 600 |               cfn-hup:
 601 |                 enabled: true
 602 |                 ensureRunning: true
 603 |                 files:
 604 |                   - /etc/cfn/cfn-hup.conf
 605 |                   - /etc/cfn/hooks.d/cfn-auto-reloader.conf
 606 | 
 607 |   ECSRole:
 608 |     Type: AWS::IAM::Role
 609 |     Properties:
 610 |       Path: /
 611 |       RoleName: !Sub ${AWS::StackName}-ECSRole-${AWS::Region}
 612 |       AssumeRolePolicyDocument:
 613 |         Version: 2012-10-17
 614 |         Statement:
 615 |           - Effect: Allow
 616 |             Principal:
 617 |               Service:
 618 |                 - ec2.amazonaws.com
 619 |             Action:
 620 |               - sts:AssumeRole
 621 |       ManagedPolicyArns:
 622 |         - arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role
 623 |       Policies:
 624 |         - PolicyName: !Sub ecs-service-${AWS::StackName}-logging
 625 |           PolicyDocument:
 626 |             Version: 2012-10-17
 627 |             Statement:
 628 |               - Effect: Allow
 629 |                 Action:
 630 |                   - logs:CreateLogGroup
 631 |                   - logs:CreateLogStream
 632 |                   - logs:PutLogEvents
 633 |                   - logs:DescribeLogGroups
 634 |                   - logs:DescribeLogStreams
 635 |                 Resource:
 636 |                   - "*"
 637 |         - PolicyName: !Sub ecs-service-${AWS::StackName}-ebs-tags
 638 |           PolicyDocument:
 639 |             Version: 2012-10-17
 640 |             Statement:
 641 |               - Effect: Allow
 642 |                 Action:
 643 |                   - ec2:DescribeVolumes
 644 |                   - ec2:CreateTags
 645 |                 Resource:
 646 |                   - "*"
 647 |         - PolicyName: !Sub ecs-service-${AWS::StackName}-spot
 648 |           PolicyDocument:
 649 |             Version: 2012-10-17
 650 |             Statement:
 651 |               - Effect: Allow
 652 |                 Action:
 653 |                   - ecs:UpdateContainerInstancesState
 654 |                 Resource:
 655 |                   - "*"
 656 |         - PolicyName: !Sub ecs-service-${AWS::StackName}-cfn-init
 657 |           PolicyDocument:
 658 |             Version: 2012-10-17
 659 |             Statement:
 660 |               - Effect: Allow
 661 |                 Action:
 662 |                   - cloudformation:DescribeStackResource
 663 |                   - cloudformation:SignalResource
 664 |                 Resource:
 665 |                   - "*"
 666 |         - PolicyName: !Sub ecs-service-${AWS::StackName}-start-task
 667 |           PolicyDocument:
 668 |             Version: 2012-10-17
 669 |             Statement:
 670 |               - Effect: Allow
 671 |                 Action:
 672 |                   - ecs:StartTask
 673 |                 Resource:
 674 |                   - "*"
 675 |         - PolicyName: !Sub ecs-service-${AWS::StackName}-mon-put-data
 676 |           PolicyDocument:
 677 |             Version: 2012-10-17
 678 |             Statement:
 679 |               - Effect: Allow
 680 |                 Action:
 681 |                   - cloudwatch:PutMetricData
 682 |                 Resource:
 683 |                   - "*"
 684 | 
 685 |   ECSInstanceProfile:
 686 |     Type: AWS::IAM::InstanceProfile
 687 |     Properties:
 688 |       Path: /
 689 |       Roles:
 690 |         - !Ref ECSRole
 691 | 
 692 |   LogsS3Bucket:
 693 |     Type: AWS::S3::Bucket
 694 |     Condition: HasLoadBalancerResources
 695 |     Properties:
 696 |       AccessControl: Private
 697 |       BucketName:
 698 |         'Fn::If':
 699 |           - HasLogsS3BucketName
 700 |           - !Ref LogsS3BucketName
 701 |           - !Sub ${AWS::StackName}-${EnvironmentTag}-logs
 702 |       LifecycleConfiguration:
 703 |         Rules:
 704 |           - ExpirationInDays: 365
 705 |             Status: Enabled
 706 |       Tags:
 707 |         - Key: classification
 708 |           Value: internal
 709 | 
 710 |   LogsS3BucketPolicy:  # Lets the regional Elastic Load Balancing accounts write access logs into LogsS3Bucket.
 711 |     Type: AWS::S3::BucketPolicy
 712 |     Condition: HasLoadBalancerResources
 713 |     Properties:
 714 |       Bucket: !Ref LogsS3Bucket
 715 |       PolicyDocument:
 716 |         Statement:
 717 |           - Effect: Allow
 718 |             Principal:
 719 |               AWS:
 720 |                 # The Elastic Load Balancing account ID for each region can be found at
 721 |                 # https://docs.aws.amazon.com/elasticloadbalancing/latest/classic/enable-access-logs.html
 722 |                 # Currently configures us-east-1, us-east-2, and us-west-2.
 723 |                 - arn:aws:iam::127311923021:root  # us-east-1
 724 |                 - arn:aws:iam::033677994240:root  # us-east-2
 725 |                 - arn:aws:iam::797873946194:root  # us-west-2
 726 |             Action:
 727 |               - s3:PutObject
 728 |               - s3:ListBucket
 729 |             Resource:  # Both the bucket itself and every object key inside it.
 730 |               - !Sub arn:aws:s3:::${LogsS3Bucket}
 731 |               - !Sub arn:aws:s3:::${LogsS3Bucket}/*
 732 | 
 733 |   PurgeS3BucketOnDeleteFunction:  # Backs the PurgeS3BucketOnDelete custom resource.
 734 |     Type: AWS::Lambda::Function
 735 |     Condition: HasPurgeS3LogsOnStackDelete
 736 |     Properties:
 737 |       Description: Purge S3 logs when the CloudFormation stack is deleted.
 738 |       Handler: index.lambda_handler
 739 |       Role: !GetAtt PurgeS3BucketOnDeleteRole.Arn
 740 |       Runtime: python3.12  # python3.6 is retired; Lambda no longer creates functions on it.
 741 |       MemorySize: 256
 742 |       Timeout: 300
 743 |       Code:
 744 |         ZipFile: !Sub |
 745 |           import boto3
 746 |           import cfnresponse
 747 | 
 748 |           BUCKET_NAME = '${LogsS3Bucket}'
 749 |           REGION = '${AWS::Region}'
 750 | 
 751 |           def empty_s3_bucket():  # Deletes every object in the access-log bucket.
 752 |               boto3.resource('s3', region_name=REGION).Bucket(BUCKET_NAME).objects.all().delete()
 753 | 
 754 |           def lambda_handler(event, context):
 755 |               try:
 756 |                   if event['RequestType'] == 'Delete':  # Create/Update have nothing to purge.
 757 |                       empty_s3_bucket()
 758 |                   cfnresponse.send(event, context, cfnresponse.SUCCESS, {})
 759 |               except Exception as exc:  # Always answer, or CloudFormation waits for a timeout.
 760 |                   print('Failed to purge %s: %s' % (BUCKET_NAME, exc))
 761 |                   cfnresponse.send(event, context, cfnresponse.FAILED, {})
 762 | 
 763 |   PurgeS3BucketOnDeleteRole:  # Execution role used by PurgeS3BucketOnDeleteFunction.
 764 |     Type: AWS::IAM::Role
 765 |     Condition: HasPurgeS3LogsOnStackDelete
 766 |     Properties:
 767 |       RoleName: !Sub purge-${AWS::StackName}-${AWS::Region}
 768 |       Path: /
 769 |       AssumeRolePolicyDocument:  # Only the Lambda service may assume this role.
 770 |         Version: "2012-10-17"
 771 |         Statement:
 772 |           - Effect: Allow
 773 |             Principal:
 774 |               Service:
 775 |                 - lambda.amazonaws.com
 776 |             Action:
 777 |               - sts:AssumeRole
 778 |       Policies:
 779 |         - PolicyName: !Sub purge-s3-${AWS::StackName}  # List and delete objects, limited to the logs bucket.
 780 |           PolicyDocument:
 781 |             Version: 2012-10-17
 782 |             Statement:
 783 |               Action:
 784 |                 - s3:DeleteObject
 785 |                 - s3:ListBucket
 786 |               Effect: Allow
 787 |               Resource:
 788 |                 - !Sub arn:aws:s3:::${LogsS3Bucket}
 789 |                 - !Sub arn:aws:s3:::${LogsS3Bucket}/*
 790 |         - PolicyName: !Sub purge-s3-${AWS::StackName}-logs  # Lets the function write to CloudWatch Logs.
 791 |           PolicyDocument:
 792 |             Version: 2012-10-17
 793 |             Statement:
 794 |               Action:
 795 |                 - logs:CreateLogGroup
 796 |                 - logs:CreateLogStream
 797 |                 - logs:PutLogEvents
 798 |               Effect: Allow
 799 |               Resource: "*"
 800 | 
 801 |   PurgeS3BucketOnDelete:  # Custom resource whose Delete request makes the purge function empty the logs bucket.
 802 |     Type: AWS::CloudFormation::CustomResource
 803 |     Condition: HasPurgeS3LogsOnStackDelete
 804 |     DependsOn:  # Orders deletion: this resource (and so the purge) is removed before the bucket.
 805 |       - LogsS3Bucket
 806 |     Properties:
 807 |       ServiceToken: !GetAtt PurgeS3BucketOnDeleteFunction.Arn
 808 | 
 809 |   LoadBalancerSecurityGroup:  # Wide-open security group used by the load balancers defined in this template.
 810 |     Type: AWS::EC2::SecurityGroup
 811 |     Metadata:
 812 |       cfn_nag:  # Suppresses the cfn_nag findings listed below, each with its stated reason.
 813 |         rules_to_suppress:
 814 |           - id: W2
 815 |             reason: "This is a public facing ELB and all traffic should be permitted."
 816 |           - id: W5
 817 |             reason: "This is a public facing ELB and all traffic should be permitted."
 818 |           - id: W9
 819 |             reason: "This is a public facing ELB and all traffic should be permitted."
 820 |     Condition: HasLoadBalancerResources
 821 |     Properties:
 822 |       VpcId: !Ref VPC
 823 |       GroupDescription: Access to the load balancer that sits in front of ECS
 824 |       SecurityGroupIngress:  # Any protocol, from any IPv4 address.
 825 |         - CidrIp: 0.0.0.0/0
 826 |           IpProtocol: -1
 827 |       SecurityGroupEgress:  # Any protocol, to any IPv4 address.
 828 |         - CidrIp: 0.0.0.0/0
 829 |           IpProtocol: -1
 830 | 
 831 |   InternetFacingLoadBalancer:  # Public load balancer; access logs go to LogsS3Bucket under internet-facing/.
 832 |     Type: AWS::ElasticLoadBalancingV2::LoadBalancer
 833 |     Condition: HasInternetFacingLoadBalancer
 834 |     DependsOn:  # The bucket and its policy are created before the load balancer.
 835 |       - LogsS3Bucket
 836 |       - LogsS3BucketPolicy
 837 |     Properties:
 838 |       LoadBalancerAttributes:
 839 |         - Key: idle_timeout.timeout_seconds
 840 |           Value: 60
 841 |         - Key: access_logs.s3.enabled
 842 |           Value: true
 843 |         - Key: access_logs.s3.bucket
 844 |           Value: !Ref LogsS3Bucket
 845 |         - Key: access_logs.s3.prefix
 846 |           Value: internet-facing
 847 |       Subnets: !Ref VpcInternetFacingSubnets
 848 |       Scheme: internet-facing
 849 |       SecurityGroups:
 850 |         'Fn::If':  # Supplied groups replace LoadBalancerSecurityGroup; ELBSecurityGroup is added either way.
 851 |           - HasInternetFacingELBSecurityGroups
 852 |           - !Split [',', !Join [',', [!Ref InternetFacingELBSecurityGroups, !Ref ELBSecurityGroup]]]
 853 |           - !Split [',', !Join [',', [!Ref LoadBalancerSecurityGroup, !Ref ELBSecurityGroup]]]
 854 | 
 855 |   InternetFacingLoadBalancerHttpListener:  # HTTP on port 80 of the internet-facing load balancer.
 856 |     Type: AWS::ElasticLoadBalancingV2::Listener
 857 |     Condition: HasInternetFacingLoadBalancer
 858 |     Properties:
 859 |       LoadBalancerArn: !Ref InternetFacingLoadBalancer
 860 |       Port: 80
 861 |       Protocol: HTTP
 862 |       DefaultActions:  # Requests matched by no other rule are forwarded to the default target group.
 863 |         - Type: forward
 864 |           TargetGroupArn: !Ref InternetFacingDefaultTargetGroup
 865 | 
 866 |   InternetFacingLoadBalancerHttpsListener:  # HTTPS on port 443 of the internet-facing load balancer.
 867 |     Type: AWS::ElasticLoadBalancingV2::Listener
 868 |     Condition: HasInternetFacingLoadBalancer
 869 |     Properties:
 870 |       LoadBalancerArn: !Ref InternetFacingLoadBalancer
 871 |       Certificates:  # Certificate comes from the SslCertificateId value.
 872 |         - CertificateArn: !Ref SslCertificateId
 873 |       Port: 443
 874 |       Protocol: HTTPS
 875 |       DefaultActions:  # Same default target group as the HTTP listener.
 876 |         - Type: forward
 877 |           TargetGroupArn: !Ref InternetFacingDefaultTargetGroup
 878 | 
 879 |   InternalLoadBalancer:  # Internal-scheme load balancer; access logs go to LogsS3Bucket under internal/.
 880 |     Type: AWS::ElasticLoadBalancingV2::LoadBalancer
 881 |     Condition: HasInternalLoadBalancer
 882 |     DependsOn:  # The bucket and its policy are created before the load balancer.
 883 |       - LogsS3Bucket
 884 |       - LogsS3BucketPolicy
 885 |     Properties:
 886 |       LoadBalancerAttributes:
 887 |         - Key: idle_timeout.timeout_seconds
 888 |           Value: 60
 889 |         - Key: access_logs.s3.enabled
 890 |           Value: true
 891 |         - Key: access_logs.s3.bucket
 892 |           Value: !Ref LogsS3Bucket
 893 |         - Key: access_logs.s3.prefix
 894 |           Value: internal
 895 |       Subnets: !Ref VpcInternalSubnets
 896 |       Scheme: internal
 897 |       SecurityGroups:  # NOTE(review): LoadBalancerSecurityGroup allows 0.0.0.0/0 - confirm that is intended for the internal scheme.
 898 |         - !Ref LoadBalancerSecurityGroup
 899 |         - !Ref ELBSecurityGroup
 900 | 
 901 |   InternalLoadBalancerHttpListener:  # HTTP on port 80 of the internal load balancer.
 902 |     Type: AWS::ElasticLoadBalancingV2::Listener
 903 |     Condition: HasInternalLoadBalancer
 904 |     Properties:
 905 |       LoadBalancerArn: !Ref InternalLoadBalancer
 906 |       Port: 80
 907 |       Protocol: HTTP
 908 |       DefaultActions:  # Requests matched by no other rule are forwarded to the default target group.
 909 |         - Type: forward
 910 |           TargetGroupArn: !Ref InternalDefaultTargetGroup
 911 | 
 912 |   InternalLoadBalancerHttpsListener:  # HTTPS on port 443 of the internal load balancer.
 913 |     Type: AWS::ElasticLoadBalancingV2::Listener
 914 |     Condition: HasInternalLoadBalancer
 915 |     Properties:
 916 |       LoadBalancerArn: !Ref InternalLoadBalancer
 917 |       Certificates:  # Certificate comes from the SslCertificateId value.
 918 |         - CertificateArn: !Ref SslCertificateId
 919 |       Port: 443
 920 |       Protocol: HTTPS
 921 |       DefaultActions:  # Same default target group as the HTTP listener.
 922 |         - Type: forward
 923 |           TargetGroupArn: !Ref InternalDefaultTargetGroup
 924 | 
 925 |   # Default target groups
 926 |   InternetFacingDefaultTargetGroup:  # Default target of both internet-facing listeners.
 927 |     Type: AWS::ElasticLoadBalancingV2::TargetGroup
 928 |     Condition: HasInternetFacingLoadBalancer
 929 |     Properties:
 930 |       Name: !Sub ${AWS::StackName}-ifdef
 931 |       VpcId: !Ref VPC
 932 |       Port: 80
 933 |       Protocol: HTTP
 934 |       Matcher:  # A target is healthy when GET / answers 301 - presumably the redirect container's reply.
 935 |         HttpCode: 301
 936 |       HealthCheckIntervalSeconds: 30
 937 |       HealthCheckPath: /
 938 |       HealthCheckProtocol: HTTP
 939 |       HealthCheckTimeoutSeconds: 5
 940 |       HealthyThresholdCount: 2
 941 |       UnhealthyThresholdCount: 5
 942 | 
 943 |   InternalDefaultTargetGroup:  # Default target of both internal listeners.
 944 |     Type: AWS::ElasticLoadBalancingV2::TargetGroup
 945 |     Condition: HasInternalLoadBalancer
 946 |     Properties:
 947 |       Name: !Sub ${AWS::StackName}-intdef
 948 |       VpcId: !Ref VPC
 949 |       Port: 80
 950 |       Protocol: HTTP
 951 |       Matcher:  # A target is healthy when GET / answers 301 - presumably the redirect container's reply.
 952 |         HttpCode: 301
 953 |       HealthCheckIntervalSeconds: 30
 954 |       HealthCheckPath: /
 955 |       HealthCheckProtocol: HTTP
 956 |       HealthCheckTimeoutSeconds: 5
 957 |       HealthyThresholdCount: 2
 958 |       UnhealthyThresholdCount: 5
 959 | 
 960 |   DefaultRedirectTaskDefinition:  # Single nginx container shared by both default redirect services.
 961 |     Type: AWS::ECS::TaskDefinition
 962 |     Condition: HasDefaultHttpRedirect
 963 |     Properties:
 964 |       ContainerDefinitions:
 965 |         - Name: nginx
 966 |           Image: !Ref DefaultHttpRedirectImage
 967 |           Essential: true
 968 |           Memory: 50
 969 |           Environment:  # The redirect destination is handed to the container as REDIRECT_URL.
 970 |             - Name: "REDIRECT_URL"
 971 |               Value: !Ref DefaultHttpRedirect
 972 |           PortMappings:
 973 |             - ContainerPort: 80
 974 | 
 975 |   DefaultInternetFacingRedirectService:  # Runs the redirect task behind the internet-facing default target group.
 976 |     Type: AWS::ECS::Service
 977 |     Condition: HasDefaultHttpRedirect
 978 |     DependsOn:  # NOTE(review): these are guarded by HasInternetFacingLoadBalancer - confirm HasDefaultHttpRedirect implies it.
 979 |       - InternetFacingLoadBalancer
 980 |       - InternetFacingLoadBalancerHttpListener
 981 |       - InternetFacingLoadBalancerHttpsListener
 982 |     Properties:
 983 |       ServiceName: !Sub ${AWS::StackName}-if-redirect
 984 |       Cluster: !Ref ECSCluster
 985 |       DesiredCount: 2
 986 |       TaskDefinition: !Ref DefaultRedirectTaskDefinition
 987 |       PlacementStrategies:  # Spread tasks across availability zones first, then across instances.
 988 |         - Type: spread
 989 |           Field: attribute:ecs.availability-zone
 990 |         - Type: spread
 991 |           Field: instanceId
 992 |       LoadBalancers:  # Registers port 80 of the nginx container with the target group.
 993 |         - ContainerName: nginx
 994 |           ContainerPort: 80
 995 |           TargetGroupArn: !Ref InternetFacingDefaultTargetGroup
 996 | 
 997 |   DefaultInternalRedirectService:  # Runs the redirect task behind the internal default target group.
 998 |     Type: AWS::ECS::Service
 999 |     Condition: HasDefaultHttpRedirect
1000 |     DependsOn:  # NOTE(review): these are guarded by HasInternalLoadBalancer - confirm HasDefaultHttpRedirect implies it.
1001 |       - InternalLoadBalancer
1002 |       - InternalLoadBalancerHttpListener
1003 |       - InternalLoadBalancerHttpsListener
1004 |     Properties:
1005 |       ServiceName: !Sub ${AWS::StackName}-int-redirect
1006 |       Cluster: !Ref ECSCluster
1007 |       DesiredCount: 2
1008 |       TaskDefinition: !Ref DefaultRedirectTaskDefinition
1009 |       PlacementStrategies:  # Spread tasks across availability zones first, then across instances.
1010 |         - Type: spread
1011 |           Field: attribute:ecs.availability-zone
1012 |         - Type: spread
1013 |           Field: instanceId
1014 |       LoadBalancers:  # Registers port 80 of the nginx container with the target group.
1015 |         - ContainerName: nginx
1016 |           ContainerPort: 80
1017 |           TargetGroupArn: !Ref InternalDefaultTargetGroup
1018 | 
1019 |   # Everything between here and ASGTerminateLifecycleHook is for intercepting the autoscaling
1020 |   # termination notifications so that the ECS tasks can be gracefully drained. See
1021 |   # https://aws.amazon.com/blogs/compute/how-to-automate-container-instance-draining-in-amazon-ecs/
1022 |   # and https://github.com/aws-samples/ecs-cid-sample for the original code licensed under the
1023 |   # Apache 2.0 license. The code has been significantly refactored and the core fixes have been
1024 |   # pushed upstream back to AWS.
1025 | 
1026 |   ASGTerminateLambda:  # Invoked through ASGTerminateSNSTopic for each lifecycle notification and each retry.
1027 |     Type: AWS::Lambda::Function
1028 |     Properties:
1029 |       Description: Gracefully drain ECS tasks from EC2 instances before the instances are
1030 |                    terminated by autoscaling.
1031 |       Handler: index.lambda_handler
1032 |       Role: !GetAtt ASGTerminateExecutionRole.Arn
1033 |       Runtime: python3.12  # python3.6 is retired; Lambda no longer creates functions on it.
1034 |       MemorySize: 128
1035 |       Timeout: 60  # Has to stay above the 20 second sleep in lambda_handler.
1036 |       Code:
1037 |         ZipFile: !Sub |
1038 |           import datetime
1039 |           import json
1040 |           import time
1041 |           import boto3
1042 | 
1043 |           CLUSTER = '${AWS::StackName}'
1044 |           TIMEOUT = ${ASGTerminateTimeout}
1045 | 
1046 |           def aws(svc):
1047 |               return boto3.client(svc, region_name='${AWS::Region}')
1048 | 
1049 |           ASG = aws('autoscaling')
1050 |           ECS = aws('ecs')
1051 |           SNS = aws('sns')
1052 |           TERMINATING = 'autoscaling:EC2_INSTANCE_TERMINATING'
1053 | 
1054 |           def lookup_instance(msg):  # -> (container instance ARN, status, running task count)
1055 |               res = ECS.list_container_instances(cluster=CLUSTER,
1056 |                                                  filter='ec2InstanceId == %s' % (msg['EC2InstanceId']))
1057 |               if not res['containerInstanceArns']:
1058 |                   return None, None, 0
1059 | 
1060 |               res = ECS.describe_container_instances(cluster=CLUSTER,
1061 |                                                      containerInstances=res['containerInstanceArns'])
1062 |               ret = (res['containerInstances'][0]['containerInstanceArn'],
1063 |                      res['containerInstances'][0]['status'],
1064 |                      res['containerInstances'][0]['runningTasksCount'])
1065 |               print('Found: %s %s' % (str(ret), msg))
1066 |               return ret
1067 | 
1068 |           def can_terminate(msg):  # True once the instance is unknown to ECS, drained, or timed out.
1069 |               (arn, status, count) = lookup_instance(msg)
1070 |               if arn is None:
1071 |                   print('Cannot lookup: %s' % (msg))
1072 |                   return True
1073 | 
1074 |               if status != 'DRAINING':  # First pass: start draining and check again on the retry.
1075 |                   print('Draining: %s' % (msg))
1076 |                   ECS.update_container_instances_state(cluster=CLUSTER,
1077 |                                                        containerInstances=[arn],
1078 |                                                        status='DRAINING')
1079 |                   return False
1080 | 
1081 |               if count == 0:
1082 |                   print('Finished draining: %s' % (msg))
1083 |                   return True
1084 | 
1085 |               now = datetime.datetime.now().timestamp()
1086 |               if msg['instance_timeout'] < now:
1087 |                   print('Timed out: %s' % (msg))
1088 |                   return True
1089 | 
1090 |               return False
1091 | 
1092 |           def lambda_handler(event, context):
1093 |               msg = json.loads(event['Records'][0]['Sns']['Message'])
1094 |               if 'instance_timeout' not in msg:  # Deadline is set once and carried along in the retries.
1095 |                   msg['instance_timeout'] = (datetime.datetime.now() + \
1096 |                                          datetime.timedelta(seconds=TIMEOUT)).timestamp()
1097 | 
1098 |               # Ignore every message that is not for the instance-terminating transition.
1099 |               if TERMINATING not in msg.get('LifecycleTransition', ''):
1100 |                   print('Unknown transition: %s' % (msg))
1101 |                   return
1102 | 
1103 |               if can_terminate(msg):
1104 |                   print('ASG complete: %s' % (msg))
1105 |                   ASG.complete_lifecycle_action(LifecycleHookName=msg['LifecycleHookName'],
1106 |                                                 AutoScalingGroupName=msg['AutoScalingGroupName'],
1107 |                                                 LifecycleActionResult='CONTINUE',
1108 |                                                 InstanceId=msg['EC2InstanceId'])
1109 |                   return
1110 | 
1111 |               print('Tasks are still running: %s' % (msg))
1112 | 
1113 |               time.sleep(20)  # Wait before re-queueing so the retry loop does not spin.
1114 | 
1115 |               ASG.record_lifecycle_action_heartbeat(LifecycleHookName=msg['LifecycleHookName'],
1116 |                                                     AutoScalingGroupName=msg['AutoScalingGroupName'],
1117 |                                                     LifecycleActionToken=msg['LifecycleActionToken'],
1118 |                                                     InstanceId=msg['EC2InstanceId'])
1119 |               SNS.publish(TopicArn=event['Records'][0]['Sns']['TopicArn'],
1120 |                           Message=json.dumps(msg),
1121 |                           Subject='Retry')
1122 | 
1123 |   ASGTerminateExecutionRole:  # Execution role used by ASGTerminateLambda.
1124 |     Type: AWS::IAM::Role
1125 |     Properties:
1126 |       Policies:
1127 |         - PolicyName: lambda-inline
1128 |           PolicyDocument:
1129 |             Version: 2012-10-17
1130 |             Statement:
1131 |               - Effect: Allow
1132 |                 Action:  # Lifecycle calls, CloudWatch Logs, ECS instance draining, and the SNS retry publish.
1133 |                 - autoscaling:CompleteLifecycleAction
1134 |                 - autoscaling:RecordLifecycleActionHeartbeat
1135 |                 - logs:CreateLogGroup
1136 |                 - logs:CreateLogStream
1137 |                 - logs:PutLogEvents
1138 |                 - ecs:ListContainerInstances
1139 |                 - ecs:DescribeContainerInstances
1140 |                 - ecs:UpdateContainerInstancesState
1141 |                 - sns:Publish
1142 |                 Resource: "*"
1143 |       AssumeRolePolicyDocument:  # Only the Lambda service may assume this role.
1144 |         Version: 2012-10-17
1145 |         Statement:
1146 |           - Effect: Allow
1147 |             Principal:
1148 |               Service:
1149 |                 - lambda.amazonaws.com
1150 |             Action:
1151 |               - sts:AssumeRole
1152 |       ManagedPolicyArns:
1153 |       - arn:aws:iam::aws:policy/service-role/AutoScalingNotificationAccessRole
1154 |       Path: "/"
1155 | 
1156 |   ASGTerminateSNSLambdaRole:  # Role handed to ASGTerminateLifecycleHook as its RoleARN.
1157 |     Type: AWS::IAM::Role
1158 |     Properties:
1159 |       AssumeRolePolicyDocument:  # Only the Auto Scaling service may assume this role.
1160 |         Version: 2012-10-17
1161 |         Statement:
1162 |           - Effect: Allow
1163 |             Principal:
1164 |               Service:
1165 |                 - autoscaling.amazonaws.com
1166 |             Action:
1167 |               - sts:AssumeRole
1168 |       ManagedPolicyArns:
1169 |       - arn:aws:iam::aws:policy/service-role/AutoScalingNotificationAccessRole
1170 |       Path: "/"
1171 | 
1172 |   ASGTerminateSNSTopic:  # Receives the lifecycle hook notifications and the Lambda's own retry messages.
1173 |     Type: AWS::SNS::Topic
1174 |     DependsOn: ASGTerminateLambda
1175 |     Properties:
1176 |       Subscription:  # NOTE(review): ASGTerminateSNSLambdaSubscription subscribes the same Lambda again - confirm both are needed.
1177 |         - Endpoint: !GetAtt ASGTerminateLambda.Arn
1178 |           Protocol: lambda
1179 | 
1180 |   ASGTerminateInvokePermission:  # Allows SNS to invoke ASGTerminateLambda, restricted to ASGTerminateSNSTopic.
1181 |     Type: AWS::Lambda::Permission
1182 |     Properties:
1183 |        FunctionName: !Ref ASGTerminateLambda
1184 |        Action: lambda:InvokeFunction
1185 |        Principal: sns.amazonaws.com
1186 |        SourceArn: !Ref ASGTerminateSNSTopic
1187 | 
1188 |   ASGTerminateSNSLambdaSubscription:  # Subscribes ASGTerminateLambda to ASGTerminateSNSTopic.
1189 |     Type: AWS::SNS::Subscription
1190 |     Properties:
1191 |        Endpoint: !GetAtt ASGTerminateLambda.Arn
1192 |        Protocol: 'lambda'
1193 |        TopicArn: !Ref ASGTerminateSNSTopic
1194 | 
1195 |   ASGTerminateLifecycleHook:  # Publishes a message to ASGTerminateSNSTopic when ECSAutoScalingGroup terminates an instance.
1196 |     Type: AWS::AutoScaling::LifecycleHook
1197 |     Properties:
1198 |       AutoScalingGroupName: !Ref ECSAutoScalingGroup
1199 |       DefaultResult: ABANDON
1200 |       HeartbeatTimeout: 120  # Seconds; ASGTerminateLambda records a heartbeat on every retry while tasks are still running.
1201 |       LifecycleTransition: autoscaling:EC2_INSTANCE_TERMINATING
1202 |       NotificationTargetARN: !Ref ASGTerminateSNSTopic
1203 |       RoleARN: !GetAtt ASGTerminateSNSLambdaRole.Arn
1204 |     DependsOn: ASGTerminateSNSTopic
1205 | 
1206 |   # The following Python Lambda function is a stripped down version of the code by Johannes
1207 |   # Müller at http://garbe.io/blog/2017/04/12/a-better-solution-to-ecs-autoscaling/, which is
1208 |   # licensed under the MIT license. It publishes a custom CloudWatch metric that is used for
1209 |   # instance autoscaling.
1210 | 
1211 |   SchedulableContainersLambda:  # Triggered once a minute by SchedulableContainersRule.
1212 |     Type: AWS::Lambda::Function
1213 |     Properties:
1214 |       Description: Publish a custom metric to CloudWatch with the total number of the largest
1215 |                    container that can be scheduled in the cluster. This metric is used for
1216 |                    instance autoscaling.
1217 |       Handler: index.lambda_handler
1218 |       Role: !GetAtt SchedulableContainersExecutionRole.Arn
1219 |       Runtime: python3.12  # python3.6 is retired; Lambda no longer creates functions on it.
1220 |       MemorySize: 128
1221 |       Timeout: 60
1222 |       Code:
1223 |         ZipFile: !Sub |
1224 |           import boto3
1225 | 
1226 |           CLUSTER = '${AWS::StackName}'
1227 |           REGION = '${AWS::Region}'
1228 |           MAX_CPU = ${LargestContainerCpuReservation}
1229 |           MAX_MEM = ${LargestContainerMemoryReservation}
1230 | 
1231 |           ECS = boto3.client('ecs', region_name=REGION)
1232 |           CLOUDWATCH = boto3.client('cloudwatch', region_name=REGION)
1233 | 
1234 |           def lambda_handler(event, context):
1235 |               # Walk the cluster one page at a time: ECS lists and describes at most 100
1236 |               # container instances per call, and describe rejects an empty ARN list.
1237 |               pages = ECS.get_paginator('list_container_instances').paginate(cluster=CLUSTER,
1238 |                                                                              status='ACTIVE')
1239 |               schedulable_containers = 0
1240 |               for arns in (page['containerInstanceArns'] for page in pages):
1241 |                   if not arns:
1242 |                       continue
1243 |                   described = ECS.describe_container_instances(cluster=CLUSTER, containerInstances=arns)
1244 |                   for instance in described['containerInstances']:
1245 |                       remaining = {res['name']: res for res in instance['remainingResources']}
1246 |                       schedulable_containers += min(remaining['CPU']['integerValue'] // MAX_CPU,
1247 |                                                     remaining['MEMORY']['integerValue'] // MAX_MEM)
1248 | 
1249 |               print('cluster=%s, max_cpu=%s, max_mem=%s, schedulable containers=%s' %
1250 |                     (CLUSTER, MAX_CPU, MAX_MEM, schedulable_containers))
1251 | 
1252 |               # A cluster without active instances still reports 0 so the metric never goes missing.
1253 |               CLOUDWATCH.put_metric_data(Namespace='AWS/ECS',
1254 |                                          MetricData=[{
1255 |                                              'MetricName': 'SchedulableContainers',
1256 |                                              'Dimensions': [{'Name': 'ClusterName',
1257 |                                                              'Value': CLUSTER}],
1258 |                                              'Value': schedulable_containers
1259 |                                          }])
1260 | 
1261 |   SchedulableContainersExecutionRole:  # Execution role used by SchedulableContainersLambda.
1262 |     Type: AWS::IAM::Role
1263 |     Properties:
1264 |       RoleName: !Sub sc-${AWS::StackName}-${AWS::Region}
1265 |       Path: /
1266 |       AssumeRolePolicyDocument:  # Only the Lambda service may assume this role.
1267 |         Version: 2012-10-17
1268 |         Statement:
1269 |           - Effect: Allow
1270 |             Principal:
1271 |               Service:
1272 |                 - lambda.amazonaws.com
1273 |             Action:
1274 |               - sts:AssumeRole
1275 |       Policies:
1276 |         - PolicyName: !Sub sc-${AWS::StackName}
1277 |           PolicyDocument:
1278 |            Version: 2012-10-17
1279 |            Statement:
1280 |              Action:  # CloudWatch Logs output, read-only ECS instance queries, and the metric publish.
1281 |                - logs:CreateLogGroup
1282 |                - logs:CreateLogStream
1283 |                - logs:PutLogEvents
1284 |                - ecs:DescribeContainerInstances
1285 |                - ecs:ListContainerInstances
1286 |                - cloudwatch:PutMetricData
1287 |              Effect: Allow
1288 |              Resource: "*"
1289 | 
1290 |   SchedulableContainersRule:  # Schedule that invokes SchedulableContainersLambda.
1291 |     Type: AWS::Events::Rule
1292 |     Properties:
1293 |       ScheduleExpression: "rate(1 minute)"  # One metric sample per minute.
1294 |       Targets:
1295 |         - Id: !Sub ${AWS::StackName}-scheduler
1296 |           Arn: !GetAtt SchedulableContainersLambda.Arn
1297 | 
1298 |   SchedulableContainersInvokePermission:  # Allows the schedule rule, and only that rule, to invoke the Lambda.
1299 |     Type: AWS::Lambda::Permission
1300 |     Properties:
1301 |       FunctionName: !GetAtt SchedulableContainersLambda.Arn
1302 |       Action: lambda:InvokeFunction
1303 |       Principal: events.amazonaws.com
1304 |       SourceArn: !GetAtt SchedulableContainersRule.Arn
1305 | 
1306 | Outputs:
1307 |   VPC:
1308 |     Description: VPC ID
1309 |     Value: !Ref VPC
1310 |     Export:
1311 |       Name: !Sub ${AWS::StackName}-VPC
1312 | 
1313 |   InternetFacingLoadBalancerUrl:  # Output and export names are unchanged so existing importers keep working.
1314 |     Description: The DNS name of the internet-facing ALB
1315 |     Condition: HasInternetFacingLoadBalancer
1316 |     Value: !GetAtt InternetFacingLoadBalancer.DNSName
1317 |     Export:
1318 |       Name: !Sub ${AWS::StackName}-internet-facing-url
1319 | 
1320 |   InternetFacingLoadBalancerHostedZoneId:
1321 |     Description: The ID of the Amazon Route 53 hosted zone associated with the internet-facing
1322 |                  load balancer.
1323 |     Condition: HasInternetFacingLoadBalancer
1324 |     Value: !GetAtt InternetFacingLoadBalancer.CanonicalHostedZoneID
1325 |     Export:
1326 |       Name: !Sub ${AWS::StackName}-internet-facing-hosted-zone-id
1327 | 
1328 |   InternetFacingHttpListener:
1329 |     Description: A reference to the internet-facing HTTP listener
1330 |     Condition: HasInternetFacingLoadBalancer
1331 |     Value: !Ref InternetFacingLoadBalancerHttpListener
1332 |     Export:
1333 |       Name: !Sub ${AWS::StackName}-internet-facing-http-listener
1334 | 
1335 |   InternetFacingHttpsListener:
1336 |     Description: A reference to the internet-facing HTTPS listener
1337 |     Condition: HasInternetFacingLoadBalancer
1338 |     Value: !Ref InternetFacingLoadBalancerHttpsListener
1339 |     Export:
1340 |       Name: !Sub ${AWS::StackName}-internet-facing-https-listener
1341 | 
1342 |   InternalLoadBalancerUrl:  # Output and export names are unchanged so existing importers keep working.
1343 |     Description: The DNS name of the internal ALB
1344 |     Condition: HasInternalLoadBalancer
1345 |     Value: !GetAtt InternalLoadBalancer.DNSName
1346 |     Export:
1347 |       Name: !Sub ${AWS::StackName}-internal-url
1348 | 
1349 |   InternalLoadBalancerHostedZoneId:
1350 |     Description: The ID of the Amazon Route 53 hosted zone associated with the internal load
1351 |                  balancer.
1352 |     Condition: HasInternalLoadBalancer
1353 |     Value: !GetAtt InternalLoadBalancer.CanonicalHostedZoneID
1354 |     Export:
1355 |       Name: !Sub ${AWS::StackName}-internal-hosted-zone-id
1356 | 
1357 |   InternalHttpListener:
1358 |     Description: A reference to the internal HTTP listener
1359 |     Condition: HasInternalLoadBalancer
1360 |     Value: !Ref InternalLoadBalancerHttpListener
1361 |     Export:
1362 |       Name: !Sub ${AWS::StackName}-internal-http-listener
1363 | 
1364 |   InternalHttpsListener:
1365 |     Description: A reference to the internal HTTPS listener
1366 |     Condition: HasInternalLoadBalancer
1367 |     Value: !Ref InternalLoadBalancerHttpsListener
1368 |     Export:
1369 |       Name: !Sub ${AWS::StackName}-internal-https-listener
1370 | 
1371 |   LogsS3Bucket:
1372 |     Description: S3 bucket containing the ALB access logs
1373 |     Condition: HasLoadBalancerResources
1374 |     Value: !Ref LogsS3Bucket
1375 |     Export:
1376 |       Name: !Sub ${AWS::StackName}-logs-s3-bucket
1377 | 
1378 |   ECSHostSecurityGroup:
1379 |     Description: Allow access from the ELB and internet network.
1380 |     Value: !Ref ECSHostSecurityGroup
1381 |     Export:
1382 |       Name: !Sub ${AWS::StackName}-ecs-host-security-group
1383 | 


--------------------------------------------------------------------------------