├── .github
    └── workflows
    │   └── test.yml
├── .gitignore
├── CHANGELOG.md
├── Gemfile
├── README.md
├── Rakefile
├── bin
    ├── console
    └── setup
├── ecs_deploy.gemspec
├── exe
    └── ecs_auto_scaler
├── lib
    ├── ecs_deploy.rb
    └── ecs_deploy
    │   ├── auto_scaler.rb
    │   ├── auto_scaler
    │       ├── auto_scaling_group_config.rb
    │       ├── cluster_resource_manager.rb
    │       ├── config_base.rb
    │       ├── instance_drainer.rb
    │       ├── service_config.rb
    │       ├── spot_fleet_request_config.rb
    │       └── trigger_config.rb
    │   ├── capistrano.rb
    │   ├── configuration.rb
    │   ├── instance_fluctuation_manager.rb
    │   ├── scheduled_task.rb
    │   ├── service.rb
    │   ├── task_definition.rb
    │   └── version.rb
├── renovate.json
└── spec
    ├── ecs_deploy
        ├── auto_scaler
        │   ├── auto_scaling_group_config_spec.rb
        │   ├── cluster_resource_manager_spec.rb
        │   ├── instance_drainer_spec.rb
        │   └── service_config_spec.rb
        ├── auto_scaler_spec.rb
        └── instance_fluctuation_manager_spec.rb
    ├── fixtures
        └── files
        │   ├── ecs_auto_scaler_config_in_new_format.yaml
        │   └── ecs_auto_scaler_config_in_old_format.yaml
    └── spec_helper.rb


/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |   pull_request:
 6 | 
 7 | jobs:
 8 |   test:
 9 | 
10 |     runs-on: ubuntu-latest
11 |     strategy:
12 |       matrix:
13 |         ruby-version: ['2.5', '2.6', '2.7', '3.0', '3.1', '3.2']
14 | 
15 |     steps:
16 |     - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
17 |     - name: Set up Ruby
18 |       uses: ruby/setup-ruby@13e7a03dc3ac6c3798f4570bfead2aed4d96abfb # v1.244.0
19 |       with:
20 |         ruby-version: ${{ matrix.ruby-version }}
21 |         bundler-cache: true
22 |     - name: Run tests
23 |       run: bundle exec rake
24 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | /.bundle/
 2 | /.yardoc
 3 | /Gemfile.lock
 4 | /_yardoc/
 5 | /coverage/
 6 | /doc/
 7 | /pkg/
 8 | /spec/reports/
 9 | /tmp/
10 | 
11 | .rspec_status
12 | .envrc
13 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
  1 | # v1.0
  2 | 
  3 | ## Release v1.0.7 - 2024/08/08
  4 | 
  5 | ### Bug fixes
  6 | 
  7 | - Fix Aws::AutoScaling::Errors::ValidationError https://github.com/reproio/ecs_deploy/pull/85
  8 | 
  9 | ## Release v1.0.6 - 2024/03/19
 10 | 
 11 | ### Enhancement
 12 | 
 13 | - Make region fallback logic consistent in Capistrano tasks https://github.com/reproio/ecs_deploy/pull/91
 14 | - Add runtime_platform to task_definition. https://github.com/reproio/ecs_deploy/pull/92
 15 | 
 16 | ### Bug fixes
 17 | 
 18 | - Delete option :placement_strategy if DAEMON service https://github.com/reproio/ecs_deploy/pull/93
 19 | 
 20 | ## Release v1.0.5 - 2023/03/15
 21 | 
 22 | ### Enhancement
 23 | 
 24 | - Add variable of capistrano `ecs_client_retry_params` to override parameter of ECS::Client#initialize https://github.com/reproio/ecs_deploy/pull/88
 25 | 
 26 | ## Release v1.0.4 - 2023/02/10
 27 | 
 28 | ### Bug fixes
 29 | 
 30 | - Fix Aws::AutoScaling::Errors::ValidationError https://github.com/reproio/ecs_deploy/pull/85
 31 | 
 32 | - Fix Timeout::Error that occurs in trigger_capacity_update https://github.com/reproio/ecs_deploy/pull/80
 33 | 
 34 | - Force a new deployment when switching from launch type to capacity provider strategy on an existing service https://github.com/reproio/ecs_deploy/pull/75
 35 | 
 36 | ### Enhancement
 37 | 
 38 | - Run test with Ruby 3.2 https://github.com/reproio/ecs_deploy/pull/83
 39 | 
 40 | - Merge `propagate_tags` to service_options when updating service https://github.com/reproio/ecs_deploy/pull/82
 41 | 
 42 | - Show service event logs while waiting for services https://github.com/reproio/ecs_deploy/pull/81
 43 | 
 44 | - Stop supporting ruby 2.4 https://github.com/reproio/ecs_deploy/pull/79
 45 | 
 46 | - Display warning that desired count has reached max value https://github.com/reproio/ecs_deploy/pull/78
 47 | 
 48 | - Make draining feature opt-outable https://github.com/reproio/ecs_deploy/pull/77
 49 | 
 50 | - Add capacity_provider_strategy options to Service https://github.com/reproio/ecs_deploy/pull/74
 51 | 
 52 | ## Release v1.0.3 - 2021/11/17
 53 | 
 54 | ### Bug fixes
 55 | * Fix bug that `InstanceFluctuationManager#decrement` tries to detach instances whose status is 'DEREGISTERING'
 56 |   https://github.com/reproio/ecs_deploy/pull/72
 57 | 
 58 | ### Enhancement
 59 | * Add a cluster name to deployment logs
 60 |   https://github.com/reproio/ecs_deploy/pull/71
 61 | 
 62 | 
 63 | ## Release v1.0.2 - 2021/05/26
 64 | 
 65 | ### Enhancement
 66 | 
 67 | * add option enable_execute_command to support ECS Exec
 68 |   https://github.com/reproio/ecs_deploy/pull/69
 69 | 
 70 | ## Release v1.0.1 - 2021/05/19
 71 | 
 72 | ### Enhancement
 73 | 
 74 | * retry register_task_definition by AWS SDK feature
 75 |   https://github.com/reproio/ecs_deploy/pull/67
 76 | * Support Ruby 3.0
 77 |   https://github.com/reproio/ecs_deploy/pull/66
 78 | * Wait until old tasks stop
 79 |   https://github.com/reproio/ecs_deploy/pull/65
 80 | * Add prioritized_over_upscale_triggers option to triggers
 81 |   https://github.com/reproio/ecs_deploy/pull/62
 82 | * Display only unstable services in EcsDeploy::Service#wait_all_running
 83 |   https://github.com/reproio/ecs_deploy/pull/61
 84 | 
 85 | ## Release v1.0.0 - 2019/12/24
 86 | 
 87 | ### New feature
 88 | 
 89 | * Add tasks to deploy the application faster
 90 |   https://github.com/reproio/ecs_deploy/pull/57
 91 | 
 92 | ### Enhancement
 93 | 
 94 | * Add parameters `ecs_wait_until_services_stable_max_attempts` and `ecs_wait_until_services_stable_delay`
 95 |   https://github.com/reproio/ecs_deploy/pull/30
 96 | * Detect region automatically according to AWS SDK
 97 |   https://github.com/reproio/ecs_deploy/pull/31
 98 | * Support new features of ECS to support Fargate
 99 |   https://github.com/reproio/ecs_deploy/pull/32
100 | * Ignore running tasks which don't belong to the ECS services on deregistering container instances
101 |   https://github.com/reproio/ecs_deploy/pull/33
102 | * Drop AWS SDK 2 support
103 |   https://github.com/reproio/ecs_deploy/pull/34
104 | * Support scheduling_strategy option
105 |   https://github.com/reproio/ecs_deploy/pull/35
106 | * Support execution_role_arn on task_definition
107 |   https://github.com/reproio/ecs_deploy/pull/36
108 | * Support spot fleet requests and container instance draining
109 |   https://github.com/reproio/ecs_deploy/pull/40
110 | * Add network_configuration parameters to ScheduledTask
111 |   https://github.com/reproio/ecs_deploy/pull/46
112 | * Support tagging ECS resources
113 |   https://github.com/reproio/ecs_deploy/pull/48
114 |   https://github.com/reproio/ecs_deploy/pull/49
115 | * Wait for stopping tasks until tasks stop
116 |   https://github.com/reproio/ecs_deploy/pull/50
117 | * Improve performance when starting tasks
118 |   https://github.com/reproio/ecs_deploy/pull/53
119 | * Improve stability of auto scaling groups managed by ecs_auto_scaler
120 |   https://github.com/reproio/ecs_deploy/pull/55
121 | 
122 | ### Bug fixes
123 | 
124 | * Fix infinite loop that occurs when there are more than 100 container instances
125 |   https://github.com/reproio/ecs_deploy/pull/38
126 | * Fix errors that occur on decreasing more than 20 container instances
127 |   https://github.com/reproio/ecs_deploy/pull/39
128 | 
129 | # Ancient releases
130 | 
131 | ## Release v0.3.2 - 2017/10/23
132 | 
133 | ### Enhancement
134 | 
135 | * Remove execution feature
136 |   https://github.com/reproio/ecs_deploy/pull/24
137 | * Support container overrides in scheduled tasks
138 |   https://github.com/reproio/ecs_deploy/pull/26
139 | 
140 | ### Bug fixes
141 | 
142 | * Fix deployment errors that occur when `ecs_scheduled_tasks` is not set
143 |   https://github.com/reproio/ecs_deploy/pull/27
144 | 
145 | ## Release v0.3.1 - 2017/04/08
146 | 
147 | ### Bug fixes
148 | 
149 | * Fix block parameter name
150 | 
151 | ## Release v0.3.0 - 2017/03/08
152 | 
153 | ### New feature
154 | 
155 | * Support ScheduledTask deployment
156 |   https://github.com/reproio/ecs_deploy/pull/22
157 | 
158 | ### Enhancement
159 | 
160 | * Support network_mode and placement_constraints
161 | * Introduce `ecs_registered_tasks` capistrano variable
162 |   https://github.com/reproio/ecs_deploy/pull/23
163 | 
164 | ### Bug fixes
165 | 
166 | * Filter inactive services
167 |   https://github.com/reproio/ecs_deploy/pull/19
168 | * Wait 10 services at once
169 |   https://github.com/reproio/ecs_deploy/pull/20
170 |   https://github.com/reproio/ecs_deploy/pull/21
171 | 
172 | ## Release v0.2.0 - 2016/10/31
173 | 
174 | ### Enhancement
175 | 
176 | * Support task role arn
177 |   https://github.com/reproio/ecs_deploy/pull/13
178 | * Make the scale-in process safe
179 |   https://github.com/reproio/ecs_deploy/pull/14
180 | * Support ALB
181 |   https://github.com/reproio/ecs_deploy/pull/15
182 | 
183 | ## Release v0.1.2 - 2016/07/28
184 | 
185 | ### Bug fixes
186 | 
187 | * Fix rollback bug
188 |   https://github.com/reproio/ecs_deploy/pull/11
189 | 
190 | ## Release v0.1.1 - 2016/03/07
191 | 
192 | ### Bug fixes
193 | 
194 | * Add missing desired_count for backend services
195 |   https://github.com/reproio/ecs_deploy/pull/9
196 | 
197 | ## Release v0.1.0 - 2016/06/27
198 | 
199 | Initial release.
200 | 


--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | source 'https://rubygems.org'
2 | 
3 | # Specify your gem's dependencies in ecs_deploy.gemspec
4 | gemspec
5 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # EcsDeploy
  2 | 
  3 | Helper script for deployment to Amazon ECS, designed to be compatible with `capistrano`. 
  4 | 
  5 | This gem is experimental.
  6 | 
  7 | ## Installation
  8 | 
  9 | Add this line to your application's Gemfile:
 10 | 
 11 | ```ruby
 12 | gem "ecs_deploy", github: "reproio/ecs_deploy"
 13 | ```
 14 | 
 15 | And then execute:
 16 | 
 17 |     $ bundle
 18 | 
 19 | ## Configuration
 20 | 
 21 | ```ruby
 22 | # Capfile
 23 | require "ecs_deploy/capistrano"
 24 | 
 25 | # deploy.rb
 26 | set :ecs_default_cluster, "ecs-cluster-name"
 27 | set :ecs_region, %w(ap-northeast-1) # optional, if nil, use environment variable
 28 | set :ecs_service_role, "customEcsServiceRole" # default: ecsServiceRole
 29 | set :ecs_deploy_wait_timeout, 600 # default: 300
 30 | set :ecs_wait_until_services_stable_max_attempts, 40 # optional
 31 | set :ecs_wait_until_services_stable_delay, 15 # optional
 32 | set :ecs_client_params, { retry_mode: "standard", max_attempts: 10 } # default: {}
 33 | 
 34 | set :ecs_tasks, [
 35 |   {
 36 |     name: "myapp-#{fetch(:rails_env)}",
 37 |     container_definitions: [
 38 |       {
 39 |         name: "myapp",
 40 |         image: "#{fetch(:docker_registry_host_with_port)}/myapp:#{fetch(:sha1)}",
 41 |         cpu: 1024,
 42 |         memory: 512,
 43 |         port_mappings: [],
 44 |         essential: true,
 45 |         environment: [
 46 |           {name: "RAILS_ENV", value: fetch(:rails_env)},
 47 |         ],
 48 |         mount_points: [
 49 |           {
 50 |             source_volume: "sockets_path",
 51 |             container_path: "/app/tmp/sockets",
 52 |             read_only: false,
 53 |           },
 54 |         ],
 55 |         volumes_from: [],
 56 |         log_configuration: {
 57 |           log_driver: "fluentd",
 58 |           options: {
 59 |             "tag" => "docker.#{fetch(:rails_env)}.#{name}.{{.ID}}",
 60 |           },
 61 |         },
 62 |       },
 63 |       {
 64 |         name: "nginx",
 65 |         image: "#{fetch(:docker_registry_host_with_port)}/my-nginx",
 66 |         cpu: 256,
 67 |         memory: 256,
 68 |         links: [],
 69 |         port_mappings: [
 70 |           {container_port: 443, host_port: 443, protocol: "tcp"},
 71 |         ],
 72 |         essential: true,
 73 |         environment: [],
 74 |         mount_points: [],
 75 |         volumes_from: [
 76 |           {source_container: "myapp-#{fetch(:rails_env)}", read_only: false},
 77 |         ],
 78 |         log_configuration: {
 79 |           log_driver: "fluentd",
 80 |           options: {
 81 |             "tag" => "docker.#{fetch(:rails_env)}.#{name}.{{.ID}}",
 82 |           },
 83 |         },
 84 |       }
 85 |     ],
 86 |     volumes: [{name: "sockets_path", host: {}}],
 87 |   },
 88 | ]
 89 | 
 90 | set :ecs_scheduled_tasks, [
 91 |   {
 92 |     cluster: "default", # Defaults to fetch(:ecs_default_cluster)
 93 |     rule_name: "schedule_name",
 94 |     schedule_expression: "cron(0 12 * * ? *)",
 95 |     description: "schedule_description", # Optional
 96 |     target_id: "task_name", # Defaults to the task_definition_name
 97 |     task_definition_name: "myapp-#{fetch(:rails_env)}",
 98 |     task_count: 2, # Default 1
 99 |     revision: 12, # Optional
100 |     role_arn: "TaskRoleArn", # Optional
101 |     container_overrides: [ # Optional
102 |       name: "myapp-main",
103 |       command: ["ls"],
104 |     ]
105 |   }
106 | ]
107 | 
108 | set :ecs_services, [
109 |   {
110 |     name: "myapp-#{fetch(:rails_env)}",
111 |     load_balancers: [
112 |       {
113 |         load_balancer_name: "service-elb-name",
114 |         container_port: 443,
115 |         container_name: "nginx",
116 |       },
117 |       {
118 |         target_group_arn: "alb_target_group_arn",
119 |         container_port: 443,
120 |         container_name: "nginx",
121 |       }
122 |     ],
123 |     desired_count: 1,
124 |     deployment_configuration: {maximum_percent: 200, minimum_healthy_percent: 50},
125 |   },
126 | ]
127 | ```
128 | 
129 | ## Usage
130 | 
131 | ```sh
132 | bundle exec cap <stage> ecs:register_task_definition # register ecs_tasks as TaskDefinition
133 | bundle exec cap <stage> ecs:deploy_scheduled_task # register ecs_scheduled_tasks to CloudWatchEvent
134 | bundle exec cap <stage> ecs:deploy # create or update Service by ecs_services info
135 | 
136 | bundle exec cap <stage> ecs:rollback # deregister current task definition and update Service by previous revision of current task definition
137 | ```
138 | 
139 | ### Rollback example
140 | 
141 | | sequence | taskdef  | service       | desc    |
142 | | -------- | -------- | ------------- | ------  |
143 | | 1        | myapp:12 | myapp-service |         |
144 | | 2        | myapp:13 | myapp-service |         |
145 | | 3        | myapp:14 | myapp-service | current |
146 | 
147 | After rollback
148 | 
149 | | sequence | taskdef  | service       | desc       |
150 | | -------- | -------- | ------------- | ------     |
151 | | 1        | myapp:12 | myapp-service |            |
152 | | 2        | myapp:13 | myapp-service |            |
153 | | 3        | myapp:14 | myapp-service | deregister |
154 | | 4        | myapp:13 | myapp-service | current    |
155 | 
156 | And rollback again
157 | 
158 | | sequence | taskdef  | service       | desc       |
159 | | -------- | -------- | ------------- | ------     |
160 | | 1        | myapp:12 | myapp-service |            |
161 | | 2        | myapp:13 | myapp-service | previous   |
162 | | 3        | myapp:14 | myapp-service | deregister |
163 | | 4        | myapp:13 | myapp-service | deregister |
164 | | 5        | myapp:12 | myapp-service | current    |
165 | 
166 | And deploy new version
167 | 
168 | | sequence | taskdef  | service       | desc       |
169 | | -------- | -------- | ------------- | ------     |
170 | | 1        | myapp:12 | myapp-service |            |
171 | | 2        | myapp:13 | myapp-service |            |
172 | | 3        | myapp:14 | myapp-service | deregister |
173 | | 4        | myapp:13 | myapp-service | deregister |
174 | | 5        | myapp:12 | myapp-service |            |
175 | | 6        | myapp:15 | myapp-service | current    |
176 | 
177 | And rollback
178 | 
179 | | sequence | taskdef  | service       | desc       |
180 | | -------- | -------- | ------------- | ------     |
181 | | 1        | myapp:12 | myapp-service |            |
182 | | 2        | myapp:13 | myapp-service |            |
183 | | 3        | myapp:14 | myapp-service | deregister |
184 | | 4        | myapp:13 | myapp-service | deregister |
185 | | 5        | myapp:12 | myapp-service |            |
186 | | 6        | myapp:15 | myapp-service | deregister |
187 | | 7        | myapp:12 | myapp-service | current    |
188 | 
189 | ## Autoscaler
190 | 
191 | The autoscaler of `ecs_deploy` supports auto scaling of ECS services and clusters.
192 | 
193 | ### Prerequisites
194 | 
195 | * An ECS cluster whose instances belong to either an Auto Scaling group or a Spot Fleet request
196 | * You have CloudWatch alarms and you want to scale services when their state changes
197 | 
198 | ### How to use autoscaler
199 | 
200 | First, write a configuration file (YAML format) like below:
201 | 
202 | ```yaml
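203 | # If an alarm listed in upscale_triggers is in the specified state at polling time, increase the service's tasks and instances by step (up to max_task_count).
204 | # If an alarm listed in downscale_triggers is in the specified state at polling time, decrease the service's tasks and instances by step (down to min_task_count).
205 | # max_task_count accepts a list so that the limit can be raised in stages.
206 | # Once a limit is reached, scaling up resumes if the alarm state still persists after cooldown_time_for_reach_max has elapsed.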
207 | 
208 | polling_interval: 60
209 | 
210 | auto_scaling_groups:
211 |   - name: ecs-cluster-nodes
212 |     region: ap-northeast-1
213 |     cluster: ecs-cluster
214 |     # autoscaler will set the capacity to (buffer + desired_tasks * required_capacity).
215 |     # Adjust this value if it takes much time to prepare ECS instances and launch new tasks.
216 |     buffer: 1
217 |     disable_draining: false # cf. spot_instance_intrp_warns_queue_urls
218 |     services:
219 |       - name: repro-api-production
220 |         step: 1
221 |         idle_time: 240
222 |         max_task_count: [10, 25]
223 |         scheduled_min_task_count:
224 |           - {from: "1:45", to: "4:30", count: 8}
225 |         cooldown_time_for_reach_max: 600
226 |         min_task_count: 0
227 |         # Required capacity per task (default: 1)
228 |         # You should specify "binpack" as task placement strategy if the value is less than 1 and you use an auto scaling group.
229 |         required_capacity: 0.5
230 |         upscale_triggers:
231 |           - alarm_name: "ECS [repro-api-production] CPUUtilization"
232 |             state: ALARM
233 |           - alarm_name: "ELB repro-api-a HTTPCode_Backend_5XX"
234 |             state: ALARM
235 |             step: 2
236 |         downscale_triggers:
237 |           - alarm_name: "ECS [repro-api-production] CPUUtilization (low)"
238 |             state: OK
239 | 
240 | spot_fleet_requests:
241 |   - id: sfr-354de735-2c17-4565-88c9-10ada5b957e5
242 |     region: ap-northeast-1
243 |     cluster: ecs-cluster-for-worker
244 |     buffer: 1
245 |     disable_draining: false # cf. spot_instance_intrp_warns_queue_urls
246 |     services:
247 |       - name: repro-worker-production
248 |         step: 1
249 |         idle_time: 240
250 |         cooldown_time_for_reach_max: 600
251 |         min_task_count: 0
252 |         # Required capacity per task (default: 1)
253 |         # The capacity assumes that WeightedCapacity is equal to the number of vCPUs.
254 |         required_capacity: 2
255 |         upscale_triggers:
256 |           - alarm_name: "ECS [repro-worker-production] CPUUtilization"
257 |             state: ALARM
258 |         downscale_triggers:
259 |           - alarm_name: "ECS [repro-worker-production] CPUUtilization (low)"
260 |             state: OK
261 |           - alarm_name: "Aurora DMLLatency is high"
262 |             state: ALARM
263 |             prioritized_over_upscale_triggers: true
264 | 
265 | # When you use spot instances, instances that receive interruption warnings should be drained.
266 | # If you set URLs of SQS queues for spot instance interruption warnings to `spot_instance_intrp_warns_queue_urls`,
267 | # autoscaler drains the instances that are about to be interrupted and detaches them from the auto scaling groups with
268 | # should_decrement_desired_capacity false.
269 | # If you set ECS_ENABLE_SPOT_INSTANCE_DRAINING to true, we recommend that you opt out of the draining feature
270 | # by setting disable_draining to true in the configurations of auto scaling groups and spot fleet requests.
271 | # Otherwise, on rare occasions, instances appear not to be drained.
272 | # Even if you opt out of the feature, you still have the advantage of setting `spot_instance_intrp_warns_queue_urls`
273 | # because the instances that are about to be interrupted are replaced with new instances as soon as possible.
274 | spot_instance_intrp_warns_queue_urls:
275 |   - https://sqs.ap-northeast-1.amazonaws.com/<account-id>/spot-instance-intrp-warns
276 | ```
277 | 
278 | Then, execute the following command:
279 | 
280 | ```sh
281 | ecs_auto_scaler <config yaml>
282 | ```
283 | 
284 | It is recommended to run the `ecs_auto_scaler` via a container on ECS.
285 | 
286 | ### Signals
287 | 
288 |  Signal    | Description
289 | -----------|------------------------------------------------------------
290 |  TERM, INT | Shutdown gracefully
291 |  CONT      | Resume auto scaling
292 |  TSTP      | Pause auto scaling (Run only container instance draining)
293 | 
294 | ### IAM policy for autoscaler
295 | 
296 | The following permissions are required for the preceding configuration of "repro-api-production" service:
297 | 
298 | ```
299 | {
300 |   "Version": "2012-10-17",
301 |   "Statement": [
302 |     {
303 |       "Effect": "Allow",
304 |       "Action": [
305 |         "autoscaling:DescribeAutoScalingGroups",
306 |         "cloudwatch:DescribeAlarms",
307 |         "ec2:DescribeInstances",
308 |         "ec2:TerminateInstances",
309 |         "ecs:ListTasks"
310 |       ],
311 |       "Resource": "*"
312 |     },
313 |     {
314 |       "Effect": "Allow",
315 |       "Action": [
316 |         "ecs:DescribeServices",
317 |         "ecs:UpdateService"
318 |       ],
319 |       "Resource": [
320 |         "arn:aws:ecs:ap-northeast-1:<account-id>:service/ecs-cluster/repro-api-production"
321 |       ]
322 |     },
323 |     {
324 |       "Effect": "Allow",
325 |       "Action": [
326 |         "ecs:DescribeTasks"
327 |       ],
328 |       "Resource": [
329 |         "arn:aws:ecs:ap-northeast-1:<account-id>:task/ecs-cluster/*"
330 |       ]
331 |     },
332 |     {
333 |       "Effect": "Allow",
334 |       "Action": [
335 |         "autoscaling:DetachInstances",
336 |         "autoscaling:UpdateAutoScalingGroup"
337 |       ],
338 |       "Resource": [
339 |         "arn:aws:autoscaling:ap-northeast-1:<account-id>:autoScalingGroup:<group-id>:autoScalingGroupName/ecs-cluster-nodes"
340 |       ]
341 |     },
342 |     {
343 |       "Effect": "Allow",
344 |       "Action": [
345 |         "ecs:DescribeContainerInstances"
346 |       ],
347 |       "Resource": [
348 |         "arn:aws:ecs:ap-northeast-1:<account-id>:container-instance/ecs-cluster/*"
349 |       ]
350 |     },
351 |     {
352 |       "Effect": "Allow",
353 |       "Action": [
354 |         "ecs:DeregisterContainerInstance",
355 |         "ecs:ListContainerInstances"
356 |       ],
357 |       "Resource": [
358 |         "arn:aws:ecs:ap-northeast-1:<account-id>:cluster/ecs-cluster"
359 |       ]
360 |     }
361 |   ]
362 | }
363 | ```
364 | 
365 | If you use spot instances, additional permissions are required like below:
366 | 
367 | ```
368 | {
369 |   "Version": "2012-10-17",
370 |   "Statement": [
371 |     {
372 |       "Effect": "Allow",
373 |       "Action": "ecs:UpdateContainerInstancesState",
374 |       "Resource": "arn:aws:ecs:ap-northeast-1:<account-id>:container-instance/ecs-cluster/*"
375 |     },
376 |     {
377 |       "Effect": "Allow",
378 |       "Action": [
379 |         "sqs:DeleteMessage",
380 |         "sqs:DeleteMessageBatch",
381 |         "sqs:ReceiveMessage"
382 |       ],
383 |       "Resource": "arn:aws:sqs:ap-northeast-1:<account-id>:spot-instance-intrp-warns"
384 |     }
385 |   ]
386 | }
387 | ```
388 | 
389 | The following permissions are required for the preceding configuration of "repro-worker-production" service:
390 | 
391 | ```
392 | {
393 |   "Version": "2012-10-17",
394 |   "Statement": [
395 |     {
396 |       "Effect": "Allow",
397 |       "Action": [
398 |         "sqs:DeleteMessage",
399 |         "sqs:DeleteMessageBatch",
400 |         "sqs:ReceiveMessage"
401 |       ],
402 |       "Resource": "arn:aws:sqs:ap-northeast-1:<account-id>:spot-instance-intrp-warns"
403 |     },
404 |     {
405 |       "Effect": "Allow",
406 |       "Action": [
407 |         "cloudwatch:DescribeAlarms",
408 |         "ec2:DescribeInstances",
409 |         "ec2:DescribeSpotFleetInstances",
410 |         "ec2:DescribeSpotFleetRequests",
411 |         "ec2:ModifySpotFleetRequest",
412 |         "ec2:TerminateInstances",
413 |         "ecs:ListTasks"
414 |       ],
415 |       "Resource": "*"
416 |     },
417 |     {
418 |       "Effect": "Allow",
419 |       "Action": [
420 |         "ecs:DescribeServices",
421 |         "ecs:UpdateService"
422 |       ],
423 |       "Resource": [
424 |         "arn:aws:ecs:ap-northeast-1:<account-id>:service/ecs-cluster-for-worker/repro-worker-production"
425 |       ]
426 |     },
427 |     {
428 |       "Effect": "Allow",
429 |       "Action": [
430 |         "ecs:DescribeTasks"
431 |       ],
432 |       "Resource": [
433 |         "arn:aws:ecs:ap-northeast-1:<account-id>:task/ecs-cluster-for-worker/*"
434 |       ]
435 |     },
436 |     {
437 |       "Effect": "Allow",
438 |       "Action": [
439 |         "ecs:DescribeContainerInstances",
440 |         "ecs:UpdateContainerInstancesState"
441 |       ],
442 |       "Resource": [
443 |         "arn:aws:ecs:ap-northeast-1:<account-id>:container-instance/ecs-cluster-for-worker/*"
444 |       ]
445 |     },
446 |     {
447 |       "Effect": "Allow",
448 |       "Action": [
449 |         "ecs:ListContainerInstances"
450 |       ],
451 |       "Resource": [
452 |         "arn:aws:ecs:ap-northeast-1:<account-id>:cluster/ecs-cluster-for-worker"
453 |       ]
454 |     }
455 |   ]
456 | }
457 | ```
458 | 
459 | ### How to deploy faster with Auto Scaling Group
460 | 
461 | Add the following configuration and hooks to your `config/deploy.rb`:
462 | 
463 | ```ruby
464 | # deploy.rb
465 | set :ecs_instance_fluctuation_manager_configs, [
466 |   {
467 |     region: "ap-northeast-1",
468 |     cluster: "CLUSTER_NAME",
469 |     auto_scaling_group_name: "AUTO_SCALING_GROUP_NAME",
470 |     desired_capacity: 20, # original capacity of auto scaling group
471 |   }
472 | ]
473 | ```
474 | 
475 | This configuration enables tasks `ecs:increase_instances_to_max_size` and `ecs:terminate_redundant_instances`.
476 | If this configuration is not set, the above tasks do nothing.
477 | The task `ecs:increase_instances_to_max_size` will increase ECS instances.
478 | The task `ecs:terminate_redundant_instances` will decrease ECS instances considering AZ balance.
479 | 
480 | Hook configuration example:
481 | 
482 | ```ruby
483 | after "deploy:updating", "ecs:increase_instances_to_max_size"
484 | after "deploy:finished", "ecs:terminate_redundant_instances"
485 | after "deploy:failed", "ecs:terminate_redundant_instances"
486 | ```
487 | 
488 | ## Development
489 | 
490 | After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
491 | 
492 | To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
493 | 
494 | ## Contributing
495 | 
496 | Bug reports and pull requests are welcome on GitHub at https://github.com/reproio/ecs_deploy.
497 | 


--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
1 | require "bundler/gem_tasks"
2 | require "rspec/core/rake_task"
3 | 
4 | RSpec::Core::RakeTask.new(:spec)
5 | 
6 | task :default => :spec
7 | 


--------------------------------------------------------------------------------
/bin/console:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | 
 3 | require "bundler/setup"
 4 | require "ecs_deploy"
 5 | 
 6 | # You can add fixtures and/or initialization code here to make experimenting
 7 | # with your gem easier. You can also use a different console, if you like.
 8 | 
 9 | # (If you use this, don't forget to add pry to your Gemfile!)
10 | # require "pry"
11 | # Pry.start
12 | 
13 | require "irb"
14 | IRB.start
15 | 


--------------------------------------------------------------------------------
/bin/setup:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -euo pipefail
3 | IFS=$'\n\t'
4 | set -vx
5 | 
6 | bundle install
7 | 
8 | # Do any other automated setup that you need to do here
9 | 


--------------------------------------------------------------------------------
/ecs_deploy.gemspec:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | lib = File.expand_path('../lib', __FILE__)
 3 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
 4 | require 'ecs_deploy/version'
 5 | 
 6 | Gem::Specification.new do |spec|
 7 |   spec.name          = "ecs_deploy"
 8 |   spec.version       = EcsDeploy::VERSION
 9 |   spec.authors       = ["joker1007"]
10 |   spec.email         = ["kakyoin.hierophant@gmail.com"]
11 | 
12 |   spec.summary       = %q{AWS ECS deploy helper}
13 |   spec.description   = %q{AWS ECS deploy helper}
14 |   spec.homepage      = "https://github.com/reproio/ecs_deploy"
15 | 
16 |   spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) } # git-tracked files, excluding anything under test/, spec/ or features/
17 |   spec.bindir        = "exe"
18 |   spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } # basenames of the packaged files under exe/
19 |   spec.require_paths = ["lib"]
20 | 
21 |   spec.add_runtime_dependency "aws-sdk-autoscaling", "~> 1"
22 |   spec.add_runtime_dependency "aws-sdk-cloudwatch", "~> 1"
23 |   spec.add_runtime_dependency "aws-sdk-cloudwatchevents", "~> 1"
24 |   spec.add_runtime_dependency "aws-sdk-ec2", "~> 1"
25 |   spec.add_runtime_dependency "aws-sdk-ecs", "~> 1"
26 |   spec.add_runtime_dependency "aws-sdk-sqs", "~> 1"
27 |   spec.add_runtime_dependency "terminal-table"
28 |   spec.add_runtime_dependency "paint"
29 | 
30 |   spec.add_development_dependency "bundler", ">= 1.11", "< 3"
31 |   spec.add_development_dependency "rake", ">= 10.0"
32 |   spec.add_development_dependency "rspec", "~> 3.0"
33 |   spec.add_development_dependency "rexml" # For aws-sdk-*
34 | end
35 | 


--------------------------------------------------------------------------------
/exe/ecs_auto_scaler:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 | 
3 | require "ecs_deploy"
4 | require "ecs_deploy/auto_scaler"
5 | 
6 | EcsDeploy::AutoScaler.run(*ARGV)
7 | 


--------------------------------------------------------------------------------
/lib/ecs_deploy.rb:
--------------------------------------------------------------------------------
 1 | require "ecs_deploy/version"
 2 | require "ecs_deploy/configuration"
 3 | 
 4 | require 'aws-sdk-ecs'
 5 | require 'logger'
 6 | require 'terminal-table'
 7 | require 'paint'
 8 | 
 9 | module EcsDeploy
10 |   def self.logger
11 |     @logger ||= Logger.new(STDOUT).tap do |l|
12 |       l.level = Logger.const_get(config.log_level.to_s.upcase)
13 |     end
14 |   end
15 | 
16 |   def self.config
17 |     @config ||= Configuration.new
18 |   end
19 | 
20 |   def self.configure(&block)
21 |     if block_given?
22 |       yield config
23 |       @logger = nil
24 |     end
25 |   end
26 | end
27 | 
28 | require "ecs_deploy/task_definition"
29 | require "ecs_deploy/service"
30 | require "ecs_deploy/scheduled_task"
31 | 


--------------------------------------------------------------------------------
/lib/ecs_deploy/auto_scaler.rb:
--------------------------------------------------------------------------------
  1 | require "logger"
  2 | require "time"
  3 | require "yaml"
  4 | 
  5 | require "ecs_deploy/auto_scaler/auto_scaling_group_config"
  6 | require "ecs_deploy/auto_scaler/instance_drainer"
  7 | require "ecs_deploy/auto_scaler/service_config"
  8 | require "ecs_deploy/auto_scaler/spot_fleet_request_config"
  9 | 
 10 | module EcsDeploy
 11 |   module AutoScaler
 12 |     class << self
 13 |       attr_reader :logger, :error_logger
 14 | 
 15 |       def run(yaml_path, log_file = nil, error_log_file = nil)
 16 |         @enable_auto_scaling = true
 17 |         setup_signal_handlers
 18 |         @logger = Logger.new(log_file || STDOUT)
 19 |         @logger.level = Logger.const_get(ENV["ECS_AUTO_SCALER_LOG_LEVEL"].upcase) if ENV["ECS_AUTO_SCALER_LOG_LEVEL"]
 20 |         STDOUT.sync = true unless log_file
 21 |         @error_logger = Logger.new(error_log_file || STDERR)
 22 |         @error_logger.level = Logger.const_get(ENV["ECS_AUTO_SCALER_LOG_LEVEL"].upcase) if ENV["ECS_AUTO_SCALER_LOG_LEVEL"]
 23 |         STDERR.sync = true unless error_log_file
 24 |         load_config(yaml_path)
 25 | 
 26 |         ths = (auto_scaling_group_configs + spot_fleet_request_configs).map do |cluster_scaling_config|
 27 |           Thread.new(cluster_scaling_config, &method(:main_loop)).tap { |th| th.abort_on_exception = true }
 28 |         end
 29 | 
 30 |         if @config["spot_instance_intrp_warns_queue_urls"]
 31 |           drainer = EcsDeploy::AutoScaler::InstanceDrainer.new(
 32 |             auto_scaling_group_configs: auto_scaling_group_configs,
 33 |             spot_fleet_request_configs: spot_fleet_request_configs,
 34 |             logger: logger,
 35 |           )
 36 |           polling_ths = @config["spot_instance_intrp_warns_queue_urls"].map do |queue_url|
 37 |             Thread.new(queue_url) do |url|
 38 |               drainer.poll_spot_instance_interruption_warnings(url)
 39 |             end.tap { |th| th.abort_on_exception = true }
 40 |           end
 41 |         end
 42 | 
 43 |         ths.each(&:join)
 44 | 
 45 |         drainer&.stop
 46 |         polling_ths&.each(&:join)
 47 |       end
 48 | 
 49 |       def main_loop(cluster_scaling_config)
 50 |         loop_with_polling_interval("loop of #{cluster_scaling_config.name}") do
 51 |           ths = cluster_scaling_config.service_configs.map do |service_config|
 52 |             Thread.new(service_config) do |s|
 53 |               @logger.debug "Scaling service #{s.name}"
 54 |               s.adjust_desired_count(cluster_scaling_config.cluster_resource_manager)
 55 |             end
 56 |           end
 57 |           ths.each { |th| th.abort_on_exception = true }
 58 | 
 59 |           ths.each(&:join)
 60 | 
 61 |           @logger.debug "Scaling cluster #{cluster_scaling_config.name}"
 62 | 
 63 |           required_capacity = cluster_scaling_config.service_configs.sum { |s| s.desired_count * s.required_capacity }
 64 |           cluster_scaling_config.update_desired_capacity(required_capacity)
 65 | 
 66 |           cluster_scaling_config.service_configs.each(&:wait_until_desired_count_updated)
 67 |         end
 68 |       end
 69 | 
 70 |       def load_config(yaml_path)
 71 |         @config = YAML.load_file(yaml_path)
 72 |         @polling_interval = @config["polling_interval"] || 30
 73 |         if @config["services"]
 74 |           @error_logger&.warn('"services" property in root-level is deprecated. Please define it in "auto_scaling_groups" property or "spot_fleet_requests" property.')
 75 |           @config.delete("services").each do |svc|
 76 |             if svc["auto_scaling_group_name"] && svc["spot_fleet_request_id"]
 77 |               raise "You can specify only one of 'auto_scaling_group_name' or 'spot_fleet_request_name'"
 78 |             end
 79 | 
 80 |             svc_region = svc.delete("region")
 81 |             if svc["auto_scaling_group_name"]
 82 |               asg_name = svc.delete("auto_scaling_group_name")
 83 |               asg = @config["auto_scaling_groups"].find { |g| g["region"] == svc_region && g["name"] == asg_name }
 84 |               asg["services"] ||= []
 85 |               asg["services"] << svc
 86 |               asg["cluster"] = svc.delete("cluster")
 87 |             end
 88 | 
 89 |             if svc["spot_fleet_request_id"]
 90 |               sfr_id = svc.delete("spot_fleet_request_id")
 91 |               sfr = @config["spot_fleet_requests"].find { |r| r["region"] == svc_region && r["id"] == sfr_id }
 92 |               sfr["services"] ||= []
 93 |               sfr["services"] << svc
 94 |               sfr["cluster"] = svc.delete("cluster")
 95 |             end
 96 |           end
 97 |         end
 98 |       end
 99 | 
100 |       def auto_scaling_group_configs
101 |         @auto_scaling_group_configs ||= (@config["auto_scaling_groups"] || []).each.with_object({}) do |c, configs|
102 |           configs[c["name"]] ||= {}
103 |           if configs[c["name"]][c["region"]]
104 |             raise "Duplicate entry in auto_scaling_groups (name: #{c["name"]}, region: #{c["region"]})"
105 |           end
106 |           configs[c["name"]][c["region"]] = AutoScalingGroupConfig.new(c, @logger)
107 |         end.values.flat_map(&:values)
108 |       end
109 | 
110 |       def spot_fleet_request_configs
111 |         @spot_fleet_request_configs ||= (@config["spot_fleet_requests"] || []).each.with_object({}) do |c, configs|
112 |           configs[c["id"]] ||= {}
113 |           if configs[c["id"]][c["region"]]
114 |             raise "Duplicate entry in spot_fleet_requests (id: #{c["id"]}, region: #{c["region"]})"
115 |           end
116 |           configs[c["id"]][c["region"]] = SpotFleetRequestConfig.new(c, @logger)
117 |         end.values.flat_map(&:values)
118 |       end
119 | 
120 |       private
121 | 
122 |       def setup_signal_handlers
123 |         # Use a thread and a queue to avoid "log writing failed. can't be called from trap context"
124 |         # cf. https://bugs.ruby-lang.org/issues/14222#note-3
125 |         signals = Queue.new
126 |         %i(TERM INT CONT TSTP).each do |sig|
127 |           trap(sig) { signals << sig }
128 |         end
129 | 
130 |         Thread.new do
131 |           loop do
132 |             sig = signals.pop
133 |             case sig
134 |             when :INT, :TERM
135 |               @logger.info "Received SIG#{sig}, shutting down gracefully"
136 |               @stop = true
137 |             when :CONT
138 |               @logger.info "Received SIGCONT, resume auto scaling"
139 |               @enable_auto_scaling = true
140 |             when :TSTP
141 |               @logger.info "Received SIGTSTP, pause auto scaling. Send SIGCONT to resume it."
142 |               @enable_auto_scaling = false
143 |             end
144 |           end
145 |         end
146 |       end
147 | 
148 |       def wait_polling_interval?(last_executed_at)
149 |         current = Process.clock_gettime(Process::CLOCK_MONOTONIC, :second)
150 |         diff = current - last_executed_at
151 |         diff <= @polling_interval
152 |       end
153 | 
154 |       def loop_with_polling_interval(name)
155 |         @logger.debug "Start #{name}"
156 | 
157 |         last_executed_at = 0
158 |         loop do
159 |           break if @stop
160 |           sleep 1
161 |           next unless @enable_auto_scaling
162 |           next if wait_polling_interval?(last_executed_at)
163 |           yield
164 |           last_executed_at = Process.clock_gettime(Process::CLOCK_MONOTONIC, :second)
165 |           @logger.debug "#{name} is last executed at #{last_executed_at}"
166 |         end
167 | 
168 |         @logger.debug "Stop #{name}"
169 |       end
170 |     end
171 |   end
172 | end
173 | 


--------------------------------------------------------------------------------
/lib/ecs_deploy/auto_scaler/auto_scaling_group_config.rb:
--------------------------------------------------------------------------------
  1 | require "aws-sdk-autoscaling"
  2 | require "aws-sdk-ec2"
  3 | 
  4 | require "ecs_deploy"
  5 | require "ecs_deploy/auto_scaler/config_base"
  6 | require "ecs_deploy/auto_scaler/cluster_resource_manager"
  7 | 
  8 | module EcsDeploy
  9 |   module AutoScaler
 10 |     AutoScalingGroupConfig = Struct.new(:name, :region, :cluster, :buffer, :service_configs, :disable_draining) do
 11 |       include ConfigBase
 12 | 
 13 |       MAX_DETACHABLE_INSTANCE_COUNT = 20
 14 | 
 15 |       def initialize(attributes = {}, logger)
 16 |         attributes = attributes.dup
 17 |         services = attributes.delete("services")
 18 |         super(attributes, logger)
 19 |         self.service_configs = services.map do |s|
 20 |           ServiceConfig.new(s.merge("cluster" => cluster, "region" => region), logger)
 21 |         end
 22 |       end
 23 | 
 24 |       # Moves the ASG's desired capacity towards required_capacity plus the configured
 25 |       # buffer (rounded up). Any error is sent to the error logger instead of being raised.
 26 |       def update_desired_capacity(required_capacity)
 27 |         detach_and_terminate_orphan_instances
 28 | 
 29 |         target_capacity = (required_capacity + buffer.to_f).ceil
 30 | 
 31 |         response = client.describe_auto_scaling_groups({
 32 |           auto_scaling_group_names: [name],
 33 |         })
 34 |         asg = response.auto_scaling_groups[0]
 35 |         current_capacity = asg.desired_capacity
 36 | 
 37 |         if current_capacity > target_capacity
 38 |           # Scale in by as many instances as could actually be deregistered.
 39 |           removed_count = decrease_desired_capacity(current_capacity - target_capacity)
 40 |           if removed_count > 0
 41 |             reduced_capacity = current_capacity - removed_count
 42 |             cluster_resource_manager.trigger_capacity_update(current_capacity, reduced_capacity)
 43 |             @logger.info "#{log_prefix} Updated desired_capacity to #{reduced_capacity}"
 44 |           else
 45 |             @logger.info "#{log_prefix} Tried to Update desired_capacity but there were no deregisterable instances"
 46 |           end
 47 |         elsif current_capacity < target_capacity
 48 |           # Scale out; max_size is raised too when the target exceeds it.
 49 |           client.update_auto_scaling_group(
 50 |             auto_scaling_group_name: name,
 51 |             min_size: 0,
 52 |             max_size: [asg.max_size, target_capacity].max,
 53 |             desired_capacity: target_capacity,
 54 |           )
 55 |           cluster_resource_manager.trigger_capacity_update(current_capacity, target_capacity)
 56 |           @logger.info "#{log_prefix} Updated desired_capacity to #{target_capacity}"
 57 |         end
 58 |       rescue => e
 59 |         AutoScaler.error_logger.error(e)
 60 |       end
 55 | 
 56 |       def cluster_resource_manager
 57 |         @cluster_resource_manager ||= EcsDeploy::AutoScaler::ClusterResourceManager.new(
 58 |           region: region,
 59 |           cluster: cluster,
 60 |           service_configs: service_configs,
 61 |           capacity_based_on: "instances",
 62 |           logger: @logger,
 63 |         )
 64 |       end
 65 | 
 66 |       # NOTE: InstanceDrainer calls this method when it receives spot instance interruption warnings
 67 |       def detach_instances(instance_ids:, should_decrement_desired_capacity:)
 68 |         return if instance_ids.empty?
 69 | 
 70 |         # detach only detachable instances
 71 |         detachable_instance_ids = instance_ids & describe_detachable_instances.map(&:instance_id)
 72 | 
 73 |         detachable_instance_ids.each_slice(MAX_DETACHABLE_INSTANCE_COUNT) do |ids|
 74 |           client.detach_instances(
 75 |             auto_scaling_group_name: name,
 76 |             instance_ids: ids,
 77 |             should_decrement_desired_capacity: should_decrement_desired_capacity,
 78 |           )
 79 |         end
 80 | 
 81 |         @logger.info "#{log_prefix} Detached instances from ASG: #{instance_ids.inspect}"
 82 |       end
 83 | 
 84 |       private
 85 | 
 86 |       # Deregisters up to `count` container instances from the cluster, then detaches
 87 |       # them from the auto scaling group and terminates them.
 88 |       # Returns the number of instances that were deregistered.
 89 |       def decrease_desired_capacity(count)
 90 |         container_instance_arns_in_service = cluster_resource_manager.fetch_container_instance_arns_in_service
 91 |         container_instances_in_cluster = cluster_resource_manager.fetch_container_instances_in_cluster
 92 |         auto_scaling_group_instances = describe_detachable_instances
 93 |         # An instance is deregisterable when it has no pending task, is not running a task
 94 |         # of the managed services, and is a detachable member of this auto scaling group.
 95 |         deregisterable_instances = container_instances_in_cluster.select do |i|
 96 |           i.pending_tasks_count == 0 &&
 97 |             !running_essential_task?(i, container_instance_arns_in_service) &&
 98 |             auto_scaling_group_instances.any? {|instance| instance.instance_id == i.ec2_instance_id }
 99 |         end
100 | 
101 |         @logger.info "#{log_prefix} Fetched deregisterable instances: #{deregisterable_instances.map(&:ec2_instance_id).inspect}"
102 | 
103 |         # Instance count per availability zone, and the deregisterable instances of each zone.
104 |         az_to_instance_count = auto_scaling_group_instances.each_with_object(Hash.new(0)) { |i, h| h[i.availability_zone] += 1 }
105 |         az_to_deregisterable_instances = deregisterable_instances.group_by do |i|
106 |           i.attributes.find { |a| a.name == "ecs.availability-zone" }.value
107 |         end
108 | 
109 |         deregistered_instance_ids = []
110 |         prev_max_count = nil
111 |         # Select instances to be deregistered keeping the balance of instance count per availability zone
112 |         # Each pass takes at most one instance from every zone that currently has the
113 |         # largest instance count. The loop ends when `count` is reached or when the
114 |         # largest count has not changed since the previous pass.
115 |         while deregistered_instance_ids.size < count
116 |           max_count = az_to_instance_count.each_value.max
117 |           break if max_count == prev_max_count # No more deregistable instances with keeping the balance
118 | 
119 |           azs = az_to_instance_count.select { |_, c| c == max_count }.keys
120 |           azs.each do |az|
121 |             instance = az_to_deregisterable_instances[az]&.pop
122 |             next if instance.nil?
123 |             begin
124 |               cluster_resource_manager.deregister_container_instance(instance.container_instance_arn)
125 |               deregistered_instance_ids << instance.ec2_instance_id
126 |               az_to_instance_count[az] -= 1
127 |             rescue EcsDeploy::AutoScaler::ClusterResourceManager::DeregisterContainerInstanceFailed
128 |               # Skipped silently; the instance was already popped, so it is not retried in this call.
129 |             end
130 |             break if deregistered_instance_ids.size >= count
131 |           end
132 |           prev_max_count = max_count
133 |         end
134 | 
135 |         @logger.info "#{log_prefix} Deregistered instances: #{deregistered_instance_ids.inspect}"
136 | 
137 |         detach_and_terminate_instances(deregistered_instance_ids)
138 | 
139 |         deregistered_instance_ids.size
140 |       end
131 | 
132 |       # Detaches the instances from the group (decrementing its desired capacity), waits
133 |       # three seconds and terminates them. Errors go to the error logger.
134 |       def detach_and_terminate_instances(instance_ids)
135 |         unless instance_ids.empty?
136 |           detach_instances(instance_ids: instance_ids, should_decrement_desired_capacity: true)
137 | 
138 |           sleep 3
139 | 
140 |           ec2_client.terminate_instances(instance_ids: instance_ids)
141 | 
142 |           @logger.info "#{log_prefix} Terminated instances: #{instance_ids.inspect}"
143 |         end
144 |       rescue => e
145 |         AutoScaler.error_logger.error(e)
146 |       end
148 | 
149 |       # Removes "orphans": detachable ASG instances that are not registered as container
150 |       # instances of the cluster and were launched more than 600 seconds ago.
151 |       def detach_and_terminate_orphan_instances
152 |         registered_ids = cluster_resource_manager.fetch_container_instances_in_cluster.map(&:ec2_instance_id)
153 |         orphan_ids = describe_detachable_instances.map(&:instance_id).reject do |instance_id|
154 |           registered_ids.include?(instance_id)
155 |         end
156 | 
157 |         return if orphan_ids.empty?
158 | 
159 |         orphan_instances = ec2_client.describe_instances(instance_ids: orphan_ids).reservations.flat_map(&:instances)
160 |         # Recently launched instances are left alone.
161 |         expired = orphan_instances.select { |instance| (Time.now - instance.launch_time) > 600 }
162 | 
163 |         detach_and_terminate_instances(expired.map(&:instance_id))
164 |       rescue => e
165 |         AutoScaler.error_logger.error(e)
166 |       end
165 | 
166 |       def client
167 |         Aws::AutoScaling::Client.new(
168 |           access_key_id: EcsDeploy.config.access_key_id,
169 |           secret_access_key: EcsDeploy.config.secret_access_key,
170 |           region: region,
171 |           logger: logger
172 |         )
173 |       end
174 | 
175 |       def ec2_client
176 |         Aws::EC2::Client.new(
177 |           access_key_id: EcsDeploy.config.access_key_id,
178 |           secret_access_key: EcsDeploy.config.secret_access_key,
179 |           region: region,
180 |           logger: logger
181 |         )
182 |       end
183 | 
184 |       def describe_detachable_instances
185 |         client.describe_auto_scaling_groups({ auto_scaling_group_names: [name] }).auto_scaling_groups[0].instances.reject do |i|
186 |           # The lifecycle state of terminated instances becomes "Detaching", "Terminating", "Terminating:Wait", or "Terminating:Proceed",
187 |           # and we can't detach instances in such a state.
188 |           i.lifecycle_state.start_with?("Terminating") || i.lifecycle_state == "Detaching" ||
189 |           # EC2 instance sometimes stays in Pending state for more than 10 minutes
190 |             i.lifecycle_state == "Pending"
191 |         end
192 |       end
193 | 
194 |       def running_essential_task?(instance, container_instance_arns_in_service)
195 |         return false if instance.running_tasks_count == 0
196 | 
197 |         container_instance_arns_in_service.include?(instance.container_instance_arn)
198 |       end
199 | 
200 |       def log_prefix
201 |         "[#{self.class.to_s.sub(/\AEcsDeploy::AutoScaler::/, "")} #{name} #{region}]"
202 |       end
203 |     end
204 |   end
205 | end
206 | 


--------------------------------------------------------------------------------
/lib/ecs_deploy/auto_scaler/cluster_resource_manager.rb:
--------------------------------------------------------------------------------
  1 | require "timeout"
  2 | 
  3 | require "aws-sdk-ecs"
  4 | 
  5 | module EcsDeploy
  6 |   module AutoScaler
  7 |     class ClusterResourceManager
  8 |       class DeregisterContainerInstanceFailed < StandardError; end
  9 | 
 10 |       MAX_DESCRIBABLE_SERVICE_COUNT = 10
 11 | 
 12 |       def initialize(region:, cluster:, service_configs:, logger: nil, capacity_based_on:)
 13 |         @region = region
 14 |         @cluster = cluster
 15 |         @logger = logger
 16 |         @service_configs = service_configs
 17 |         @capacity_based_on = capacity_based_on
 18 |         if @capacity_based_on != "instances" && @capacity_based_on != "vCPUs"
 19 |           raise ArgumentError, 'capacity_based_on should be either "instances" or "vCPUs"'
 20 |         end
 21 | 
 22 |         @mutex = Mutex.new
 23 |         @resource = ConditionVariable.new
 24 |         @used_capacity = @service_configs.sum { |s| s.desired_count * s.required_capacity }
 25 |         @capacity = calculate_active_instance_capacity
 26 |       end
 27 | 
 28 |       # Reserves `capacity` units, blocking until enough free capacity exists.
 29 |       # Returns true on success and false when `timeout` seconds pass first.
 30 |       def acquire(capacity, timeout: nil)
 31 |         begin
 32 |           @mutex.synchronize do
 33 |             @logger&.debug("#{log_prefix} Trying to acquire #{capacity} capacity (capacity: #{@capacity}, used_capacity: #{@used_capacity})")
 34 |             Timeout.timeout(timeout) do
 35 |               # Woken up by release and by capacity updates, which broadcast on @resource.
 36 |               @resource.wait(@mutex) until @capacity - @used_capacity >= capacity
 37 |             end
 38 |             @used_capacity += capacity
 39 |             @logger&.debug("#{log_prefix} Acquired #{capacity} capacity (capacity: #{@capacity}, used_capacity: #{@used_capacity})")
 40 |           end
 41 |           true
 42 |         rescue Timeout::Error
 43 |           false
 44 |         end
 45 |       end
 43 | 
 44 |       def release(capacity)
 45 |         @mutex.synchronize do
 46 |           @used_capacity -= capacity
 47 |           @resource.broadcast
 48 |         end
 49 |         @logger&.debug("#{log_prefix} Released #{capacity} capacity (capacity: #{@capacity}, used_capacity: #{@used_capacity})")
 50 |         true
 51 |       end
 52 | 
 53 |       def fetch_container_instances_in_cluster
 54 |         cl = ecs_client
 55 |         resp = cl.list_container_instances(cluster: @cluster)
 56 |         if resp.container_instance_arns.empty?
 57 |           []
 58 |         else
 59 |           resp.flat_map do |resp|
 60 |             cl.describe_container_instances(cluster: @cluster, container_instances: resp.container_instance_arns).container_instances
 61 |           end
 62 |         end
 63 |       end
 64 | 
 65 |       def fetch_container_instance_arns_in_service
 66 |         task_groups = @service_configs.map { |s| "service:#{s.name}" }
 67 |         ecs_client.list_container_instances(cluster: @cluster, filter: "task:group in [#{task_groups.join(",")}]").flat_map(&:container_instance_arns)
 68 |       end
 69 | 
 70 |       def deregister_container_instance(container_instance_arn)
 71 |         ecs_client.deregister_container_instance(cluster: @cluster, container_instance: container_instance_arn, force: true)
 72 |       rescue Aws::ECS::Errors::InvalidParameterException
 73 |         raise DeregisterContainerInstanceFailed
 74 |       end
 75 | 
 76 |       # Starts a background thread that re-reads the active capacity every `interval`
 77 |       # seconds until it reaches (or passes, in the direction of the change)
 78 |       # new_desired_capacity, giving up after 180 seconds. Waiters in acquire are woken
 79 |       # up after every re-read. Does nothing when the two capacities are equal.
 80 |       # With wait_until_capacity_updated: true the caller blocks until the thread ends.
 81 |       def trigger_capacity_update(old_desired_capacity, new_desired_capacity, interval: 5, wait_until_capacity_updated: false)
 82 |         return if new_desired_capacity == old_desired_capacity
 83 | 
 84 |         th = Thread.new do
 85 |           @logger&.info "#{log_prefix} Updating capacity: #{old_desired_capacity} -> #{new_desired_capacity}"
 86 |           Timeout.timeout(180) do
 87 |             until @capacity == new_desired_capacity ||
 88 |                 (new_desired_capacity > old_desired_capacity && @capacity > new_desired_capacity) ||
 89 |                 (new_desired_capacity < old_desired_capacity && @capacity < new_desired_capacity)
 90 |               @mutex.synchronize do
 91 |                 @capacity = calculate_active_instance_capacity
 92 |                 @resource.broadcast
 93 |               rescue => e
 94 |                 # A failed refresh is only logged; the loop tries again after `interval`.
 95 |                 AutoScaler.error_logger.warn("#{log_prefix} `#{__method__}': #{e} (#{e.class})")
 96 |               end
 97 | 
 98 |               sleep interval
 99 |             end
100 |             @logger&.info "#{log_prefix} updated capacity to #{@capacity}"
101 |           end
102 |         rescue Timeout::Error => e
103 |           msg = "#{log_prefix} `#{__method__}': #{e} (#{e.class})"
104 |           if @capacity_based_on == "vCPUs"
105 |             # Timeout::Error sometimes occurs.
106 |             # For example, the following case never meets the condition of until
107 |             #   * old_desired_capacity is 102
108 |             #   * new_desired_capacity is 101
109 |             #   * all instances have 2 vCPUs
110 |             AutoScaler.error_logger.warn(msg)
111 |           else
112 |             AutoScaler.error_logger.error(msg)
113 |           end
114 |         end
115 | 
116 |         if wait_until_capacity_updated
117 |           @logger&.info "#{log_prefix} Waiting for the number of active instances to reach #{new_desired_capacity} (from #{old_desired_capacity})"
118 |           th.join
119 |         end
120 |       end
115 | 
116 |       def calculate_active_instance_capacity
117 |         cl = ecs_client
118 | 
119 |         if @capacity_based_on == "instances"
120 |           return cl.list_container_instances(cluster: @cluster, status: "ACTIVE").sum do |resp|
121 |             resp.container_instance_arns.size
122 |           end
123 |         end
124 | 
125 |         total_cpu = cl.list_container_instances(cluster: @cluster, status: "ACTIVE").sum do |resp|
126 |           next 0 if resp.container_instance_arns.empty?
127 |           ecs_client.describe_container_instances(
128 |             cluster: @cluster,
129 |             container_instances: resp.container_instance_arns,
130 |           ).container_instances.sum { |ci| ci.registered_resources.find { |r| r.name == "CPU" }.integer_value }
131 |         end
132 | 
133 |         total_cpu / 1024
134 |       end
135 | 
136 |       private
137 | 
138 |       def ecs_client
139 |         Aws::ECS::Client.new(
140 |           access_key_id: EcsDeploy.config.access_key_id,
141 |           secret_access_key: EcsDeploy.config.secret_access_key,
142 |           region: @region,
143 |           logger: @logger,
144 |         )
145 |       end
146 | 
147 |       def log_prefix
148 |         "[#{self.class.to_s.gsub(/\AEcsDeploy::AutoScaler::/, "")} #{@region} #{@cluster}]"
149 |       end
150 |     end
151 |   end
152 | end
153 | 


--------------------------------------------------------------------------------
/lib/ecs_deploy/auto_scaler/config_base.rb:
--------------------------------------------------------------------------------
 1 | module EcsDeploy
 2 |   module AutoScaler
 3 |     module ConfigBase
 4 |       def initialize(attributes = {}, logger)
 5 |         attributes.each do |key, val|
 6 |           send("#{key}=", val)
 7 |         end
 8 |         @logger = logger
 9 |       end
10 | 
11 |       def logger
12 |         @logger
13 |       end
14 |     end
15 |   end
16 | end
17 | 


--------------------------------------------------------------------------------
/lib/ecs_deploy/auto_scaler/instance_drainer.rb:
--------------------------------------------------------------------------------
  1 | require "aws-sdk-ec2"
  2 | require "aws-sdk-ecs"
  3 | require "aws-sdk-sqs"
  4 | 
  5 | require "ecs_deploy"
  6 | 
  7 | module EcsDeploy
  8 |   module AutoScaler
  9 |     class InstanceDrainer
 10 |       def initialize(auto_scaling_group_configs:, spot_fleet_request_configs:, logger:)
 11 |         @auto_scaling_group_configs = auto_scaling_group_configs || []
 12 |         @spot_fleet_request_configs = spot_fleet_request_configs || []
 13 |         @logger = logger
 14 |         @stop = false
 15 |       end
 16 | 
 17 |       # Polls the SQS queue for spot instance interruption warnings until stop is called.
 18 |       # For each batch, the affected instances are set to DRAINING in ECS and detached
 19 |       # from their auto scaling groups. Errors raised while handling a batch are logged
 20 |       # and polling continues.
 21 |       def poll_spot_instance_interruption_warnings(queue_url)
 22 |         @logger.debug "Start polling spot instance interruption warnings of #{queue_url}"
 23 | 
 24 |         # cf. https://docs.aws.amazon.com/general/latest/gr/rande.html#sqs_region
 25 |         region = URI.parse(queue_url).host.split(".")[1]
 26 | 
 27 |         poller = Aws::SQS::QueuePoller.new(queue_url, client: sqs_client(region))
 28 |         poller.before_request do |_stats|
 29 |           throw :stop_polling if @stop
 30 |         end
 31 | 
 32 |         until @stop
 33 |           begin
 34 |             poller.poll(max_number_of_messages: 10, visibility_timeout: 15) do |messages, _|
 35 |               # Messages without an instance id are dropped here; a nil id would make
 36 |               # describe_instances fail, so the same batch would be received again and again.
 37 |               instance_ids = messages.map do |msg|
 38 |                 JSON.parse(msg.body).dig("detail", "instance-id")
 39 |               end.compact
 40 |               # Never call describe_instances with an empty id list.
 41 |               # NOTE(review): an empty filter presumably matches every instance - confirm.
 42 |               next if instance_ids.empty?
 43 | 
 44 |               config_to_instance_ids = build_config_to_instance_ids(instance_ids, region)
 45 |               set_instance_state_to_draining(config_to_instance_ids, region)
 46 |               # Detach the instances to launch other instances
 47 |               detach_instances_from_auto_scaling_groups(config_to_instance_ids, region)
 48 |             end
 49 |           rescue => e
 50 |             AutoScaler.error_logger.error(e)
 51 |           end
 52 |         end
 53 | 
 54 |         @logger.debug "Stop polling spot instance interruption warnings of #{queue_url}"
 55 |       end
 47 | 
 48 |       # Asks the polling loops to finish: the flag is checked before every SQS request
 49 |       # and at the top of each polling loop iteration.
 50 |       def stop
 51 |         @stop = true
 52 |       end
 51 | 
 52 |       private
 53 | 
 54 |       def build_config_to_instance_ids(instance_ids, region)
 55 |         config_to_instance_ids = Hash.new{ |h, k| h[k] = [] }
 56 |         ec2_client(region).describe_instances(instance_ids: instance_ids).each do |resp|
 57 |           resp.reservations.each do |reservation|
 58 |             reservation.instances.each do |i|
 59 |               sfr_id = i.tags.find { |t| t.key == "aws:ec2spot:fleet-request-id" }&.value
 60 |               if sfr_id
 61 |                 config = @spot_fleet_request_configs.find { |c| c.id == sfr_id && c.region == region }
 62 |                 config_to_instance_ids[config] << i.instance_id if config
 63 |                 next
 64 |               end
 65 | 
 66 |               asg_name = i.tags.find { |t| t.key == "aws:autoscaling:groupName" }&.value
 67 |               if asg_name
 68 |                 config = @auto_scaling_group_configs.find { |c| c.name == asg_name && c.region == region }
 69 |                 config_to_instance_ids[config] << i.instance_id if config
 70 |               end
 71 |             end
 72 |           end
 73 |         end
 74 | 
 75 |         config_to_instance_ids
 76 |       end
 77 | 
 78 |       def set_instance_state_to_draining(config_to_instance_ids, region)
 79 |         cl = ecs_client(region)
 80 |         config_to_instance_ids.each do |config, instance_ids|
 81 |           if config.disable_draining == true || config.disable_draining == "true"
 82 |             @logger.info "Skipped draining instances: region: #{region}, cluster: #{config.cluster}, instance_ids: #{instance_ids.inspect}"
 83 |             next
 84 |           end
 85 | 
 86 |           arns = cl.list_container_instances(
 87 |             cluster: config.cluster,
 88 |             filter: "ec2InstanceId in [#{instance_ids.join(",")}]",
 89 |           ).container_instance_arns
 90 | 
 91 |           if instance_ids.size != arns.size
 92 |             AutoScaler.error_logger.warn("The number of ARNs differs from the number of instance IDs: instance_ids: #{instance_ids.inspect}, container_instance_arns: #{arns.inspect}")
 93 |           end
 94 |           next if arns.empty?
 95 | 
 96 |           cl.update_container_instances_state(
 97 |             cluster: config.cluster,
 98 |             container_instances: arns,
 99 |             status: "DRAINING",
100 |           )
101 |           @logger.info "Draining instances: region: #{region}, cluster: #{config.cluster}, instance_ids: #{instance_ids.inspect}, container_instance_arns: #{arns.inspect}"
102 |         end
103 |       end
104 | 
105 |       def detach_instances_from_auto_scaling_groups(config_to_instance_ids, region)
106 |         @auto_scaling_group_configs.each do |config|
107 |           config.detach_instances(instance_ids: config_to_instance_ids[config], should_decrement_desired_capacity: false)
108 |         end
109 |       end
110 | 
111 |       def ec2_client(region)
112 |         Aws::EC2::Client.new(
113 |           access_key_id: EcsDeploy.config.access_key_id,
114 |           secret_access_key: EcsDeploy.config.secret_access_key,
115 |           region: region,
116 |           logger: @logger,
117 |         )
118 |       end
119 | 
120 |       def ecs_client(region)
121 |         Aws::ECS::Client.new(
122 |           access_key_id: EcsDeploy.config.access_key_id,
123 |           secret_access_key: EcsDeploy.config.secret_access_key,
124 |           region: region,
125 |           logger: @logger,
126 |         )
127 |       end
128 | 
129 |       def sqs_client(region)
130 |         Aws::SQS::Client.new(
131 |           access_key_id: EcsDeploy.config.access_key_id,
132 |           secret_access_key: EcsDeploy.config.secret_access_key,
133 |           region: region,
134 |           logger: @logger,
135 |         )
136 |       end
137 |     end
138 |   end
139 | end
140 | 


--------------------------------------------------------------------------------
/lib/ecs_deploy/auto_scaler/service_config.rb:
--------------------------------------------------------------------------------
  1 | require "aws-sdk-ecs"
  2 | require "ecs_deploy"
  3 | require "ecs_deploy/auto_scaler/config_base"
  4 | require "ecs_deploy/auto_scaler/trigger_config"
  5 | 
  6 | module EcsDeploy
  7 |   module AutoScaler
  8 |     SERVICE_CONFIG_ATTRIBUTES = %i(name cluster region step max_task_count min_task_count idle_time scheduled_min_task_count cooldown_time_for_reach_max upscale_triggers downscale_triggers desired_count required_capacity)
  9 |     ServiceConfig = Struct.new(*SERVICE_CONFIG_ATTRIBUTES) do
 10 |       include ConfigBase
 11 | 
 12 |       MAX_DESCRIBABLE_TASK_COUNT = 100
 13 | 
 14 |       def initialize(attributes = {}, logger)
 15 |         super
 16 |         self.idle_time ||= 60
 17 |         self.max_task_count = Array(max_task_count)
 18 |         self.upscale_triggers = upscale_triggers.to_a.map do |t|
 19 |           TriggerConfig.new({"region" => region, "step" => step}.merge(t), logger)
 20 |         end
 21 |         self.downscale_triggers = downscale_triggers.to_a.map do |t|
 22 |           TriggerConfig.new({"region" => region, "step" => step}.merge(t), logger)
 23 |         end
 24 |         self.max_task_count.sort!
 25 |         self.desired_count = fetch_service.desired_count
 26 |         self.required_capacity ||= 1
 27 |         @reach_max_at = nil
 28 |         @last_updated_at = nil
 29 |         @logger = logger
 30 |       end
 31 | 
 32 |       def adjust_desired_count(cluster_resource_manager)
 33 |         if idle?
 34 |           @logger.debug "#{name} is idling"
 35 |           return
 36 |         end
 37 | 
 38 |         difference = 0
 39 |         upscale_triggers.each do |trigger|
 40 |           next if difference >= trigger.step
 41 | 
 42 |           if trigger.match?
 43 |             @logger.info "#{log_prefix} Firing upscale trigger by #{trigger.alarm_name} #{trigger.state}"
 44 |             difference = trigger.step
 45 |           end
 46 |         end
 47 | 
 48 |         if desired_count > current_min_task_count
 49 |           downscale_triggers.each do |trigger|
 50 |             next if difference > 0 && !trigger.prioritized_over_upscale_triggers?
 51 |             next unless trigger.match?
 52 | 
 53 |             @logger.info "#{log_prefix} Firing downscale trigger by #{trigger.alarm_name} #{trigger.state}"
 54 |             difference = [difference, -trigger.step].min
 55 |           end
 56 |         end
 57 | 
 58 |         if current_min_task_count > desired_count + difference
 59 |           difference = current_min_task_count - desired_count
 60 |         end
 61 | 
 62 |         if difference >= 0 && desired_count > max_task_count.max
 63 |           difference = max_task_count.max - desired_count
 64 |         end
 65 | 
 66 |         if difference != 0
 67 |           update_service(difference, cluster_resource_manager)
 68 |         end
 69 |       end
 70 | 
 71 |       def wait_until_desired_count_updated
 72 |         @increase_desired_count_thread&.join
 73 |       rescue => e
 74 |         AutoScaler.error_logger.warn("`#{__method__}': #{e} (#{e.class})")
 75 |       ensure
 76 |         @increase_desired_count_thread = nil
 77 |       end
 78 | 
 79 |       private
 80 | 
 81 |       def client
 82 |         Aws::ECS::Client.new(
 83 |           access_key_id: EcsDeploy.config.access_key_id,
 84 |           secret_access_key: EcsDeploy.config.secret_access_key,
 85 |           region: region,
 86 |           logger: logger
 87 |         )
 88 |       end
 89 | 
 90 |       def idle?
 91 |         return false unless @last_updated_at
 92 | 
 93 |         diff = Process.clock_gettime(Process::CLOCK_MONOTONIC, :second) - @last_updated_at
 94 |         diff < idle_time
 95 |       end
 96 | 
 97 |       def current_min_task_count
 98 |         return min_task_count if scheduled_min_task_count.nil? || scheduled_min_task_count.empty?
 99 | 
100 |         scheduled_min_task_count.find(-> { {"count" => min_task_count} }) { |s|
101 |           from = Time.parse(s["from"])
102 |           to = Time.parse(s["to"])
103 |           (from..to).cover?(Time.now)
104 |         }["count"]
105 |       end
106 | 
107 |       def overheat?
108 |         return false unless @reach_max_at
109 |         (Process.clock_gettime(Process::CLOCK_MONOTONIC, :second) - @reach_max_at) > cooldown_time_for_reach_max
110 |       end
111 | 
112 |       def fetch_service
113 |         res = client.describe_services(cluster: cluster, services: [name])
114 |         raise "Service \"#{name}\" is not found" if res.services.empty?
115 |         res.services[0]
116 |       rescue => e
117 |         AutoScaler.error_logger.error(e)
118 |       end
119 | 
120 |       def update_service(difference, cluster_resource_manager)
121 |         next_desired_count = desired_count + difference
122 |         current_level = max_task_level(desired_count)
123 |         next_level = max_task_level(next_desired_count)
124 |         if current_level < next_level && overheat? # next max
125 |           level = next_level
126 |           @reach_max_at = nil
127 |           @logger.info "#{log_prefix} Service is overheated, uses next max count"
128 |         elsif current_level < next_level && !overheat? # wait cooldown
129 |           level = current_level
130 |           now = Process.clock_gettime(Process::CLOCK_MONOTONIC, :second)
131 |           @reach_max_at ||= now
132 |           @logger.info "#{log_prefix} Service waiting for cooldown period to elapse #{(now - @reach_max_at).to_i}sec"
133 |         elsif current_level == next_level && next_desired_count >= max_task_count[current_level] # reach current max
134 |           level = current_level
135 |           now = Process.clock_gettime(Process::CLOCK_MONOTONIC, :second)
136 |           @reach_max_at ||= now
137 |           @logger.info "#{log_prefix} Service waiting for cooldown period to elapse #{(now - @reach_max_at).to_i}sec"
138 |           if next_desired_count > max_task_count[current_level] && current_level == max_task_count.size - 1
139 |             @logger.warn "#{log_prefix} Desired count has reached the maximum value and couldn't be increased"
140 |           end
141 |         elsif current_level == next_level && next_desired_count < max_task_count[current_level]
142 |           level = current_level
143 |           @reach_max_at = nil
144 |           @logger.info "#{log_prefix} Service has finished cooling down"
145 |         elsif current_level > next_level
146 |           level = next_level
147 |           @reach_max_at = nil
148 |           @logger.info "#{log_prefix} Service has finished cooling down"
149 |         end
150 | 
151 |         next_desired_count = [next_desired_count, max_task_count[level]].min
152 |         if next_desired_count > desired_count
153 |           increase_desired_count(next_desired_count - desired_count, cluster_resource_manager)
154 |         else
155 |           decrease_desired_count(desired_count - next_desired_count, cluster_resource_manager)
156 |         end
157 | 
158 |         @last_updated_at = Process.clock_gettime(Process::CLOCK_MONOTONIC, :second)
159 |         @logger.info "#{log_prefix} Updated desired_count to #{next_desired_count}"
160 |       rescue => e
161 |         AutoScaler.error_logger.error(e)
162 |       end
163 | 
164 |       def increase_desired_count(by, cluster_resource_manager)
165 |         applied_desired_count = desired_count
166 |         self.desired_count += by
167 | 
168 |         wait_until = Process.clock_gettime(Process::CLOCK_MONOTONIC) + 180
169 |         @increase_desired_count_thread = Thread.new do
170 |           cl = client
171 |           by.times do
172 |             timeout = wait_until - Process.clock_gettime(Process::CLOCK_MONOTONIC)
173 |             break if timeout <= 0
174 |             break unless cluster_resource_manager.acquire(required_capacity, timeout: timeout)
175 |             begin
176 |               cl.update_service(cluster: cluster, service: name, desired_count: applied_desired_count + 1)
177 |               applied_desired_count += 1
178 |             rescue => e
179 |               cluster_resource_manager.release(required_capacity)
180 |               AutoScaler.error_logger.error(e)
181 |               break
182 |             end
183 |           end
184 | 
185 |           if applied_desired_count != desired_count
186 |             self.desired_count = applied_desired_count
187 |             @logger.info "#{log_prefix} Failed to update service and set desired_count to #{desired_count}"
188 |           end
189 |         end
190 |       end
191 | 
192 |       def decrease_desired_count(by, cluster_resource_manager)
193 |         cl = client
194 |         running_task_arns = cl.list_tasks(cluster: cluster, service_name: name, desired_status: "RUNNING").flat_map(&:task_arns)
195 | 
196 |         cl.update_service(cluster: cluster, service: name, desired_count: desired_count - by)
197 | 
198 |         cl.wait_until(:services_stable, cluster: cluster, services: [name]) do |w|
199 |           w.before_wait do
200 |             @logger.debug "#{log_prefix} waiting for service to stabilize"
201 |           end
202 |         end
203 | 
204 |         stopping_task_arns = running_task_arns - cl.list_tasks(cluster: cluster, service_name: name, desired_status: "RUNNING").flat_map(&:task_arns)
205 |         stopping_task_arns.each_slice(MAX_DESCRIBABLE_TASK_COUNT) do |arns|
206 |           cl.wait_until(:tasks_stopped, cluster: cluster, tasks: arns) do |w|
207 |             w.before_wait do
208 |               @logger.debug "#{log_prefix} waiting for tasks to finish stopping"
209 |             end
210 |           end
211 |         end
212 | 
213 |         cluster_resource_manager.release(required_capacity * by)
214 |         self.desired_count -= by
215 |       end
216 | 
217 |       def max_task_level(count)
218 |         max_task_count.index { |i| count <= i } || max_task_count.size - 1
219 |       end
220 | 
221 |       def log_prefix
222 |         "[#{self.class.to_s.sub(/\AEcsDeploy::AutoScaler::/, "")} #{name} #{region}]"
223 |       end
224 |     end
225 |   end
226 | end
227 | 


--------------------------------------------------------------------------------
/lib/ecs_deploy/auto_scaler/spot_fleet_request_config.rb:
--------------------------------------------------------------------------------
  1 | require "json"
  2 | require "timeout"
  3 | 
  4 | require "aws-sdk-ec2"
  5 | require "ecs_deploy"
  6 | require "ecs_deploy/auto_scaler/config_base"
  7 | require "ecs_deploy/auto_scaler/cluster_resource_manager"
  8 | 
  9 | module EcsDeploy
 10 |   module AutoScaler
 11 |     SpotFleetRequestConfig = Struct.new(:id, :region, :cluster, :buffer, :service_configs, :disable_draining) do
 12 |       include ConfigBase
 13 | 
 14 |       def initialize(attributes = {}, logger)
 15 |         attributes = attributes.dup
 16 |         services = attributes.delete("services")
 17 |         super(attributes, logger)
 18 |         self.service_configs = services.map do |s|
 19 |           ServiceConfig.new(s.merge("cluster" => cluster, "region" => region), logger)
 20 |         end
 21 |       end
 22 | 
 23 |       def name
 24 |         id
 25 |       end
 26 | 
 27 |       def update_desired_capacity(required_capacity)
 28 |         terminate_orphan_instances
 29 | 
 30 |         desired_capacity = (required_capacity + buffer.to_f).ceil
 31 | 
 32 |         request_config = ec2_client.describe_spot_fleet_requests(
 33 |           spot_fleet_request_ids: [id]
 34 |         ).spot_fleet_request_configs[0].spot_fleet_request_config
 35 | 
 36 |         return if desired_capacity == request_config.target_capacity
 37 | 
 38 |         ec2_client.modify_spot_fleet_request(spot_fleet_request_id: id, target_capacity: desired_capacity)
 39 | 
 40 |         cluster_resource_manager.trigger_capacity_update(
 41 |           request_config.target_capacity,
 42 |           desired_capacity,
 43 |           # Wait until the capacity is updated to prevent the process from terminating before container draining is completed
 44 |           wait_until_capacity_updated: desired_capacity < request_config.target_capacity,
 45 |         )
 46 |         @logger.info "#{log_prefix} Updated desired_capacity to #{desired_capacity}"
 47 |       rescue => e
 48 |         AutoScaler.error_logger.error(e)
 49 |       end
 50 | 
 51 |       def cluster_resource_manager
 52 |         @cluster_resource_manager ||= EcsDeploy::AutoScaler::ClusterResourceManager.new(
 53 |           region: region,
 54 |           cluster: cluster,
 55 |           service_configs: service_configs,
 56 |           capacity_based_on: "vCPUs",
 57 |           logger: @logger,
 58 |         )
 59 |       end
 60 | 
 61 |       private
 62 | 
 63 |       def terminate_orphan_instances
 64 |         container_instance_ids = cluster_resource_manager.fetch_container_instances_in_cluster.map(&:ec2_instance_id)
 65 |         spot_fleet_instances = ec2_client.describe_spot_fleet_instances(spot_fleet_request_id: id).active_instances
 66 |         orphans = spot_fleet_instances.reject { |i| container_instance_ids.include?(i.instance_id) }.map(&:instance_id)
 67 | 
 68 |         return if orphans.empty?
 69 | 
 70 |         running_instances = ec2_client.describe_instances(
 71 |           instance_ids: orphans,
 72 |           filters: [{ name: "instance-state-name", values: ["running"] }],
 73 |         ).reservations.flat_map(&:instances)
 74 |         # instances which have just launched might not be registered to the cluster yet.
 75 |         instance_ids = running_instances.select { |i| (Time.now - i.launch_time) > 600 }.map(&:instance_id)
 76 | 
 77 |         return if instance_ids.empty?
 78 | 
 79 |         # Terminate orpahns without canceling spot instance request
 80 |         # because we can't terminate canceled spot instances by decreasing the capacity
 81 |         ec2_client.terminate_instances(instance_ids: instance_ids)
 82 | 
 83 |         @logger.info "#{log_prefix} Terminated instances: #{instance_ids.inspect}"
 84 |       rescue => e
 85 |         AutoScaler.error_logger.error(e)
 86 |       end
 87 | 
 88 |       def ec2_client
 89 |         Aws::EC2::Client.new(
 90 |           access_key_id: EcsDeploy.config.access_key_id,
 91 |           secret_access_key: EcsDeploy.config.secret_access_key,
 92 |           region: region,
 93 |           logger: logger,
 94 |         )
 95 |       end
 96 | 
 97 |       def log_prefix
 98 |         "[#{self.class.to_s.sub(/\AEcsDeploy::AutoScaler::/, "")} #{name} #{region}]"
 99 |       end
100 |     end
101 |   end
102 | end
103 | 


--------------------------------------------------------------------------------
/lib/ecs_deploy/auto_scaler/trigger_config.rb:
--------------------------------------------------------------------------------
 1 | require "aws-sdk-cloudwatch"
 2 | require "ecs_deploy"
 3 | require "ecs_deploy/auto_scaler"
 4 | require "ecs_deploy/auto_scaler/config_base"
 5 | 
 6 | module EcsDeploy
 7 |   module AutoScaler
 8 |     TriggerConfig = Struct.new(:alarm_name, :region, :state, :step, :prioritized_over_upscale_triggers) do
 9 |       include ConfigBase
10 | 
11 |       def match?
12 |         fetch_alarm.state_value == state
13 |       end
14 | 
15 |       def prioritized_over_upscale_triggers?
16 |         !!prioritized_over_upscale_triggers
17 |       end
18 | 
19 |       private
20 | 
21 |       def client
22 |         Aws::CloudWatch::Client.new(
23 |           access_key_id: EcsDeploy.config.access_key_id,
24 |           secret_access_key: EcsDeploy.config.secret_access_key,
25 |           region: region,
26 |           logger: logger
27 |         )
28 |       end
29 | 
30 |       def fetch_alarm
31 |         res = client.describe_alarms(alarm_names: [alarm_name])
32 | 
33 |         raise "Alarm \"#{alarm_name}\" is not found" if res.metric_alarms.empty?
34 |         res.metric_alarms[0].tap do |alarm|
35 |           AutoScaler.logger.debug("#{alarm.alarm_name} state is #{alarm.state_value}")
36 |         end
37 |       rescue => e
38 |         AutoScaler.error_logger.error(e)
39 |       end
40 |     end
41 |   end
42 | end
43 | 


--------------------------------------------------------------------------------
/lib/ecs_deploy/capistrano.rb:
--------------------------------------------------------------------------------
  1 | require 'ecs_deploy'
  2 | require 'ecs_deploy/instance_fluctuation_manager'
  3 | 
  4 | namespace :ecs do
  # Copies the Capistrano variables that are set into EcsDeploy's
  # configuration and turns the TARGET_CLUSTER / TARGET_TASK_DEFINITION
  # environment variables (comma separated) into Capistrano variables.
  task :configure do
    as_is = lambda { |value| value }
    first_element = lambda { |value| Array(value)[0] }

    # [configuration attribute, Capistrano variable, conversion]
    settings = [
      [:log_level, :ecs_log_level, as_is],
      [:deploy_wait_timeout, :ecs_deploy_wait_timeout, as_is],
      [:ecs_service_role, :ecs_service_role, as_is],
      [:default_region, :ecs_region, first_element],
      [:ecs_wait_until_services_stable_max_attempts, :ecs_wait_until_services_stable_max_attempts, as_is],
      [:ecs_wait_until_services_stable_delay, :ecs_wait_until_services_stable_delay, as_is],
      [:ecs_client_params, :ecs_client_params, as_is],
    ]

    EcsDeploy.configure do |c|
      settings.each do |attribute, variable, convert|
        # Variables that are unset (or falsy) leave the configuration untouched.
        next unless fetch(variable)

        c.public_send("#{attribute}=", convert.call(fetch(variable)))
      end
    end

    {
      "TARGET_CLUSTER" => :target_cluster,
      "TARGET_TASK_DEFINITION" => :target_task_definition,
    }.each do |env_name, variable|
      raw_value = ENV[env_name]
      set variable, raw_value.split(",").map(&:strip) if raw_value
    end
  end
 23 | 
 24 |   task register_task_definition: [:configure] do
 25 |     if fetch(:ecs_tasks)
 26 |       regions = Array(fetch(:ecs_region))
 27 |       regions = [EcsDeploy.config.default_region] if regions.empty?
 28 |       ecs_registered_tasks = {}
 29 |       regions.each do |region|
 30 |         ecs_registered_tasks[region] = {}
 31 |         fetch(:ecs_tasks).each do |t|
 32 |           task_definition = EcsDeploy::TaskDefinition.new(
 33 |             region: region,
 34 |             task_definition_name: t[:name],
 35 |             container_definitions: t[:container_definitions],
 36 |             task_role_arn: t[:task_role_arn],
 37 |             execution_role_arn: t[:execution_role_arn],
 38 |             volumes: t[:volumes],
 39 |             network_mode: t[:network_mode],
 40 |             placement_constraints: t[:placement_constraints],
 41 |             requires_compatibilities: t[:requires_compatibilities],
 42 |             cpu: t[:cpu],
 43 |             memory: t[:memory],
 44 |             tags: t[:tags],
 45 |             runtime_platform: t[:runtime_platform],
 46 |           )
 47 |           result = task_definition.register
 48 |           ecs_registered_tasks[region][t[:name]] = result
 49 |         end
 50 |       end
 51 | 
 52 |       set :ecs_registered_tasks, ecs_registered_tasks
 53 |     end
 54 |   end
 55 | 
 56 |   task deploy_scheduled_task: [:configure, :register_task_definition] do
 57 |     if fetch(:ecs_scheduled_tasks)
 58 |       regions = Array(fetch(:ecs_region))
 59 |       regions = [EcsDeploy.config.default_region] if regions.empty?
 60 |       regions.each do |r|
 61 |         fetch(:ecs_scheduled_tasks).each do |t|
 62 |           scheduled_task = EcsDeploy::ScheduledTask.new(
 63 |             region: r,
 64 |             cluster: t[:cluster] || fetch(:ecs_default_cluster),
 65 |             rule_name: t[:rule_name],
 66 |             schedule_expression: t[:schedule_expression],
 67 |             enabled: t[:enabled] != false,
 68 |             description: t[:description],
 69 |             target_id: t[:target_id],
 70 |             task_definition_name: t[:task_definition_name],
 71 |             network_configuration: t[:network_configuration],
 72 |             launch_type: t[:launch_type],
 73 |             platform_version: t[:platform_version],
 74 |             group: t[:group],
 75 |             revision: t[:revision],
 76 |             task_count: t[:task_count],
 77 |             role_arn: t[:role_arn],
 78 |             container_overrides: t[:container_overrides],
 79 |           )
 80 |           scheduled_task.deploy
 81 |         end
 82 |       end
 83 |     end
 84 |   end
 85 | 
 86 |   task deploy: [:configure, :register_task_definition] do
 87 |     if fetch(:ecs_services)
 88 |       regions = Array(fetch(:ecs_region))
 89 |       regions = [EcsDeploy.config.default_region] if regions.empty?
 90 |       regions.each do |r|
 91 |         services = fetch(:ecs_services).map do |service|
 92 |           if fetch(:target_cluster) && fetch(:target_cluster).size > 0
 93 |             next unless fetch(:target_cluster).include?(service[:cluster])
 94 |           end
 95 |           if fetch(:target_task_definition) && fetch(:target_task_definition).size > 0
 96 |             next unless fetch(:target_task_definition).include?(service[:task_definition_name])
 97 |           end
 98 | 
 99 |           service_options = {
100 |             region: r,
101 |             cluster: service[:cluster] || fetch(:ecs_default_cluster),
102 |             service_name: service[:name],
103 |             task_definition_name: service[:task_definition_name],
104 |             load_balancers: service[:load_balancers],
105 |             desired_count: service[:desired_count],
106 |             launch_type: service[:launch_type],
107 |             network_configuration: service[:network_configuration],
108 |             health_check_grace_period_seconds: service[:health_check_grace_period_seconds],
109 |             delete: service[:delete],
110 |             enable_ecs_managed_tags: service[:enable_ecs_managed_tags],
111 |             tags: service[:tags],
112 |             propagate_tags: service[:propagate_tags],
113 |             enable_execute_command: service[:enable_execute_command],
114 |           }
115 |           service_options[:deployment_configuration] = service[:deployment_configuration] if service[:deployment_configuration]
116 |           service_options[:placement_constraints] = service[:placement_constraints] if service[:placement_constraints]
117 |           service_options[:placement_strategy] = service[:placement_strategy] if service[:placement_strategy]
118 |           service_options[:capacity_provider_strategy] = service[:capacity_provider_strategy] if service[:capacity_provider_strategy]
119 |           service_options[:scheduling_strategy] = service[:scheduling_strategy] if service[:scheduling_strategy]
120 |           s = EcsDeploy::Service.new(**service_options)
121 |           s.deploy
122 |           s
123 |         end
124 |         EcsDeploy::Service.wait_all_running(services)
125 |       end
126 |     end
127 |   end
128 | 
129 |   task rollback: [:configure] do
130 |     if fetch(:ecs_services)
131 |       regions = Array(fetch(:ecs_region))
132 |       regions = [EcsDeploy.config.default_region] if regions.empty?
133 | 
134 |       rollback_routes = {}
135 |       regions.each do |r|
136 |         services = fetch(:ecs_services).map do |service|
137 |           if fetch(:target_cluster) && fetch(:target_cluster).size > 0
138 |             next unless fetch(:target_cluster).include?(service[:cluster])
139 |           end
140 |           if fetch(:target_task_definition) && fetch(:target_task_definition).size > 0
141 |             next unless fetch(:target_task_definition).include?(service[:task_definition_name])
142 |           end
143 | 
144 |           task_definition_arns = EcsDeploy::TaskDefinition.new(
145 |             region: r,
146 |             task_definition_name: service[:task_definition_name] || service[:name],
147 |           ).recent_task_definition_arns
148 | 
149 |           rollback_step = (ENV["STEP"] || 1).to_i
150 | 
151 |           current_task_definition_arn = EcsDeploy::Service.new(
152 |             region: r,
153 |             cluster: service[:cluster] || fetch(:ecs_default_cluster),
154 |             service_name: service[:name],
155 |           ).current_task_definition_arn
156 | 
157 |           unless (rollback_arn = rollback_routes[current_task_definition_arn])
158 |             current_arn_index = task_definition_arns.index do |arn|
159 |               arn == current_task_definition_arn
160 |             end
161 | 
162 |             rollback_arn = task_definition_arns[current_arn_index + rollback_step]
163 | 
164 |             rollback_routes[current_task_definition_arn] = rollback_arn
165 |           end
166 | 
167 |           EcsDeploy.logger.info "#{current_task_definition_arn} -> #{rollback_arn}"
168 | 
169 |           raise "Past task_definition_arns is empty" unless rollback_arn
170 | 
171 |           service_options = {
172 |             region: r,
173 |             cluster: service[:cluster] || fetch(:ecs_default_cluster),
174 |             service_name: service[:name],
175 |             task_definition_name: rollback_arn,
176 |             load_balancers: service[:load_balancers],
177 |             desired_count: service[:desired_count],
178 |             launch_type: service[:launch_type],
179 |             network_configuration: service[:network_configuration],
180 |             health_check_grace_period_seconds: service[:health_check_grace_period_seconds],
181 |           }
182 |           service_options[:deployment_configuration] = service[:deployment_configuration] if service[:deployment_configuration]
183 |           service_options[:placement_constraints] = service[:placement_constraints] if service[:placement_constraints]
184 |           service_options[:placement_strategy] = service[:placement_strategy] if service[:placement_strategy]
185 |           service_options[:capacity_provider_strategy] = service[:capacity_provider_strategy] if service[:capacity_provider_strategy]
186 |           s = EcsDeploy::Service.new(**service_options)
187 |           s.deploy
188 |           EcsDeploy::TaskDefinition.deregister(current_task_definition_arn, region: r)
189 |           s
190 |         end
191 |         EcsDeploy::Service.wait_all_running(services)
192 |       end
193 |     end
194 |   end
195 | 
196 |   task increase_instances_to_max_size: [:configure] do
197 |     configs = fetch(:ecs_instance_fluctuation_manager_configs, [])
198 |     unless configs.empty?
199 |       regions = Array(fetch(:ecs_region))
200 |       regions = [EcsDeploy.config.default_region] if regions.empty?
201 |       regions.each do |region|
202 |         configs.each do |config|
203 |           logger = config.fetch(:logger, EcsDeploy.logger)
204 |           m = EcsDeploy::InstanceFluctuationManager.new(
205 |             region: config[:region] || region,
206 |             cluster: config[:cluster] || fetch(:ecs_default_cluster),
207 |             auto_scaling_group_name: config[:auto_scaling_group_name],
208 |             desired_capacity: config[:desired_capacity],
209 |             logger: logger
210 |           )
211 |           m.increase
212 |         end
213 |       end
214 |     end
215 |   end
216 | 
217 |   task terminate_redundant_instances: [:configure] do
218 |     configs = fetch(:ecs_instance_fluctuation_manager_configs, [])
219 |     unless configs.empty?
220 |       regions = Array(fetch(:ecs_region))
221 |       regions = [EcsDeploy.config.default_region] if regions.empty?
222 |       regions.each do |region|
223 |         configs.each do |config|
224 |           logger = config.fetch(:logger, EcsDeploy.logger)
225 |           m = EcsDeploy::InstanceFluctuationManager.new(
226 |             region: config[:region] || region,
227 |             cluster: config[:cluster] || fetch(:ecs_default_cluster),
228 |             auto_scaling_group_name: config[:auto_scaling_group_name],
229 |             desired_capacity: config[:desired_capacity],
230 |             logger: logger
231 |           )
232 |           m.decrease
233 |         end
234 |       end
235 |     end
236 |   end
237 | end
238 | 


--------------------------------------------------------------------------------
/lib/ecs_deploy/configuration.rb:
--------------------------------------------------------------------------------
 1 | module EcsDeploy
 2 |   class Configuration
 3 |     attr_accessor \
 4 |       :log_level,
 5 |       :access_key_id,
 6 |       :secret_access_key,
 7 |       :default_region,
 8 |       :deploy_wait_timeout,
 9 |       :ecs_service_role,
10 |       :ecs_wait_until_services_stable_max_attempts,
11 |       :ecs_wait_until_services_stable_delay,
12 |       :ecs_client_params
13 | 
14 |     def initialize
15 |       @log_level = :info
16 |       @deploy_wait_timeout = 300
17 |       # The following values are the default values of Aws::ECS::Waiters::ServicesStable
18 |       @ecs_wait_until_services_stable_max_attempts = 40
19 |       @ecs_wait_until_services_stable_delay = 15
20 |       @ecs_client_params = {}
21 |     end
22 |   end
23 | end
24 | 


--------------------------------------------------------------------------------
/lib/ecs_deploy/instance_fluctuation_manager.rb:
--------------------------------------------------------------------------------
  1 | require "aws-sdk-autoscaling"
  2 | require "aws-sdk-ec2"
  3 | require "aws-sdk-ecs"
  4 | 
  5 | module EcsDeploy
  6 |   class InstanceFluctuationManager
  7 |     attr_reader :logger
  8 | 
  9 |     MAX_UPDATABLE_ECS_CONTAINER_COUNT = 10
 10 |     MAX_DETACHEABLE_EC2_INSTACE_COUNT = 20
 11 |     MAX_DESCRIBABLE_ECS_TASK_COUNT = 100
 12 | 
 13 |     def initialize(region:, cluster:, auto_scaling_group_name:, desired_capacity:, logger:)
 14 |       @region = region
 15 |       @cluster = cluster
 16 |       @auto_scaling_group_name = auto_scaling_group_name
 17 |       @desired_capacity = desired_capacity
 18 |       @logger = logger
 19 |     end
 20 | 
 21 |     def increase
 22 |       asg = fetch_auto_scaling_group
 23 | 
 24 |       @logger.info("Increasing desired capacity of #{@auto_scaling_group_name}: #{asg.desired_capacity} => #{asg.max_size}")
 25 |       as_client.update_auto_scaling_group(auto_scaling_group_name: @auto_scaling_group_name, desired_capacity: asg.max_size)
 26 | 
 27 |       # Run in background because increasing instances may take time
 28 |       Thread.new do
 29 |         loop do
 30 |           cluster = ecs_client.describe_clusters(clusters: [@cluster]).clusters.first
 31 |           instance_count = cluster.registered_container_instances_count
 32 |           if instance_count == asg.max_size
 33 |             @logger.info("Succeeded in increasing instances!")
 34 |             break
 35 |           end
 36 |           @logger.info("Current registered instance count: #{instance_count}")
 37 |           sleep 5
 38 |         end
 39 |       end
 40 |     end
 41 | 
 42 |     def decrease
 43 |       asg = fetch_auto_scaling_group
 44 | 
 45 |       decrease_count = asg.desired_capacity - @desired_capacity
 46 |       if decrease_count <= 0
 47 |         @logger.info("The capacity is already #{asg.desired_capacity}")
 48 |         return
 49 |       end
 50 |       @logger.info("Decreasing desired capacity of #{@auto_scaling_group_name}: #{asg.desired_capacity} => #{@desired_capacity}")
 51 | 
 52 |       container_instances = ecs_client.list_container_instances(cluster: @cluster).flat_map do |resp|
 53 |         ecs_client.describe_container_instances(
 54 |           cluster: @cluster,
 55 |           container_instances: resp.container_instance_arns
 56 |         ).container_instances
 57 |       end
 58 | 
 59 |       # The status of ECS instances sometimes seems to remain 'DEREGISTERING' for a few minutes after they are terminated.
 60 |       container_instances.reject! { |ci| ci.status == 'DEREGISTERING' }
 61 | 
 62 |       az_to_container_instances = container_instances.sort_by {|ci| - ci.running_tasks_count }.group_by do |ci|
 63 |         ci.attributes.find {|attribute| attribute.name == "ecs.availability-zone" }.value
 64 |       end
 65 |       if az_to_container_instances.empty?
 66 |         @logger.info("There are no instances to terminate.")
 67 |         return
 68 |       end
 69 | 
 70 |       target_container_instances = extract_target_container_instances(decrease_count, az_to_container_instances)
 71 | 
 72 |       @logger.info("running tasks: #{ecs_client.list_tasks(cluster: @cluster).task_arns.size}")
 73 |       all_running_task_arns = []
 74 |       target_container_instances.map(&:container_instance_arn).each_slice(MAX_UPDATABLE_ECS_CONTAINER_COUNT) do |arns|
 75 |         @logger.info(arns)
 76 |         ecs_client.update_container_instances_state(
 77 |           cluster: @cluster,
 78 |           container_instances: arns,
 79 |           status: "DRAINING"
 80 |         )
 81 |         arns.each do |arn|
 82 |           all_running_task_arns.concat(list_running_task_arns(arn))
 83 |         end
 84 |       end
 85 | 
 86 |       stop_tasks_not_belonging_service(all_running_task_arns)
 87 |       wait_until_tasks_stopped(all_running_task_arns)
 88 | 
 89 |       instance_ids = target_container_instances.map(&:ec2_instance_id)
 90 |       terminate_instances(instance_ids)
 91 |       @logger.info("Succeeded in decreasing instances!")
 92 |     end
 93 | 
 94 |     private
 95 | 
 96 |     def aws_params
 97 |       {
 98 |         access_key_id: EcsDeploy.config.access_key_id,
 99 |         secret_access_key: EcsDeploy.config.secret_access_key,
100 |         region: @region,
101 |         logger: @logger
102 |       }.reject do |_key, value|
103 |         value.nil?
104 |       end
105 |     end
106 | 
107 |     def as_client
108 |       @as_client ||= Aws::AutoScaling::Client.new(aws_params)
109 |     end
110 | 
111 |     def ec2_client
112 |       @ec2_client ||= Aws::EC2::Client.new(aws_params)
113 |     end
114 | 
115 |     def ecs_client
116 |       @ecs_client ||= Aws::ECS::Client.new(aws_params.merge(EcsDeploy.config.ecs_client_params))
117 |     end
118 | 
119 |     def fetch_auto_scaling_group
120 |       as_client.describe_auto_scaling_groups(auto_scaling_group_names: [@auto_scaling_group_name]).auto_scaling_groups.first
121 |     end
122 | 
123 |     # Extract container instances to terminate considering AZ balance
124 |     def extract_target_container_instances(decrease_count, az_to_container_instances)
125 |       target_container_instances = []
126 |       decrease_count.times do
127 |         @logger.debug do
128 |           "AZ balance: #{az_to_container_instances.sort_by {|az, _| az }.map {|az, instances| [az, instances.size] }.to_h}"
129 |         end
130 |         az = az_to_container_instances.max_by {|_az, instances| instances.size }.first
131 |         target_container_instances << az_to_container_instances[az].pop
132 |       end
133 |       @logger.info do
134 |         "AZ balance: #{az_to_container_instances.sort_by {|az, _| az }.map {|az, instances| [az, instances.size] }.to_h}"
135 |       end
136 | 
137 |       target_container_instances
138 |     end
139 | 
140 |     # list tasks whose desired_status is "RUNNING" or
141 |     # whose desired_status is "STOPPED" but last_status is "RUNNING" on the ECS container instance
142 |     def list_running_task_arns(container_instance_arn)
143 |       running_tasks_arn = ecs_client.list_tasks(cluster: @cluster, container_instance: container_instance_arn).flat_map(&:task_arns)
144 |       stopped_tasks_arn = ecs_client.list_tasks(cluster: @cluster, container_instance: container_instance_arn, desired_status: "STOPPED").flat_map(&:task_arns)
145 |       stopped_running_task_arns = stopped_tasks_arn.each_slice(MAX_DESCRIBABLE_ECS_TASK_COUNT).flat_map do |arns|
146 |         ecs_client.describe_tasks(cluster: @cluster, tasks: arns).tasks.select do |task|
147 |           task.desired_status == "STOPPED" && task.last_status == "RUNNING"
148 |         end
149 |       end.map(&:task_arn)
150 |       running_tasks_arn + stopped_running_task_arns
151 |     end
152 | 
153 |     def wait_until_tasks_stopped(task_arns)
154 |       @logger.info("All old tasks: #{task_arns.size}")
155 |       task_arns.each_slice(MAX_DESCRIBABLE_ECS_TASK_COUNT).each do |arns|
156 |         ecs_client.wait_until(:tasks_stopped, cluster: @cluster, tasks: arns)
157 |       end
158 |       @logger.info("All old tasks are stopped")
159 |     end
160 | 
161 |     def stop_tasks_not_belonging_service(running_task_arns)
162 |       @logger.info("Running tasks: #{running_task_arns.size}")
163 |       unless running_task_arns.empty?
164 |         running_task_arns.each_slice(MAX_DESCRIBABLE_ECS_TASK_COUNT).each do |arns|
165 |           ecs_client.describe_tasks(cluster: @cluster, tasks: arns).tasks.each do |task|
166 |             ecs_client.stop_task(cluster: @cluster, task: task.task_arn) if task.group.start_with?("family:")
167 |           end
168 |         end
169 |       end
170 |     end
171 | 
172 |     def terminate_instances(instance_ids)
173 |       if instance_ids.empty?
174 |         @logger.info("There are no instances to terminate.")
175 |         return
176 |       end
177 |       instance_ids.each_slice(MAX_DETACHEABLE_EC2_INSTACE_COUNT) do |ids|
178 |         as_client.detach_instances(
179 |           auto_scaling_group_name: @auto_scaling_group_name,
180 |           instance_ids: ids,
181 |           should_decrement_desired_capacity: true
182 |         )
183 |       end
184 | 
185 |       ec2_client.terminate_instances(instance_ids: instance_ids)
186 | 
187 |       ec2_client.wait_until(:instance_terminated, instance_ids: instance_ids) do |w|
188 |         w.before_wait do |attempts, response|
189 |           @logger.info("Waiting for stopping all instances...#{attempts}")
190 |           instances = response.reservations.flat_map(&:instances)
191 |           instances.sort_by(&:instance_id).each do |instance|
192 |             @logger.info("#{instance.instance_id}\t#{instance.state.name}")
193 |           end
194 |         end
195 |       end
196 |     end
197 |   end
198 | end
199 | 


--------------------------------------------------------------------------------
/lib/ecs_deploy/scheduled_task.rb:
--------------------------------------------------------------------------------
  1 | require 'aws-sdk-cloudwatchevents'
  2 | require 'timeout'
  3 | 
  4 | module EcsDeploy
  5 |   class ScheduledTask
  6 |     class PutTargetsFailure < StandardError; end
  7 | 
  8 |     attr_reader :cluster, :region, :schedule_rule_name
  9 | 
 10 |     def initialize(
 11 |       cluster:, rule_name:, schedule_expression:, enabled: true, description: nil, target_id: nil,
 12 |       task_definition_name:, revision: nil, task_count: nil, role_arn:, network_configuration: nil, launch_type: nil, platform_version: nil, group: nil,
 13 |       region: nil, container_overrides: nil
 14 |     )
 15 |       @cluster = cluster
 16 |       @rule_name = rule_name
 17 |       @schedule_expression = schedule_expression
 18 |       @enabled = enabled
 19 |       @description = description
 20 |       @target_id = target_id || task_definition_name
 21 |       @task_definition_name = task_definition_name
 22 |       @task_count = task_count || 1
 23 |       @revision = revision
 24 |       @role_arn = role_arn
 25 |       @network_configuration = network_configuration
 26 |       @launch_type = launch_type || "EC2"
 27 |       @platform_version = platform_version
 28 |       @group = group
 29 |       region ||= EcsDeploy.config.default_region
 30 |       params ||= EcsDeploy.config.ecs_client_params
 31 |       @container_overrides = container_overrides
 32 | 
 33 |       @client = region ? Aws::ECS::Client.new(params.merge(region: region)) : Aws::ECS::Client.new(params)
 34 |       @region = @client.config.region
 35 |       @cloud_watch_events = Aws::CloudWatchEvents::Client.new(region: @region)
 36 |     end
 37 | 
 38 |     def deploy
 39 |       put_rule
 40 |       put_targets
 41 |     end
 42 | 
 43 |     private
 44 | 
 45 |     def cluster_arn
 46 |       cl = @client.describe_clusters(clusters: [@cluster]).clusters[0]
 47 |       if cl
 48 |         cl.cluster_arn
 49 |       end
 50 |     end
 51 | 
 52 |     def task_definition_arn
 53 |       suffix = @revision ? ":#{@revision}" : ""
 54 |       name = "#{@task_definition_name}#{suffix}"
 55 |       @client.describe_task_definition(task_definition: name).task_definition.task_definition_arn
 56 |     end
 57 | 
 58 |     def put_rule
 59 |       res = @cloud_watch_events.put_rule(
 60 |         name: @rule_name,
 61 |         schedule_expression: @schedule_expression,
 62 |         state: @enabled ? "ENABLED" : "DISABLED",
 63 |         description: @description,
 64 |       )
 65 |       EcsDeploy.logger.info "created cloudwatch event rule [#{res.rule_arn}] [#{@region}] [#{Paint['OK', :green]}]"
 66 |     end
 67 | 
 68 |     def put_targets
 69 |       target = {
 70 |         id: @target_id,
 71 |         arn: cluster_arn,
 72 |         role_arn: @role_arn,
 73 |         ecs_parameters: {
 74 |           task_definition_arn: task_definition_arn,
 75 |           task_count: @task_count,
 76 |           network_configuration: @network_configuration,
 77 |           launch_type: @launch_type,
 78 |           platform_version: @platform_version,
 79 |           group: @group,
 80 |         },
 81 |       }
 82 |       target[:ecs_parameters].compact!
 83 | 
 84 |       if @container_overrides
 85 |         target.merge!(input: { containerOverrides: @container_overrides }.to_json)
 86 |       end
 87 | 
 88 |       res = @cloud_watch_events.put_targets(
 89 |         rule: @rule_name,
 90 |         targets: [target]
 91 |       )
 92 |       if res.failed_entry_count.zero?
 93 |         EcsDeploy.logger.info "created cloudwatch event target [#{@target_id}] [#{@region}] [#{Paint['OK', :green]}]"
 94 |       else
 95 |         res.failed_entries.each do |entry|
 96 |           EcsDeploy.logger.error "failed to create cloudwatch event target [#{@region}] target_id=#{entry.target_id} error_code=#{entry.error_code} error_message=#{entry.error_message}"
 97 |         end
 98 |         raise PutTargetsFailure
 99 |       end
100 |     end
101 |   end
102 | end
103 | 


--------------------------------------------------------------------------------
/lib/ecs_deploy/service.rb:
--------------------------------------------------------------------------------
  1 | require 'timeout'
  2 | 
  3 | module EcsDeploy
  4 |   class Service
  5 |     CHECK_INTERVAL = 5
  6 |     MAX_DESCRIBE_SERVICES = 10
  7 | 
  8 |     class TooManyAttemptsError < StandardError; end
  9 | 
 10 |     attr_reader :cluster, :region, :service_name, :delete, :deploy_started_at
 11 | 
 12 |     def initialize(
 13 |       cluster:, service_name:, task_definition_name: nil, revision: nil,
 14 |       load_balancers: nil,
 15 |       desired_count: nil, deployment_configuration: {maximum_percent: 200, minimum_healthy_percent: 100},
 16 |       launch_type: nil,
 17 |       placement_constraints: [],
 18 |       placement_strategy: [],
 19 |       capacity_provider_strategy: nil,
 20 |       network_configuration: nil,
 21 |       health_check_grace_period_seconds: nil,
 22 |       scheduling_strategy: 'REPLICA',
 23 |       enable_ecs_managed_tags: nil,
 24 |       tags: nil,
 25 |       propagate_tags: nil,
 26 |       region: nil,
 27 |       delete: false,
 28 |       enable_execute_command: false
 29 |     )
 30 |       @cluster = cluster
 31 |       @service_name = service_name
 32 |       @task_definition_name = task_definition_name || service_name
 33 |       @load_balancers = load_balancers
 34 |       @desired_count = desired_count
 35 |       @deployment_configuration = deployment_configuration
 36 |       @launch_type = launch_type
 37 |       @placement_constraints = placement_constraints
 38 |       @placement_strategy = placement_strategy
 39 |       @capacity_provider_strategy = capacity_provider_strategy
 40 |       @network_configuration = network_configuration
 41 |       @health_check_grace_period_seconds = health_check_grace_period_seconds
 42 |       @scheduling_strategy = scheduling_strategy
 43 |       @revision = revision
 44 |       @enable_ecs_managed_tags = enable_ecs_managed_tags
 45 |       @tags = tags
 46 |       @propagate_tags = propagate_tags
 47 |       @enable_execute_command = enable_execute_command
 48 | 
 49 |       @response = nil
 50 | 
 51 |       region ||= EcsDeploy.config.default_region
 52 |       params ||= EcsDeploy.config.ecs_client_params
 53 |       @client = region ? Aws::ECS::Client.new(params.merge(region: region)) : Aws::ECS::Client.new(params)
 54 |       @region = @client.config.region
 55 | 
 56 |       @delete = delete
 57 |     end
 58 | 
 59 |     def current_task_definition_arn
 60 |       res = @client.describe_services(cluster: @cluster, services: [@service_name])
 61 |       res.services[0].task_definition
 62 |     end
 63 | 
 64 |     def deploy
 65 |       @deploy_started_at = Time.now
 66 |       res = @client.describe_services(cluster: @cluster, services: [@service_name])
 67 |       service_options = {
 68 |         cluster: @cluster,
 69 |         task_definition: task_definition_name_with_revision,
 70 |         deployment_configuration: @deployment_configuration,
 71 |         network_configuration: @network_configuration,
 72 |         health_check_grace_period_seconds: @health_check_grace_period_seconds,
 73 |         capacity_provider_strategy: @capacity_provider_strategy,
 74 |         enable_execute_command: @enable_execute_command,
 75 |         enable_ecs_managed_tags: @enable_ecs_managed_tags,
 76 |         placement_constraints: @placement_constraints,
 77 |         placement_strategy: @placement_strategy,
 78 |       }
 79 | 
 80 |       if @load_balancers && EcsDeploy.config.ecs_service_role
 81 |         service_options.merge!({
 82 |           role: EcsDeploy.config.ecs_service_role,
 83 |         })
 84 |       end
 85 | 
 86 |       if @load_balancers
 87 |         service_options.merge!({
 88 |           load_balancers: @load_balancers,
 89 |         })
 90 |       end
 91 | 
 92 |       if res.services.select{ |s| s.status == 'ACTIVE' }.empty?
 93 |         return if @delete
 94 | 
 95 |         service_options.merge!({
 96 |           service_name: @service_name,
 97 |           desired_count: @desired_count.to_i,
 98 |           launch_type: @launch_type,
 99 |           tags: @tags,
100 |           propagate_tags: @propagate_tags,
101 |         })
102 | 
103 |         if @scheduling_strategy == 'DAEMON'
104 |           service_options[:scheduling_strategy] = @scheduling_strategy
105 |           service_options.delete(:desired_count)
106 |           service_options.delete(:placement_strategy)
107 |         end
108 |         @response = @client.create_service(service_options)
109 |         EcsDeploy.logger.info "created service [#{@service_name}] [#{@cluster}] [#{@region}] [#{Paint['OK', :green]}]"
110 |       else
111 |         return delete_service if @delete
112 | 
113 |         service_options.merge!({service: @service_name})
114 |         service_options.merge!({desired_count: @desired_count}) if @desired_count
115 |         service_options.merge!({propagate_tags: @propagate_tags}) if @propagate_tags
116 | 
117 |         current_service = res.services[0]
118 |         service_options.merge!({force_new_deployment: true}) if need_force_new_deployment?(current_service)
119 | 
120 |         update_tags(@service_name, @tags)
121 |         if @scheduling_strategy == 'DAEMON'
122 |           service_options.delete(:placement_strategy)
123 |         end
124 |         @response = @client.update_service(service_options)
125 |         EcsDeploy.logger.info "updated service [#{@service_name}] [#{@cluster}] [#{@region}] [#{Paint['OK', :green]}]"
126 |       end
127 |     end
128 | 
129 |     private def need_force_new_deployment?(service)
130 |       return false unless @capacity_provider_strategy
131 |       return true unless service.capacity_provider_strategy
132 | 
133 |       return true if @capacity_provider_strategy.size != service.capacity_provider_strategy.size
134 | 
135 |       match_array = @capacity_provider_strategy.all? do |strategy|
136 |         service.capacity_provider_strategy.find do |current_strategy|
137 |           strategy[:capacity_provider] == current_strategy.capacity_provider &&
138 |             strategy[:weight] == current_strategy.weight &&
139 |             strategy[:base] == current_strategy.base
140 |         end
141 |       end
142 | 
143 |       !match_array
144 |     end
145 | 
146 |     def delete_service
147 |       if @scheduling_strategy != 'DAEMON'
148 |         @client.update_service(cluster: @cluster, service: @service_name, desired_count: 0)
149 |         sleep 1
150 |       end
151 |       @client.delete_service(cluster: @cluster, service: @service_name)
152 |       EcsDeploy.logger.info "deleted service [#{@service_name}] [#{@cluster}] [#{@region}] [#{Paint['OK', :green]}]"
153 |     end
154 | 
155 |     def update_tags(service_name, tags)
156 |       service_arn = @client.describe_services(cluster: @cluster, services: [service_name]).services.first.service_arn
157 |       if service_arn.split('/').size == 2
158 |         if tags
159 |           EcsDeploy.logger.warn "#{service_name} doesn't support tagging operations, so tags are ignored. Long arn format must be used for tagging operations."
160 |         end
161 |         return
162 |       end
163 | 
164 |       tags ||= []
165 |       current_tag_keys = @client.list_tags_for_resource(resource_arn: service_arn).tags.map(&:key)
166 |       deleted_tag_keys = current_tag_keys - tags.map { |t| t[:key] }
167 | 
168 |       unless deleted_tag_keys.empty?
169 |         @client.untag_resource(resource_arn: service_arn, tag_keys: deleted_tag_keys)
170 |       end
171 | 
172 |       unless tags.empty?
173 |         @client.tag_resource(resource_arn: service_arn, tags: tags)
174 |       end
175 |     end
176 | 
177 |     def log_events(ecs_service)
178 |       ecs_service.events.sort_by(&:created_at).each do |e|
179 |         next if e.created_at <= deploy_started_at
180 |         next if @last_event && e.created_at <= @last_event.created_at
181 | 
182 |         EcsDeploy.logger.info e.message
183 |         @last_event = e
184 |       end
185 |     end
186 | 
187 |     def self.wait_all_running(services)
188 |       services.group_by { |s| [s.cluster, s.region] }.flat_map do |(cl, region), ss|
189 |         params ||= EcsDeploy.config.ecs_client_params
190 |         client = Aws::ECS::Client.new(params.merge(region: region))
191 |         ss.reject(&:delete).map(&:service_name).each_slice(MAX_DESCRIBE_SERVICES).map do |chunked_service_names|
192 |           Thread.new do
193 |             EcsDeploy.config.ecs_wait_until_services_stable_max_attempts.times do
194 |               EcsDeploy.logger.info "waiting for services to stabilize [#{chunked_service_names.join(", ")}] [#{cl}]"
195 |               resp = client.describe_services(cluster: cl, services: chunked_service_names)
196 |               resp.services.each do |s|
197 |                 # cf. https://github.com/aws/aws-sdk-ruby/blob/master/gems/aws-sdk-ecs/lib/aws-sdk-ecs/waiters.rb#L91-L96
198 |                 if s.deployments.size == 1 && s.running_count == s.desired_count
199 |                   chunked_service_names.delete(s.service_name)
200 |                 end
201 |                 service = ss.detect {|sc| sc.service_name == s.service_name }
202 |                 service.log_events(s)
203 |               end
204 |               break if chunked_service_names.empty?
205 |               sleep EcsDeploy.config.ecs_wait_until_services_stable_delay
206 |             end
207 |             raise TooManyAttemptsError unless chunked_service_names.empty?
208 |           end
209 |         end
210 |       end.each(&:join)
211 |     end
212 | 
213 |     private
214 | 
215 |     def task_definition_name_with_revision
216 |       suffix = @revision ? ":#{@revision}" : ""
217 |       "#{@task_definition_name}#{suffix}"
218 |     end
219 |   end
220 | end
221 | 


--------------------------------------------------------------------------------
/lib/ecs_deploy/task_definition.rb:
--------------------------------------------------------------------------------
 1 | module EcsDeploy
 2 |   class TaskDefinition
 3 |     def self.deregister(arn, region: nil)
 4 |       region ||= EcsDeploy.config.default_region
 5 |       params ||= EcsDeploy.config.ecs_client_params
 6 |       client = region ? Aws::ECS::Client.new(params.merge(region: region)) : Aws::ECS::Client.new(params)
 7 |       client.deregister_task_definition({
 8 |         task_definition: arn,
 9 |       })
10 |       EcsDeploy.logger.info "deregistered task definition [#{arn}] [#{client.config.region}] [#{Paint['OK', :green]}]"
11 |     end
12 | 
13 |     def initialize(
14 |       task_definition_name:, region: nil,
15 |       network_mode: "bridge", volumes: [], container_definitions: [], placement_constraints: [],
16 |       task_role_arn: nil,
17 |       execution_role_arn: nil,
18 |       requires_compatibilities: nil,
19 |       cpu: nil, memory: nil,
20 |       tags: nil,
21 |       runtime_platform: {}
22 |     )
23 |       @task_definition_name = task_definition_name
24 |       @task_role_arn        = task_role_arn
25 |       @execution_role_arn   = execution_role_arn
26 |       region ||= EcsDeploy.config.default_region
27 |       params ||= EcsDeploy.config.ecs_client_params
28 | 
29 |       @container_definitions = container_definitions.map do |cd|
30 |         if cd[:docker_labels]
31 |           cd[:docker_labels] = cd[:docker_labels].map { |k, v| [k.to_s, v] }.to_h
32 |         end
33 |         if cd.dig(:log_configuration, :options)
34 |           cd[:log_configuration][:options] = cd.dig(:log_configuration, :options).map { |k, v| [k.to_s, v] }.to_h
35 |         end
36 |         cd
37 |       end
38 |       @volumes = volumes
39 |       @network_mode = network_mode
40 |       @placement_constraints = placement_constraints
41 |       @requires_compatibilities = requires_compatibilities
42 |       @cpu = cpu&.to_s
43 |       @memory = memory&.to_s
44 |       @tags = tags
45 |       @client = region ? Aws::ECS::Client.new(params.merge(region: region)) : Aws::ECS::Client.new(params)
46 |       @region = @client.config.region
47 |       @runtime_platform = runtime_platform
48 |     end
49 | 
50 |     def recent_task_definition_arns
51 |       resp = @client.list_task_definitions(
52 |         family_prefix: @task_definition_name,
53 |         sort: "DESC"
54 |       )
55 |       resp.task_definition_arns
56 |     rescue
57 |       []
58 |     end
59 | 
60 |     def register
61 |       res = @client.register_task_definition({
62 |         family: @task_definition_name,
63 |         network_mode: @network_mode,
64 |         container_definitions: @container_definitions,
65 |         volumes: @volumes,
66 |         placement_constraints: @placement_constraints,
67 |         task_role_arn: @task_role_arn,
68 |         execution_role_arn: @execution_role_arn,
69 |         requires_compatibilities: @requires_compatibilities,
70 |         cpu: @cpu, memory: @memory,
71 |         tags: @tags,
72 |         runtime_platform: @runtime_platform
73 |       })
74 |       EcsDeploy.logger.info "registered task definition [#{@task_definition_name}] [#{@region}] [#{Paint['OK', :green]}]"
75 |       res.task_definition
76 |     end
77 |   end
78 | end
79 | 


--------------------------------------------------------------------------------
/lib/ecs_deploy/version.rb:
--------------------------------------------------------------------------------
1 | module EcsDeploy
2 |   VERSION = "1.0.7"
3 | end
4 | 


--------------------------------------------------------------------------------
/renovate.json:
--------------------------------------------------------------------------------
1 | {
2 |   "$schema": "https://docs.renovatebot.com/renovate-schema.json",
3 |   "extends": [
4 |     "config:recommended"
5 |   ]
6 | }
7 | 


--------------------------------------------------------------------------------
/spec/ecs_deploy/auto_scaler/auto_scaling_group_config_spec.rb:
--------------------------------------------------------------------------------
  1 | require "spec_helper"
  2 | 
  3 | require "ecs_deploy/auto_scaler/auto_scaling_group_config"
  4 | require "ecs_deploy/auto_scaler/service_config"
  5 | 
  6 | RSpec.describe EcsDeploy::AutoScaler::AutoScalingGroupConfig do
  7 |   describe "#update_desired_capacity" do
  8 |     subject(:auto_scaling_group_config) do
  9 |       described_class.new({
 10 |         "name"   => asg_name,
 11 |         "region" => "ap-northeast-1",
 12 |         "buffer" => buffer,
 13 |         "services" => [],
 14 |       }, Logger.new(nil))
 15 |     end
 16 | 
 17 |     let(:asg_name) { "asg_name" }
 18 |     let(:buffer) { 1 }
 19 |     let(:cluster_resource_manager) { instance_double("EcsDeploy::AutoScaler::ClusterResourceManager") }
 20 | 
 21 |     before do
 22 |       allow(auto_scaling_group_config).to receive(:cluster_resource_manager) { cluster_resource_manager }
 23 |     end
 24 | 
 25 |     context "when the current desired capacity is greater than expected" do
 26 |       before do
 27 |         allow_any_instance_of(Aws::AutoScaling::Client).to receive(:describe_auto_scaling_groups).with(
 28 |           auto_scaling_group_names: [asg_name],
 29 |         ).and_return(
 30 |           double(
 31 |             auto_scaling_groups: [
 32 |               double(
 33 |                 desired_capacity: container_instances.size,
 34 |                 instances: container_instances.map do |i|
 35 |                   double(
 36 |                     availability_zone: i.attributes.find { |a| a.name == "ecs.availability-zone" }.value,
 37 |                     instance_id: i.ec2_instance_id,
 38 |                     lifecycle_state: "InService",
 39 |                   )
 40 |                 end,
 41 |               )
 42 |             ]
 43 |           )
 44 |         )
 45 | 
 46 |         allow(cluster_resource_manager).to receive(:fetch_container_instances_in_cluster).and_return(container_instances)
 47 |         allow(auto_scaling_group_config).to receive(:sleep).and_return(nil)
 48 |       end
 49 | 
 50 |       context "when there are deregistable instances in all availability zones" do
 51 |         let(:container_instances) do
 52 |           [
 53 |             Aws::ECS::Types::ContainerInstance.new(
 54 |               pending_tasks_count: 1,
 55 |               running_tasks_count: 0,
 56 |               ec2_instance_id: "i-000000",
 57 |               container_instance_arn: "with_pending_task",
 58 |               attributes: [Aws::ECS::Types::Attribute.new(name: "ecs.availability-zone", value: "ap-notrheast-1a")],
 59 |             ),
 60 |             Aws::ECS::Types::ContainerInstance.new(
 61 |               pending_tasks_count: 0,
 62 |               running_tasks_count: 0,
 63 |               ec2_instance_id: "i-111111",
 64 |               container_instance_arn: "with_no_task_in_ap_notrheast_1a",
 65 |               attributes: [Aws::ECS::Types::Attribute.new(name: "ecs.availability-zone", value: "ap-notrheast-1a")],
 66 |             ),
 67 |             Aws::ECS::Types::ContainerInstance.new(
 68 |               pending_tasks_count: 0,
 69 |               running_tasks_count: 1,
 70 |               ec2_instance_id: "i-222222",
 71 |               container_instance_arn: "with_essential_running_task",
 72 |               attributes: [Aws::ECS::Types::Attribute.new(name: "ecs.availability-zone", value: "ap-notrheast-1a")],
 73 |             ),
 74 |             Aws::ECS::Types::ContainerInstance.new(
 75 |               pending_tasks_count: 0,
 76 |               running_tasks_count: 0,
 77 |               ec2_instance_id: "i-333333",
 78 |               container_instance_arn: "with_no_task_in_ap_notrheast_1c",
 79 |               attributes: [Aws::ECS::Types::Attribute.new(name: "ecs.availability-zone", value: "ap-notrheast-1c")],
 80 |             ),
 81 |             Aws::ECS::Types::ContainerInstance.new(
 82 |               pending_tasks_count: 0,
 83 |               running_tasks_count: 1,
 84 |               ec2_instance_id: "i-444444",
 85 |               container_instance_arn: "with_no_essential_running_task",
 86 |               attributes: [Aws::ECS::Types::Attribute.new(name: "ecs.availability-zone", value: "ap-notrheast-1c")],
 87 |             ),
 88 |             Aws::ECS::Types::ContainerInstance.new(
 89 |               pending_tasks_count: 0,
 90 |               running_tasks_count: 0,
 91 |               ec2_instance_id: "i-555555",
 92 |               container_instance_arn: "with_no_task_in_ap_notrheast_1a_2",
 93 |               attributes: [Aws::ECS::Types::Attribute.new(name: "ecs.availability-zone", value: "ap-notrheast-1a")],
 94 |             ),
 95 |           ]
 96 |         end
 97 | 
 98 |         before do
 99 |           allow(cluster_resource_manager).to receive(:fetch_container_instance_arns_in_service).and_return(["with_essential_running_task"])
100 |         end
101 | 
102 |         it "terminates instances without esesstial running tasks" do
103 |           expect(auto_scaling_group_config).to receive(:detach_and_terminate_orphan_instances)
104 |           expect(cluster_resource_manager).to receive(:deregister_container_instance).with("with_no_task_in_ap_notrheast_1a")
105 |           expect(cluster_resource_manager).to receive(:deregister_container_instance).with("with_no_essential_running_task")
106 |           expect(cluster_resource_manager).to receive(:deregister_container_instance).with("with_no_task_in_ap_notrheast_1a_2")
107 |           expect(cluster_resource_manager).to receive(:trigger_capacity_update).with(container_instances.size, 3)
108 |           expect_any_instance_of(Aws::AutoScaling::Client).to receive(:detach_instances).with(
109 |             auto_scaling_group_name: asg_name,
110 |             instance_ids: ["i-555555", "i-111111", "i-444444"],
111 |             should_decrement_desired_capacity: true,
112 |           )
113 |           expect_any_instance_of(Aws::EC2::Client).to receive(:terminate_instances).with(instance_ids: ["i-555555", "i-111111", "i-444444"])
114 | 
115 |           auto_scaling_group_config.update_desired_capacity(2)
116 |         end
117 |       end
118 | 
119 |       context "when there are deregisterable instances only in one availability zone where there are fewer instances" do
120 |         let(:container_instances) do
121 |           [
122 |             Aws::ECS::Types::ContainerInstance.new(
123 |               pending_tasks_count: 0,
124 |               running_tasks_count: 1,
125 |               ec2_instance_id: "i-000000",
126 |               container_instance_arn: "with_essential_running_task_1a_0",
127 |               attributes: [Aws::ECS::Types::Attribute.new(name: "ecs.availability-zone", value: "ap-notrheast-1a")],
128 |             ),
129 |             Aws::ECS::Types::ContainerInstance.new(
130 |               pending_tasks_count: 0,
131 |               running_tasks_count: 1,
132 |               ec2_instance_id: "i-111111",
133 |               container_instance_arn: "with_essential_running_task_1a_1",
134 |               attributes: [Aws::ECS::Types::Attribute.new(name: "ecs.availability-zone", value: "ap-notrheast-1a")],
135 |             ),
136 |             Aws::ECS::Types::ContainerInstance.new(
137 |               pending_tasks_count: 0,
138 |               running_tasks_count: 0,
139 |               ec2_instance_id: "i-222222",
140 |               container_instance_arn: "with_no_essential_running_task_1c",
141 |               attributes: [Aws::ECS::Types::Attribute.new(name: "ecs.availability-zone", value: "ap-notrheast-1c")],
142 |             ),
143 |           ]
144 |         end
145 | 
146 |         before do
147 |           allow(cluster_resource_manager).to receive(:fetch_container_instance_arns_in_service).and_return([
148 |             "with_essential_running_task_1a_0",
149 |             "with_essential_running_task_1a_1",
150 |           ])
151 |         end
152 | 
153 |         it "dosen't terminates any instances" do
154 |           expect(auto_scaling_group_config).to receive(:detach_and_terminate_orphan_instances)
155 |           expect(cluster_resource_manager).to_not receive(:deregister_container_instance)
156 |           expect(cluster_resource_manager).to_not receive(:trigger_capacity_update)
157 |           expect_any_instance_of(Aws::AutoScaling::Client).to_not receive(:detach_instances)
158 |           expect_any_instance_of(Aws::EC2::Client).to_not receive(:terminate_instances)
159 | 
160 |           auto_scaling_group_config.update_desired_capacity(1)
161 |         end
162 |       end
163 |     end
164 | 
165 |     context "when the current desired capacity is less than expected" do
166 |       let(:current_capacity) { 2 }
167 |       let(:desired_capacity) { current_capacity + buffer } # `buffer` comes from an enclosing scope
168 | 
169 |       before do
170 |         allow_any_instance_of(Aws::AutoScaling::Client).to receive(:describe_auto_scaling_groups).with(
171 |           auto_scaling_group_names: [asg_name]
172 |         ).and_return(double(auto_scaling_groups: [double(desired_capacity: current_capacity, max_size: 100)])) # the group reports current_capacity as its desired capacity
173 |       end
174 | 
175 |       it "updates the desired capacity of the auto scaling group" do
176 |         expect(auto_scaling_group_config).to receive(:detach_and_terminate_orphan_instances)
177 |         expect(cluster_resource_manager).to receive(:trigger_capacity_update).with(current_capacity, desired_capacity)
178 |         expect_any_instance_of(Aws::AutoScaling::Client).to receive(:update_auto_scaling_group).with(
179 |           auto_scaling_group_name: asg_name,
180 |           min_size: 0,
181 |           max_size: 100, # same max_size the describe stub above reports
182 |           desired_capacity: desired_capacity,
183 |         )
184 | 
185 |         auto_scaling_group_config.update_desired_capacity(current_capacity) # request 2; the expectations above require a raise to 2 + buffer
186 |       end
187 |     end
188 | 
189 |     context "when the current desired capacity is expected" do
190 |       let(:current_capacity) { 2 + buffer } # the group already holds the requested 2 plus the buffer
191 | 
192 |       before do
193 |         allow_any_instance_of(Aws::AutoScaling::Client).to receive(:describe_auto_scaling_groups).with(
194 |           auto_scaling_group_names: [asg_name]
195 |         ).and_return(double(auto_scaling_groups: [double(desired_capacity: current_capacity)])) # the stubbed group only exposes desired_capacity
196 |       end
197 | 
198 |       it "does nothing" do
199 |         expect(auto_scaling_group_config).to receive(:detach_and_terminate_orphan_instances) # still expected even though no scaling happens
200 |         expect(cluster_resource_manager).to_not receive(:trigger_capacity_update)
201 |         expect_any_instance_of(Aws::EC2::Client).to_not receive(:terminate_instances)
202 |         expect_any_instance_of(Aws::AutoScaling::Client).to_not receive(:update_auto_scaling_group)
203 | 
204 |         auto_scaling_group_config.update_desired_capacity(current_capacity - buffer) # i.e. request exactly 2
205 |       end
206 |     end
207 | 
208 |     context "when detached instance is still in the ecs cluster" do
209 |       let(:container_instances) do
210 |         [
211 |           Aws::ECS::Types::ContainerInstance.new(
212 |             pending_tasks_count: 0,
213 |             running_tasks_count: 0,
214 |             ec2_instance_id: "i-000000",
215 |             container_instance_arn: "with_no_pending_and_running_task_1a",
216 |             attributes: [Aws::ECS::Types::Attribute.new(name: "ecs.availability-zone", value: "ap-notrheast-1a")],
217 |           ),
218 |           Aws::ECS::Types::ContainerInstance.new(
219 |             pending_tasks_count: 0,
220 |             running_tasks_count: 0,
221 |             ec2_instance_id: "i-111111",
222 |             container_instance_arn: "already_detached_by_drainer_but_still_in_the_cluster",
223 |             attributes: [Aws::ECS::Types::Attribute.new(name: "ecs.availability-zone", value: "ap-notrheast-1a")],
224 |           ),
225 |           Aws::ECS::Types::ContainerInstance.new(
226 |             pending_tasks_count: 0,
227 |             running_tasks_count: 1,
228 |             ec2_instance_id: "i-222222",
229 |             container_instance_arn: "with_running_task",
230 |             attributes: [Aws::ECS::Types::Attribute.new(name: "ecs.availability-zone", value: "ap-notrheast-1c")],
231 |           ),
232 |           Aws::ECS::Types::ContainerInstance.new(
233 |             pending_tasks_count: 0,
234 |             running_tasks_count: 0,
235 |             ec2_instance_id: "i-333333",
236 |             container_instance_arn: "with_no_pending_and_running_task_1c",
237 |             attributes: [Aws::ECS::Types::Attribute.new(name: "ecs.availability-zone", value: "ap-notrheast-1c")],
238 |           ),
239 |         ]
240 |       end
241 |       let(:auto_scaling_group_instances) do
242 |         [
243 |           Aws::AutoScaling::Types::Instance.new(
244 |             instance_id: "i-000000",
245 |             availability_zone: "ap-notrheast-1a",
246 |             lifecycle_state: "InService",
247 |             health_status: "Healthy",
248 |             launch_template: "launch_template",
249 |             protected_from_scale_in: true,
250 |           ),
251 |           Aws::AutoScaling::Types::Instance.new(
252 |             instance_id: "i-222222",
253 |             availability_zone: "ap-notrheast-1c",
254 |             lifecycle_state: "InService",
255 |             health_status: "Healthy",
256 |             launch_template: "launch_template",
257 |             protected_from_scale_in: true,
258 |           ),
259 |           Aws::AutoScaling::Types::Instance.new(
260 |             instance_id: "i-333333",
261 |             availability_zone: "ap-notrheast-1c",
262 |             lifecycle_state: "InService",
263 |             health_status: "Healthy",
264 |             launch_template: "launch_template",
265 |             protected_from_scale_in: true,
266 |           ),
267 |         ]
268 |       end
269 | 
270 |       before do
271 |         allow_any_instance_of(Aws::AutoScaling::Client).to receive(:describe_auto_scaling_groups).with(
272 |           auto_scaling_group_names: [asg_name],
273 |         ).and_return(
274 |           double(
275 |             auto_scaling_groups: [
276 |               double(
277 |                 desired_capacity: container_instances.size,
278 |                 instances: auto_scaling_group_instances.map do |i|
279 |                   double(
280 |                     availability_zone: i.availability_zone,
281 |                     instance_id: i.instance_id,
282 |                     lifecycle_state: "InService",
283 |                   )
284 |                 end,
285 |               )
286 |             ]
287 |           )
288 |         )
289 | 
290 |         allow(cluster_resource_manager).to receive(:fetch_container_instances_in_cluster).and_return(container_instances)
291 |         allow(auto_scaling_group_config).to receive(:sleep).and_return(nil)
292 |         allow(cluster_resource_manager).to receive(:fetch_container_instance_arns_in_service).and_return(["with_running_task"])
293 |       end
294 | 
295 |       it "terminates auto scaliing group instances without esesstial running tasks" do
296 |         expect(auto_scaling_group_config).to receive(:detach_and_terminate_orphan_instances)
297 |         expect(cluster_resource_manager).to receive(:deregister_container_instance).with("with_no_pending_and_running_task_1c")
298 |         expect(cluster_resource_manager).not_to receive(:deregister_container_instance).with("already_detached_but_still_in_the_cluster")
299 |         expect_any_instance_of(Aws::AutoScaling::Client).to receive(:detach_instances).with(
300 |           auto_scaling_group_name: asg_name,
301 |           instance_ids: ["i-333333"],
302 |           should_decrement_desired_capacity: true,
303 |         )
304 |         expect_any_instance_of(Aws::EC2::Client).to receive(:terminate_instances).with(instance_ids: ["i-333333"])
305 |         expect(cluster_resource_manager).to receive(:trigger_capacity_update).with(container_instances.size, 3)
306 | 
307 |         auto_scaling_group_config.update_desired_capacity(2)
308 |       end
309 |     end
310 |   end
311 | 
312 |   describe "#detach_instances" do
313 |     subject(:auto_scaling_group_config) do
314 |       described_class.new({
315 |         "name"   => asg_name,
316 |         "region" => "ap-northeast-1",
317 |         "buffer" => 0,
318 |         "services" => [],
319 |       }, Logger.new(nil))
320 |     end
321 | 
322 |     let(:asg_name) { "asg_name" }
323 |     let(:auto_scaling_group_instances) do
324 |       [
325 |         Aws::AutoScaling::Types::Instance.new(
326 |           instance_id: "i-000000",
327 |           availability_zone: "ap-notrheast-1a",
328 |           lifecycle_state: "InService",
329 |           health_status: "Healthy",
330 |           launch_template: "launch_template",
331 |           protected_from_scale_in: true,
332 |         ),
333 |         Aws::AutoScaling::Types::Instance.new(
334 |           instance_id: "i-222222",
335 |           availability_zone: "ap-notrheast-1c",
336 |           lifecycle_state: "Standby",
337 |           health_status: "Healthy",
338 |           launch_template: "launch_template",
339 |           protected_from_scale_in: true,
340 |         ),
341 |         Aws::AutoScaling::Types::Instance.new(
342 |           instance_id: "i-333333",
343 |           availability_zone: "ap-notrheast-1c",
344 |           lifecycle_state: "Terminating",
345 |           health_status: "",
346 |           launch_template: "launch_template",
347 |           protected_from_scale_in: true,
348 |         ),
349 |         Aws::AutoScaling::Types::Instance.new(
350 |           instance_id: "i-444444",
351 |           availability_zone: "ap-notrheast-1c",
352 |           lifecycle_state: "Pending",
353 |           health_status: "",
354 |           launch_template: "launch_template",
355 |           protected_from_scale_in: true,
356 |         ),
357 |       ]
358 |     end
359 | 
360 |     before do
361 |       allow_any_instance_of(Aws::AutoScaling::Client).to receive(:describe_auto_scaling_groups).with(
362 |         auto_scaling_group_names: [asg_name],
363 |       ).and_return(
364 |         double(
365 |           auto_scaling_groups: [
366 |             double(
367 |               desired_capacity: auto_scaling_group_instances.size,
368 |               instances: auto_scaling_group_instances.map do |i|
369 |                 double(
370 |                   availability_zone: i.availability_zone,
371 |                   instance_id: i.instance_id,
372 |                   lifecycle_state: i.lifecycle_state,
373 |                 )
374 |               end,
375 |             )
376 |           ]
377 |         )
378 |       )
379 |     end
380 | 
381 |     it "detaches only detachable instances" do
382 |       expect_any_instance_of(Aws::AutoScaling::Client).to receive(:detach_instances).with(
383 |         auto_scaling_group_name: asg_name,
384 |         instance_ids: ["i-000000", "i-222222"],
385 |         should_decrement_desired_capacity: false,
386 |       )
387 | 
388 |       auto_scaling_group_config.detach_instances(instance_ids: ["i-000000", "i-222222", "i-333333"], should_decrement_desired_capacity: false)
389 |     end
390 |   end
391 | 
392 | end
393 | 


--------------------------------------------------------------------------------
/spec/ecs_deploy/auto_scaler/cluster_resource_manager_spec.rb:
--------------------------------------------------------------------------------
  1 | require "spec_helper"
  2 | 
  3 | require "ecs_deploy/auto_scaler/cluster_resource_manager"
  4 | 
  5 | RSpec.describe EcsDeploy::AutoScaler::ClusterResourceManager do
  6 |   let(:cluster_resource_manager) do
  7 |     described_class.new(
  8 |       region: "ap-northeast-1",
  9 |       cluster: "cluster",
 10 |       service_configs: service_configs,
 11 |       capacity_based_on: capacity_based_on,
 12 |     )
 13 |   end
 14 |   let(:service_configs) { [] }
 15 | 
 16 |   describe "#acquire" do
 17 |     let(:capacity_based_on) { "instances" }
 18 |     let(:service_configs) { [service_config] }
 19 |     let(:service_config) do
 20 |       double(name: "service_name", required_capacity: 0.5, desired_count: 4)
 21 |     end
 22 | 
 23 |     before do
 24 |       @container_instance_arns = ["arn", "arn"] # mutable list; the example appends to it to simulate a new instance
 25 |       Aws.config[:ecs] = { # NOTE(review): global SDK config is set here and not reset in this block
 26 |         stub_responses: {
 27 |           list_container_instances: ->(_) { # evaluated on every call, so it always returns the current list
 28 |             { container_instance_arns: @container_instance_arns }
 29 |           }
 30 |         }
 31 |       }
 32 |     end
 33 | 
 34 |     it do
 35 |       cluster_resource_manager.trigger_capacity_update(2, 3, interval: 0.1) # presumably announces a capacity change from 2 to 3 -- confirm against ClusterResourceManager
 36 | 
 37 |       expect(cluster_resource_manager.acquire(1, timeout: 0.5)).to be false # only two ARNs are listed at this point
 38 |       @container_instance_arns << "arn" # a third container instance shows up
 39 |       expect(cluster_resource_manager.acquire(1, timeout: 0.5)).to be true
 40 |     end
 41 |   end
 42 | 
 43 |   describe "#calculate_active_instance_capacity" do
 44 |     context "when capacity_based_on is 'instances'" do
 45 |       let(:capacity_based_on) { "instances" }
 46 | 
 47 |       before do
 48 |         Aws.config[:ecs] = {
 49 |           stub_responses: {
 50 |             list_container_instances: {
 51 |               container_instance_arns: %w[arn1 arn2],
 52 |             }
 53 |           }
 54 |         }
 55 |       end
 56 | 
 57 |       it do
 58 |         expect(cluster_resource_manager.calculate_active_instance_capacity).to eq 2
 59 |       end
 60 |     end
 61 | 
 62 |     context "when capacity_based_on is 'instances'" do
 63 |       let(:capacity_based_on) { "vCPUs" }
 64 | 
 65 |       let(:container_instances) do
 66 |         [
 67 |           Aws::ECS::Types::ContainerInstance.new(
 68 |             container_instance_arn: "2vCPUs_instance_arn",
 69 |             registered_resources: [
 70 |               {
 71 |                 integer_value: 2048,
 72 |                 name: "CPU",
 73 |               },
 74 |             ],
 75 |           ),
 76 |           Aws::ECS::Types::ContainerInstance.new(
 77 |             container_instance_arn: "4vCPUs_instance_arn",
 78 |             registered_resources: [
 79 |               {
 80 |                 integer_value: 4096,
 81 |                 name: "CPU",
 82 |               },
 83 |             ],
 84 |           ),
 85 |         ]
 86 |       end
 87 | 
 88 |       before do
 89 |         ecs_client = Aws::ECS::Client.new(stub_responses: true)
 90 |         ecs_client.stub_responses(:list_container_instances, {
 91 |           container_instance_arns: container_instances.map(&:container_instance_arn),
 92 |         })
 93 |         ecs_client.stub_responses(:describe_container_instances, {
 94 |           container_instances: container_instances,
 95 |         })
 96 |         allow(cluster_resource_manager).to receive(:ecs_client) { ecs_client }
 97 |       end
 98 | 
 99 |       it do
100 |         expect(cluster_resource_manager.calculate_active_instance_capacity).to eq 6
101 |       end
102 |     end
103 |   end
104 | end
105 | 


--------------------------------------------------------------------------------
/spec/ecs_deploy/auto_scaler/instance_drainer_spec.rb:
--------------------------------------------------------------------------------
  1 | require "spec_helper"
  2 | 
  3 | require "securerandom"
  4 | 
  5 | require "ecs_deploy/auto_scaler/auto_scaling_group_config"
  6 | require "ecs_deploy/auto_scaler/instance_drainer"
  7 | 
  8 | RSpec.describe EcsDeploy::AutoScaler::InstanceDrainer do
  9 |   describe "#poll_spot_instance_interruption_warnings" do
 10 |     subject(:drainer) do
 11 |       described_class.new(
 12 |         auto_scaling_group_configs: [asg_config],
 13 |         spot_fleet_request_configs: [double(id: "sfr_id", region: "ap-northeast-1", cluster: nil, disable_draining: disable_draining)],
 14 |         logger: Logger.new(nil),
 15 |       )
 16 |     end
 17 | 
 18 |     let(:asg_config) do
 19 |       instance_double("EcsDeploy::AutoScaler::AutoScalingGroupConfig",
 20 |         name: "asg_name",
 21 |         region: "ap-northeast-1",
 22 |         cluster: "ecs-cluster",
 23 |         disable_draining: disable_draining,
 24 |       )
 25 |     end
 26 | 
 27 |     let(:instances) do
 28 |       [
 29 |         { instance_id: 'i-000000', tags: [{ key: "aws:ec2spot:fleet-request-id", value: "sfr_id" }] },
 30 |         { instance_id: 'i-111111', tags: [{ key: "aws:ec2spot:fleet-request-id", value: "another_sfr_id" }] },
 31 |         { instance_id: 'i-222222', tags: [{ key: "aws:autoscaling:groupName", value: "asg_name" }] },
 32 |         { instance_id: 'i-333333', tags: [{ key: "aws:autoscaling:groupName", value: "another_asg_name" }] },
 33 |         { instance_id: 'i-444444', tags: [] },
 34 |       ]
 35 |     end
 36 | 
 37 |     let(:messages) do
 38 |       instances.map do |i|
 39 |         {
 40 |           message_id: SecureRandom.uuid,
 41 |           body: %Q|{"version":"0","id":"478e68b4-9ad3-1fb4-e8a2-aef2d793738d","detail-type":"EC2 Spot Instance Interruption Warning","source":"aws.ec2","account":"1234","time":"2019-10-05T14:19:37Z","region":"ap-northeast-1","resources":["arn:aws:ec2:ap-northeast-1a:instance/#{i[:instance_id]}"],"detail":{"instance-id":"#{i[:instance_id]}","instance-action":"terminate"}}|,
 42 |         }
 43 |       end
 44 |     end
 45 | 
 46 |     let(:ec2_client) { Aws::EC2::Client.new(stub_responses: true) }
 47 |     let(:ecs_client) { Aws::ECS::Client.new(stub_responses: true) }
 48 |     let(:sqs_client) { Aws::SQS::Client.new(stub_responses: true) }
 49 | 
 50 |     before do
 51 |       allow(drainer).to receive(:ec2_client) { ec2_client }
 52 |       allow(drainer).to receive(:ecs_client) { ecs_client }
 53 |       allow(drainer).to receive(:sqs_client) { sqs_client }
 54 | 
 55 |       sqs_client.stub_responses(:receive_message, { messages: messages })
 56 |       allow(sqs_client).to receive(:delete_message_batch) do
 57 |         drainer.stop
 58 |         throw :stop_polling
 59 |       end
 60 | 
 61 |       ec2_client.stub_responses(:describe_instances, ->(context) {
 62 |         if context.params[:instance_ids] == instances.map { |i| i[:instance_id] }
 63 |           { reservations: [{ instances: instances }] }
 64 |         else
 65 |           {}
 66 |         end
 67 |       })
 68 | 
 69 |       ecs_client.stub_responses(:list_container_instances, ->(context) {
 70 |         if context.params[:cluster] == nil && context.params[:filter] == "ec2InstanceId in [i-000000]"
 71 |           { container_instance_arns: ["arn:i-000000"] }
 72 |         elsif context.params[:cluster] == "ecs-cluster" && context.params[:filter] == "ec2InstanceId in [i-222222]"
 73 |           { container_instance_arns: ["arn:i-222222"] }
 74 |         else
 75 |           {}
 76 |         end
 77 |       })
 78 |     end
 79 | 
 80 |     [nil, false, "false"].each do |disable_draining| # one generated context per listed value
 81 |       context "with disable_draining #{disable_draining.inspect}" do
 82 |         let(:disable_draining) { disable_draining }
 83 | 
 84 |         it "updates the state of interrupted instances to 'DRAINING'" do
 85 |           expect(asg_config).to receive(:detach_instances).with(instance_ids: ["i-222222"], should_decrement_desired_capacity: false) # i-222222 is the instance tagged with the configured "asg_name"
 86 | 
 87 |           drainer.poll_spot_instance_interruption_warnings("https://sqs.ap-northeast-1.amazonaws.com/account_id/queue_name")
 88 | 
 89 |           expect(ecs_client.api_requests).to include({ # instance tagged with the configured spot fleet request; its stub config has cluster nil
 90 |             operation_name: :update_container_instances_state,
 91 |             params: { cluster: nil, container_instances: ["arn:i-000000"], status: "DRAINING" },
 92 |             context: a_kind_of(Seahorse::Client::RequestContext),
 93 |           })
 94 |           expect(ecs_client.api_requests).to include({ # instance of the configured auto scaling group, in "ecs-cluster"
 95 |             operation_name: :update_container_instances_state,
 96 |             params: { cluster: "ecs-cluster", container_instances: ["arn:i-222222"], status: "DRAINING" },
 97 |             context: a_kind_of(Seahorse::Client::RequestContext),
 98 |           })
 99 |         end
100 |       end
101 |     end
102 | 
103 |     [true, "true"].each do |disable_draining|
104 |       context "with disable_draining #{disable_draining.inspect}" do
105 |         let(:disable_draining) { disable_draining }
106 | 
107 |         it "updates the state of interrupted instances to 'DRAINING'" do
108 |           expect(asg_config).to receive(:detach_instances).with(instance_ids: ["i-222222"], should_decrement_desired_capacity: false)
109 | 
110 |           drainer.poll_spot_instance_interruption_warnings("https://sqs.ap-northeast-1.amazonaws.com/account_id/queue_name")
111 | 
112 |           expect(ecs_client.api_requests).to eq []
113 |         end
114 |       end
115 |     end
116 |   end
117 | end
118 | 


--------------------------------------------------------------------------------
/spec/ecs_deploy/auto_scaler/service_config_spec.rb:
--------------------------------------------------------------------------------
  1 | require "spec_helper"
  2 | 
  3 | require "ecs_deploy/auto_scaler/service_config"
  4 | 
  5 | RSpec.describe EcsDeploy::AutoScaler::ServiceConfig do
  6 |   describe "#adjust_desired_count" do
  7 |     before do
  8 |       allow_any_instance_of(described_class).to receive(:client) { ecs_client }
  9 |       allow(ecs_client).to receive(:describe_services).and_return(double(services: [double(desired_count: initial_desired_count)]))
 10 |     end
 11 | 
 12 |     subject(:service_config) do
 13 |       described_class.new({
 14 |         "name"    => "service_name",
 15 |         "cluster" => "cluster",
 16 |         "region"  => "ap-northeast-1",
 17 |         "step"    => 1,
 18 |         "max_task_count" => 100,
 19 |         "min_task_count" => 1,
 20 |         "cooldown_time_for_reach_max" => 300,
 21 |         "upscale_triggers" => [
 22 |           {
 23 |             "alarm_name" => "upscale_trigger_with_default_step",
 24 |             "region"     => "ap-northeast-1",
 25 |             "state"      => "ALARM",
 26 |           },
 27 |           {
 28 |             "alarm_name" => "upscale_trigger_with_step_2",
 29 |             "region"     => "ap-northeast-1",
 30 |             "state"      => "ALARM",
 31 |             "step"       => 2,
 32 |           },
 33 |           {
 34 |             "alarm_name" => "upscale_trigger_with_step_1",
 35 |             "region"     => "ap-northeast-1",
 36 |             "state"      => "ALARM",
 37 |             "step"       => 1,
 38 |           },
 39 |         ],
 40 |         "downscale_triggers" => downscale_triggers,
 41 |       }, Logger.new(nil))
 42 |     end
 43 |     let(:downscale_triggers) do
 44 |       [
 45 |         {
 46 |           "alarm_name" => "downscale_trigger_with_step_2",
 47 |           "region"     => "ap-northeast-1",
 48 |           "state"      => "ALARM",
 49 |           "step"       => 2,
 50 |         },
 51 |         {
 52 |           "alarm_name" => "downscale_trigger_with_step_1",
 53 |           "region"     => "ap-northeast-1",
 54 |           "state"      => "ALARM",
 55 |           "step"       => 1,
 56 |         },
 57 |       ]
 58 |     end
 59 | 
 60 |     let(:initial_desired_count) { 1 }
 61 |     let(:ecs_client) { instance_double("Aws::ECS::Client") }
 62 | 
 63 |     let(:cluster_resource_manager) { instance_double("EcsDeploy::AutoScaler::ClusterResourceManager") }
 64 | 
 65 |     context "when all triggers match" do
 66 |       before do
 67 |         (service_config.upscale_triggers + service_config.downscale_triggers).each do |trigger|
 68 |           allow(trigger).to receive(:match?).and_return(true)
 69 |         end
 70 |       end
 71 | 
 72 |       it "uses the maximum step of upscale triggers" do
 73 |         expect(cluster_resource_manager).to receive(:acquire).with(1, timeout: kind_of(Float)).twice { true }
 74 |         expect(ecs_client).to receive(:update_service).with(
 75 |           cluster: service_config.cluster,
 76 |           service: service_config.name,
 77 |           desired_count: initial_desired_count + 1,
 78 |         )
 79 |         expect(ecs_client).to receive(:update_service).with(
 80 |           cluster: service_config.cluster,
 81 |           service: service_config.name,
 82 |           desired_count: initial_desired_count + 2,
 83 |         )
 84 | 
 85 |         service_config.adjust_desired_count(cluster_resource_manager)
 86 |         service_config.wait_until_desired_count_updated
 87 |       end
 88 |     end
 89 | 
 90 |     context "when a downscale trigger exists and all triggers match" do
 91 |       let(:initial_desired_count) { 3 }
 92 |       let(:downscale_triggers) do
 93 |         [
 94 |           {
 95 |             "alarm_name" => "downscale_trigger_with_step_2",
 96 |             "region"     => "ap-northeast-1",
 97 |             "state"      => "ALARM",
 98 |             "step"       => 2,
 99 |           },
100 |           {
101 |             "alarm_name" => "downscale_trigger_with_step_1",
102 |             "region"     => "ap-northeast-1",
103 |             "state"      => "ALARM",
104 |             "step"       => 1,
105 |             "prioritized_over_upscale_triggers" => true,
106 |           },
107 |         ]
108 |       end
109 | 
110 |       before do
111 |         (service_config.upscale_triggers + service_config.downscale_triggers).each do |trigger|
112 |           allow(trigger).to receive(:match?).and_return(true)
113 |         end
114 |       end
115 | 
116 |       it "uses the maximum step of down triggers with prioritized_over_upscale_triggers true" do
117 |         expect(cluster_resource_manager).to receive(:release).with(1)
118 |         expect(ecs_client).to receive(:update_service).with(
119 |           cluster: service_config.cluster,
120 |           service: service_config.name,
121 |           desired_count: initial_desired_count - 1,
122 |         )
123 | 
124 |         expect(ecs_client).to receive(:wait_until).with(:services_stable, cluster: service_config.cluster, services: [service_config.name])
125 |         expect(ecs_client).to receive(:list_tasks).and_return([double(task_arns: ["stopping_task_arn"])], [double(task_arns: [])])
126 |         expect(ecs_client).to receive(:wait_until).with(:tasks_stopped, cluster: service_config.cluster, tasks: ["stopping_task_arn"])
127 | 
128 |         service_config.adjust_desired_count(cluster_resource_manager)
129 |       end
130 |     end
131 | 
132 |     context "when only a downscale trigger matches" do
133 |       before do
134 |         (service_config.upscale_triggers + service_config.downscale_triggers).each do |trigger|
135 |           allow(trigger).to receive(:match?).and_return(false)
136 |         end
137 |         allow(service_config.downscale_triggers.first).to receive(:match?).and_return(true)
138 |       end
139 | 
140 |       context "when desired_count - step is greater than or equal to min_task_count" do
141 |         let(:initial_desired_count) { 3 }
142 | 
143 |         it "uses the maximum step of down triggers" do
144 |           expect(cluster_resource_manager).to receive(:release).with(2)
145 |           expect(ecs_client).to receive(:update_service).with(
146 |             cluster: service_config.cluster,
147 |             service: service_config.name,
148 |             desired_count: initial_desired_count - 2,
149 |           )
150 | 
151 |           expect(ecs_client).to receive(:wait_until).with(:services_stable, cluster: service_config.cluster, services: [service_config.name])
152 |           expect(ecs_client).to receive(:list_tasks).and_return([double(task_arns: ["stopping_task_arn"])], [double(task_arns: [])])
153 |           expect(ecs_client).to receive(:wait_until).with(:tasks_stopped, cluster: service_config.cluster, tasks: ["stopping_task_arn"])
154 | 
155 |           service_config.adjust_desired_count(cluster_resource_manager)
156 |         end
157 |       end
158 | 
159 |       context "when desired_count - step is less than min_task_count" do
160 |         let(:initial_desired_count) { 2 }
161 | 
162 |         it "decreases desired_count to min_task_count" do
163 |           expect(cluster_resource_manager).to receive(:release).with(1)
164 |           expect(ecs_client).to receive(:update_service).with(
165 |             cluster: service_config.cluster,
166 |             service: service_config.name,
167 |             desired_count: initial_desired_count - 1,
168 |           )
169 | 
170 |           expect(ecs_client).to receive(:wait_until).with(:services_stable, cluster: service_config.cluster, services: [service_config.name])
171 |           expect(ecs_client).to receive(:list_tasks).and_return([double(task_arns: ["stopping_task_arn"])], [double(task_arns: [])])
172 |           expect(ecs_client).to receive(:wait_until).with(:tasks_stopped, cluster: service_config.cluster, tasks: ["stopping_task_arn"])
173 | 
174 |           service_config.adjust_desired_count(cluster_resource_manager)
175 |         end
176 |       end
177 |     end
178 |   end
179 | end
180 | 


--------------------------------------------------------------------------------
/spec/ecs_deploy/auto_scaler_spec.rb:
--------------------------------------------------------------------------------
 1 | require "spec_helper"
 2 | 
 3 | require "ecs_deploy/auto_scaler"
 4 | 
 5 | RSpec.describe EcsDeploy::AutoScaler do
 6 |   describe "#load_config" do
 7 |     it do
 8 |       described_class.load_config(File.join(__dir__, "..", "fixtures", "files", "ecs_auto_scaler_config_in_old_format.yaml"))
 9 |       old_config = described_class.instance_variable_get(:@config)
10 |       described_class.load_config(File.join(__dir__, "..", "fixtures", "files", "ecs_auto_scaler_config_in_new_format.yaml"))
11 |       new_config = described_class.instance_variable_get(:@config)
12 |       expect(old_config).to eq new_config
13 |     end
14 |   end
15 | end
16 | 


--------------------------------------------------------------------------------
/spec/ecs_deploy/instance_fluctuation_manager_spec.rb:
--------------------------------------------------------------------------------
  1 | require "spec_helper"
  2 | 
  3 | require "logger"
  4 | require "stringio"
  5 | require "ecs_deploy/instance_fluctuation_manager"
  6 | 
  7 | RSpec.describe EcsDeploy::InstanceFluctuationManager do
  8 |   let(:logdev) do
  9 |     StringIO.new
 10 |   end
 11 |   let(:instance_fluctuation_manager) do
 12 |     described_class.new(
 13 |       region: "ap-northeast-1",
 14 |       cluster: "cluster",
 15 |       auto_scaling_group_name: "asg-cluster",
 16 |       desired_capacity: 50,
 17 |       logger: ::Logger.new(logdev)
 18 |     )
 19 |   end
 20 | 
 21 |   describe "#increase" do
 22 |     context "w/o error" do
 23 |       before do
 24 |         @auto_scaling_groups = [ # the group starts at desired capacity 50 with room up to 100
 25 |           Aws::AutoScaling::Types::AutoScalingGroup.new(
 26 |             desired_capacity: 50,
 27 |             max_size: 100
 28 |           )
 29 |         ]
 30 |         Aws.config[:autoscaling] = { # NOTE(review): global SDK config is set here and not reset in this block
 31 |           stub_responses: {
 32 |             describe_auto_scaling_groups: lambda do |_|
 33 |               Aws::AutoScaling::Types::AutoScalingGroupsType.new(
 34 |                 auto_scaling_groups: @auto_scaling_groups,
 35 |               )
 36 |             end,
 37 |             update_auto_scaling_group: lambda do |_|
 38 |               # no error
 39 |               nil
 40 |             end
 41 |           }
 42 |         }
 43 | 
 44 |         cluster = Aws::ECS::Types::Cluster.new(registered_container_instances_count: 50)
 45 |         expect(cluster).to receive(:registered_container_instances_count) # overrides the struct reader; the example fails unless it is read exactly 5 times
 46 |           .exactly(5).times.and_return(60, 70, 80, 90, 100)
 47 |         @clusters = [cluster]
 48 |         Aws.config[:ecs] = {
 49 |           stub_responses: {
 50 |             describe_clusters: lambda do |_|
 51 |               Aws::ECS::Types::DescribeClustersResponse.new(clusters: @clusters) # always hands back the same mocked cluster object
 52 |             end
 53 |           }
 54 |         }
 55 | 
 56 |         allow(instance_fluctuation_manager).to receive(:sleep) # stub sleep on the manager instance
 57 |       end
 58 | 
 59 |       it "succeeded in increasing instances" do
 60 |         thread = instance_fluctuation_manager.increase # the return value is joined before the log is inspected
 61 |         thread.join
 62 |         log = logdev.string
 63 |         expect(log).to include("Increasing desired capacity of asg-cluster: 50 => 100")
 64 |         [60, 70, 80, 90].each do |count| # the intermediate counts returned by the mocked reader above
 65 |           expect(log).to include("Current registered instance count: #{count}")
 66 |         end
 67 |         expect(log).to include("Succeeded in increasing instances!")
 68 |       end
 69 |     end
 70 |   end
 71 | 
 72 |   describe("#decrease") do
 73 |     context "w/ 2 availability zones" do
 74 |       before do
 75 |         @auto_scaling_groups = [ # stubbed ASG state: desired capacity 100, max size 100
 76 |           Aws::AutoScaling::Types::AutoScalingGroup.new(
 77 |             desired_capacity: 100,
 78 |             max_size: 100
 79 |           )
 80 |         ]
 81 |         Aws.config[:autoscaling] = { # NOTE(review): assigns global SDK config; nothing in this file restores it afterwards
 82 |           stub_responses: {
 83 |             describe_auto_scaling_groups: lambda do |_|
 84 |               Aws::AutoScaling::Types::AutoScalingGroupsType.new(
 85 |                 auto_scaling_groups: @auto_scaling_groups,
 86 |               )
 87 |             end,
 88 |             update_auto_scaling_group: lambda do |_|
 89 |               # no error
 90 |             end,
 91 |             detach_instances: lambda do |_|
 92 |               # no error
 93 |             end
 94 |           }
 95 |         }
 96 |         # 100 container instances, each given a random zone (zone-a or zone-b) and a random running-task count of 1..10.
 97 |         arns = (1..100).to_a.map {|n| sprintf("arn:aws:ecs:ap-northeast-1:xxx:container-instance/%03d", n) }
 98 |         availability_zones = [
 99 |           Aws::ECS::Types::Attribute.new(name: "ecs.availability-zone", value: "zone-a"),
100 |           Aws::ECS::Types::Attribute.new(name: "ecs.availability-zone", value: "zone-b"),
101 |         ]
102 |         container_instances = arns.map do |arn|
103 |           Aws::ECS::Types::ContainerInstance.new(
104 |             container_instance_arn: arn,
105 |             running_tasks_count: rand(1..10),
106 |             attributes: [availability_zones.sample], # NOTE(review): random draw; the 25/25 expectation needs at least 25 instances per zone
107 |             ec2_instance_id: "ec2-#{arn}"
108 |           )
109 |         end
110 |         task_arns = (1..10).to_a.map {|n| sprintf("task-arn%02d", n) }
111 |         tasks = task_arns.map do |arn| # ten tasks; each group is randomly one of the two string forms below
112 |           group = ["family:#{arn}", "dummy:#{arn}"].sample
113 |           Aws::ECS::Types::Task.new(task_arn: arn, group: group)
114 |         end
115 |         Aws.config[:ecs] = { # every stub returns the same fixed data regardless of the request parameters
116 |           stub_responses: {
117 |             list_container_instances: lambda do |_|
118 |               Aws::ECS::Types::ListContainerInstancesResponse.new(container_instance_arns: arns)
119 |             end,
120 |             describe_container_instances: lambda do |_|
121 |               Aws::ECS::Types::DescribeContainerInstancesResponse.new(container_instances: container_instances)
122 |             end,
123 |             update_container_instances_state: lambda do |_|
124 |               # no error
125 |             end,
126 |             list_tasks: lambda do |_|
127 |               Aws::ECS::Types::ListTasksResponse.new(task_arns: task_arns)
128 |             end,
129 |             describe_tasks: lambda do |_|
130 |               Aws::ECS::Types::DescribeTasksResponse.new(tasks: tasks)
131 |             end,
132 |             stop_task: lambda do |_|
133 |               # no error
134 |             end
135 |           }
136 |         }
137 |         # Must stub after :stub_responses has been assigned to Aws.config[:ecs] (presumably the client reads it when first built - confirm)
138 |         ecs_client = instance_fluctuation_manager.send(:ecs_client)
139 |         allow(ecs_client).to receive(:wait_until)
140 |         expect(ecs_client).to receive(:stop_task).at_most(arns.size * tasks.size).times # upper bound only; the exact call count is not pinned
141 | 
142 |         Aws.config[:ec2] = {
143 |           stub_responses: {
144 |             terminate_instances: {}
145 |           }
146 |         }
147 |         ec2_client = instance_fluctuation_manager.send(:ec2_client)
148 |         allow(ec2_client).to receive(:wait_until) # waiter is replaced with a no-op stub
149 |       end
150 | 
151 |       it "succeeded in decreasing instances" do
152 |         instance_fluctuation_manager.decrease
153 |         log = logdev.string
154 |         expect(log).to include("Decreasing desired capacity of asg-cluster: 100 => 50")
155 |         expect(log).to include("Succeeded in decreasing instances!")
156 |         instance_size_per_az = log.lines.grep(/AZ balance/).last.scan(/AZ balance: \{"zone-a"=>(\d+), "zone-b"=>(\d+)\}/).flatten.map(&:to_i) # counts parsed from the last "AZ balance" log line
157 |         expect(instance_size_per_az).to contain_exactly(25, 25)
158 |       end
159 |     end
160 | 
161 |     context "w/ 3 availability_zones" do
162 |       before do
163 |         @auto_scaling_groups = [ # stubbed ASG state: desired capacity 100, max size 100
164 |           Aws::AutoScaling::Types::AutoScalingGroup.new(
165 |             desired_capacity: 100,
166 |             max_size: 100
167 |           )
168 |         ]
169 |         Aws.config[:autoscaling] = { # NOTE(review): assigns global SDK config; nothing in this file restores it afterwards
170 |           stub_responses: {
171 |             describe_auto_scaling_groups: lambda do |_|
172 |               Aws::AutoScaling::Types::AutoScalingGroupsType.new(
173 |                 auto_scaling_groups: @auto_scaling_groups,
174 |               )
175 |             end,
176 |             update_auto_scaling_group: lambda do |_|
177 |               # no error
178 |             end,
179 |             detach_instances: lambda do |_|
180 |               # no error
181 |             end
182 |           }
183 |         }
184 |         # Zones are assigned round-robin (34/33/33), not at random: the examples need exact per-zone counts, and a random draw could leave a zone below its target.
185 |         arns = (1..100).to_a.map {|n| sprintf("arn:aws:ecs:ap-northeast-1:xxx:container-instance/%03d", n) }
186 |         availability_zones = [
187 |           Aws::ECS::Types::Attribute.new(name: "ecs.availability-zone", value: "zone-a"),
188 |           Aws::ECS::Types::Attribute.new(name: "ecs.availability-zone", value: "zone-b"),
189 |           Aws::ECS::Types::Attribute.new(name: "ecs.availability-zone", value: "zone-c")
190 |         ]
191 |         container_instances = arns.each_with_index.map do |arn, index| # index drives the deterministic zone choice
192 |           Aws::ECS::Types::ContainerInstance.new(
193 |             container_instance_arn: arn,
194 |             running_tasks_count: rand(1..10),
195 |             attributes: [availability_zones[index % availability_zones.size]],
196 |             ec2_instance_id: "ec2-#{arn}"
197 |           )
198 |         end
199 |         task_arns = (1..10).to_a.map {|n| sprintf("task-arn%02d", n) }
200 |         tasks = task_arns.map do |arn| # ten tasks; each group is randomly one of the two string forms below
201 |           group = ["family:#{arn}", "dummy:#{arn}"].sample
202 |           Aws::ECS::Types::Task.new(task_arn: arn, group: group)
203 |         end
204 |         Aws.config[:ecs] = { # every stub returns the same fixed data regardless of the request parameters
205 |           stub_responses: {
206 |             list_container_instances: lambda do |_|
207 |               Aws::ECS::Types::ListContainerInstancesResponse.new(container_instance_arns: arns)
208 |             end,
209 |             describe_container_instances: lambda do |_|
210 |               Aws::ECS::Types::DescribeContainerInstancesResponse.new(container_instances: container_instances)
211 |             end,
212 |             update_container_instances_state: lambda do |_|
213 |               # no error
214 |             end,
215 |             list_tasks: lambda do |_|
216 |               Aws::ECS::Types::ListTasksResponse.new(task_arns: task_arns)
217 |             end,
218 |             describe_tasks: lambda do |_|
219 |               Aws::ECS::Types::DescribeTasksResponse.new(tasks: tasks)
220 |             end,
221 |             stop_task: lambda do |_|
222 |               # no error
223 |             end
224 |           }
225 |         }
226 |         # Must stub after :stub_responses has been assigned to Aws.config[:ecs] (presumably the client reads it when first built - confirm)
227 |         ecs_client = instance_fluctuation_manager.send(:ecs_client)
228 |         allow(ecs_client).to receive(:wait_until)
229 |         expect(ecs_client).to receive(:stop_task).at_most(arns.size * tasks.size).times # upper bound only; the exact call count is not pinned
230 | 
231 |         Aws.config[:ec2] = {
232 |           stub_responses: {
233 |             terminate_instances: {}
234 |           }
235 |         }
236 |         ec2_client = instance_fluctuation_manager.send(:ec2_client)
237 |         allow(ec2_client).to receive(:wait_until) # waiter is replaced with a no-op stub
238 |       end
239 | 
240 |       context "desired capacity is multiple of 3" do
241 |         let(:instance_fluctuation_manager) do # overrides the outer manager with a target of 60 instances
242 |           described_class.new(
243 |             region: "ap-northeast-1",
244 |             cluster: "cluster",
245 |             auto_scaling_group_name: "asg-cluster",
246 |             desired_capacity: 60,
247 |             logger: ::Logger.new(logdev)
248 |           )
249 |         end
250 | 
251 |         it "succeeded in decreasing instances" do
252 |           instance_fluctuation_manager.decrease
253 |           log = logdev.string
254 |           expect(log).to include("Decreasing desired capacity of asg-cluster: 100 => 60")
255 |           expect(log).to include("Succeeded in decreasing instances!")
256 |           instance_size_per_az = log.lines.grep(/AZ balance/).last.scan(/AZ balance: \{"zone-a"=>(\d+), "zone-b"=>(\d+), "zone-c"=>(\d+)\}/).flatten.map(&:to_i) # counts parsed from the last "AZ balance" log line
257 |           expect(instance_size_per_az).to contain_exactly(20, 20, 20)
258 |         end
259 |       end
260 | 
261 |       context "desired capacity is odd number" do
262 |         let(:instance_fluctuation_manager) do # overrides the outer manager with a target of 53 instances
263 |           described_class.new(
264 |             region: "ap-northeast-1",
265 |             cluster: "cluster",
266 |             auto_scaling_group_name: "asg-cluster",
267 |             desired_capacity: 53,
268 |             logger: ::Logger.new(logdev)
269 |           )
270 |         end
271 | 
272 |         it "succeeded in decreasing instances" do
273 |           instance_fluctuation_manager.decrease
274 |           log = logdev.string
275 |           expect(log).to include("Decreasing desired capacity of asg-cluster: 100 => 53")
276 |           expect(log).to include("Succeeded in decreasing instances!")
277 |           instance_size_per_az = log.lines.grep(/AZ balance/).last.scan(/AZ balance: \{"zone-a"=>(\d+), "zone-b"=>(\d+), "zone-c"=>(\d+)\}/).flatten.map(&:to_i) # 53 cannot split evenly, so one zone ends one lower
278 |           expect(instance_size_per_az).to contain_exactly(17, 18, 18)
279 |         end
280 |       end
281 |     end
282 | 
283 |     context "with DEREGISTERING status" do
284 |       let(:instance_fluctuation_manager) do # manager targeting a desired capacity of 0
285 |         described_class.new(
286 |           region: "ap-northeast-1",
287 |           cluster: "cluster",
288 |           auto_scaling_group_name: "asg-cluster",
289 |           desired_capacity: 0,
290 |           logger: ::Logger.new(logdev)
291 |         )
292 |       end
293 |       let(:auto_scaling_groups) do # stubbed ASG state: desired capacity 1, max size 5
294 |         [
295 |           Aws::AutoScaling::Types::AutoScalingGroup.new(
296 |             desired_capacity: 1,
297 |             max_size: 5
298 |           )
299 |         ]
300 |       end
301 |       let(:arns) do # two container instance ARNs
302 |         2.times.map { |i| "arn:aws:ecs:ap-northeast-1:xxx:container-instance/00#{i}" }
303 |       end
304 |       let(:ec2_instance_ids) do # one EC2 instance id per ARN, derived from the ARN
305 |         2.times.map { |i| "ec2-#{arns[i]}" }
306 |       end
307 |       let(:container_instances) do # first instance is ACTIVE with one running task, second is DEREGISTERING with none
308 |         [
309 |           Aws::ECS::Types::ContainerInstance.new(
310 |             container_instance_arn: arns[0],
311 |             running_tasks_count: 1,
312 |             attributes: [Aws::ECS::Types::Attribute.new(name: "ecs.availability-zone", value: "zone-a")],
313 |             ec2_instance_id: ec2_instance_ids[0],
314 |             status: 'ACTIVE',
315 |           ),
316 |           Aws::ECS::Types::ContainerInstance.new(
317 |             container_instance_arn: arns[1],
318 |             running_tasks_count: 0,
319 |             attributes: [Aws::ECS::Types::Attribute.new(name: "ecs.availability-zone", value: "zone-a")],
320 |             ec2_instance_id: ec2_instance_ids[1],
321 |             status: 'DEREGISTERING',
322 |           )
323 |         ]
324 |       end
325 |       let(:task_arns) do
326 |         2.times.map {|i| sprintf("task-arn%02d", i) }
327 |       end
328 |       let(:tasks) do # two tasks; each group is randomly one of the two string forms below
329 |         task_arns.map do |arn|
330 |           group = ["family:#{arn}", "dummy:#{arn}"].sample
331 |           Aws::ECS::Types::Task.new(task_arn: arn, group: group)
332 |         end
333 |       end
334 | 
335 |       before do
336 |         Aws.config[:autoscaling] = { # NOTE(review): assigns global SDK config; nothing in this file restores it afterwards
337 |           stub_responses: {
338 |             describe_auto_scaling_groups: lambda do |_|
339 |               Aws::AutoScaling::Types::AutoScalingGroupsType.new(
340 |                 auto_scaling_groups: auto_scaling_groups,
341 |               )
342 |             end,
343 |             update_auto_scaling_group: lambda do |_|
344 |               # no error
345 |             end,
346 |             detach_instances: lambda do |_|
347 |               # no error
348 |             end
349 |           }
350 |         }
351 | 
352 |         Aws.config[:ecs] = { # every stub returns the same fixed data regardless of the request parameters
353 |           stub_responses: {
354 |             list_container_instances: lambda do |_|
355 |               Aws::ECS::Types::ListContainerInstancesResponse.new(container_instance_arns: arns)
356 |             end,
357 |             describe_container_instances: lambda do |_|
358 |               Aws::ECS::Types::DescribeContainerInstancesResponse.new(container_instances: container_instances)
359 |             end,
360 |             update_container_instances_state: lambda do |_|
361 |               # no error
362 |             end,
363 |             list_tasks: lambda do |_|
364 |               Aws::ECS::Types::ListTasksResponse.new(task_arns: task_arns)
365 |             end,
366 |             describe_tasks: lambda do |_|
367 |               Aws::ECS::Types::DescribeTasksResponse.new(tasks: tasks)
368 |             end,
369 |             stop_task: lambda do |_|
370 |               # no error
371 |             end
372 |           }
373 |         }
374 |         Aws.config[:ec2] = {
375 |           stub_responses: {
376 |             terminate_instances: {}
377 |           }
378 |         }
379 | 
380 |         # Must stub after :stub_responses has been assigned to Aws.config[:ecs] (presumably the client reads it when first built - confirm)
381 |         ecs_client = instance_fluctuation_manager.send(:ecs_client)
382 |         allow(ecs_client).to receive(:wait_until)
383 |         expect(ecs_client).to receive(:stop_task).at_most(2).times # upper bound only; the exact call count is not pinned
384 | 
385 | 
386 |         ec2_client = instance_fluctuation_manager.send(:ec2_client)
387 |         allow(ec2_client).to receive(:wait_until) # waiter is replaced with a no-op stub
388 |       end
389 | 
390 |       it "succeeded in decreasing instances" do
391 |         # terminate instances whose status is not 'DEREGISTERING'
392 |         ec2_client = instance_fluctuation_manager.send(:ec2_client)
393 |         expect(ec2_client).to receive(:terminate_instances).with(instance_ids: [ec2_instance_ids[0]]) # only the ACTIVE instance's id may be passed
394 | 
395 |         instance_fluctuation_manager.decrease
396 |         log = logdev.string
397 |         expect(log).to include("Decreasing desired capacity of asg-cluster: 1 => 0")
398 |         expect(log).to include("Succeeded in decreasing instances!")
399 |       end
400 |     end
401 |   end
402 | end
403 | 


--------------------------------------------------------------------------------
/spec/fixtures/files/ecs_auto_scaler_config_in_new_format.yaml:
--------------------------------------------------------------------------------
 1 | polling_interval: 60
 2 | # New-format fixture: each service is nested under the auto scaling group or spot fleet request that hosts it.
 3 | auto_scaling_groups:
 4 |   - name: ecs-cluster-nodes
 5 |     region: ap-northeast-1
 6 |     cluster: ecs-cluster  # the old-format fixture sets cluster on each service instead
 7 |     buffer: 1
 8 |     services:
 9 |       - name: repro-api-production
10 |         step: 1
11 |         idle_time: 240
12 |         max_task_count: [10, 25]
13 |         scheduled_min_task_count:
14 |           - {from: "1:45", to: "4:30", count: 8}
15 |         cooldown_time_for_reach_max: 600
16 |         min_task_count: 0
17 |         required_capacity: 0.5
18 |         upscale_triggers:
19 |           - alarm_name: "ECS [repro-api-production] CPUUtilization"
20 |             state: ALARM
21 |           - alarm_name: "ELB repro-api-a HTTPCode_Backend_5XX"
22 |             state: ALARM
23 |             step: 2  # this trigger carries its own step, unlike the trigger above
24 |         downscale_triggers:
25 |           - alarm_name: "ECS [repro-api-production] CPUUtilization (low)"
26 |             state: OK
27 | # The worker service is nested under its spot fleet request; the old format links it via spot_fleet_request_id.
28 | spot_fleet_requests:
29 |   - id: sfr-354de735-2c17-4565-88c9-10ada5b957e5
30 |     region: ap-northeast-1
31 |     cluster: ecs-cluster-for-worker
32 |     buffer: 1
33 |     services:
34 |       - name: repro-worker-production
35 |         step: 1
36 |         idle_time: 240
37 |         cooldown_time_for_reach_max: 600
38 |         min_task_count: 0
39 |         required_capacity: 2
40 |         upscale_triggers:
41 |           - alarm_name: "ECS [repro-worker-production] CPUUtilization"
42 |             state: ALARM
43 |         downscale_triggers:
44 |           - alarm_name: "ECS [repro-worker-production] CPUUtilization (low)"
45 |             state: OK
46 | # <account-id> in the URL below is a placeholder left as-is in the fixture.
47 | spot_instance_intrp_warns_queue_urls:
48 |   - https://sqs.ap-northeast-1.amazonaws.com/<account-id>/spot-instance-intrp-warns
49 | 


--------------------------------------------------------------------------------
/spec/fixtures/files/ecs_auto_scaler_config_in_old_format.yaml:
--------------------------------------------------------------------------------
 1 | polling_interval: 60
 2 | # Old-format fixture: services live in a top-level list and point at their ASG or spot fleet request by name/id.
 3 | auto_scaling_groups:
 4 |   - name: ecs-cluster-nodes
 5 |     region: ap-northeast-1
 6 |     buffer: 1
 7 | 
 8 | spot_fleet_requests:
 9 |   - id: sfr-354de735-2c17-4565-88c9-10ada5b957e5
10 |     region: ap-northeast-1
11 |     buffer: 1
12 | # <account-id> in the URL below is a placeholder left as-is in the fixture.
13 | spot_instance_intrp_warns_queue_urls:
14 |   - https://sqs.ap-northeast-1.amazonaws.com/<account-id>/spot-instance-intrp-warns
15 | # Each service repeats cluster and region, plus a reference to the group or fleet request it runs on.
16 | services:
17 |   - name: repro-api-production
18 |     cluster: ecs-cluster
19 |     region: ap-northeast-1
20 |     auto_scaling_group_name: ecs-cluster-nodes  # matches auto_scaling_groups[0].name above
21 |     step: 1
22 |     idle_time: 240
23 |     max_task_count: [10, 25]
24 |     scheduled_min_task_count:
25 |       - {from: "1:45", to: "4:30", count: 8}
26 |     cooldown_time_for_reach_max: 600
27 |     min_task_count: 0
28 |     required_capacity: 0.5
29 |     upscale_triggers:
30 |       - alarm_name: "ECS [repro-api-production] CPUUtilization"
31 |         state: ALARM
32 |       - alarm_name: "ELB repro-api-a HTTPCode_Backend_5XX"
33 |         state: ALARM
34 |         step: 2  # this trigger carries its own step, unlike the trigger above
35 |     downscale_triggers:
36 |       - alarm_name: "ECS [repro-api-production] CPUUtilization (low)"
37 |         state: OK
38 | 
39 |   - name: repro-worker-production
40 |     cluster: ecs-cluster-for-worker
41 |     region: ap-northeast-1
42 |     spot_fleet_request_id: sfr-354de735-2c17-4565-88c9-10ada5b957e5  # matches spot_fleet_requests[0].id above
43 |     step: 1
44 |     idle_time: 240
45 |     cooldown_time_for_reach_max: 600
46 |     min_task_count: 0
47 |     required_capacity: 2
48 |     upscale_triggers:
49 |       - alarm_name: "ECS [repro-worker-production] CPUUtilization"
50 |         state: ALARM
51 |     downscale_triggers:
52 |       - alarm_name: "ECS [repro-worker-production] CPUUtilization (low)"
53 |         state: OK
54 | 


--------------------------------------------------------------------------------
/spec/spec_helper.rb:
--------------------------------------------------------------------------------
 1 | require "bundler/setup"
 2 | require "ecs_deploy"
 3 | 
 4 | RSpec.configure do |config|
 5 |   # Enable flags like --only-failures and --next-failure
 6 |   config.example_status_persistence_file_path = ".rspec_status"
 7 | 
 8 |   # Disable RSpec exposing methods globally on `Module` and `main`
 9 |   config.disable_monkey_patching!
10 | 
11 |   config.expect_with :rspec do |c|
12 |     c.syntax = :expect
13 |   end
14 | end
15 | 


--------------------------------------------------------------------------------