├── clarify.toml ├── .gitignore ├── NOTICE ├── updater ├── .dockerignore ├── go.mod ├── main_test.go ├── go.sum ├── mock_test.go ├── main.go ├── aws.go └── aws_test.go ├── .golangci.yaml ├── CODE_OF_CONDUCT.md ├── .github ├── ISSUE_TEMPLATE │ └── feature.md ├── workflows │ ├── cfn-lint.yml │ └── ci.yml ├── dependabot.yaml └── pull_request_template.md ├── COPYRIGHT ├── integ ├── common.sh ├── README.md ├── stacks │ ├── cluster.yaml │ └── integ-shared.yaml ├── setup.sh ├── run-updater.sh └── cleanup.sh ├── LICENSE-MIT ├── Dockerfile ├── Dockerfile.licenses ├── CHANGELOG.md ├── Makefile ├── CONTRIBUTING.md ├── LICENSE-APACHE ├── stacks └── bottlerocket-ecs-updater.yaml └── README.md /clarify.toml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.env 2 | /.idea 3 | /updater/bin 4 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | -------------------------------------------------------------------------------- /updater/.dockerignore: -------------------------------------------------------------------------------- 1 | *.tar* 2 | bin/ 3 | vendor/ 4 | bottlerocket-ecs-updater 5 | -------------------------------------------------------------------------------- /.golangci.yaml: -------------------------------------------------------------------------------- 1 | linters: 2 | enable: 3 | - staticcheck 4 | - unconvert 5 | - goimports 6 | - revive 7 | - ineffassign 8 | - vet 9 | - unused 10 | - misspell 11 | disable: 12 | - errcheck 13 | 14 | run: 15 | timeout: 3m 16 | 17 | issues: 18 | exclude-dirs: 19 | - stacks 20 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 
5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Request a change to the project 4 | --- 5 | 6 | 11 | 12 | **What I'd like:** 13 | 14 | 15 | 16 | **Any alternatives you've considered:** 17 | 18 | -------------------------------------------------------------------------------- /.github/workflows/cfn-lint.yml: -------------------------------------------------------------------------------- 1 | name: cfn-lint 2 | on: 3 | push: 4 | paths: 5 | - 'stacks/*' 6 | - 'integ/stacks/*' 7 | pull_request: 8 | paths: 9 | - 'stacks/*' 10 | - 'integ/stacks/*' 11 | jobs: 12 | cfn-lint: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 16 | - run: pip install cfn-lint 17 | - run: make cfn-lint 18 | -------------------------------------------------------------------------------- /.github/dependabot.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | 4 | # Maintain dependencies for GitHub Actions 5 | - package-ecosystem: "github-actions" 6 | directory: "/" 7 | schedule: 8 | interval: "weekly" 9 | labels: 10 | - "area/dependencies" 11 | - package-ecosystem: "gomod" 12 | directory: "/" 13 | schedule: 14 | interval: "daily" 15 | labels: 16 | - "area/dependencies" 17 | open-pull-requests-limit: 0 18 | -------------------------------------------------------------------------------- /updater/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/bottlerocket-os/bottlerocket-ecs-updater 2 | 3 | go 1.19 4 | 5 | require ( 6 | github.com/aws/aws-sdk-go v1.51.20 7 | github.com/stretchr/testify v1.8.1 8 | ) 9 | 10 | replace golang.org/x/net => golang.org/x/net v0.8.0 11 | 12 | require ( 13 | github.com/davecgh/go-spew 
v1.1.1 // indirect 14 | github.com/jmespath/go-jmespath v0.4.0 // indirect 15 | github.com/pmezard/go-difflib v1.0.0 // indirect 16 | gopkg.in/yaml.v3 v3.0.1 // indirect 17 | ) 18 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 6 | 7 | **Issue number:** 8 | 9 | 10 | 11 | **Description of changes:** 12 | 13 | 14 | 15 | **Testing done:** 16 | 17 | 18 | 19 | **Terms of contribution:** 20 | 21 | By submitting this pull request, I agree that this contribution is dual-licensed under the terms of both the Apache License, version 2.0, and the MIT license. 22 | -------------------------------------------------------------------------------- /COPYRIGHT: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc., its affiliates, or other contributors. All Rights Reserved. 2 | 3 | Except as otherwise noted (below and/or in individual files), Bottlerocket is dual-licensed under 4 | the Apache License, version 2.0 or the MIT license , at your option. 5 | 6 | Copyrights in Bottlerocket are retained by their contributors. No copyright assignment is required 7 | to contribute to Bottlerocket. Contributions to Bottlerocket are explicitly made under both the 8 | Apache License, version 2.0, and the MIT license. For full authorship information, see the version 9 | control history. 10 | 11 | Bottlerocket operating system images include packages written by third parties, which may carry 12 | their own copyright notices and license terms. These are available in /usr/share/licenses on the 13 | operating system images. 
14 | -------------------------------------------------------------------------------- /integ/common.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Cloudformation stack template file name to set up VPC, security group, IAM roles, and log group 4 | INTEG_STACK_TEMPLATE="integ-shared.yaml" 5 | 6 | # Cloudformation stack template file name to set up an ECS cluster 7 | CLUSTER_STACK_TEMPLATE="cluster.yaml" 8 | 9 | # The stack name for deploying `integ-shared.yaml` template 10 | INTEG_STACK_NAME="ecs-updater-integ-shared" 11 | 12 | # Prefix for ECS Updater stack name, resulting stack name will be below prefix + cluster name 13 | UPDATER_STACK_PREFIX="UPDATER-" 14 | 15 | log() { 16 | local lvl="$1" 17 | shift 18 | local msg="$*" 19 | echo "${lvl}: ${msg}" >&2 20 | } 21 | 22 | required_arg() { 23 | local arg="${1:?}" 24 | local value="${2}" 25 | if [ -z "${value}" ]; then 26 | echo "ERROR: ${arg} is required" >&2 27 | exit 2 28 | fi 29 | } 30 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | Copyright Amazon.com, Inc., its affiliates, or other contributors. All Rights Reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 5 | 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
7 | 8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 9 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | - push 4 | - pull_request 5 | jobs: 6 | golangci: 7 | name: lint 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 11 | with: 12 | go-version: 1.19 13 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 14 | - name: golangci-lint 15 | uses: golangci/golangci-lint-action@55c2c1448f86e01eaae002a5a3a9624417608d84 16 | with: 17 | version: latest 18 | working-directory: updater 19 | build: 20 | name: build 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 24 | with: 25 | go-version: 1.19 26 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 27 | - run: make 28 | test: 29 | name: unit tests 30 | runs-on: ubuntu-latest 31 | steps: 32 | - uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 33 | with: 34 | go-version: 1.19 35 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 36 | - run: make test 37 | -------------------------------------------------------------------------------- /updater/main_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | 
"github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestTaskDefFamily(t *testing.T) { 12 | cases := []struct { 13 | name string 14 | taskDefARN string 15 | expectedErr string 16 | expectedFamily string 17 | }{ 18 | { 19 | name: "success", 20 | taskDefARN: "arn:aws:ecs:us-west-2:1234567:task-definition/updater-family:1", 21 | expectedFamily: "updater-family", 22 | }, 23 | { 24 | name: "fail parse arn", 25 | taskDefARN: "arn:ecs:us-west-2:1234567updater-family:1", 26 | expectedFamily: "", 27 | expectedErr: "arn: not enough sections", 28 | }, 29 | { 30 | name: "fail empty arn", 31 | taskDefARN: "", 32 | expectedFamily: "", 33 | expectedErr: "arn: invalid prefix", 34 | }, 35 | { 36 | name: "fail extract family", 37 | taskDefARN: "arn:aws:ecs:us-west-2:1234567:task-def/updater-family1", 38 | expectedFamily: "", 39 | expectedErr: "not a task definition arn:", 40 | }, 41 | } 42 | for _, tc := range cases { 43 | t.Run(tc.name, func(t *testing.T) { 44 | originalValue := os.Getenv(taskDefARNEnv) 45 | defer func() { os.Setenv(taskDefARNEnv, originalValue) }() 46 | os.Setenv(taskDefARNEnv, tc.taskDefARN) 47 | family, err := taskDefFamily() 48 | if tc.expectedErr == "" { 49 | require.NoError(t, err) 50 | } else { 51 | require.Error(t, err) 52 | assert.Contains(t, err.Error(), tc.expectedErr) 53 | } 54 | assert.Equal(t, tc.expectedFamily, family) 55 | }) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:1.1.3-experimental 2 | ARG BUILDER_IMAGE 3 | # LICENSES_IMAGE is a container image that contains license files for the source 4 | # and its dependencies. When building with `make container`, the licenses 5 | # container image is built and provided as LICENSE_IMAGE. 
6 | ARG LICENSES_IMAGE=scratch 7 | 8 | # build the updater image 9 | FROM ${BUILDER_IMAGE} as builder 10 | USER builder 11 | WORKDIR /wrkdir 12 | ENV GOPROXY=direct 13 | # Sets the target architecture for the binary 14 | ARG GOARCH 15 | ENV OUTPUT_DIR=/wrkdir/target/${GOARCH}/release 16 | COPY ./updater /wrkdir/ 17 | RUN go mod download 18 | RUN CGO_ENABLED=0 go build -v -o ${OUTPUT_DIR}/bottlerocket-ecs-updater . && \ 19 | cp ${OUTPUT_DIR}/bottlerocket-ecs-updater /wrkdir/bottlerocket-ecs-updater 20 | 21 | FROM ${LICENSES_IMAGE} as licenses 22 | # Set WORKDIR to create /licenses/ if the directory is missing. 23 | # 24 | # Having an image with /licenses/ lets scratch be substituted in when 25 | # LICENSES_IMAGE isn't provided. For example, a user can manually run `docker 26 | # build -t neio:latest .` to build a working image without providing an expected 27 | # LICENSES_IMAGE. 28 | WORKDIR /licenses/ 29 | 30 | # create an image with just the binary 31 | FROM scratch 32 | # Copy CA certificates store 33 | COPY --from=public.ecr.aws/amazonlinux/amazonlinux:2 /etc/ssl /etc/ssl 34 | COPY --from=public.ecr.aws/amazonlinux/amazonlinux:2 /etc/pki /etc/pki 35 | COPY --from=builder \ 36 | /wrkdir/bottlerocket-ecs-updater \ 37 | /bottlerocket-ecs-updater 38 | COPY COPYRIGHT LICENSE-* /usr/share/licenses/bottlerocket-ecs-updater/ 39 | COPY --from=licenses /licenses/ /usr/share/licenses/bottlerocket-ecs-updater/vendor/ 40 | ENTRYPOINT ["/bottlerocket-ecs-updater"] 41 | -------------------------------------------------------------------------------- /Dockerfile.licenses: -------------------------------------------------------------------------------- 1 | # This Dockerfile produces an image that has only the licenses of dependencies 2 | # used in the updater. 3 | # 4 | # LICENSE, and other legal notices, are collected by bottlerocket-license-scan 5 | # to be organized into a project-wide conventional directory structure rooted at 6 | # /licenses in the resulting image. 
7 | 8 | # SDK_IMAGE is the Bottlerocket SDK container image that provides 9 | # `bottlerocket-license-scan` in it. 10 | ARG SDK_IMAGE 11 | 12 | # GOLANG_IMAGE is the image to be used for collecting modules. This should be 13 | # the same image used in the build. The idea is to have the same toolchain to 14 | # avoid running into any differences between versions. 15 | ARG GOLANG_IMAGE=golang:1.19.1 16 | 17 | # Fetch dependencies into a vendor/ directory. 18 | # 19 | # The first several steps should match that of the build's Dockerfile to share 20 | # the go module package cache. 21 | FROM $GOLANG_IMAGE as src 22 | USER builder 23 | WORKDIR /wrkdir 24 | ENV GOPROXY=direct 25 | # Sets the target architecture for the binary 26 | ARG GOARCH 27 | COPY ./updater /wrkdir/ 28 | RUN go mod download 29 | # Unpack go modules into a vendor/ directory to run scanner on. 30 | RUN go mod vendor 31 | 32 | # Run the license scanner and dump its processed & collected license data to be 33 | # used in distributed container image. 34 | FROM $SDK_IMAGE as license-scan 35 | COPY --from=src /wrkdir/vendor /wrkdir/vendor 36 | COPY clarify.toml /wrkdir/clarify.toml 37 | USER root 38 | RUN bottlerocket-license-scan \ 39 | --spdx-data /usr/libexec/tools/spdx-data \ 40 | --out-dir /out/licenses \ 41 | --clarify /wrkdir/clarify.toml \ 42 | go-vendor /wrkdir/vendor 43 | 44 | # Final container image has LICENSE files and accompanying attributions 45 | # collected and produced by the license scanner. 
46 | FROM scratch as licenses 47 | COPY --from=license-scan /out/licenses /licenses -------------------------------------------------------------------------------- /updater/go.sum: -------------------------------------------------------------------------------- 1 | github.com/aws/aws-sdk-go v1.51.20 h1:ziM90ujYHKKkoTZL+Wg2LwjbQecL+l298GGJeG4ktZs= 2 | github.com/aws/aws-sdk-go v1.51.20/go.mod h1:LF8svs817+Nz+DmiMQKTO3ubZ/6IaTpq3TjupRn3Eqk= 3 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 4 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 5 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 6 | github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= 7 | github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= 8 | github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= 9 | github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= 10 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 11 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 12 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 13 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 14 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 15 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 16 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 17 | github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= 18 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 19 | gopkg.in/check.v1 
v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 20 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 21 | gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= 22 | gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 23 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 24 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 25 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 26 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 0.2.2 2 | 3 | * Bump version of Go to 1.19.2 4 | * Upgrade `github.com/aws/aws-sdk-go` dependency to `v1.44.137` 5 | * Upgrade `github.com/stretchr/testify` dependency to `v1.8.1` 6 | 7 | # 0.2.1 8 | 9 | * Bump version of Go to 1.19.1 and upgrade dependencies 10 | 11 | # 0.2.0 12 | 13 | * Add support for clusters larger than 50 container instances. 14 | * Add after-action summary and done message to log output. 15 | * Add check to reduce the chance of concurrent runs. 16 | 17 | Note: In the Bottlerocket ECS updater v0.1.0 release, support for clusters was limited to 50 container instances. In this release, clusters larger than 50 container instances are now supported. :tada: 18 | 19 | # 0.1.0 20 | 21 | Initial release of the **Bottlerocket ECS updater** - A service to automatically manage Bottlerocket updates in an Amazon ECS cluster. 22 | 23 | The Bottlerocket ECS updater is designed to help you safely automate the routine maintenance of updating the Bottlerocket instances in your cluster. 
24 | The updater's safety features include: 25 | 26 | * Only tasks that are part of a [service](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs_services.html) will be interrupted. 27 | Container instances with non-service tasks are skipped for upgrade so no critical workloads will be automatically interrupted. 28 | * Only container instances in the [ACTIVE state](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/container-instance-draining.html) will be upgraded. 29 | Instances that have been placed into the DRAINING state are skipped for upgrade so other maintenance or debugging can be performed without interruption. 30 | 31 | In this first release of the updater, the following considerations should be kept in mind: 32 | 33 | * Only clusters of up to 50 container instances are supported. 34 | If the updater is configured to target a cluster with more than 50 instances, some instances may not be updated. 35 | * When configuring the provided CloudFormation template, ensure that the CloudWatch log group already exists. 36 | The updater will not automatically create the log group and a missing log group will cause the updater to fail to run. 37 | When creating a log group, you can configure your desired log retention settings. 38 | 39 | See the [README](README.md) for additional information. -------------------------------------------------------------------------------- /integ/README.md: -------------------------------------------------------------------------------- 1 | # Integration tests 2 | 3 | The following integration workflow is how you can 4 | test your changes and verify that new dependencies didn’t break the updater mechanisms. 5 | It’s also similar to how we verify versions of the ECS Updater, 6 | so it’s useful to go through it when making changes 7 | and should in total take less than 1 hour. 8 | 9 | 1. You’ll want to set up a test ECS cluster. 
10 | 11 | Thankfully, this is really easy with the existing integration tests setup script: 12 | https://github.com/bottlerocket-os/bottlerocket-ecs-updater/blob/develop/integ/setup.sh 13 | 14 | ```sh 15 | ./setup.sh --ami-id ami-05d2e4a6b8399095a 16 | ``` 17 | 18 | This script expects the ami-id of a Bottlerocket ECS variant. 19 | This will setup an ECS cluster using the integration CloudFormation stack 20 | and using that Bottlerocket ECS variant as EC2 compute. 21 | 22 | 2. Build an ECS updater image from your changes: 23 | 24 | ``` 25 | # Build the image and tag it as "latest" 26 | make image 27 | 28 | # Verify the image was built and tagged a moment ago 29 | docker images | head -n 10 30 | 31 | # Re-tag the image to wherever you want to land it on your ECR registry 32 | docker tag bottlerocket-ecs-updater:latest \ 33 | .dkr.ecr.us-west-2.amazonaws.com/bottlerocket-ecs-updater:my-test 34 | 35 | # Push it to your ECR registry 36 | docker push \ 37 | .dkr.ecr.us-west-2.amazonaws.com/bottlerocket-ecs-updater:my-test 38 | ``` 39 | 40 | 3. Once your integration ECS cluster is up and you’ve built/pushed a new image, 41 | you can execute the run-updater script to actually do the integration tests! 42 | 43 | Note that you need to provide the image URL of the new image you just built. 44 | This is the actual image that gets deployed as a fargate task! 45 | 46 | ``` 47 | ./run-updater.sh \ 48 | --cluster ecs-updater-integ-cluster \ 49 | --updater-image .dkr.ecr.us-west-2.amazonaws.com/bottlerocket-ecs-updater:my-test 50 | ``` 51 | 52 | 4. Cleanup is also easy! There’s a script for that as well: 53 | 54 | ``` 55 | ./cleanup.sh --cluster ecs-updater-integ-cluster 56 | ``` 57 | 58 | This tears down the ECS cluster by name releasing any artifacts from the integration tests. 59 | 60 | In all, the total process takes well under an hour. ECS clusters spin up and down very quickly. 
61 | -------------------------------------------------------------------------------- /integ/stacks/cluster.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: 2010-09-09 2 | Description: 'ECS Updater Integration Test Cluster' 3 | Parameters: 4 | IntegSharedResourceStack: 5 | Type: String 6 | Description: 'Name of the CloudFormation stack that sets up the shared resource for testing.' 7 | ImageID: 8 | Description: 'Bottlerocket `aws-ecs-1` variant image id' 9 | Type: AWS::EC2::Image::Id 10 | InstanceCount: 11 | Description: 'Desired number of Bottlerocket instances in cluster' 12 | Default: 10 13 | Type: Number 14 | InstanceType: 15 | Type: String 16 | Default: m5.xlarge 17 | Description: 'Instance type for the instances' 18 | Resources: 19 | Cluster: 20 | Type: AWS::ECS::Cluster 21 | Properties: 22 | ClusterName: !Sub '${AWS::StackName}' 23 | LaunchTemplate: 24 | Type: AWS::EC2::LaunchTemplate 25 | Properties: 26 | LaunchTemplateData: 27 | IamInstanceProfile: 28 | Name: 29 | Fn::ImportValue: 30 | !Sub "${IntegSharedResourceStack}:EcsInstanceProfile" 31 | ImageId: !Ref ImageID 32 | InstanceType: !Ref InstanceType 33 | SecurityGroupIds: 34 | - Fn::ImportValue: 35 | !Sub "${IntegSharedResourceStack}:SecurityGroupID" 36 | TagSpecifications: 37 | - ResourceType: instance 38 | Tags: 39 | - Key: "Name" 40 | Value: !Sub "${AWS::StackName}-instance" 41 | UserData: 42 | Fn::Base64: 43 | !Sub | 44 | [settings.ecs] 45 | cluster = "${AWS::StackName}" 46 | AutoScalingGroup: 47 | Type: AWS::AutoScaling::AutoScalingGroup 48 | Properties: 49 | MinSize: !Ref InstanceCount 50 | MaxSize: "50" 51 | DesiredCapacity: !Ref InstanceCount 52 | VPCZoneIdentifier: 53 | Fn::Split: 54 | - "," 55 | - Fn::ImportValue: 56 | !Sub "${IntegSharedResourceStack}:PublicSubnets" 57 | MixedInstancesPolicy: 58 | InstancesDistribution: 59 | OnDemandBaseCapacity: !Ref InstanceCount 60 | LaunchTemplate: 61 | LaunchTemplateSpecification: 62 | 
LaunchTemplateId: !Ref LaunchTemplate 63 | Version: !GetAtt LaunchTemplate.LatestVersionNumber 64 | Outputs: 65 | AutoScalingGroupName: 66 | Description: 'Auto scaling group name' 67 | Value: !Ref AutoScalingGroup 68 | Export: 69 | Name: !Sub "${AWS::StackName}:AutoScalingGroup" 70 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # SHELL is set as bash to use some bashisms. 2 | SHELL = bash 3 | 4 | BOTTLEROCKET_SDK_VERSION = v0.37.0 5 | BOTTLEROCKET_SDK_ARCH = x86_64 6 | UPDATER_TARGET_ARCH = amd64 7 | 8 | # the docker image that will be used to compile go code 9 | BUILDER_IMAGE = public.ecr.aws/bottlerocket/bottlerocket-sdk-${BOTTLEROCKET_SDK_ARCH}:${BOTTLEROCKET_SDK_VERSION} 10 | 11 | # IMAGE_NAME is the full name of the container being built 12 | IMAGE_NAME = bottlerocket-ecs-updater:latest 13 | # LICENSES_IMAGE is the name of the container image that has LICENSE files 14 | # for distribution. 15 | LICENSES_IMAGE = $(IMAGE_NAME)-licenses 16 | 17 | SOURCEDIR=./updater 18 | SOURCES := $(shell find $(SOURCEDIR) -name '*.go') 19 | export GO111MODULE=on 20 | export DOCKER_BUILDKIT=1 21 | 22 | all: build 23 | 24 | .PHONY: tidy 25 | tidy: 26 | cd updater && go mod tidy 27 | 28 | .PHONY: build # builds updater 29 | build: updater/bin/bottlerocket-ecs-updater 30 | updater/bin/bottlerocket-ecs-updater: $(SOURCES) updater/go.mod updater/go.sum 31 | GOARCH=$(UPDATER_TARGET_ARCH) 32 | cd updater && go build -v -o bin/bottlerocket-ecs-updater . 33 | 34 | .PHONY: test 35 | test: 36 | cd updater && go test -v ./... 37 | 38 | .PHONY: image # creates a docker image with the updater binary 39 | image: licenses 40 | docker build \ 41 | --tag '$(IMAGE_NAME)' \ 42 | --build-arg BUILDER_IMAGE=$(BUILDER_IMAGE) \ 43 | --build-arg GOARCH=$(UPDATER_TARGET_ARCH) \ 44 | --build-arg LICENSES_IMAGE=$(LICENSES_IMAGE) \ 45 | . 
46 | 47 | .PHONY: licenses 48 | licenses: 49 | docker build \ 50 | --tag '$(LICENSES_IMAGE)' \ 51 | --build-arg SDK_IMAGE=$(BUILDER_IMAGE) \ 52 | --build-arg GOLANG_IMAGE=$(BUILDER_IMAGE) \ 53 | --build-arg GOARCH=$(UPDATER_TARGET_ARCH) \ 54 | -f Dockerfile.licenses \ 55 | . 56 | 57 | .PHONY: lint 58 | lint: golang-lint cfn-lint 59 | 60 | .PHONY: golang-lint 61 | golang-lint: 62 | cd updater; golangci-lint run 63 | 64 | .PHONY: cfn-lint 65 | cfn-lint: 66 | cfn-lint ./stacks/bottlerocket-ecs-updater.yaml 67 | cfn-lint ./integ/stacks/integ-shared.yaml 68 | cfn-lint ./integ/stacks/cluster.yaml 69 | 70 | # Check that the container has LICENSE files included for its dependencies. 71 | .PHONY: check-licenses 72 | check-licenses: CHECK_CONTAINER_NAME=check-licenses-bottlerocket-ecs-updater 73 | check-licenses: 74 | @echo "Running check: $@" 75 | @-if docker inspect $(CHECK_CONTAINER_NAME) &>/dev/null; then\ 76 | docker rm $(CHECK_CONTAINER_NAME) &>/dev/null; \ 77 | fi 78 | @docker create --name $(CHECK_CONTAINER_NAME) $(IMAGE_NAME) >/dev/null 2>&1 79 | @echo "Checking if container image included dependencies' LICENSE files..." 80 | @docker export $(CHECK_CONTAINER_NAME) | tar -tf - \ 81 | | grep usr/share/licenses/bottlerocket-ecs-updater/vendor \ 82 | | grep -q LICENSE || { \ 83 | echo "Container image is missing required LICENSE files (checked $(IMAGE_NAME))"; \ 84 | docker rm $(CHECK_CONTAINER_NAME) &>/dev/null; \ 85 | exit 1; \ 86 | } 87 | @-docker rm $(CHECK_CONTAINER_NAME) 88 | 89 | clean: 90 | -rm -rf updater/bin 91 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 
5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 
38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE-APACHE](LICENSE-APACHE) or [LICENSE-MIT](LICENSE-MIT) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 
60 | -------------------------------------------------------------------------------- /updater/mock_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/aws/aws-sdk-go/aws" 5 | "github.com/aws/aws-sdk-go/aws/request" 6 | "github.com/aws/aws-sdk-go/service/ec2" 7 | "github.com/aws/aws-sdk-go/service/ecs" 8 | "github.com/aws/aws-sdk-go/service/ssm" 9 | ) 10 | 11 | type MockECS struct { 12 | ListContainerInstancesPagesFn func(input *ecs.ListContainerInstancesInput, fn func(*ecs.ListContainerInstancesOutput, bool) bool) error 13 | DescribeContainerInstancesFn func(input *ecs.DescribeContainerInstancesInput) (*ecs.DescribeContainerInstancesOutput, error) 14 | UpdateContainerInstancesStateFn func(input *ecs.UpdateContainerInstancesStateInput) (*ecs.UpdateContainerInstancesStateOutput, error) 15 | ListTasksFn func(input *ecs.ListTasksInput) (*ecs.ListTasksOutput, error) 16 | DescribeTasksFn func(input *ecs.DescribeTasksInput) (*ecs.DescribeTasksOutput, error) 17 | WaitUntilTasksStoppedWithContextFn func(ctx aws.Context, input *ecs.DescribeTasksInput, opts ...request.WaiterOption) error 18 | } 19 | 20 | var _ ECSAPI = (*MockECS)(nil) 21 | 22 | type MockSSM struct { 23 | // WaitUntilCommandExecutedWithContextFn is executed concurrently through 24 | // ECS code paths and tests should treat any data in a parallel safe manner 25 | WaitUntilCommandExecutedWithContextFn func(ctx aws.Context, input *ssm.GetCommandInvocationInput, opts ...request.WaiterOption) error 26 | SendCommandFn func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) 27 | GetCommandInvocationFn func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) 28 | } 29 | 30 | var _ SSMAPI = (*MockSSM)(nil) 31 | 32 | type MockEC2 struct { 33 | WaitUntilInstanceStatusOkFn func(input *ec2.DescribeInstanceStatusInput) error 34 | } 35 | 36 | var _ EC2API = (*MockEC2)(nil) 37 | 38 | func (m MockECS) 
ListContainerInstancesPages(input *ecs.ListContainerInstancesInput, fn func(*ecs.ListContainerInstancesOutput, bool) bool) error { 39 | return m.ListContainerInstancesPagesFn(input, fn) 40 | } 41 | 42 | func (m MockECS) DescribeContainerInstances(input *ecs.DescribeContainerInstancesInput) (*ecs.DescribeContainerInstancesOutput, error) { 43 | return m.DescribeContainerInstancesFn(input) 44 | } 45 | 46 | func (m MockECS) UpdateContainerInstancesState(input *ecs.UpdateContainerInstancesStateInput) (*ecs.UpdateContainerInstancesStateOutput, error) { 47 | return m.UpdateContainerInstancesStateFn(input) 48 | } 49 | 50 | func (m MockECS) ListTasks(input *ecs.ListTasksInput) (*ecs.ListTasksOutput, error) { 51 | return m.ListTasksFn(input) 52 | } 53 | 54 | func (m MockECS) DescribeTasks(input *ecs.DescribeTasksInput) (*ecs.DescribeTasksOutput, error) { 55 | return m.DescribeTasksFn(input) 56 | } 57 | 58 | func (m MockECS) WaitUntilTasksStoppedWithContext(ctx aws.Context, input *ecs.DescribeTasksInput, opts ...request.WaiterOption) error { 59 | return m.WaitUntilTasksStoppedWithContextFn(ctx, input, opts...) 60 | } 61 | 62 | func (m MockSSM) SendCommand(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) { 63 | return m.SendCommandFn(input) 64 | } 65 | 66 | func (m MockSSM) WaitUntilCommandExecutedWithContext(ctx aws.Context, input *ssm.GetCommandInvocationInput, opts ...request.WaiterOption) error { 67 | return m.WaitUntilCommandExecutedWithContextFn(ctx, input, opts...) 
68 | } 69 | 70 | func (m MockSSM) GetCommandInvocation(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) { 71 | return m.GetCommandInvocationFn(input) 72 | } 73 | 74 | func (c MockEC2) WaitUntilInstanceStatusOk(input *ec2.DescribeInstanceStatusInput) error { 75 | return c.WaitUntilInstanceStatusOkFn(input) 76 | } 77 | -------------------------------------------------------------------------------- /integ/setup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | THISDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 4 | 5 | source "${THISDIR}/common.sh" 6 | 7 | # Default ECS cluster name 8 | DEFAULT_CLUSTER_NAME="ecs-updater-integ-cluster" 9 | 10 | # Default number of instances to launch in the cluster 11 | DEFAULT_INSTANCE_COUNT=10 12 | 13 | # Default instance type for instances in the cluster 14 | DEFAULT_INSTANCE_TYPE="m5.xlarge" 15 | 16 | # Helper functions 17 | usage() { 18 | cat >&2 <&2 64 | usage 65 | exit 2 66 | ;; 67 | esac 68 | shift 69 | done 70 | 71 | INSTANCE_TYPE="${INSTANCE_TYPE:-$DEFAULT_INSTANCE_TYPE}" 72 | INSTANCE_COUNT="${INSTANCE_COUNT:-$DEFAULT_INSTANCE_COUNT}" 73 | CLUSTER_STACK_NAME="${CLUSTER_STACK_NAME:-$DEFAULT_CLUSTER_NAME}" 74 | 75 | # Required arguments 76 | required_arg "--ami-id" "${AMI_ID}" 77 | } 78 | 79 | # Initial setup and checks 80 | parse_args "${@}" 81 | 82 | # deploy stack to create integ resources 83 | log INFO "Deploying stack template '${INTEG_STACK_TEMPLATE}'" 84 | if ! 
aws cloudformation deploy \ 85 | --stack-name "${INTEG_STACK_NAME}" \ 86 | --template-file "${THISDIR}/stacks/${INTEG_STACK_TEMPLATE}" \ 87 | --capabilities CAPABILITY_NAMED_IAM; then 88 | log ERROR "Failed to deploy '${INTEG_STACK_TEMPLATE}' stack template" 89 | exit 1 90 | fi 91 | log INFO "Stack template '${INTEG_STACK_TEMPLATE}' deployed with name '${INTEG_STACK_NAME}'" 92 | 93 | # deploy stack to start ecs cluster using auto-scaling group 94 | log INFO "Deploying stack template '${CLUSTER_STACK_TEMPLATE}' to set up an ECS cluster" 95 | if ! aws cloudformation deploy \ 96 | --stack-name "${CLUSTER_STACK_NAME}" \ 97 | --template-file "${THISDIR}/stacks/${CLUSTER_STACK_TEMPLATE}" \ 98 | --capabilities CAPABILITY_NAMED_IAM \ 99 | --parameter-overrides \ 100 | IntegSharedResourceStack="${INTEG_STACK_NAME}" \ 101 | InstanceCount="${INSTANCE_COUNT}" \ 102 | ImageID="${AMI_ID}" \ 103 | InstanceType="${INSTANCE_TYPE}"; then 104 | log ERROR "Failed to deploy stack '${CLUSTER_STACK_TEMPLATE}' stack template" 105 | exit 1 106 | fi 107 | log INFO "ECS cluster '${CLUSTER_STACK_NAME}' with '${INSTANCE_COUNT}' instances and instance type '${INSTANCE_TYPE}' created!" 108 | -------------------------------------------------------------------------------- /integ/run-updater.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | THISDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 4 | 5 | source "${THISDIR}/common.sh" 6 | 7 | # Helper functions 8 | usage() { 9 | cat >&2 <&2 40 | usage 41 | exit 2 42 | ;; 43 | esac 44 | shift 45 | done 46 | 47 | UPDATER_STACK_NAME="${UPDATER_STACK_PREFIX}${CLUSTER}" 48 | 49 | # Required arguments 50 | required_arg "--cluster" "${CLUSTER}" 51 | required_arg "--updater-image" "${UPDATER_IMAGE}" 52 | } 53 | 54 | # Initial setup and checks 55 | parse_args "${@}" 56 | 57 | log INFO "Extracting output resource id's from '${INTEG_STACK_NAME}' stack" 58 | if ! 
integ_resources=$(aws cloudformation describe-stacks \ 59 | --stack-name "${INTEG_STACK_NAME}" \ 60 | --output json \ 61 | --query 'Stacks[].Outputs[]'); then 62 | log ERROR "Failed to get outputs from '${INTEG_STACK_NAME}' stack" 63 | exit 1 64 | fi 65 | 66 | # Get Subnets 67 | if ! subnets=$(echo "${integ_resources}" | jq --raw-output '.[] | select(.OutputKey == "PublicSubnets") | .OutputValue'); then 68 | log ERROR "Failed to extract list of subnets from '${INTEG_STACK_NAME}' stack outputs" 69 | exit 1 70 | fi 71 | log INFO "Subnets are '${subnets}'" 72 | # check the data to make sure its usable in our context 73 | if [[ "${#subnets[@]}" -lt 1 ]]; then 74 | log ERROR "No usable subnets" 75 | exit 1 76 | fi 77 | 78 | # Get LogGroupName 79 | if ! log_group=$(echo "${integ_resources}" | jq --raw-output '.[] | select(.OutputKey == "LogGroupName") | .OutputValue'); then 80 | log ERROR "Failed to extract LogGroup name from '${INTEG_STACK_NAME}' stack outputs" 81 | exit 1 82 | fi 83 | log INFO "LogGroup name is '${log_group}'" 84 | 85 | # Get LogGroupName 86 | if ! security_grp=$(echo "${integ_resources}" | jq --raw-output '.[] | select(.OutputKey == "SecurityGroupID") | .OutputValue'); then 87 | log ERROR "Failed to extract security group id from '${INTEG_STACK_NAME}' stack outputs" 88 | exit 1 89 | fi 90 | log INFO "Security group id is '${security_grp}'" 91 | 92 | # start updater on cluster 93 | log INFO "Deploying ECS updater stack on cluster '${CLUSTER}' with cron event rule disabled" 94 | if ! 
aws cloudformation deploy \ 95 | --stack-name "${UPDATER_STACK_NAME}" \ 96 | --template-file "${THISDIR}/../stacks/bottlerocket-ecs-updater.yaml" \ 97 | --capabilities CAPABILITY_NAMED_IAM \ 98 | --parameter-overrides \ 99 | ClusterName="${CLUSTER}" \ 100 | Subnets="${subnets}" \ 101 | UpdaterImage="${UPDATER_IMAGE}" \ 102 | LogGroupName="${log_group}" \ 103 | ScheduleState="DISABLED"; then 104 | log ERROR "Failed to deploy Bottlerocket ECS updater" 105 | exit 1 106 | fi 107 | 108 | log INFO "Extracting updater task definition arn from '${UPDATER_STACK_NAME}' stack" 109 | if ! output=$(aws cloudformation describe-stacks \ 110 | --stack-name "${UPDATER_STACK_NAME}" \ 111 | --output json \ 112 | --query 'Stacks[].Outputs[]'); then 113 | log ERROR "Failed to get outputs from '${UPDATER_STACK_NAME}' stack" 114 | exit 1 115 | fi 116 | 117 | if ! task_def=$(echo "${output}" | jq --raw-output '.[] | select(.OutputKey == "UpdaterTaskDefinitionArn") | .OutputValue'); then 118 | log ERROR "Failed to extract updater task definition arn from '${UPDATER_STACK_NAME}' stack outputs" 119 | exit 1 120 | fi 121 | 122 | log INFO "Starting ECS updater task on cluster '${CLUSTER}'" 123 | if ! aws ecs run-task \ 124 | --cluster "${CLUSTER}" \ 125 | --task-definition "${task_def}" \ 126 | --launch-type "FARGATE" \ 127 | --network-configuration="awsvpcConfiguration={subnets=[${subnets}],securityGroups=${security_grp},assignPublicIp=ENABLED}"; then 128 | log ERROR "Failed to start updater task '${task_def}'" 129 | exit 1 130 | fi 131 | 132 | log INFO "ECS updater is running on cluster '${CLUSTER}'. 
Check logs in Cloudwatch LogGroup '${log_group}'" 133 | -------------------------------------------------------------------------------- /integ/stacks/integ-shared.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: "2010-09-09" 2 | Description: Bottlerocket ECS updater integration tests shared resources 3 | Resources: 4 | VPC: 5 | Type: AWS::EC2::VPC 6 | Properties: 7 | EnableDnsSupport: true 8 | EnableDnsHostnames: true 9 | CidrBlock: 10.0.0.0/16 10 | Tags: 11 | - Key: Name 12 | Value: ECSUpdaterInteg 13 | SubnetA: 14 | Type: AWS::EC2::Subnet 15 | Properties: 16 | VpcId: !Ref VPC 17 | CidrBlock: 10.0.5.0/24 18 | AvailabilityZone: !Select 19 | - 0 20 | - !GetAZs 21 | Ref: 'AWS::Region' 22 | MapPublicIpOnLaunch: true 23 | Tags: 24 | - Key: Name 25 | Value: ECSUpdaterIntegSubnetA 26 | SubnetB: 27 | Type: AWS::EC2::Subnet 28 | Properties: 29 | VpcId: !Ref VPC 30 | CidrBlock: 10.0.6.0/24 31 | AvailabilityZone: !Select 32 | - 1 33 | - !GetAZs 34 | Ref: 'AWS::Region' 35 | MapPublicIpOnLaunch: true 36 | Tags: 37 | - Key: Name 38 | Value: ECSUpdaterIntegSubnetB 39 | SubnetC: 40 | Type: AWS::EC2::Subnet 41 | Properties: 42 | VpcId: !Ref VPC 43 | CidrBlock: 10.0.7.0/24 44 | AvailabilityZone: !Select 45 | - 2 46 | - !GetAZs 47 | Ref: 'AWS::Region' 48 | MapPublicIpOnLaunch: true 49 | Tags: 50 | - Key: Name 51 | Value: ECSUpdaterIntegSubnetC 52 | SecurityGroup: 53 | Type: AWS::EC2::SecurityGroup 54 | Properties: 55 | GroupDescription: Security Group for ECS Updater Task 56 | VpcId: !Ref VPC 57 | Tags: 58 | - Key: Name 59 | Value: ECSUpdaterInteg 60 | InternetGateway: 61 | Type: AWS::EC2::InternetGateway 62 | Properties: 63 | Tags: 64 | - Key: Name 65 | Value: ECSUpdaterInteg 66 | GatewayAttachement: 67 | Type: AWS::EC2::VPCGatewayAttachment 68 | Properties: 69 | VpcId: !Ref VPC 70 | InternetGatewayId: !Ref InternetGateway 71 | RouteTable: 72 | Type: AWS::EC2::RouteTable 73 | Properties: 74 | VpcId: !Ref VPC 75 | 
Tags: 76 | - Key: Name 77 | Value: ECSUpdaterInteg 78 | DefaultRoute: 79 | Type: AWS::EC2::Route 80 | Properties: 81 | RouteTableId: !Ref RouteTable 82 | DestinationCidrBlock: 0.0.0.0/0 83 | GatewayId: !Ref InternetGateway 84 | PublicSubnetARouteTableAssociation: 85 | Type: AWS::EC2::SubnetRouteTableAssociation 86 | Properties: 87 | SubnetId: !Ref SubnetA 88 | RouteTableId: !Ref RouteTable 89 | PublicSubnetBRouteTableAssociation: 90 | Type: AWS::EC2::SubnetRouteTableAssociation 91 | Properties: 92 | SubnetId: !Ref SubnetB 93 | RouteTableId: !Ref RouteTable 94 | PublicSubnetCRouteTableAssociation: 95 | Type: AWS::EC2::SubnetRouteTableAssociation 96 | Properties: 97 | SubnetId: !Ref SubnetC 98 | RouteTableId: !Ref RouteTable 99 | EcsInstanceRole: 100 | Type: AWS::IAM::Role 101 | Properties: 102 | Description: 'Role for Bottlerocket container instances' 103 | Path: !Sub '/bottlerocket/ecs-updater-integ/${AWS::StackName}/' 104 | AssumeRolePolicyDocument: 105 | Version: 2012-10-17 106 | Statement: 107 | - Effect: Allow 108 | Principal: 109 | Service: 'ec2.amazonaws.com' 110 | Action: 111 | - 'sts:AssumeRole' 112 | ManagedPolicyArns: 113 | - 'arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role' 114 | - 'arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore' 115 | EcsInstanceProfile: 116 | Type: AWS::IAM::InstanceProfile 117 | Properties: 118 | InstanceProfileName: !Ref EcsInstanceRole 119 | Path: !Sub '/bottlerocket/ecs-updater-integ/${AWS::StackName}/' 120 | Roles: 121 | - !Ref EcsInstanceRole 122 | LogGroup: 123 | Type: AWS::Logs::LogGroup 124 | Properties: 125 | RetentionInDays: 60 126 | LogGroupName: 'bottlerocket-ecs-updater-integ' 127 | Outputs: 128 | PublicSubnets: 129 | Description: 'List of Subnets' 130 | Value: !Join [ ",", [ !Ref SubnetA, !Ref SubnetB, !Ref SubnetC ] ] 131 | Export: 132 | Name: !Sub "${AWS::StackName}:PublicSubnets" 133 | SecurityGroupID: 134 | Description: 'Security group ID' 135 | Value: !GetAtt SecurityGroup.GroupId 136 
| Export: 137 | Name: !Sub "${AWS::StackName}:SecurityGroupID" 138 | InstanceProfile: 139 | Description: 'ECS instance profile name' 140 | Value: !Ref EcsInstanceProfile 141 | Export: 142 | Name: !Sub "${AWS::StackName}:EcsInstanceProfile" 143 | LogGroupName: 144 | Description: 'Cloudwatch log group' 145 | Value: !Ref LogGroup 146 | Export: 147 | Name: !Sub "${AWS::StackName}:LogGroup" 148 | -------------------------------------------------------------------------------- /integ/cleanup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | THISDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 4 | 5 | source "${THISDIR}/common.sh" 6 | 7 | delete_integ=0 8 | 9 | # Helper functions 10 | usage() { 11 | cat >&2 <&2 44 | usage 45 | exit 2 46 | ;; 47 | esac 48 | shift 49 | done 50 | 51 | # Required arguments 52 | required_arg "--cluster" "${CLUSTER}" 53 | } 54 | 55 | delete_stack() { 56 | local stack_name="${1:?}" 57 | log INFO "Deleting Cloudformation stack '${stack_name}'" 58 | if ! aws cloudformation delete-stack \ 59 | --stack-name "${stack_name}"; then 60 | log ERROR "Failed to delete '${stack_name}'" 61 | return 62 | fi 63 | 64 | log INFO "Waiting for Cloudformation stack '${stack_name}' to be deleted" 65 | if ! aws cloudformation wait stack-delete-complete \ 66 | --stack-name "${stack_name}"; then 67 | log ERROR "Failed to wait for ${stack_name} to delete" 68 | aws cloudformation describe-stack-events \ 69 | --stack-name "${stack_name}" 70 | return 71 | fi 72 | log INFO "Cloudformation stack '${stack_name}' deleted!" 73 | } 74 | 75 | delete_services() { 76 | local cluster="${1:?}" 77 | log INFO "Deleting services running on cluster '${cluster}'" 78 | if ! 
services=$(aws ecs list-services \ 79 | --cluster ecs-updater-integ-cluster \ 80 | --query 'serviceArns[]' \ 81 | --output text); then 82 | log ERROR "Failed to list services in cluster '${cluster}'" 83 | return 84 | fi 85 | 86 | for service in ${services}; do 87 | log INFO "Deleting service '${service}'" 88 | if ! aws ecs delete-service \ 89 | --cluster "${cluster}" \ 90 | --service "${service}" \ 91 | --force >/dev/null; then 92 | log ERROR "Failed to delete service '${service}'" 93 | fi 94 | done 95 | } 96 | 97 | stop_tasks() { 98 | local cluster="${1:?}" 99 | log INFO "Stopping tasks running on cluster '${cluster}'" 100 | if ! tasks=$(aws ecs list-tasks \ 101 | --cluster ecs-updater-integ-cluster \ 102 | --query 'taskArns[]' \ 103 | --output text); then 104 | log ERROR "Failed to list tasks in cluster '${cluster}'" 105 | return 106 | fi 107 | 108 | for task in ${tasks}; do 109 | log INFO "Stopping task '${task}'" 110 | if ! aws ecs stop-task \ 111 | --cluster "${cluster}" \ 112 | --task "${task}" >/dev/null; then 113 | log ERROR "Failed to stop task '${task}'" 114 | fi 115 | done 116 | } 117 | 118 | terminate_instances() { 119 | local cluster="${1:?}" 120 | log INFO "Extracting auto-scaling group name from '${cluster}' stack" 121 | if ! output=$(aws cloudformation describe-stacks \ 122 | --stack-name "${cluster}" \ 123 | --output json \ 124 | --query 'Stacks[].Outputs[]'); then 125 | log ERROR "Failed to get outputs from '${cluster}' stack" 126 | return 127 | fi 128 | 129 | if ! auto_scaling_group=$(echo "${output}" | jq --raw-output '.[] | select(.OutputKey == "AutoScalingGroupName") | .OutputValue'); then 130 | log ERROR "Failed to extract auto scaling group name from '${cluster}' stack outputs" 131 | return 132 | fi 133 | 134 | log INFO "Describing auto-scaling group '${auto_scaling_group}' to get instance ids" 135 | if ! 
instance_ids=$(aws autoscaling describe-auto-scaling-groups \ 136 | --auto-scaling-group-name "${auto_scaling_group}" \ 137 | --query "AutoScalingGroups[].Instances[].InstanceId" \ 138 | --output text); then 139 | log ERROR "Failed to get instance ids from auto scaling group '${auto_scaling_group}'" 140 | return 141 | fi 142 | log INFO "Instances '${instance_ids}' found" 143 | 144 | log INFO "Setting auto scaling group desired count to zero" 145 | if ! aws autoscaling update-auto-scaling-group \ 146 | --auto-scaling-group-name "${auto_scaling_group}" \ 147 | --desired-capacity 0 \ 148 | --min-size 0; then 149 | log ERROR "Failed to change auto scaling group '${auto_scaling_group}' desired count to 0" 150 | return 151 | fi 152 | 153 | for inst_id in ${instance_ids}; do 154 | log INFO "Waiting for instance '${inst_id}' to terminate" 155 | if ! aws ec2 wait instance-terminated \ 156 | --instance-ids "${inst_id}"; then 157 | log ERROR "Failed to terminate instance '${inst_id}'" 158 | fi 159 | done 160 | } 161 | 162 | # Initial setup and checks 163 | parse_args "${@}" 164 | 165 | delete_stack "${UPDATER_STACK_PREFIX}${CLUSTER}" 166 | 167 | terminate_instances "${CLUSTER}" 168 | 169 | delete_services "${CLUSTER}" 170 | 171 | stop_tasks "${CLUSTER}" 172 | 173 | delete_stack "${CLUSTER}" 174 | 175 | if [[ "${delete_integ}" -eq 1 ]]; then 176 | delete_stack "${INTEG_STACK_NAME}" 177 | fi 178 | -------------------------------------------------------------------------------- /updater/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "errors" 5 | "flag" 6 | "fmt" 7 | "log" 8 | "os" 9 | "strings" 10 | "time" 11 | 12 | "github.com/aws/aws-sdk-go/aws" 13 | "github.com/aws/aws-sdk-go/aws/arn" 14 | "github.com/aws/aws-sdk-go/aws/session" 15 | "github.com/aws/aws-sdk-go/service/ec2" 16 | "github.com/aws/aws-sdk-go/service/ecs" 17 | "github.com/aws/aws-sdk-go/service/ssm" 18 | ) 19 | 20 | var ( 21 | 
flagCluster = flag.String("cluster", "", "The short name or full Amazon Resource Name (ARN) of the cluster in which we will manage Bottlerocket instances.") 22 | flagRegion = flag.String("region", "", "The AWS Region in which cluster is running.") 23 | flagCheck = flag.String("check-document", "", "The SSM document name for checking available updates.") 24 | flagApply = flag.String("apply-document", "", "The SSM document name for applying updates.") 25 | flagReboot = flag.String("reboot-document", "", "The SSM document name to initiate a reboot.") 26 | ) 27 | 28 | const taskDefARNEnv = "TASK_DEFINITION_ARN" 29 | 30 | type updater struct { 31 | cluster string 32 | checkDocument string 33 | applyDocument string 34 | rebootDocument string 35 | ecs ECSAPI 36 | ssm SSMAPI 37 | ec2 EC2API 38 | } 39 | 40 | func main() { 41 | if err := _main(); err != nil { 42 | log.Println(err.Error()) 43 | os.Exit(1) 44 | } 45 | } 46 | 47 | func _main() error { 48 | flag.Parse() 49 | switch { 50 | case *flagCluster == "": 51 | flag.Usage() 52 | return errors.New("cluster is required") 53 | case *flagRegion == "": 54 | flag.Usage() 55 | return errors.New("region is required") 56 | case *flagCheck == "": 57 | flag.Usage() 58 | return errors.New("check-document is required") 59 | case *flagApply == "": 60 | flag.Usage() 61 | return errors.New("apply-document is required") 62 | case *flagReboot == "": 63 | flag.Usage() 64 | return errors.New("reboot-document is required") 65 | } 66 | 67 | sess := session.Must(session.NewSession(&aws.Config{ 68 | Region: aws.String(*flagRegion), 69 | })) 70 | 71 | u := &updater{ 72 | cluster: *flagCluster, 73 | checkDocument: *flagCheck, 74 | applyDocument: *flagApply, 75 | rebootDocument: *flagReboot, 76 | ecs: ecs.New(sess, aws.NewConfig()), 77 | ssm: ssm.New(sess, aws.NewConfig()), 78 | ec2: ec2.New(sess, aws.NewConfig()), 79 | } 80 | 81 | family, err := taskDefFamily() 82 | if err != nil { 83 | log.Printf("Failed to parse updater task definition arn: %v", 
err) 84 | log.Printf("Ignoring check for already running updater") 85 | } else { 86 | ok, err := u.alreadyRunning(family) 87 | if err != nil { 88 | return fmt.Errorf("Cannot determine running updater tasks, therefore stopping this run to avoid risk of multiple runs: %w", err) 89 | } 90 | if ok { 91 | log.Printf("Another updater is running, therefore exiting this run.") 92 | return nil 93 | } 94 | } 95 | 96 | listedInstances, err := u.listContainerInstances() 97 | if err != nil { 98 | return fmt.Errorf("Failed to get container instances in cluster %q: %w", u.cluster, err) 99 | } 100 | if len(listedInstances) == 0 { 101 | log.Print("Zero instances in the cluster") 102 | return nil 103 | } 104 | 105 | bottlerocketInstances, err := u.filterBottlerocketInstances(listedInstances) 106 | if err != nil { 107 | return fmt.Errorf("Failed to filter Bottlerocket instances: %w", err) 108 | } 109 | 110 | if len(bottlerocketInstances) == 0 { 111 | log.Printf("No Bottlerocket instances detected") 112 | return nil 113 | } 114 | candidates, err := u.filterAvailableUpdates(bottlerocketInstances) 115 | if err != nil { 116 | return fmt.Errorf("Failed to check updates: %w", err) 117 | } 118 | if len(candidates) == 0 { 119 | log.Printf("No instances to update") 120 | return nil 121 | } 122 | log.Printf("Instances ready for update: %#q", candidates) 123 | 124 | summary := make(map[string]string) 125 | for _, i := range candidates { 126 | eligible, err := u.eligible(i.containerInstanceID) 127 | if err != nil { 128 | log.Printf("Failed to determine eligibility for update of instance %#q: %v", i, err) 129 | summary[i.instanceID] = fmt.Sprintf("Failed to determine eligibility for update: %v", err) 130 | continue 131 | } 132 | if !eligible { 133 | log.Printf("Instance %#q is not eligible for updates because it contains non-service task", i) 134 | summary[i.instanceID] = "Instance is not eligible for updates because it contains non-service task(s)" 135 | continue 136 | } 137 | 
log.Printf("Instance %q is eligible for update", i) 138 | 139 | err = u.drainInstance(i.containerInstanceID) 140 | if err != nil { 141 | log.Printf("Failed to drain instance %#q: %v", i, err) 142 | summary[i.instanceID] = fmt.Sprintf("Failed to drain: %v", err) 143 | continue 144 | } 145 | log.Printf("Instance %#q successfully drained!", i) 146 | 147 | updateErr := u.updateInstance(i) 148 | activateErr := u.activateInstance(i.containerInstanceID) 149 | if updateErr != nil && activateErr != nil { 150 | log.Printf("Failed to update instance %#q: %v", i, updateErr) 151 | return fmt.Errorf("instance %#q failed to re-activate after failing to update: %w", i, activateErr) 152 | } else if updateErr != nil { 153 | log.Printf("Failed to update instance %#q: %v", i, updateErr) 154 | summary[i.instanceID] = fmt.Sprintf("Failed to update: %v", updateErr) 155 | continue 156 | } else if activateErr != nil { 157 | return fmt.Errorf("instance %#q failed to re-activate after update: %w", i, activateErr) 158 | } 159 | 160 | // Reboots are not immediate, and initiating an SSM command races with reboot. Add some 161 | // sleep time to allow the reboot to progress before we verify update. 
162 | time.Sleep(20 * time.Second) 163 | ok, err := u.verifyUpdate(i) 164 | if err != nil { 165 | log.Printf("Failed to verify update for instance %#q: %v", i, err) 166 | } 167 | if !ok { 168 | log.Printf("Update failed for instance %#q", i) 169 | summary[i.instanceID] = "Update failed" 170 | } else { 171 | log.Printf("Instance %#q updated successfully!", i) 172 | summary[i.instanceID] = "Instance updated successfully" 173 | } 174 | } 175 | log.Printf("After action summary:") 176 | for k, v := range summary { 177 | log.Printf("%s: %s", k, v) 178 | } 179 | log.Printf("Update operations complete!") 180 | return nil 181 | } 182 | 183 | func taskDefFamily() (string, error) { 184 | taskDefInput := os.Getenv(taskDefARNEnv) 185 | taskDefARN, err := arn.Parse(taskDefInput) 186 | if err != nil { 187 | return "", err 188 | } 189 | const taskDefPrefix = "task-definition/" 190 | if !strings.HasPrefix(taskDefARN.Resource, taskDefPrefix) { 191 | return "", fmt.Errorf("not a task definition arn: %q", taskDefInput) 192 | } 193 | // extract task definition family from resource: task-definition/<family>:<revision> 194 | taskDef := strings.TrimPrefix(taskDefARN.Resource, taskDefPrefix) 195 | family := strings.SplitN(taskDef, ":", 2)[0] 196 | log.Printf("Updater task definition family: %q", family) 197 | return family, nil 198 | } 199 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 
15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | -------------------------------------------------------------------------------- /stacks/bottlerocket-ecs-updater.yaml: -------------------------------------------------------------------------------- 1 | AWSTemplateFormatVersion: '2010-09-09' 2 | Description: 'Bottlerocket ECS updater automation & resources' 3 | Parameters: 4 | ClusterName: 5 | Description: 'Name of ECS cluster to manage Bottlerocket instances in' 6 | Type: String 7 | Subnets: 8 | Description: 'List of VPC Subnet IDs where the updater should run. 
The subnets must have a route to the Internet via an Internet Gateway.' 9 | Type: List 10 | UpdaterImage: 11 | Description: 'Bottlerocket updater container image' 12 | Type: String 13 | Default: 'public.ecr.aws/bottlerocket/bottlerocket-ecs-updater:v0.2.2' 14 | LogGroupName: 15 | Description: 'Log group name for Bottlerocket updater logs' 16 | Type: String 17 | ScheduleState: 18 | Description: 'Schedule events rule state; allows disabling of scheduling' 19 | Type: String 20 | Default: 'ENABLED' 21 | Resources: 22 | ExecutionRole: 23 | Type: 'AWS::IAM::Role' 24 | Properties: 25 | AssumeRolePolicyDocument: 26 | Version: '2012-10-17' 27 | Statement: 28 | - Effect: Allow 29 | Principal: 30 | Service: 31 | - 'ecs-tasks.amazonaws.com' 32 | Action: 33 | - 'sts:AssumeRole' 34 | Policies: 35 | - PolicyName: CreateLogGroupPolicy 36 | PolicyDocument: 37 | Version: '2012-10-17' 38 | Statement: 39 | # Allows creating log group if it does not exist 40 | - Effect: Allow 41 | Action: 42 | - 'logs:CreateLogGroup' 43 | Resource: 44 | - 'arn:aws:logs:*:*:*' 45 | Path: !Sub /${AWS::StackName}/ 46 | ManagedPolicyArns: 47 | - !Sub 'arn:${AWS::Partition}:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy' 48 | TaskRole: 49 | Type: AWS::IAM::Role 50 | Properties: 51 | Description: 'Role allowing the Bottlerocket ECS Updater to manage Bottlerocket instances' 52 | Path: !Sub '/${AWS::StackName}/' 53 | AssumeRolePolicyDocument: 54 | Version: 2012-10-17 55 | Statement: 56 | - Effect: Allow 57 | Principal: 58 | Service: 'ecs-tasks.amazonaws.com' 59 | Action: 60 | - 'sts:AssumeRole' 61 | Policies: 62 | - PolicyName: 'BottlerocketEcsUpdaterPolicy' 63 | PolicyDocument: 64 | Version: 2012-10-17 65 | Statement: 66 | # Allows listing all container instances in a cluster 67 | - Effect: Allow 68 | Action: 69 | - 'ecs:ListContainerInstances' 70 | Resource: 71 | - !Sub 'arn:${AWS::Partition}:ecs:${AWS::Region}:${AWS::AccountId}:cluster/${ClusterName}' 72 | # Allows describe container 
instances to get ec2 instance ID and ecs attributes to filter Bottlerocket instances 73 | # Allows list tasks to filter instances running standalone tasks 74 | # Allows update container instance state for draining 75 | # Allows describe tasks to identify tasks not started by service 76 | - Effect: Allow 77 | Action: 78 | - 'ecs:DescribeContainerInstances' 79 | - 'ecs:ListTasks' 80 | - 'ecs:UpdateContainerInstancesState' 81 | - 'ecs:DescribeTasks' 82 | Resource: '*' 83 | Condition: 84 | ArnEquals: 85 | ecs:cluster: !Sub 'arn:${AWS::Partition}:ecs:${AWS::Region}:${AWS::AccountId}:cluster/${ClusterName}' 86 | # Allows ssm send command to make Bottlerocket update API calls 87 | - Effect: Allow 88 | Action: 89 | - 'ssm:SendCommand' 90 | Resource: 91 | - !Sub "arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:document/${UpdateCheckCommand}" 92 | - !Sub "arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:document/${UpdateApplyCommand}" 93 | - !Sub "arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:document/${RebootCommand}" 94 | - !Sub "arn:${AWS::Partition}:ec2:${AWS::Region}:${AWS::AccountId}:instance/*" 95 | # Allows get command invocation to get Bottlerocket API calls output 96 | - Effect: Allow 97 | Action: 98 | - 'ssm:GetCommandInvocation' 99 | Resource: 100 | - !Sub "arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:*" 101 | # Allows checking the EC2 instance state after an update occurs 102 | - Effect: Allow 103 | Action: 104 | - 'ec2:DescribeInstanceStatus' 105 | Resource: '*' 106 | UpdaterTaskDefinition: 107 | Type: AWS::ECS::TaskDefinition 108 | Properties: 109 | NetworkMode: awsvpc 110 | RequiresCompatibilities: 111 | - FARGATE 112 | Cpu: "256" 113 | Memory: "0.5GB" 114 | ExecutionRoleArn: !GetAtt ExecutionRole.Arn 115 | TaskRoleArn: !GetAtt TaskRole.Arn 116 | ContainerDefinitions: 117 | - Name: BottlerocketEcsUpdaterService 118 | Image: !Ref UpdaterImage 119 | Command: 120 | - -cluster 121 | - !Ref ClusterName 122 | - 
-region 123 | - !Ref AWS::Region 124 | - -check-document 125 | - !Ref UpdateCheckCommand 126 | - -apply-document 127 | - !Ref UpdateApplyCommand 128 | - -reboot-document 129 | - !Ref RebootCommand 130 | LogConfiguration: 131 | LogDriver: awslogs 132 | Options: 133 | awslogs-create-group: 'true' 134 | awslogs-region: !Ref AWS::Region 135 | awslogs-group: !Ref LogGroupName 136 | awslogs-stream-prefix: !Sub '/ecs/bottlerocket-updater/${ClusterName}' 137 | BottlerocketUpdaterSchedule: 138 | Type: AWS::Events::Rule 139 | Properties: 140 | Description: "Check for Bottlerocket updates on a schedule" 141 | # Run Task every 12 hours 142 | ScheduleExpression: "rate(12 hours)" 143 | State: !Ref ScheduleState 144 | Targets: 145 | - Id: ecs-updater-fargate-task 146 | RoleArn: !GetAtt CronRole.Arn 147 | Arn: !Sub 'arn:${AWS::Partition}:ecs:${AWS::Region}:${AWS::AccountId}:cluster/${ClusterName}' 148 | Input: 149 | !Sub | 150 | { 151 | "containerOverrides": [ 152 | { 153 | "name": "BottlerocketEcsUpdaterService", 154 | "environment": [ 155 | { 156 | "name" : "TASK_DEFINITION_ARN", 157 | "value": "${UpdaterTaskDefinition}" 158 | } 159 | ] 160 | } 161 | ] 162 | } 163 | EcsParameters: 164 | LaunchType: FARGATE 165 | TaskCount: 1 166 | TaskDefinitionArn: !Ref UpdaterTaskDefinition 167 | NetworkConfiguration: 168 | AwsVpcConfiguration: 169 | # The Bottlerocket ECS Updater does not need a public IP for its operations. 
The public IP 170 | # is only required to pull images from ECR as a Fargate task 171 | AssignPublicIp: ENABLED 172 | Subnets: !Ref Subnets 173 | CronRole: 174 | Type: AWS::IAM::Role 175 | Properties: 176 | AssumeRolePolicyDocument: 177 | Version: "2012-10-17" 178 | Statement: 179 | - Effect: "Allow" 180 | Principal: 181 | Service: 182 | - "events.amazonaws.com" 183 | Action: 184 | - "sts:AssumeRole" 185 | Path: !Sub '/${AWS::StackName}/' 186 | Policies: 187 | - PolicyName: "BottlerocketEcsUpdaterSchedulerPolicy" 188 | PolicyDocument: 189 | Statement: 190 | - Effect: "Allow" 191 | Condition: 192 | ArnEquals: 193 | ecs:cluster: !Sub 'arn:${AWS::Partition}:ecs:${AWS::Region}:${AWS::AccountId}:cluster/${ClusterName}' 194 | Action: "ecs:RunTask" 195 | Resource: 196 | - !Ref UpdaterTaskDefinition 197 | - Effect: "Allow" 198 | Condition: 199 | ArnEquals: 200 | ecs:cluster: !Sub 'arn:${AWS::Partition}:ecs:${AWS::Region}:${AWS::AccountId}:cluster/${ClusterName}' 201 | Action: 202 | - "iam:PassRole" 203 | Resource: 204 | - !GetAtt TaskRole.Arn 205 | - !GetAtt ExecutionRole.Arn 206 | UpdateCheckCommand: 207 | Type: AWS::SSM::Document 208 | Properties: 209 | DocumentType: Command 210 | Content: 211 | schemaVersion: "2.2" 212 | description: "Bottlerocket - Check available updates" 213 | mainSteps: 214 | - action: "aws:runShellScript" 215 | name: "CheckUpdate" 216 | precondition: 217 | StringEquals: 218 | - platformType 219 | - Linux 220 | inputs: 221 | timeoutSeconds: '1800' 222 | runCommand: 223 | - "apiclient update check" 224 | UpdateApplyCommand: 225 | Type: AWS::SSM::Document 226 | Properties: 227 | DocumentType: Command 228 | Content: 229 | schemaVersion: "2.2" 230 | description: "Bottlerocket - Apply update" 231 | mainSteps: 232 | - action: "aws:runShellScript" 233 | name: "ApplyUpdate" 234 | precondition: 235 | StringEquals: 236 | - platformType 237 | - Linux 238 | inputs: 239 | timeoutSeconds: '1800' 240 | runCommand: 241 | - "apiclient update apply" 242 | 
RebootCommand: 243 | Type: AWS::SSM::Document 244 | Properties: 245 | DocumentType: Command 246 | Content: 247 | schemaVersion: "2.2" 248 | description: "Bottlerocket - Reboot" 249 | mainSteps: 250 | - action: "aws:runShellScript" 251 | name: "Reboot" 252 | precondition: 253 | StringEquals: 254 | - platformType 255 | - Linux 256 | inputs: 257 | timeoutSeconds: '1800' 258 | runCommand: 259 | - "apiclient reboot" 260 | Outputs: 261 | UpdaterTaskDefinitionArn: 262 | Description: 'Updater task definition ARN' 263 | Value: !Ref UpdaterTaskDefinition 264 | Export: 265 | Name: !Sub "${AWS::StackName}:UpdaterTaskDefinition" 266 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bottlerocket ECS Updater 2 | 3 | The Bottlerocket ECS Updater is a service you can install into your ECS cluster that helps you keep your Bottlerocket container instances up to date. 4 | When installed, the Bottlerocket ECS Updater will periodically query each Bottlerocket container instance to find whether an update is available and drain tasks while an update is in progress. 5 | Updates to Bottlerocket are rolled out in [waves](https://github.com/bottlerocket-os/bottlerocket/tree/develop/sources/updater/waves) to reduce the impact of issues; the container instances in your cluster may not all see updates at the same time. 6 | 7 | ## Installation 8 | 9 | You can install the Bottlerocket ECS Updater into your cluster with the provided [CloudFormation template](stacks/bottlerocket-ecs-updater.yaml). 
10 | The following information is required when creating the CloudFormation stack: 11 | 12 | * The name of the ECS cluster where you are running Bottlerocket container instances 13 | * The name of the CloudWatch Logs log group where the Bottlerocket ECS Updater will send its logs 14 | * At least one subnet ID that has Internet access (which does not need to be shared with the rest of your cluster) 15 | 16 | When installed, the CloudFormation template will create the following resources in your account: 17 | 18 | * A task definition for the Bottlerocket ECS Updater 19 | * A CloudWatch Events scheduled rule to execute the Bottlerocket ECS Updater 20 | * An IAM role for the Bottlerocket ECS Updater task itself as well as roles for Fargate and CloudWatch Events 21 | * SSM documents to query and execute updates on Bottlerocket instances 22 | 23 | ## Getting Started 24 | 25 | To install the Bottlerocket ECS Updater, you will need to fetch some information first. 26 | 27 | ### Subnet info 28 | 29 | You should either have a default virtual private cloud (VPC) or have already 30 | [created a VPC](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/get-set-up-for-amazon-ecs.html#create-a-vpc) 31 | in your account. 32 | 33 | To find your default VPC, run this command. 34 | (If you use an AWS region other than "us-west-2", make sure to change that.) 35 | 36 | ```sh 37 | aws ec2 describe-vpcs \ 38 | --region us-west-2 \ 39 | --filters=Name=isDefault,Values=true \ 40 | | jq --raw-output '.Vpcs[].VpcId' 41 | ``` 42 | 43 | If you want to use a different VPC you created, run this to get the ID for your VPC. 44 | Make sure to change VPC_NAME to the name of the VPC you created. 45 | (If you use an EC2 region other than "us-west-2", make sure to change that too.) 
46 | 47 | ```sh 48 | aws ec2 describe-vpcs \ 49 | --region us-west-2 \ 50 | --filters=Name=tag:Name,Values=VPC_NAME \ 51 | | jq --raw-output '.Vpcs[].VpcId' 52 | ``` 53 | 54 | Next, run this to get information about the subnets in your VPC. 55 | It will give you a list of the subnets and tell you whether each is public or private. 56 | Make sure to change VPC_ID to the value you received from the previous command. 57 | (If you use an EC2 region other than "us-west-2", make sure to change that too.) 58 | 59 | ```sh 60 | aws ec2 describe-subnets \ 61 | --region us-west-2 \ 62 | --filter=Name=vpc-id,Values=VPC_ID \ 63 | | jq '.Subnets[] | {id: .SubnetId, public: .MapPublicIpOnLaunch, az: .AvailabilityZone}' 64 | ``` 65 | 66 | You'll want to pick at least one and save it for the launch command later. 67 | Make sure the subnets you select have Internet access so the updater can reach its dependencies. 68 | Public subnets usually have Internet access via an [Internet gateway](https://docs.aws.amazon.com/vpc/latest/userguide/VPC_Internet_Gateway.html) while private subnets may be configured with NAT. 69 | For more information, see [the VPC user guide](https://docs.aws.amazon.com/vpc/latest/userguide/VPC_Internet_Gateway.html#vpc-igw-internet-access). 70 | 71 | We recommend picking several subnets in different availability zones. 72 | However, if you want to launch in a specific availability zone, make sure you pick a subnet that matches; the AZ is listed right below the public/private status. 73 | 74 | ### Log Group 75 | 76 | You can either choose an existing log group or create a new one to get your ECS updater logs. 77 | 78 | You can run this to get the list of existing log groups: 79 | ```sh 80 | aws logs describe-log-groups 81 | ``` 82 | 83 | You'll want to pick one and save it for the installation command later.
84 | 85 | If you want to create a new log group, run this (Make sure to provide LOG_GROUP_NAME) 86 | ```sh 87 | aws logs create-log-group --log-group-name LOG_GROUP_NAME 88 | ``` 89 | 90 | ### Install 91 | 92 | Now we can install the [CloudFormation template](stacks/bottlerocket-ecs-updater.yaml) to start the ECS updater for your cluster! 93 | 94 | There are a few values to make sure you change in this command: 95 | * CLUSTER_NAME: the name of the cluster you want ECS updater to manage Bottlerocket instances in 96 | * SUBNET_IDS: a comma-separated list of the subnets you selected earlier 97 | * LOG_GROUP_NAME: the log group name you selected or created earlier 98 | 99 | ```sh 100 | aws cloudformation deploy \ 101 | --stack-name "bottlerocket-ecs-updater" \ 102 | --template-file "./stacks/bottlerocket-ecs-updater.yaml" \ 103 | --capabilities CAPABILITY_NAMED_IAM \ 104 | --parameter-overrides \ 105 | ClusterName="CLUSTER_NAME" \ 106 | Subnets="SUBNET_IDS" \ 107 | LogGroupName="LOG_GROUP_NAME" 108 | ``` 109 | 110 | ## How it works 111 | 112 | The Bottlerocket ECS Updater is designed to run as a scheduled Fargate task that queries, drains, and performs updates in your ECS cluster. 113 | A rule in CloudWatch Events periodically launches the updater as a new Fargate task. 114 | The updater queries the ECS API to discover all the container instances in your cluster and filters for Bottlerocket instances by reading the `bottlerocket.variant` attribute. 115 | For each Bottlerocket instance found, the updater executes an SSM document that queries for available updates using the `apiclient update check` command. 116 | When an update is available, the updater checks to see whether the tasks currently running on the container instance are part of a [service](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs_services.html) and eligible for replacement. 
117 | If all the tasks are part of a service, the updater marks the container instance for [draining](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/container-instance-draining.html) and waits for the tasks to be successfully drained. 118 | After the container instance has been drained, the updater executes an SSM document to download the update, apply the update, and reboot. 119 | Finally, the updater will mark the container instance as active and move on to the next one. 120 | 121 | ## Troubleshooting 122 | 123 | When installed with the provided CloudFormation template, the logs for the updater will be available in the CloudWatch Logs group you configured. 124 | Checking the logs is a good first step in understanding why something happened or didn't happen. 125 | 126 | ### Why do only some of my Bottlerocket instances have an update available? 127 | 128 | Updates to Bottlerocket are rolled out in [waves](https://github.com/bottlerocket-os/bottlerocket/tree/develop/sources/updater/waves) to reduce the impact of issues; the container instances in your cluster may not all see updates at the same time. 129 | You can check whether an update is available on your instance by running the `apiclient update check` command from within the [control](https://github.com/bottlerocket-os/bottlerocket#control-container) or [admin](https://github.com/bottlerocket-os/bottlerocket#admin-container) container. 130 | 131 | ### My Bottlerocket instance has an update available. Why didn't the Bottlerocket ECS Updater update it? 132 | 133 | The Bottlerocket ECS Updater attempts to update container instances without disrupting the workloads in your cluster. 134 | Applying an update to Bottlerocket requires a reboot. 135 | To avoid disruption in your cluster, the Bottlerocket ECS Updater uses the [container instance draining](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/container-instance-draining.html) feature of ECS.
136 | A container instance may be skipped for update when: 137 | 138 | * _A non-service task is running._ 139 | Non-service tasks are not automatically replaced when they are stopped. 140 | To avoid disrupting a critical workload, the Bottlerocket ECS Updater will not stop a non-service task. 141 | * _No spare capacity is available in the cluster._ 142 | The service scheduler attempts to replace the tasks according to the service's deployment configuration parameters, `minimumHealthyPercent` and `maximumPercent`. 143 | If stopping a task would reduce the running count below your service's `minimumHealthyPercent`, ECS will not stop the task. 144 | The Bottlerocket ECS Updater will wait for draining to complete for a fixed period of time (currently 25 minutes). 145 | If draining has not completed by the end of the period, the updater will restore the instance and move to the next one. 146 | * _Draining takes too long._ 147 | The Bottlerocket ECS Updater will wait for draining to complete for a fixed period of time (currently 25 minutes). 148 | If draining has not completed by the end of the period, the updater will restore the instance and move to the next one. 149 | The time it takes for a task to be stopped is related to the `stopTimeout` task definition parameter and to any associated resources like load balancers. 150 | If your tasks are taking too long to drain, you can ensure that your task responds to `SIGTERM`, shorten the `stopTimeout`, or shorten the load balancer's health check and deregistration delay settings. 151 | * _Bottlerocket version is too old._ 152 | The Bottlerocket ECS Updater uses newer [`apiclient update` commands](https://github.com/bottlerocket-os/bottlerocket#update-api) that were added in version [1.0.5](https://github.com/bottlerocket-os/bottlerocket/blob/develop/CHANGELOG.md#v105-2021-01-15). 153 | The SSM commands will fail if your Bottlerocket OS version is less than 1.0.5. 
154 | Instances running Bottlerocket versions less than 1.0.5 need to be manually updated. 155 | 156 | ### Why do new container instances launch with older Bottlerocket versions? 157 | 158 | The Bottlerocket ECS Updater performs in-place updates for instances in your ECS cluster. 159 | The updater does not influence how those instances are launched. 160 | If you use an auto-scaling group to launch your instances, you can update the AMI ID in your launch configuration or launch template to use a newer version of Bottlerocket. 161 | 162 | Note: We do not recommend using the Bottlerocket ECS Updater in conjunction with EC2 Spot. 163 | The ECS Updater is designed to keep services safe from interruption by updating one instance at a time. 164 | With the short average lifetime of Spot instances, the updater may not update them until relatively late in their life, meaning they may not be up to date when serving your application. 165 | 166 | ## Developer guide 167 | 168 | To get started with building and developing the ECS updater, make sure you have: 169 | 170 | * [Go installed](https://go.dev/doc/install) 171 | * [`golangci-lint` installed locally](https://golangci-lint.run/usage/install/#local-installation) 172 | * make 173 | * [amazon-ecr-credential-helper](https://github.com/awslabs/amazon-ecr-credential-helper) set up for Docker and access to ECR (or your preferred image registry) 174 | * And the [CloudFormation template linter installed](https://github.com/aws-cloudformation/cfn-lint) 175 | 176 | Make sure everything is ready and installed by running the tests with `make test`. 177 | Ensure the local builds work by running `make`. 178 | You might first need to get the modules downloaded to your local go mod cache by running `make tidy`. 179 | 180 | ## Security 181 | 182 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information.
183 | 184 | ## License 185 | 186 | This project is dual licensed under either the Apache-2.0 License or the MIT license, your choice. 187 | 188 | -------------------------------------------------------------------------------- /updater/aws.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "log" 7 | "strings" 8 | "sync" 9 | "time" 10 | 11 | "github.com/aws/aws-sdk-go/aws" 12 | "github.com/aws/aws-sdk-go/aws/request" 13 | "github.com/aws/aws-sdk-go/service/ec2" 14 | "github.com/aws/aws-sdk-go/service/ecs" 15 | "github.com/aws/aws-sdk-go/service/ssm" 16 | ) 17 | 18 | const ( 19 | ecsPageSize = 100 20 | ssmPageSize = 50 21 | updateStateIdle = "Idle" 22 | updateStateStaged = "Staged" 23 | updateStateAvailable = "Available" 24 | updateStateReady = "Ready" 25 | waiterDelay = time.Duration(15) * time.Second 26 | waiterMaxAttempts = 100 27 | // If this time is reached and the ssm command has not already started running, it will not run. 
28 | deliveryTimeoutSeconds = 600 29 | ) 30 | 31 | type instance struct { 32 | instanceID string 33 | containerInstanceID string 34 | bottlerocketVersion string 35 | } 36 | 37 | type checkOutput struct { 38 | UpdateState string `json:"update_state"` 39 | ActivePartition struct { 40 | Image struct { 41 | Version string `json:"version"` 42 | } `json:"image"` 43 | } `json:"active_partition"` 44 | } 45 | 46 | type ECSAPI interface { 47 | ListContainerInstancesPages(*ecs.ListContainerInstancesInput, func(*ecs.ListContainerInstancesOutput, bool) bool) error 48 | DescribeContainerInstances(input *ecs.DescribeContainerInstancesInput) (*ecs.DescribeContainerInstancesOutput, error) 49 | UpdateContainerInstancesState(input *ecs.UpdateContainerInstancesStateInput) (*ecs.UpdateContainerInstancesStateOutput, error) 50 | ListTasks(input *ecs.ListTasksInput) (*ecs.ListTasksOutput, error) 51 | DescribeTasks(input *ecs.DescribeTasksInput) (*ecs.DescribeTasksOutput, error) 52 | WaitUntilTasksStoppedWithContext(ctx aws.Context, input *ecs.DescribeTasksInput, opts ...request.WaiterOption) error 53 | } 54 | 55 | type SSMAPI interface { 56 | WaitUntilCommandExecutedWithContext(ctx aws.Context, input *ssm.GetCommandInvocationInput, opts ...request.WaiterOption) error 57 | SendCommand(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) 58 | GetCommandInvocation(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) 59 | } 60 | 61 | type EC2API interface { 62 | WaitUntilInstanceStatusOk(input *ec2.DescribeInstanceStatusInput) error 63 | } 64 | 65 | func (u *updater) alreadyRunning(family string) (bool, error) { 66 | log.Print("Checking for running updater tasks") 67 | list, err := u.ecs.ListTasks(&ecs.ListTasksInput{ 68 | Cluster: &u.cluster, 69 | Family: aws.String(family), 70 | }) 71 | if err != nil { 72 | return false, fmt.Errorf("failed to list running updater tasks: %w", err) 73 | } 74 | if len(list.TaskArns) > 1 { 75 | return true, nil 76 | } 77 | 
log.Println("This is the only running updater.") 78 | return false, nil 79 | } 80 | 81 | func (u *updater) listContainerInstances() ([]*string, error) { 82 | log.Printf("Listing active container instances in cluster %q", u.cluster) 83 | containerInstances := make([]*string, 0) 84 | input := &ecs.ListContainerInstancesInput{ 85 | Cluster: &u.cluster, 86 | Status: aws.String(ecs.ContainerInstanceStatusActive), 87 | } 88 | if err := u.ecs.ListContainerInstancesPages(input, func(output *ecs.ListContainerInstancesOutput, _ bool) bool { 89 | containerInstances = append(containerInstances, output.ContainerInstanceArns...) 90 | return true 91 | }); err != nil { 92 | return nil, fmt.Errorf("failed to list container instances: %w", err) 93 | } 94 | log.Printf("Found %d container instances in the cluster", len(containerInstances)) 95 | return containerInstances, nil 96 | } 97 | 98 | // filterBottlerocketInstances filters container instances and returns list of 99 | // instances that are running Bottlerocket OS 100 | func (u *updater) filterBottlerocketInstances(instances []*string) ([]instance, error) { 101 | log.Printf("Filtering container instances running Bottlerocket OS") 102 | bottlerocketInstances := make([]instance, 0) 103 | errCount := 0 104 | var lastErr error 105 | pageCount, err := eachPage(len(instances), ecsPageSize, func(start, stop int) error { 106 | resp, err := u.ecs.DescribeContainerInstances(&ecs.DescribeContainerInstancesInput{ 107 | Cluster: &u.cluster, 108 | ContainerInstances: instances[start:stop], 109 | }) 110 | // count errors per page. 
111 | if err != nil { 112 | log.Printf("Failed to describe container instances from %d to %d: %v", start, stop, err) 113 | errCount++ 114 | lastErr = err 115 | return nil 116 | } 117 | for _, containerInstance := range resp.ContainerInstances { 118 | if containsAttribute(containerInstance.Attributes, "bottlerocket.variant") { 119 | bottlerocketInstances = append(bottlerocketInstances, instance{ 120 | instanceID: aws.StringValue(containerInstance.Ec2InstanceId), 121 | containerInstanceID: aws.StringValue(containerInstance.ContainerInstanceArn), 122 | }) 123 | log.Printf("Bottlerocket instance %q detected.", aws.StringValue(containerInstance.Ec2InstanceId)) 124 | } 125 | } 126 | return nil 127 | }) 128 | if err != nil { 129 | return nil, err 130 | } 131 | // check if every page had an error; errors are only fatal if each page failed. 132 | if errCount == pageCount { 133 | return nil, fmt.Errorf("failed to describe any container instances: %w", lastErr) 134 | } 135 | return bottlerocketInstances, nil 136 | } 137 | 138 | // containsAttribute checks if a slice of ECS Attributes struct contains a specified name. 139 | func containsAttribute(attrs []*ecs.Attribute, searchString string) bool { 140 | for _, attr := range attrs { 141 | if aws.StringValue(attr.Name) == searchString { 142 | return true 143 | } 144 | } 145 | return false 146 | } 147 | 148 | // eachPage defines batch processing boundaries for handling paginated results of API calls. 
149 | func eachPage(inputLen int, size int, fn func(start, stop int) error) (int, error) { 150 | pageCount := 0 151 | for start := 0; start < inputLen; start += size { 152 | stop := start + size 153 | if stop > inputLen { 154 | stop = inputLen 155 | } 156 | if err := fn(start, stop); err != nil { 157 | return 0, err 158 | } 159 | pageCount++ 160 | } 161 | return pageCount, nil 162 | } 163 | 164 | // filterAvailableUpdates returns a list of instances that have updates available 165 | func (u *updater) filterAvailableUpdates(bottlerocketInstances []instance) ([]instance, error) { 166 | log.Printf("Filtering instances with available updates") 167 | // make slice of Bottlerocket instances to use with SendCommand and checkCommandOutput 168 | instances := make([]string, 0) 169 | for _, inst := range bottlerocketInstances { 170 | instances = append(instances, inst.instanceID) 171 | } 172 | 173 | var lastErr error 174 | errCount := 0 175 | candidates := make([]instance, 0) 176 | pageCount, err := eachPage(len(instances), ssmPageSize, func(start, stop int) error { 177 | commandID, err := u.sendCommand(instances[start:stop], u.checkDocument) 178 | if err != nil { 179 | // errors here are considered non-fatal. 
180 | log.Printf("Failed to send document %s: %v", u.checkDocument, err) 181 | errCount++ 182 | lastErr = err 183 | return nil 184 | } 185 | for _, inst := range bottlerocketInstances[start:stop] { 186 | commandOutput, err := u.getCommandResult(commandID, inst.instanceID) 187 | if err != nil { 188 | // errors here are considered non-fatal 189 | log.Printf("Failed to get output for command %s, document %s and instance %q: %v", commandID, u.checkDocument, inst, err) 190 | continue 191 | } 192 | output, err := parseCommandOutput(commandOutput) 193 | if err != nil { 194 | log.Printf("Failed to parse command output %q for instance %q: %v", string(commandOutput), inst, err) 195 | continue 196 | } 197 | if output.UpdateState == updateStateAvailable || output.UpdateState == updateStateReady { 198 | inst.bottlerocketVersion = output.ActivePartition.Image.Version 199 | candidates = append(candidates, inst) 200 | } 201 | } 202 | return nil 203 | }) 204 | if err != nil { 205 | return nil, err 206 | } 207 | if errCount == pageCount { 208 | return nil, fmt.Errorf("all attempts to send SSM document %s failed: %w", u.checkDocument, lastErr) 209 | } 210 | return candidates, nil 211 | } 212 | 213 | // eligible checks the eligibility of container instance for update. It's eligible 214 | // if all the running tasks were started by a service. 
215 | func (u *updater) eligible(containerInstance string) (bool, error) { 216 | log.Printf("Checking eligiblity for update of container instance %q", containerInstance) 217 | list, err := u.ecs.ListTasks(&ecs.ListTasksInput{ 218 | Cluster: &u.cluster, 219 | ContainerInstance: aws.String(containerInstance), 220 | }) 221 | if err != nil { 222 | return false, fmt.Errorf("failed to list tasks: %w", err) 223 | } 224 | taskARNs := list.TaskArns 225 | if len(list.TaskArns) == 0 { 226 | return true, nil 227 | } 228 | 229 | desc, err := u.ecs.DescribeTasks(&ecs.DescribeTasksInput{ 230 | Cluster: &u.cluster, 231 | Tasks: taskARNs, 232 | }) 233 | if err != nil { 234 | return false, fmt.Errorf("failed to describe tasks: %w", err) 235 | } 236 | for _, listResult := range desc.Tasks { 237 | startedBy := aws.StringValue(listResult.StartedBy) 238 | if !strings.HasPrefix(startedBy, "ecs-svc/") { 239 | log.Printf("Container instance %q has a non-service task running: %s", containerInstance, aws.StringValue(listResult.TaskArn)) 240 | return false, nil 241 | } 242 | } 243 | return true, nil 244 | } 245 | 246 | func (u *updater) drainInstance(containerInstance string) error { 247 | log.Printf("Starting drain on container instance %q", containerInstance) 248 | resp, err := u.ecs.UpdateContainerInstancesState(&ecs.UpdateContainerInstancesStateInput{ 249 | Cluster: &u.cluster, 250 | ContainerInstances: aws.StringSlice([]string{containerInstance}), 251 | Status: aws.String("DRAINING"), 252 | }) 253 | if err != nil { 254 | return fmt.Errorf("failed to change instance state to DRAINING: %w", err) 255 | } 256 | if len(resp.Failures) != 0 { 257 | log.Printf("There are API failures in draining the container instance %q, therefore attempting to"+ 258 | " re-activate", containerInstance) 259 | err = u.activateInstance(containerInstance) 260 | if err != nil { 261 | log.Printf("Instance failed to re-activate after failing to change state to DRAINING: %v", err) 262 | } 263 | return 
fmt.Errorf("failures in API call: %v", resp.Failures) 264 | } 265 | log.Printf("Container instance state changed to DRAINING") 266 | 267 | err = u.waitUntilDrained(containerInstance) 268 | if err != nil { 269 | log.Printf("Container instance %q failed to drain, therefore attempting to re-activate", containerInstance) 270 | err2 := u.activateInstance(containerInstance) 271 | if err2 != nil { 272 | log.Printf("Instance failed to re-activate after failing to wait for drain to complete: %v", err2) 273 | } 274 | return fmt.Errorf("error while waiting to drain: %w", err) 275 | } 276 | log.Printf("Container instance %q drained successfully!", containerInstance) 277 | return nil 278 | } 279 | 280 | func (u *updater) activateInstance(containerInstance string) error { 281 | resp, err := u.ecs.UpdateContainerInstancesState(&ecs.UpdateContainerInstancesStateInput{ 282 | Cluster: &u.cluster, 283 | ContainerInstances: aws.StringSlice([]string{containerInstance}), 284 | Status: aws.String("ACTIVE"), 285 | }) 286 | if err != nil { 287 | return fmt.Errorf("failed to change state to ACTIVE: %w", err) 288 | } 289 | if len(resp.Failures) != 0 { 290 | if aws.StringValue(resp.Failures[0].Reason) == "INACTIVE" { 291 | log.Printf("Container instance %q is in INACTIVE state", containerInstance) 292 | return nil 293 | } 294 | return fmt.Errorf("API failures while activating: %v", resp.Failures) 295 | } 296 | log.Printf("Container instance %q state changed to ACTIVE successfully!", containerInstance) 297 | return nil 298 | } 299 | 300 | func (u *updater) waitUntilDrained(containerInstance string) error { 301 | log.Printf("Waiting for container instance %q to drain", containerInstance) 302 | list, err := u.ecs.ListTasks(&ecs.ListTasksInput{ 303 | Cluster: &u.cluster, 304 | ContainerInstance: aws.String(containerInstance), 305 | }) 306 | if err != nil { 307 | return fmt.Errorf("failed to list tasks: %w", err) 308 | } 309 | taskARNs := list.TaskArns 310 | 311 | if len(taskARNs) == 0 { 312 | 
log.Printf("No tasks to drain") 313 | return nil 314 | } 315 | 316 | return u.ecs.WaitUntilTasksStoppedWithContext(aws.BackgroundContext(), &ecs.DescribeTasksInput{ 317 | Cluster: &u.cluster, 318 | Tasks: taskARNs, 319 | }, 320 | request.WithWaiterMaxAttempts(waiterMaxAttempts), 321 | request.WithWaiterDelay(request.ConstantWaiterDelay(waiterDelay)), 322 | ) 323 | } 324 | 325 | // updateInstance starts an update process on an instance. 326 | func (u *updater) updateInstance(inst instance) error { 327 | log.Printf("Starting update on instance %q", inst.instanceID) 328 | ec2IDs := []string{inst.instanceID} 329 | log.Printf("Checking current update state of instance %q", inst.instanceID) 330 | 331 | commandID, err := u.sendCommand(ec2IDs, u.checkDocument) 332 | if err != nil { 333 | return fmt.Errorf("failed to send check command: %w", err) 334 | } 335 | output, err := u.getCommandResult(commandID, inst.instanceID) 336 | if err != nil { 337 | return fmt.Errorf("failed to get check command output: %w", err) 338 | } 339 | check, err := parseCommandOutput(output) 340 | if err != nil { 341 | return fmt.Errorf("failed to parse command output %q: %w", string(output), err) 342 | } 343 | 344 | switch check.UpdateState { 345 | case updateStateIdle: 346 | log.Printf("No new update available for instance %q", inst.instanceID) 347 | return nil 348 | case updateStateStaged: 349 | return fmt.Errorf("unexpected update state %q; skipping instance", check.UpdateState) 350 | case updateStateAvailable: 351 | log.Printf("Starting update apply on instance %q", inst.instanceID) 352 | _, err := u.sendCommand(ec2IDs, u.applyDocument) 353 | if err != nil { 354 | return fmt.Errorf("failed to send update apply command: %w", err) 355 | } 356 | case updateStateReady: 357 | log.Printf("Update is previously applied on instance %q", inst.instanceID) 358 | default: 359 | return fmt.Errorf("unknown update state %q", check.UpdateState) 360 | } 361 | 362 | // occasionally instance goes into reboot 
before reporting command output, therefore 363 | // we do not poll for command output. Instead we rely on verifyUpdate to confirm update 364 | // success or failure. 365 | log.Printf("Sending SSM document %q on instance %q", u.rebootDocument, inst.instanceID) 366 | // SendCommand is directly called here because we do not want to wait on command complete. 367 | resp, err := u.ssm.SendCommand(&ssm.SendCommandInput{ 368 | DocumentName: aws.String(u.rebootDocument), 369 | DocumentVersion: aws.String("$DEFAULT"), 370 | InstanceIds: aws.StringSlice(ec2IDs), 371 | TimeoutSeconds: aws.Int64(deliveryTimeoutSeconds), 372 | }) 373 | if err != nil { 374 | return fmt.Errorf("failed to send reboot command: %w", err) 375 | } 376 | rebootID := *resp.Command.CommandId 377 | log.Printf("SSM document %q posted with command ID %q", u.rebootDocument, rebootID) 378 | 379 | // added some sleep time for reboot to start before we check instance state 380 | time.Sleep(15 * time.Second) 381 | err = u.waitUntilOk(inst.instanceID) 382 | if err != nil { 383 | return fmt.Errorf("failed to reach Ok status after reboot: %w", err) 384 | } 385 | return nil 386 | } 387 | 388 | // verifyUpdate verifies if instance was properly updated 389 | func (u *updater) verifyUpdate(inst instance) (bool, error) { 390 | log.Println("Verifying update by checking there is no new version available to update" + 391 | " and validate the active version") 392 | ec2IDs := []string{inst.instanceID} 393 | updateStatus, err := u.sendCommand(ec2IDs, u.checkDocument) 394 | if err != nil { 395 | return false, fmt.Errorf("failed to send update check command: %w", err) 396 | } 397 | 398 | updateResult, err := u.getCommandResult(updateStatus, inst.instanceID) 399 | if err != nil { 400 | return false, fmt.Errorf("failed to get check command output: %w", err) 401 | } 402 | output, err := parseCommandOutput(updateResult) 403 | if err != nil { 404 | return false, fmt.Errorf("failed to parse command output %q, manual verification 
required: %w", string(updateResult), err) 405 | } 406 | updatedVersion := output.ActivePartition.Image.Version 407 | if updatedVersion == inst.bottlerocketVersion { 408 | log.Printf("Container instance %q did not update, its current "+ 409 | "version %s and updated version %s are the same", inst.containerInstanceID, inst.bottlerocketVersion, updatedVersion) 410 | return false, nil 411 | } else if output.UpdateState == updateStateAvailable { 412 | log.Printf("Container instance %q was updated to version %q successfully, however another newer version was recently released;"+ 413 | " Instance will be updated to newer version in next iteration.", inst.containerInstanceID, updatedVersion) 414 | return true, nil 415 | } 416 | log.Printf("Container instance %q updated to version %q", inst.containerInstanceID, updatedVersion) 417 | return true, nil 418 | } 419 | 420 | func (u *updater) sendCommand(instanceIDs []string, ssmDocument string) (string, error) { 421 | log.Printf("Sending SSM document %q", ssmDocument) 422 | resp, err := u.ssm.SendCommand(&ssm.SendCommandInput{ 423 | DocumentName: aws.String(ssmDocument), 424 | DocumentVersion: aws.String("$DEFAULT"), 425 | InstanceIds: aws.StringSlice(instanceIDs), 426 | TimeoutSeconds: aws.Int64(deliveryTimeoutSeconds), 427 | }) 428 | if err != nil { 429 | return "", fmt.Errorf("send command failed: %w", err) 430 | } 431 | commandID := *resp.Command.CommandId 432 | log.Printf("SSM document %q posted with command id %q", ssmDocument, commandID) 433 | 434 | // Wait for the sent commands to complete. 
435 | wg := sync.WaitGroup{} 436 | instanceCount := len(instanceIDs) 437 | errChan := make(chan error, instanceCount) 438 | for _, v := range instanceIDs { 439 | log.Printf("Waiting for command %q to complete for instance %q", commandID, v) 440 | wg.Add(1) 441 | go func(instanceID string) { 442 | defer wg.Done() 443 | err = u.ssm.WaitUntilCommandExecutedWithContext(aws.BackgroundContext(), &ssm.GetCommandInvocationInput{ 444 | CommandId: aws.String(commandID), 445 | InstanceId: aws.String(instanceID), 446 | }, 447 | request.WithWaiterMaxAttempts(waiterMaxAttempts), 448 | request.WithWaiterDelay(request.ConstantWaiterDelay(waiterDelay))) 449 | if err != nil { 450 | errChan <- err 451 | log.Printf("Error encountered while awaiting document %q execution for instance: %q: %s", ssmDocument, instanceID, err) 452 | u.logCommmandOutput(commandID, instanceID) 453 | } 454 | }(aws.StringValue(&v)) 455 | } 456 | wg.Wait() 457 | close(errChan) 458 | 459 | errCount := 0 460 | for err = range errChan { 461 | errCount++ 462 | if errCount == instanceCount { 463 | return "", fmt.Errorf("too many failures while awaiting document execution: %w", err) 464 | } 465 | } 466 | return commandID, nil 467 | } 468 | 469 | func (u *updater) getCommandResult(commandID string, instanceID string) ([]byte, error) { 470 | resp, err := u.ssm.GetCommandInvocation(&ssm.GetCommandInvocationInput{ 471 | CommandId: aws.String(commandID), 472 | InstanceId: aws.String(instanceID), 473 | }) 474 | if err != nil { 475 | return nil, fmt.Errorf("failed to retrieve command invocation output: %w", err) 476 | } 477 | commandResults := []byte(aws.StringValue(resp.StandardOutputContent)) 478 | if aws.StringValue(resp.Status) != ssm.CommandInvocationStatusSuccess { 479 | return nil, fmt.Errorf("command %s has not reached success status, current status %q", commandID, aws.StringValue(resp.Status)) 480 | } 481 | return commandResults, nil 482 | } 483 | 484 | // logCommmandOutput logs the ssm command invocation response 
485 | func (u *updater) logCommmandOutput(commandID string, instanceID string) { 486 | resp, err := u.ssm.GetCommandInvocation(&ssm.GetCommandInvocationInput{ 487 | CommandId: aws.String(commandID), 488 | InstanceId: aws.String(instanceID), 489 | }) 490 | if err != nil { 491 | log.Printf("Failed to get invocation output for instance %q: %v", instanceID, err) 492 | } 493 | log.Printf("Invocation output for instance %q: %#q", instanceID, resp) 494 | } 495 | 496 | // waitUntilOk takes an EC2 ID as a parameter and waits until the specified EC2 instance is in an Ok status. 497 | func (u *updater) waitUntilOk(ec2ID string) error { 498 | log.Printf("Waiting for instance %q to reach Ok status", ec2ID) 499 | return u.ec2.WaitUntilInstanceStatusOk(&ec2.DescribeInstanceStatusInput{ 500 | InstanceIds: []*string{aws.String(ec2ID)}, 501 | }) 502 | } 503 | 504 | // parseCommandOutput takes raw bytes of ssm command output and converts it into a struct 505 | func parseCommandOutput(commandOutput []byte) (checkOutput, error) { 506 | output := checkOutput{} 507 | err := json.Unmarshal(commandOutput, &output) 508 | if err != nil { 509 | return output, fmt.Errorf("failed to unmarshal json: %w", err) 510 | } 511 | if output.UpdateState == "" || output.ActivePartition.Image.Version == "" { 512 | return output, fmt.Errorf("mandatory fields are not available") 513 | } 514 | return output, nil 515 | } 516 | -------------------------------------------------------------------------------- /updater/aws_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "strconv" 7 | "sync" 8 | "testing" 9 | 10 | "github.com/aws/aws-sdk-go/aws" 11 | "github.com/aws/aws-sdk-go/aws/request" 12 | "github.com/aws/aws-sdk-go/service/ec2" 13 | "github.com/aws/aws-sdk-go/service/ecs" 14 | "github.com/aws/aws-sdk-go/service/ssm" 15 | "github.com/stretchr/testify/assert" 16 | "github.com/stretchr/testify/require" 17 | ) 
18 | 19 | func TestFilterAvailableUpdates(t *testing.T) { 20 | instances := []instance{ 21 | { 22 | instanceID: "inst-id-1", 23 | containerInstanceID: "cont-inst-1", 24 | }, 25 | { 26 | instanceID: "inst-id-2", 27 | containerInstanceID: "cont-inst-2", 28 | }, 29 | { 30 | instanceID: "inst-id-3", 31 | containerInstanceID: "cont-inst-3", 32 | }, 33 | { 34 | instanceID: "inst-id-4", 35 | containerInstanceID: "cont-inst-4", 36 | }, 37 | { 38 | instanceID: "inst-id-5", 39 | containerInstanceID: "cont-inst-5", 40 | }, 41 | } 42 | expected := []instance{ 43 | { 44 | instanceID: "inst-id-1", 45 | containerInstanceID: "cont-inst-1", 46 | bottlerocketVersion: "v1.0.5", 47 | }, 48 | { 49 | instanceID: "inst-id-2", 50 | containerInstanceID: "cont-inst-2", 51 | bottlerocketVersion: "v1.0.5", 52 | }, 53 | { 54 | instanceID: "inst-id-5", 55 | containerInstanceID: "cont-inst-5", 56 | bottlerocketVersion: "v1.0.5", 57 | }, 58 | } 59 | responses := map[string]string{ 60 | "inst-id-1": `{"update_state": "Available", "active_partition": { "image": { "version": "v1.0.5"}}}`, 61 | "inst-id-2": `{"update_state": "Ready", "active_partition": { "image": { "version": "v1.0.5"}}}`, 62 | "inst-id-3": `{"update_state": "Idle", "active_partition": { "image": { "version": "v1.1.1"}}}`, 63 | "inst-id-4": `{"update_state": "Staged", "active_partition": { "image": { "version": "v1.1.1"}}}`, 64 | "inst-id-5": `{"update_state": "Available", "active_partition": { "image": { "version": "v1.0.5"}}}`, 65 | } 66 | 67 | // mutex needed to prevent race condition when incrementing counter in concurrent 68 | // execution of WaitUntilCommandExecutedWithContextFn 69 | var m sync.Mutex 70 | sendCommandCalls := 0 71 | commandWaiterCalls := 0 72 | getCommandInvocationCalls := 0 73 | mockSSM := MockSSM{ 74 | GetCommandInvocationFn: func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) { 75 | getCommandInvocationCalls++ 76 | return &ssm.GetCommandInvocationOutput{ 77 | Status: 
aws.String("Success"), 78 | StandardOutputContent: aws.String(responses[*input.InstanceId]), 79 | }, nil 80 | }, 81 | SendCommandFn: func(_ *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) { 82 | sendCommandCalls++ 83 | return &ssm.SendCommandOutput{ 84 | Command: &ssm.Command{ 85 | CommandId: aws.String("command-id"), 86 | DocumentName: aws.String("check-document"), 87 | }, 88 | }, nil 89 | }, 90 | WaitUntilCommandExecutedWithContextFn: func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error { 91 | m.Lock() 92 | commandWaiterCalls++ 93 | m.Unlock() 94 | assert.Equal(t, "command-id", aws.StringValue(input.CommandId)) 95 | return nil 96 | }, 97 | } 98 | u := updater{ssm: mockSSM, checkDocument: "check-document"} 99 | actual, err := u.filterAvailableUpdates(instances) 100 | require.NoError(t, err) 101 | assert.Equal(t, expected, actual, "Should only contain instances in Aavailable or Ready update state") 102 | assert.Equal(t, 1, sendCommandCalls, "should send commands for each page") 103 | assert.Equal(t, 5, commandWaiterCalls, "should wait for each instance") 104 | assert.Equal(t, 5, getCommandInvocationCalls, "should collect output for each instance") 105 | } 106 | 107 | func TestPaginatedFilterAvailableUpdatesSuccess(t *testing.T) { 108 | checkPattern := `{"update_state": "%s", "active_partition": { "image": { "version": "%s"}}}` 109 | expected := make([]instance, 0) 110 | instances := make([]instance, 0) 111 | getOut := &ssm.GetCommandInvocationOutput{ 112 | Status: aws.String("Success"), 113 | StandardOutputContent: aws.String(fmt.Sprintf(checkPattern, updateStateAvailable, "v1.0.5")), 114 | } 115 | 116 | for i := 0; i < 100; i++ { // 100 is chosen here to reprsent 2 full pages of SSM (limited to 50 per page) 117 | containerID := "cont-inst-br" + strconv.Itoa(i) 118 | ec2ID := "ec2-id-br" + strconv.Itoa(i) 119 | instances = append(instances, instance{ 120 | instanceID: ec2ID, 121 | containerInstanceID: containerID, 122 | 
}) 123 | expected = append(expected, instance{ 124 | instanceID: ec2ID, 125 | containerInstanceID: containerID, 126 | bottlerocketVersion: "v1.0.5", 127 | }) 128 | } 129 | 130 | // mutex needed to prevent race condition when incrementing counter in concurrent 131 | // execution of WaitUntilCommandExecutedWithContextFn 132 | var m sync.Mutex 133 | sendCommandCalls := 0 134 | commandWaiterCalls := 0 135 | getCommandInvocationCalls := 0 136 | mockSSM := MockSSM{ 137 | GetCommandInvocationFn: func(_ *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) { 138 | getCommandInvocationCalls++ 139 | return getOut, nil 140 | }, 141 | SendCommandFn: func(_ *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) { 142 | sendCommandCalls++ 143 | return &ssm.SendCommandOutput{ 144 | Command: &ssm.Command{ 145 | CommandId: aws.String("command-id"), 146 | DocumentName: aws.String("check-document"), 147 | }, 148 | }, nil 149 | }, 150 | WaitUntilCommandExecutedWithContextFn: func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error { 151 | m.Lock() 152 | commandWaiterCalls++ 153 | m.Unlock() 154 | assert.Equal(t, "command-id", aws.StringValue(input.CommandId)) 155 | return nil 156 | }, 157 | } 158 | u := updater{ssm: mockSSM} 159 | actual, err := u.filterAvailableUpdates(instances) 160 | require.NoError(t, err) 161 | assert.EqualValues(t, expected, actual, "should contain all instances") 162 | assert.Equal(t, 2, sendCommandCalls, "should send commands for each page") 163 | assert.Equal(t, 100, commandWaiterCalls, "should wait for each instance") 164 | assert.Equal(t, 100, getCommandInvocationCalls, "should collect output for each instance") 165 | } 166 | 167 | func TestPaginatedFilterAvailableUpdatesAllFail(t *testing.T) { 168 | instances := make([]instance, 0) 169 | 170 | for i := 0; i < 100; i++ { 171 | containerID := "cont-inst-br" + strconv.Itoa(i) 172 | ec2ID := "ec2-id-br" + strconv.Itoa(i) 173 | instances = append(instances, 
instance{ 174 | instanceID: ec2ID, 175 | containerInstanceID: containerID, 176 | }) 177 | } 178 | 179 | sendCommandCalls := 0 180 | mockSSM := MockSSM{ 181 | SendCommandFn: func(_ *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) { 182 | sendCommandCalls++ 183 | return nil, errors.New("Failed to send document") 184 | }, 185 | } 186 | u := updater{ssm: mockSSM} 187 | actual, err := u.filterAvailableUpdates(instances) 188 | require.Error(t, err) 189 | assert.Contains(t, err.Error(), "Failed to send document") 190 | assert.Empty(t, actual) 191 | assert.Equal(t, 2, sendCommandCalls, "should send commands for each page") 192 | } 193 | 194 | func TestPaginatedFilterAvailableUpdatesInPageFailures(t *testing.T) { 195 | instances := make([]instance, 0) 196 | checkPattern := `{"update_state": "%s", "active_partition": { "image": { "version": "%s"}}}` 197 | for i := 0; i < 120; i++ { // 120 chosen here to ensure multiple pages are tested and that number instances divides by 3 evenly 198 | containerID := "cont-inst-br" + strconv.Itoa(i) 199 | ec2ID := "ec2-id-br" + strconv.Itoa(i) 200 | instances = append(instances, instance{ 201 | instanceID: ec2ID, 202 | containerInstanceID: containerID, 203 | }) 204 | } 205 | 206 | // mutex needed to prevent race condition when incrementing counter in concurrent 207 | // execution of WaitUntilCommandExecutedWithContextFn 208 | var m sync.Mutex 209 | sendCommandCalls := 0 210 | commandWaiterCalls := 0 211 | getCommandInvocationCalls := 0 212 | count := 0 213 | mockSSM := MockSSM{ 214 | GetCommandInvocationFn: func(_ *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) { 215 | count++ 216 | getCommandInvocationCalls++ 217 | switch count % 3 { 218 | case 0: 219 | return nil, errors.New("Failed to get command output") // validate getCommandResult failure 220 | case 1: 221 | return &ssm.GetCommandInvocationOutput{ 222 | Status: aws.String("Success"), 223 | StandardOutputContent: aws.String("{}"), 224 | }, nil // 
validates parseCommandOutput failure 225 | case 2: 226 | return &ssm.GetCommandInvocationOutput{ 227 | Status: aws.String("Success"), 228 | StandardOutputContent: aws.String(fmt.Sprintf(checkPattern, updateStateAvailable, "v1.0.5")), 229 | }, nil // validate success case 230 | } 231 | return nil, nil 232 | }, 233 | SendCommandFn: func(_ *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) { 234 | sendCommandCalls++ 235 | return &ssm.SendCommandOutput{ 236 | Command: &ssm.Command{ 237 | CommandId: aws.String("command-id"), 238 | DocumentName: aws.String("check-document"), 239 | }, 240 | }, nil 241 | }, 242 | WaitUntilCommandExecutedWithContextFn: func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error { 243 | assert.Equal(t, "command-id", aws.StringValue(input.CommandId)) 244 | m.Lock() 245 | commandWaiterCalls++ 246 | m.Unlock() 247 | return nil 248 | }, 249 | } 250 | u := updater{ssm: mockSSM} 251 | actual, err := u.filterAvailableUpdates(instances) 252 | require.NoError(t, err) 253 | assert.EqualValues(t, 40, len(actual), "Every 3rd instance of 120 should succeed") 254 | assert.Equal(t, 3, sendCommandCalls, "should send commands for each page") 255 | assert.Equal(t, 120, commandWaiterCalls, "should wait for each instance") 256 | assert.Equal(t, 120, getCommandInvocationCalls, "should collect output for each instance") 257 | } 258 | 259 | func TestPaginatedFilterAvailableUpdatesSingleErr(t *testing.T) { 260 | checkPattern := `{"update_state": "%s", "active_partition": { "image": { "version": "%s"}}}` 261 | expected := make([]instance, 0) 262 | instances := make([]instance, 0) 263 | getOut := &ssm.GetCommandInvocationOutput{ 264 | Status: aws.String("Success"), 265 | StandardOutputContent: aws.String(fmt.Sprintf(checkPattern, updateStateAvailable, "v1.0.5")), 266 | } 267 | 268 | for i := 0; i < 100; i++ { 269 | containerID := "cont-inst-br" + strconv.Itoa(i) 270 | ec2ID := "ec2-id-br" + strconv.Itoa(i) 271 | instances = 
append(instances, instance{ 272 | instanceID: ec2ID, 273 | containerInstanceID: containerID, 274 | }) 275 | expected = append(expected, instance{ 276 | instanceID: ec2ID, 277 | containerInstanceID: containerID, 278 | bottlerocketVersion: "v1.0.5", 279 | }) 280 | } 281 | 282 | pageErrors := []error{errors.New("Failed to send document"), nil} 283 | 284 | // mutex needed to prevent race condition when incrementing counter in concurrent 285 | // execution of WaitUntilCommandExecutedWithContextFn 286 | var m sync.Mutex 287 | sendCommandCalls := 0 288 | commandWaiterCalls := 0 289 | getCommandInvocationCalls := 0 290 | callCount := 0 291 | mockSSM := MockSSM{ 292 | GetCommandInvocationFn: func(_ *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) { 293 | getCommandInvocationCalls++ 294 | return getOut, nil 295 | }, 296 | SendCommandFn: func(_ *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) { 297 | require.Less(t, callCount, len(pageErrors)) 298 | failErr := pageErrors[callCount] 299 | callCount++ 300 | sendCommandCalls++ 301 | return &ssm.SendCommandOutput{ 302 | Command: &ssm.Command{ 303 | CommandId: aws.String("command-id"), 304 | DocumentName: aws.String("check-document"), 305 | }, 306 | }, failErr 307 | }, 308 | WaitUntilCommandExecutedWithContextFn: func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error { 309 | assert.Equal(t, "command-id", aws.StringValue(input.CommandId)) 310 | m.Lock() 311 | commandWaiterCalls++ 312 | m.Unlock() 313 | return nil 314 | }, 315 | } 316 | u := updater{ssm: mockSSM} 317 | actual, err := u.filterAvailableUpdates(instances) 318 | 319 | require.NoError(t, err) 320 | assert.EqualValues(t, actual, expected[50:], "Should only contain instances from the 2nd page") 321 | assert.Equal(t, 2, sendCommandCalls, "should send commands for each page") 322 | assert.Equal(t, 50, commandWaiterCalls, "should wait for each instance") 323 | assert.Equal(t, 50, getCommandInvocationCalls, 
"should collect output for each instance") 324 | } 325 | 326 | func TestGetCommandResult(t *testing.T) { 327 | cases := []struct { 328 | name string 329 | invocationOut *ssm.GetCommandInvocationOutput 330 | expectedError string 331 | expectedOut []byte 332 | invocationError error 333 | }{ 334 | { 335 | name: "getCommand success", 336 | invocationOut: &ssm.GetCommandInvocationOutput{ 337 | Status: aws.String("Success"), 338 | StandardOutputContent: aws.String("OutputContent"), 339 | }, 340 | expectedOut: []byte(aws.StringValue(aws.String("OutputContent"))), 341 | }, 342 | { 343 | name: "getCommand fail", 344 | invocationError: errors.New("failed to get command invocation"), 345 | expectedError: "failed to retrieve command invocation output: failed to get command invocation", 346 | invocationOut: nil, 347 | expectedOut: nil, 348 | }, 349 | { 350 | name: "command status non-Success", 351 | invocationOut: &ssm.GetCommandInvocationOutput{ 352 | Status: aws.String("TimedOut"), 353 | StandardOutputContent: nil, 354 | }, 355 | expectedError: "command command-id has not reached success status, current status \"TimedOut\"", 356 | expectedOut: nil, 357 | }, 358 | } 359 | for _, tc := range cases { 360 | t.Run(tc.name, func(t *testing.T) { 361 | mockSSM := MockSSM{ 362 | GetCommandInvocationFn: func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) { 363 | assert.Equal(t, "command-id", aws.StringValue(input.CommandId)) 364 | assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId)) 365 | return tc.invocationOut, tc.invocationError 366 | }, 367 | } 368 | u := updater{ssm: mockSSM} 369 | actual, err := u.getCommandResult("command-id", "instance-id") 370 | if tc.expectedOut != nil { 371 | require.NoError(t, err) 372 | assert.EqualValues(t, tc.expectedOut, actual) 373 | } else { 374 | require.Error(t, err) 375 | assert.EqualError(t, err, tc.expectedError) 376 | } 377 | }) 378 | } 379 | } 380 | 381 | func TestSendCommandSuccess(t *testing.T) { 
382 | instances := []string{"inst-id-1", "inst-id-2"} 383 | // mutex needed to prevent race condition when appending to instances slice in concurrent 384 | // execution of WaitUntilCommandExecutedWithContextFn 385 | var m sync.Mutex 386 | waitInstanceIDs := []string{} 387 | mockSSM := MockSSM{ 388 | SendCommandFn: func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) { 389 | assert.Equal(t, "test-doc", aws.StringValue(input.DocumentName)) 390 | assert.Equal(t, "$DEFAULT", aws.StringValue(input.DocumentVersion)) 391 | assert.Equal(t, aws.StringSlice(instances), input.InstanceIds) 392 | return &ssm.SendCommandOutput{Command: &ssm.Command{CommandId: aws.String("command-id")}}, nil 393 | }, 394 | WaitUntilCommandExecutedWithContextFn: func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error { 395 | assert.Equal(t, "command-id", aws.StringValue(input.CommandId)) 396 | m.Lock() 397 | waitInstanceIDs = append(waitInstanceIDs, aws.StringValue(input.InstanceId)) 398 | m.Unlock() 399 | return nil 400 | }, 401 | } 402 | u := updater{ssm: mockSSM} 403 | commandID, err := u.sendCommand(instances, "test-doc") 404 | require.NoError(t, err) 405 | assert.EqualValues(t, "command-id", commandID) 406 | assert.ElementsMatch(t, instances, waitInstanceIDs) 407 | } 408 | 409 | func TestSendCommandErr(t *testing.T) { 410 | instances := []string{"inst-id-1", "inst-id-2"} 411 | sendError := errors.New("failed to send command") 412 | mockSSM := MockSSM{ 413 | SendCommandFn: func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) { 414 | assert.Equal(t, "test-doc", aws.StringValue(input.DocumentName)) 415 | assert.Equal(t, "$DEFAULT", aws.StringValue(input.DocumentVersion)) 416 | assert.Equal(t, aws.StringSlice(instances), input.InstanceIds) 417 | return nil, sendError 418 | }, 419 | } 420 | u := updater{ssm: mockSSM} 421 | commandID, err := u.sendCommand(instances, "test-doc") 422 | require.Error(t, err) 423 | assert.Equal(t, "", 
commandID) 424 | assert.ErrorIs(t, err, sendError) 425 | 426 | } 427 | 428 | func TestSendCommandWaitErr(t *testing.T) { 429 | cases := []struct { 430 | name string 431 | instances []string 432 | }{ 433 | { 434 | name: "wait single failure", 435 | instances: []string{"inst-id-1"}, 436 | }, 437 | { 438 | name: "wait fail all", 439 | instances: []string{"inst-id-1", "inst-id-2", "inst-id-3"}, 440 | }, 441 | } 442 | for _, tc := range cases { 443 | t.Run(tc.name, func(t *testing.T) { 444 | waitError := errors.New("exceeded max attempts") 445 | failedInstanceIDs := []string{} 446 | mockSSM := MockSSM{ 447 | SendCommandFn: func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) { 448 | assert.Equal(t, "test-doc", aws.StringValue(input.DocumentName)) 449 | assert.Equal(t, aws.StringSlice(tc.instances), input.InstanceIds) 450 | return &ssm.SendCommandOutput{ 451 | Command: &ssm.Command{CommandId: aws.String("command-id")}, 452 | }, nil 453 | }, 454 | WaitUntilCommandExecutedWithContextFn: func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error { 455 | assert.Equal(t, "command-id", aws.StringValue(input.CommandId)) 456 | return waitError 457 | }, 458 | GetCommandInvocationFn: func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) { 459 | assert.Equal(t, "command-id", aws.StringValue(input.CommandId)) 460 | failedInstanceIDs = append(failedInstanceIDs, aws.StringValue(input.InstanceId)) 461 | return &ssm.GetCommandInvocationOutput{}, nil 462 | }, 463 | } 464 | u := updater{ssm: mockSSM} 465 | commandID, err := u.sendCommand(tc.instances, "test-doc") 466 | require.Error(t, err) 467 | assert.ErrorIs(t, err, waitError) 468 | assert.Equal(t, "", commandID) 469 | assert.ElementsMatch(t, tc.instances, failedInstanceIDs, "should match instances for which wait fails") 470 | }) 471 | } 472 | } 473 | 474 | func TestSendCommandWaitSuccess(t *testing.T) { 475 | mockSendCommand := func(input *ssm.SendCommandInput) 
(*ssm.SendCommandOutput, error) { 476 | assert.Equal(t, "test-doc", aws.StringValue(input.DocumentName)) 477 | return &ssm.SendCommandOutput{ 478 | Command: &ssm.Command{CommandId: aws.String("command-id")}, 479 | }, nil 480 | } 481 | t.Run("wait one success", func(t *testing.T) { 482 | // commandSuccessInstance indicates an instance for which the command should succeed 483 | const commandSuccessInstance = "inst-success" 484 | instances := []string{"inst-id-1", "inst-id-2", commandSuccessInstance} 485 | expectedFailInstances := []string{"inst-id-1", "inst-id-2"} 486 | failedInstanceIDs := []string{} 487 | mockSSM := MockSSM{ 488 | SendCommandFn: mockSendCommand, 489 | WaitUntilCommandExecutedWithContextFn: func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error { 490 | if aws.StringValue(input.InstanceId) == commandSuccessInstance { 491 | return nil 492 | } 493 | return errors.New("exceeded max attempts") 494 | }, 495 | GetCommandInvocationFn: func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) { 496 | assert.Equal(t, "command-id", aws.StringValue(input.CommandId)) 497 | failedInstanceIDs = append(failedInstanceIDs, aws.StringValue(input.InstanceId)) 498 | return &ssm.GetCommandInvocationOutput{}, nil 499 | }, 500 | } 501 | u := updater{ssm: mockSSM} 502 | commandID, err := u.sendCommand(instances, "test-doc") 503 | require.NoError(t, err) 504 | assert.Equal(t, "command-id", commandID) 505 | assert.ElementsMatch(t, expectedFailInstances, failedInstanceIDs, "should match instances for which wait fails") 506 | }) 507 | t.Run("wait all success", func(t *testing.T) { 508 | instances := []string{"inst-id-1", "inst-id-2"} 509 | // mutex needed to prevent race condition when appending to instances slice in concurrent 510 | // execution of WaitUntilCommandExecutedWithContextFn 511 | var m sync.Mutex 512 | waitInstanceIDs := []string{} 513 | mockSSM := MockSSM{ 514 | SendCommandFn: mockSendCommand, 515 | 
WaitUntilCommandExecutedWithContextFn: func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error { 516 | assert.Equal(t, "command-id", aws.StringValue(input.CommandId)) 517 | m.Lock() 518 | waitInstanceIDs = append(waitInstanceIDs, aws.StringValue(input.InstanceId)) 519 | m.Unlock() 520 | return nil 521 | }, 522 | } 523 | u := updater{ssm: mockSSM} 524 | commandID, err := u.sendCommand(instances, "test-doc") 525 | require.NoError(t, err) 526 | assert.Equal(t, "command-id", commandID) 527 | assert.ElementsMatch(t, instances, waitInstanceIDs, "should match instances for which wait succeeds") 528 | }) 529 | 530 | } 531 | 532 | func TestListContainerInstances(t *testing.T) { 533 | cases := []struct { 534 | name string 535 | listOutput *ecs.ListContainerInstancesOutput 536 | listOutput2 *ecs.ListContainerInstancesOutput 537 | listError error 538 | expectedError string 539 | expectedOut []*string 540 | }{ 541 | { 542 | name: "with instances", 543 | listOutput: &ecs.ListContainerInstancesOutput{ 544 | ContainerInstanceArns: []*string{ 545 | aws.String("cont-inst-arn1"), 546 | aws.String("cont-inst-arn2"), 547 | aws.String("cont-inst-arn3")}, 548 | NextToken: aws.String("token"), 549 | }, 550 | listOutput2: &ecs.ListContainerInstancesOutput{ 551 | ContainerInstanceArns: []*string{ 552 | aws.String("cont-inst-arn4"), 553 | aws.String("cont-inst-arn5"), 554 | aws.String("cont-inst-arn6")}, 555 | NextToken: nil, 556 | }, 557 | expectedOut: []*string{ 558 | aws.String("cont-inst-arn1"), 559 | aws.String("cont-inst-arn2"), 560 | aws.String("cont-inst-arn3"), 561 | aws.String("cont-inst-arn4"), 562 | aws.String("cont-inst-arn5"), 563 | aws.String("cont-inst-arn6")}, 564 | }, 565 | { 566 | name: "without instances", 567 | listOutput: &ecs.ListContainerInstancesOutput{ 568 | ContainerInstanceArns: []*string{}, 569 | }, 570 | listOutput2: &ecs.ListContainerInstancesOutput{ 571 | ContainerInstanceArns: []*string{}, 572 | }, 573 | expectedOut: 
[]*string{}, 574 | }, 575 | { 576 | name: "list fail", 577 | listError: errors.New("failed to list instances"), 578 | listOutput: &ecs.ListContainerInstancesOutput{ 579 | ContainerInstanceArns: []*string{}, 580 | }, 581 | listOutput2: &ecs.ListContainerInstancesOutput{ 582 | ContainerInstanceArns: []*string{}, 583 | }, 584 | expectedError: "failed to list container instances", 585 | }, 586 | } 587 | 588 | for _, tc := range cases { 589 | t.Run(tc.name, func(t *testing.T) { 590 | mockECS := MockECS{ 591 | ListContainerInstancesPagesFn: func(input *ecs.ListContainerInstancesInput, fn func(*ecs.ListContainerInstancesOutput, bool) bool) error { 592 | assert.Equal(t, ecs.ContainerInstanceStatusActive, aws.StringValue(input.Status)) 593 | fn(tc.listOutput, true) 594 | fn(tc.listOutput2, false) 595 | return tc.listError 596 | }, 597 | } 598 | u := updater{ecs: mockECS} 599 | actual, err := u.listContainerInstances() 600 | if tc.expectedOut != nil { 601 | assert.EqualValues(t, tc.expectedOut, actual) 602 | assert.NoError(t, err) 603 | } else { 604 | assert.Empty(t, actual) 605 | assert.ErrorIs(t, err, tc.listError) 606 | assert.Contains(t, err.Error(), tc.expectedError) 607 | } 608 | }) 609 | } 610 | } 611 | 612 | func TestFilterBottlerocketInstances(t *testing.T) { 613 | output := &ecs.DescribeContainerInstancesOutput{ 614 | ContainerInstances: []*ecs.ContainerInstance{{ 615 | // Bottlerocket with single attribute 616 | Attributes: []*ecs.Attribute{{Name: aws.String("bottlerocket.variant")}}, 617 | ContainerInstanceArn: aws.String("cont-inst-br1"), 618 | Ec2InstanceId: aws.String("ec2-id-br1"), 619 | }, { 620 | // Bottlerocket with extra attribute 621 | Attributes: []*ecs.Attribute{ 622 | {Name: aws.String("different-attribute")}, 623 | {Name: aws.String("bottlerocket.variant")}, 624 | }, 625 | ContainerInstanceArn: aws.String("cont-inst-br2"), 626 | Ec2InstanceId: aws.String("ec2-id-br2"), 627 | }, { 628 | // Not Bottlerocket, single attribute 629 | Attributes: 
[]*ecs.Attribute{ 630 | {Name: aws.String("different-attribute")}, 631 | }, 632 | ContainerInstanceArn: aws.String("cont-inst-not1"), 633 | Ec2InstanceId: aws.String("ec2-id-not1"), 634 | }, { 635 | // Not Bottlerocket, no attribute 636 | ContainerInstanceArn: aws.String("cont-inst-not2"), 637 | Ec2InstanceId: aws.String("ec2-id-not2"), 638 | }}, 639 | } 640 | expected := []instance{ 641 | { 642 | instanceID: "ec2-id-br1", 643 | containerInstanceID: "cont-inst-br1", 644 | }, 645 | { 646 | instanceID: "ec2-id-br2", 647 | containerInstanceID: "cont-inst-br2", 648 | }, 649 | } 650 | 651 | mockECS := MockECS{ 652 | DescribeContainerInstancesFn: func(_ *ecs.DescribeContainerInstancesInput) (*ecs.DescribeContainerInstancesOutput, error) { 653 | return output, nil 654 | }, 655 | } 656 | u := updater{ecs: mockECS} 657 | 658 | actual, err := u.filterBottlerocketInstances([]*string{ 659 | aws.String("ec2-id-br1"), 660 | aws.String("ec2-id-br2"), 661 | aws.String("ec2-id-not1"), 662 | aws.String("ec2-id-not2"), 663 | }) 664 | require.NoError(t, err) 665 | assert.EqualValues(t, expected, actual) 666 | } 667 | 668 | func TestPaginatedFilterBottlerocketInstancesAllFail(t *testing.T) { 669 | instances := make([]*string, 0) 670 | for i := 0; i < 150; i++ { 671 | ec2ID := "ec2-id-br" + strconv.Itoa(i) 672 | instances = append(instances, aws.String(ec2ID)) 673 | } 674 | 675 | responses := []struct { 676 | inputLen int 677 | ContainerInstances []*ecs.ContainerInstance 678 | err error 679 | }{{ 680 | 100, 681 | nil, 682 | errors.New("Failed to describe container instances"), 683 | }, { 684 | 50, 685 | nil, 686 | errors.New("Failed to describe container instances"), 687 | }} 688 | 689 | callCount := 0 690 | mockECS := MockECS{ 691 | DescribeContainerInstancesFn: func(input *ecs.DescribeContainerInstancesInput) (*ecs.DescribeContainerInstancesOutput, error) { 692 | require.Less(t, callCount, len(responses)) 693 | resp := responses[callCount] 694 | callCount++ 695 | assert.Equal(t, 
resp.inputLen, len(input.ContainerInstances)) 696 | return &ecs.DescribeContainerInstancesOutput{ContainerInstances: resp.ContainerInstances}, resp.err 697 | }, 698 | } 699 | 700 | u := updater{ecs: mockECS} 701 | actual, err := u.filterBottlerocketInstances(instances) 702 | require.Error(t, err) 703 | assert.Empty(t, actual) 704 | assert.Contains(t, err.Error(), "Failed to describe container instances") 705 | } 706 | 707 | func TestPaginatedFilterBottlerocketInstancesSingleFailure(t *testing.T) { 708 | descOut := make([]*ecs.ContainerInstance, 0) 709 | instances := make([]*string, 0) 710 | expected := make([]instance, 0) 711 | for i := 0; i < 150; i++ { 712 | instanceARN := "cont-inst-br" + strconv.Itoa(i) 713 | ec2ID := "ec2-id-br" + strconv.Itoa(i) 714 | instances = append(instances, aws.String(ec2ID)) 715 | descOut = append(descOut, &ecs.ContainerInstance{ 716 | Attributes: []*ecs.Attribute{{Name: aws.String("bottlerocket.variant")}}, 717 | ContainerInstanceArn: aws.String(instanceARN), 718 | Ec2InstanceId: aws.String(ec2ID), 719 | }) 720 | expected = append(expected, instance{ 721 | instanceID: ec2ID, 722 | containerInstanceID: instanceARN, 723 | }) 724 | } 725 | 726 | responses := []struct { 727 | inputLen int 728 | ContainerInstances []*ecs.ContainerInstance 729 | err error 730 | }{{ 731 | 100, 732 | nil, 733 | errors.New("Failed to describe container instances"), 734 | }, { 735 | 50, 736 | descOut[100:], 737 | nil, 738 | }} 739 | 740 | callCount := 0 741 | mockECS := MockECS{ 742 | DescribeContainerInstancesFn: func(input *ecs.DescribeContainerInstancesInput) (*ecs.DescribeContainerInstancesOutput, error) { 743 | require.Less(t, callCount, len(responses)) 744 | resp := responses[callCount] 745 | callCount++ 746 | assert.Equal(t, resp.inputLen, len(input.ContainerInstances)) 747 | return &ecs.DescribeContainerInstancesOutput{ContainerInstances: resp.ContainerInstances}, resp.err 748 | }, 749 | } 750 | 751 | u := updater{ecs: mockECS} 752 | actual, err := 
u.filterBottlerocketInstances(instances) 753 | require.NoError(t, err) 754 | assert.EqualValues(t, expected[100:], actual, "should contain only the last 50 instnaces") 755 | } 756 | 757 | func TestPaginatedFilterBottlerocketInstancesNoBR(t *testing.T) { 758 | descOut := make([]*ecs.ContainerInstance, 0) 759 | instances := make([]*string, 0) 760 | for i := 0; i < 150; i++ { 761 | instanceARN := "cont-inst-br" + strconv.Itoa(i) 762 | ec2ID := "ec2-id-br" + strconv.Itoa(i) 763 | instances = append(instances, aws.String(ec2ID)) 764 | descOut = append(descOut, &ecs.ContainerInstance{ 765 | Attributes: []*ecs.Attribute{{Name: aws.String("nottlerocket.variant")}}, 766 | ContainerInstanceArn: aws.String(instanceARN), 767 | Ec2InstanceId: aws.String(ec2ID), 768 | }) 769 | } 770 | 771 | responses := []struct { 772 | inputLen int 773 | ContainerInstances []*ecs.ContainerInstance 774 | err error 775 | }{{ 776 | 100, 777 | descOut[:100], 778 | nil, 779 | }, { 780 | 50, 781 | descOut[100:], 782 | nil, 783 | }} 784 | 785 | callCount := 0 786 | mockECS := MockECS{ 787 | DescribeContainerInstancesFn: func(input *ecs.DescribeContainerInstancesInput) (*ecs.DescribeContainerInstancesOutput, error) { 788 | require.Less(t, callCount, len(responses)) 789 | resp := responses[callCount] 790 | callCount++ 791 | assert.Equal(t, resp.inputLen, len(input.ContainerInstances)) 792 | return &ecs.DescribeContainerInstancesOutput{ContainerInstances: resp.ContainerInstances}, resp.err 793 | }, 794 | } 795 | 796 | u := updater{ecs: mockECS} 797 | actual, err := u.filterBottlerocketInstances(instances) 798 | require.NoError(t, err) 799 | assert.Empty(t, actual) 800 | } 801 | 802 | func TestPaginatedFilterBottlerocketInstancesAllBRInstances(t *testing.T) { 803 | descOut := make([]*ecs.ContainerInstance, 0) 804 | instances := make([]*string, 0) 805 | expected := make([]instance, 0) 806 | for i := 0; i < 150; i++ { 807 | instanceARN := "cont-inst-br" + strconv.Itoa(i) 808 | ec2ID := "ec2-id-br" + 
strconv.Itoa(i) 809 | instances = append(instances, aws.String(ec2ID)) 810 | descOut = append(descOut, &ecs.ContainerInstance{ 811 | Attributes: []*ecs.Attribute{{Name: aws.String("bottlerocket.variant")}}, 812 | ContainerInstanceArn: aws.String(instanceARN), 813 | Ec2InstanceId: aws.String(ec2ID), 814 | }) 815 | expected = append(expected, instance{ 816 | instanceID: ec2ID, 817 | containerInstanceID: instanceARN, 818 | }) 819 | } 820 | 821 | responses := []struct { 822 | inputLen int 823 | ContainerInstances []*ecs.ContainerInstance 824 | err error 825 | }{{ 826 | 100, 827 | descOut[:100], 828 | nil, 829 | }, { 830 | 50, 831 | descOut[100:], 832 | nil, 833 | }} 834 | 835 | callCount := 0 836 | mockECS := MockECS{ 837 | DescribeContainerInstancesFn: func(input *ecs.DescribeContainerInstancesInput) (*ecs.DescribeContainerInstancesOutput, error) { 838 | require.Less(t, callCount, len(responses)) 839 | resp := responses[callCount] 840 | callCount++ 841 | assert.Equal(t, resp.inputLen, len(input.ContainerInstances)) 842 | return &ecs.DescribeContainerInstancesOutput{ContainerInstances: resp.ContainerInstances}, resp.err 843 | }, 844 | } 845 | 846 | u := updater{ecs: mockECS} 847 | actual, err := u.filterBottlerocketInstances(instances) 848 | require.NoError(t, err) 849 | assert.EqualValues(t, expected, actual, "should contain all the instances") 850 | } 851 | 852 | func TestEligible(t *testing.T) { 853 | cases := []struct { 854 | name string 855 | listOut *ecs.ListTasksOutput 856 | describeOut *ecs.DescribeTasksOutput 857 | expectedOk bool 858 | }{ 859 | { 860 | name: "only service tasks", 861 | listOut: &ecs.ListTasksOutput{ 862 | TaskArns: []*string{ 863 | aws.String("task-arn-1"), 864 | }, 865 | }, 866 | describeOut: &ecs.DescribeTasksOutput{ 867 | Tasks: []*ecs.Task{ 868 | { 869 | // contains proper prefix "ecs-svc" for task started by service 870 | StartedBy: aws.String("ecs-svc/svc-id"), 871 | }, 872 | }, 873 | }, 874 | expectedOk: true, 875 | }, { 876 | name: 
"no task", 877 | listOut: &ecs.ListTasksOutput{ 878 | TaskArns: []*string{}, 879 | }, 880 | expectedOk: true, 881 | }, { 882 | name: "non service task", 883 | listOut: &ecs.ListTasksOutput{ 884 | TaskArns: []*string{ 885 | aws.String("task-arn-1"), 886 | }, 887 | }, 888 | describeOut: &ecs.DescribeTasksOutput{ 889 | Tasks: []*ecs.Task{{ 890 | // Does not contain prefix "ecs-svc" 891 | StartedBy: aws.String("standalone-task-id"), 892 | }}, 893 | }, 894 | expectedOk: false, 895 | }, { 896 | name: "non service task empty StartedBy", 897 | listOut: &ecs.ListTasksOutput{ 898 | TaskArns: []*string{ 899 | aws.String("task-arn-1"), 900 | }, 901 | }, 902 | describeOut: &ecs.DescribeTasksOutput{ 903 | Tasks: []*ecs.Task{{}}, 904 | }, 905 | expectedOk: false, 906 | }, { 907 | name: "service and non service tasks", 908 | listOut: &ecs.ListTasksOutput{ 909 | TaskArns: []*string{ 910 | aws.String("task-arn-1"), 911 | aws.String("task-arn-2"), 912 | }, 913 | }, 914 | describeOut: &ecs.DescribeTasksOutput{ 915 | Tasks: []*ecs.Task{{ 916 | // Does not contain prefix "ecs-svc" 917 | StartedBy: aws.String("standalone-task-id"), 918 | }, { 919 | // contains proper prefix "ecs-svc" for task started by service 920 | StartedBy: aws.String("ecs-svc/svc-id"), 921 | }}, 922 | }, 923 | expectedOk: false, 924 | }, 925 | } 926 | for _, tc := range cases { 927 | t.Run(tc.name, func(t *testing.T) { 928 | mockECS := MockECS{ 929 | ListTasksFn: func(input *ecs.ListTasksInput) (*ecs.ListTasksOutput, error) { 930 | assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster)) 931 | assert.Equal(t, "cont-inst-id", aws.StringValue(input.ContainerInstance)) 932 | return tc.listOut, nil 933 | }, 934 | DescribeTasksFn: func(input *ecs.DescribeTasksInput) (*ecs.DescribeTasksOutput, error) { 935 | assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster)) 936 | assert.Equal(t, tc.listOut.TaskArns, input.Tasks) 937 | return tc.describeOut, nil 938 | }, 939 | } 940 | u := updater{ecs: mockECS, 
cluster: "test-cluster"} 941 | ok, err := u.eligible("cont-inst-id") 942 | require.NoError(t, err) 943 | assert.Equal(t, ok, tc.expectedOk) 944 | }) 945 | } 946 | } 947 | 948 | func TestEligibleErr(t *testing.T) { 949 | t.Run("list task err", func(t *testing.T) { 950 | listErr := errors.New("failed to list tasks") 951 | mockECS := MockECS{ 952 | ListTasksFn: func(input *ecs.ListTasksInput) (*ecs.ListTasksOutput, error) { 953 | assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster)) 954 | assert.Equal(t, "cont-inst-id", aws.StringValue(input.ContainerInstance)) 955 | return nil, listErr 956 | }, 957 | } 958 | u := updater{ecs: mockECS, cluster: "test-cluster"} 959 | ok, err := u.eligible("cont-inst-id") 960 | require.Error(t, err) 961 | assert.ErrorIs(t, err, listErr) 962 | assert.False(t, ok) 963 | }) 964 | 965 | t.Run("describe task err", func(t *testing.T) { 966 | describeErr := errors.New("failed to describe tasks") 967 | mockECS := MockECS{ 968 | ListTasksFn: func(input *ecs.ListTasksInput) (*ecs.ListTasksOutput, error) { 969 | assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster)) 970 | assert.Equal(t, "cont-inst-id", aws.StringValue(input.ContainerInstance)) 971 | return &ecs.ListTasksOutput{ 972 | TaskArns: []*string{ 973 | aws.String("task-arn-1"), 974 | }, 975 | }, nil 976 | }, 977 | DescribeTasksFn: func(input *ecs.DescribeTasksInput) (*ecs.DescribeTasksOutput, error) { 978 | assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster)) 979 | assert.Equal(t, []*string{ 980 | aws.String("task-arn-1"), 981 | }, input.Tasks) 982 | return nil, describeErr 983 | }, 984 | } 985 | u := updater{ecs: mockECS, cluster: "test-cluster"} 986 | ok, err := u.eligible("cont-inst-id") 987 | require.Error(t, err) 988 | assert.ErrorIs(t, err, describeErr) 989 | assert.False(t, ok) 990 | }) 991 | } 992 | 993 | func TestDrainInstance(t *testing.T) { 994 | stateChangeCalls := []string{} 995 | mockStateChange := func(input 
*ecs.UpdateContainerInstancesStateInput) (*ecs.UpdateContainerInstancesStateOutput, error) { 996 | stateChangeCalls = append(stateChangeCalls, aws.StringValue(input.Status)) 997 | assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster)) 998 | assert.Equal(t, []*string{aws.String("cont-inst-id")}, input.ContainerInstances) 999 | return &ecs.UpdateContainerInstancesStateOutput{ 1000 | Failures: []*ecs.Failure{}, 1001 | }, nil 1002 | } 1003 | mockListTasks := func(input *ecs.ListTasksInput) (*ecs.ListTasksOutput, error) { 1004 | assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster)) 1005 | assert.Equal(t, "cont-inst-id", aws.StringValue(input.ContainerInstance)) 1006 | return &ecs.ListTasksOutput{ 1007 | TaskArns: []*string{ 1008 | aws.String("task-arn-1"), 1009 | }, 1010 | }, nil 1011 | } 1012 | cleanup := func() { 1013 | stateChangeCalls = []string{} 1014 | } 1015 | 1016 | t.Run("no tasks success", func(t *testing.T) { 1017 | defer cleanup() 1018 | listTaskCount := 0 1019 | mockECS := MockECS{ 1020 | UpdateContainerInstancesStateFn: mockStateChange, 1021 | ListTasksFn: func(input *ecs.ListTasksInput) (*ecs.ListTasksOutput, error) { 1022 | assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster)) 1023 | assert.Equal(t, "cont-inst-id", aws.StringValue(input.ContainerInstance)) 1024 | listTaskCount++ 1025 | return &ecs.ListTasksOutput{ 1026 | TaskArns: []*string{}, 1027 | }, nil 1028 | }, 1029 | } 1030 | u := updater{ecs: mockECS, cluster: "test-cluster"} 1031 | err := u.drainInstance("cont-inst-id") 1032 | require.NoError(t, err) 1033 | assert.Equal(t, 1, listTaskCount) 1034 | assert.Equal(t, []string{"DRAINING"}, stateChangeCalls) 1035 | }) 1036 | 1037 | t.Run("with tasks success", func(t *testing.T) { 1038 | defer cleanup() 1039 | waitCount := 0 1040 | mockECS := MockECS{ 1041 | UpdateContainerInstancesStateFn: mockStateChange, 1042 | ListTasksFn: mockListTasks, 1043 | WaitUntilTasksStoppedWithContextFn: func(_ aws.Context, input 
*ecs.DescribeTasksInput, _ ...request.WaiterOption) error { 1044 | assert.Equal(t, []*string{ 1045 | aws.String("task-arn-1"), 1046 | }, input.Tasks) 1047 | assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster)) 1048 | waitCount++ 1049 | return nil 1050 | }, 1051 | } 1052 | u := updater{ecs: mockECS, cluster: "test-cluster"} 1053 | err := u.drainInstance("cont-inst-id") 1054 | require.NoError(t, err) 1055 | assert.Equal(t, []string{"DRAINING"}, stateChangeCalls) 1056 | assert.Equal(t, 1, waitCount) 1057 | }) 1058 | 1059 | t.Run("state change err", func(t *testing.T) { 1060 | defer cleanup() 1061 | stateOutErr := errors.New("failed to change state") 1062 | mockECS := MockECS{ 1063 | UpdateContainerInstancesStateFn: func(input *ecs.UpdateContainerInstancesStateInput) (*ecs.UpdateContainerInstancesStateOutput, error) { 1064 | assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster)) 1065 | assert.Equal(t, []*string{aws.String("cont-inst-id")}, input.ContainerInstances) 1066 | return nil, stateOutErr 1067 | }, 1068 | } 1069 | u := updater{ecs: mockECS, cluster: "test-cluster"} 1070 | err := u.drainInstance("cont-inst-id") 1071 | require.Error(t, err) 1072 | assert.ErrorIs(t, err, stateOutErr) 1073 | }) 1074 | 1075 | t.Run("state change api err", func(t *testing.T) { 1076 | defer cleanup() 1077 | stateOutAPIFailure := &ecs.UpdateContainerInstancesStateOutput{ 1078 | Failures: []*ecs.Failure{ 1079 | { 1080 | Reason: aws.String("failed"), 1081 | }, 1082 | }, 1083 | } 1084 | mockECS := MockECS{ 1085 | UpdateContainerInstancesStateFn: func(input *ecs.UpdateContainerInstancesStateInput) (*ecs.UpdateContainerInstancesStateOutput, error) { 1086 | stateChangeCalls = append(stateChangeCalls, aws.StringValue(input.Status)) 1087 | assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster)) 1088 | assert.Equal(t, []*string{aws.String("cont-inst-id")}, input.ContainerInstances) 1089 | return stateOutAPIFailure, nil 1090 | }, 1091 | } 1092 | u := updater{ecs: 
mockECS, cluster: "test-cluster"} 1093 | err := u.drainInstance("cont-inst-id") 1094 | require.Error(t, err) 1095 | assert.Contains(t, err.Error(), fmt.Sprintf("%v", stateOutAPIFailure.Failures)) 1096 | assert.Equal(t, []string{"DRAINING", "ACTIVE"}, stateChangeCalls) 1097 | }) 1098 | 1099 | t.Run("list task err", func(t *testing.T) { 1100 | defer cleanup() 1101 | listTaskErr := errors.New("failed to list tasks") 1102 | mockECS := MockECS{ 1103 | UpdateContainerInstancesStateFn: mockStateChange, 1104 | ListTasksFn: func(input *ecs.ListTasksInput) (*ecs.ListTasksOutput, error) { 1105 | assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster)) 1106 | assert.Equal(t, "cont-inst-id", aws.StringValue(input.ContainerInstance)) 1107 | return nil, listTaskErr 1108 | }, 1109 | } 1110 | u := updater{ecs: mockECS, cluster: "test-cluster"} 1111 | err := u.drainInstance("cont-inst-id") 1112 | require.Error(t, err) 1113 | assert.ErrorIs(t, err, listTaskErr) 1114 | assert.Equal(t, []string{"DRAINING", "ACTIVE"}, stateChangeCalls) 1115 | }) 1116 | 1117 | t.Run("wait tasks stop err", func(t *testing.T) { 1118 | defer cleanup() 1119 | waitTaskErr := errors.New("failed to wait for tasks to stop") 1120 | mockECS := MockECS{ 1121 | UpdateContainerInstancesStateFn: mockStateChange, 1122 | ListTasksFn: mockListTasks, 1123 | WaitUntilTasksStoppedWithContextFn: func(_ aws.Context, input *ecs.DescribeTasksInput, _ ...request.WaiterOption) error { 1124 | assert.Equal(t, []*string{ 1125 | aws.String("task-arn-1"), 1126 | }, input.Tasks) 1127 | assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster)) 1128 | return waitTaskErr 1129 | }, 1130 | } 1131 | u := updater{ecs: mockECS, cluster: "test-cluster"} 1132 | err := u.drainInstance("cont-inst-id") 1133 | require.Error(t, err) 1134 | assert.ErrorIs(t, err, waitTaskErr) 1135 | assert.Equal(t, []string{"DRAINING", "ACTIVE"}, stateChangeCalls) 1136 | }) 1137 | } 1138 | 1139 | func TestUpdateInstance(t *testing.T) { 1140 | checkPattern 
:= "{\"update_state\": \"%s\", \"active_partition\": { \"image\": { \"version\": \"0.0.0\"}}}" 1141 | cases := []struct { 1142 | name string 1143 | invocationOut *ssm.GetCommandInvocationOutput 1144 | expectedSSMCommandCallOrder []string 1145 | expectedErr string 1146 | }{ 1147 | { 1148 | name: "update state available", 1149 | invocationOut: &ssm.GetCommandInvocationOutput{ 1150 | Status: aws.String("Success"), 1151 | StandardOutputContent: aws.String(fmt.Sprintf(checkPattern, updateStateAvailable)), 1152 | }, 1153 | expectedSSMCommandCallOrder: []string{"check-document", "apply-document", "reboot-document"}, 1154 | }, { 1155 | name: "update state ready", 1156 | invocationOut: &ssm.GetCommandInvocationOutput{ 1157 | Status: aws.String("Success"), 1158 | StandardOutputContent: aws.String(fmt.Sprintf(checkPattern, updateStateReady)), 1159 | }, 1160 | expectedSSMCommandCallOrder: []string{"check-document", "reboot-document"}, 1161 | }, { 1162 | name: "update state idle", 1163 | invocationOut: &ssm.GetCommandInvocationOutput{ 1164 | Status: aws.String("Success"), 1165 | StandardOutputContent: aws.String(fmt.Sprintf(checkPattern, updateStateIdle)), 1166 | }, 1167 | expectedSSMCommandCallOrder: []string{"check-document"}, 1168 | }, { 1169 | name: "update state staged", 1170 | invocationOut: &ssm.GetCommandInvocationOutput{ 1171 | Status: aws.String("Success"), 1172 | StandardOutputContent: aws.String(fmt.Sprintf(checkPattern, updateStateStaged)), 1173 | }, 1174 | expectedSSMCommandCallOrder: []string{"check-document"}, 1175 | expectedErr: "unexpected update state \"Staged\"; skipping instance", 1176 | }, 1177 | } 1178 | for _, tc := range cases { 1179 | t.Run(tc.name, func(t *testing.T) { 1180 | ssmCommandCallOrder := []string{} 1181 | mockSSM := MockSSM{ 1182 | SendCommandFn: func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) { 1183 | ssmCommandCallOrder = append(ssmCommandCallOrder, aws.StringValue(input.DocumentName)) 1184 | assert.Equal(t, 
[]*string{aws.String("instance-id")}, input.InstanceIds) 1185 | return &ssm.SendCommandOutput{ 1186 | Command: &ssm.Command{ 1187 | CommandId: aws.String("command-id"), 1188 | }, 1189 | }, nil 1190 | }, 1191 | GetCommandInvocationFn: func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) { 1192 | assert.Equal(t, "command-id", aws.StringValue(input.CommandId)) 1193 | assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId)) 1194 | return tc.invocationOut, nil 1195 | }, 1196 | WaitUntilCommandExecutedWithContextFn: func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error { 1197 | assert.Equal(t, "command-id", aws.StringValue(input.CommandId)) 1198 | assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId)) 1199 | return nil 1200 | }, 1201 | } 1202 | mockEC2 := MockEC2{ 1203 | WaitUntilInstanceStatusOkFn: func(input *ec2.DescribeInstanceStatusInput) error { 1204 | assert.Equal(t, []*string{aws.String("instance-id")}, input.InstanceIds) 1205 | return nil 1206 | }, 1207 | } 1208 | u := updater{ssm: mockSSM, ec2: mockEC2, checkDocument: "check-document", applyDocument: "apply-document", rebootDocument: "reboot-document"} 1209 | err := u.updateInstance(instance{ 1210 | instanceID: "instance-id", 1211 | containerInstanceID: "cont-inst-id", 1212 | bottlerocketVersion: "v0.1.0", 1213 | }) 1214 | if tc.expectedErr != "" { 1215 | require.Error(t, err) 1216 | assert.Contains(t, err.Error(), tc.expectedErr) 1217 | } else { 1218 | require.NoError(t, err) 1219 | } 1220 | assert.Equal(t, tc.expectedSSMCommandCallOrder, ssmCommandCallOrder) 1221 | }) 1222 | } 1223 | } 1224 | 1225 | func TestUpdateInstanceErr(t *testing.T) { 1226 | commandOutput := &ssm.SendCommandOutput{ 1227 | Command: &ssm.Command{ 1228 | CommandId: aws.String("command-id"), 1229 | }, 1230 | } 1231 | mockSendCommand := func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) { 1232 | assert.Equal(t, 
[]*string{aws.String("instance-id")}, input.InstanceIds) 1233 | return commandOutput, nil 1234 | } 1235 | mockGetCommandInvocation := func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) { 1236 | assert.Equal(t, "command-id", aws.StringValue(input.CommandId)) 1237 | assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId)) 1238 | return &ssm.GetCommandInvocationOutput{ 1239 | Status: aws.String("Success"), 1240 | StandardOutputContent: aws.String("{\"update_state\": \"Available\", \"active_partition\": { \"image\": { \"version\": \"0.0.0\"}}}"), 1241 | }, nil 1242 | } 1243 | mockWaitCommandExecution := func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error { 1244 | assert.Equal(t, "command-id", aws.StringValue(input.CommandId)) 1245 | assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId)) 1246 | return nil 1247 | } 1248 | 1249 | t.Run("check err", func(t *testing.T) { 1250 | checkErr := errors.New("failed to send check command") 1251 | mockSSM := MockSSM{ 1252 | SendCommandFn: func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) { 1253 | assert.Equal(t, "check-document", aws.StringValue(input.DocumentName)) 1254 | assert.Equal(t, []*string{aws.String("instance-id")}, input.InstanceIds) 1255 | return nil, checkErr 1256 | }, 1257 | } 1258 | u := updater{ssm: mockSSM, checkDocument: "check-document"} 1259 | err := u.updateInstance(instance{ 1260 | instanceID: "instance-id", 1261 | containerInstanceID: "cont-inst-id", 1262 | }) 1263 | require.Error(t, err) 1264 | assert.ErrorIs(t, err, checkErr) 1265 | }) 1266 | t.Run("apply err", func(t *testing.T) { 1267 | applyErr := errors.New("failed to send apply command") 1268 | mockSSM := MockSSM{ 1269 | SendCommandFn: func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) { 1270 | assert.Equal(t, []*string{aws.String("instance-id")}, input.InstanceIds) 1271 | if aws.StringValue(input.DocumentName) == "apply-document" { 
1272 | return nil, applyErr 1273 | } 1274 | return commandOutput, nil 1275 | }, 1276 | GetCommandInvocationFn: mockGetCommandInvocation, 1277 | WaitUntilCommandExecutedWithContextFn: mockWaitCommandExecution, 1278 | } 1279 | u := updater{ssm: mockSSM, checkDocument: "check-document", applyDocument: "apply-document"} 1280 | err := u.updateInstance(instance{ 1281 | instanceID: "instance-id", 1282 | containerInstanceID: "cont-inst-id", 1283 | }) 1284 | require.Error(t, err) 1285 | assert.ErrorIs(t, err, applyErr) 1286 | }) 1287 | t.Run("reboot err", func(t *testing.T) { 1288 | rebootErr := errors.New("failed to send reboot command") 1289 | mockSSM := MockSSM{ 1290 | SendCommandFn: func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) { 1291 | assert.Equal(t, []*string{aws.String("instance-id")}, input.InstanceIds) 1292 | if aws.StringValue(input.DocumentName) == "reboot-document" { 1293 | return nil, rebootErr 1294 | } 1295 | return commandOutput, nil 1296 | }, 1297 | GetCommandInvocationFn: mockGetCommandInvocation, 1298 | WaitUntilCommandExecutedWithContextFn: mockWaitCommandExecution, 1299 | } 1300 | u := updater{ssm: mockSSM, checkDocument: "check-document", applyDocument: "apply-document", rebootDocument: "reboot-document"} 1301 | err := u.updateInstance(instance{ 1302 | instanceID: "instance-id", 1303 | containerInstanceID: "cont-inst-id", 1304 | }) 1305 | require.Error(t, err) 1306 | assert.ErrorIs(t, err, rebootErr) 1307 | }) 1308 | t.Run("invocation err", func(t *testing.T) { 1309 | ssmGetInvocationErr := errors.New("failed to get command invocation") 1310 | mockSSM := MockSSM{ 1311 | SendCommandFn: mockSendCommand, 1312 | GetCommandInvocationFn: func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) { 1313 | assert.Equal(t, "command-id", aws.StringValue(input.CommandId)) 1314 | assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId)) 1315 | return nil, ssmGetInvocationErr 1316 | }, 1317 | 
WaitUntilCommandExecutedWithContextFn: mockWaitCommandExecution, 1318 | } 1319 | u := updater{ssm: mockSSM, checkDocument: "check-document"} 1320 | err := u.updateInstance(instance{ 1321 | instanceID: "instance-id", 1322 | containerInstanceID: "cont-inst-id", 1323 | }) 1324 | require.Error(t, err) 1325 | assert.ErrorIs(t, err, ssmGetInvocationErr) 1326 | }) 1327 | t.Run("wait ssm err", func(t *testing.T) { 1328 | waitExecErr := errors.New("failed to wait ssm execution complete") 1329 | mockSSM := MockSSM{ 1330 | SendCommandFn: mockSendCommand, 1331 | WaitUntilCommandExecutedWithContextFn: func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error { 1332 | assert.Equal(t, "command-id", aws.StringValue(input.CommandId)) 1333 | assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId)) 1334 | return waitExecErr 1335 | }, 1336 | GetCommandInvocationFn: func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) { 1337 | assert.Equal(t, "command-id", aws.StringValue(input.CommandId)) 1338 | assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId)) 1339 | return &ssm.GetCommandInvocationOutput{}, nil 1340 | }, 1341 | } 1342 | u := updater{ssm: mockSSM, checkDocument: "check-document"} 1343 | err := u.updateInstance(instance{ 1344 | instanceID: "instance-id", 1345 | containerInstanceID: "cont-inst-id", 1346 | }) 1347 | require.Error(t, err) 1348 | assert.ErrorIs(t, err, waitExecErr) 1349 | }) 1350 | t.Run("wait instance ok err", func(t *testing.T) { 1351 | waitErr := errors.New("failed to wait instance ok") 1352 | mockSSM := MockSSM{ 1353 | SendCommandFn: mockSendCommand, 1354 | GetCommandInvocationFn: mockGetCommandInvocation, 1355 | WaitUntilCommandExecutedWithContextFn: mockWaitCommandExecution, 1356 | } 1357 | 1358 | mockEC2 := MockEC2{ 1359 | WaitUntilInstanceStatusOkFn: func(input *ec2.DescribeInstanceStatusInput) error { 1360 | assert.Equal(t, []*string{aws.String("instance-id")}, 
input.InstanceIds)
				return waitErr
			},
		}
		u := updater{ssm: mockSSM, ec2: mockEC2, checkDocument: "check-document", applyDocument: "apply-document", rebootDocument: "reboot-document"}
		err := u.updateInstance(instance{
			instanceID:          "instance-id",
			containerInstanceID: "cont-inst-id",
		})
		require.Error(t, err)
		assert.ErrorIs(t, err, waitErr)
	})
}

// TestVerifyUpdate exercises updater.verifyUpdate against a stubbed SSM client
// whose check command always completes with status "Success". The instance is
// recorded at version "0.0.0"; each case varies the update state and active
// image version reported in the command output and states the boolean that
// verifyUpdate is expected to return for it.
func TestVerifyUpdate(t *testing.T) {
	checkPattern := "{\"update_state\": \"%s\", \"active_partition\": { \"image\": { \"version\": \"%s\"}}}"
	cases := []struct {
		name          string
		invocationOut *ssm.GetCommandInvocationOutput
		expectedOk    bool
	}{
		{
			name: "verify success",
			invocationOut: &ssm.GetCommandInvocationOutput{
				Status:                aws.String("Success"),
				StandardOutputContent: aws.String(fmt.Sprintf(checkPattern, updateStateIdle, "0.0.1")),
			},
			expectedOk: true,
		},
		{
			name: "version is same",
			invocationOut: &ssm.GetCommandInvocationOutput{
				Status:                aws.String("Success"),
				StandardOutputContent: aws.String(fmt.Sprintf(checkPattern, updateStateIdle, "0.0.0")),
			},
			expectedOk: false,
		},
		{
			name: "another version is available",
			invocationOut: &ssm.GetCommandInvocationOutput{
				Status:                aws.String("Success"),
				StandardOutputContent: aws.String(fmt.Sprintf(checkPattern, updateStateAvailable, "0.0.1")),
			},
			expectedOk: true,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			mockSSM := MockSSM{
				SendCommandFn: func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) {
					assert.Equal(t, "check-document", aws.StringValue(input.DocumentName))
					return &ssm.SendCommandOutput{
						Command: &ssm.Command{
							CommandId: aws.String("command-id"),
						},
					}, nil
				},
				GetCommandInvocationFn: func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) {
					assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
					assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId))
					return tc.invocationOut, nil
				},
				WaitUntilCommandExecutedWithContextFn: func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error {
					assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
					assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId))
					return nil
				},
			}
			u := updater{ssm: mockSSM, checkDocument: "check-document"}
			ok, err := u.verifyUpdate(instance{
				instanceID:          "instance-id",
				containerInstanceID: "cont-inst-id",
				bottlerocketVersion: "0.0.0",
			})
			require.NoError(t, err)
			assert.Equal(t, tc.expectedOk, ok)
		})
	}
}

// TestVerifyUpdateErr covers the failure paths of updater.verifyUpdate: the
// check command cannot be sent, waiting for the command fails, fetching the
// invocation fails, and the invocation succeeds but carries no output to
// parse. Every subtest expects an error and a false result.
func TestVerifyUpdateErr(t *testing.T) {
	// The shared stubs are built per subtest (taking that subtest's *testing.T)
	// so their assertions are reported against the subtest that triggered them
	// and so require.* is only ever invoked on the goroutine that owns the T.
	sendCheckCommand := func(t *testing.T) func(*ssm.SendCommandInput) (*ssm.SendCommandOutput, error) {
		return func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) {
			assert.Equal(t, "check-document", aws.StringValue(input.DocumentName))
			// Fatal on a length mismatch: indexing below would otherwise panic.
			require.Len(t, input.InstanceIds, 1)
			assert.Equal(t, "instance-id", aws.StringValue(input.InstanceIds[0]))
			return &ssm.SendCommandOutput{
				Command: &ssm.Command{
					CommandId: aws.String("command-id"),
				},
			}, nil
		}
	}
	waitCommandExecuted := func(t *testing.T) func(aws.Context, *ssm.GetCommandInvocationInput, ...request.WaiterOption) error {
		return func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error {
			assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
			assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId))
			return nil
		}
	}
	getSuccessfulInvocation := func(t *testing.T) func(*ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) {
		return func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) {
			assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
			assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId))
			// Success status but no StandardOutputContent, so there is nothing to parse.
			return &ssm.GetCommandInvocationOutput{
				Status: aws.String("Success"),
			}, nil
		}
	}
	// Every subtest verifies the same instance.
	target := instance{
		instanceID:          "instance-id",
		containerInstanceID: "cont-inst-id",
		bottlerocketVersion: "0.0.0",
	}

	t.Run("check err", func(t *testing.T) {
		ssmCheckErr := errors.New("failed to send check command")
		mockSSM := MockSSM{
			SendCommandFn: func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) {
				assert.Equal(t, "check-document", aws.StringValue(input.DocumentName))
				require.Len(t, input.InstanceIds, 1)
				assert.Equal(t, "instance-id", aws.StringValue(input.InstanceIds[0]))
				return nil, ssmCheckErr
			},
		}
		u := updater{ssm: mockSSM, checkDocument: "check-document"}
		ok, err := u.verifyUpdate(target)
		require.Error(t, err)
		assert.ErrorIs(t, err, ssmCheckErr)
		assert.False(t, ok)
	})

	t.Run("wait ssm err", func(t *testing.T) {
		waitExecErr := errors.New("failed to wait ssm execution complete")
		mockSSM := MockSSM{
			SendCommandFn: sendCheckCommand(t),
			WaitUntilCommandExecutedWithContextFn: func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error {
				assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
				assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId))
				return waitExecErr
			},
			GetCommandInvocationFn: func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) {
				assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
				assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId))
				return &ssm.GetCommandInvocationOutput{}, nil
			},
		}
		u := updater{ssm: mockSSM, checkDocument: "check-document"}
		ok, err := u.verifyUpdate(target)
		require.Error(t, err)
		assert.ErrorIs(t, err, waitExecErr)
		assert.False(t, ok)
	})

	t.Run("invocation err", func(t *testing.T) {
		ssmGetInvocationErr := errors.New("failed to get command invocation")
		mockSSM := MockSSM{
			SendCommandFn:                         sendCheckCommand(t),
			WaitUntilCommandExecutedWithContextFn: waitCommandExecuted(t),
			GetCommandInvocationFn: func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) {
				assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
				assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId))
				return nil, ssmGetInvocationErr
			},
		}
		u := updater{ssm: mockSSM, checkDocument: "check-document"}
		ok, err := u.verifyUpdate(target)
		require.Error(t, err)
		assert.ErrorIs(t, err, ssmGetInvocationErr)
		assert.False(t, ok)
	})

	t.Run("parse output err", func(t *testing.T) {
		mockSSM := MockSSM{
			SendCommandFn:                         sendCheckCommand(t),
			WaitUntilCommandExecutedWithContextFn: waitCommandExecuted(t),
			GetCommandInvocationFn:                getSuccessfulInvocation(t),
		}
		u := updater{ssm: mockSSM, checkDocument: "check-document"}
		ok, err := u.verifyUpdate(target)
		require.Error(t, err)
		assert.Contains(t, err.Error(), `failed to parse command output "", manual verification required`)
		assert.False(t, ok)
	})
}

// TestActivateInstance checks updater.activateInstance against a stubbed ECS
// client. A case with an empty expectedErr must succeed; otherwise the returned
// error must contain expectedErr. The table expects an API failure with reason
// "INACTIVE" to succeed, while a failure with reason "OTHER" must surface as
// an error.
func TestActivateInstance(t *testing.T) {
	cases := []struct {
		name        string
		stateOut    *ecs.UpdateContainerInstancesStateOutput
		stateErr    error
		expectedErr string
	}{
		{
			name:     "activate success",
			stateOut: &ecs.UpdateContainerInstancesStateOutput{},
		},
		{
			name: "activate api fail",
			stateOut: &ecs.UpdateContainerInstancesStateOutput{
				Failures: []*ecs.Failure{
					{
						Reason: aws.String("OTHER"),
					},
				},
			},
			// NOTE(review): this chunk of the file was whitespace-collapsed; confirm
			// the spacing after "\n" matches the formatted failure emitted by the code.
			expectedErr: "API failures while activating: [{\n Reason: \"OTHER\"\n}]",
		},
		{
			name: "activate api fail inactive",
			stateOut: &ecs.UpdateContainerInstancesStateOutput{
				Failures: []*ecs.Failure{
					{
						Reason: aws.String("INACTIVE"),
					},
				},
			},
		},
		{
			name:        "activate failure",
			stateErr:    errors.New("failed to activate"),
			expectedErr: "failed to activate",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			mockECS := MockECS{
				UpdateContainerInstancesStateFn: func(_ *ecs.UpdateContainerInstancesStateInput) (*ecs.UpdateContainerInstancesStateOutput, error) {
					return tc.stateOut, tc.stateErr
				},
			}
			u := updater{ecs: mockECS}
			err := u.activateInstance("cont-inst-id")
			if tc.expectedErr == "" {
				require.NoError(t, err)
				return
			}
			require.Error(t, err)
			assert.Contains(t, err.Error(), tc.expectedErr)
		})
	}
}

// TestAlreadyRunning checks updater.alreadyRunning against a stubbed ECS
// ListTasks call. The table expects true when two task ARNs are listed, false
// when only one is listed, and false together with an error when listing
// fails.
func TestAlreadyRunning(t *testing.T) {
	cases := []struct {
		name        string
		listOut     *ecs.ListTasksOutput
		listErr     error
		expectedOk  bool
		expectedErr string
	}{
		{
			name: "success",
			listOut: &ecs.ListTasksOutput{
				TaskArns: []*string{
					aws.String("task-arn-1"),
					aws.String("task-arn-2"),
				},
			},
			expectedOk: true,
		},
		{
			name: "only one task",
			listOut: &ecs.ListTasksOutput{
				TaskArns: []*string{
					aws.String("tarsk-arn-1"),
				},
			},
			expectedOk: false,
		},
		{
			name:        "fail list task",
			listErr:     errors.New("failed to list task"),
			expectedOk:  false,
			expectedErr: "failed to list task",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			mockECS := MockECS{
				ListTasksFn: func(_ *ecs.ListTasksInput) (*ecs.ListTasksOutput, error) {
					return tc.listOut, tc.listErr
				},
			}
			u := updater{ecs: mockECS, cluster: "ecs-cluster"}
			ok, err := u.alreadyRunning("updater-family")
			if tc.expectedErr == "" {
				require.NoError(t, err)
			} else {
				require.Error(t, err)
				assert.Contains(t, err.Error(), tc.expectedErr)
			}
			assert.Equal(t, tc.expectedOk, ok)
		})
	}
}
--------------------------------------------------------------------------------