├── clarify.toml
├── .gitignore
├── NOTICE
├── updater
    ├── .dockerignore
    ├── go.mod
    ├── main_test.go
    ├── go.sum
    ├── mock_test.go
    ├── main.go
    ├── aws.go
    └── aws_test.go
├── .golangci.yaml
├── CODE_OF_CONDUCT.md
├── .github
    ├── ISSUE_TEMPLATE
    │   └── feature.md
    ├── workflows
    │   ├── cfn-lint.yml
    │   └── ci.yml
    ├── dependabot.yaml
    └── pull_request_template.md
├── COPYRIGHT
├── integ
    ├── common.sh
    ├── README.md
    ├── stacks
    │   ├── cluster.yaml
    │   └── integ-shared.yaml
    ├── setup.sh
    ├── run-updater.sh
    └── cleanup.sh
├── LICENSE-MIT
├── Dockerfile
├── Dockerfile.licenses
├── CHANGELOG.md
├── Makefile
├── CONTRIBUTING.md
├── LICENSE-APACHE
├── stacks
    └── bottlerocket-ecs-updater.yaml
└── README.md


/clarify.toml:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.env
2 | /.idea
3 | /updater/bin
4 | 


--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | 


--------------------------------------------------------------------------------
/updater/.dockerignore:
--------------------------------------------------------------------------------
1 | *.tar*
2 | bin/
3 | vendor/
4 | bottlerocket-ecs-updater
5 | 


--------------------------------------------------------------------------------
/.golangci.yaml:
--------------------------------------------------------------------------------
 1 | linters:
 2 |   enable:
 3 |     - staticcheck
 4 |     - unconvert
 5 |     - goimports
 6 |     - revive
 7 |     - ineffassign
 8 |     - vet
 9 |     - unused
10 |     - misspell
11 |   disable:
12 |     - errcheck
13 | 
14 | run:
15 |   timeout: 3m
16 | 
17 | issues:
18 |   exclude-dirs:
19 |     - stacks
20 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | ## Code of Conduct
2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
4 | opensource-codeofconduct@amazon.com with any additional questions or comments.
5 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Request a change to the project
 4 | ---
 5 | 
 6 | <!--
 7 | Tips:
 8 | - Please search for similar requests, including closed issues.
 9 | - Please include details about the environment you're running in.
10 | -->
11 | 
12 | **What I'd like:**
13 | 
14 | 
15 | 
16 | **Any alternatives you've considered:**
17 | 
18 | 


--------------------------------------------------------------------------------
/.github/workflows/cfn-lint.yml:
--------------------------------------------------------------------------------
 1 | name: cfn-lint
 2 | on:
 3 |   push:
 4 |     paths:
 5 |       - 'stacks/*'
 6 |       - 'integ/stacks/*'
 7 |   pull_request:
 8 |     paths:
 9 |       - 'stacks/*'
10 |       - 'integ/stacks/*'
11 | jobs:
12 |   cfn-lint:
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
16 |       - run: pip install cfn-lint
17 |       - run: make cfn-lint
18 | 


--------------------------------------------------------------------------------
/.github/dependabot.yaml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | updates:
 3 | 
 4 |   # Maintain dependencies for GitHub Actions
 5 |   - package-ecosystem: "github-actions"
 6 |     directory: "/"
 7 |     schedule:
 8 |       interval: "weekly"
 9 |     labels:
10 |       - "area/dependencies"
11 |   - package-ecosystem: "gomod"
12 |     directory: "/updater" # go.mod and go.sum live in the updater/ subdirectory
13 |     schedule:
14 |       interval: "daily"
15 |     labels:
16 |       - "area/dependencies"
17 |     open-pull-requests-limit: 0
18 | 


--------------------------------------------------------------------------------
/updater/go.mod:
--------------------------------------------------------------------------------
 1 | module github.com/bottlerocket-os/bottlerocket-ecs-updater
 2 | 
 3 | go 1.19
 4 | 
 5 | require (
 6 | 	github.com/aws/aws-sdk-go v1.51.20
 7 | 	github.com/stretchr/testify v1.8.1
 8 | )
 9 | 
10 | replace golang.org/x/net => golang.org/x/net v0.8.0
11 | 
12 | require (
13 | 	github.com/davecgh/go-spew v1.1.1 // indirect
14 | 	github.com/jmespath/go-jmespath v0.4.0 // indirect
15 | 	github.com/pmezard/go-difflib v1.0.0 // indirect
16 | 	gopkg.in/yaml.v3 v3.0.1 // indirect
17 | )
18 | 


--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
 1 | <!--
 2 | Tips:
 3 | - Please read CONTRIBUTING.md to understand our process and our requests for PRs.
 4 | - Please file an issue before creating a PR so we can discuss the change and confirm it's not already being worked on.
 5 | -->
 6 | 
 7 | **Issue number:**
 8 | 
 9 | 
10 | 
11 | **Description of changes:**
12 | 
13 | 
14 | 
15 | **Testing done:**
16 | 
17 | 
18 | 
19 | **Terms of contribution:**
20 | 
21 | By submitting this pull request, I agree that this contribution is dual-licensed under the terms of both the Apache License, version 2.0, and the MIT license.
22 | 


--------------------------------------------------------------------------------
/COPYRIGHT:
--------------------------------------------------------------------------------
 1 | Copyright Amazon.com, Inc., its affiliates, or other contributors. All Rights Reserved.
 2 | 
 3 | Except as otherwise noted (below and/or in individual files), Bottlerocket is dual-licensed under
 4 | the Apache License, version 2.0 <LICENSE-APACHE> or the MIT license <LICENSE-MIT>, at your option.
 5 | 
 6 | Copyrights in Bottlerocket are retained by their contributors. No copyright assignment is required
 7 | to contribute to Bottlerocket. Contributions to Bottlerocket are explicitly made under both the
 8 | Apache License, version 2.0, and the MIT license. For full authorship information, see the version
 9 | control history.
10 | 
11 | Bottlerocket operating system images include packages written by third parties, which may carry
12 | their own copyright notices and license terms. These are available in /usr/share/licenses on the
13 | operating system images.
14 | 


--------------------------------------------------------------------------------
/integ/common.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Cloudformation stack template file name to set up VPC, security group, IAM roles, and log group
 4 | INTEG_STACK_TEMPLATE="integ-shared.yaml"
 5 | 
 6 | # Cloudformation stack template file name to set up an ECS cluster
 7 | CLUSTER_STACK_TEMPLATE="cluster.yaml"
 8 | 
 9 | # The stack name for deploying `integ-shared.yaml` template
10 | INTEG_STACK_NAME="ecs-updater-integ-shared"
11 | 
12 | # Prefix for ECS Updater stack name, resulting stack name will be below prefix + cluster name
13 | UPDATER_STACK_PREFIX="UPDATER-"
14 | 
15 | # Write "<level>: <message>" to stderr; the first argument is the level.
16 | log() {
17 |     local level="$1"
18 |     shift
19 |     echo "${level}: $*" >&2
20 | }
21 | 
22 | required_arg() {
23 |     local arg="${1:?}"
24 |     local value="${2}"
25 |     if [ -z "${value}" ]; then
26 |         echo "ERROR: ${arg} is required" >&2
27 |         exit 2
28 |     fi
29 | }
30 | 


--------------------------------------------------------------------------------
/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | MIT License
2 | Copyright Amazon.com, Inc., its affiliates, or other contributors. All Rights Reserved.
3 | 
4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including  without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to  the following conditions:
5 | 
6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
7 | 
8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN  NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
9 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | on:
 3 |   - push
 4 |   - pull_request
 5 | jobs:
 6 |   golangci:
 7 |     name: lint
 8 |     runs-on: ubuntu-latest
 9 |     steps:
10 |       - uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5
11 |         with:
12 |           go-version: 1.19
13 |       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
14 |       - name: golangci-lint
15 |         uses: golangci/golangci-lint-action@55c2c1448f86e01eaae002a5a3a9624417608d84
16 |         with:
17 |           version: latest
18 |           working-directory: updater
19 |   build:
20 |     name: build
21 |     runs-on: ubuntu-latest
22 |     steps:
23 |       - uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5
24 |         with:
25 |           go-version: 1.19
26 |       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
27 |       - run: make
28 |   test:
29 |     name: unit tests
30 |     runs-on: ubuntu-latest
31 |     steps:
32 |       - uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5
33 |         with:
34 |           go-version: 1.19
35 |       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
36 |       - run: make test
37 | 


--------------------------------------------------------------------------------
/updater/main_test.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"os"
 5 | 	"testing"
 6 | 
 7 | 	"github.com/stretchr/testify/assert"
 8 | 	"github.com/stretchr/testify/require"
 9 | )
10 | 
11 | // TestTaskDefFamily checks that taskDefFamily derives the task definition
12 | // family from the ARN held in the taskDefARNEnv environment variable, and
13 | // that malformed or non-task-definition ARNs produce the expected error.
14 | func TestTaskDefFamily(t *testing.T) {
15 | 	cases := []struct {
16 | 		name           string
17 | 		taskDefARN     string
18 | 		expectedErr    string
19 | 		expectedFamily string
20 | 	}{
21 | 		{
22 | 			name:           "success",
23 | 			taskDefARN:     "arn:aws:ecs:us-west-2:1234567:task-definition/updater-family:1",
24 | 			expectedFamily: "updater-family",
25 | 		},
26 | 		{
27 | 			name:           "fail parse arn",
28 | 			taskDefARN:     "arn:ecs:us-west-2:1234567updater-family:1",
29 | 			expectedFamily: "",
30 | 			expectedErr:    "arn: not enough sections",
31 | 		},
32 | 		{
33 | 			name:           "fail empty arn",
34 | 			taskDefARN:     "",
35 | 			expectedFamily: "",
36 | 			expectedErr:    "arn: invalid prefix",
37 | 		},
38 | 		{
39 | 			name:           "fail extract family",
40 | 			taskDefARN:     "arn:aws:ecs:us-west-2:1234567:task-def/updater-family1",
41 | 			expectedFamily: "",
42 | 			expectedErr:    "not a task definition arn:",
43 | 		},
44 | 	}
45 | 	for _, tc := range cases {
46 | 		t.Run(tc.name, func(t *testing.T) {
47 | 			// Record whether the variable was set at all, so that an unset
48 | 			// variable is unset again afterwards rather than left as "".
49 | 			originalValue, wasSet := os.LookupEnv(taskDefARNEnv)
50 | 			defer func() {
51 | 				if wasSet {
52 | 					os.Setenv(taskDefARNEnv, originalValue)
53 | 					return
54 | 				}
55 | 				os.Unsetenv(taskDefARNEnv)
56 | 			}()
57 | 			require.NoError(t, os.Setenv(taskDefARNEnv, tc.taskDefARN))
58 | 			family, err := taskDefFamily()
59 | 			if tc.expectedErr == "" {
60 | 				require.NoError(t, err)
61 | 			} else {
62 | 				require.Error(t, err)
63 | 				assert.Contains(t, err.Error(), tc.expectedErr)
64 | 			}
65 | 			assert.Equal(t, tc.expectedFamily, family)
66 | 		})
67 | 	}
68 | }
69 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # syntax=docker/dockerfile:1.1.3-experimental
 2 | ARG BUILDER_IMAGE
 3 | # LICENSES_IMAGE is a container image that contains license files for the source
 4 | # and its dependencies. When building with `make container`, the licenses
 5 | # container image is built and provided as LICENSES_IMAGE.
 6 | ARG LICENSES_IMAGE=scratch
 7 | 
 8 | # build the updater image
 9 | FROM ${BUILDER_IMAGE} as builder
10 | USER builder
11 | WORKDIR /wrkdir
12 | ENV GOPROXY=direct
13 | # Sets the target architecture for the binary
14 | ARG GOARCH
15 | ENV OUTPUT_DIR=/wrkdir/target/${GOARCH}/release
16 | COPY ./updater /wrkdir/
17 | RUN go mod download
18 | RUN CGO_ENABLED=0 go build -v -o ${OUTPUT_DIR}/bottlerocket-ecs-updater . && \
19 |     cp ${OUTPUT_DIR}/bottlerocket-ecs-updater /wrkdir/bottlerocket-ecs-updater
20 | 
21 | FROM ${LICENSES_IMAGE} as licenses
22 | # Set WORKDIR to create /licenses/ if the directory is missing.
23 | #
24 | # Having an image with /licenses/ lets scratch be substituted in when
25 | # LICENSES_IMAGE isn't provided. For example, a user can manually run `docker
26 | # build -t neio:latest .` to build a working image without providing an expected
27 | # LICENSES_IMAGE.
28 | WORKDIR /licenses/
29 | 
30 | # create an image with just the binary
31 | FROM scratch
32 | # Copy CA certificates store
33 | COPY --from=public.ecr.aws/amazonlinux/amazonlinux:2 /etc/ssl /etc/ssl
34 | COPY --from=public.ecr.aws/amazonlinux/amazonlinux:2 /etc/pki /etc/pki
35 | COPY --from=builder \
36 |     /wrkdir/bottlerocket-ecs-updater \
37 |     /bottlerocket-ecs-updater
38 | COPY COPYRIGHT LICENSE-* /usr/share/licenses/bottlerocket-ecs-updater/
39 | COPY --from=licenses /licenses/ /usr/share/licenses/bottlerocket-ecs-updater/vendor/
40 | ENTRYPOINT ["/bottlerocket-ecs-updater"]
41 | 


--------------------------------------------------------------------------------
/Dockerfile.licenses:
--------------------------------------------------------------------------------
 1 | # This Dockerfile produces an image that has only the licenses of dependencies
 2 | # used in the updater.
 3 | #
 4 | # LICENSE, and other legal notices, are collected by bottlerocket-license-scan
 5 | # to be organized into a project-wide conventional directory structure rooted at
 6 | # /licenses in the resulting image.
 7 | 
 8 | # SDK_IMAGE is the Bottlerocket SDK container image that provides
 9 | # `bottlerocket-license-scan` in it.
10 | ARG SDK_IMAGE
11 | 
12 | # GOLANG_IMAGE is the image to be used for collecting modules. This should be
13 | # the same image used in the build. The idea is to have the same toolchain to
14 | # avoid running into any differences between versions.
15 | ARG GOLANG_IMAGE=golang:1.19.1
16 | 
17 | # Fetch dependencies into a vendor/ directory.
18 | #
19 | # The first several steps should match that of the build's Dockerfile to share
20 | # the go module package cache.
21 | FROM $GOLANG_IMAGE as src
22 | USER builder
23 | WORKDIR /wrkdir
24 | ENV GOPROXY=direct
25 | # Sets the target architecture for the binary
26 | ARG GOARCH
27 | COPY ./updater /wrkdir/
28 | RUN go mod download
29 | # Unpack go modules into a vendor/ directory to run scanner on.
30 | RUN go mod vendor
31 | 
32 | # Run the license scanner and dump its processed & collected license data to be
33 | # used in distributed container image.
34 | FROM $SDK_IMAGE as license-scan
35 | COPY --from=src /wrkdir/vendor /wrkdir/vendor
36 | COPY clarify.toml /wrkdir/clarify.toml
37 | USER root
38 | RUN bottlerocket-license-scan \
39 |     --spdx-data /usr/libexec/tools/spdx-data \
40 |     --out-dir /out/licenses \
41 |     --clarify /wrkdir/clarify.toml \
42 |     go-vendor /wrkdir/vendor
43 | 
44 | # Final container image has LICENSE files and accompanying attributions
45 | # collected and produced by the license scanner.
46 | FROM scratch as licenses
47 | COPY --from=license-scan /out/licenses /licenses


--------------------------------------------------------------------------------
/updater/go.sum:
--------------------------------------------------------------------------------
 1 | github.com/aws/aws-sdk-go v1.51.20 h1:ziM90ujYHKKkoTZL+Wg2LwjbQecL+l298GGJeG4ktZs=
 2 | github.com/aws/aws-sdk-go v1.51.20/go.mod h1:LF8svs817+Nz+DmiMQKTO3ubZ/6IaTpq3TjupRn3Eqk=
 3 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 4 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 5 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 6 | github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg=
 7 | github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
 8 | github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8=
 9 | github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
10 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
11 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
12 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
13 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
14 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
15 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
16 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
17 | github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
18 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
19 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
20 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
21 | gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
22 | gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
23 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
24 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
25 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
26 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # 0.2.2
 2 | 
 3 | * Bump version of Go to 1.19.2
 4 | * Upgrade `github.com/aws/aws-sdk-go` dependency to `v1.44.137`
 5 | * Upgrade `github.com/stretchr/testify` dependency to `v1.8.1`
 6 | 
 7 | # 0.2.1
 8 | 
 9 | * Bump version of Go to 1.19.1 and upgrade dependencies
10 | 
11 | # 0.2.0
12 | 
13 | * Add support for clusters larger than 50 container instances.
14 | * Add after-action summary and done message to log output.
15 | * Add check to reduce the chance of concurrent runs.
16 | 
17 | Note: In the Bottlerocket ECS updater v0.1.0 release, support for clusters was limited to 50 container instances. In this release, clusters larger than 50 container instances are now supported. :tada: 
18 | 
19 | # 0.1.0
20 | 
21 | Initial release of the **Bottlerocket ECS updater** - A service to automatically manage Bottlerocket updates in an Amazon ECS cluster.
22 | 
23 | The Bottlerocket ECS updater is designed to help you safely automate the routine maintenance of updating the Bottlerocket instances in your cluster.
24 | The updater's safety features include:
25 | 
26 | * Only tasks that are part of a [service](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs_services.html) will be interrupted.
27 |   Container instances with non-service tasks are skipped for upgrade so no critical workloads will be automatically interrupted.
28 | * Only container instances in the [ACTIVE state](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/container-instance-draining.html) will be upgraded.
29 |   Instances that have been placed into the DRAINING state are skipped for upgrade so other maintenance or debugging can be performed without interruption.
30 | 
31 | In this first release of the updater, the following considerations should be kept in mind:
32 | 
33 | * Only clusters of up to 50 container instances are supported.
34 |   If the updater is configured to target a cluster with more than 50 instances, some instances may not be updated.
35 | * When configuring the provided CloudFormation template, ensure that the CloudWatch log group already exists.
36 |   The updater will not automatically create the log group and a missing log group will cause the updater to fail to run.
37 |   When creating a log group, you can configure your desired log retention settings.
38 | 
39 | See the [README](README.md) for additional information.


--------------------------------------------------------------------------------
/integ/README.md:
--------------------------------------------------------------------------------
 1 | # Integration tests
 2 | 
 3 | The following integration workflow is how you can
 4 | test your changes and verify that new dependencies didn’t break the updater mechanisms.
 5 | It’s also similar to how we verify versions of the ECS Updater,
 6 | so it’s useful to go through it when making changes
 7 | and should in total take less than 1 hour.
 8 | 
 9 | 1. You’ll want to set up a test ECS cluster. 
10 | 
11 |    Thankfully, this is really easy with the existing integration tests setup script:
12 |    https://github.com/bottlerocket-os/bottlerocket-ecs-updater/blob/develop/integ/setup.sh
13 | 
14 |    ```sh
15 |    ./setup.sh --ami-id ami-05d2e4a6b8399095a
16 |    ```
17 | 
18 |    This script expects the ami-id of a Bottlerocket ECS variant.
19 |    This will set up an ECS cluster using the integration CloudFormation stack
20 |    and using that Bottlerocket ECS variant as EC2 compute.
21 | 
22 | 2. Build an ECS updater image from your changes:
23 | 
24 |    ```
25 |    # Build the image and tag it as "latest"
26 |    make image
27 | 
28 |    # Verify the image was built and tagged a moment ago
29 |    docker images | head -n 10
30 | 
31 |    # Re-tag the image to wherever you want to land it on your ECR registry
32 |    docker tag bottlerocket-ecs-updater:latest \
33 |        <account-id>.dkr.ecr.us-west-2.amazonaws.com/bottlerocket-ecs-updater:my-test
34 | 
35 |    # Push it to your ECR registry
36 |    docker push \
37 |        <account-id>.dkr.ecr.us-west-2.amazonaws.com/bottlerocket-ecs-updater:my-test
38 |    ```
39 | 
40 | 3. Once your integration ECS cluster is up and you’ve built/pushed a new image,
41 | you can execute the run-updater script to actually do the integration tests!
42 | 
43 |    Note that you need to provide the image URL of the new image you just built.
44 |    This is the actual image that gets deployed as a fargate task!
45 | 
46 |    ```
47 |    ./run-updater.sh \
48 |        --cluster ecs-updater-integ-cluster \
49 |        --updater-image <account-id>.dkr.ecr.us-west-2.amazonaws.com/bottlerocket-ecs-updater:my-test
50 |    ```
51 | 
52 | 4. Cleanup is also easy! There’s a script for that as well: 
53 | 
54 |    ```
55 |    ./cleanup.sh --cluster ecs-updater-integ-cluster
56 |    ```
57 | 
58 |    This tears down the ECS cluster by name releasing any artifacts from the integration tests.
59 | 
60 | In all, the total process takes well under an hour. ECS clusters spin up and down very quickly.
61 | 


--------------------------------------------------------------------------------
/integ/stacks/cluster.yaml:
--------------------------------------------------------------------------------
 1 | AWSTemplateFormatVersion: 2010-09-09
 2 | Description: 'ECS Updater Integration Test Cluster'
 3 | Parameters:
 4 |   IntegSharedResourceStack:
 5 |     Type: String
 6 |     Description: 'Name of the CloudFormation stack that sets up the shared resource for testing.'
 7 |   ImageID:
 8 |       Description: 'Bottlerocket `aws-ecs-1` variant image id'
 9 |       Type: AWS::EC2::Image::Id
10 |   InstanceCount:
11 |       Description: 'Desired number of Bottlerocket instances in cluster'
12 |       Default: 10
13 |       Type: Number
14 |   InstanceType:
15 |     Type: String
16 |     Default: m5.xlarge
17 |     Description: 'Instance type for the instances'
18 | Resources:
19 |   Cluster:
20 |     Type: AWS::ECS::Cluster
21 |     Properties:
22 |       ClusterName: !Sub '${AWS::StackName}'
23 |   LaunchTemplate:
24 |     Type: AWS::EC2::LaunchTemplate
25 |     Properties:
26 |       LaunchTemplateData:
27 |         IamInstanceProfile:
28 |           Name:
29 |             Fn::ImportValue:
30 |               !Sub "${IntegSharedResourceStack}:EcsInstanceProfile"
31 |         ImageId: !Ref ImageID
32 |         InstanceType: !Ref InstanceType
33 |         SecurityGroupIds:
34 |           - Fn::ImportValue:
35 |               !Sub "${IntegSharedResourceStack}:SecurityGroupID"
36 |         TagSpecifications:
37 |           - ResourceType: instance
38 |             Tags:
39 |               - Key: "Name"
40 |                 Value: !Sub "${AWS::StackName}-instance"
41 |         UserData:
42 |           Fn::Base64:
43 |             !Sub |
44 |               [settings.ecs]
45 |               cluster = "${AWS::StackName}"
46 |   AutoScalingGroup:
47 |     Type: AWS::AutoScaling::AutoScalingGroup
48 |     Properties:
49 |       MinSize: !Ref InstanceCount
50 |       MaxSize: "50"
51 |       DesiredCapacity: !Ref InstanceCount
52 |       VPCZoneIdentifier:
53 |         Fn::Split:
54 |         - ","
55 |         - Fn::ImportValue:
56 |             !Sub "${IntegSharedResourceStack}:PublicSubnets"
57 |       MixedInstancesPolicy:
58 |         InstancesDistribution:
59 |           OnDemandBaseCapacity: !Ref InstanceCount
60 |         LaunchTemplate:
61 |           LaunchTemplateSpecification:
62 |             LaunchTemplateId: !Ref LaunchTemplate
63 |             Version: !GetAtt LaunchTemplate.LatestVersionNumber
64 | Outputs:
65 |   AutoScalingGroupName:
66 |     Description: 'Auto scaling group name'
67 |     Value: !Ref AutoScalingGroup
68 |     Export:
69 |       Name: !Sub "${AWS::StackName}:AutoScalingGroup"
70 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # SHELL is set as bash to use some bashisms.
 2 | SHELL = bash
 3 | 
 4 | BOTTLEROCKET_SDK_VERSION = v0.37.0
 5 | BOTTLEROCKET_SDK_ARCH    = x86_64
 6 | UPDATER_TARGET_ARCH      = amd64
 7 | 
 8 | # the docker image that will be used to compile go code
 9 | BUILDER_IMAGE = public.ecr.aws/bottlerocket/bottlerocket-sdk-${BOTTLEROCKET_SDK_ARCH}:${BOTTLEROCKET_SDK_VERSION}
10 | 
11 | # IMAGE_NAME is the full name of the container being built
12 | IMAGE_NAME = bottlerocket-ecs-updater:latest
13 | # LICENSES_IMAGE is the name of the container image that has LICENSE files
14 | # for distribution.
15 | LICENSES_IMAGE = $(IMAGE_NAME)-licenses
16 | 
17 | SOURCEDIR=./updater
18 | SOURCES := $(shell find $(SOURCEDIR) -name '*.go')
19 | export GO111MODULE=on
20 | export DOCKER_BUILDKIT=1
21 | 
22 | all: build
23 | 
24 | .PHONY: tidy
25 | tidy:
26 | 	cd updater && go mod tidy
27 | 
28 | .PHONY: build # builds updater
29 | build: updater/bin/bottlerocket-ecs-updater
30 | updater/bin/bottlerocket-ecs-updater: $(SOURCES) updater/go.mod updater/go.sum
31 | # GOARCH is set on the go command itself because each recipe line runs in its own shell.
32 | 	cd updater && GOARCH=$(UPDATER_TARGET_ARCH) go build -v -o bin/bottlerocket-ecs-updater .
33 | 
34 | .PHONY: test
35 | test:
36 | 	cd updater && go test -v ./...
37 | 
38 | .PHONY: image # creates a docker image with the updater binary
39 | image: licenses
40 | 	docker build \
41 | 		--tag '$(IMAGE_NAME)' \
42 | 		--build-arg BUILDER_IMAGE=$(BUILDER_IMAGE) \
43 | 		--build-arg GOARCH=$(UPDATER_TARGET_ARCH) \
44 | 		--build-arg LICENSES_IMAGE=$(LICENSES_IMAGE) \
45 | 		.
46 | 
47 | .PHONY: licenses
48 | licenses:
49 | 	docker build \
50 | 		--tag '$(LICENSES_IMAGE)' \
51 | 		--build-arg SDK_IMAGE=$(BUILDER_IMAGE) \
52 | 		--build-arg GOLANG_IMAGE=$(BUILDER_IMAGE) \
53 | 		--build-arg GOARCH=$(UPDATER_TARGET_ARCH) \
54 | 		-f Dockerfile.licenses \
55 | 		.
56 | 
57 | .PHONY: lint
58 | lint: golang-lint cfn-lint
59 | 
60 | .PHONY: golang-lint
61 | golang-lint:
62 | 	cd updater; golangci-lint run
63 | 
64 | .PHONY: cfn-lint
65 | cfn-lint:
66 | 	cfn-lint ./stacks/bottlerocket-ecs-updater.yaml
67 | 	cfn-lint ./integ/stacks/integ-shared.yaml
68 | 	cfn-lint ./integ/stacks/cluster.yaml
69 | 
70 | # Check that the container has LICENSE files included for its dependencies.
71 | .PHONY: check-licenses
72 | check-licenses: CHECK_CONTAINER_NAME=check-licenses-bottlerocket-ecs-updater
73 | check-licenses:
74 | 	@echo "Running check: $@"
75 | 	@-if docker inspect $(CHECK_CONTAINER_NAME) &>/dev/null; then\
76 | 		docker rm $(CHECK_CONTAINER_NAME) &>/dev/null; \
77 | 	fi
78 | 	@docker create --name $(CHECK_CONTAINER_NAME) $(IMAGE_NAME) >/dev/null 2>&1
79 | 	@echo "Checking if container image included dependencies' LICENSE files..."
80 | 	@docker export $(CHECK_CONTAINER_NAME) | tar -tf - \
81 | 		| grep usr/share/licenses/bottlerocket-ecs-updater/vendor \
82 | 		| grep -q LICENSE || { \
83 | 			echo "Container image is missing required LICENSE files (checked $(IMAGE_NAME))"; \
84 | 			docker rm $(CHECK_CONTAINER_NAME) &>/dev/null; \
85 | 			exit 1; \
86 | 		}
87 | 	@-docker rm $(CHECK_CONTAINER_NAME)
88 | 
89 | # Remove the locally built updater binary directory.
90 | .PHONY: clean
91 | clean:
92 | 	-rm -rf updater/bin
93 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing Guidelines
 2 | 
 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
 4 | documentation, we greatly value feedback and contributions from our community.
 5 | 
 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
 7 | information to effectively respond to your bug report or contribution.
 8 | 
 9 | 
10 | ## Reporting Bugs/Feature Requests
11 | 
12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features.
13 | 
14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already
15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
16 | 
17 | * A reproducible test case or series of steps
18 | * The version of our code being used
19 | * Any modifications you've made relevant to the bug
20 | * Anything unusual about your environment or deployment
21 | 
22 | 
23 | ## Contributing via Pull Requests
24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
25 | 
26 | 1. You are working against the latest source on the *main* branch.
27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
29 | 
30 | To send us a pull request, please:
31 | 
32 | 1. Fork the repository.
33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
34 | 3. Ensure local tests pass.
35 | 4. Commit to your fork using clear commit messages.
36 | 5. Send us a pull request, answering any default questions in the pull request interface.
37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
38 | 
39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
41 | 
42 | 
43 | ## Finding contributions to work on
44 | Looking at the existing issues is a great way to find something to contribute to. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start.
45 | 
46 | 
47 | ## Code of Conduct
48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
50 | opensource-codeofconduct@amazon.com with any additional questions or comments.
51 | 
52 | 
53 | ## Security issue notifications
54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue.
55 | 
56 | 
57 | ## Licensing
58 | 
59 | See the [LICENSE-APACHE](LICENSE-APACHE) or [LICENSE-MIT](LICENSE-MIT) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
60 | 


--------------------------------------------------------------------------------
/updater/mock_test.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"github.com/aws/aws-sdk-go/aws"
 5 | 	"github.com/aws/aws-sdk-go/aws/request"
 6 | 	"github.com/aws/aws-sdk-go/service/ec2"
 7 | 	"github.com/aws/aws-sdk-go/service/ecs"
 8 | 	"github.com/aws/aws-sdk-go/service/ssm"
 9 | )
10 | 
// MockECS is a test double for ECSAPI. Every method forwards to the function
// field of the same name plus an "Fn" suffix, so a test populates only the
// fields for the calls it expects. Calling a method whose field is nil panics.
type MockECS struct {
	ListContainerInstancesPagesFn      func(input *ecs.ListContainerInstancesInput, fn func(*ecs.ListContainerInstancesOutput, bool) bool) error
	DescribeContainerInstancesFn       func(input *ecs.DescribeContainerInstancesInput) (*ecs.DescribeContainerInstancesOutput, error)
	UpdateContainerInstancesStateFn    func(input *ecs.UpdateContainerInstancesStateInput) (*ecs.UpdateContainerInstancesStateOutput, error)
	ListTasksFn                        func(input *ecs.ListTasksInput) (*ecs.ListTasksOutput, error)
	DescribeTasksFn                    func(input *ecs.DescribeTasksInput) (*ecs.DescribeTasksOutput, error)
	WaitUntilTasksStoppedWithContextFn func(ctx aws.Context, input *ecs.DescribeTasksInput, opts ...request.WaiterOption) error
}

// Compile-time check that *MockECS satisfies ECSAPI.
var _ ECSAPI = (*MockECS)(nil)
21 | 
// MockSSM is a test double for SSMAPI. Every method forwards to the function
// field of the same name plus an "Fn" suffix. Calling a method whose field is
// nil panics.
type MockSSM struct {
	// WaitUntilCommandExecutedWithContextFn is executed concurrently through
	// ECS code paths and tests should treat any data in a parallel safe manner
	WaitUntilCommandExecutedWithContextFn func(ctx aws.Context, input *ssm.GetCommandInvocationInput, opts ...request.WaiterOption) error
	SendCommandFn                         func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error)
	GetCommandInvocationFn                func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error)
}

// Compile-time check that *MockSSM satisfies SSMAPI.
var _ SSMAPI = (*MockSSM)(nil)
31 | 
// MockEC2 is a test double for EC2API. Its single method forwards to
// WaitUntilInstanceStatusOkFn; calling it while the field is nil panics.
type MockEC2 struct {
	WaitUntilInstanceStatusOkFn func(input *ec2.DescribeInstanceStatusInput) error
}

// Compile-time check that *MockEC2 satisfies EC2API.
var _ EC2API = (*MockEC2)(nil)
37 | 
// ListContainerInstancesPages forwards to ListContainerInstancesPagesFn and
// returns its result unchanged.
func (m MockECS) ListContainerInstancesPages(input *ecs.ListContainerInstancesInput, fn func(*ecs.ListContainerInstancesOutput, bool) bool) error {
	return m.ListContainerInstancesPagesFn(input, fn)
}
41 | 
// DescribeContainerInstances forwards to DescribeContainerInstancesFn and
// returns its result unchanged.
func (m MockECS) DescribeContainerInstances(input *ecs.DescribeContainerInstancesInput) (*ecs.DescribeContainerInstancesOutput, error) {
	return m.DescribeContainerInstancesFn(input)
}
45 | 
// UpdateContainerInstancesState forwards to UpdateContainerInstancesStateFn
// and returns its result unchanged.
func (m MockECS) UpdateContainerInstancesState(input *ecs.UpdateContainerInstancesStateInput) (*ecs.UpdateContainerInstancesStateOutput, error) {
	return m.UpdateContainerInstancesStateFn(input)
}
49 | 
// ListTasks forwards to ListTasksFn and returns its result unchanged.
func (m MockECS) ListTasks(input *ecs.ListTasksInput) (*ecs.ListTasksOutput, error) {
	return m.ListTasksFn(input)
}
53 | 
// DescribeTasks forwards to DescribeTasksFn and returns its result unchanged.
func (m MockECS) DescribeTasks(input *ecs.DescribeTasksInput) (*ecs.DescribeTasksOutput, error) {
	return m.DescribeTasksFn(input)
}
57 | 
// WaitUntilTasksStoppedWithContext forwards to
// WaitUntilTasksStoppedWithContextFn, passing the waiter options through.
func (m MockECS) WaitUntilTasksStoppedWithContext(ctx aws.Context, input *ecs.DescribeTasksInput, opts ...request.WaiterOption) error {
	return m.WaitUntilTasksStoppedWithContextFn(ctx, input, opts...)
}
61 | 
// SendCommand forwards to SendCommandFn and returns its result unchanged.
func (m MockSSM) SendCommand(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) {
	return m.SendCommandFn(input)
}
65 | 
// WaitUntilCommandExecutedWithContext forwards to
// WaitUntilCommandExecutedWithContextFn, passing the waiter options through.
func (m MockSSM) WaitUntilCommandExecutedWithContext(ctx aws.Context, input *ssm.GetCommandInvocationInput, opts ...request.WaiterOption) error {
	return m.WaitUntilCommandExecutedWithContextFn(ctx, input, opts...)
}
69 | 
// GetCommandInvocation forwards to GetCommandInvocationFn and returns its
// result unchanged.
func (m MockSSM) GetCommandInvocation(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) {
	return m.GetCommandInvocationFn(input)
}
73 | 
74 | func (c MockEC2) WaitUntilInstanceStatusOk(input *ec2.DescribeInstanceStatusInput) error {
75 | 	return c.WaitUntilInstanceStatusOkFn(input)
76 | }
77 | 


--------------------------------------------------------------------------------
/integ/setup.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | THISDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  4 | 
  5 | source "${THISDIR}/common.sh"
  6 | 
  7 | # Default ECS cluster name
  8 | DEFAULT_CLUSTER_NAME="ecs-updater-integ-cluster"
  9 | 
 10 | # Default number of instances to launch in the cluster
 11 | DEFAULT_INSTANCE_COUNT=10
 12 | 
 13 | # Default instance type for instances in the cluster
 14 | DEFAULT_INSTANCE_TYPE="m5.xlarge"
 15 | 
 16 | # Helper functions
# usage writes the command-line help text to stderr. The defaults shown in the
# text are interpolated from the DEFAULT_* variables and the template names.
usage() {
    cat >&2 <<EOF
${0##*/}
                 --ami-id AMI-ID
                 [--instance-type ${DEFAULT_INSTANCE_TYPE}]
                 [--instance-count ${DEFAULT_INSTANCE_COUNT}]
                 [--cluster ${DEFAULT_CLUSTER_NAME}]

Deploys templates '${INTEG_STACK_TEMPLATE}' and '${CLUSTER_STACK_TEMPLATE}' to set up an ECS cluster.

Required:
   --ami-id                           Image ID for test instance in cluster (an aws-ecs-1 AMI ID)

Optional:
   --instance-type                    Instance type for test instances (default ${DEFAULT_INSTANCE_TYPE})
   --instance-count                   Number of instances to launch in the cluster (default ${DEFAULT_INSTANCE_COUNT})
   --cluster                          Name of the cluster (default ${DEFAULT_CLUSTER_NAME}). New cluster is created if it does not exist.

EOF
}
 37 | 
 38 | parse_args() {
 39 |     while [ ${#} -gt 0 ]; do
 40 |         case "${1}" in
 41 |         --ami-id)
 42 |             shift
 43 |             AMI_ID="${1}"
 44 |             ;;
 45 |         --instance-type)
 46 |             shift
 47 |             INSTANCE_TYPE="${1}"
 48 |             ;;
 49 |         --instance-count)
 50 |             shift
 51 |             INSTANCE_COUNT="${1}"
 52 |             ;;
 53 |         --cluster)
 54 |             shift
 55 |             CLUSTER_STACK_NAME="${1}"
 56 |             ;;
 57 | 
 58 |         --help)
 59 |             usage
 60 |             exit 0
 61 |             ;;
 62 |         *)
 63 |             log ERROR "Unknown argument: ${1}" >&2
 64 |             usage
 65 |             exit 2
 66 |             ;;
 67 |         esac
 68 |         shift
 69 |     done
 70 | 
 71 |     INSTANCE_TYPE="${INSTANCE_TYPE:-$DEFAULT_INSTANCE_TYPE}"
 72 |     INSTANCE_COUNT="${INSTANCE_COUNT:-$DEFAULT_INSTANCE_COUNT}"
 73 |     CLUSTER_STACK_NAME="${CLUSTER_STACK_NAME:-$DEFAULT_CLUSTER_NAME}"
 74 | 
 75 |     # Required arguments
 76 |     required_arg "--ami-id" "${AMI_ID}"
 77 | }
 78 | 
 79 | # Initial setup and checks
 80 | parse_args "${@}"
 81 | 
 82 | # deploy stack to create integ resources
 83 | log INFO "Deploying stack template '${INTEG_STACK_TEMPLATE}'"
 84 | if ! aws cloudformation deploy \
 85 |     --stack-name "${INTEG_STACK_NAME}" \
 86 |     --template-file "${THISDIR}/stacks/${INTEG_STACK_TEMPLATE}" \
 87 |     --capabilities CAPABILITY_NAMED_IAM; then
 88 |     log ERROR "Failed to deploy '${INTEG_STACK_TEMPLATE}' stack template"
 89 |     exit 1
 90 | fi
 91 | log INFO "Stack template '${INTEG_STACK_TEMPLATE}' deployed with name '${INTEG_STACK_NAME}'"
 92 | 
 93 | # deploy stack to start ecs cluster using auto-scaling group
 94 | log INFO "Deploying stack template '${CLUSTER_STACK_TEMPLATE}' to set up an ECS cluster"
 95 | if ! aws cloudformation deploy \
 96 |     --stack-name "${CLUSTER_STACK_NAME}" \
 97 |     --template-file "${THISDIR}/stacks/${CLUSTER_STACK_TEMPLATE}" \
 98 |     --capabilities CAPABILITY_NAMED_IAM \
 99 |     --parameter-overrides \
100 |     IntegSharedResourceStack="${INTEG_STACK_NAME}" \
101 |     InstanceCount="${INSTANCE_COUNT}" \
102 |     ImageID="${AMI_ID}" \
103 |     InstanceType="${INSTANCE_TYPE}"; then
104 |     log ERROR "Failed to deploy stack '${CLUSTER_STACK_TEMPLATE}' stack template"
105 |     exit 1
106 | fi
107 | log INFO "ECS cluster '${CLUSTER_STACK_NAME}'  with '${INSTANCE_COUNT}' instances and instance type '${INSTANCE_TYPE}' created!"
108 | 


--------------------------------------------------------------------------------
/integ/run-updater.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | THISDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  4 | 
  5 | source "${THISDIR}/common.sh"
  6 | 
  7 | # Helper functions
# usage writes the command-line help text to stderr.
usage() {
    cat >&2 <<EOF
${0##*/}
                 --cluster CLUSTER --updater-image UPDATER-IMAGE

Starts an ECS updater to manage Bottlerocket instances in a given cluster

Required:
   --cluster                          Cluster name to manage Bottlerocket instances in
   --updater-image                    Bottlerocket ECS updater image ECR location

EOF
}
 21 | 
 22 | parse_args() {
 23 |     while [ ${#} -gt 0 ]; do
 24 |         case "${1}" in
 25 |         --cluster)
 26 |             shift
 27 |             CLUSTER="${1}"
 28 |             ;;
 29 |         --updater-image)
 30 |             shift
 31 |             UPDATER_IMAGE="${1}"
 32 |             ;;
 33 | 
 34 |         --help)
 35 |             usage
 36 |             exit 0
 37 |             ;;
 38 |         *)
 39 |             log ERROR "Unknown argument: ${1}" >&2
 40 |             usage
 41 |             exit 2
 42 |             ;;
 43 |         esac
 44 |         shift
 45 |     done
 46 | 
 47 |     UPDATER_STACK_NAME="${UPDATER_STACK_PREFIX}${CLUSTER}"
 48 | 
 49 |     # Required arguments
 50 |     required_arg "--cluster" "${CLUSTER}"
 51 |     required_arg "--updater-image" "${UPDATER_IMAGE}"
 52 | }
 53 | 
 54 | # Initial setup and checks
 55 | parse_args "${@}"
 56 | 
 57 | log INFO "Extracting output resource id's from '${INTEG_STACK_NAME}' stack"
 58 | if ! integ_resources=$(aws cloudformation describe-stacks \
 59 |     --stack-name "${INTEG_STACK_NAME}" \
 60 |     --output json \
 61 |     --query 'Stacks[].Outputs[]'); then
 62 |     log ERROR "Failed to get outputs from '${INTEG_STACK_NAME}' stack"
 63 |     exit 1
 64 | fi
 65 | 
 66 | # Get Subnets
 67 | if ! subnets=$(echo "${integ_resources}" | jq --raw-output '.[] | select(.OutputKey == "PublicSubnets") | .OutputValue'); then
 68 |     log ERROR "Failed to extract list of subnets from '${INTEG_STACK_NAME}' stack outputs"
 69 |     exit 1
 70 | fi
 71 | log INFO "Subnets are '${subnets}'"
 72 | # check the data to make sure its usable in our context
 73 | if [[ "${#subnets[@]}" -lt 1 ]]; then
 74 |     log ERROR "No usable subnets"
 75 |     exit 1
 76 | fi
 77 | 
 78 | # Get LogGroupName
 79 | if ! log_group=$(echo "${integ_resources}" | jq --raw-output '.[] | select(.OutputKey == "LogGroupName") | .OutputValue'); then
 80 |     log ERROR "Failed to extract LogGroup name from '${INTEG_STACK_NAME}' stack outputs"
 81 |     exit 1
 82 | fi
 83 | log INFO "LogGroup name is '${log_group}'"
 84 | 
 85 | # Get LogGroupName
 86 | if ! security_grp=$(echo "${integ_resources}" | jq --raw-output '.[] | select(.OutputKey == "SecurityGroupID") | .OutputValue'); then
 87 |     log ERROR "Failed to extract security group id from '${INTEG_STACK_NAME}' stack outputs"
 88 |     exit 1
 89 | fi
 90 | log INFO "Security group id is '${security_grp}'"
 91 | 
 92 | # start updater on cluster
 93 | log INFO "Deploying ECS updater stack on cluster '${CLUSTER}' with cron event rule disabled"
 94 | if ! aws cloudformation deploy \
 95 |     --stack-name "${UPDATER_STACK_NAME}" \
 96 |     --template-file "${THISDIR}/../stacks/bottlerocket-ecs-updater.yaml" \
 97 |     --capabilities CAPABILITY_NAMED_IAM \
 98 |     --parameter-overrides \
 99 |     ClusterName="${CLUSTER}" \
100 |     Subnets="${subnets}" \
101 |     UpdaterImage="${UPDATER_IMAGE}" \
102 |     LogGroupName="${log_group}" \
103 |     ScheduleState="DISABLED"; then
104 |     log ERROR "Failed to deploy Bottlerocket ECS updater"
105 |     exit 1
106 | fi
107 | 
108 | log INFO "Extracting updater task definition arn from '${UPDATER_STACK_NAME}' stack"
109 | if ! output=$(aws cloudformation describe-stacks \
110 |     --stack-name "${UPDATER_STACK_NAME}" \
111 |     --output json \
112 |     --query 'Stacks[].Outputs[]'); then
113 |     log ERROR "Failed to get outputs from '${UPDATER_STACK_NAME}' stack"
114 |     exit 1
115 | fi
116 | 
117 | if ! task_def=$(echo "${output}" | jq --raw-output '.[] | select(.OutputKey == "UpdaterTaskDefinitionArn") | .OutputValue'); then
118 |     log ERROR "Failed to extract updater task definition arn from '${UPDATER_STACK_NAME}' stack outputs"
119 |     exit 1
120 | fi
121 | 
122 | log INFO "Starting ECS updater task on cluster '${CLUSTER}'"
123 | if ! aws ecs run-task \
124 |     --cluster "${CLUSTER}" \
125 |     --task-definition "${task_def}" \
126 |     --launch-type "FARGATE" \
127 |     --network-configuration="awsvpcConfiguration={subnets=[${subnets}],securityGroups=${security_grp},assignPublicIp=ENABLED}"; then
128 |     log ERROR "Failed to start updater task '${task_def}'"
129 |     exit 1
130 | fi
131 | 
132 | log INFO "ECS updater is running on cluster '${CLUSTER}'. Check logs in Cloudwatch LogGroup '${log_group}'"
133 | 


--------------------------------------------------------------------------------
/integ/stacks/integ-shared.yaml:
--------------------------------------------------------------------------------
  1 | AWSTemplateFormatVersion: "2010-09-09"
  2 | Description: Bottlerocket ECS updater integration tests shared resources
  3 | Resources:
  4 |   VPC:
  5 |     Type: AWS::EC2::VPC
  6 |     Properties:
  7 |       EnableDnsSupport: true
  8 |       EnableDnsHostnames: true
  9 |       CidrBlock: 10.0.0.0/16
 10 |       Tags:
 11 |         - Key: Name
 12 |           Value: ECSUpdaterInteg
 13 |   SubnetA:
 14 |     Type: AWS::EC2::Subnet
 15 |     Properties:
 16 |       VpcId: !Ref VPC
 17 |       CidrBlock: 10.0.5.0/24
 18 |       AvailabilityZone: !Select
 19 |         - 0
 20 |         - !GetAZs
 21 |           Ref: 'AWS::Region'
 22 |       MapPublicIpOnLaunch: true
 23 |       Tags:
 24 |         - Key: Name
 25 |           Value: ECSUpdaterIntegSubnetA
 26 |   SubnetB:
 27 |     Type: AWS::EC2::Subnet
 28 |     Properties:
 29 |       VpcId: !Ref VPC
 30 |       CidrBlock: 10.0.6.0/24
 31 |       AvailabilityZone: !Select
 32 |         - 1
 33 |         - !GetAZs
 34 |           Ref: 'AWS::Region'
 35 |       MapPublicIpOnLaunch: true
 36 |       Tags:
 37 |         - Key: Name
 38 |           Value: ECSUpdaterIntegSubnetB
 39 |   SubnetC:
 40 |     Type: AWS::EC2::Subnet
 41 |     Properties:
 42 |       VpcId: !Ref VPC
 43 |       CidrBlock: 10.0.7.0/24
 44 |       AvailabilityZone: !Select
 45 |         - 2
 46 |         - !GetAZs
 47 |           Ref: 'AWS::Region'
 48 |       MapPublicIpOnLaunch: true
 49 |       Tags:
 50 |         - Key: Name
 51 |           Value: ECSUpdaterIntegSubnetC
 52 |   SecurityGroup:
 53 |     Type: AWS::EC2::SecurityGroup
 54 |     Properties:
 55 |       GroupDescription: Security Group for ECS Updater Task
 56 |       VpcId: !Ref VPC
 57 |       Tags:
 58 |         - Key: Name
 59 |           Value: ECSUpdaterInteg
 60 |   InternetGateway:
 61 |     Type: AWS::EC2::InternetGateway
 62 |     Properties:
 63 |       Tags:
 64 |         - Key: Name
 65 |           Value: ECSUpdaterInteg
 66 |   GatewayAttachement:
 67 |     Type: AWS::EC2::VPCGatewayAttachment
 68 |     Properties:
 69 |       VpcId: !Ref VPC
 70 |       InternetGatewayId: !Ref InternetGateway
 71 |   RouteTable:
 72 |     Type: AWS::EC2::RouteTable
 73 |     Properties:
 74 |       VpcId: !Ref VPC
 75 |       Tags:
 76 |         - Key: Name
 77 |           Value: ECSUpdaterInteg
 78 |   DefaultRoute:
 79 |     Type: AWS::EC2::Route
 80 |     Properties:
 81 |       RouteTableId: !Ref RouteTable
 82 |       DestinationCidrBlock: 0.0.0.0/0
 83 |       GatewayId: !Ref InternetGateway
 84 |   PublicSubnetARouteTableAssociation:
 85 |     Type: AWS::EC2::SubnetRouteTableAssociation
 86 |     Properties:
 87 |       SubnetId: !Ref SubnetA
 88 |       RouteTableId: !Ref RouteTable
 89 |   PublicSubnetBRouteTableAssociation:
 90 |     Type: AWS::EC2::SubnetRouteTableAssociation
 91 |     Properties:
 92 |       SubnetId: !Ref SubnetB
 93 |       RouteTableId: !Ref RouteTable
 94 |   PublicSubnetCRouteTableAssociation:
 95 |     Type: AWS::EC2::SubnetRouteTableAssociation
 96 |     Properties:
 97 |       SubnetId: !Ref SubnetC
 98 |       RouteTableId: !Ref RouteTable
 99 |   EcsInstanceRole:
100 |     Type: AWS::IAM::Role
101 |     Properties:
102 |       Description: 'Role for Bottlerocket container instances'
103 |       Path: !Sub '/bottlerocket/ecs-updater-integ/${AWS::StackName}/'
104 |       AssumeRolePolicyDocument:
105 |         Version: 2012-10-17
106 |         Statement:
107 |           - Effect: Allow
108 |             Principal:
109 |               Service: 'ec2.amazonaws.com'
110 |             Action:
111 |               - 'sts:AssumeRole'
112 |       ManagedPolicyArns:
113 |         - 'arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role'
114 |         - 'arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore'
115 |   EcsInstanceProfile:
116 |     Type: AWS::IAM::InstanceProfile
117 |     Properties:
118 |       InstanceProfileName: !Ref EcsInstanceRole
119 |       Path: !Sub '/bottlerocket/ecs-updater-integ/${AWS::StackName}/'
120 |       Roles:
121 |         - !Ref EcsInstanceRole
122 |   LogGroup:
123 |     Type: AWS::Logs::LogGroup
124 |     Properties:
125 |       RetentionInDays: 60
126 |       LogGroupName: 'bottlerocket-ecs-updater-integ'
127 | Outputs:
128 |   PublicSubnets:
129 |     Description: 'List of Subnets'
130 |     Value: !Join [ ",", [ !Ref SubnetA, !Ref SubnetB, !Ref SubnetC ] ]
131 |     Export:
132 |       Name: !Sub "${AWS::StackName}:PublicSubnets"
133 |   SecurityGroupID:
134 |     Description: 'Security group ID'
135 |     Value: !GetAtt SecurityGroup.GroupId
136 |     Export:
137 |       Name: !Sub "${AWS::StackName}:SecurityGroupID"
138 |   InstanceProfile:
139 |     Description: 'ECS instance profile name'
140 |     Value: !Ref EcsInstanceProfile
141 |     Export:
142 |       Name: !Sub "${AWS::StackName}:EcsInstanceProfile"
143 |   LogGroupName:
144 |     Description: 'Cloudwatch log group'
145 |     Value: !Ref LogGroup
146 |     Export:
147 |       Name: !Sub "${AWS::StackName}:LogGroup"
148 | 


--------------------------------------------------------------------------------
/integ/cleanup.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | THISDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  4 | 
  5 | source "${THISDIR}/common.sh"
  6 | 
  7 | delete_integ=0
  8 | 
  9 | # Helper functions
# usage writes the command-line help text to stderr. The integ stack name in
# the text is interpolated from INTEG_STACK_NAME.
usage() {
    cat >&2 <<EOF
${0##*/}
                 --cluster CLUSTER-NAME
                 [--delete-integ-stack]

Cleans up resources started for integration testing

Required:
   --cluster                          Name of the cluster to delete

Optional:
   --delete-integ-stack               deletes Integ resources stack '${INTEG_STACK_NAME}' along with the cluster

EOF
}
 26 | 
 27 | parse_args() {
 28 |     while [ ${#} -gt 0 ]; do
 29 |         case "${1}" in
 30 |         --cluster)
 31 |             shift
 32 |             CLUSTER="${1}"
 33 |             ;;
 34 |         --delete-integ-stack)
 35 |             delete_integ=1
 36 |             ;;
 37 | 
 38 |         --help)
 39 |             usage
 40 |             exit 0
 41 |             ;;
 42 |         *)
 43 |             log ERROR "Unknown argument: ${1}" >&2
 44 |             usage
 45 |             exit 2
 46 |             ;;
 47 |         esac
 48 |         shift
 49 |     done
 50 | 
 51 |     # Required arguments
 52 |     required_arg "--cluster" "${CLUSTER}"
 53 | }
 54 | 
 55 | delete_stack() {
 56 |     local stack_name="${1:?}"
 57 |     log INFO "Deleting Cloudformation stack '${stack_name}'"
 58 |     if ! aws cloudformation delete-stack \
 59 |         --stack-name "${stack_name}"; then
 60 |         log ERROR "Failed to delete '${stack_name}'"
 61 |         return
 62 |     fi
 63 | 
 64 |     log INFO "Waiting for Cloudformation stack '${stack_name}' to be deleted"
 65 |     if ! aws cloudformation wait stack-delete-complete \
 66 |         --stack-name "${stack_name}"; then
 67 |         log ERROR "Failed to wait for ${stack_name} to delete"
 68 |         aws cloudformation describe-stack-events \
 69 |             --stack-name "${stack_name}"
 70 |         return
 71 |     fi
 72 |     log INFO "Cloudformation stack '${stack_name}' deleted!"
 73 | }
 74 | 
 75 | delete_services() {
 76 |     local cluster="${1:?}"
 77 |     log INFO "Deleting services running on cluster '${cluster}'"
 78 |     if ! services=$(aws ecs list-services \
 79 |         --cluster ecs-updater-integ-cluster \
 80 |         --query 'serviceArns[]' \
 81 |         --output text); then
 82 |         log ERROR "Failed to list services in cluster '${cluster}'"
 83 |         return
 84 |     fi
 85 | 
 86 |     for service in ${services}; do
 87 |         log INFO "Deleting service '${service}'"
 88 |         if ! aws ecs delete-service \
 89 |             --cluster "${cluster}" \
 90 |             --service "${service}" \
 91 |             --force >/dev/null; then
 92 |             log ERROR "Failed to delete service '${service}'"
 93 |         fi
 94 |     done
 95 | }
 96 | 
 97 | stop_tasks() {
 98 |     local cluster="${1:?}"
 99 |     log INFO "Stopping tasks running on cluster '${cluster}'"
100 |     if ! tasks=$(aws ecs list-tasks \
101 |         --cluster ecs-updater-integ-cluster \
102 |         --query 'taskArns[]' \
103 |         --output text); then
104 |         log ERROR "Failed to list tasks in cluster '${cluster}'"
105 |         return
106 |     fi
107 | 
108 |     for task in ${tasks}; do
109 |         log INFO "Stopping task '${task}'"
110 |         if ! aws ecs stop-task \
111 |             --cluster "${cluster}" \
112 |             --task "${task}" >/dev/null; then
113 |             log ERROR "Failed to stop task '${task}'"
114 |         fi
115 |     done
116 | }
117 | 
# terminate_instances scales the cluster's auto scaling group down to zero and
# waits for each instance that was in the group to terminate. The group name
# is read from the 'AutoScalingGroupName' output of the CloudFormation stack
# named after the cluster. Failures are logged and the function returns
# without exiting the script.
#
# Arguments:
#   $1  cluster (and cluster stack) name (required)
terminate_instances() {
    local cluster="${1:?}"
    # Declared separately from the assignments so that `local` does not mask
    # the exit status of the command substitutions below.
    local output auto_scaling_group instance_ids inst_id

    log INFO "Extracting auto-scaling group name from '${cluster}' stack"
    if ! output=$(aws cloudformation describe-stacks \
        --stack-name "${cluster}" \
        --output json \
        --query 'Stacks[].Outputs[]'); then
        log ERROR "Failed to get outputs from '${cluster}' stack"
        return
    fi

    if ! auto_scaling_group=$(echo "${output}" | jq --raw-output '.[] | select(.OutputKey == "AutoScalingGroupName") | .OutputValue'); then
        log ERROR "Failed to extract auto scaling group name from '${cluster}' stack outputs"
        return
    fi
    # jq exits 0 and prints nothing when no output matches the select, so a
    # missing output has to be detected by checking for an empty name.
    if [[ -z "${auto_scaling_group}" ]]; then
        log ERROR "Failed to extract auto scaling group name from '${cluster}' stack outputs"
        return
    fi

    # Capture the instance ids before scaling in, so there is something to
    # wait on afterwards.
    log INFO "Describing auto-scaling group '${auto_scaling_group}' to get instance ids"
    if ! instance_ids=$(aws autoscaling describe-auto-scaling-groups \
        --auto-scaling-group-name "${auto_scaling_group}" \
        --query "AutoScalingGroups[].Instances[].InstanceId" \
        --output text); then
        log ERROR "Failed to get instance ids from auto scaling group '${auto_scaling_group}'"
        return
    fi
    log INFO "Instances '${instance_ids}' found"

    log INFO "Setting auto scaling group desired count to zero"
    if ! aws autoscaling update-auto-scaling-group \
        --auto-scaling-group-name "${auto_scaling_group}" \
        --desired-capacity 0 \
        --min-size 0; then
        log ERROR "Failed to change auto scaling group '${auto_scaling_group}' desired count to 0"
        return
    fi

    # Unquoted on purpose: text output separates the ids with whitespace.
    for inst_id in ${instance_ids}; do
        log INFO "Waiting for instance '${inst_id}' to terminate"
        if ! aws ec2 wait instance-terminated \
            --instance-ids "${inst_id}"; then
            log ERROR "Failed to terminate instance '${inst_id}'"
        fi
    done
}
161 | 
# Initial setup and checks
parse_args "${@}"

# Teardown runs in a fixed order. None of the helpers exits the script on
# failure, so every step below is attempted even if an earlier one failed.

# The updater stack is named "<prefix><cluster>".
delete_stack "${UPDATER_STACK_PREFIX}${CLUSTER}"

terminate_instances "${CLUSTER}"

delete_services "${CLUSTER}"

stop_tasks "${CLUSTER}"

# The cluster stack shares its name with the cluster.
delete_stack "${CLUSTER}"

# The shared integ stack is removed only when --delete-integ-stack was given.
if [[ "${delete_integ}" -eq 1 ]]; then
    delete_stack "${INTEG_STACK_NAME}"
fi
178 | 


--------------------------------------------------------------------------------
/updater/main.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"errors"
  5 | 	"flag"
  6 | 	"fmt"
  7 | 	"log"
  8 | 	"os"
  9 | 	"strings"
 10 | 	"time"
 11 | 
 12 | 	"github.com/aws/aws-sdk-go/aws"
 13 | 	"github.com/aws/aws-sdk-go/aws/arn"
 14 | 	"github.com/aws/aws-sdk-go/aws/session"
 15 | 	"github.com/aws/aws-sdk-go/service/ec2"
 16 | 	"github.com/aws/aws-sdk-go/service/ecs"
 17 | 	"github.com/aws/aws-sdk-go/service/ssm"
 18 | )
 19 | 
// Command-line flags. _main treats every one of them as required and returns
// an error when any is left empty.
var (
	flagCluster = flag.String("cluster", "", "The short name or full Amazon Resource Name (ARN) of the cluster in which we will manage Bottlerocket instances.")
	flagRegion  = flag.String("region", "", "The AWS Region in which cluster is running.")
	flagCheck   = flag.String("check-document", "", "The SSM document name for checking available updates.")
	flagApply   = flag.String("apply-document", "", "The SSM document name for applying updates.")
	flagReboot  = flag.String("reboot-document", "", "The SSM document name to initiate a reboot.")
)

// taskDefARNEnv is the name of the environment variable that taskDefFamily
// reads.
const taskDefARNEnv = "TASK_DEFINITION_ARN"
 29 | 
// updater holds the configuration and AWS service clients for one run: the
// target cluster, the three SSM document names taken from the command-line
// flags, and the ECS, SSM and EC2 clients behind their respective interfaces.
type updater struct {
	cluster        string
	checkDocument  string
	applyDocument  string
	rebootDocument string
	ecs            ECSAPI
	ssm            SSMAPI
	ec2            EC2API
}
 39 | 
 40 | func main() {
 41 | 	if err := _main(); err != nil {
 42 | 		log.Println(err.Error())
 43 | 		os.Exit(1)
 44 | 	}
 45 | }
 46 | 
// _main is the body of the program. It validates the command-line flags,
// builds the AWS clients, and then processes the cluster's container
// instances: it lists them, keeps the Bottlerocket ones, keeps those with an
// update available, and for each remaining candidate checks eligibility,
// drains, updates, re-activates and verifies it.
//
// It returns an error when a flag is missing, when one of the cluster-wide
// steps fails, or when an instance cannot be re-activated. Other per-instance
// failures are logged, recorded in the summary, and do not stop the run.
func _main() error {
	flag.Parse()
	// Every flag is required; print usage alongside the specific error.
	switch {
	case *flagCluster == "":
		flag.Usage()
		return errors.New("cluster is required")
	case *flagRegion == "":
		flag.Usage()
		return errors.New("region is required")
	case *flagCheck == "":
		flag.Usage()
		return errors.New("check-document is required")
	case *flagApply == "":
		flag.Usage()
		return errors.New("apply-document is required")
	case *flagReboot == "":
		flag.Usage()
		return errors.New("reboot-document is required")
	}

	// session.Must panics if the session cannot be created.
	sess := session.Must(session.NewSession(&aws.Config{
		Region: aws.String(*flagRegion),
	}))

	u := &updater{
		cluster:        *flagCluster,
		checkDocument:  *flagCheck,
		applyDocument:  *flagApply,
		rebootDocument: *flagReboot,
		ecs:            ecs.New(sess, aws.NewConfig()),
		ssm:            ssm.New(sess, aws.NewConfig()),
		ec2:            ec2.New(sess, aws.NewConfig()),
	}

	// If the task definition family cannot be determined, the duplicate-run
	// check is skipped and the run continues. If the family is known but the
	// check itself fails, the run is aborted instead.
	family, err := taskDefFamily()
	if err != nil {
		log.Printf("Failed to parse updater task definition arn: %v", err)
		log.Printf("Ignoring check for already running updater")
	} else {
		ok, err := u.alreadyRunning(family)
		if err != nil {
			return fmt.Errorf("Cannot determine running updater tasks, therefore stopping this run to avoid risk of multiple runs: %w", err)
		}
		if ok {
			log.Printf("Another updater is running, therefore exiting this run.")
			return nil
		}
	}

	// Each narrowing step below ends the run successfully when nothing is
	// left to process.
	listedInstances, err := u.listContainerInstances()
	if err != nil {
		return fmt.Errorf("Failed to get container instances in cluster %q: %w", u.cluster, err)
	}
	if len(listedInstances) == 0 {
		log.Print("Zero instances in the cluster")
		return nil
	}

	bottlerocketInstances, err := u.filterBottlerocketInstances(listedInstances)
	if err != nil {
		return fmt.Errorf("Failed to filter Bottlerocket instances: %w", err)
	}

	if len(bottlerocketInstances) == 0 {
		log.Printf("No Bottlerocket instances detected")
		return nil
	}
	candidates, err := u.filterAvailableUpdates(bottlerocketInstances)
	if err != nil {
		return fmt.Errorf("Failed to check updates: %w", err)
	}
	if len(candidates) == 0 {
		log.Printf("No instances to update")
		return nil
	}
	log.Printf("Instances ready for update: %#q", candidates)

	// summary maps an instance ID to the outcome message printed at the end.
	summary := make(map[string]string)
	for _, i := range candidates {
		eligible, err := u.eligible(i.containerInstanceID)
		if err != nil {
			log.Printf("Failed to determine eligibility for update of instance %#q: %v", i, err)
			summary[i.instanceID] = fmt.Sprintf("Failed to determine eligibility for update: %v", err)
			continue
		}
		if !eligible {
			log.Printf("Instance %#q is not eligible for updates because it contains non-service task", i)
			summary[i.instanceID] = "Instance is not eligible for updates because it contains non-service task(s)"
			continue
		}
		log.Printf("Instance %q is eligible for update", i)

		err = u.drainInstance(i.containerInstanceID)
		if err != nil {
			log.Printf("Failed to drain instance %#q: %v", i, err)
			summary[i.instanceID] = fmt.Sprintf("Failed to drain: %v", err)
			continue
		}
		log.Printf("Instance %#q successfully drained!", i)

		// activateInstance is called whether or not updateInstance succeeded.
		// A failed activation returns immediately, which ends the whole run
		// before the summary is printed.
		updateErr := u.updateInstance(i)
		activateErr := u.activateInstance(i.containerInstanceID)
		if updateErr != nil && activateErr != nil {
			log.Printf("Failed to update instance %#q: %v", i, updateErr)
			return fmt.Errorf("instance %#q failed to re-activate after failing to update: %w", i, activateErr)
		} else if updateErr != nil {
			log.Printf("Failed to update instance %#q: %v", i, updateErr)
			summary[i.instanceID] = fmt.Sprintf("Failed to update: %v", updateErr)
			continue
		} else if activateErr != nil {
			return fmt.Errorf("instance %#q failed to re-activate after update: %w", i, activateErr)
		}

		// Reboots are not immediate, and initiating an SSM command races with reboot. Add some
		// sleep time to allow the reboot to progress before we verify update.
		time.Sleep(20 * time.Second)
		// A verification error is only logged; the recorded outcome is decided
		// by ok alone.
		ok, err := u.verifyUpdate(i)
		if err != nil {
			log.Printf("Failed to verify update for instance %#q: %v", i, err)
		}
		if !ok {
			log.Printf("Update failed for instance %#q", i)
			summary[i.instanceID] = "Update failed"
		} else {
			log.Printf("Instance %#q updated successfully!", i)
			summary[i.instanceID] = "Instance updated successfully"
		}
	}
	// Map iteration order is unspecified, so the summary lines are printed in
	// no particular order.
	log.Printf("After action summary:")
	for k, v := range summary {
		log.Printf("%s: %s", k, v)
	}
	log.Printf("Update operations complete!")
	return nil
}
182 | 
183 | func taskDefFamily() (string, error) {
184 | 	taskDefInput := os.Getenv(taskDefARNEnv)
185 | 	taskDefARN, err := arn.Parse(taskDefInput)
186 | 	if err != nil {
187 | 		return "", err
188 | 	}
189 | 	const taskDefPrefix = "task-definition/"
190 | 	if !strings.Contains(taskDefARN.Resource, taskDefPrefix) {
191 | 		return "", fmt.Errorf("not a task definition arn: %q", taskDefInput)
192 | 	}
193 | 	// extract task definition family from resource: task-definition/<task definition family>:<revision>
194 | 	taskDef := strings.TrimPrefix(taskDefARN.Resource, taskDefPrefix)
195 | 	family := strings.SplitN(taskDef, ":", 2)[0]
196 | 	log.Printf("Updater task definition family: %q", family)
197 | 	return family, nil
198 | }
199 | 


--------------------------------------------------------------------------------
/LICENSE-APACHE:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 


--------------------------------------------------------------------------------
/stacks/bottlerocket-ecs-updater.yaml:
--------------------------------------------------------------------------------
  1 | AWSTemplateFormatVersion: '2010-09-09'
  2 | Description: 'Bottlerocket ECS updater automation & resources'
  3 | Parameters:
  4 |   ClusterName:
  5 |     Description: 'Name of ECS cluster to manage Bottlerocket instances in'
  6 |     Type: String
  7 |   Subnets:
  8 |     Description: 'List of VPC Subnet IDs where the updater should run. The subnets must have a route to the Internet via an Internet Gateway.'
  9 |     Type: List<AWS::EC2::Subnet::Id>
 10 |   UpdaterImage:
 11 |     Description: 'Bottlerocket updater container image'
 12 |     Type: String
 13 |     Default: 'public.ecr.aws/bottlerocket/bottlerocket-ecs-updater:v0.2.2'
 14 |   LogGroupName:
 15 |     Description: 'Log group name for Bottlerocket updater logs'
 16 |     Type: String
 17 |   ScheduleState:
 18 |     Description: 'Schedule events rule state; allows disabling of scheduling'
 19 |     Type: String
 20 |     Default: 'ENABLED'
 21 | Resources:
 22 |   ExecutionRole:
 23 |     Type: 'AWS::IAM::Role'
 24 |     Properties:
 25 |       AssumeRolePolicyDocument:
 26 |         Version: '2012-10-17'
 27 |         Statement:
 28 |           - Effect: Allow
 29 |             Principal:
 30 |               Service:
 31 |                 - 'ecs-tasks.amazonaws.com'
 32 |             Action:
 33 |               - 'sts:AssumeRole'
 34 |       Policies:
 35 |         - PolicyName: CreateLogGroupPolicy
 36 |           PolicyDocument:
 37 |             Version: '2012-10-17'
 38 |             Statement:
 39 |               # Allows creating log group if it does not exist
 40 |               - Effect: Allow
 41 |                 Action:
 42 |                   - 'logs:CreateLogGroup'
 43 |                 Resource:
 44 |                   - 'arn:aws:logs:*:*:*'
 45 |       Path: !Sub /${AWS::StackName}/
 46 |       ManagedPolicyArns:
 47 |         - !Sub 'arn:${AWS::Partition}:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy'
 48 |   TaskRole:
 49 |     Type: AWS::IAM::Role
 50 |     Properties:
 51 |       Description: 'Role allowing the Bottlerocket ECS Updater to manage Bottlerocket instances'
 52 |       Path: !Sub '/${AWS::StackName}/'
 53 |       AssumeRolePolicyDocument:
 54 |         Version: 2012-10-17
 55 |         Statement:
 56 |           - Effect: Allow
 57 |             Principal:
 58 |               Service: 'ecs-tasks.amazonaws.com'
 59 |             Action:
 60 |               - 'sts:AssumeRole'
 61 |       Policies:
 62 |         - PolicyName: 'BottlerocketEcsUpdaterPolicy'
 63 |           PolicyDocument:
 64 |             Version: 2012-10-17
 65 |             Statement:
 66 |               # Allows listing all container instances in a cluster
 67 |               - Effect: Allow
 68 |                 Action:
 69 |                   - 'ecs:ListContainerInstances'
 70 |                 Resource:
 71 |                   - !Sub 'arn:${AWS::Partition}:ecs:${AWS::Region}:${AWS::AccountId}:cluster/${ClusterName}'
 72 |               # Allows describe container instances to get ec2 instance ID and ecs attributes to filter Bottlerocket instances
 73 |               # Allows list tasks to filter instances running standalone tasks
 74 |               # Allows update container instance state for draining
 75 |               # Allows describe tasks to identify tasks not started by service
 76 |               - Effect: Allow
 77 |                 Action:
 78 |                   - 'ecs:DescribeContainerInstances'
 79 |                   - 'ecs:ListTasks'
 80 |                   - 'ecs:UpdateContainerInstancesState'
 81 |                   - 'ecs:DescribeTasks'
 82 |                 Resource: '*'
 83 |                 Condition:
 84 |                   ArnEquals:
 85 |                     ecs:cluster: !Sub 'arn:${AWS::Partition}:ecs:${AWS::Region}:${AWS::AccountId}:cluster/${ClusterName}'
 86 |               # Allows ssm send command to make Bottlerocket update API calls
 87 |               - Effect: Allow
 88 |                 Action:
 89 |                   - 'ssm:SendCommand'
 90 |                 Resource:
 91 |                   - !Sub "arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:document/${UpdateCheckCommand}"
 92 |                   - !Sub "arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:document/${UpdateApplyCommand}"
 93 |                   - !Sub "arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:document/${RebootCommand}"
 94 |                   - !Sub "arn:${AWS::Partition}:ec2:${AWS::Region}:${AWS::AccountId}:instance/*"
 95 |               # Allows get command invocation to get Bottlerocket API calls output
 96 |               - Effect: Allow
 97 |                 Action:
 98 |                   - 'ssm:GetCommandInvocation'
 99 |                 Resource:
100 |                   - !Sub "arn:${AWS::Partition}:ssm:${AWS::Region}:${AWS::AccountId}:*"
101 |               # Allows checking the EC2 instance state after an update occurs
102 |               - Effect: Allow
103 |                 Action:
104 |                   - 'ec2:DescribeInstanceStatus'
105 |                 Resource: '*'
106 |   UpdaterTaskDefinition:
107 |     Type: AWS::ECS::TaskDefinition
108 |     Properties:
109 |       NetworkMode: awsvpc
110 |       RequiresCompatibilities:
111 |         - FARGATE
112 |       Cpu: "256"
113 |       Memory: "0.5GB"
114 |       ExecutionRoleArn: !GetAtt ExecutionRole.Arn
115 |       TaskRoleArn: !GetAtt TaskRole.Arn
116 |       ContainerDefinitions:
117 |         - Name: BottlerocketEcsUpdaterService
118 |           Image: !Ref UpdaterImage
119 |           Command:
120 |             - -cluster
121 |             - !Ref ClusterName
122 |             - -region
123 |             - !Ref AWS::Region
124 |             - -check-document
125 |             - !Ref UpdateCheckCommand
126 |             - -apply-document
127 |             - !Ref UpdateApplyCommand
128 |             - -reboot-document
129 |             - !Ref RebootCommand
130 |           LogConfiguration:
131 |             LogDriver: awslogs
132 |             Options:
133 |               awslogs-create-group: 'true'
134 |               awslogs-region: !Ref AWS::Region
135 |               awslogs-group: !Ref LogGroupName
136 |               awslogs-stream-prefix: !Sub '/ecs/bottlerocket-updater/${ClusterName}'
137 |   BottlerocketUpdaterSchedule:
138 |     Type: AWS::Events::Rule
139 |     Properties:
140 |       Description: "Check for Bottlerocket updates on a schedule"
141 |       # Run Task every 12 hours
142 |       ScheduleExpression: "rate(12 hours)"
143 |       State: !Ref ScheduleState
144 |       Targets:
145 |         - Id: ecs-updater-fargate-task
146 |           RoleArn: !GetAtt CronRole.Arn
147 |           Arn: !Sub 'arn:${AWS::Partition}:ecs:${AWS::Region}:${AWS::AccountId}:cluster/${ClusterName}'
148 |           Input:
149 |              !Sub |
150 |               {
151 |                   "containerOverrides": [
152 |                       {
153 |                          "name": "BottlerocketEcsUpdaterService",
154 |                          "environment": [
155 |                              {
156 |                                  "name" : "TASK_DEFINITION_ARN",
157 |                                  "value": "${UpdaterTaskDefinition}"
158 |                              }
159 |                          ]
160 |                       }
161 |                   ]
162 |               }
163 |           EcsParameters:
164 |             LaunchType: FARGATE
165 |             TaskCount: 1
166 |             TaskDefinitionArn: !Ref UpdaterTaskDefinition
167 |             NetworkConfiguration:
168 |               AwsVpcConfiguration:
169 |                 # The Bottlerocket ECS Updater does not need a public IP for its operations. The public IP
170 |                 # is only required to pull images from ECR as a Fargate task
171 |                 AssignPublicIp: ENABLED
172 |                 Subnets: !Ref Subnets
173 |   CronRole:
174 |     Type: AWS::IAM::Role
175 |     Properties:
176 |       AssumeRolePolicyDocument:
177 |         Version: "2012-10-17"
178 |         Statement:
179 |           - Effect: "Allow"
180 |             Principal:
181 |               Service:
182 |                 - "events.amazonaws.com"
183 |             Action:
184 |               - "sts:AssumeRole"
185 |       Path: !Sub '/${AWS::StackName}/'
186 |       Policies:
187 |         - PolicyName: "BottlerocketEcsUpdaterSchedulerPolicy"
188 |           PolicyDocument:
189 |             Statement:
190 |               - Effect: "Allow"
191 |                 Condition:
192 |                   ArnEquals:
193 |                     ecs:cluster: !Sub 'arn:${AWS::Partition}:ecs:${AWS::Region}:${AWS::AccountId}:cluster/${ClusterName}'
194 |                 Action: "ecs:RunTask"
195 |                 Resource:
196 |                   - !Ref UpdaterTaskDefinition
197 |               - Effect: "Allow"
198 |                 Condition:
199 |                   ArnEquals:
200 |                     ecs:cluster: !Sub 'arn:${AWS::Partition}:ecs:${AWS::Region}:${AWS::AccountId}:cluster/${ClusterName}'
201 |                 Action:
202 |                   - "iam:PassRole"
203 |                 Resource:
204 |                   - !GetAtt TaskRole.Arn
205 |                   - !GetAtt ExecutionRole.Arn
206 |   UpdateCheckCommand:
207 |     Type: AWS::SSM::Document
208 |     Properties:
209 |       DocumentType: Command
210 |       Content:
211 |         schemaVersion: "2.2"
212 |         description: "Bottlerocket - Check available updates"
213 |         mainSteps:
214 |           - action: "aws:runShellScript"
215 |             name: "CheckUpdate"
216 |             precondition:
217 |               StringEquals:
218 |                 - platformType
219 |                 - Linux
220 |             inputs:
221 |               timeoutSeconds: '1800'
222 |               runCommand:
223 |                 - "apiclient update check"
224 |   UpdateApplyCommand:
225 |     Type: AWS::SSM::Document
226 |     Properties:
227 |       DocumentType: Command
228 |       Content:
229 |         schemaVersion: "2.2"
230 |         description: "Bottlerocket - Apply update"
231 |         mainSteps:
232 |           - action: "aws:runShellScript"
233 |             name: "ApplyUpdate"
234 |             precondition:
235 |               StringEquals:
236 |                 - platformType
237 |                 - Linux
238 |             inputs:
239 |               timeoutSeconds: '1800'
240 |               runCommand:
241 |                 - "apiclient update apply"
242 |   RebootCommand:
243 |     Type: AWS::SSM::Document
244 |     Properties:
245 |       DocumentType: Command
246 |       Content:
247 |         schemaVersion: "2.2"
248 |         description: "Bottlerocket - Reboot"
249 |         mainSteps:
250 |           - action: "aws:runShellScript"
251 |             name: "Reboot"
252 |             precondition:
253 |               StringEquals:
254 |                 - platformType
255 |                 - Linux
256 |             inputs:
257 |               timeoutSeconds: '1800'
258 |               runCommand:
259 |                 - "apiclient reboot"
260 | Outputs:
261 |   UpdaterTaskDefinitionArn:
262 |     Description: 'Updater task definition ARN'
263 |     Value: !Ref UpdaterTaskDefinition
264 |     Export:
265 |       Name: !Sub "${AWS::StackName}:UpdaterTaskDefinition"
266 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Bottlerocket ECS Updater
  2 | 
  3 | The Bottlerocket ECS Updater is a service you can install into your ECS cluster that helps you keep your Bottlerocket container instances up to date.
  4 | When installed, the Bottlerocket ECS Updater will periodically query each Bottlerocket container instance to find whether an update is available and drain tasks while an update is in progress.
  5 | Updates to Bottlerocket are rolled out in [waves](https://github.com/bottlerocket-os/bottlerocket/tree/develop/sources/updater/waves) to reduce the impact of issues; the container instances in your cluster may not all see updates at the same time.
  6 | 
  7 | ## Installation
  8 | 
  9 | You can install the Bottlerocket ECS Updater into your cluster with the provided [CloudFormation template](stacks/bottlerocket-ecs-updater.yaml).
 10 | The following information is required when creating the CloudFormation stack:
 11 | 
 12 | * The name of the ECS cluster where you are running Bottlerocket container instances
 13 | * The name of the CloudWatch Logs log group where the Bottlerocket ECS Updater will send its logs
 14 | * At least one subnet ID that has Internet access (which does not need to be shared with the rest of your cluster)
 15 | 
 16 | When installed, the CloudFormation template will create the following resources in your account:
 17 | 
 18 | * A task definition for the Bottlerocket ECS Updater
 19 | * A CloudWatch Events scheduled rule to execute the Bottlerocket ECS Updater
 20 | * An IAM role for the Bottlerocket ECS Updater task itself as well as roles for Fargate and CloudWatch Events
 21 | * SSM documents to query and execute updates on Bottlerocket instances
 22 | 
 23 | ## Getting Started
 24 | 
 25 | To install the Bottlerocket ECS Updater, you will need to fetch some information first.
 26 | 
 27 | ### Subnet info
 28 | 
 29 | You should either have a default virtual private cloud (VPC) or have already
 30 | [created a VPC](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/get-set-up-for-amazon-ecs.html#create-a-vpc)
 31 | in your account.
 32 | 
 33 | To find your default VPC, run this command.
 34 | (If you use an AWS region other than "us-west-2", make sure to change that.)
 35 | 
 36 | ```sh
 37 | aws ec2 describe-vpcs \
 38 |    --region us-west-2 \
 39 |    --filters=Name=isDefault,Values=true \
 40 |    | jq --raw-output '.Vpcs[].VpcId'
 41 | ```
 42 | 
 43 | If you want to use a different VPC you created, run this to get the ID for your VPC.
 44 | Make sure to change VPC_NAME to the name of the VPC you created.
 45 | (If you use an AWS region other than "us-west-2", make sure to change that too.)
 46 | 
 47 | ```sh
 48 | aws ec2 describe-vpcs \
 49 |    --region us-west-2 \
 50 |    --filters=Name=tag:Name,Values=VPC_NAME \
 51 |    | jq --raw-output '.Vpcs[].VpcId'
 52 | ```
 53 | 
 54 | Next, run this to get information about the subnets in your VPC.
 55 | It will give you a list of the subnets and tell you whether each is public or private.
 56 | Make sure to change VPC_ID to the value you received from the previous command.
 57 | (If you use an AWS region other than "us-west-2", make sure to change that too.)
 58 | 
 59 | ```sh
 60 | aws ec2 describe-subnets \
 61 |    --region us-west-2 \
 62 |    --filter=Name=vpc-id,Values=VPC_ID \
 63 |    | jq '.Subnets[] | {id: .SubnetId, public: .MapPublicIpOnLaunch, az: .AvailabilityZone}'
 64 | ```
 65 | 
 66 | You'll want to pick at least one and save it for the launch command later.
 67 | Make sure the subnets you select have Internet access so the updater can reach its dependencies.
 68 | Public subnets usually have Internet access via an [Internet gateway](https://docs.aws.amazon.com/vpc/latest/userguide/VPC_Internet_Gateway.html) while private subnets may be configured with NAT.
 69 | For more information, see [the VPC user guide](https://docs.aws.amazon.com/vpc/latest/userguide/VPC_Internet_Gateway.html#vpc-igw-internet-access).
 70 | 
 71 | We recommend picking several subnets in different availability zones.
 72 | However, if you want to launch in a specific availability zone, make sure you pick a subnet that matches; the AZ is listed right below the public/private status.
 73 | 
 74 | ### Log Group
 75 | 
 76 | You can either choose an existing log group or create a new one to get your ECS updater logs.
 77 | 
 78 | You can run this to get the list of existing log groups:
 79 | ```sh
 80 | aws logs describe-log-groups
 81 | ```
 82 | 
 83 | You'll want to pick one and save it for the installation command later.
 84 | 
 85 | If you want to create a new log group, run this, making sure to change LOG_GROUP_NAME to the name you want:
 86 | ```sh
 87 | aws logs create-log-group --log-group-name LOG_GROUP_NAME
 88 | ```
 89 | 
 90 | ### Install
 91 | 
 92 | Now we can install the [CloudFormation template](stacks/bottlerocket-ecs-updater.yaml) to start the ECS updater for your cluster!
 93 | 
 94 | There are a few values to make sure you change in this command:
 95 | * CLUSTER_NAME: the name of the cluster you want ECS updater to manage Bottlerocket instances in
 96 | * SUBNET_IDS: a comma-separated list of the subnets you selected earlier
 97 | * LOG_GROUP_NAME: the log group name you selected or created earlier
 98 | 
 99 | ```sh
100 | aws cloudformation deploy \
101 |     --stack-name "bottlerocket-ecs-updater" \
102 |     --template-file "./stacks/bottlerocket-ecs-updater.yaml" \
103 |     --capabilities CAPABILITY_NAMED_IAM \
104 |     --parameter-overrides \
105 |     ClusterName="CLUSTER_NAME" \
106 |     Subnets="SUBNET_IDS" \
107 |     LogGroupName="LOG_GROUP_NAME"
108 | ```
109 | 
110 | ## How it works
111 | 
112 | The Bottlerocket ECS Updater is designed to run as a scheduled Fargate task that queries, drains, and performs updates in your ECS cluster.
113 | A rule in CloudWatch Events periodically launches the updater as a new Fargate task.
114 | The updater queries the ECS API to discover all the container instances in your cluster and filters for Bottlerocket instances by reading the `bottlerocket.variant` attribute.
115 | For each Bottlerocket instance found, the updater executes an SSM document that queries for available updates using the `apiclient update check` command.
116 | When an update is available, the updater checks to see whether the tasks currently running on the container instance are part of a [service](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs_services.html) and eligible for replacement.
117 | If all the tasks are part of a service, the updater marks the container instance for [draining](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/container-instance-draining.html) and waits for the tasks to be successfully drained.
118 | After the container instance has been drained, the updater executes an SSM document to download the update, apply the update, and reboot.
119 | Finally, the updater will mark the container instance as active and move on to the next one.
120 | 
121 | ## Troubleshooting
122 | 
123 | When installed with the provided CloudFormation template, the logs for the updater will be available in the CloudWatch Logs group you configured.
124 | Checking the logs is a good first step in understanding why something happened or didn't happen.
125 | 
126 | ### Why do only some of my Bottlerocket instances have an update available?
127 | 
128 | Updates to Bottlerocket are rolled out in [waves](https://github.com/bottlerocket-os/bottlerocket/tree/develop/sources/updater/waves) to reduce the impact of issues; the container instances in your cluster may not all see updates at the same time.
129 | You can check whether an update is available on your instance by running the `apiclient update check` command from within the [control](https://github.com/bottlerocket-os/bottlerocket#control-container) or [admin](https://github.com/bottlerocket-os/bottlerocket#admin-container) container.
130 | 
131 | ### My Bottlerocket instance has an update available.  Why didn't the Bottlerocket ECS Updater update it?
132 | 
133 | The Bottlerocket ECS Updater attempts to update container instances without disrupting the workloads in your cluster.
134 | Applying an update to Bottlerocket requires a reboot.
135 | To avoid disruption in your cluster, the Bottlerocket ECS Updater uses the [container instance draining](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/container-instance-draining.html) feature of ECS.
136 | A container instance may be skipped for update when:
137 | 
138 | * _A non-service task is running._
139 |   Non-service tasks are not automatically replaced when they are stopped.
140 |   To avoid disrupting a critical workload, the Bottlerocket ECS Updater will not stop a non-service task.
141 | * _No spare capacity is available in the cluster._
142 |   The service scheduler attempts to replace the tasks according to the service's deployment configuration parameters, `minimumHealthyPercent` and `maximumPercent`.
143 |   If stopping a task would reduce the running count below your service's `minimumHealthyPercent`, ECS will not stop the task.
144 |   The Bottlerocket ECS Updater will wait for draining to complete for a fixed period of time (currently 25 minutes).
145 |   If draining has not completed by the end of the period, the updater will restore the instance and move to the next one.
146 | * _Draining takes too long._
147 |   The Bottlerocket ECS Updater will wait for draining to complete for a fixed period of time (currently 25 minutes).
148 |   If draining has not completed by the end of the period, the updater will restore the instance and move to the next one.
149 |   The time it takes for a task to be stopped is related to the `stopTimeout` task definition parameter and to any associated resources like load balancers.
150 |   If your tasks are taking too long to drain, you can ensure that your task responds to `SIGTERM`, shorten the `stopTimeout`, or shorten the load balancer's health check and deregistration delay settings.
151 | * _Bottlerocket version is too old._
152 |   The Bottlerocket ECS Updater uses newer [`apiclient update` commands](https://github.com/bottlerocket-os/bottlerocket#update-api) that were added in version [1.0.5](https://github.com/bottlerocket-os/bottlerocket/blob/develop/CHANGELOG.md#v105-2021-01-15).
153 |   The SSM commands will fail if your Bottlerocket OS version is less than 1.0.5.
154 |   Instances running Bottlerocket versions less than 1.0.5 need to be manually updated.
155 | 
156 | ### Why do new container instances launch with older Bottlerocket versions?
157 | 
158 | The Bottlerocket ECS Updater performs in-place updates for instances in your ECS cluster.
159 | The updater does not influence how those instances are launched.
160 | If you use an auto-scaling group to launch your instances, you can update the AMI ID in your launch configuration or launch template to use a newer version of Bottlerocket.
161 | 
162 | Note: We do not recommend using the Bottlerocket ECS Updater in conjunction with EC2 Spot.
163 | The ECS Updater is designed to keep services safe from interruption by updating one instance at a time.
164 | With the short average lifetime of Spot instances, the updater may not update them until relatively late in their life, meaning they may not be up to date when serving your application.
165 | 
166 | ## Developer guide
167 | 
168 | To get started with building and developing the ECS updater, make sure you have:
169 | 
170 | * [Go installed](https://go.dev/doc/install)
171 | * [`golangci-lint` installed locally](https://golangci-lint.run/usage/install/#local-installation)
172 | * make
173 | * [amazon-ecr-credential-helper](https://github.com/awslabs/amazon-ecr-credential-helper) setup for Docker and access to ECR (or your preferred image registry)
174 | * The [CloudFormation template linter (`cfn-lint`) installed](https://github.com/aws-cloudformation/cfn-lint)
175 | 
176 | Make sure everything is ready and installed by running the tests with `make test`.
177 | Ensure the local builds work by running `make`.
178 | You might first need to get the modules downloaded to your local go mod cache by running `make tidy`.
179 | 
180 | ## Security
181 | 
182 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information.
183 | 
184 | ## License
185 | 
186 | This project is dual licensed under either the Apache-2.0 License or the MIT license, your choice.
187 | 
188 | 


--------------------------------------------------------------------------------
/updater/aws.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"encoding/json"
  5 | 	"fmt"
  6 | 	"log"
  7 | 	"strings"
  8 | 	"sync"
  9 | 	"time"
 10 | 
 11 | 	"github.com/aws/aws-sdk-go/aws"
 12 | 	"github.com/aws/aws-sdk-go/aws/request"
 13 | 	"github.com/aws/aws-sdk-go/service/ec2"
 14 | 	"github.com/aws/aws-sdk-go/service/ecs"
 15 | 	"github.com/aws/aws-sdk-go/service/ssm"
 16 | )
 17 | 
// Tuning constants used throughout this file:
//   - ecsPageSize and ssmPageSize are the batch sizes handed to eachPage when
//     describing container instances and when sending SSM commands.
//   - updateState* are the values compared against checkOutput.UpdateState.
//   - waiterDelay and waiterMaxAttempts configure the ECS and SSM waiters:
//     up to 100 polls, 15 seconds apart (about 25 minutes in total).
//   - deliveryTimeoutSeconds is passed as TimeoutSeconds on every SendCommand.
const (
	ecsPageSize          = 100
	ssmPageSize          = 50
	updateStateIdle      = "Idle"
	updateStateStaged    = "Staged"
	updateStateAvailable = "Available"
	updateStateReady     = "Ready"
	waiterDelay          = time.Duration(15) * time.Second
	waiterMaxAttempts    = 100
	// If this time is reached and the ssm command has not already started running, it will not run.
	deliveryTimeoutSeconds = 600
)
 30 | 
// instance identifies one Bottlerocket container instance as it moves through
// the filtering and update steps in this file.
type instance struct {
	instanceID          string // EC2 instance ID, copied from the container instance's Ec2InstanceId
	containerInstanceID string // copied from the container instance's ContainerInstanceArn
	bottlerocketVersion string // active image version reported by the check document; set by filterAvailableUpdates
}
 36 | 
// checkOutput holds the fields parseCommandOutput decodes from the JSON
// standard output of an SSM command: the update state and the version of the
// image on the active partition. Other fields in the JSON are ignored.
type checkOutput struct {
	UpdateState     string `json:"update_state"`
	ActivePartition struct {
		Image struct {
			Version string `json:"version"`
		} `json:"image"`
	} `json:"active_partition"`
}
 45 | 
// ECSAPI lists the ECS client operations the updater invokes. Keeping the set
// this small presumably lets tests substitute a mock for the real SDK client.
type ECSAPI interface {
	ListContainerInstancesPages(*ecs.ListContainerInstancesInput, func(*ecs.ListContainerInstancesOutput, bool) bool) error
	DescribeContainerInstances(input *ecs.DescribeContainerInstancesInput) (*ecs.DescribeContainerInstancesOutput, error)
	UpdateContainerInstancesState(input *ecs.UpdateContainerInstancesStateInput) (*ecs.UpdateContainerInstancesStateOutput, error)
	ListTasks(input *ecs.ListTasksInput) (*ecs.ListTasksOutput, error)
	DescribeTasks(input *ecs.DescribeTasksInput) (*ecs.DescribeTasksOutput, error)
	WaitUntilTasksStoppedWithContext(ctx aws.Context, input *ecs.DescribeTasksInput, opts ...request.WaiterOption) error
}
 54 | 
// SSMAPI lists the SSM client operations the updater invokes: sending a
// document to instances, waiting for an invocation to finish, and fetching
// the invocation's result.
type SSMAPI interface {
	WaitUntilCommandExecutedWithContext(ctx aws.Context, input *ssm.GetCommandInvocationInput, opts ...request.WaiterOption) error
	SendCommand(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error)
	GetCommandInvocation(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error)
}
 60 | 
// EC2API lists the single EC2 client operation the updater invokes: waiting
// for an instance to report an Ok status (used by waitUntilOk after a reboot).
type EC2API interface {
	WaitUntilInstanceStatusOk(input *ec2.DescribeInstanceStatusInput) error
}
 64 | 
 65 | func (u *updater) alreadyRunning(family string) (bool, error) {
 66 | 	log.Print("Checking for running updater tasks")
 67 | 	list, err := u.ecs.ListTasks(&ecs.ListTasksInput{
 68 | 		Cluster: &u.cluster,
 69 | 		Family:  aws.String(family),
 70 | 	})
 71 | 	if err != nil {
 72 | 		return false, fmt.Errorf("failed to list running updater tasks: %w", err)
 73 | 	}
 74 | 	if len(list.TaskArns) > 1 {
 75 | 		return true, nil
 76 | 	}
 77 | 	log.Println("This is the only running updater.")
 78 | 	return false, nil
 79 | }
 80 | 
 81 | func (u *updater) listContainerInstances() ([]*string, error) {
 82 | 	log.Printf("Listing active container instances in cluster %q", u.cluster)
 83 | 	containerInstances := make([]*string, 0)
 84 | 	input := &ecs.ListContainerInstancesInput{
 85 | 		Cluster: &u.cluster,
 86 | 		Status:  aws.String(ecs.ContainerInstanceStatusActive),
 87 | 	}
 88 | 	if err := u.ecs.ListContainerInstancesPages(input, func(output *ecs.ListContainerInstancesOutput, _ bool) bool {
 89 | 		containerInstances = append(containerInstances, output.ContainerInstanceArns...)
 90 | 		return true
 91 | 	}); err != nil {
 92 | 		return nil, fmt.Errorf("failed to list container instances: %w", err)
 93 | 	}
 94 | 	log.Printf("Found %d container instances in the cluster", len(containerInstances))
 95 | 	return containerInstances, nil
 96 | }
 97 | 
 98 | // filterBottlerocketInstances filters container instances and returns list of
 99 | // instances that are running Bottlerocket OS
100 | func (u *updater) filterBottlerocketInstances(instances []*string) ([]instance, error) {
101 | 	log.Printf("Filtering container instances running Bottlerocket OS")
102 | 	bottlerocketInstances := make([]instance, 0)
103 | 	errCount := 0
104 | 	var lastErr error
105 | 	pageCount, err := eachPage(len(instances), ecsPageSize, func(start, stop int) error {
106 | 		resp, err := u.ecs.DescribeContainerInstances(&ecs.DescribeContainerInstancesInput{
107 | 			Cluster:            &u.cluster,
108 | 			ContainerInstances: instances[start:stop],
109 | 		})
110 | 		// count errors per page.
111 | 		if err != nil {
112 | 			log.Printf("Failed to describe container instances from %d to %d: %v", start, stop, err)
113 | 			errCount++
114 | 			lastErr = err
115 | 			return nil
116 | 		}
117 | 		for _, containerInstance := range resp.ContainerInstances {
118 | 			if containsAttribute(containerInstance.Attributes, "bottlerocket.variant") {
119 | 				bottlerocketInstances = append(bottlerocketInstances, instance{
120 | 					instanceID:          aws.StringValue(containerInstance.Ec2InstanceId),
121 | 					containerInstanceID: aws.StringValue(containerInstance.ContainerInstanceArn),
122 | 				})
123 | 				log.Printf("Bottlerocket instance %q detected.", aws.StringValue(containerInstance.Ec2InstanceId))
124 | 			}
125 | 		}
126 | 		return nil
127 | 	})
128 | 	if err != nil {
129 | 		return nil, err
130 | 	}
131 | 	// check if every page had an error; errors are only fatal if each page failed.
132 | 	if errCount == pageCount {
133 | 		return nil, fmt.Errorf("failed to describe any container instances: %w", lastErr)
134 | 	}
135 | 	return bottlerocketInstances, nil
136 | }
137 | 
138 | // containsAttribute checks if a slice of ECS Attributes struct contains a specified name.
139 | func containsAttribute(attrs []*ecs.Attribute, searchString string) bool {
140 | 	for _, attr := range attrs {
141 | 		if aws.StringValue(attr.Name) == searchString {
142 | 			return true
143 | 		}
144 | 	}
145 | 	return false
146 | }
147 | 
// eachPage splits the index range [0, inputLen) into consecutive pages of at
// most size elements and calls fn once per page with half-open bounds
// [start, stop). The last page may be shorter than size.
//
// It returns the number of pages processed. If fn returns an error, iteration
// stops immediately and that error is returned with a page count of 0. When
// there is something to page through, a non-positive size is rejected with an
// error; otherwise the loop would never advance and fn would receive
// invalid bounds.
func eachPage(inputLen int, size int, fn func(start, stop int) error) (int, error) {
	if inputLen > 0 && size <= 0 {
		return 0, fmt.Errorf("invalid page size %d: must be greater than zero", size)
	}
	pageCount := 0
	for start := 0; start < inputLen; start += size {
		// Clamp the final page to the end of the input.
		stop := start + size
		if stop > inputLen {
			stop = inputLen
		}
		if err := fn(start, stop); err != nil {
			return 0, err
		}
		pageCount++
	}
	return pageCount, nil
}
163 | 
164 | // filterAvailableUpdates returns a list of instances that have updates available
165 | func (u *updater) filterAvailableUpdates(bottlerocketInstances []instance) ([]instance, error) {
166 | 	log.Printf("Filtering instances with available updates")
167 | 	// make slice of Bottlerocket instances to use with SendCommand and checkCommandOutput
168 | 	instances := make([]string, 0)
169 | 	for _, inst := range bottlerocketInstances {
170 | 		instances = append(instances, inst.instanceID)
171 | 	}
172 | 
173 | 	var lastErr error
174 | 	errCount := 0
175 | 	candidates := make([]instance, 0)
176 | 	pageCount, err := eachPage(len(instances), ssmPageSize, func(start, stop int) error {
177 | 		commandID, err := u.sendCommand(instances[start:stop], u.checkDocument)
178 | 		if err != nil {
179 | 			// errors here are considered non-fatal.
180 | 			log.Printf("Failed to send document %s: %v", u.checkDocument, err)
181 | 			errCount++
182 | 			lastErr = err
183 | 			return nil
184 | 		}
185 | 		for _, inst := range bottlerocketInstances[start:stop] {
186 | 			commandOutput, err := u.getCommandResult(commandID, inst.instanceID)
187 | 			if err != nil {
188 | 				// errors here are considered non-fatal
189 | 				log.Printf("Failed to get output for command %s, document %s and instance %q: %v", commandID, u.checkDocument, inst, err)
190 | 				continue
191 | 			}
192 | 			output, err := parseCommandOutput(commandOutput)
193 | 			if err != nil {
194 | 				log.Printf("Failed to parse command output %q for instance %q: %v", string(commandOutput), inst, err)
195 | 				continue
196 | 			}
197 | 			if output.UpdateState == updateStateAvailable || output.UpdateState == updateStateReady {
198 | 				inst.bottlerocketVersion = output.ActivePartition.Image.Version
199 | 				candidates = append(candidates, inst)
200 | 			}
201 | 		}
202 | 		return nil
203 | 	})
204 | 	if err != nil {
205 | 		return nil, err
206 | 	}
207 | 	if errCount == pageCount {
208 | 		return nil, fmt.Errorf("all attempts to send SSM document %s failed: %w", u.checkDocument, lastErr)
209 | 	}
210 | 	return candidates, nil
211 | }
212 | 
213 | // eligible checks the eligibility of container instance for update. It's eligible
214 | // if all the running tasks were started by a service.
215 | func (u *updater) eligible(containerInstance string) (bool, error) {
216 | 	log.Printf("Checking eligiblity for update of container instance %q", containerInstance)
217 | 	list, err := u.ecs.ListTasks(&ecs.ListTasksInput{
218 | 		Cluster:           &u.cluster,
219 | 		ContainerInstance: aws.String(containerInstance),
220 | 	})
221 | 	if err != nil {
222 | 		return false, fmt.Errorf("failed to list tasks: %w", err)
223 | 	}
224 | 	taskARNs := list.TaskArns
225 | 	if len(list.TaskArns) == 0 {
226 | 		return true, nil
227 | 	}
228 | 
229 | 	desc, err := u.ecs.DescribeTasks(&ecs.DescribeTasksInput{
230 | 		Cluster: &u.cluster,
231 | 		Tasks:   taskARNs,
232 | 	})
233 | 	if err != nil {
234 | 		return false, fmt.Errorf("failed to describe tasks: %w", err)
235 | 	}
236 | 	for _, listResult := range desc.Tasks {
237 | 		startedBy := aws.StringValue(listResult.StartedBy)
238 | 		if !strings.HasPrefix(startedBy, "ecs-svc/") {
239 | 			log.Printf("Container instance %q has a non-service task running: %s", containerInstance, aws.StringValue(listResult.TaskArn))
240 | 			return false, nil
241 | 		}
242 | 	}
243 | 	return true, nil
244 | }
245 | 
246 | func (u *updater) drainInstance(containerInstance string) error {
247 | 	log.Printf("Starting drain on container instance %q", containerInstance)
248 | 	resp, err := u.ecs.UpdateContainerInstancesState(&ecs.UpdateContainerInstancesStateInput{
249 | 		Cluster:            &u.cluster,
250 | 		ContainerInstances: aws.StringSlice([]string{containerInstance}),
251 | 		Status:             aws.String("DRAINING"),
252 | 	})
253 | 	if err != nil {
254 | 		return fmt.Errorf("failed to change instance state to DRAINING: %w", err)
255 | 	}
256 | 	if len(resp.Failures) != 0 {
257 | 		log.Printf("There are API failures in draining the container instance %q, therefore attempting to"+
258 | 			" re-activate", containerInstance)
259 | 		err = u.activateInstance(containerInstance)
260 | 		if err != nil {
261 | 			log.Printf("Instance failed to re-activate after failing to change state to DRAINING: %v", err)
262 | 		}
263 | 		return fmt.Errorf("failures in API call: %v", resp.Failures)
264 | 	}
265 | 	log.Printf("Container instance state changed to DRAINING")
266 | 
267 | 	err = u.waitUntilDrained(containerInstance)
268 | 	if err != nil {
269 | 		log.Printf("Container instance %q failed to drain, therefore attempting to re-activate", containerInstance)
270 | 		err2 := u.activateInstance(containerInstance)
271 | 		if err2 != nil {
272 | 			log.Printf("Instance failed to re-activate after failing to wait for drain to complete: %v", err2)
273 | 		}
274 | 		return fmt.Errorf("error while waiting to drain: %w", err)
275 | 	}
276 | 	log.Printf("Container instance %q drained successfully!", containerInstance)
277 | 	return nil
278 | }
279 | 
280 | func (u *updater) activateInstance(containerInstance string) error {
281 | 	resp, err := u.ecs.UpdateContainerInstancesState(&ecs.UpdateContainerInstancesStateInput{
282 | 		Cluster:            &u.cluster,
283 | 		ContainerInstances: aws.StringSlice([]string{containerInstance}),
284 | 		Status:             aws.String("ACTIVE"),
285 | 	})
286 | 	if err != nil {
287 | 		return fmt.Errorf("failed to change state to ACTIVE: %w", err)
288 | 	}
289 | 	if len(resp.Failures) != 0 {
290 | 		if aws.StringValue(resp.Failures[0].Reason) == "INACTIVE" {
291 | 			log.Printf("Container instance %q is in INACTIVE state", containerInstance)
292 | 			return nil
293 | 		}
294 | 		return fmt.Errorf("API failures while activating: %v", resp.Failures)
295 | 	}
296 | 	log.Printf("Container instance %q state changed to ACTIVE successfully!", containerInstance)
297 | 	return nil
298 | }
299 | 
300 | func (u *updater) waitUntilDrained(containerInstance string) error {
301 | 	log.Printf("Waiting for container instance %q to drain", containerInstance)
302 | 	list, err := u.ecs.ListTasks(&ecs.ListTasksInput{
303 | 		Cluster:           &u.cluster,
304 | 		ContainerInstance: aws.String(containerInstance),
305 | 	})
306 | 	if err != nil {
307 | 		return fmt.Errorf("failed to list tasks: %w", err)
308 | 	}
309 | 	taskARNs := list.TaskArns
310 | 
311 | 	if len(taskARNs) == 0 {
312 | 		log.Printf("No tasks to drain")
313 | 		return nil
314 | 	}
315 | 
316 | 	return u.ecs.WaitUntilTasksStoppedWithContext(aws.BackgroundContext(), &ecs.DescribeTasksInput{
317 | 		Cluster: &u.cluster,
318 | 		Tasks:   taskARNs,
319 | 	},
320 | 		request.WithWaiterMaxAttempts(waiterMaxAttempts),
321 | 		request.WithWaiterDelay(request.ConstantWaiterDelay(waiterDelay)),
322 | 	)
323 | }
324 | 
325 | // updateInstance starts an update process on an instance.
326 | func (u *updater) updateInstance(inst instance) error {
327 | 	log.Printf("Starting update on instance %q", inst.instanceID)
328 | 	ec2IDs := []string{inst.instanceID}
329 | 	log.Printf("Checking current update state of instance %q", inst.instanceID)
330 | 
331 | 	commandID, err := u.sendCommand(ec2IDs, u.checkDocument)
332 | 	if err != nil {
333 | 		return fmt.Errorf("failed to send check command: %w", err)
334 | 	}
335 | 	output, err := u.getCommandResult(commandID, inst.instanceID)
336 | 	if err != nil {
337 | 		return fmt.Errorf("failed to get check command output: %w", err)
338 | 	}
339 | 	check, err := parseCommandOutput(output)
340 | 	if err != nil {
341 | 		return fmt.Errorf("failed to parse command output %q: %w", string(output), err)
342 | 	}
343 | 
344 | 	switch check.UpdateState {
345 | 	case updateStateIdle:
346 | 		log.Printf("No new update available for instance %q", inst.instanceID)
347 | 		return nil
348 | 	case updateStateStaged:
349 | 		return fmt.Errorf("unexpected update state %q; skipping instance", check.UpdateState)
350 | 	case updateStateAvailable:
351 | 		log.Printf("Starting update apply on instance %q", inst.instanceID)
352 | 		_, err := u.sendCommand(ec2IDs, u.applyDocument)
353 | 		if err != nil {
354 | 			return fmt.Errorf("failed to send update apply command: %w", err)
355 | 		}
356 | 	case updateStateReady:
357 | 		log.Printf("Update is previously applied on instance %q", inst.instanceID)
358 | 	default:
359 | 		return fmt.Errorf("unknown update state %q", check.UpdateState)
360 | 	}
361 | 
362 | 	// occasionally instance goes into reboot before reporting command output, therefore
363 | 	// we do not poll for command output. Instead we rely on verifyUpdate to confirm update
364 | 	// success or failure.
365 | 	log.Printf("Sending SSM document %q on instance %q", u.rebootDocument, inst.instanceID)
366 | 	// SendCommand is directly called here because we do not want to wait on command complete.
367 | 	resp, err := u.ssm.SendCommand(&ssm.SendCommandInput{
368 | 		DocumentName:    aws.String(u.rebootDocument),
369 | 		DocumentVersion: aws.String("$DEFAULT"),
370 | 		InstanceIds:     aws.StringSlice(ec2IDs),
371 | 		TimeoutSeconds:  aws.Int64(deliveryTimeoutSeconds),
372 | 	})
373 | 	if err != nil {
374 | 		return fmt.Errorf("failed to send reboot command: %w", err)
375 | 	}
376 | 	rebootID := *resp.Command.CommandId
377 | 	log.Printf("SSM document %q posted with command ID %q", u.rebootDocument, rebootID)
378 | 
379 | 	// added some sleep time for reboot to start before we check instance state
380 | 	time.Sleep(15 * time.Second)
381 | 	err = u.waitUntilOk(inst.instanceID)
382 | 	if err != nil {
383 | 		return fmt.Errorf("failed to reach Ok status after reboot: %w", err)
384 | 	}
385 | 	return nil
386 | }
387 | 
388 | // verifyUpdate verifies if instance was properly updated
389 | func (u *updater) verifyUpdate(inst instance) (bool, error) {
390 | 	log.Println("Verifying update by checking there is no new version available to update" +
391 | 		" and validate the active version")
392 | 	ec2IDs := []string{inst.instanceID}
393 | 	updateStatus, err := u.sendCommand(ec2IDs, u.checkDocument)
394 | 	if err != nil {
395 | 		return false, fmt.Errorf("failed to send update check command: %w", err)
396 | 	}
397 | 
398 | 	updateResult, err := u.getCommandResult(updateStatus, inst.instanceID)
399 | 	if err != nil {
400 | 		return false, fmt.Errorf("failed to get check command output: %w", err)
401 | 	}
402 | 	output, err := parseCommandOutput(updateResult)
403 | 	if err != nil {
404 | 		return false, fmt.Errorf("failed to parse command output %q, manual verification required: %w", string(updateResult), err)
405 | 	}
406 | 	updatedVersion := output.ActivePartition.Image.Version
407 | 	if updatedVersion == inst.bottlerocketVersion {
408 | 		log.Printf("Container instance %q did not update, its current "+
409 | 			"version %s and updated version %s are the same", inst.containerInstanceID, inst.bottlerocketVersion, updatedVersion)
410 | 		return false, nil
411 | 	} else if output.UpdateState == updateStateAvailable {
412 | 		log.Printf("Container instance %q was updated to version %q successfully, however another newer version was recently released;"+
413 | 			" Instance will be updated to newer version in next iteration.", inst.containerInstanceID, updatedVersion)
414 | 		return true, nil
415 | 	}
416 | 	log.Printf("Container instance %q updated to version %q", inst.containerInstanceID, updatedVersion)
417 | 	return true, nil
418 | }
419 | 
420 | func (u *updater) sendCommand(instanceIDs []string, ssmDocument string) (string, error) {
421 | 	log.Printf("Sending SSM document %q", ssmDocument)
422 | 	resp, err := u.ssm.SendCommand(&ssm.SendCommandInput{
423 | 		DocumentName:    aws.String(ssmDocument),
424 | 		DocumentVersion: aws.String("$DEFAULT"),
425 | 		InstanceIds:     aws.StringSlice(instanceIDs),
426 | 		TimeoutSeconds:  aws.Int64(deliveryTimeoutSeconds),
427 | 	})
428 | 	if err != nil {
429 | 		return "", fmt.Errorf("send command failed: %w", err)
430 | 	}
431 | 	commandID := *resp.Command.CommandId
432 | 	log.Printf("SSM document %q posted with command id %q", ssmDocument, commandID)
433 | 
434 | 	// Wait for the sent commands to complete.
435 | 	wg := sync.WaitGroup{}
436 | 	instanceCount := len(instanceIDs)
437 | 	errChan := make(chan error, instanceCount)
438 | 	for _, v := range instanceIDs {
439 | 		log.Printf("Waiting for command %q to complete for instance %q", commandID, v)
440 | 		wg.Add(1)
441 | 		go func(instanceID string) {
442 | 			defer wg.Done()
443 | 			err = u.ssm.WaitUntilCommandExecutedWithContext(aws.BackgroundContext(), &ssm.GetCommandInvocationInput{
444 | 				CommandId:  aws.String(commandID),
445 | 				InstanceId: aws.String(instanceID),
446 | 			},
447 | 				request.WithWaiterMaxAttempts(waiterMaxAttempts),
448 | 				request.WithWaiterDelay(request.ConstantWaiterDelay(waiterDelay)))
449 | 			if err != nil {
450 | 				errChan <- err
451 | 				log.Printf("Error encountered while awaiting document %q execution for instance: %q: %s", ssmDocument, instanceID, err)
452 | 				u.logCommmandOutput(commandID, instanceID)
453 | 			}
454 | 		}(aws.StringValue(&v))
455 | 	}
456 | 	wg.Wait()
457 | 	close(errChan)
458 | 
459 | 	errCount := 0
460 | 	for err = range errChan {
461 | 		errCount++
462 | 		if errCount == instanceCount {
463 | 			return "", fmt.Errorf("too many failures while awaiting document execution: %w", err)
464 | 		}
465 | 	}
466 | 	return commandID, nil
467 | }
468 | 
469 | func (u *updater) getCommandResult(commandID string, instanceID string) ([]byte, error) {
470 | 	resp, err := u.ssm.GetCommandInvocation(&ssm.GetCommandInvocationInput{
471 | 		CommandId:  aws.String(commandID),
472 | 		InstanceId: aws.String(instanceID),
473 | 	})
474 | 	if err != nil {
475 | 		return nil, fmt.Errorf("failed to retrieve command invocation output: %w", err)
476 | 	}
477 | 	commandResults := []byte(aws.StringValue(resp.StandardOutputContent))
478 | 	if aws.StringValue(resp.Status) != ssm.CommandInvocationStatusSuccess {
479 | 		return nil, fmt.Errorf("command %s has not reached success status, current status %q", commandID, aws.StringValue(resp.Status))
480 | 	}
481 | 	return commandResults, nil
482 | }
483 | 
484 | // logCommmandOutput logs the ssm command invocation response
485 | func (u *updater) logCommmandOutput(commandID string, instanceID string) {
486 | 	resp, err := u.ssm.GetCommandInvocation(&ssm.GetCommandInvocationInput{
487 | 		CommandId:  aws.String(commandID),
488 | 		InstanceId: aws.String(instanceID),
489 | 	})
490 | 	if err != nil {
491 | 		log.Printf("Failed to get invocation output for instance %q: %v", instanceID, err)
492 | 	}
493 | 	log.Printf("Invocation output for instance %q: %#q", instanceID, resp)
494 | }
495 | 
496 | // waitUntilOk takes an EC2 ID as a parameter and waits until the specified EC2 instance is in an Ok status.
497 | func (u *updater) waitUntilOk(ec2ID string) error {
498 | 	log.Printf("Waiting for instance %q to reach Ok status", ec2ID)
499 | 	return u.ec2.WaitUntilInstanceStatusOk(&ec2.DescribeInstanceStatusInput{
500 | 		InstanceIds: []*string{aws.String(ec2ID)},
501 | 	})
502 | }
503 | 
504 | // parseCommandOutput takes raw bytes of ssm command output and converts it into a struct
505 | func parseCommandOutput(commandOutput []byte) (checkOutput, error) {
506 | 	output := checkOutput{}
507 | 	err := json.Unmarshal(commandOutput, &output)
508 | 	if err != nil {
509 | 		return output, fmt.Errorf("failed to unmarshal json: %w", err)
510 | 	}
511 | 	if output.UpdateState == "" || output.ActivePartition.Image.Version == "" {
512 | 		return output, fmt.Errorf("mandatory fields are not available")
513 | 	}
514 | 	return output, nil
515 | }
516 | 


--------------------------------------------------------------------------------
/updater/aws_test.go:
--------------------------------------------------------------------------------
   1 | package main
   2 | 
   3 | import (
   4 | 	"errors"
   5 | 	"fmt"
   6 | 	"strconv"
   7 | 	"sync"
   8 | 	"testing"
   9 | 
  10 | 	"github.com/aws/aws-sdk-go/aws"
  11 | 	"github.com/aws/aws-sdk-go/aws/request"
  12 | 	"github.com/aws/aws-sdk-go/service/ec2"
  13 | 	"github.com/aws/aws-sdk-go/service/ecs"
  14 | 	"github.com/aws/aws-sdk-go/service/ssm"
  15 | 	"github.com/stretchr/testify/assert"
  16 | 	"github.com/stretchr/testify/require"
  17 | )
  18 | 
  19 | func TestFilterAvailableUpdates(t *testing.T) {
  20 | 	instances := []instance{
  21 | 		{
  22 | 			instanceID:          "inst-id-1",
  23 | 			containerInstanceID: "cont-inst-1",
  24 | 		},
  25 | 		{
  26 | 			instanceID:          "inst-id-2",
  27 | 			containerInstanceID: "cont-inst-2",
  28 | 		},
  29 | 		{
  30 | 			instanceID:          "inst-id-3",
  31 | 			containerInstanceID: "cont-inst-3",
  32 | 		},
  33 | 		{
  34 | 			instanceID:          "inst-id-4",
  35 | 			containerInstanceID: "cont-inst-4",
  36 | 		},
  37 | 		{
  38 | 			instanceID:          "inst-id-5",
  39 | 			containerInstanceID: "cont-inst-5",
  40 | 		},
  41 | 	}
  42 | 	expected := []instance{
  43 | 		{
  44 | 			instanceID:          "inst-id-1",
  45 | 			containerInstanceID: "cont-inst-1",
  46 | 			bottlerocketVersion: "v1.0.5",
  47 | 		},
  48 | 		{
  49 | 			instanceID:          "inst-id-2",
  50 | 			containerInstanceID: "cont-inst-2",
  51 | 			bottlerocketVersion: "v1.0.5",
  52 | 		},
  53 | 		{
  54 | 			instanceID:          "inst-id-5",
  55 | 			containerInstanceID: "cont-inst-5",
  56 | 			bottlerocketVersion: "v1.0.5",
  57 | 		},
  58 | 	}
  59 | 	responses := map[string]string{
  60 | 		"inst-id-1": `{"update_state": "Available", "active_partition": { "image": { "version": "v1.0.5"}}}`,
  61 | 		"inst-id-2": `{"update_state": "Ready", "active_partition": { "image": { "version": "v1.0.5"}}}`,
  62 | 		"inst-id-3": `{"update_state": "Idle", "active_partition": { "image": { "version": "v1.1.1"}}}`,
  63 | 		"inst-id-4": `{"update_state": "Staged", "active_partition": { "image": { "version": "v1.1.1"}}}`,
  64 | 		"inst-id-5": `{"update_state": "Available", "active_partition": { "image": { "version": "v1.0.5"}}}`,
  65 | 	}
  66 | 
  67 | 	// mutex needed to prevent race condition when incrementing counter in concurrent
  68 | 	// execution of WaitUntilCommandExecutedWithContextFn
  69 | 	var m sync.Mutex
  70 | 	sendCommandCalls := 0
  71 | 	commandWaiterCalls := 0
  72 | 	getCommandInvocationCalls := 0
  73 | 	mockSSM := MockSSM{
  74 | 		GetCommandInvocationFn: func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) {
  75 | 			getCommandInvocationCalls++
  76 | 			return &ssm.GetCommandInvocationOutput{
  77 | 				Status:                aws.String("Success"),
  78 | 				StandardOutputContent: aws.String(responses[*input.InstanceId]),
  79 | 			}, nil
  80 | 		},
  81 | 		SendCommandFn: func(_ *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) {
  82 | 			sendCommandCalls++
  83 | 			return &ssm.SendCommandOutput{
  84 | 				Command: &ssm.Command{
  85 | 					CommandId:    aws.String("command-id"),
  86 | 					DocumentName: aws.String("check-document"),
  87 | 				},
  88 | 			}, nil
  89 | 		},
  90 | 		WaitUntilCommandExecutedWithContextFn: func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error {
  91 | 			m.Lock()
  92 | 			commandWaiterCalls++
  93 | 			m.Unlock()
  94 | 			assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
  95 | 			return nil
  96 | 		},
  97 | 	}
  98 | 	u := updater{ssm: mockSSM, checkDocument: "check-document"}
  99 | 	actual, err := u.filterAvailableUpdates(instances)
 100 | 	require.NoError(t, err)
 101 | 	assert.Equal(t, expected, actual, "Should only contain instances in Aavailable or Ready update state")
 102 | 	assert.Equal(t, 1, sendCommandCalls, "should send commands for each page")
 103 | 	assert.Equal(t, 5, commandWaiterCalls, "should wait for each instance")
 104 | 	assert.Equal(t, 5, getCommandInvocationCalls, "should collect output for each instance")
 105 | }
 106 | 
 107 | func TestPaginatedFilterAvailableUpdatesSuccess(t *testing.T) {
 108 | 	// 100 instances represent 2 full pages of SSM (limited to 50 per page).
 109 | 	const total = 100
 110 | 	checkOutput := fmt.Sprintf(
 111 | 		`{"update_state": "%s", "active_partition": { "image": { "version": "%s"}}}`,
 112 | 		updateStateAvailable, "v1.0.5",
 113 | 	)
 114 | 	invocationOut := &ssm.GetCommandInvocationOutput{
 115 | 		Status:                aws.String("Success"),
 116 | 		StandardOutputContent: aws.String(checkOutput),
 117 | 	}
 118 | 
 119 | 	instances := make([]instance, 0, total)
 120 | 	expected := make([]instance, 0, total)
 121 | 	for i := 0; i < total; i++ {
 122 | 		suffix := strconv.Itoa(i)
 123 | 		inst := instance{
 124 | 			instanceID:          "ec2-id-br" + suffix,
 125 | 			containerInstanceID: "cont-inst-br" + suffix,
 126 | 		}
 127 | 		instances = append(instances, inst)
 128 | 		// the mocked check output reports this version for every instance
 129 | 		inst.bottlerocketVersion = "v1.0.5"
 130 | 		expected = append(expected, inst)
 131 | 	}
 132 | 
 133 | 	// The waiter mock is executed concurrently, so its counter is incremented
 134 | 	// under a mutex to avoid a data race.
 135 | 	var mu sync.Mutex
 136 | 	var sends, waits, gets int
 137 | 	mockSSM := MockSSM{
 138 | 		GetCommandInvocationFn: func(_ *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) {
 139 | 			gets++
 140 | 			return invocationOut, nil
 141 | 		},
 142 | 		SendCommandFn: func(_ *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) {
 143 | 			sends++
 144 | 			cmd := &ssm.Command{
 145 | 				CommandId:    aws.String("command-id"),
 146 | 				DocumentName: aws.String("check-document"),
 147 | 			}
 148 | 			return &ssm.SendCommandOutput{Command: cmd}, nil
 149 | 		},
 150 | 		WaitUntilCommandExecutedWithContextFn: func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error {
 151 | 			mu.Lock()
 152 | 			waits++
 153 | 			mu.Unlock()
 154 | 			assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
 155 | 			return nil
 156 | 		},
 157 | 	}
 158 | 	u := updater{ssm: mockSSM}
 159 | 	actual, err := u.filterAvailableUpdates(instances)
 160 | 	require.NoError(t, err)
 161 | 	assert.EqualValues(t, expected, actual, "should contain all instances")
 162 | 	assert.Equal(t, 2, sends, "should send commands for each page")
 163 | 	assert.Equal(t, total, waits, "should wait for each instance")
 164 | 	assert.Equal(t, total, gets, "should collect output for each instance")
 165 | }
 166 | 
 167 | func TestPaginatedFilterAvailableUpdatesAllFail(t *testing.T) {
 168 | 	instances := make([]instance, 0)
 169 | 
 170 | 	for i := 0; i < 100; i++ {
 171 | 		containerID := "cont-inst-br" + strconv.Itoa(i)
 172 | 		ec2ID := "ec2-id-br" + strconv.Itoa(i)
 173 | 		instances = append(instances, instance{
 174 | 			instanceID:          ec2ID,
 175 | 			containerInstanceID: containerID,
 176 | 		})
 177 | 	}
 178 | 
 179 | 	sendCommandCalls := 0
 180 | 	mockSSM := MockSSM{
 181 | 		SendCommandFn: func(_ *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) {
 182 | 			sendCommandCalls++
 183 | 			return nil, errors.New("Failed to send document")
 184 | 		},
 185 | 	}
 186 | 	u := updater{ssm: mockSSM}
 187 | 	actual, err := u.filterAvailableUpdates(instances)
 188 | 	require.Error(t, err)
 189 | 	assert.Contains(t, err.Error(), "Failed to send document")
 190 | 	assert.Empty(t, actual)
 191 | 	assert.Equal(t, 2, sendCommandCalls, "should send commands for each page")
 192 | }
 193 | 
 194 | func TestPaginatedFilterAvailableUpdatesInPageFailures(t *testing.T) {
 195 | 	// 120 is chosen so that multiple pages are exercised and the instance count
 196 | 	// divides evenly by the 3 outcomes cycled through below.
 197 | 	const total = 120
 198 | 	instances := make([]instance, 0, total)
 199 | 	for i := 0; i < total; i++ {
 200 | 		suffix := strconv.Itoa(i)
 201 | 		instances = append(instances, instance{
 202 | 			instanceID:          "ec2-id-br" + suffix,
 203 | 			containerInstanceID: "cont-inst-br" + suffix,
 204 | 		})
 205 | 	}
 206 | 	availableOutput := fmt.Sprintf(
 207 | 		`{"update_state": "%s", "active_partition": { "image": { "version": "%s"}}}`,
 208 | 		updateStateAvailable, "v1.0.5",
 209 | 	)
 210 | 
 211 | 	// The waiter mock is executed concurrently, so its counter is incremented
 212 | 	// under a mutex to avoid a data race.
 213 | 	var mu sync.Mutex
 214 | 	var sends, waits, gets int
 215 | 	mockSSM := MockSSM{
 216 | 		GetCommandInvocationFn: func(_ *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) {
 217 | 			gets++
 218 | 			// cycle through the three outcomes based on the call number
 219 | 			switch gets % 3 {
 220 | 			case 1: // empty JSON document: validates parseCommandOutput failure
 221 | 				return &ssm.GetCommandInvocationOutput{
 222 | 					Status:                aws.String("Success"),
 223 | 					StandardOutputContent: aws.String("{}"),
 224 | 				}, nil
 225 | 			case 2: // validates the success case
 226 | 				return &ssm.GetCommandInvocationOutput{
 227 | 					Status:                aws.String("Success"),
 228 | 					StandardOutputContent: aws.String(availableOutput),
 229 | 				}, nil
 230 | 			default: // remainder 0: validates getCommandResult failure
 231 | 				return nil, errors.New("Failed to get command output")
 232 | 			}
 233 | 		},
 234 | 		SendCommandFn: func(_ *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) {
 235 | 			sends++
 236 | 			cmd := &ssm.Command{
 237 | 				CommandId:    aws.String("command-id"),
 238 | 				DocumentName: aws.String("check-document"),
 239 | 			}
 240 | 			return &ssm.SendCommandOutput{Command: cmd}, nil
 241 | 		},
 242 | 		WaitUntilCommandExecutedWithContextFn: func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error {
 243 | 			assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
 244 | 			mu.Lock()
 245 | 			waits++
 246 | 			mu.Unlock()
 247 | 			return nil
 248 | 		},
 249 | 	}
 250 | 	u := updater{ssm: mockSSM}
 251 | 	actual, err := u.filterAvailableUpdates(instances)
 252 | 	require.NoError(t, err)
 253 | 	assert.EqualValues(t, 40, len(actual), "Every 3rd instance of 120 should succeed")
 254 | 	assert.Equal(t, 3, sends, "should send commands for each page")
 255 | 	assert.Equal(t, total, waits, "should wait for each instance")
 256 | 	assert.Equal(t, total, gets, "should collect output for each instance")
 257 | }
 258 | 
 259 | func TestPaginatedFilterAvailableUpdatesSingleErr(t *testing.T) {
 260 | 	// Sending the check command fails for the 1st page and succeeds for the 2nd.
 261 | 	const total = 100
 262 | 	checkPattern := `{"update_state": "%s", "active_partition": { "image": { "version": "%s"}}}`
 263 | 	getOut := &ssm.GetCommandInvocationOutput{
 264 | 		Status:                aws.String("Success"),
 265 | 		StandardOutputContent: aws.String(fmt.Sprintf(checkPattern, updateStateAvailable, "v1.0.5")),
 266 | 	}
 267 | 
 268 | 	instances := make([]instance, 0, total)
 269 | 	expected := make([]instance, 0, total)
 270 | 	for i := 0; i < total; i++ {
 271 | 		containerID := "cont-inst-br" + strconv.Itoa(i)
 272 | 		ec2ID := "ec2-id-br" + strconv.Itoa(i)
 273 | 		instances = append(instances, instance{
 274 | 			instanceID:          ec2ID,
 275 | 			containerInstanceID: containerID,
 276 | 		})
 277 | 		expected = append(expected, instance{
 278 | 			instanceID:          ec2ID,
 279 | 			containerInstanceID: containerID,
 280 | 			bottlerocketVersion: "v1.0.5",
 281 | 		})
 282 | 	}
 283 | 
 284 | 	pageErrors := []error{errors.New("Failed to send document"), nil} // SendCommand error per page, in call order
 285 | 
 286 | 	// mutex needed to prevent race condition when incrementing counter in concurrent
 287 | 	// execution of WaitUntilCommandExecutedWithContextFn
 288 | 	var m sync.Mutex
 289 | 	sendCommandCalls := 0
 290 | 	commandWaiterCalls := 0
 291 | 	getCommandInvocationCalls := 0
 292 | 	mockSSM := MockSSM{
 293 | 		GetCommandInvocationFn: func(_ *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) {
 294 | 			getCommandInvocationCalls++
 295 | 			return getOut, nil
 296 | 		},
 297 | 		SendCommandFn: func(_ *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) {
 298 | 			require.Less(t, sendCommandCalls, len(pageErrors)) // guards the index below
 299 | 			failErr := pageErrors[sendCommandCalls]
 300 | 			sendCommandCalls++
 301 | 			return &ssm.SendCommandOutput{
 302 | 				Command: &ssm.Command{
 303 | 					CommandId:    aws.String("command-id"),
 304 | 					DocumentName: aws.String("check-document"),
 305 | 				},
 306 | 			}, failErr
 307 | 		},
 308 | 		WaitUntilCommandExecutedWithContextFn: func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error {
 309 | 			assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
 310 | 			m.Lock()
 311 | 			commandWaiterCalls++
 312 | 			m.Unlock()
 313 | 			return nil
 314 | 		},
 315 | 	}
 316 | 	u := updater{ssm: mockSSM}
 317 | 	actual, err := u.filterAvailableUpdates(instances)
 318 | 	require.NoError(t, err)
 319 | 	// testify takes (expected, actual) in that order; it determines the labels of a failure diff
 320 | 	assert.EqualValues(t, expected[50:], actual, "Should only contain instances from the 2nd page")
 321 | 	assert.Equal(t, 2, sendCommandCalls, "should send commands for each page")
 322 | 	assert.Equal(t, 50, commandWaiterCalls, "should wait for each instance")
 323 | 	assert.Equal(t, 50, getCommandInvocationCalls, "should collect output for each instance")
 324 | }
 325 | 
 326 | func TestGetCommandResult(t *testing.T) {
 327 | 	cases := []struct {
 328 | 		name            string
 329 | 		invocationOut   *ssm.GetCommandInvocationOutput
 330 | 		expectedError   string
 331 | 		expectedOut     []byte
 332 | 		invocationError error
 333 | 	}{
 334 | 		{
 335 | 			name: "getCommand success",
 336 | 			invocationOut: &ssm.GetCommandInvocationOutput{
 337 | 				Status:                aws.String("Success"),
 338 | 				StandardOutputContent: aws.String("OutputContent"),
 339 | 			},
 340 | 			expectedOut: []byte(aws.StringValue(aws.String("OutputContent"))),
 341 | 		},
 342 | 		{
 343 | 			name:            "getCommand fail",
 344 | 			invocationError: errors.New("failed to get command invocation"),
 345 | 			expectedError:   "failed to retrieve command invocation output: failed to get command invocation",
 346 | 			invocationOut:   nil,
 347 | 			expectedOut:     nil,
 348 | 		},
 349 | 		{
 350 | 			name: "command status non-Success",
 351 | 			invocationOut: &ssm.GetCommandInvocationOutput{
 352 | 				Status:                aws.String("TimedOut"),
 353 | 				StandardOutputContent: nil,
 354 | 			},
 355 | 			expectedError: "command command-id has not reached success status, current status \"TimedOut\"",
 356 | 			expectedOut:   nil,
 357 | 		},
 358 | 	}
 359 | 	for _, tc := range cases {
 360 | 		t.Run(tc.name, func(t *testing.T) {
 361 | 			mockSSM := MockSSM{
 362 | 				GetCommandInvocationFn: func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) {
 363 | 					assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
 364 | 					assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId))
 365 | 					return tc.invocationOut, tc.invocationError
 366 | 				},
 367 | 			}
 368 | 			u := updater{ssm: mockSSM}
 369 | 			actual, err := u.getCommandResult("command-id", "instance-id")
 370 | 			if tc.expectedOut != nil {
 371 | 				require.NoError(t, err)
 372 | 				assert.EqualValues(t, tc.expectedOut, actual)
 373 | 			} else {
 374 | 				require.Error(t, err)
 375 | 				assert.EqualError(t, err, tc.expectedError)
 376 | 			}
 377 | 		})
 378 | 	}
 379 | }
 380 | 
 381 | func TestSendCommandSuccess(t *testing.T) {
 382 | 	const document = "test-doc"
 383 | 	instances := []string{"inst-id-1", "inst-id-2"}
 384 | 	// The waiter mock is executed concurrently; the mutex guards the IDs it records.
 385 | 	var mu sync.Mutex
 386 | 	waited := make([]string, 0, len(instances))
 387 | 	mockSSM := MockSSM{
 388 | 		SendCommandFn: func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) {
 389 | 			assert.Equal(t, document, aws.StringValue(input.DocumentName))
 390 | 			assert.Equal(t, "$DEFAULT", aws.StringValue(input.DocumentVersion))
 391 | 			assert.Equal(t, aws.StringSlice(instances), input.InstanceIds)
 392 | 			return &ssm.SendCommandOutput{Command: &ssm.Command{CommandId: aws.String("command-id")}}, nil
 393 | 		},
 394 | 		WaitUntilCommandExecutedWithContextFn: func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error {
 395 | 			assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
 396 | 			mu.Lock()
 397 | 			defer mu.Unlock()
 398 | 			waited = append(waited, aws.StringValue(input.InstanceId))
 399 | 			return nil
 400 | 		},
 401 | 	}
 402 | 	u := updater{ssm: mockSSM}
 403 | 	commandID, err := u.sendCommand(instances, document)
 404 | 	require.NoError(t, err)
 405 | 	assert.EqualValues(t, "command-id", commandID)
 406 | 	assert.ElementsMatch(t, instances, waited)
 407 | }
 408 | 
 409 | func TestSendCommandErr(t *testing.T) {
 410 | 	instances := []string{"inst-id-1", "inst-id-2"}
 411 | 	sendError := errors.New("failed to send command")
 412 | 	mockSSM := MockSSM{
 413 | 		SendCommandFn: func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) {
 414 | 			assert.Equal(t, "test-doc", aws.StringValue(input.DocumentName))
 415 | 			assert.Equal(t, "$DEFAULT", aws.StringValue(input.DocumentVersion))
 416 | 			assert.Equal(t, aws.StringSlice(instances), input.InstanceIds)
 417 | 			return nil, sendError
 418 | 		},
 419 | 	}
 420 | 	u := updater{ssm: mockSSM}
 421 | 	commandID, err := u.sendCommand(instances, "test-doc")
 422 | 	require.Error(t, err)
 423 | 	assert.Equal(t, "", commandID)
 424 | 	assert.ErrorIs(t, err, sendError)
 425 | 
 426 | }
 427 | 
 428 | func TestSendCommandWaitErr(t *testing.T) {
 429 | 	// Every waiter call fails with this error, no matter how many instances
 430 | 	// the command is sent to.
 431 | 	errWait := errors.New("exceeded max attempts")
 432 | 	cases := []struct {
 433 | 		name      string
 434 | 		instances []string
 435 | 	}{
 436 | 		{name: "wait single failure", instances: []string{"inst-id-1"}},
 437 | 		{name: "wait fail all", instances: []string{"inst-id-1", "inst-id-2", "inst-id-3"}},
 438 | 	}
 439 | 
 440 | 	for _, tc := range cases {
 441 | 		t.Run(tc.name, func(t *testing.T) {
 442 | 			// instance IDs whose command invocation is requested from the mock
 443 | 			requested := []string{}
 444 | 			sendFn := func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) {
 445 | 				assert.Equal(t, "test-doc", aws.StringValue(input.DocumentName))
 446 | 				assert.Equal(t, aws.StringSlice(tc.instances), input.InstanceIds)
 447 | 				cmd := &ssm.Command{CommandId: aws.String("command-id")}
 448 | 				return &ssm.SendCommandOutput{Command: cmd}, nil
 449 | 			}
 450 | 			waitFn := func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error {
 451 | 				assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
 452 | 				return errWait
 453 | 			}
 454 | 			getFn := func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) {
 455 | 				assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
 456 | 				requested = append(requested, aws.StringValue(input.InstanceId))
 457 | 				return &ssm.GetCommandInvocationOutput{}, nil
 458 | 			}
 459 | 			u := updater{ssm: MockSSM{
 460 | 				SendCommandFn:                         sendFn,
 461 | 				WaitUntilCommandExecutedWithContextFn: waitFn,
 462 | 				GetCommandInvocationFn:                getFn,
 463 | 			}}
 464 | 
 465 | 			commandID, err := u.sendCommand(tc.instances, "test-doc")
 466 | 			require.Error(t, err)
 467 | 			assert.ErrorIs(t, err, errWait)
 468 | 			assert.Equal(t, "", commandID)
 469 | 			assert.ElementsMatch(t, tc.instances, requested, "should match instances for which wait fails")
 470 | 		})
 471 | 	}
 472 | }
 473 | 
 474 | func TestSendCommandWaitSuccess(t *testing.T) {
 475 | 	// sendOK is shared by both subtests; it reports failures on the parent test.
 476 | 	sendOK := func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) {
 477 | 		assert.Equal(t, "test-doc", aws.StringValue(input.DocumentName))
 478 | 		cmd := &ssm.Command{CommandId: aws.String("command-id")}
 479 | 		return &ssm.SendCommandOutput{Command: cmd}, nil
 480 | 	}
 481 | 	t.Run("wait one success", func(t *testing.T) {
 482 | 		// okInstance is the only instance for which the waiter mock succeeds
 483 | 		const okInstance = "inst-success"
 484 | 		wantFailed := []string{"inst-id-1", "inst-id-2"}
 485 | 		instances := []string{"inst-id-1", "inst-id-2", okInstance}
 486 | 		gotFailed := []string{}
 487 | 		mockSSM := MockSSM{
 488 | 			SendCommandFn: sendOK,
 489 | 			WaitUntilCommandExecutedWithContextFn: func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error {
 490 | 				if aws.StringValue(input.InstanceId) != okInstance {
 491 | 					return errors.New("exceeded max attempts")
 492 | 				}
 493 | 				return nil
 494 | 			},
 495 | 			GetCommandInvocationFn: func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) {
 496 | 				assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
 497 | 				gotFailed = append(gotFailed, aws.StringValue(input.InstanceId))
 498 | 				return &ssm.GetCommandInvocationOutput{}, nil
 499 | 			},
 500 | 		}
 501 | 		u := updater{ssm: mockSSM}
 502 | 		commandID, err := u.sendCommand(instances, "test-doc")
 503 | 		require.NoError(t, err)
 504 | 		assert.Equal(t, "command-id", commandID)
 505 | 		assert.ElementsMatch(t, wantFailed, gotFailed, "should match instances for which wait fails")
 506 | 	})
 507 | 
 508 | 	t.Run("wait all success", func(t *testing.T) {
 509 | 		instances := []string{"inst-id-1", "inst-id-2"}
 510 | 		// The waiter mock is executed concurrently; the mutex guards the slice
 511 | 		// of instance IDs it records.
 512 | 		var mu sync.Mutex
 513 | 		waited := []string{}
 514 | 		mockSSM := MockSSM{
 515 | 			SendCommandFn: sendOK,
 516 | 			WaitUntilCommandExecutedWithContextFn: func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error {
 517 | 				assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
 518 | 				mu.Lock()
 519 | 				defer mu.Unlock()
 520 | 				waited = append(waited, aws.StringValue(input.InstanceId))
 521 | 				return nil
 522 | 			},
 523 | 		}
 524 | 		u := updater{ssm: mockSSM}
 525 | 		commandID, err := u.sendCommand(instances, "test-doc")
 526 | 		require.NoError(t, err)
 527 | 		assert.Equal(t, "command-id", commandID)
 528 | 		assert.ElementsMatch(t, instances, waited, "should match instances for which wait succeeds")
 529 | 	})
 530 | }
 531 | 
 532 | func TestListContainerInstances(t *testing.T) {
 533 | 	// The mocked pager always delivers two pages, listOutput then listOutput2,
 534 | 	// before returning listError. A nil expectedOut marks a failing case.
 535 | 	cases := []struct {
 536 | 		name          string
 537 | 		listOutput    *ecs.ListContainerInstancesOutput
 538 | 		listOutput2   *ecs.ListContainerInstancesOutput
 539 | 		listError     error
 540 | 		expectedError string
 541 | 		expectedOut   []*string
 542 | 	}{
 543 | 		{
 544 | 			name: "with instances",
 545 | 			listOutput: &ecs.ListContainerInstancesOutput{
 546 | 				ContainerInstanceArns: []*string{
 547 | 					aws.String("cont-inst-arn1"),
 548 | 					aws.String("cont-inst-arn2"),
 549 | 					aws.String("cont-inst-arn3")},
 550 | 				NextToken: aws.String("token"),
 551 | 			},
 552 | 			listOutput2: &ecs.ListContainerInstancesOutput{
 553 | 				ContainerInstanceArns: []*string{
 554 | 					aws.String("cont-inst-arn4"),
 555 | 					aws.String("cont-inst-arn5"),
 556 | 					aws.String("cont-inst-arn6")},
 557 | 				NextToken: nil,
 558 | 			},
 559 | 			expectedOut: []*string{
 560 | 				aws.String("cont-inst-arn1"),
 561 | 				aws.String("cont-inst-arn2"),
 562 | 				aws.String("cont-inst-arn3"),
 563 | 				aws.String("cont-inst-arn4"),
 564 | 				aws.String("cont-inst-arn5"),
 565 | 				aws.String("cont-inst-arn6")},
 566 | 		},
 567 | 		{
 568 | 			name:        "without instances",
 569 | 			listOutput:  &ecs.ListContainerInstancesOutput{ContainerInstanceArns: []*string{}},
 570 | 			listOutput2: &ecs.ListContainerInstancesOutput{ContainerInstanceArns: []*string{}},
 571 | 			expectedOut: []*string{},
 572 | 		},
 573 | 		{
 574 | 			name:      "list fail",
 575 | 			listError: errors.New("failed to list instances"),
 576 | 			listOutput: &ecs.ListContainerInstancesOutput{
 577 | 				ContainerInstanceArns: []*string{},
 578 | 			},
 579 | 			listOutput2: &ecs.ListContainerInstancesOutput{
 580 | 				ContainerInstanceArns: []*string{},
 581 | 			},
 582 | 			expectedError: "failed to list container instances",
 583 | 		},
 584 | 	}
 585 | 	for _, tc := range cases {
 586 | 		t.Run(tc.name, func(t *testing.T) {
 587 | 			mockECS := MockECS{
 588 | 				ListContainerInstancesPagesFn: func(input *ecs.ListContainerInstancesInput, fn func(*ecs.ListContainerInstancesOutput, bool) bool) error {
 589 | 					assert.Equal(t, ecs.ContainerInstanceStatusActive, aws.StringValue(input.Status))
 590 | 					fn(tc.listOutput, true)
 591 | 					fn(tc.listOutput2, false)
 592 | 					return tc.listError
 593 | 				},
 594 | 			}
 595 | 			u := updater{ecs: mockECS}
 596 | 			actual, err := u.listContainerInstances()
 597 | 			if tc.expectedOut != nil {
 598 | 				assert.EqualValues(t, tc.expectedOut, actual)
 599 | 				assert.NoError(t, err)
 600 | 				return
 601 | 			}
 602 | 			// A nil error must end the subtest here: the err.Error() call
 603 | 			// below would otherwise panic with a nil pointer dereference.
 604 | 			require.Error(t, err)
 605 | 			assert.Empty(t, actual)
 606 | 			assert.ErrorIs(t, err, tc.listError)
 607 | 			assert.Contains(t, err.Error(), tc.expectedError)
 608 | 		})
 609 | 	}
 610 | }
 611 | 
 612 | func TestFilterBottlerocketInstances(t *testing.T) {
 613 | 	// containerInstance builds a described container instance whose ARN and
 614 | 	// EC2 ID are derived from id and which carries one attribute per name.
 615 | 	containerInstance := func(id string, attrNames ...string) *ecs.ContainerInstance {
 616 | 		ci := &ecs.ContainerInstance{
 617 | 			ContainerInstanceArn: aws.String("cont-inst-" + id),
 618 | 			Ec2InstanceId:        aws.String("ec2-id-" + id),
 619 | 		}
 620 | 		for _, n := range attrNames {
 621 | 			ci.Attributes = append(ci.Attributes, &ecs.Attribute{Name: aws.String(n)})
 622 | 		}
 623 | 		return ci
 624 | 	}
 625 | 
 626 | 	output := &ecs.DescribeContainerInstancesOutput{
 627 | 		ContainerInstances: []*ecs.ContainerInstance{
 628 | 			// Bottlerocket with single attribute
 629 | 			containerInstance("br1", "bottlerocket.variant"),
 630 | 			// Bottlerocket with extra attribute
 631 | 			containerInstance("br2", "different-attribute", "bottlerocket.variant"),
 632 | 			// Not Bottlerocket, single attribute
 633 | 			containerInstance("not1", "different-attribute"),
 634 | 			// Not Bottlerocket, no attribute
 635 | 			containerInstance("not2"),
 636 | 		},
 637 | 	}
 638 | 	// Only the two instances that carry the "bottlerocket.variant" attribute
 639 | 	// are expected to be returned.
 640 | 	expected := []instance{
 641 | 		{
 642 | 			instanceID:          "ec2-id-br1",
 643 | 			containerInstanceID: "cont-inst-br1",
 644 | 		},
 645 | 		{
 646 | 			instanceID:          "ec2-id-br2",
 647 | 			containerInstanceID: "cont-inst-br2",
 648 | 		},
 649 | 	}
 650 | 
 651 | 	mockECS := MockECS{
 652 | 		DescribeContainerInstancesFn: func(_ *ecs.DescribeContainerInstancesInput) (*ecs.DescribeContainerInstancesOutput, error) {
 653 | 			return output, nil
 654 | 		},
 655 | 	}
 656 | 	u := updater{ecs: mockECS}
 657 | 
 658 | 	actual, err := u.filterBottlerocketInstances(aws.StringSlice([]string{
 659 | 		"ec2-id-br1",
 660 | 		"ec2-id-br2",
 661 | 		"ec2-id-not1",
 662 | 		"ec2-id-not2",
 663 | 	}))
 664 | 	require.NoError(t, err)
 665 | 	assert.EqualValues(t, expected, actual)
 666 | }
 667 | 
 668 | func TestPaginatedFilterBottlerocketInstancesAllFail(t *testing.T) {
 669 | 	instances := make([]*string, 0)
 670 | 	for i := 0; i < 150; i++ {
 671 | 		ec2ID := "ec2-id-br" + strconv.Itoa(i)
 672 | 		instances = append(instances, aws.String(ec2ID))
 673 | 	}
 674 | 
 675 | 	responses := []struct {
 676 | 		inputLen           int
 677 | 		ContainerInstances []*ecs.ContainerInstance
 678 | 		err                error
 679 | 	}{{
 680 | 		100,
 681 | 		nil,
 682 | 		errors.New("Failed to describe container instances"),
 683 | 	}, {
 684 | 		50,
 685 | 		nil,
 686 | 		errors.New("Failed to describe container instances"),
 687 | 	}}
 688 | 
 689 | 	callCount := 0
 690 | 	mockECS := MockECS{
 691 | 		DescribeContainerInstancesFn: func(input *ecs.DescribeContainerInstancesInput) (*ecs.DescribeContainerInstancesOutput, error) {
 692 | 			require.Less(t, callCount, len(responses))
 693 | 			resp := responses[callCount]
 694 | 			callCount++
 695 | 			assert.Equal(t, resp.inputLen, len(input.ContainerInstances))
 696 | 			return &ecs.DescribeContainerInstancesOutput{ContainerInstances: resp.ContainerInstances}, resp.err
 697 | 		},
 698 | 	}
 699 | 
 700 | 	u := updater{ecs: mockECS}
 701 | 	actual, err := u.filterBottlerocketInstances(instances)
 702 | 	require.Error(t, err)
 703 | 	assert.Empty(t, actual)
 704 | 	assert.Contains(t, err.Error(), "Failed to describe container instances")
 705 | }
 706 | 
 707 | func TestPaginatedFilterBottlerocketInstancesSingleFailure(t *testing.T) {
 708 | 	descOut := make([]*ecs.ContainerInstance, 0)
 709 | 	instances := make([]*string, 0)
 710 | 	expected := make([]instance, 0)
 711 | 	for i := 0; i < 150; i++ {
 712 | 		instanceARN := "cont-inst-br" + strconv.Itoa(i)
 713 | 		ec2ID := "ec2-id-br" + strconv.Itoa(i)
 714 | 		instances = append(instances, aws.String(ec2ID))
 715 | 		descOut = append(descOut, &ecs.ContainerInstance{
 716 | 			Attributes:           []*ecs.Attribute{{Name: aws.String("bottlerocket.variant")}},
 717 | 			ContainerInstanceArn: aws.String(instanceARN),
 718 | 			Ec2InstanceId:        aws.String(ec2ID),
 719 | 		})
 720 | 		expected = append(expected, instance{
 721 | 			instanceID:          ec2ID,
 722 | 			containerInstanceID: instanceARN,
 723 | 		})
 724 | 	}
 725 | 
 726 | 	responses := []struct {
 727 | 		inputLen           int
 728 | 		ContainerInstances []*ecs.ContainerInstance
 729 | 		err                error
 730 | 	}{{
 731 | 		100,
 732 | 		nil,
 733 | 		errors.New("Failed to describe container instances"),
 734 | 	}, {
 735 | 		50,
 736 | 		descOut[100:],
 737 | 		nil,
 738 | 	}}
 739 | 
 740 | 	callCount := 0
 741 | 	mockECS := MockECS{
 742 | 		DescribeContainerInstancesFn: func(input *ecs.DescribeContainerInstancesInput) (*ecs.DescribeContainerInstancesOutput, error) {
 743 | 			require.Less(t, callCount, len(responses))
 744 | 			resp := responses[callCount]
 745 | 			callCount++
 746 | 			assert.Equal(t, resp.inputLen, len(input.ContainerInstances))
 747 | 			return &ecs.DescribeContainerInstancesOutput{ContainerInstances: resp.ContainerInstances}, resp.err
 748 | 		},
 749 | 	}
 750 | 
 751 | 	u := updater{ecs: mockECS}
 752 | 	actual, err := u.filterBottlerocketInstances(instances)
 753 | 	require.NoError(t, err)
 754 | 	assert.EqualValues(t, expected[100:], actual, "should contain only the last 50 instnaces")
 755 | }
 756 | 
 757 | func TestPaginatedFilterBottlerocketInstancesNoBR(t *testing.T) {
 758 | 	descOut := make([]*ecs.ContainerInstance, 0)
 759 | 	instances := make([]*string, 0)
 760 | 	for i := 0; i < 150; i++ {
 761 | 		instanceARN := "cont-inst-br" + strconv.Itoa(i)
 762 | 		ec2ID := "ec2-id-br" + strconv.Itoa(i)
 763 | 		instances = append(instances, aws.String(ec2ID))
 764 | 		descOut = append(descOut, &ecs.ContainerInstance{
 765 | 			Attributes:           []*ecs.Attribute{{Name: aws.String("nottlerocket.variant")}},
 766 | 			ContainerInstanceArn: aws.String(instanceARN),
 767 | 			Ec2InstanceId:        aws.String(ec2ID),
 768 | 		})
 769 | 	}
 770 | 
 771 | 	responses := []struct {
 772 | 		inputLen           int
 773 | 		ContainerInstances []*ecs.ContainerInstance
 774 | 		err                error
 775 | 	}{{
 776 | 		100,
 777 | 		descOut[:100],
 778 | 		nil,
 779 | 	}, {
 780 | 		50,
 781 | 		descOut[100:],
 782 | 		nil,
 783 | 	}}
 784 | 
 785 | 	callCount := 0
 786 | 	mockECS := MockECS{
 787 | 		DescribeContainerInstancesFn: func(input *ecs.DescribeContainerInstancesInput) (*ecs.DescribeContainerInstancesOutput, error) {
 788 | 			require.Less(t, callCount, len(responses))
 789 | 			resp := responses[callCount]
 790 | 			callCount++
 791 | 			assert.Equal(t, resp.inputLen, len(input.ContainerInstances))
 792 | 			return &ecs.DescribeContainerInstancesOutput{ContainerInstances: resp.ContainerInstances}, resp.err
 793 | 		},
 794 | 	}
 795 | 
 796 | 	u := updater{ecs: mockECS}
 797 | 	actual, err := u.filterBottlerocketInstances(instances)
 798 | 	require.NoError(t, err)
 799 | 	assert.Empty(t, actual)
 800 | }
 801 | 
 802 | func TestPaginatedFilterBottlerocketInstancesAllBRInstances(t *testing.T) {
 803 | 	descOut := make([]*ecs.ContainerInstance, 0)
 804 | 	instances := make([]*string, 0)
 805 | 	expected := make([]instance, 0)
 806 | 	for i := 0; i < 150; i++ {
 807 | 		instanceARN := "cont-inst-br" + strconv.Itoa(i)
 808 | 		ec2ID := "ec2-id-br" + strconv.Itoa(i)
 809 | 		instances = append(instances, aws.String(ec2ID))
 810 | 		descOut = append(descOut, &ecs.ContainerInstance{
 811 | 			Attributes:           []*ecs.Attribute{{Name: aws.String("bottlerocket.variant")}},
 812 | 			ContainerInstanceArn: aws.String(instanceARN),
 813 | 			Ec2InstanceId:        aws.String(ec2ID),
 814 | 		})
 815 | 		expected = append(expected, instance{
 816 | 			instanceID:          ec2ID,
 817 | 			containerInstanceID: instanceARN,
 818 | 		})
 819 | 	}
 820 | 
 821 | 	responses := []struct {
 822 | 		inputLen           int
 823 | 		ContainerInstances []*ecs.ContainerInstance
 824 | 		err                error
 825 | 	}{{
 826 | 		100,
 827 | 		descOut[:100],
 828 | 		nil,
 829 | 	}, {
 830 | 		50,
 831 | 		descOut[100:],
 832 | 		nil,
 833 | 	}}
 834 | 
 835 | 	callCount := 0
 836 | 	mockECS := MockECS{
 837 | 		DescribeContainerInstancesFn: func(input *ecs.DescribeContainerInstancesInput) (*ecs.DescribeContainerInstancesOutput, error) {
 838 | 			require.Less(t, callCount, len(responses))
 839 | 			resp := responses[callCount]
 840 | 			callCount++
 841 | 			assert.Equal(t, resp.inputLen, len(input.ContainerInstances))
 842 | 			return &ecs.DescribeContainerInstancesOutput{ContainerInstances: resp.ContainerInstances}, resp.err
 843 | 		},
 844 | 	}
 845 | 
 846 | 	u := updater{ecs: mockECS}
 847 | 	actual, err := u.filterBottlerocketInstances(instances)
 848 | 	require.NoError(t, err)
 849 | 	assert.EqualValues(t, expected, actual, "should contain all the instances")
 850 | }
 851 | 
 852 | func TestEligible(t *testing.T) {
 853 | 	cases := []struct {
 854 | 		name        string
 855 | 		listOut     *ecs.ListTasksOutput
 856 | 		describeOut *ecs.DescribeTasksOutput
 857 | 		expectedOk  bool
 858 | 	}{
 859 | 		{
 860 | 			name: "only service tasks",
 861 | 			listOut: &ecs.ListTasksOutput{
 862 | 				TaskArns: []*string{
 863 | 					aws.String("task-arn-1"),
 864 | 				},
 865 | 			},
 866 | 			describeOut: &ecs.DescribeTasksOutput{
 867 | 				Tasks: []*ecs.Task{
 868 | 					{
 869 | 						// contains proper prefix "ecs-svc" for task started by service
 870 | 						StartedBy: aws.String("ecs-svc/svc-id"),
 871 | 					},
 872 | 				},
 873 | 			},
 874 | 			expectedOk: true,
 875 | 		}, {
 876 | 			name: "no task",
 877 | 			listOut: &ecs.ListTasksOutput{
 878 | 				TaskArns: []*string{},
 879 | 			},
 880 | 			expectedOk: true,
 881 | 		}, {
 882 | 			name: "non service task",
 883 | 			listOut: &ecs.ListTasksOutput{
 884 | 				TaskArns: []*string{
 885 | 					aws.String("task-arn-1"),
 886 | 				},
 887 | 			},
 888 | 			describeOut: &ecs.DescribeTasksOutput{
 889 | 				Tasks: []*ecs.Task{{
 890 | 					// Does not contain prefix "ecs-svc"
 891 | 					StartedBy: aws.String("standalone-task-id"),
 892 | 				}},
 893 | 			},
 894 | 			expectedOk: false,
 895 | 		}, {
 896 | 			name: "non service task empty StartedBy",
 897 | 			listOut: &ecs.ListTasksOutput{
 898 | 				TaskArns: []*string{
 899 | 					aws.String("task-arn-1"),
 900 | 				},
 901 | 			},
 902 | 			describeOut: &ecs.DescribeTasksOutput{
 903 | 				Tasks: []*ecs.Task{{}},
 904 | 			},
 905 | 			expectedOk: false,
 906 | 		}, {
 907 | 			name: "service and non service tasks",
 908 | 			listOut: &ecs.ListTasksOutput{
 909 | 				TaskArns: []*string{
 910 | 					aws.String("task-arn-1"),
 911 | 					aws.String("task-arn-2"),
 912 | 				},
 913 | 			},
 914 | 			describeOut: &ecs.DescribeTasksOutput{
 915 | 				Tasks: []*ecs.Task{{
 916 | 					// Does not contain prefix "ecs-svc"
 917 | 					StartedBy: aws.String("standalone-task-id"),
 918 | 				}, {
 919 | 					// contains proper prefix "ecs-svc" for task started by service
 920 | 					StartedBy: aws.String("ecs-svc/svc-id"),
 921 | 				}},
 922 | 			},
 923 | 			expectedOk: false,
 924 | 		},
 925 | 	}
 926 | 	for _, tc := range cases {
 927 | 		t.Run(tc.name, func(t *testing.T) {
 928 | 			mockECS := MockECS{
 929 | 				ListTasksFn: func(input *ecs.ListTasksInput) (*ecs.ListTasksOutput, error) {
 930 | 					assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster))
 931 | 					assert.Equal(t, "cont-inst-id", aws.StringValue(input.ContainerInstance))
 932 | 					return tc.listOut, nil
 933 | 				},
 934 | 				DescribeTasksFn: func(input *ecs.DescribeTasksInput) (*ecs.DescribeTasksOutput, error) {
 935 | 					assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster))
 936 | 					assert.Equal(t, tc.listOut.TaskArns, input.Tasks)
 937 | 					return tc.describeOut, nil
 938 | 				},
 939 | 			}
 940 | 			u := updater{ecs: mockECS, cluster: "test-cluster"}
 941 | 			ok, err := u.eligible("cont-inst-id")
 942 | 			require.NoError(t, err)
 943 | 			assert.Equal(t, ok, tc.expectedOk)
 944 | 		})
 945 | 	}
 946 | }
 947 | 
 948 | func TestEligibleErr(t *testing.T) {
 949 | 	t.Run("list task err", func(t *testing.T) {
 950 | 		listErr := errors.New("failed to list tasks")
 951 | 		mockECS := MockECS{
 952 | 			ListTasksFn: func(input *ecs.ListTasksInput) (*ecs.ListTasksOutput, error) {
 953 | 				assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster))
 954 | 				assert.Equal(t, "cont-inst-id", aws.StringValue(input.ContainerInstance))
 955 | 				return nil, listErr
 956 | 			},
 957 | 		}
 958 | 		u := updater{ecs: mockECS, cluster: "test-cluster"}
 959 | 		ok, err := u.eligible("cont-inst-id")
 960 | 		require.Error(t, err)
 961 | 		assert.ErrorIs(t, err, listErr)
 962 | 		assert.False(t, ok)
 963 | 	})
 964 | 
 965 | 	t.Run("describe task err", func(t *testing.T) {
 966 | 		describeErr := errors.New("failed to describe tasks")
 967 | 		mockECS := MockECS{
 968 | 			ListTasksFn: func(input *ecs.ListTasksInput) (*ecs.ListTasksOutput, error) {
 969 | 				assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster))
 970 | 				assert.Equal(t, "cont-inst-id", aws.StringValue(input.ContainerInstance))
 971 | 				return &ecs.ListTasksOutput{
 972 | 					TaskArns: []*string{
 973 | 						aws.String("task-arn-1"),
 974 | 					},
 975 | 				}, nil
 976 | 			},
 977 | 			DescribeTasksFn: func(input *ecs.DescribeTasksInput) (*ecs.DescribeTasksOutput, error) {
 978 | 				assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster))
 979 | 				assert.Equal(t, []*string{
 980 | 					aws.String("task-arn-1"),
 981 | 				}, input.Tasks)
 982 | 				return nil, describeErr
 983 | 			},
 984 | 		}
 985 | 		u := updater{ecs: mockECS, cluster: "test-cluster"}
 986 | 		ok, err := u.eligible("cont-inst-id")
 987 | 		require.Error(t, err)
 988 | 		assert.ErrorIs(t, err, describeErr)
 989 | 		assert.False(t, ok)
 990 | 	})
 991 | }
 992 | 
 993 | func TestDrainInstance(t *testing.T) {
 994 | 	stateChangeCalls := []string{}
 995 | 	mockStateChange := func(input *ecs.UpdateContainerInstancesStateInput) (*ecs.UpdateContainerInstancesStateOutput, error) {
 996 | 		stateChangeCalls = append(stateChangeCalls, aws.StringValue(input.Status))
 997 | 		assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster))
 998 | 		assert.Equal(t, []*string{aws.String("cont-inst-id")}, input.ContainerInstances)
 999 | 		return &ecs.UpdateContainerInstancesStateOutput{
1000 | 			Failures: []*ecs.Failure{},
1001 | 		}, nil
1002 | 	}
1003 | 	mockListTasks := func(input *ecs.ListTasksInput) (*ecs.ListTasksOutput, error) {
1004 | 		assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster))
1005 | 		assert.Equal(t, "cont-inst-id", aws.StringValue(input.ContainerInstance))
1006 | 		return &ecs.ListTasksOutput{
1007 | 			TaskArns: []*string{
1008 | 				aws.String("task-arn-1"),
1009 | 			},
1010 | 		}, nil
1011 | 	}
1012 | 	cleanup := func() {
1013 | 		stateChangeCalls = []string{}
1014 | 	}
1015 | 
1016 | 	t.Run("no tasks success", func(t *testing.T) {
1017 | 		defer cleanup()
1018 | 		listTaskCount := 0
1019 | 		mockECS := MockECS{
1020 | 			UpdateContainerInstancesStateFn: mockStateChange,
1021 | 			ListTasksFn: func(input *ecs.ListTasksInput) (*ecs.ListTasksOutput, error) {
1022 | 				assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster))
1023 | 				assert.Equal(t, "cont-inst-id", aws.StringValue(input.ContainerInstance))
1024 | 				listTaskCount++
1025 | 				return &ecs.ListTasksOutput{
1026 | 					TaskArns: []*string{},
1027 | 				}, nil
1028 | 			},
1029 | 		}
1030 | 		u := updater{ecs: mockECS, cluster: "test-cluster"}
1031 | 		err := u.drainInstance("cont-inst-id")
1032 | 		require.NoError(t, err)
1033 | 		assert.Equal(t, 1, listTaskCount)
1034 | 		assert.Equal(t, []string{"DRAINING"}, stateChangeCalls)
1035 | 	})
1036 | 
1037 | 	t.Run("with tasks success", func(t *testing.T) {
1038 | 		defer cleanup()
1039 | 		waitCount := 0
1040 | 		mockECS := MockECS{
1041 | 			UpdateContainerInstancesStateFn: mockStateChange,
1042 | 			ListTasksFn:                     mockListTasks,
1043 | 			WaitUntilTasksStoppedWithContextFn: func(_ aws.Context, input *ecs.DescribeTasksInput, _ ...request.WaiterOption) error {
1044 | 				assert.Equal(t, []*string{
1045 | 					aws.String("task-arn-1"),
1046 | 				}, input.Tasks)
1047 | 				assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster))
1048 | 				waitCount++
1049 | 				return nil
1050 | 			},
1051 | 		}
1052 | 		u := updater{ecs: mockECS, cluster: "test-cluster"}
1053 | 		err := u.drainInstance("cont-inst-id")
1054 | 		require.NoError(t, err)
1055 | 		assert.Equal(t, []string{"DRAINING"}, stateChangeCalls)
1056 | 		assert.Equal(t, 1, waitCount)
1057 | 	})
1058 | 
1059 | 	t.Run("state change err", func(t *testing.T) {
1060 | 		defer cleanup()
1061 | 		stateOutErr := errors.New("failed to change state")
1062 | 		mockECS := MockECS{
1063 | 			UpdateContainerInstancesStateFn: func(input *ecs.UpdateContainerInstancesStateInput) (*ecs.UpdateContainerInstancesStateOutput, error) {
1064 | 				assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster))
1065 | 				assert.Equal(t, []*string{aws.String("cont-inst-id")}, input.ContainerInstances)
1066 | 				return nil, stateOutErr
1067 | 			},
1068 | 		}
1069 | 		u := updater{ecs: mockECS, cluster: "test-cluster"}
1070 | 		err := u.drainInstance("cont-inst-id")
1071 | 		require.Error(t, err)
1072 | 		assert.ErrorIs(t, err, stateOutErr)
1073 | 	})
1074 | 
1075 | 	t.Run("state change api err", func(t *testing.T) {
1076 | 		defer cleanup()
1077 | 		stateOutAPIFailure := &ecs.UpdateContainerInstancesStateOutput{
1078 | 			Failures: []*ecs.Failure{
1079 | 				{
1080 | 					Reason: aws.String("failed"),
1081 | 				},
1082 | 			},
1083 | 		}
1084 | 		mockECS := MockECS{
1085 | 			UpdateContainerInstancesStateFn: func(input *ecs.UpdateContainerInstancesStateInput) (*ecs.UpdateContainerInstancesStateOutput, error) {
1086 | 				stateChangeCalls = append(stateChangeCalls, aws.StringValue(input.Status))
1087 | 				assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster))
1088 | 				assert.Equal(t, []*string{aws.String("cont-inst-id")}, input.ContainerInstances)
1089 | 				return stateOutAPIFailure, nil
1090 | 			},
1091 | 		}
1092 | 		u := updater{ecs: mockECS, cluster: "test-cluster"}
1093 | 		err := u.drainInstance("cont-inst-id")
1094 | 		require.Error(t, err)
1095 | 		assert.Contains(t, err.Error(), fmt.Sprintf("%v", stateOutAPIFailure.Failures))
1096 | 		assert.Equal(t, []string{"DRAINING", "ACTIVE"}, stateChangeCalls)
1097 | 	})
1098 | 
1099 | 	t.Run("list task err", func(t *testing.T) {
1100 | 		defer cleanup()
1101 | 		listTaskErr := errors.New("failed to list tasks")
1102 | 		mockECS := MockECS{
1103 | 			UpdateContainerInstancesStateFn: mockStateChange,
1104 | 			ListTasksFn: func(input *ecs.ListTasksInput) (*ecs.ListTasksOutput, error) {
1105 | 				assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster))
1106 | 				assert.Equal(t, "cont-inst-id", aws.StringValue(input.ContainerInstance))
1107 | 				return nil, listTaskErr
1108 | 			},
1109 | 		}
1110 | 		u := updater{ecs: mockECS, cluster: "test-cluster"}
1111 | 		err := u.drainInstance("cont-inst-id")
1112 | 		require.Error(t, err)
1113 | 		assert.ErrorIs(t, err, listTaskErr)
1114 | 		assert.Equal(t, []string{"DRAINING", "ACTIVE"}, stateChangeCalls)
1115 | 	})
1116 | 
1117 | 	t.Run("wait tasks stop err", func(t *testing.T) {
1118 | 		defer cleanup()
1119 | 		waitTaskErr := errors.New("failed to wait for tasks to stop")
1120 | 		mockECS := MockECS{
1121 | 			UpdateContainerInstancesStateFn: mockStateChange,
1122 | 			ListTasksFn:                     mockListTasks,
1123 | 			WaitUntilTasksStoppedWithContextFn: func(_ aws.Context, input *ecs.DescribeTasksInput, _ ...request.WaiterOption) error {
1124 | 				assert.Equal(t, []*string{
1125 | 					aws.String("task-arn-1"),
1126 | 				}, input.Tasks)
1127 | 				assert.Equal(t, "test-cluster", aws.StringValue(input.Cluster))
1128 | 				return waitTaskErr
1129 | 			},
1130 | 		}
1131 | 		u := updater{ecs: mockECS, cluster: "test-cluster"}
1132 | 		err := u.drainInstance("cont-inst-id")
1133 | 		require.Error(t, err)
1134 | 		assert.ErrorIs(t, err, waitTaskErr)
1135 | 		assert.Equal(t, []string{"DRAINING", "ACTIVE"}, stateChangeCalls)
1136 | 	})
1137 | }
1138 | 
1139 | func TestUpdateInstance(t *testing.T) {
1140 | 	checkPattern := "{\"update_state\": \"%s\", \"active_partition\": { \"image\": { \"version\": \"0.0.0\"}}}"
1141 | 	cases := []struct {
1142 | 		name                        string
1143 | 		invocationOut               *ssm.GetCommandInvocationOutput
1144 | 		expectedSSMCommandCallOrder []string
1145 | 		expectedErr                 string
1146 | 	}{
1147 | 		{
1148 | 			name: "update state available",
1149 | 			invocationOut: &ssm.GetCommandInvocationOutput{
1150 | 				Status:                aws.String("Success"),
1151 | 				StandardOutputContent: aws.String(fmt.Sprintf(checkPattern, updateStateAvailable)),
1152 | 			},
1153 | 			expectedSSMCommandCallOrder: []string{"check-document", "apply-document", "reboot-document"},
1154 | 		}, {
1155 | 			name: "update state ready",
1156 | 			invocationOut: &ssm.GetCommandInvocationOutput{
1157 | 				Status:                aws.String("Success"),
1158 | 				StandardOutputContent: aws.String(fmt.Sprintf(checkPattern, updateStateReady)),
1159 | 			},
1160 | 			expectedSSMCommandCallOrder: []string{"check-document", "reboot-document"},
1161 | 		}, {
1162 | 			name: "update state idle",
1163 | 			invocationOut: &ssm.GetCommandInvocationOutput{
1164 | 				Status:                aws.String("Success"),
1165 | 				StandardOutputContent: aws.String(fmt.Sprintf(checkPattern, updateStateIdle)),
1166 | 			},
1167 | 			expectedSSMCommandCallOrder: []string{"check-document"},
1168 | 		}, {
1169 | 			name: "update state staged",
1170 | 			invocationOut: &ssm.GetCommandInvocationOutput{
1171 | 				Status:                aws.String("Success"),
1172 | 				StandardOutputContent: aws.String(fmt.Sprintf(checkPattern, updateStateStaged)),
1173 | 			},
1174 | 			expectedSSMCommandCallOrder: []string{"check-document"},
1175 | 			expectedErr:                 "unexpected update state \"Staged\"; skipping instance",
1176 | 		},
1177 | 	}
1178 | 	for _, tc := range cases {
1179 | 		t.Run(tc.name, func(t *testing.T) {
1180 | 			ssmCommandCallOrder := []string{}
1181 | 			mockSSM := MockSSM{
1182 | 				SendCommandFn: func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) {
1183 | 					ssmCommandCallOrder = append(ssmCommandCallOrder, aws.StringValue(input.DocumentName))
1184 | 					assert.Equal(t, []*string{aws.String("instance-id")}, input.InstanceIds)
1185 | 					return &ssm.SendCommandOutput{
1186 | 						Command: &ssm.Command{
1187 | 							CommandId: aws.String("command-id"),
1188 | 						},
1189 | 					}, nil
1190 | 				},
1191 | 				GetCommandInvocationFn: func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) {
1192 | 					assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
1193 | 					assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId))
1194 | 					return tc.invocationOut, nil
1195 | 				},
1196 | 				WaitUntilCommandExecutedWithContextFn: func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error {
1197 | 					assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
1198 | 					assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId))
1199 | 					return nil
1200 | 				},
1201 | 			}
1202 | 			mockEC2 := MockEC2{
1203 | 				WaitUntilInstanceStatusOkFn: func(input *ec2.DescribeInstanceStatusInput) error {
1204 | 					assert.Equal(t, []*string{aws.String("instance-id")}, input.InstanceIds)
1205 | 					return nil
1206 | 				},
1207 | 			}
1208 | 			u := updater{ssm: mockSSM, ec2: mockEC2, checkDocument: "check-document", applyDocument: "apply-document", rebootDocument: "reboot-document"}
1209 | 			err := u.updateInstance(instance{
1210 | 				instanceID:          "instance-id",
1211 | 				containerInstanceID: "cont-inst-id",
1212 | 				bottlerocketVersion: "v0.1.0",
1213 | 			})
1214 | 			if tc.expectedErr != "" {
1215 | 				require.Error(t, err)
1216 | 				assert.Contains(t, err.Error(), tc.expectedErr)
1217 | 			} else {
1218 | 				require.NoError(t, err)
1219 | 			}
1220 | 			assert.Equal(t, tc.expectedSSMCommandCallOrder, ssmCommandCallOrder)
1221 | 		})
1222 | 	}
1223 | }
1224 | 
1225 | func TestUpdateInstanceErr(t *testing.T) {
1226 | 	commandOutput := &ssm.SendCommandOutput{
1227 | 		Command: &ssm.Command{
1228 | 			CommandId: aws.String("command-id"),
1229 | 		},
1230 | 	}
1231 | 	mockSendCommand := func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) {
1232 | 		assert.Equal(t, []*string{aws.String("instance-id")}, input.InstanceIds)
1233 | 		return commandOutput, nil
1234 | 	}
1235 | 	mockGetCommandInvocation := func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) {
1236 | 		assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
1237 | 		assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId))
1238 | 		return &ssm.GetCommandInvocationOutput{
1239 | 			Status:                aws.String("Success"),
1240 | 			StandardOutputContent: aws.String("{\"update_state\": \"Available\", \"active_partition\": { \"image\": { \"version\": \"0.0.0\"}}}"),
1241 | 		}, nil
1242 | 	}
1243 | 	mockWaitCommandExecution := func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error {
1244 | 		assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
1245 | 		assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId))
1246 | 		return nil
1247 | 	}
1248 | 
1249 | 	t.Run("check err", func(t *testing.T) {
1250 | 		checkErr := errors.New("failed to send check command")
1251 | 		mockSSM := MockSSM{
1252 | 			SendCommandFn: func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) {
1253 | 				assert.Equal(t, "check-document", aws.StringValue(input.DocumentName))
1254 | 				assert.Equal(t, []*string{aws.String("instance-id")}, input.InstanceIds)
1255 | 				return nil, checkErr
1256 | 			},
1257 | 		}
1258 | 		u := updater{ssm: mockSSM, checkDocument: "check-document"}
1259 | 		err := u.updateInstance(instance{
1260 | 			instanceID:          "instance-id",
1261 | 			containerInstanceID: "cont-inst-id",
1262 | 		})
1263 | 		require.Error(t, err)
1264 | 		assert.ErrorIs(t, err, checkErr)
1265 | 	})
1266 | 	t.Run("apply err", func(t *testing.T) {
1267 | 		applyErr := errors.New("failed to send apply command")
1268 | 		mockSSM := MockSSM{
1269 | 			SendCommandFn: func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) {
1270 | 				assert.Equal(t, []*string{aws.String("instance-id")}, input.InstanceIds)
1271 | 				if aws.StringValue(input.DocumentName) == "apply-document" {
1272 | 					return nil, applyErr
1273 | 				}
1274 | 				return commandOutput, nil
1275 | 			},
1276 | 			GetCommandInvocationFn:                mockGetCommandInvocation,
1277 | 			WaitUntilCommandExecutedWithContextFn: mockWaitCommandExecution,
1278 | 		}
1279 | 		u := updater{ssm: mockSSM, checkDocument: "check-document", applyDocument: "apply-document"}
1280 | 		err := u.updateInstance(instance{
1281 | 			instanceID:          "instance-id",
1282 | 			containerInstanceID: "cont-inst-id",
1283 | 		})
1284 | 		require.Error(t, err)
1285 | 		assert.ErrorIs(t, err, applyErr)
1286 | 	})
1287 | 	t.Run("reboot err", func(t *testing.T) {
1288 | 		rebootErr := errors.New("failed to send reboot command")
1289 | 		mockSSM := MockSSM{
1290 | 			SendCommandFn: func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) {
1291 | 				assert.Equal(t, []*string{aws.String("instance-id")}, input.InstanceIds)
1292 | 				if aws.StringValue(input.DocumentName) == "reboot-document" {
1293 | 					return nil, rebootErr
1294 | 				}
1295 | 				return commandOutput, nil
1296 | 			},
1297 | 			GetCommandInvocationFn:                mockGetCommandInvocation,
1298 | 			WaitUntilCommandExecutedWithContextFn: mockWaitCommandExecution,
1299 | 		}
1300 | 		u := updater{ssm: mockSSM, checkDocument: "check-document", applyDocument: "apply-document", rebootDocument: "reboot-document"}
1301 | 		err := u.updateInstance(instance{
1302 | 			instanceID:          "instance-id",
1303 | 			containerInstanceID: "cont-inst-id",
1304 | 		})
1305 | 		require.Error(t, err)
1306 | 		assert.ErrorIs(t, err, rebootErr)
1307 | 	})
1308 | 	t.Run("invocation err", func(t *testing.T) {
1309 | 		ssmGetInvocationErr := errors.New("failed to get command invocation")
1310 | 		mockSSM := MockSSM{
1311 | 			SendCommandFn: mockSendCommand,
1312 | 			GetCommandInvocationFn: func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) {
1313 | 				assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
1314 | 				assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId))
1315 | 				return nil, ssmGetInvocationErr
1316 | 			},
1317 | 			WaitUntilCommandExecutedWithContextFn: mockWaitCommandExecution,
1318 | 		}
1319 | 		u := updater{ssm: mockSSM, checkDocument: "check-document"}
1320 | 		err := u.updateInstance(instance{
1321 | 			instanceID:          "instance-id",
1322 | 			containerInstanceID: "cont-inst-id",
1323 | 		})
1324 | 		require.Error(t, err)
1325 | 		assert.ErrorIs(t, err, ssmGetInvocationErr)
1326 | 	})
1327 | 	t.Run("wait ssm err", func(t *testing.T) {
1328 | 		waitExecErr := errors.New("failed to wait ssm execution complete")
1329 | 		mockSSM := MockSSM{
1330 | 			SendCommandFn: mockSendCommand,
1331 | 			WaitUntilCommandExecutedWithContextFn: func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error {
1332 | 				assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
1333 | 				assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId))
1334 | 				return waitExecErr
1335 | 			},
1336 | 			GetCommandInvocationFn: func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) {
1337 | 				assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
1338 | 				assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId))
1339 | 				return &ssm.GetCommandInvocationOutput{}, nil
1340 | 			},
1341 | 		}
1342 | 		u := updater{ssm: mockSSM, checkDocument: "check-document"}
1343 | 		err := u.updateInstance(instance{
1344 | 			instanceID:          "instance-id",
1345 | 			containerInstanceID: "cont-inst-id",
1346 | 		})
1347 | 		require.Error(t, err)
1348 | 		assert.ErrorIs(t, err, waitExecErr)
1349 | 	})
1350 | 	t.Run("wait instance ok err", func(t *testing.T) {
1351 | 		waitErr := errors.New("failed to wait instance ok")
1352 | 		mockSSM := MockSSM{
1353 | 			SendCommandFn:                         mockSendCommand,
1354 | 			GetCommandInvocationFn:                mockGetCommandInvocation,
1355 | 			WaitUntilCommandExecutedWithContextFn: mockWaitCommandExecution,
1356 | 		}
1357 | 
1358 | 		mockEC2 := MockEC2{
1359 | 			WaitUntilInstanceStatusOkFn: func(input *ec2.DescribeInstanceStatusInput) error {
1360 | 				assert.Equal(t, []*string{aws.String("instance-id")}, input.InstanceIds)
1361 | 				return waitErr
1362 | 			},
1363 | 		}
1364 | 		u := updater{ssm: mockSSM, ec2: mockEC2, checkDocument: "check-document", applyDocument: "apply-document", rebootDocument: "reboot-document"}
1365 | 		err := u.updateInstance(instance{
1366 | 			instanceID:          "instance-id",
1367 | 			containerInstanceID: "cont-inst-id",
1368 | 		})
1369 | 		require.Error(t, err)
1370 | 		assert.ErrorIs(t, err, waitErr)
1371 | 	})
1372 | }
1373 | 
1374 | func TestVerifyUpdate(t *testing.T) {
1375 | 	checkPattern := "{\"update_state\": \"%s\", \"active_partition\": { \"image\": { \"version\": \"%s\"}}}"
1376 | 	cases := []struct {
1377 | 		name          string
1378 | 		invocationOut *ssm.GetCommandInvocationOutput
1379 | 		expectedOk    bool
1380 | 	}{
1381 | 		{
1382 | 			name: "verify success",
1383 | 			invocationOut: &ssm.GetCommandInvocationOutput{
1384 | 				Status:                aws.String("Success"),
1385 | 				StandardOutputContent: aws.String(fmt.Sprintf(checkPattern, updateStateIdle, "0.0.1")),
1386 | 			},
1387 | 			expectedOk: true,
1388 | 		},
1389 | 		{
1390 | 			name: "version is same",
1391 | 			invocationOut: &ssm.GetCommandInvocationOutput{
1392 | 				Status:                aws.String("Success"),
1393 | 				StandardOutputContent: aws.String(fmt.Sprintf(checkPattern, updateStateIdle, "0.0.0")),
1394 | 			},
1395 | 			expectedOk: false,
1396 | 		},
1397 | 		{
1398 | 			name: "another version is available",
1399 | 			invocationOut: &ssm.GetCommandInvocationOutput{
1400 | 				Status:                aws.String("Success"),
1401 | 				StandardOutputContent: aws.String(fmt.Sprintf(checkPattern, updateStateAvailable, "0.0.1")),
1402 | 			},
1403 | 			expectedOk: true,
1404 | 		},
1405 | 	}
1406 | 
1407 | 	for _, tc := range cases {
1408 | 		t.Run(tc.name, func(t *testing.T) {
1409 | 			mockSSM := MockSSM{
1410 | 				SendCommandFn: func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) {
1411 | 					assert.Equal(t, "check-document", aws.StringValue(input.DocumentName))
1412 | 					return &ssm.SendCommandOutput{
1413 | 						Command: &ssm.Command{
1414 | 							CommandId: aws.String("command-id"),
1415 | 						},
1416 | 					}, nil
1417 | 				},
1418 | 				GetCommandInvocationFn: func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) {
1419 | 					assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
1420 | 					assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId))
1421 | 					return tc.invocationOut, nil
1422 | 				},
1423 | 				WaitUntilCommandExecutedWithContextFn: func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error {
1424 | 					assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
1425 | 					assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId))
1426 | 					return nil
1427 | 				},
1428 | 			}
1429 | 			u := updater{ssm: mockSSM, checkDocument: "check-document"}
1430 | 			ok, err := u.verifyUpdate(instance{
1431 | 				instanceID:          "instance-id",
1432 | 				containerInstanceID: "cont-inst-id",
1433 | 				bottlerocketVersion: "0.0.0",
1434 | 			})
1435 | 			require.NoError(t, err)
1436 | 			assert.Equal(t, tc.expectedOk, ok)
1437 | 		})
1438 | 	}
1439 | }
1440 | 
1441 | func TestVerifyUpdateErr(t *testing.T) {
1442 | 	mockSSMCommandOut := func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) {
1443 | 		assert.Equal(t, "check-document", aws.StringValue(input.DocumentName))
1444 | 		assert.Equal(t, 1, len(input.InstanceIds))
1445 | 		assert.Equal(t, "instance-id", aws.StringValue(input.InstanceIds[0]))
1446 | 		return &ssm.SendCommandOutput{
1447 | 			Command: &ssm.Command{
1448 | 				CommandId: aws.String("command-id"),
1449 | 			},
1450 | 		}, nil
1451 | 	}
1452 | 	mockWaitCommandExecution := func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error {
1453 | 		assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
1454 | 		assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId))
1455 | 		return nil
1456 | 	}
1457 | 	mockGetCommandInvocation := func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) {
1458 | 		assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
1459 | 		assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId))
1460 | 		return &ssm.GetCommandInvocationOutput{
1461 | 			Status: aws.String("Success"),
1462 | 		}, nil
1463 | 	}
1464 | 	t.Run("check err", func(t *testing.T) {
1465 | 		ssmCheckErr := errors.New("failed to send check command")
1466 | 		mockSSM := MockSSM{
1467 | 			SendCommandFn: func(input *ssm.SendCommandInput) (*ssm.SendCommandOutput, error) {
1468 | 				assert.Equal(t, "check-document", aws.StringValue(input.DocumentName))
1469 | 				assert.Equal(t, 1, len(input.InstanceIds))
1470 | 				assert.Equal(t, "instance-id", aws.StringValue(input.InstanceIds[0]))
1471 | 				return nil, ssmCheckErr
1472 | 			},
1473 | 		}
1474 | 		u := updater{ssm: mockSSM, checkDocument: "check-document"}
1475 | 		ok, err := u.verifyUpdate(instance{
1476 | 			instanceID:          "instance-id",
1477 | 			containerInstanceID: "cont-inst-id",
1478 | 			bottlerocketVersion: "0.0.0",
1479 | 		})
1480 | 		require.Error(t, err)
1481 | 		assert.ErrorIs(t, err, ssmCheckErr)
1482 | 		assert.False(t, ok)
1483 | 	})
1484 | 	t.Run("wait ssm err", func(t *testing.T) {
1485 | 		waitExecErr := errors.New("failed to wait ssm execution complete")
1486 | 		mockSSM := MockSSM{
1487 | 			SendCommandFn: mockSSMCommandOut,
1488 | 			WaitUntilCommandExecutedWithContextFn: func(_ aws.Context, input *ssm.GetCommandInvocationInput, _ ...request.WaiterOption) error {
1489 | 				assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
1490 | 				assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId))
1491 | 				return waitExecErr
1492 | 			},
1493 | 			GetCommandInvocationFn: func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) {
1494 | 				assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
1495 | 				assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId))
1496 | 				return &ssm.GetCommandInvocationOutput{}, nil
1497 | 			},
1498 | 		}
1499 | 		u := updater{ssm: mockSSM, checkDocument: "check-document"}
1500 | 		ok, err := u.verifyUpdate(instance{
1501 | 			instanceID:          "instance-id",
1502 | 			containerInstanceID: "cont-inst-id",
1503 | 			bottlerocketVersion: "0.0.0",
1504 | 		})
1505 | 		require.Error(t, err)
1506 | 		assert.ErrorIs(t, err, waitExecErr)
1507 | 		assert.False(t, ok)
1508 | 	})
1509 | 	t.Run("invocation err", func(t *testing.T) {
1510 | 		ssmGetInvocationErr := errors.New("failed to get command invocation")
1511 | 		mockSSM := MockSSM{
1512 | 			SendCommandFn:                         mockSSMCommandOut,
1513 | 			WaitUntilCommandExecutedWithContextFn: mockWaitCommandExecution,
1514 | 			GetCommandInvocationFn: func(input *ssm.GetCommandInvocationInput) (*ssm.GetCommandInvocationOutput, error) {
1515 | 				assert.Equal(t, "command-id", aws.StringValue(input.CommandId))
1516 | 				assert.Equal(t, "instance-id", aws.StringValue(input.InstanceId))
1517 | 				return nil, ssmGetInvocationErr
1518 | 			},
1519 | 		}
1520 | 		u := updater{ssm: mockSSM, checkDocument: "check-document"}
1521 | 		ok, err := u.verifyUpdate(instance{
1522 | 			instanceID:          "instance-id",
1523 | 			containerInstanceID: "cont-inst-id",
1524 | 			bottlerocketVersion: "0.0.0",
1525 | 		})
1526 | 		require.Error(t, err)
1527 | 		assert.ErrorIs(t, err, ssmGetInvocationErr)
1528 | 		assert.False(t, ok)
1529 | 	})
1530 | 
1531 | 	t.Run("parse output err", func(t *testing.T) {
1532 | 		mockSSM := MockSSM{
1533 | 			SendCommandFn:                         mockSSMCommandOut,
1534 | 			WaitUntilCommandExecutedWithContextFn: mockWaitCommandExecution,
1535 | 			GetCommandInvocationFn:                mockGetCommandInvocation,
1536 | 		}
1537 | 		u := updater{ssm: mockSSM, checkDocument: "check-document"}
1538 | 		ok, err := u.verifyUpdate(instance{
1539 | 			instanceID:          "instance-id",
1540 | 			containerInstanceID: "cont-inst-id",
1541 | 			bottlerocketVersion: "0.0.0",
1542 | 		})
1543 | 		require.Error(t, err)
1544 | 		assert.Contains(t, err.Error(), `failed to parse command output "", manual verification required`)
1545 | 		assert.False(t, ok)
1546 | 	})
1547 | }
1548 | 
1549 | // TestActivateInstance checks how activateInstance reacts to each outcome of the
1550 | // mocked UpdateContainerInstancesStateFn: a clean response, a response carrying
1551 | // an "OTHER" or "INACTIVE" failure, and an error returned by the call itself.
1552 | func TestActivateInstance(t *testing.T) {
1553 | 	// withFailure builds a response carrying one failure with the given reason.
1554 | 	withFailure := func(reason string) *ecs.UpdateContainerInstancesStateOutput {
1555 | 		return &ecs.UpdateContainerInstancesStateOutput{
1556 | 			Failures: []*ecs.Failure{{Reason: aws.String(reason)}},
1557 | 		}
1558 | 	}
1559 | 	tests := []struct {
1560 | 		desc    string
1561 | 		resp    *ecs.UpdateContainerInstancesStateOutput
1562 | 		callErr error
1563 | 		wantErr string // substring expected in the error; empty means no error
1564 | 	}{
1565 | 		{
1566 | 			desc: "activate success",
1567 | 			resp: &ecs.UpdateContainerInstancesStateOutput{},
1568 | 		},
1569 | 		{
1570 | 			desc:    "activate api fail",
1571 | 			resp:    withFailure("OTHER"),
1572 | 			wantErr: "API failures while activating: [{\n  Reason: \"OTHER\"\n}]",
1573 | 		},
1574 | 		{
1575 | 			// An "INACTIVE" failure reason is expected to produce no error.
1576 | 			desc: "activate api fail inactive",
1577 | 			resp: withFailure("INACTIVE"),
1578 | 		},
1579 | 		{
1580 | 			desc:    "activate failure",
1581 | 			callErr: errors.New("failed to activate"),
1582 | 			wantErr: "failed to activate",
1583 | 		},
1584 | 	}
1585 | 	for _, tt := range tests {
1586 | 		t.Run(tt.desc, func(t *testing.T) {
1587 | 			u := updater{
1588 | 				ecs: MockECS{
1589 | 					UpdateContainerInstancesStateFn: func(_ *ecs.UpdateContainerInstancesStateInput) (*ecs.UpdateContainerInstancesStateOutput, error) {
1590 | 						return tt.resp, tt.callErr
1591 | 					},
1592 | 				},
1593 | 			}
1594 | 			err := u.activateInstance("cont-inst-id")
1595 | 			if tt.wantErr == "" {
1596 | 				require.NoError(t, err)
1597 | 				return
1598 | 			}
1599 | 			require.Error(t, err)
1600 | 			assert.Contains(t, err.Error(), tt.wantErr)
1601 | 		})
1602 | 	}
1603 | }
1604 | 
1605 | // TestAlreadyRunning drives alreadyRunning with canned ListTasksFn results. The
1606 | // cases expect true for two listed tasks, false for a single task, and false
1607 | // together with the underlying error when the list call itself fails.
1608 | func TestAlreadyRunning(t *testing.T) {
1609 | 	// listed wraps the given task ARNs in a ListTasks response.
1610 | 	listed := func(arns ...string) *ecs.ListTasksOutput {
1611 | 		out := &ecs.ListTasksOutput{}
1612 | 		for _, arn := range arns {
1613 | 			out.TaskArns = append(out.TaskArns, aws.String(arn))
1614 | 		}
1615 | 		return out
1616 | 	}
1617 | 	tests := []struct {
1618 | 		desc    string
1619 | 		resp    *ecs.ListTasksOutput
1620 | 		callErr error
1621 | 		wantOK  bool
1622 | 		wantErr string // substring expected in the error; empty means no error
1623 | 	}{
1624 | 		{
1625 | 			desc:   "success",
1626 | 			resp:   listed("task-arn-1", "task-arn-2"),
1627 | 			wantOK: true,
1628 | 		},
1629 | 		{
1630 | 			desc:   "only one task",
1631 | 			resp:   listed("tarsk-arn-1"),
1632 | 			wantOK: false,
1633 | 		},
1634 | 		{
1635 | 			desc:    "fail list task",
1636 | 			callErr: errors.New("failed to list task"),
1637 | 			wantOK:  false,
1638 | 			wantErr: "failed to list task",
1639 | 		},
1640 | 	}
1641 | 	for _, tt := range tests {
1642 | 		t.Run(tt.desc, func(t *testing.T) {
1643 | 			mock := MockECS{ListTasksFn: func(_ *ecs.ListTasksInput) (*ecs.ListTasksOutput, error) {
1644 | 				return tt.resp, tt.callErr
1645 | 			}}
1646 | 			u := updater{ecs: mock, cluster: "ecs-cluster"}
1647 | 			ok, err := u.alreadyRunning("updater-family")
1648 | 			if tt.wantErr != "" {
1649 | 				require.Error(t, err)
1650 | 				assert.Contains(t, err.Error(), tt.wantErr)
1651 | 			} else {
1652 | 				require.NoError(t, err)
1653 | 			}
1654 | 			assert.Equal(t, tt.wantOK, ok)
1655 | 		})
1656 | 	}
1657 | }
1658 | 


--------------------------------------------------------------------------------