├── .circleci └── config.yml ├── .gitignore ├── Gopkg.lock ├── Gopkg.toml ├── LICENSE ├── Makefile ├── README.md ├── cmd ├── adjust-desired-instance-count │ └── main.go ├── count-outdated-instances │ └── main.go ├── count-running-executions │ └── main.go └── start-roller │ └── main.go ├── internal ├── autoscaling.go ├── request.go └── util.go ├── terraform └── grow-method │ ├── adjust_desired_instance_count.tf │ ├── aws.tf │ ├── cloudwatch.tf │ ├── count_outdated_instances.tf │ ├── count_running_executions.tf │ ├── output.tf │ ├── start_roller.tf │ ├── step_function.tf │ └── variables.tf └── test └── grow-method ├── grow_method_test.go └── infra.tf /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | jobs: 4 | build_and_upload: 5 | docker: 6 | - image: circleci/golang:1.10.3-stretch 7 | working_directory: /go/src/github.com/otterley/rollerbot-aws 8 | steps: 9 | - checkout 10 | - run: 11 | name: Prepare workspace 12 | command: | 13 | sudo apt-get update && sudo apt-get -y install python-pip 14 | sudo pip install awscli >/dev/null 15 | curl -sS https://raw.githubusercontent.com/golang/dep/master/install.sh | sh 16 | dep ensure 17 | - run: 18 | name: Compile and build ZIP files 19 | command: make zip 20 | - run: 21 | name: Upload to S3 22 | command: make upload 23 | - save_cache: 24 | key: gopkg-v1-{{ checksum "Gopkg.lock" }} 25 | paths: 26 | - /go/bin/dep 27 | - vendor 28 | 29 | 30 | test_grow_method: 31 | docker: 32 | - image: circleci/golang:1.10.3-stretch 33 | working_directory: /go/src/github.com/otterley/rollerbot-aws 34 | steps: 35 | - checkout 36 | - restore_cache: 37 | key: gopkg-v1-{{ checksum "Gopkg.lock" }} 38 | - run: 39 | name: Prepare workspace 40 | command: | 41 | curl -sSL -o /tmp/terraform.zip https://releases.hashicorp.com/terraform/0.11.8/terraform_0.11.8_linux_amd64.zip 42 | sudo unzip -q -d /usr/bin /tmp/terraform.zip && sudo chmod +x /usr/bin/terraform 43 | dep ensure 44 | 
- run: 45 | name: Run tests 46 | command: | 47 | if ! make test_grow_method; then 48 | cd test/grow-method 49 | terraform destroy -var lambda_version=0.0.0 -auto-approve 50 | false 51 | fi 52 | 53 | workflows: 54 | version: 2 55 | build_test: 56 | jobs: 57 | - build_and_upload: 58 | context: AWS 59 | filters: 60 | branches: 61 | only: master 62 | - test_grow_method: 63 | context: AWS 64 | requires: 65 | - build_and_upload 66 | filters: 67 | branches: 68 | only: master 69 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.backup 3 | *.tfstate 4 | *.sw[a-z] 5 | *.tmp 6 | *.zip 7 | dist/ 8 | vendor/ 9 | .terraform/ 10 | .vscode/ 11 | -------------------------------------------------------------------------------- /Gopkg.lock: -------------------------------------------------------------------------------- 1 | # This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'. 
2 | 3 | 4 | [[projects]] 5 | digest = "1:7637de64e2bad97a7b9d1665e9cb0f9b709b1ef28d09232da908ead542bf89f0" 6 | name = "github.com/aws/aws-lambda-go" 7 | packages = [ 8 | "lambda", 9 | "lambda/messages", 10 | "lambdacontext", 11 | ] 12 | pruneopts = "UT" 13 | revision = "2d482ef09017ae953b1e8d5a6ddac5b696663a3c" 14 | version = "v1.6.0" 15 | 16 | [[projects]] 17 | digest = "1:a918fbb2204dd61b4bd2fd46d92a169e9c24a1996d716fe8fa375646af8cb2ce" 18 | name = "github.com/aws/aws-sdk-go" 19 | packages = [ 20 | "aws", 21 | "aws/awserr", 22 | "aws/awsutil", 23 | "aws/client", 24 | "aws/client/metadata", 25 | "aws/corehandlers", 26 | "aws/credentials", 27 | "aws/credentials/ec2rolecreds", 28 | "aws/credentials/endpointcreds", 29 | "aws/credentials/stscreds", 30 | "aws/csm", 31 | "aws/defaults", 32 | "aws/ec2metadata", 33 | "aws/endpoints", 34 | "aws/request", 35 | "aws/session", 36 | "aws/signer/v4", 37 | "internal/sdkio", 38 | "internal/sdkrand", 39 | "internal/sdkuri", 40 | "internal/shareddefaults", 41 | "private/protocol", 42 | "private/protocol/json/jsonutil", 43 | "private/protocol/jsonrpc", 44 | "private/protocol/query", 45 | "private/protocol/query/queryutil", 46 | "private/protocol/rest", 47 | "private/protocol/xml/xmlutil", 48 | "service/autoscaling", 49 | "service/cloudwatchevents", 50 | "service/sfn", 51 | "service/sts", 52 | ] 53 | pruneopts = "UT" 54 | revision = "66974140c322f22c1daaf95a18930ea6a9e4d21e" 55 | version = "v1.15.16" 56 | 57 | [[projects]] 58 | digest = "1:ffe9824d294da03b391f44e1ae8281281b4afc1bdaa9588c9097785e3af10cec" 59 | name = "github.com/davecgh/go-spew" 60 | packages = ["spew"] 61 | pruneopts = "UT" 62 | revision = "8991bc29aa16c548c550c7ff78260e27b9ab7c73" 63 | version = "v1.1.1" 64 | 65 | [[projects]] 66 | digest = "1:fe8a03a8222d5b913f256972933d26d24ad7c8286692a42943bc01633cc8fce3" 67 | name = "github.com/go-ini/ini" 68 | packages = ["."] 69 | pruneopts = "UT" 70 | revision = "358ee7663966325963d4e8b2e1fbd570c5195153" 71 | version = 
"v1.38.1" 72 | 73 | [[projects]] 74 | digest = "1:e9c6ba1df4e7953669dd5c2b13f6ce239ce2a7e76c498504368969b3eec87821" 75 | name = "github.com/gruntwork-io/terratest" 76 | packages = [ 77 | "modules/collections", 78 | "modules/logger", 79 | "modules/retry", 80 | "modules/shell", 81 | "modules/terraform", 82 | ] 83 | pruneopts = "UT" 84 | revision = "8bcb4e1ad409e19aac4ad7fab99fe632326a1884" 85 | version = "v0.9.17" 86 | 87 | [[projects]] 88 | digest = "1:e22af8c7518e1eab6f2eab2b7d7558927f816262586cd6ed9f349c97a6c285c4" 89 | name = "github.com/jmespath/go-jmespath" 90 | packages = ["."] 91 | pruneopts = "UT" 92 | revision = "0b12d6b5" 93 | 94 | [[projects]] 95 | digest = "1:40e195917a951a8bf867cd05de2a46aaf1806c50cf92eebf4c16f78cd196f747" 96 | name = "github.com/pkg/errors" 97 | packages = ["."] 98 | pruneopts = "UT" 99 | revision = "645ef00459ed84a119197bfb8d8205042c6df63d" 100 | version = "v0.8.0" 101 | 102 | [[projects]] 103 | digest = "1:0028cb19b2e4c3112225cd871870f2d9cf49b9b4276531f03438a88e94be86fe" 104 | name = "github.com/pmezard/go-difflib" 105 | packages = ["difflib"] 106 | pruneopts = "UT" 107 | revision = "792786c7400a136282c1664665ae0a8db921c6c2" 108 | version = "v1.0.0" 109 | 110 | [[projects]] 111 | digest = "1:18752d0b95816a1b777505a97f71c7467a8445b8ffb55631a7bf779f6ba4fa83" 112 | name = "github.com/stretchr/testify" 113 | packages = ["assert"] 114 | pruneopts = "UT" 115 | revision = "f35b8ab0b5a2cef36673838d662e249dd9c94686" 116 | version = "v1.2.2" 117 | 118 | [[projects]] 119 | branch = "master" 120 | digest = "1:76ee51c3f468493aff39dbacc401e8831fbb765104cbf613b89bef01cf4bad70" 121 | name = "golang.org/x/net" 122 | packages = ["context"] 123 | pruneopts = "UT" 124 | revision = "922f4815f713f213882e8ef45e0d315b164d705c" 125 | 126 | [solve-meta] 127 | analyzer-name = "dep" 128 | analyzer-version = 1 129 | input-imports = [ 130 | "github.com/aws/aws-lambda-go/lambda", 131 | "github.com/aws/aws-sdk-go/aws", 132 | "github.com/aws/aws-sdk-go/aws/awsutil", 
133 | "github.com/aws/aws-sdk-go/aws/client", 134 | "github.com/aws/aws-sdk-go/aws/session", 135 | "github.com/aws/aws-sdk-go/service/autoscaling", 136 | "github.com/aws/aws-sdk-go/service/cloudwatchevents", 137 | "github.com/aws/aws-sdk-go/service/sfn", 138 | "github.com/gruntwork-io/terratest/modules/terraform", 139 | "github.com/pkg/errors", 140 | "github.com/stretchr/testify/assert", 141 | "golang.org/x/net/context", 142 | ] 143 | solver-name = "gps-cdcl" 144 | solver-version = 1 145 | -------------------------------------------------------------------------------- /Gopkg.toml: -------------------------------------------------------------------------------- 1 | # Gopkg.toml example 2 | # 3 | # Refer to https://golang.github.io/dep/docs/Gopkg.toml.html 4 | # for detailed Gopkg.toml documentation. 5 | # 6 | # required = ["github.com/user/thing/cmd/thing"] 7 | # ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"] 8 | # 9 | # [[constraint]] 10 | # name = "github.com/user/project" 11 | # version = "1.0.0" 12 | # 13 | # [[constraint]] 14 | # name = "github.com/user/project2" 15 | # branch = "dev" 16 | # source = "github.com/myfork/project2" 17 | # 18 | # [[override]] 19 | # name = "github.com/x/y" 20 | # version = "2.4.0" 21 | # 22 | # [prune] 23 | # non-go = false 24 | # go-tests = true 25 | # unused-packages = true 26 | 27 | 28 | [prune] 29 | go-tests = true 30 | unused-packages = true 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 Michael S. Fischer. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | export GOOS := linux 2 | export GOARCH := amd64 3 | 4 | S3BUCKET := rollerbot-aws 5 | VERSION := $(shell git describe --tags --always) 6 | 7 | PROGS := $(subst cmd/,,$(wildcard cmd/*)) 8 | 9 | zip: $(patsubst %,dist/%.zip,$(PROGS)) 10 | 11 | bin/%: ./cmd/%/main.go internal/*.go 12 | go build -o $@ $< 13 | 14 | dist/%.zip: bin/% | dist 15 | zip -u -j $@ $< 16 | 17 | dist: 18 | mkdir dist 19 | 20 | upload: zip 21 | aws s3 sync dist/ s3://$(S3BUCKET)/v$(VERSION)/ 22 | .PHONY: upload 23 | 24 | test_grow_method: 25 | LAMBDA_VERSION=$(VERSION) go test -v -timeout 30m ./test/grow-method 26 | .PHONY: test_grow_method 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![CircleCI](https://circleci.com/gh/otterley/rollerbot-aws/tree/master.svg?style=svg)](https://circleci.com/gh/otterley/rollerbot-aws/tree/master) 2 | 3 | # RollerBot 4 | 5 | ## Purpose 6 | 7 | RollerBot is a system for automating the replacement of instances in an AWS EC2 8 | Auto Scaling Group. If you need the ability to update the AMI on an existing 9 | group without downtime, with low cost, and with little performance impact (at 10 | the price of more transition latency), RollerBot may be right for you. 
11 | 12 | RollerBot is especially useful for: 13 | 14 | * Updating ECS cluster instances 15 | * Updating instances that host stateful services such as Kafka and Consul 16 | 17 | ## How it works 18 | 19 | RollerBot has several different implementations, depending on your cluster's needs. 20 | 21 | ### Grow method 22 | 23 | The "grow" method is best for stateless Auto Scaling Groups. It leverages your 24 | existing Scaling Policy to achieve a slow update of the instances. It works 25 | by increasing the Auto Scaling Group's Desired Count, then allowing the existing 26 | Scaling Policy to decrease the number of instances back to the steady-state count. 27 | Once all instances have been replaced, the process is complete. 28 | 29 | ### Replace-after method 30 | 31 | The "replace-after" method is best for stateful Auto Scaling Groups whose state 32 | is replicated on multiple instances in the cluster (for example, Kafka and Consul). 33 | It works by decreasing the Auto Scaling Group's Desired Count by one, waiting for 34 | an instance to cleanly terminate, then increasing the Desired Count by one again, 35 | to return the Group to its steady-state count. 36 | 37 | ## Installation 38 | 39 | TBD 40 | 41 | ## Notes 42 | 43 | ### CloudTrail 44 | CloudTrail Logs **must** be enabled on the AWS account in which RollerBot is 45 | used so that it can detect when the Auto Scaling Group's Launch Configuration 46 | has been updated. 47 | 48 | ## License 49 | 50 | Apache 2.0 licensed. 
51 | -------------------------------------------------------------------------------- /cmd/adjust-desired-instance-count/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/aws/aws-lambda-go/lambda" 7 | "github.com/aws/aws-sdk-go/aws" 8 | "github.com/aws/aws-sdk-go/aws/session" 9 | "github.com/aws/aws-sdk-go/service/autoscaling" 10 | "github.com/otterley/rollerbot-aws/internal" 11 | "github.com/pkg/errors" 12 | ) 13 | 14 | func adjustDesiredInstanceCount(request internal.RollerParameters) (response internal.RollerParameters, err error) { 15 | response = request 16 | sess := session.Must(session.NewSession()) 17 | client := autoscaling.New(sess) 18 | 19 | asgInfo, err := client.DescribeAutoScalingGroups( 20 | &autoscaling.DescribeAutoScalingGroupsInput{ 21 | AutoScalingGroupNames: aws.StringSlice([]string{request.AutoScalingGroupName}), 22 | }, 23 | ) 24 | if err != nil { 25 | return response, errors.WithMessage(err, "DescribeAutoScalingGroups") 26 | } 27 | if len(asgInfo.AutoScalingGroups) != 1 { 28 | return response, errors.New("Assertion failure: DescribeAutoScalingGroups did not return exactly 1 group") 29 | } 30 | 31 | desiredCapacity := aws.Int64Value(asgInfo.AutoScalingGroups[0].DesiredCapacity) + 32 | int64(request.StepSize) 33 | 34 | fmt.Printf("Adjusting desired capacity on Auto Scaling Group %s from %d to %d\n", 35 | request.AutoScalingGroupName, asgInfo.AutoScalingGroups[0].DesiredCapacity, desiredCapacity) 36 | 37 | _, err = client.UpdateAutoScalingGroup( 38 | &autoscaling.UpdateAutoScalingGroupInput{ 39 | AutoScalingGroupName: aws.String(request.AutoScalingGroupName), 40 | DesiredCapacity: aws.Int64(desiredCapacity), 41 | }, 42 | ) 43 | if err != nil { 44 | return response, errors.WithMessage(err, "UpdateAutoScalingGroup") 45 | } 46 | return 47 | } 48 | 49 | func main() { 50 | lambda.Start(adjustDesiredInstanceCount) 51 | } 52 | 
-------------------------------------------------------------------------------- /cmd/count-outdated-instances/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/aws/aws-lambda-go/lambda" 7 | "github.com/aws/aws-sdk-go/aws/session" 8 | "github.com/otterley/rollerbot-aws/internal" 9 | "github.com/pkg/errors" 10 | ) 11 | 12 | func countOutdatedInstances(request internal.RollerParameters) (response internal.RollerParameters, err error) { 13 | response = request 14 | sess := session.Must(session.NewSession()) 15 | 16 | response.OutdatedInstanceCount, err = internal.CountOutdatedAutoScalingInstances(sess, request.AutoScalingGroupName) 17 | if err != nil { 18 | return response, errors.WithMessage(err, "CountOutdatedAutoScalingInstances") 19 | } 20 | 21 | fmt.Printf("Auto Scaling Group %s has %d outdated instances\n", response.AutoScalingGroupName, response.OutdatedInstanceCount) 22 | 23 | return 24 | } 25 | 26 | func main() { 27 | lambda.Start(countOutdatedInstances) 28 | } 29 | -------------------------------------------------------------------------------- /cmd/count-running-executions/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/aws/aws-lambda-go/lambda" 5 | "github.com/aws/aws-sdk-go/aws" 6 | "github.com/aws/aws-sdk-go/aws/session" 7 | "github.com/aws/aws-sdk-go/service/sfn" 8 | "github.com/otterley/rollerbot-aws/internal" 9 | "github.com/pkg/errors" 10 | ) 11 | 12 | func countRunningExecutions(request internal.RollerParameters) (response internal.RollerParameters, err error) { 13 | response = request 14 | response.RunningExecutionCount = 0 15 | 16 | sess := session.Must(session.NewSession()) 17 | 18 | client := sfn.New(sess) 19 | if err := client.ListExecutionsPages( 20 | &sfn.ListExecutionsInput{ 21 | StateMachineArn: aws.String(request.StateMachineARN), 22 | 
StatusFilter: aws.String("RUNNING"), 23 | }, 24 | func(result *sfn.ListExecutionsOutput, lastPage bool) bool { 25 | response.RunningExecutionCount += len(result.Executions) 26 | return lastPage 27 | }, 28 | ); err != nil { 29 | return response, errors.WithMessage(err, "ListExecutions") 30 | } 31 | 32 | return 33 | } 34 | 35 | func main() { 36 | lambda.Start(countRunningExecutions) 37 | } 38 | -------------------------------------------------------------------------------- /cmd/start-roller/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "math" 7 | "os" 8 | "strconv" 9 | "time" 10 | 11 | "github.com/aws/aws-lambda-go/lambda" 12 | "github.com/aws/aws-sdk-go/aws" 13 | "github.com/aws/aws-sdk-go/aws/session" 14 | "github.com/aws/aws-sdk-go/service/sfn" 15 | "github.com/otterley/rollerbot-aws/internal" 16 | "github.com/pkg/errors" 17 | ) 18 | 19 | func startRoller(input internal.CloudwatchEvent) error { 20 | stateMachineARN := os.Getenv("STATE_MACHINE_ARN") 21 | autoScalingGroupName := input.Detail.RequestParameters.AutoScalingGroupName 22 | 23 | requestedStepSize, err := strconv.Atoi(os.Getenv("STEP_SIZE")) 24 | if err != nil { 25 | return errors.Errorf("Atoi: Could not convert STEP_SIZE %s to int", os.Getenv("STEP_SIZE")) 26 | } 27 | if requestedStepSize < 0 { 28 | return errors.Errorf("Invalid STEP_SIZE %d, must be >= 0", requestedStepSize) 29 | } 30 | 31 | requestedStepPercent, err := strconv.ParseFloat(os.Getenv("STEP_PERCENT"), 64) 32 | if err != nil { 33 | return errors.Errorf("Atoi: Could not convert STEP_PERCENT %s to float64", os.Getenv("STEP_PERCENT")) 34 | } 35 | if requestedStepPercent < 0 || requestedStepPercent > 100 { 36 | return errors.Errorf("Invalid STEP_PERCENT %d, must be >= 0", requestedStepPercent) 37 | } 38 | 39 | if input.ErrorCode != "" { 40 | fmt.Printf("UpdateAutoScalingGroups request returned error: %s - skipping\n", input.ErrorCode) 
// calculateStepSize decides by how many instances the desired capacity is
// changed on each step.
//
//   - A positive requestedStepSize always wins and is returned as-is.
//   - Otherwise the step is requestedStepPercent percent of outdatedCount,
//     rounded down.
//   - The result is never smaller than 1: a percentage that rounds down to
//     zero (for example 50% of a single outdated instance), or both inputs
//     being zero, yields 1, because a step of 0 would leave the desired
//     capacity unchanged.
func calculateStepSize(outdatedCount, requestedStepSize int, requestedStepPercent float64) int {
	if requestedStepSize > 0 {
		return requestedStepSize
	}

	step := int(math.Floor(float64(outdatedCount) * (requestedStepPercent / 100)))
	if step < 1 {
		return 1
	}
	return step
}
// MustEnv returns the value of the environment variable specified by name.
// It will panic if no such variable is defined, or the value is empty.
//
// The panic value is an error whose message names the missing variable.
func MustEnv(name string) string {
	val := os.Getenv(name)
	if val == "" {
		// Report name, not val: val is always the empty string on this path,
		// so formatting it would hide which variable is missing.
		panic(fmt.Errorf("Env var %s not defined", name))
	}
	return val
}
actions = ["sts:AssumeRole"] 15 | 16 | principals { 17 | type = "Service" 18 | identifiers = ["lambda.amazonaws.com"] 19 | } 20 | } 21 | } 22 | 23 | data "aws_iam_policy_document" "adjust_desired_instance_count_policy" { 24 | statement { 25 | actions = [ 26 | "logs:CreateLogGroup", 27 | "logs:CreateLogStream", 28 | "logs:PutLogEvents", 29 | ] 30 | 31 | resources = ["*"] 32 | } 33 | 34 | statement { 35 | actions = [ 36 | "autoscaling:DescribeAutoScalingGroups", 37 | "autoscaling:UpdateAutoScalingGroup", 38 | ] 39 | 40 | resources = ["*"] 41 | } 42 | } 43 | 44 | resource "aws_iam_role" "adjust_desired_instance_count" { 45 | name = "${format("%.64s", "rollerbot-adjust_count-${var.autoscaling_group_name}")}" 46 | assume_role_policy = "${data.aws_iam_policy_document.adjust_desired_instance_count_assume_role.json}" 47 | } 48 | 49 | resource "aws_iam_role_policy" "adjust_desired_instance_count" { 50 | name = "adjust_desired_instance_count" 51 | role = "${aws_iam_role.adjust_desired_instance_count.name}" 52 | policy = "${data.aws_iam_policy_document.adjust_desired_instance_count_policy.json}" 53 | } 54 | -------------------------------------------------------------------------------- /terraform/grow-method/aws.tf: -------------------------------------------------------------------------------- 1 | provider "aws" {} 2 | -------------------------------------------------------------------------------- /terraform/grow-method/cloudwatch.tf: -------------------------------------------------------------------------------- 1 | resource "aws_cloudwatch_event_rule" "update_group" { 2 | name = "${format("%.64s", "rollerbot-update_group-${var.autoscaling_group_name}")}" 3 | description = "Invoked when UpdateAutoScalingGroup is called on ${var.autoscaling_group_name}" 4 | 5 | # "detail-type": [ "AWS API Call via CloudTrail" ], 6 | 7 | event_pattern = < 1 { 120 | return 121 | } 122 | select { 123 | case <-ctx.Done(): 124 | // timed out 125 | return 126 | case <-time.After(30 * 
time.Second): 127 | // check again 128 | } 129 | } 130 | } 131 | } 132 | 133 | func testAllInstancesHaveLaunchConfig(ctx context.Context, autoScalingGroupName, launchConfigurationName string) func(t *testing.T) { 134 | return func(t *testing.T) { 135 | client := autoscaling.New(session.Must(session.NewSession())) 136 | for { 137 | result, err := client.DescribeAutoScalingGroupsWithContext( 138 | ctx, 139 | &autoscaling.DescribeAutoScalingGroupsInput{ 140 | AutoScalingGroupNames: aws.StringSlice([]string{autoScalingGroupName}), 141 | }, 142 | ) 143 | assert.NoError(t, err) 144 | assert.Equal(t, 1, len(result.AutoScalingGroups)) 145 | nonMatching := 0 146 | for _, instance := range result.AutoScalingGroups[0].Instances { 147 | if aws.StringValue(instance.LaunchConfigurationName) != launchConfigurationName { 148 | nonMatching++ 149 | } 150 | } 151 | if nonMatching == 0 { 152 | return 153 | } 154 | fmt.Printf("%d instances still running with old Launch Configuration\n", nonMatching) 155 | select { 156 | case <-ctx.Done(): 157 | // timed out 158 | return 159 | case <-time.After(30 * time.Second): 160 | // check again 161 | } 162 | } 163 | } 164 | } 165 | 166 | func testStepFunctionOK(ctx context.Context, stateMachineARN string) func(t *testing.T) { 167 | return func(t *testing.T) { 168 | client := sfn.New(session.Must(session.NewSession())) 169 | 170 | for { 171 | executions, err := client.ListExecutionsWithContext( 172 | ctx, 173 | &sfn.ListExecutionsInput{ 174 | StateMachineArn: aws.String(stateMachineARN), 175 | }, 176 | ) 177 | assert.NoError(t, err) 178 | for _, execution := range executions.Executions { 179 | if aws.StringValue(execution.Status) == "SUCCEEDED" { 180 | return 181 | } 182 | assert.NotContains(t, aws.StringValue(execution.Status), []string{"FAILED", "TIMED_OUT", "ABORTED"}) 183 | } 184 | select { 185 | case <-ctx.Done(): 186 | // timed out 187 | return 188 | case <-time.After(10 * time.Second): 189 | // check again 190 | } 191 | } 192 | } 193 | } 194 
| 195 | func copyAndAssignLaunchConfig(autoScalingGroupName, launchConfigurationName string) (string, error) { 196 | client := autoscaling.New(session.Must(session.NewSession())) 197 | launchConfigs, err := client.DescribeLaunchConfigurations( 198 | &autoscaling.DescribeLaunchConfigurationsInput{ 199 | LaunchConfigurationNames: aws.StringSlice([]string{launchConfigurationName}), 200 | }, 201 | ) 202 | if err != nil { 203 | return "", err 204 | } 205 | if len(launchConfigs.LaunchConfigurations) != 1 { 206 | return "", fmt.Errorf("Did not find exactly 1 Launch Configuration named %s", launchConfigurationName) 207 | } 208 | var input autoscaling.CreateLaunchConfigurationInput 209 | awsutil.Copy(&input, launchConfigs.LaunchConfigurations[0]) 210 | input.LaunchConfigurationName = aws.String(aws.StringValue(input.LaunchConfigurationName) + "TestCopy") 211 | // awsutil.Copy sets these to empty strings; the API does not approve of this. 212 | input.KernelId = nil 213 | input.KeyName = nil 214 | input.RamdiskId = nil 215 | _, err = client.CreateLaunchConfiguration(&input) 216 | if err != nil { 217 | return "", err 218 | } 219 | 220 | _, err = client.UpdateAutoScalingGroup( 221 | &autoscaling.UpdateAutoScalingGroupInput{ 222 | AutoScalingGroupName: aws.String(autoScalingGroupName), 223 | LaunchConfigurationName: input.LaunchConfigurationName, 224 | }, 225 | ) 226 | return aws.StringValue(input.LaunchConfigurationName), err 227 | } 228 | 229 | func deleteLaunchConfig(name string) { 230 | client := autoscaling.New(session.Must(session.NewSession())) 231 | client.DeleteLaunchConfiguration( 232 | &autoscaling.DeleteLaunchConfigurationInput{ 233 | LaunchConfigurationName: aws.String(name), 234 | }, 235 | ) 236 | } 237 | -------------------------------------------------------------------------------- /test/grow-method/infra.tf: -------------------------------------------------------------------------------- 1 | provider "aws" {} 2 | 3 | variable "lambda_version" { 4 | type = 
"string" 5 | } 6 | 7 | variable "wait_interval" { 8 | type = "string" 9 | default = "600" 10 | } 11 | 12 | module "vpc" { 13 | source = "terraform-aws-modules/vpc/aws" 14 | version = "1.37.0" 15 | 16 | name = "test-rollerbot-grow-method" 17 | cidr = "10.0.0.0/16" 18 | 19 | azs = ["us-west-2a"] 20 | public_subnets = ["10.0.0.0/24"] 21 | 22 | tags = { 23 | Test = "rollerbot-grow-method" 24 | } 25 | } 26 | 27 | module "security_group" { 28 | source = "terraform-aws-modules/security-group/aws//modules/ssh" 29 | version = "2.1.0" 30 | 31 | name = "test-rollerbot-grow-method-ssh" 32 | vpc_id = "${module.vpc.vpc_id}" 33 | 34 | ingress_cidr_blocks = ["0.0.0.0/0"] 35 | } 36 | 37 | data "aws_ami" "amazon_linux" { 38 | most_recent = true 39 | 40 | filter { 41 | name = "name" 42 | values = ["amzn-ami-hvm-*-x86_64-gp2"] 43 | } 44 | 45 | filter { 46 | name = "owner-alias" 47 | values = ["amazon"] 48 | } 49 | } 50 | 51 | module "asg" { 52 | source = "terraform-aws-modules/autoscaling/aws" 53 | version = "2.7.0" 54 | 55 | name = "test-rollerbot-grow-method" 56 | image_id = "${data.aws_ami.amazon_linux.image_id}" 57 | instance_type = "t2.micro" 58 | health_check_type = "EC2" 59 | security_groups = ["${module.security_group.this_security_group_id}"] 60 | vpc_zone_identifier = ["${module.vpc.public_subnets}"] 61 | associate_public_ip_address = false 62 | 63 | desired_capacity = 1 64 | min_size = 1 65 | max_size = 5 66 | termination_policies = ["OldestInstance"] 67 | 68 | wait_for_capacity_timeout = 0 69 | } 70 | 71 | resource "aws_autoscaling_policy" "target_tracking" { 72 | name = "test-rollerbot-grow-method" 73 | autoscaling_group_name = "${module.asg.this_autoscaling_group_name}" 74 | policy_type = "TargetTrackingScaling" 75 | 76 | target_tracking_configuration { 77 | predefined_metric_specification { 78 | predefined_metric_type = "ASGAverageCPUUtilization" 79 | } 80 | 81 | target_value = 90 82 | } 83 | } 84 | 85 | module "roller" { 86 | source = "../../terraform/grow-method" 87 | 
88 | lambda_version = "${var.lambda_version}" 89 | 90 | autoscaling_group_name = "${module.asg.this_autoscaling_group_name}" 91 | step_size = 1 92 | wait_interval = "${var.wait_interval}" 93 | } 94 | 95 | output "launch_configuration_name" { 96 | value = "${module.asg.this_launch_configuration_name}" 97 | } 98 | 99 | output "start_roller_lambda_arn" { 100 | value = "${module.roller.start_roller_lambda_arn}" 101 | } 102 | 103 | output "step_function_arn" { 104 | value = "${module.roller.step_function_arn}" 105 | } 106 | 107 | output "autoscaling_group_name" { 108 | value = "${module.asg.this_autoscaling_group_name}" 109 | } 110 | --------------------------------------------------------------------------------