├── .gitignore ├── .go-version ├── .travis.yml ├── Deps ├── Makefile ├── NOTES.md ├── Procfile ├── Procfile.dev ├── README.md ├── app.json ├── autoscaling_group.go ├── autoscaling_group_build.go ├── autoscaling_group_build_payload.go ├── autoscaling_lifecycle_action.go ├── bin ├── clean ├── fmtpolice ├── fold-coverprofiles ├── get-compressed-env-var ├── lintall ├── set-compressed-env-var ├── test-job └── test-request ├── cmd ├── pudding-server │ └── main.go └── pudding-workers │ └── main.go ├── compressed_env_var.go ├── db ├── db.go ├── db_test.go ├── images.go ├── init_scripts.go ├── instances.go └── jobs.go ├── ec2helpers.go ├── errors.go ├── examples └── simple │ ├── iam-autoscaling.json │ ├── iam-ec2.json │ ├── iam-misc.json │ ├── init-script.tmpl.bash │ └── meta.yml ├── flags.go ├── image.go ├── init_script_template.go ├── instance.go ├── instance_build.go ├── instance_build_payload.go ├── instance_lifecycle_transition.go ├── instance_lifecycle_transition_payload.go ├── instance_rsa.go ├── instance_termination_payload.go ├── meta_yml.go ├── multi_error.go ├── notifier.go ├── pudding_test.go ├── script ├── Gemfile.sidekiq-web ├── Gemfile.sidekiq-web.lock ├── server ├── sidekiq-web └── workers ├── sentry.go ├── server ├── auther.go ├── autoscaling_group_builds.go ├── config.go ├── instance_builder.go ├── instance_lifecycle_transition_handler.go ├── instance_terminator.go ├── jsonapi │ ├── jsonapi_test.go │ └── response.go ├── main.go ├── negroniraven │ ├── middleware.go │ └── negroniraven_test.go ├── server.go ├── server_test.go └── sns_handler.go ├── slack_notifier.go ├── sns_message.go ├── sns_message_payload.go ├── version.go ├── workers ├── autoscaling_group_builds.go ├── config.go ├── ec2_syncer.go ├── init_script_context.go ├── instance_builds.go ├── instance_lifecycle_transitions.go ├── instance_terminations.go ├── internal_config.go ├── main.go ├── middleware_raven.go ├── mini_workers.go ├── sns_messages.go ├── workers.go └── workers_test.go └── 
zomg_globals.go /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | *.test 24 | *.prof 25 | 26 | gin-bin 27 | *.log 28 | 29 | *.coverprofile 30 | coverage.html 31 | 32 | config*yml 33 | -------------------------------------------------------------------------------- /.go-version: -------------------------------------------------------------------------------- 1 | 1.4 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: go 3 | go: 4 | - 1.6.2 5 | env: 6 | global: 7 | - PATH="$HOME/gopath/bin:$HOME/bin:$PATH" 8 | - REDIS_PORT=16379 9 | - REDIS_URL="redis://localhost:$REDIS_PORT/0" 10 | before_install: 11 | - go get github.com/hamfist/deppy 12 | - go get github.com/alecthomas/gometalinter 13 | - go get golang.org/x/tools/cmd/cover 14 | install: 15 | - gometalinter --install 16 | - deppy restore 17 | before_script: 18 | - redis-server --port 16379 > redis-server.log 2>&1 & 19 | - echo $! 
> redis-server.pid 20 | script: 21 | - make 22 | - ~/gopath/bin/pudding-server --version 23 | - ~/gopath/bin/pudding-workers --version 24 | - ~/gopath/bin/pudding-server >> server.log 2>&1 & 25 | - for r in 0 1 2 3 4 ; do 26 | ./bin/test-request instance-build ; 27 | done 28 | - redis-cli -r 5 --raw -p "$REDIS_PORT" RPOP "pudding:queue:instance-builds" | 29 | while read line ; do echo $line | python -m json.tool ; done 30 | - kill "$(cat redis-server.pid)" 31 | - ./bin/test-request shutdown 32 | - make save 33 | - git diff --exit-code 34 | - git diff --cached --exit-code 35 | after_script: 36 | - cat server.log 37 | notifications: 38 | slack: 39 | rooms: 40 | - secure: "yZ1p5BGWUPq7hssHbYXiS/z5q0Ojm4ip7zpAA+aqGFs+yKJnpkCCEENXDHlZX1VhbYZfkYKg8mQo40088qjbt9IAdSiXZ4PcKRTucrKJ+7NWuFryHSpmMmW4u0Xp7iTdHEDphfQHndJqtivKdbKuEFtKp02dikzQlMuaizOB36U=" 41 | on_success: always 42 | on_failure: always 43 | -------------------------------------------------------------------------------- /Deps: -------------------------------------------------------------------------------- 1 | { 2 | "ImportPath": "github.com/travis-ci/pudding", 3 | "GoVersion": "go1.6.2", 4 | "Packages": [ 5 | "./..." 
6 | ], 7 | "Deps": [ 8 | { 9 | "ImportPath": "github.com/Sirupsen/logrus", 10 | "Comment": "v0.10.0-16-gcd7d1bb", 11 | "Rev": "cd7d1bbe41066b6c1f19780f895901052150a575" 12 | }, 13 | { 14 | "ImportPath": "github.com/aws/aws-sdk-go/aws", 15 | "Comment": "v1.2.1-1-g101d2e2", 16 | "Rev": "101d2e228fea0ab462a7e0180c607290c4850f15" 17 | }, 18 | { 19 | "ImportPath": "github.com/aws/aws-sdk-go/private/endpoints", 20 | "Comment": "v1.2.1-1-g101d2e2", 21 | "Rev": "101d2e228fea0ab462a7e0180c607290c4850f15" 22 | }, 23 | { 24 | "ImportPath": "github.com/aws/aws-sdk-go/private/protocol", 25 | "Comment": "v1.2.1-1-g101d2e2", 26 | "Rev": "101d2e228fea0ab462a7e0180c607290c4850f15" 27 | }, 28 | { 29 | "ImportPath": "github.com/aws/aws-sdk-go/service/sns", 30 | "Comment": "v1.2.1-1-g101d2e2", 31 | "Rev": "101d2e228fea0ab462a7e0180c607290c4850f15" 32 | }, 33 | { 34 | "ImportPath": "github.com/aws/aws-sdk-go/vendor/github.com/go-ini/ini", 35 | "Comment": "v1.2.1-1-g101d2e2", 36 | "Rev": "101d2e228fea0ab462a7e0180c607290c4850f15" 37 | }, 38 | { 39 | "ImportPath": "github.com/aws/aws-sdk-go/vendor/github.com/jmespath/go-jmespath", 40 | "Comment": "v1.2.1-1-g101d2e2", 41 | "Rev": "101d2e228fea0ab462a7e0180c607290c4850f15" 42 | }, 43 | { 44 | "ImportPath": "github.com/bitly/go-simplejson", 45 | "Comment": "v0.5.0", 46 | "Rev": "aabad6e819789e569bd6aabf444c935aa9ba1e44" 47 | }, 48 | { 49 | "ImportPath": "github.com/braintree/manners", 50 | "Comment": "0.4.0-15-g82a8879", 51 | "Rev": "82a8879fc5fd0381fa8b2d8033b19bf255252088" 52 | }, 53 | { 54 | "ImportPath": "github.com/codegangsta/cli", 55 | "Comment": "v1.17.0-16-g7f0ca9a", 56 | "Rev": "7f0ca9a34958d6702ac8f38530d19001fa5b1560" 57 | }, 58 | { 59 | "ImportPath": "github.com/codegangsta/negroni", 60 | "Comment": "v0.2.0-22-gdc45282", 61 | "Rev": "dc45282b3653ee198a3c06a82bc7847b03666497" 62 | }, 63 | { 64 | "ImportPath": "github.com/feyeleanor/raw", 65 | "Rev": "724aedf6e1a5d8971aafec384b6bde3d5608fba4" 66 | }, 67 | { 68 | "ImportPath": 
"github.com/feyeleanor/sets", 69 | "Rev": "6c54cb57ea406ff6354256a4847e37298194478f" 70 | }, 71 | { 72 | "ImportPath": "github.com/feyeleanor/slices", 73 | "Rev": "bb44bb2e4817fe71ba7082d351fd582e7d40e3ea" 74 | }, 75 | { 76 | "ImportPath": "github.com/garyburd/redigo/internal", 77 | "Comment": "v1.0.0", 78 | "Rev": "8873b2f1995f59d4bcdd2b0dc9858e2cb9bf0c13" 79 | }, 80 | { 81 | "ImportPath": "github.com/garyburd/redigo/redis", 82 | "Comment": "v1.0.0", 83 | "Rev": "8873b2f1995f59d4bcdd2b0dc9858e2cb9bf0c13" 84 | }, 85 | { 86 | "ImportPath": "github.com/getsentry/raven-go", 87 | "Rev": "e39495fea085e98d1281fac0ff4d6eb8dc56f86d" 88 | }, 89 | { 90 | "ImportPath": "github.com/goamz/goamz/autoscaling", 91 | "Rev": "02d5144a587b982e33b95f484a34164ce6923c99" 92 | }, 93 | { 94 | "ImportPath": "github.com/goamz/goamz/aws", 95 | "Rev": "02d5144a587b982e33b95f484a34164ce6923c99" 96 | }, 97 | { 98 | "ImportPath": "github.com/goamz/goamz/cloudwatch", 99 | "Rev": "02d5144a587b982e33b95f484a34164ce6923c99" 100 | }, 101 | { 102 | "ImportPath": "github.com/goamz/goamz/ec2", 103 | "Rev": "02d5144a587b982e33b95f484a34164ce6923c99" 104 | }, 105 | { 106 | "ImportPath": "github.com/gorilla/context", 107 | "Comment": "v1.1-2-ga8d44e7", 108 | "Rev": "a8d44e7d8e4d532b6a27a02dd82abb31cc1b01bd" 109 | }, 110 | { 111 | "ImportPath": "github.com/gorilla/feeds", 112 | "Rev": "441264de03a8117ed530ae8e049d8f601a33a099" 113 | }, 114 | { 115 | "ImportPath": "github.com/gorilla/mux", 116 | "Comment": "v1.1-7-g9c19ed5", 117 | "Rev": "9c19ed558d5df4da88e2ade9c8940d742aef0e7e" 118 | }, 119 | { 120 | "ImportPath": "github.com/hamfist/yaml", 121 | "Rev": "eca94c41d994ae2215d455ce578ae6e2dc6ee516" 122 | }, 123 | { 124 | "ImportPath": "github.com/jrallison/go-workers", 125 | "Rev": "287ed37cbdd72e67c1679ced0a12352a255abfe3" 126 | }, 127 | { 128 | "ImportPath": "github.com/meatballhat/expvarplus", 129 | "Rev": "bd74cc65cda7c733b812fc41e1121f5e5baafde2" 130 | }, 131 | { 132 | "ImportPath": 
"github.com/meatballhat/negroni-logrus", 133 | "Rev": "07babc99e899d3776e7b745d0b4c56680775136a" 134 | }, 135 | { 136 | "ImportPath": "github.com/phyber/negroni-gzip/gzip", 137 | "Rev": "1211b7a0892223f5b3f1af8629ea6c3d9f984028" 138 | }, 139 | { 140 | "ImportPath": "github.com/vaughan0/go-ini", 141 | "Rev": "a98ad7ee00ec53921f08832bc06ecf7fd600e6a1" 142 | } 143 | ] 144 | } 145 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PACKAGE := github.com/travis-ci/pudding 2 | SUBPACKAGES := \ 3 | $(PACKAGE)/cmd/pudding-server \ 4 | $(PACKAGE)/cmd/pudding-workers \ 5 | $(PACKAGE)/db \ 6 | $(PACKAGE)/server \ 7 | $(PACKAGE)/server/jsonapi \ 8 | $(PACKAGE)/server/negroniraven \ 9 | $(PACKAGE)/workers 10 | 11 | VERSION_VAR := $(PACKAGE)/pudding.VersionString 12 | VERSION_VALUE ?= $(shell git describe --always --dirty --tags 2>/dev/null) 13 | REV_VAR := $(PACKAGE)/pudding.RevisionString 14 | REV_VALUE ?= $(shell git rev-parse --sq HEAD 2>/dev/null || echo "'???'") 15 | GENERATED_VAR := $(PACKAGE)/pudding.GeneratedString 16 | GENERATED_VALUE ?= $(shell date -u +'%Y-%m-%dT%H:%M:%S%z') 17 | 18 | FIND ?= find 19 | GO ?= go 20 | DEPPY ?= deppy 21 | GOPATH := $(shell echo $${GOPATH%%:*}) 22 | GOBUILD_LDFLAGS ?= -ldflags "\ 23 | -X $(VERSION_VAR)='$(VERSION_VALUE)' \ 24 | -X $(REV_VAR)=$(REV_VALUE) \ 25 | -X $(GENERATED_VAR)='$(GENERATED_VALUE)' \ 26 | " 27 | GOBUILD_FLAGS ?= -x 28 | 29 | PORT ?= 42151 30 | export PORT 31 | 32 | COVERPROFILES := \ 33 | db-coverage.coverprofile \ 34 | server-coverage.coverprofile \ 35 | server-jsonapi-coverage.coverprofile \ 36 | server-negroniraven-coverage.coverprofile \ 37 | workers-coverage.coverprofile 38 | 39 | %-coverage.coverprofile: 40 | $(GO) test -covermode=count -coverprofile=$@ \ 41 | $(GOBUILD_LDFLAGS) $(PACKAGE)/$(subst -,/,$(subst -coverage.coverprofile,,$@)) 42 | 43 | .PHONY: all 44 | all: clean deps test 45 
| 46 | .PHONY: buildpack 47 | buildpack: 48 | @$(MAKE) build \ 49 | GOBUILD_FLAGS= \ 50 | REV_VALUE="'$(shell git log -1 --format='%H')'" \ 51 | VERSION_VALUE=buildpack-$(STACK)-$(USER)-$(DYNO) 52 | 53 | .PHONY: test 54 | test: build fmtpolice test-deps coverage.html 55 | 56 | .PHONY: test-deps 57 | test-deps: 58 | $(GO) test -i $(GOBUILD_LDFLAGS) $(PACKAGE) $(SUBPACKAGES) 59 | 60 | # .PHONY: test-race 61 | # test-race: 62 | # $(GO) test -race $(GOBUILD_LDFLAGS) $(PACKAGE) $(SUBPACKAGES) 63 | 64 | coverage.html: coverage.coverprofile 65 | $(GO) tool cover -html=$^ -o $@ 66 | 67 | coverage.coverprofile: $(COVERPROFILES) 68 | ./bin/fold-coverprofiles $^ > $@ 69 | $(GO) tool cover -func=$@ 70 | 71 | .PHONY: build 72 | build: 73 | $(GO) install $(GOBUILD_FLAGS) $(GOBUILD_LDFLAGS) $(PACKAGE) $(SUBPACKAGES) 74 | 75 | .PHONY: deps 76 | deps: 77 | $(GO) get -t $(GOBUILD_FLAGS) $(GOBUILD_LDFLAGS) $(PACKAGE) $(SUBPACKAGES) 78 | 79 | .PHONY: clean 80 | clean: 81 | ./bin/clean 82 | 83 | .PHONY: annotations 84 | annotations: 85 | @git grep -E '(TODO|FIXME|XXX):' | grep -v Makefile 86 | 87 | .PHONY: save 88 | save: 89 | $(DEPPY) save ./... 90 | 91 | .PHONY: fmtpolice 92 | fmtpolice: 93 | ./bin/fmtpolice 94 | 95 | lintall: 96 | ./bin/lintall 97 | -------------------------------------------------------------------------------- /NOTES.md: -------------------------------------------------------------------------------- 1 | # Managing autoscaling bits 2 | 3 | A new autoscaling group may be defined by specifying only the name of the autoscaling group and an existing instance id. 
4 | Without specifying the tags, only the autoscaling group name tag will be applied, though, so instead tags should be 5 | passed at autoscaling group creation time, e.g.: 6 | 7 | ``` bash 8 | aws autoscaling create-auto-scaling-group \ 9 | --instance-id i-$INSTANCE_ID \ 10 | --tags \ 11 | Key=role,Value=worker \ 12 | Key=queue,Value=docker \ 13 | Key=site,Value=org \ 14 | Key=env,Value=staging \ 15 | Key=Name,Value=org-staging-docker-asg-$INSTANCE_ID \ 16 | --min-size 1 \ 17 | --max-size 3 \ 18 | --desired-capacity 1 \ 19 | --auto-scaling-group-name org-staging-docker-asg-$INSTANCE_ID 20 | ``` 21 | 22 | Each autoscaling group will need both scale-in and scale-out policies, e.g. scale out: 23 | 24 | ``` bash 25 | aws autoscaling put-scaling-policy \ 26 | --policy-name org-staging-docker-sop-$INSTANCE_ID \ 27 | --auto-scaling-group-name org-staging-docker-asg-$INSTANCE_ID \ 28 | --adjustment-type ChangeInCapacity \ 29 | --scaling-adjustment 1 30 | ``` 31 | 32 | and scale in: 33 | 34 | ``` bash 35 | aws autoscaling put-scaling-policy \ 36 | --policy-name org-staging-docker-sip-$INSTANCE_ID \ 37 | --auto-scaling-group-name org-staging-docker-asg-$INSTANCE_ID \ 38 | --adjustment-type ChangeInCapacity \ 39 | --scaling-adjustment -1 40 | ``` 41 | 42 | The above call responds with a policy ARN which must be used when assigning the metric alarm, e.g. 
scale out: 43 | 44 | ``` bash 45 | aws cloudwatch put-metric-alarm \ 46 | --alarm-name org-staging-docker-$INSTANCE_ID-add-capacity \ 47 | --metric-name 'v1.travis.rabbitmq.queues.builds.docker.messages_ready' \ 48 | --namespace Travis/org \ 49 | --statistic Maximum \ 50 | --period 120 \ 51 | --threshold 1 \ 52 | --comparison-operator GreaterThanOrEqualToThreshold \ 53 | --dimensions Name=AutoScalingGroupName,Value=org-staging-docker-asg-$INSTANCE_ID \ 54 | --evaluation-periods 2 \ 55 | --alarm-actions "arn:aws:autoscaling:us-east-1:341288657826:scalingPolicy:59a4e27a-0538-4edd-9fcb-dd9a6d9d5f77:autoScalingGroupName/org-staging-docker-asg-$INSTANCE_ID:policyName/org-staging-docker-sop" 56 | ``` 57 | 58 | and scale in: 59 | 60 | ``` bash 61 | aws cloudwatch put-metric-alarm \ 62 | --alarm-name org-staging-docker-$INSTANCE_ID-remove-capacity \ 63 | --metric-name 'v1.travis.rabbitmq.queues.builds.docker.messages_ready' \ 64 | --namespace Travis/org \ 65 | --statistic Maximum \ 66 | --period 120 \ 67 | --threshold 1 \ 68 | --comparison-operator LessThanOrEqualToThreshold \ 69 | --dimensions Name=AutoScalingGroupName,Value=org-staging-docker-asg-$INSTANCE_ID \ 70 | --evaluation-periods 2 \ 71 | --alarm-actions "arn:aws:autoscaling:us-east-1:341288657826:scalingPolicy:ff543466-6f36-4d62-b41f-94601078b147:autoScalingGroupName/org-staging-docker-asg-$INSTANCE_ID:policyName/org-staging-docker-sip" 72 | ``` 73 | 74 | Because of the nature of the workload we typically run on our instances, we can't take advantage of plain autoscaling 75 | policies that result in scale in/out with immediate instance termination. Instead, we use lifecycle management events 76 | to account for instance setup/teardown time. Managing capacity in this way means more interactions between AWS and 77 | pudding, as well as between pudding and the individual instances (via consul?). 
78 | 79 | Lifecycle hooks for both launching and terminating may be supported, e.g.: 80 | 81 | ``` bash 82 | aws autoscaling put-lifecycle-hook \ 83 | --auto-scaling-group-name org-staging-docker-asg-$INSTANCE_ID \ 84 | --lifecycle-hook-name org-staging-docker-$INSTANCE_ID-lch-launching \ 85 | --lifecycle-transition autoscaling:EC2_INSTANCE_LAUNCHING \ 86 | --notification-target-arn arn:aws:sns:us-east-1:341288657826:pudding-test-topic \ 87 | --role-arn arn:aws:iam::341288657826:role/pudding-sns-test 88 | 89 | aws autoscaling put-lifecycle-hook \ 90 | --auto-scaling-group-name org-staging-docker-asg-$INSTANCE_ID \ 91 | --lifecycle-hook-name org-staging-docker-$INSTANCE_ID-lch-terminating \ 92 | --lifecycle-transition autoscaling:EC2_INSTANCE_TERMINATING \ 93 | --notification-target-arn arn:aws:sns:us-east-1:341288657826:pudding-test-topic \ 94 | --role-arn arn:aws:iam::341288657826:role/pudding-sns-test 95 | ``` 96 | 97 | The actions taken for these lifecycle events are now in our control (as opposed to `shutdown -h now`). Yay! 98 | 99 | According to the AWS docs, this is the basic sequence for adding a lifecycle hook to an Auto Scaling Group: 100 | 101 | 1. Create a notification target. A target can be either an Amazon SQS queue or an Amazon SNS topic. 102 | 1. Create an IAM role. This role allows Auto Scaling to publish lifecycle notifications to the designated SQS queue or SNS 103 | topic. 104 | 1. Create the lifecycle hook. You can create a hook that acts when instances launch or when instances terminate. 105 | 1. If necessary, record the lifecycle action heartbeat to keep the instance in a pending state. 106 | 1. Complete the lifecycle action. 107 | 108 | The way this sequence can be applied to pudding might go something like this: 109 | 110 | 1. The SNS topic is expected to already exist, along with a confirmed subscription with an endpoint URL pointing back at 111 | pudding over https. 
The topic ARN must be provided via env configuration a la `PUDDING_SNS_TOPIC_ARN`. 112 | 1. The IAM role is expected to already exist, and must be provided via env configuration a la `PUDDING_ROLE_ARN`. The 113 | role must have a policy that allows for publishing to the SNS topic. 114 | 1. Creation of the lifecycle hook(s) happens automatically during creation of the autoscaling group, with the 115 | asg-specific SNS topic being specified. 116 | 1. Either have pudding repeatedly enqueue `RecordLifecycleActionHeartbeat` API calls, or perhaps set the 117 | `HeartbeatTimeout` higher than the build job timeout for the site/env. 118 | 1. During both instance launch and termination, the completion of the lifecycle will happen when the instance phones 119 | home to pudding and pudding then forwards the event as a `CompleteLifecycleAction` request. In the case of the 120 | launch event, this hook should probably fire when the instance is ready to begin consuming work and potentially wipe the 121 | hook after the first execution so that subsequent restarts don't result in failed `CompleteLifecycleAction` requests. 
122 | 123 | ## SNS Topic bits 124 | 125 | Upon subscribing to an SNS Topic, the HTTP(S) URL will receive a subscription confirmation payload like this: 126 | 127 | ``` javascript 128 | { 129 | "Type" : "SubscriptionConfirmation", 130 | "MessageId" : "98a3094e-c7e8-4d38-a730-939f361c6065", 131 | "Token" : "2336412f37fb687f5d51e6e241d638b114f4e9b52623c594ff666aff11609847fd78b02578f0a1aa8b6ff0ed1e5c37dfe94f118833bfc5b99b20240993dbe294721f4ebf79f904e692bcc4ef2d30af482bd4c1e7a4342d3483783da546e9d39da8315b1b28d6693fd54280be2df46a3befa6669a7a4c2661279cef2fa857d057", 132 | "TopicArn" : "arn:aws:sns:us-east-1:341288657826:pudding-test-topic", 133 | "Message" : "You have chosen to subscribe to the topic arn:aws:sns:us-east-1:341288657826:pudding-test-topic.\nTo confirm the subscription, visit the SubscribeURL included in this message.", 134 | "SubscribeURL" : "https://sns.us-east-1.amazonaws.com/?Action=ConfirmSubscription&TopicArn=arn:aws:sns:us-east-1:341288657826:pudding-test-topic&Token=2336412f37fb687f5d51e6e241d638b114f4e9b52623c594ff666aff11609847fd78b02578f0a1aa8b6ff0ed1e5c37dfe94f118833bfc5b99b20240993dbe294721f4ebf79f904e692bcc4ef2d30af482bd4c1e7a4342d3483783da546e9d39da8315b1b28d6693fd54280be2df46a3befa6669a7a4c2661279cef2fa857d057", 135 | "Timestamp" : "2014-12-22T20:29:28.282Z", 136 | "SignatureVersion" : "1", 137 | "Signature" : "oIcRPV7fIfrsGBElsVbWVOdXS7DeoDttUtGX386Hd2BRSWd8uzMKbF4F8GnrW/TKVmbXYu30/SlWAQKzhx7Ud2eMGqmVUZS96g2o2lkgyCl+VdkcfwYQ8TBGzmClVIEtsKV+map2yq6HIxxnQMNLGTxq/DT4NQGvqYaMet8mxq4roYM4lA/lNLZdLhYs9h8on5uxjAAw2WHQ/gUH2LxUx6N10CKSSV6lHQr+Ior0VLaAHNxCp2d0fLLJM3XvW0HUFZD5JEohq27/q5d37Uc3N7+DZ+fKrmurjkV721YwXgeHlo5a/lQ6WrEN4wpGznxFPBFlVtbczi/6HO+PsCpSqA==", 138 | "SigningCertURL" : "https://sns.us-east-1.amazonaws.com/SimpleNotificationService-d6d679a1d18e95c2f9ffcf11f4f9e198.pem" 139 | } 140 | ``` 141 | 142 | SNS Notifications have a body like this: 143 | 144 | ``` javascript 145 | { 146 | "Type" : "Notification", 147 | "MessageId" : 
"375f381a-a143-50b5-8e61-7508234b4255", 148 | "TopicArn" : "arn:aws:sns:us-east-1:341288657826:pudding-test-topic", 149 | "Subject" : "Just Testing", 150 | "Message" : "This is a test eh", 151 | "Timestamp" : "2014-12-22T20:32:08.437Z", 152 | "SignatureVersion" : "1", 153 | "Signature" : "oxUggncdas6GcSzheXSRU9MZtvvFEGqd4IwGTG1ljj9CRWF/AxQ+/hS986bW4bGrh9ic5Z+uIUXRq/XfN34aFGMsLy9RSNgAwKoDe0e+g9OFWP3DrK+oe+Lr2HfwyRtS7J5YnHAeRkuuCIVkCRX+RgXLJvCfosSmgKGiYBToDakoEVsJyBh1MbuPCz33Czw974UdsWfCSzUhM0gOceQ6LbkHBUdfXcPH8wFVpoSoJZcnDIKqjTjRAhmYKdC85c2J1Jca35PY2gaPPDtiPtnoKxDMfJ4PTlrW2jVefaZjKBRj43o+aaWzBVNG1931OpjtMu6d5Lml/148bweB27am3A==", 154 | "SigningCertURL" : "https://sns.us-east-1.amazonaws.com/SimpleNotificationService-d6d679a1d18e95c2f9ffcf11f4f9e198.pem", 155 | "UnsubscribeURL" : "https://sns.us-east-1.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:us-east-1:341288657826:pudding-test-topic:8a210808-2c56-4f43-8411-bf23666b8625", 156 | "MessageAttributes" : { 157 | "AWS.SNS.MOBILE.MPNS.Type" : {"Type":"String","Value":"token"}, 158 | "AWS.SNS.MOBILE.WNS.Type" : {"Type":"String","Value":"wns/badge"}, 159 | "AWS.SNS.MOBILE.MPNS.NotificationClass" : {"Type":"String","Value":"realtime"} 160 | } 161 | } 162 | ``` 163 | 164 | When a lifecycle hook is configured for an autoscaling group, a test notification is sent to the SNS topic with a 165 | payload like this for each subscription (each lifecycle transition): 166 | 167 | ``` javascript 168 | { 169 | "Type" : "Notification", 170 | "MessageId" : "3edbc59a-0358-5152-aa1a-88888b0e3347", 171 | "TopicArn" : "arn:aws:sns:us-east-1:341288657826:pudding-test-topic", 172 | "Subject" : "Auto Scaling: test notification for group \"org-staging-docker-asg-$INSTANCE_ID\"", 173 | "Message" : "{\"AutoScalingGroupName\":\"org-staging-docker-asg-$INSTANCE_ID\",\"Service\":\"AWS Auto 
Scaling\",\"Time\":\"2014-12-22T20:58:56.930Z\",\"AccountId\":\"341288657826\",\"Event\":\"autoscaling:TEST_NOTIFICATION\",\"RequestId\":\"585ad5cd-8a1d-11e4-b467-4194aad3947b\",\"AutoScalingGroupARN\":\"arn:aws:autoscaling:us-east-1:341288657826:autoScalingGroup:6b164a47-9782-493c-99d0-86e5ec3a8c1a:autoScalingGroupName/org-staging-docker-asg-$INSTANCE_ID\"}", 174 | "Timestamp" : "2014-12-22T20:59:02.057Z", 175 | "SignatureVersion" : "1", 176 | "Signature" : "wxMkfMRjZJWAK086ehDNZcLmQ4WPkO8V/biC7FjW5ok9SLH7jWbPHMyFYhBNfGEzOA2t2tVBuSUJDlzQ/jRjQQZqRx0Sgvtuvpwn9cHpRMJNWSxXkJP6Z8sD1I9S1NdNAADzEG02DV4zOZgkUVkItoGYrJw1DYO14/xQr9kcVDLNr2r6PJk1SLxR85Y+y72ZloKLshKYGdZlXqL5hv8DWa53hlzf1vEb+gZ2BTpjuFVxRaIbvsCconIXEDdOdSWOzW/9NzP46iDTAp79eBnENo+P5WYLCTUIX072eENZ+WnzuvCSMOI4uxB4/rqsj+BnirgTILztw6r5F7GMyqOLVg==", 177 | "SigningCertURL" : "https://sns.us-east-1.amazonaws.com/SimpleNotificationService-d6d679a1d18e95c2f9ffcf11f4f9e198.pem", 178 | "UnsubscribeURL" : "https://sns.us-east-1.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:us-east-1:341288657826:pudding-test-topic:8a210808-2c56-4f43-8411-bf23666b8625" 179 | } 180 | ``` 181 | 182 | The `autoscaling:EC2_INSTANCE_TERMINATING` event results in a message like this: 183 | 184 | ``` javascript 185 | { 186 | "Type" : "Notification", 187 | "MessageId" : "c87337ab-c19e-51f2-8a5d-7ab80071cc4b", 188 | "TopicArn" : "arn:aws:sns:us-east-1:341288657826:pudding-test-topic", 189 | "Subject" : "Auto Scaling: Lifecycle action 'TERMINATING' for instance i-4c87e963 in progress.", 190 | "Message" : "{\"AutoScalingGroupName\":\"org-staging-docker-asg-$INSTANCE_ID\",\"Service\":\"AWS Auto 
Scaling\",\"Time\":\"2014-12-23T19:17:03.843Z\",\"AccountId\":\"341288657826\",\"LifecycleTransition\":\"autoscaling:EC2_INSTANCE_TERMINATING\",\"RequestId\":\"8fb86310-cc3f-45b6-9577-7997b4bfad0d\",\"LifecycleActionToken\":\"2f346e45-4866-4bf1-a752-f6eea23011c7\",\"EC2InstanceId\":\"i-4c87e963\",\"LifecycleHookName\":\"org-staging-docker-lch-$INSTANCE_ID-terminating\"}", 191 | "Timestamp" : "2014-12-23T19:17:03.874Z", 192 | "SignatureVersion" : "1", 193 | "Signature" : "S0oU0BB373Z1dm8d088j+5fD90A3ZD35xWsUrL93nRarX5P4dK+h0Yxsy79Ur1MeJQcdzYCbflHxyhywHuDWWDScxEHqOv7x5PRUPBCpz5BszTK52SEsXPd1LXS/dAZKU+zHBiV9/IJxEzzXgq4JlXrPUd4WCAr0zitZJi/1nhZWdoar41UATJcWh4xKszSmK5bV3CGd4OEs2CE4zdMktfpVGKxJ5qLVGnSqeO0jL6pTOh6hXQDighRTxU6ryrY0/n8ZlMkKxOs60x/hHsmPjRkITa6TRlfUt4y4f7H/K4OB+F/bM/svJvHJi7b+vQaHO0gIgLRSM1QglekcHQihSQ==", 194 | "SigningCertURL" : "https://sns.us-east-1.amazonaws.com/SimpleNotificationService-d6d679a1d18e95c2f9ffcf11f4f9e198.pem", 195 | "UnsubscribeURL" : "https://sns.us-east-1.amazonaws.com/?Action=Unsubscribe&SubscriptionArn=arn:aws:sns:us-east-1:341288657826:pudding-test-topic:8a210808-2c56-4f43-8411-bf23666b8625" 196 | } 197 | ``` 198 | 199 | ## Cycling out instances 200 | 201 | Given that we're defining an autoscaling group from a template instance, the "cycling" or replacement process is a bit 202 | involved. The rough steps might be: 203 | 204 | 1. Get the current capacity of the existing autoscaling group 205 | 1. Create a new instance based on the latest or specified AMI 206 | 1. Once the instance has started, create a replacement autoscaling group from the instance id, setting the desired 207 | capacity to the existing autoscaling group's capacity 208 | 1. Create scaling policies, metric alarms, lifecycle hooks, etc. for the replacement autoscaling group that are copies 209 | of those assigned to the existing autoscaling group. 210 | 1. Set the desired capacity of the existing autoscaling group to 0. 211 | 1. 
Upon termination of all instances in the existing autoscaling group, delete the autoscaling group and all assigned 212 | resources. 213 | 214 | Roughly the same process would apply to promoting a new AMI in a canary-style roll out, except that we would be 215 | intentionally keeping more than one autoscaling group around for a given site-org-queue pool until the replacement is 216 | complete. Perhaps we should stick to full replacement (?) 217 | 218 | ## Problems: 219 | 220 | ### user-data init script lifespan 221 | 222 | The `user-data` for the instances created by the worker manager service is currently doing an `#include` of an init 223 | script URL that's intended to be short-lived. If this user-data is going to be used within an auto-scaling context, 224 | then we'll either have to know up front and give it a considerably longer expiry (or no expiry at all), or perhaps 225 | remove expiry from init-scripts and their auths altogether. 226 | 227 | ### Name tags no longer unique 228 | 229 | The `Name` tag for instances within an autoscaling group cannot (?) be based on the instance id, e.g. 230 | `travis-org-staging-docker-abcd1234`. One option is to do like the above `aws autoscaling create-auto-scaling-group` 231 | invocation and assign a name that ends with the root instance id and `-asg`, but then individual instances within the 232 | autoscaling group will not be unique. This may require setting the system hostname dynamically during cloud init so 233 | that it includes the instance id fetched from the metadata API. 
234 | 235 | ## Putting it all together 236 | 237 | When creating an autoscaling group in pudding, the required inputs are: 238 | 239 | * an existing instance id OR an existing autoscaling group name *REQUIRED* 240 | * an existing IAM role ARN for setting up SNS bits *REQUIRED* 241 | * site *REQUIRED* 242 | * env *REQUIRED* 243 | * queue *REQUIRED* 244 | * min size (default `0`) 245 | * max size (default `1`) 246 | * desired capacity (default `1`) 247 | * scale out metric alarm spec, which is 248 | `{"namespace":"","metric_name":"","statistic":"","op":"","threshold":"","period":"","evaluation_periods":""}` 249 | (default `{"namespace":"AWS/EC2","metric_name":"CPUUtilization","statistic":"Average","op":"GreaterThanOrEqualToThreshold","threshold":"95","period":"120","evaluation_periods":"2"}`) 250 | * scale in metric alarm spec, which is 251 | `{"namespace":"","metric_name":"","statistic":"","op":"","threshold":"","period":"","evaluation_periods":""}` 252 | (default `{"namespace":"AWS/EC2","metric_name":"CPUUtilization","statistic":"Average","op":"LessThanOrEqualToThreshold","threshold":"10","period":"120","evaluation_periods":"2"}`) 253 | 254 | **Autoscaling group name**: `"{{.Site}}-{{.Env}}-{{.Queue}}-asg-{{.InstanceID}}"`. 255 | 256 | Upon creation of the autoscaling group, the next step is to create scaling policies for scaling out and scaling in, each in 257 | adjustments of 1. 258 | 259 | **Scale out policy name**: `"{{.Site}}-{{.Env}}-{{.Queue}}-sop-{{.InstanceID}}"` 260 | 261 | **Scale in policy name**: `"{{.Site}}-{{.Env}}-{{.Queue}}-sip-{{.InstanceID}}"` 262 | 263 | The policy ARNs resulting from the creation of the scaling policies are then used to create metric alarms, the params 264 | for which must be supplied at autoscaling group creation time. 
For the purposes of scaling instances running 265 | `travis-worker` and build env containers, it is unlikely we'll be able to use any of the builtin cloudwatch metrics, but 266 | instead we would rely on a custom cloudwatch metric shipped from elsewhere such as rabbitmq messages ready. 267 | 268 | **Scale out metric alarm name**: `"{{.Site}}-{{.Env}}-{{.Queue}}-soma-{{.InstanceID}}"` 269 | 270 | **Scale in metric alarm name**: `"{{.Site}}-{{.Env}}-{{.Queue}}-sima-{{.InstanceID}}"` 271 | 272 | Before being able to create lifecycle hooks for the autoscaling group, we'll have to create an SNS topic and subscribe 273 | to it via HTTP(S). 274 | 275 | **SNS topic name**: `"{{.Site}}-{{.Env}}-{{.Queue}}-topic-{{.InstanceID}}"` 276 | 277 | Once we have the topic ARN, this is used to subscribe pudding to the topic, specifying a notification endpoint specific 278 | to this autoscaling group, e.g. `https://$PUDDING_HOST/autoscaling-group-notifications/$AUTOSCALING_GROUP_NAME`. 279 | 280 | As soon as this subscription is created, the expectation is that a subscription confirmation request will come to 281 | pudding. The request signature should be verified, then subscription confirmed. 282 | 283 | At this point, the autoscaling group definition is complete. The remaining work performed by pudding will be in the 284 | form of responding to lifecycle hook notifications and custom internal events related to instance lifecycle management. 
285 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: script/server 2 | worker: script/workers 3 | -------------------------------------------------------------------------------- /Procfile.dev: -------------------------------------------------------------------------------- 1 | web: script/server 2 | worker: script/workers 3 | sidekiq-web: script/sidekiq-web 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pudding 2 | ====================== 3 | 4 | [![Build Status](https://travis-ci.org/travis-ci/pudding.svg?branch=master)](https://travis-ci.org/travis-ci/pudding) 5 | 6 | [![Deploy](https://www.herokucdn.com/deploy/button.png)](https://heroku.com/deploy) 7 | 8 | ## Development and such 9 | 10 | This repo should be cloned into your `GOPATH` at 11 | `${GOPATH%%:*}/src/github.com/travis-ci/pudding`. 
12 | If you don't know what `GOPATH` is or are unsure if the top entry 13 | is in a non-volatile location, you should Ask Someone ™ 14 | 15 | ### prerequisites 16 | 17 | ``` bash 18 | go get github.com/hamfist/deppy 19 | go get github.com/golang/lint/golint 20 | go get golang.org/x/tools/cmd/cover 21 | 22 | gometalinter --install 23 | ``` 24 | 25 | ### build/test cycle 26 | 27 | Do everything: 28 | ``` bash 29 | make 30 | ``` 31 | 32 | Only clean and build, with less output: 33 | ``` bash 34 | make clean build GOBUILD_FLAGS= 35 | ``` 36 | 37 | ### Running things locally 38 | 39 | As with other heroku apps: 40 | ``` bash 41 | foreman start 42 | ``` 43 | 44 | The same, but without `rerun` in the mix: 45 | ``` bash 46 | DYNO=1 foreman start 47 | ``` 48 | 49 | ## Usage 50 | 51 | ### web 52 | 53 | The web API exposes the following resources, with most requiring 54 | authentication via token: 55 | 56 | #### `GET /` 57 | 58 | Provides a friendly greeting 59 | 60 | #### `DELETE /` **requires auth** 61 | 62 | Gracefully shut down the server 63 | 64 | #### `POST /kaboom` **requires auth** 65 | 66 | Simulate a panic. No body expected. 67 | 68 | #### `GET /instances` **requires auth** 69 | 70 | Provide a list of instances, optionally filtered with `env` 71 | and `site` query params. 72 | 73 | #### `GET /instances/{instance_id}` **requires auth** 74 | 75 | Provide a list containing a single instance matching the given 76 | `instance_id`, if it exists. 77 | 78 | #### `DELETE /instances/{instance_id}` **requires auth** 79 | 80 | Terminate an instance that matches the given `instance_id`, if it 81 | exists. 82 | 83 | #### `POST /instance-builds` **requires auth** 84 | 85 | Start an instance build, which will result in an EC2 instance being 86 | created. 
The expected body is a jsonapi singular collection of 87 | `"instance_builds"`, like so: 88 | 89 | ``` javascript 90 | { 91 | "instance_builds": { 92 | "role": "worker", 93 | "site": "org", 94 | "env": "staging", 95 | "instance_type": "c3.2xlarge", 96 | "slack_channel": "#general", 97 | "count": 4, 98 | "queue": "docker", 99 | "boot_instance": true 100 | } 101 | } 102 | 103 | ``` 104 | 105 | > Note: You can prevent pudding from booting an instance by setting 106 | > the `boot_instance` flag to `false` -- in this case it will only 107 | > create a cloud-init script. 108 | 109 | #### `PATCH /instance-builds/{instance_build_id}` **requires auth** 110 | 111 | "Update" an instance build; currently used to send notifications to 112 | Slack upon completion of a build. Expects 113 | `application/x-www-form-urlencoded` params in the body, a la: 114 | 115 | ``` 116 | state=finished&instance-id=i-abcd1234&slack-channel=general 117 | ``` 118 | 119 | #### `GET /init-scripts/{instance_build_id}` **requires auth** 120 | 121 | This route accepts both token auth and "init script auth", which is 122 | basic auth specific to the instance build and is kept in a redis 123 | key with expiry. This is the route hit by the cloud-init 124 | `#include` set in EC2 instance user data when the instance is 125 | created. It responds with a content type of `text/x-shellscript; 126 | charset=utf-8`, which is expected (but not enforced) by cloud-init. 127 | 128 | #### `GET /images` **requires auth** 129 | 130 | Provide a list of images per role, denoting which is active. Example response: 131 | 132 | ``` javascript 133 | { 134 | "images": [ 135 | { 136 | "ami": "ami-00aabbcc", 137 | "active": true, 138 | "role": "web" 139 | }, 140 | { 141 | "ami": "ami-00aabbcd", 142 | "active": false, 143 | "role": "web" 144 | } 145 | ] 146 | } 147 | ``` 148 | 149 | ### workers 150 | 151 | The background job workers are started as a separate process and 152 | communicate with the web server via redis. 
The sidekiq-compatible 153 | workers are built using 154 | [`go-workers`](https://github.com/jrallison/go-workers). There are 155 | also non-evented "mini workers" that run in a simple run-sleep loop 156 | in a separate goroutine. 157 | 158 | #### `instance-builds` queue 159 | 160 | Jobs handled on the `instance-builds` queue perform the following 161 | actions: 162 | 163 | * resolve the `ami` id, using the most recent available if absent 164 | * create a custom security group and authorize inbound port 22 165 | * prepare a cloud-init script and store it in redis 166 | * prepare an `#include` statement with custom URL to be used in the 167 | instance user-data 168 | * create an instance with the resolved ami id, `#include ` 169 | user-data, custom security group, and specified instance type 170 | * tag the instance with `role`, `Name`, `site`, `env`, and `queue` 171 | * send slack notification that the instance has been created 172 | 173 | #### `instance-terminations` queue 174 | 175 | Jobs handled on the `instance-terminations` queue perform the 176 | following actions: 177 | 178 | * terminate the instance by id, e.g. 
`i-abcd1234` 179 | * remove the instance from the redis cache 180 | -------------------------------------------------------------------------------- /app.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Pudding", 3 | "description": "It's a thing for managing instances!", 4 | "keywords": [ 5 | "ec2", 6 | "chatops", 7 | "tool" 8 | ], 9 | "website": "https://github.com/travis-ci/pudding", 10 | "repository": "https://github.com/travis-ci/pudding", 11 | "logo": "http://docs.travis-ci.com/images/travis-mascot-200px.png", 12 | "success_url": "/", 13 | "addons": [ 14 | "redisgreen:minidev" 15 | ], 16 | "env": { 17 | "AWS_ACCESS_KEY_ID": { 18 | "description": "AWS access key ID", 19 | "required": true 20 | }, 21 | "AWS_SECRET_ACCESS_KEY": { 22 | "description": "AWS secret access key", 23 | "required": true 24 | }, 25 | "BUILDPACK_URL": "https://github.com/travis-ci/heroku-buildpack-go", 26 | "REDIS_PROVIDER": "REDISGREEN_URL", 27 | "PUDDING_AUTH_TOKEN": { 28 | "description": "A secret token used for web authentication", 29 | "generator": "secret", 30 | "required": true 31 | }, 32 | "PUDDING_SLACK_CHANNEL": { 33 | "description": "Default Slack channel name for notifications", 34 | "value": "general", 35 | "required": false 36 | }, 37 | "PUDDING_SLACK_TEAM": { 38 | "description": "Slack team name for hubot notifications API requests", 39 | "required": false 40 | }, 41 | "PUDDING_SLACK_TOKEN": { 42 | "description": "Slack auth token for hubot notification API requests", 43 | "required": false 44 | }, 45 | "PUDDING_WEB_HOSTNAME": { 46 | "description": "The publicly-accessible hostname of your heroku app, e.g. https://the-name-of-your-app.herokuapp.com/", 47 | "required": true 48 | }, 49 | "PUDDING_INSTANCE_RSA": { 50 | "description": "RSA private key for use in cloud init script templates. 
// AutoscalingGroup is pudding's own record of an EC2 autoscaling
// group.  Every field is tagged with both its JSON key and its redis
// field name.
type AutoscalingGroup struct {
	Name            string `json:"name" redis:"name"`
	InstanceID      string `json:"instance_id" redis:"instance_id"`
	Queue           string `json:"queue" redis:"queue"`
	Env             string `json:"env" redis:"env"`
	Site            string `json:"site" redis:"site"`
	Role            string `json:"role" redis:"role"`
	MinSize         int    `json:"min_size" redis:"min_size"`
	MaxSize         int    `json:"max_size" redis:"max_size"`
	DesiredCapacity int    `json:"desired_capacity" redis:"desired_capacity"`
}

// Hydrate replaces any size field that is still zero (as happens when
// the value was absent from a JSON round trip) with the default of 1.
// Fields that already hold a non-zero value are left untouched.
func (asg *AutoscalingGroup) Hydrate() {
	sizes := []*int{&asg.MinSize, &asg.MaxSize, &asg.DesiredCapacity}
	for _, size := range sizes {
		if *size == 0 {
			*size = 1
		}
	}
}
LifecycleHeartbeatTimeout int `json:"lifecycle_heartbeat_timeout,omitempty"` 44 | 45 | ScaleOutCooldown int `json:"scale_out_cooldown,omitempty"` 46 | ScaleOutAdjustment int `json:"scale_out_adjustment,omitempty"` 47 | ScaleOutMetricName string `json:"scale_out_metric_name,omitempty"` 48 | ScaleOutMetricNamespace string `json:"scale_out_metric_namespace,omitempty"` 49 | ScaleOutMetricStatistic string `json:"scale_out_metric_statistic,omitempty"` 50 | ScaleOutMetricPeriod int `json:"scale_out_metric_period,omitempty"` 51 | ScaleOutMetricEvaluationPeriods int `json:"scale_out_metric_evaluation_periods,omitempty"` 52 | ScaleOutMetricThreshold float64 `json:"scale_out_metric_threshold,omitempty"` 53 | ScaleOutMetricComparisonOperator string `json:"scale_out_metric_comparison_operator,omitempty"` 54 | 55 | ScaleInCooldown int `json:"scale_in_cooldown,omitempty"` 56 | ScaleInAdjustment int `json:"scale_in_adjustment,omitempty"` 57 | ScaleInMetricName string `json:"scale_in_metric_name,omitempty"` 58 | ScaleInMetricNamespace string `json:"scale_in_metric_namespace,omitempty"` 59 | ScaleInMetricStatistic string `json:"scale_in_metric_statistic,omitempty"` 60 | ScaleInMetricPeriod int `json:"scale_in_metric_period,omitempty"` 61 | ScaleInMetricEvaluationPeriods int `json:"scale_in_metric_evaluation_periods,omitempty"` 62 | ScaleInMetricThreshold float64 `json:"scale_in_metric_threshold,omitempty"` 63 | ScaleInMetricComparisonOperator string `json:"scale_in_metric_comparison_operator,omitempty"` 64 | } 65 | 66 | // NewAutoscalingGroupBuild makes a new AutoscalingGroupBuild 67 | func NewAutoscalingGroupBuild() *AutoscalingGroupBuild { 68 | return &AutoscalingGroupBuild{} 69 | } 70 | 71 | // Hydrate is used to overwrite "null" defaults that result from 72 | // serialize/deserialize via JSON 73 | func (b *AutoscalingGroupBuild) Hydrate() { 74 | if b.NameTemplate == "" { 75 | b.NameTemplate = "{{.Role}}-{{.Site}}-{{.Env}}-{{.Queue}}-{{.InstanceIDWithoutPrefix}}-{{.Timestamp}}" 
76 | } 77 | 78 | if b.Timestamp == 0 { 79 | b.Timestamp = time.Now().UTC().Unix() 80 | } 81 | 82 | if b.DefaultCooldown == 0 { 83 | b.DefaultCooldown = 300 84 | } 85 | 86 | if b.LifecycleDefaultResult == "" { 87 | b.LifecycleDefaultResult = "CONTINUE" 88 | } 89 | 90 | if b.LifecycleHeartbeatTimeout == 0 { 91 | b.LifecycleHeartbeatTimeout = 900 92 | } 93 | 94 | if b.ScaleOutCooldown == 0 { 95 | b.ScaleOutCooldown = 300 96 | } 97 | 98 | if b.ScaleInCooldown == 0 { 99 | b.ScaleInCooldown = 300 100 | } 101 | 102 | if b.ScaleOutAdjustment == 0 { 103 | b.ScaleOutAdjustment = 1 104 | } 105 | 106 | if b.ScaleInAdjustment == 0 { 107 | b.ScaleInAdjustment = -1 108 | } 109 | 110 | if b.ScaleOutMetricName == "" { 111 | b.ScaleOutMetricName = "CPUUtilization" 112 | } 113 | 114 | if b.ScaleOutMetricNamespace == "" { 115 | b.ScaleOutMetricNamespace = "AWS/EC2" 116 | } 117 | 118 | if b.ScaleOutMetricStatistic == "" { 119 | b.ScaleOutMetricStatistic = "Average" 120 | } 121 | 122 | if b.ScaleOutMetricPeriod == 0 { 123 | b.ScaleOutMetricPeriod = 120 124 | } 125 | 126 | if b.ScaleOutMetricEvaluationPeriods == 0 { 127 | b.ScaleOutMetricEvaluationPeriods = 2 128 | } 129 | 130 | if b.ScaleOutMetricThreshold == float64(0) { 131 | b.ScaleOutMetricThreshold = float64(90) 132 | } 133 | 134 | if b.ScaleOutMetricComparisonOperator == "" { 135 | b.ScaleOutMetricComparisonOperator = "GreaterThanOrEqualToThreshold" 136 | } 137 | 138 | if b.ScaleInMetricName == "" { 139 | b.ScaleInMetricName = "CPUUtilization" 140 | } 141 | 142 | if b.ScaleInMetricNamespace == "" { 143 | b.ScaleInMetricNamespace = "AWS/EC2" 144 | } 145 | 146 | if b.ScaleInMetricStatistic == "" { 147 | b.ScaleInMetricStatistic = "Average" 148 | } 149 | 150 | if b.ScaleInMetricPeriod == 0 { 151 | b.ScaleInMetricPeriod = 120 152 | } 153 | 154 | if b.ScaleInMetricEvaluationPeriods == 0 { 155 | b.ScaleInMetricEvaluationPeriods = 2 156 | } 157 | 158 | if b.ScaleInMetricThreshold == float64(0) { 159 | b.ScaleInMetricThreshold = 
float64(10) 160 | } 161 | 162 | if b.ScaleInMetricComparisonOperator == "" { 163 | b.ScaleInMetricComparisonOperator = "LessThanThreshold" 164 | } 165 | } 166 | 167 | // Validate performs multiple validity checks and returns a slice of all errors 168 | // found 169 | func (b *AutoscalingGroupBuild) Validate() []error { 170 | errors := []error{} 171 | if b.InstanceID == "" { 172 | errors = append(errors, errEmptyInstanceID) 173 | } 174 | if b.Site == "" { 175 | errors = append(errors, errEmptySite) 176 | } 177 | if b.Env == "" { 178 | errors = append(errors, errEmptyEnv) 179 | } 180 | if b.Queue == "" { 181 | errors = append(errors, errEmptyQueue) 182 | } 183 | if b.Role == "" { 184 | errors = append(errors, errEmptyRole) 185 | } 186 | if b.RoleARN == "" { 187 | errors = append(errors, errEmptyRoleARN) 188 | } 189 | if b.TopicARN == "" { 190 | errors = append(errors, errEmptyTopicARN) 191 | } 192 | 193 | return errors 194 | } 195 | 196 | // InstanceIDWithoutPrefix returns the instance id without the "i-" 197 | func (b *AutoscalingGroupBuild) InstanceIDWithoutPrefix() string { 198 | return strings.TrimPrefix(b.InstanceID, "i-") 199 | } 200 | -------------------------------------------------------------------------------- /autoscaling_group_build_payload.go: -------------------------------------------------------------------------------- 1 | package pudding 2 | 3 | // AutoscalingGroupBuildPayload is the AutoscalingGroupBuild 4 | // representation sent to the background workers 5 | type AutoscalingGroupBuildPayload struct { 6 | Args []*AutoscalingGroupBuild `json:"args"` 7 | Queue string `json:"queue,omitempty"` 8 | JID string `json:"jid,omitempty"` 9 | Retry bool `json:"retry,omitempty"` 10 | EnqueuedAt float64 `json:"enqueued_at,omitempty"` 11 | } 12 | 13 | // AutoscalingGroupBuild casts the first argument to an AutoscalingGroupBuild type 14 | func (asgbp *AutoscalingGroupBuildPayload) AutoscalingGroupBuild() *AutoscalingGroupBuild { 15 | if len(asgbp.Args) < 1 { 16 
// AutoscalingLifecycleAction models the body of an autoscaling
// lifecycle SNS message, which has the form:
//	{
//	  "AutoScalingGroupName":"name string",
//	  "Service":"prose goop string",
//	  "Time":"iso 8601 timestamp string",
//	  "AccountId":"account id string",
//	  "LifecycleTransition":"transition string, e.g.: autoscaling:EC2_INSTANCE_TERMINATING",
//	  "RequestId":"uuid string",
//	  "LifecycleActionToken":"uuid string",
//	  "EC2InstanceId":"instance id string",
//	  "LifecycleHookName":"name string"
//	}
//
// Explicit json tags appear only where the Go field name differs from
// the message key (the ...Id keys); the group name, action token, and
// hook name additionally carry redis field names.
type AutoscalingLifecycleAction struct {
	Event                string
	AutoScalingGroupName string `redis:"auto_scaling_group_name"`
	Service              string
	Time                 string
	AccountID            string `json:"AccountId"`
	LifecycleTransition  string
	RequestID            string `json:"RequestId"`
	LifecycleActionToken string `redis:"lifecycle_action_token"`
	EC2InstanceID        string `json:"EC2InstanceId"`
	LifecycleHookName    string `redis:"lifecycle_hook_name"`
}
#!/bin/bash
# Print the plain-text value of a Heroku config var whose value is
# stored gzipped and base64-encoded.
#
# Arguments:
#   $1     name of the config var to fetch
#   $2...  extra arguments passed straight through to `heroku config:get`
set -o errexit

usage() {
  echo "Usage: $(basename "$0") <env-var> [heroku args]"
}

# -lt compares numerically; `<` inside [[ ]] compares strings.
if [[ $# -lt 1 ]] ; then
  usage
  exit 1
fi

ENVVAR="$1"
shift

# --decode is understood by both GNU and BSD/macOS base64, whereas -D is
# BSD-only.
heroku config:get "$@" "$ENVVAR" | base64 --decode | exec gunzip
#!/bin/bash
# Store the contents of a file in a Heroku config var as a gzipped,
# base64-encoded string.
#
# Arguments:
#   $1     name of the config var to set
#   $2     path of the file whose contents become the value
#   $3...  extra arguments passed straight through to `heroku config:set`
set -o errexit

usage() {
  echo "Usage: $(basename "$0") <env-var> <filename> [heroku args]"
}

# -lt compares numerically.  The string comparison `[[ $# < 2 ]]` is
# also true for 10..19 arguments, which wrongly triggered the usage exit.
if [[ $# -lt 2 ]] ; then
  usage
  exit 1
fi

ENVVAR="$1"
shift
FILENAME="$1"
shift

exec heroku config:set "$@" "${ENVVAR}=$(gzip < "$FILENAME" | base64)"
-------------------------------------------------------------------------------- /bin/test-request: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | : ${HOST:=http://localhost} 4 | : ${PORT:=42151} 5 | : ${PUDDING_AUTH_TOKEN:=swordfish} 6 | 7 | case "$1" in 8 | instance-build) 9 | exec curl \ 10 | -s \ 11 | -H "Authorization: token ${PUDDING_AUTH_TOKEN}" \ 12 | -X POST \ 13 | -d "{ 14 | \"instance_builds\": { 15 | \"count\": 1, 16 | \"site\": \"org\", 17 | \"env\": \"test\", 18 | \"queue\": \"docker\", 19 | \"role\": \"worker\", 20 | \"instance_type\": \"c3.4xlarge\" 21 | } 22 | }" \ 23 | ${HOST}:${PORT}/instance-builds 24 | ;; 25 | autoscaling-group-build) 26 | if [[ $# < 4 ]] ; then 27 | echo "Usage: $(basename $0) autoscaling-group-build " 28 | exit 1 29 | fi 30 | exec curl \ 31 | -v \ 32 | -H "Authorization: token ${PUDDING_AUTH_TOKEN}" \ 33 | -X POST \ 34 | -d "{ 35 | \"autoscaling_group_builds\": { 36 | \"site\": \"${SITE:-org}\", 37 | \"env\": \"${ENV:-staging}\", 38 | \"queue\": \"${QUEUE:-docker}\", 39 | \"role\": \"${ROLE:-worker}\", 40 | \"instance_id\": \"$2\", 41 | \"role_arn\": \"$3\", 42 | \"topic_arn\": \"$4\", 43 | \"instance_type\": \"${INST:-c3.2xlarge}\", 44 | \"slack_channel\": \"${SLACK_CHANNEL:-#general}\" 45 | } 46 | }" \ 47 | ${HOST}:${PORT}/autoscaling-group-builds 48 | ;; 49 | list-instances) 50 | exec curl \ 51 | -s \ 52 | -H "Authorization: token ${PUDDING_AUTH_TOKEN}" \ 53 | ${HOST}:${PORT}/instances 54 | ;; 55 | terminate-instance) 56 | if [[ ! 
$2 ]] ; then 57 | echo "missing instance id" 58 | exit 1 59 | fi 60 | exec curl \ 61 | -s \ 62 | -H "Authorization: token ${PUDDING_AUTH_TOKEN}" \ 63 | -X DELETE \ 64 | ${HOST}:${PORT}/instances/$2 65 | ;; 66 | bogus-instance-build) 67 | exec curl \ 68 | -s \ 69 | -H "Authorization: token ${PUDDING_AUTH_TOKEN}" \ 70 | -X POST \ 71 | -d "{\"instance_builds\": {}}" \ 72 | ${HOST}:${PORT}/instance-builds 73 | ;; 74 | shutdown) 75 | exec curl \ 76 | -s \ 77 | -H "Authorization: token ${PUDDING_AUTH_TOKEN}" \ 78 | -X DELETE \ 79 | ${HOST}:${PORT}/ 80 | ;; 81 | kaboom) 82 | exec curl \ 83 | -s \ 84 | -H "Authorization: token ${PUDDING_AUTH_TOKEN}" \ 85 | -X POST \ 86 | ${HOST}:${PORT}/kaboom 87 | ;; 88 | expvars) 89 | exec curl \ 90 | -s \ 91 | -H "Authorization: token ${PUDDING_AUTH_TOKEN}" \ 92 | ${HOST}:${PORT}/debug/vars 93 | ;; 94 | *) 95 | echo "unknown request type '$1'" 96 | exit 1 97 | ;; 98 | esac 99 | -------------------------------------------------------------------------------- /cmd/pudding-server/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/codegangsta/cli" 7 | "github.com/travis-ci/pudding" 8 | "github.com/travis-ci/pudding/server" 9 | ) 10 | 11 | func main() { 12 | app := cli.NewApp() 13 | app.Usage = "Serving up the pudding" 14 | app.Author = "Travis CI" 15 | app.Email = "contact+pudding-server@travis-ci.org" 16 | app.Version = pudding.VersionString 17 | app.Compiled = pudding.GeneratedTime() 18 | app.Flags = []cli.Flag{ 19 | pudding.AddrFlag, 20 | pudding.RedisURLFlag, 21 | cli.StringFlag{ 22 | Name: "instance-builds-queue-name", 23 | Value: "instance-builds", 24 | EnvVar: "PUDDING_INSTANCE_BUILDS_QUEUE_NAME", 25 | }, 26 | cli.StringFlag{ 27 | Name: "instance-terminations-queue-name", 28 | Value: "instance-terminations", 29 | EnvVar: "PUDDING_INSTANCE_TERMINATIONS_QUEUE_NAME", 30 | }, 31 | cli.StringFlag{ 32 | Name: 
"autoscaling-group-builds-queue-name", 33 | Value: "autoscaling-group-builds", 34 | EnvVar: "PUDDING_AUTOSCALING_GROUP_BUILDS_QUEUE_NAME", 35 | }, 36 | cli.StringFlag{ 37 | Name: "sns-messages-queue-name", 38 | Value: "sns-messages", 39 | EnvVar: "PUDDING_SNS_MESSAGES_QUEUE_NAME", 40 | }, 41 | cli.StringFlag{ 42 | Name: "instance-lifecycle-transitions-queue-name", 43 | Value: "instance-lifecycle-transitions", 44 | EnvVar: "PUDDING_INSTANCE_LIFECYCLE_TRANSITIONS_QUEUE_NAME", 45 | }, 46 | cli.StringFlag{ 47 | Name: "A, auth-token", 48 | Value: "swordfish", 49 | EnvVar: "PUDDING_AUTH_TOKEN", 50 | }, 51 | pudding.SlackHookPathFlag, 52 | pudding.SlackUsernameFlag, 53 | pudding.SlackChannelFlag, 54 | pudding.SlackIconFlag, 55 | pudding.SentryDSNFlag, 56 | pudding.InstanceExpiryFlag, 57 | pudding.ImageExpiryFlag, 58 | pudding.DebugFlag, 59 | } 60 | app.Action = runServer 61 | 62 | app.Run(os.Args) 63 | } 64 | 65 | func runServer(c *cli.Context) { 66 | pudding.WriteFlagsToEnv(c) 67 | 68 | server.Main(&server.Config{ 69 | Addr: c.String("addr"), 70 | AuthToken: c.String("auth-token"), 71 | Debug: c.Bool("debug"), 72 | 73 | RedisURL: c.String("redis-url"), 74 | 75 | SlackHookPath: c.String("slack-hook-path"), 76 | SlackUsername: c.String("slack-username"), 77 | SlackIcon: c.String("slack-icon"), 78 | DefaultSlackChannel: c.String("default-slack-channel"), 79 | 80 | SentryDSN: c.String("sentry-dsn"), 81 | 82 | InstanceExpiry: c.Int("instance-expiry"), 83 | ImageExpiry: c.Int("image-expiry"), 84 | 85 | QueueNames: map[string]string{ 86 | "instance-builds": c.String("instance-builds-queue-name"), 87 | "instance-terminations": c.String("instance-terminations-queue-name"), 88 | "autoscaling-group-builds": c.String("autoscaling-group-builds-queue-name"), 89 | "sns-messages": c.String("sns-messages-queue-name"), 90 | "instance-lifecycle-transitions": c.String("instance-lifecycle-transitions-queue-name"), 91 | }, 92 | }) 93 | } 94 | 
-------------------------------------------------------------------------------- /cmd/pudding-workers/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/codegangsta/cli" 8 | "github.com/travis-ci/pudding" 9 | "github.com/travis-ci/pudding/workers" 10 | ) 11 | 12 | func main() { 13 | app := cli.NewApp() 14 | app.Usage = "Working on the pudding" 15 | app.Author = "Travis CI" 16 | app.Email = "contact+pudding-workers@travis-ci.org" 17 | app.Version = pudding.VersionString 18 | app.Compiled = pudding.GeneratedTime() 19 | app.Flags = []cli.Flag{ 20 | pudding.RedisURLFlag, 21 | cli.StringFlag{ 22 | Name: "redis-pool-size", 23 | Value: "30", 24 | EnvVar: "PUDDING_REDIS_POOL_SIZE", 25 | }, 26 | cli.StringFlag{ 27 | Name: "q, queues", 28 | Value: "instance-builds,instance-terminations,autoscaling-group-builds,sns-messages,instance-lifecycle-transitions", 29 | EnvVar: "QUEUES", 30 | }, 31 | cli.StringFlag{ 32 | Name: "P, process-id", 33 | Value: func() string { 34 | v := os.Getenv("DYNO") 35 | if v == "" { 36 | v = fmt.Sprintf("%d", os.Getpid()) 37 | } 38 | return v 39 | }(), 40 | EnvVar: "PUDDING_PROCESS_ID", 41 | }, 42 | cli.StringFlag{ 43 | Name: "K, aws-key", 44 | EnvVar: "AWS_ACCESS_KEY_ID", 45 | }, 46 | cli.StringFlag{ 47 | Name: "S, aws-secret", 48 | EnvVar: "AWS_SECRET_ACCESS_KEY", 49 | }, 50 | cli.StringFlag{ 51 | Name: "R, aws-region", 52 | Value: "us-east-1", 53 | EnvVar: "AWS_DEFAULT_REGION", 54 | }, 55 | cli.StringFlag{ 56 | Name: "instance-rsa", 57 | }, 58 | cli.StringFlag{ 59 | Name: "H, web-hostname", 60 | Usage: "publicly-accessible hostname with protocol", 61 | Value: "http://localhost:42151", 62 | EnvVar: "PUDDING_WEB_HOSTNAME", 63 | }, 64 | cli.StringFlag{ 65 | Name: "Y, instance-yml", 66 | }, 67 | cli.StringFlag{ 68 | Name: "T, init-script-template", 69 | }, 70 | cli.IntFlag{ 71 | Name: "I, mini-worker-interval", 72 | Value: 30, 73 | Usage: 
"interval in seconds for the mini worker loop", 74 | EnvVar: "PUDDING_MINI_WORKER_INTERVAL", 75 | }, 76 | pudding.SlackHookPathFlag, 77 | pudding.SlackUsernameFlag, 78 | pudding.SlackIconFlag, 79 | pudding.SentryDSNFlag, 80 | pudding.InstanceExpiryFlag, 81 | pudding.ImageExpiryFlag, 82 | pudding.DebugFlag, 83 | } 84 | app.Action = runWorkers 85 | app.Run(os.Args) 86 | } 87 | 88 | func runWorkers(c *cli.Context) { 89 | instanceRSA := c.String("instance-rsa") 90 | if instanceRSA == "" { 91 | instanceRSA = pudding.GetInstanceRSAKey() 92 | } 93 | 94 | instanceYML := c.String("instance-yml") 95 | if instanceYML == "" { 96 | instanceYML = pudding.GetInstanceYML() 97 | } 98 | 99 | initScriptTemplate := c.String("init-script-template") 100 | if initScriptTemplate == "" { 101 | initScriptTemplate = pudding.GetInitScriptTemplate() 102 | } 103 | 104 | pudding.WriteFlagsToEnv(c) 105 | 106 | workers.Main(&workers.Config{ 107 | ProcessID: c.String("process-id"), 108 | WebHostname: c.String("web-hostname"), 109 | Debug: c.Bool("debug"), 110 | 111 | Queues: c.String("queues"), 112 | RedisPoolSize: c.String("redis-pool-size"), 113 | RedisURL: c.String("redis-url"), 114 | 115 | AWSKey: c.String("aws-key"), 116 | AWSSecret: c.String("aws-secret"), 117 | AWSRegion: c.String("aws-region"), 118 | 119 | InstanceRSA: instanceRSA, 120 | InstanceYML: instanceYML, 121 | InstanceTagRetries: 10, 122 | 123 | InitScriptTemplate: initScriptTemplate, 124 | MiniWorkerInterval: c.Int("mini-worker-interval"), 125 | InstanceExpiry: c.Int("instance-expiry"), 126 | ImageExpiry: c.Int("image-expiry"), 127 | 128 | SlackHookPath: c.String("slack-hook-path"), 129 | SlackUsername: c.String("slack-username"), 130 | SlackIcon: c.String("slack-icon"), 131 | 132 | SentryDSN: c.String("sentry-dsn"), 133 | }) 134 | } 135 | -------------------------------------------------------------------------------- /compressed_env_var.go: -------------------------------------------------------------------------------- 1 | 
// Decompress takes a string that is base64-encoded (standard alphabet) gzip
// data and returns the decoded, decompressed contents.
//
// An error (and an empty string) is returned when the input is not valid
// base64, when the decoded bytes do not start with a valid gzip header, or
// when the compressed stream cannot be read to the end.
func Decompress(b64gz string) (string, error) {
	decoded, err := base64.StdEncoding.DecodeString(b64gz)
	if err != nil {
		return "", err
	}

	r, err := gzip.NewReader(bytes.NewReader(decoded))
	if err != nil {
		return "", err
	}
	// Always release the gzip reader, including on the read-error path.
	defer r.Close()

	b, err := ioutil.ReadAll(r)
	if err != nil {
		return "", err
	}

	return string(b), nil
}
redis URL yey ☃ 16 | func BuildRedisPool(redisURL string) (*redis.Pool, error) { 17 | u, err := url.Parse(redisURL) 18 | if err != nil { 19 | return nil, err 20 | } 21 | 22 | pool := &redis.Pool{ 23 | MaxIdle: 3, 24 | IdleTimeout: 240 * time.Second, 25 | Dial: func() (redis.Conn, error) { 26 | c, err := redis.Dial("tcp", u.Host) 27 | if err != nil { 28 | return nil, err 29 | } 30 | if u.User == nil { 31 | return c, err 32 | } 33 | if auth, ok := u.User.Password(); ok { 34 | if _, err := c.Do("AUTH", auth); err != nil { 35 | c.Close() 36 | return nil, err 37 | } 38 | } 39 | return c, err 40 | }, 41 | TestOnBorrow: func(c redis.Conn, t time.Time) error { 42 | _, err := c.Do("PING") 43 | return err 44 | }, 45 | } 46 | return pool, nil 47 | } 48 | 49 | // FetchInstances gets a slice of instances given a redis conn and 50 | // optional filter map 51 | func FetchInstances(conn redis.Conn, f map[string]string) ([]*pudding.Instance, error) { 52 | var err error 53 | keys := []string{} 54 | 55 | if key, ok := f["instance_id"]; ok { 56 | keys = append(keys, key) 57 | } else { 58 | keys, err = redis.Strings(conn.Do("SMEMBERS", fmt.Sprintf("%s:instances", pudding.RedisNamespace))) 59 | if err != nil { 60 | return nil, err 61 | } 62 | } 63 | 64 | instances := []*pudding.Instance{} 65 | 66 | for _, key := range keys { 67 | reply, err := redis.Values(conn.Do("HGETALL", fmt.Sprintf("%s:instance:%s", pudding.RedisNamespace, key))) 68 | if err != nil { 69 | return nil, err 70 | } 71 | 72 | inst := &pudding.Instance{} 73 | err = redis.ScanStruct(reply, inst) 74 | if err != nil { 75 | return nil, err 76 | } 77 | 78 | failedChecks := 0 79 | for key, value := range f { 80 | switch key { 81 | case "env": 82 | if inst.Env != value { 83 | failedChecks++ 84 | } 85 | case "site": 86 | if inst.Site != value { 87 | failedChecks++ 88 | } 89 | case "role": 90 | if inst.Role != value { 91 | failedChecks++ 92 | } 93 | case "queue": 94 | if inst.Queue != value { 95 | failedChecks++ 96 | } 97 | } 98 
| } 99 | 100 | if failedChecks == 0 && !reflect.DeepEqual(inst, &pudding.Instance{}) { 101 | instances = append(instances, inst) 102 | } 103 | } 104 | 105 | return instances, nil 106 | } 107 | 108 | // SetInstanceAttributes sets key-value pair attributes on the 109 | // given instance's hash 110 | func SetInstanceAttributes(conn redis.Conn, instanceID string, attrs map[string]string) error { 111 | instanceAttrsKey := fmt.Sprintf("%s:instance:%s", pudding.RedisNamespace, instanceID) 112 | hmSet := []interface{}{instanceAttrsKey} 113 | for key, value := range attrs { 114 | hmSet = append(hmSet, key, value) 115 | } 116 | 117 | _, err := conn.Do("HMSET", hmSet...) 118 | return err 119 | } 120 | 121 | // StoreInstances stores the ec2 representation of an instance 122 | // given a redis conn and slice of ec2 instances, as well as an 123 | // expiry integer that is used to to run EXPIRE on all sets and 124 | // hashes involved 125 | func StoreInstances(conn redis.Conn, instances map[string]ec2.Instance, expiry int) error { 126 | err := conn.Send("MULTI") 127 | if err != nil { 128 | return err 129 | } 130 | 131 | instanceSetKey := fmt.Sprintf("%s:instances", pudding.RedisNamespace) 132 | 133 | err = conn.Send("DEL", instanceSetKey) 134 | if err != nil { 135 | conn.Do("DISCARD") 136 | return err 137 | } 138 | 139 | for ID, inst := range instances { 140 | instanceAttrsKey := fmt.Sprintf("%s:instance:%s", pudding.RedisNamespace, ID) 141 | 142 | err = conn.Send("SADD", instanceSetKey, ID) 143 | if err != nil { 144 | conn.Do("DISCARD") 145 | return err 146 | } 147 | 148 | hmSet := []interface{}{ 149 | instanceAttrsKey, 150 | "instance_id", inst.InstanceId, 151 | "instance_type", inst.InstanceType, 152 | "image_id", inst.ImageId, 153 | "ip", inst.IPAddress, 154 | "private_ip", inst.PrivateIPAddress, 155 | "launch_time", inst.LaunchTime, 156 | } 157 | 158 | for _, tag := range inst.Tags { 159 | switch tag.Key { 160 | case "queue", "env", "site", "role": 161 | hmSet = 
append(hmSet, tag.Key, tag.Value) 162 | case "Name": 163 | hmSet = append(hmSet, "name", tag.Value) 164 | } 165 | } 166 | 167 | err = conn.Send("HMSET", hmSet...) 168 | if err != nil { 169 | conn.Do("DISCARD") 170 | return err 171 | } 172 | 173 | err = conn.Send("EXPIRE", instanceAttrsKey, expiry) 174 | if err != nil { 175 | conn.Do("DISCARD") 176 | return err 177 | } 178 | } 179 | 180 | err = conn.Send("EXPIRE", instanceSetKey, expiry) 181 | if err != nil { 182 | conn.Do("DISCARD") 183 | return err 184 | } 185 | 186 | _, err = conn.Do("EXEC") 187 | return err 188 | } 189 | 190 | // RemoveInstances removes the given instances from the instance 191 | // set 192 | func RemoveInstances(conn redis.Conn, IDs []string) error { 193 | err := conn.Send("MULTI") 194 | if err != nil { 195 | return err 196 | } 197 | 198 | instanceSetKey := fmt.Sprintf("%s:instances", pudding.RedisNamespace) 199 | 200 | for _, ID := range IDs { 201 | err = conn.Send("SREM", instanceSetKey, ID) 202 | if err != nil { 203 | conn.Do("DISCARD") 204 | return err 205 | } 206 | } 207 | 208 | _, err = conn.Do("EXEC") 209 | return err 210 | } 211 | 212 | // FetchImages gets a slice of images given a redis conn and 213 | // optional filter map 214 | func FetchImages(conn redis.Conn, f map[string]string) ([]*pudding.Image, error) { 215 | var err error 216 | keys := []string{} 217 | 218 | if key, ok := f["image_id"]; ok { 219 | keys = append(keys, key) 220 | } else { 221 | keys, err = redis.Strings(conn.Do("SMEMBERS", fmt.Sprintf("%s:images", pudding.RedisNamespace))) 222 | if err != nil { 223 | return nil, err 224 | } 225 | } 226 | 227 | images := []*pudding.Image{} 228 | 229 | for _, key := range keys { 230 | reply, err := redis.Values(conn.Do("HGETALL", fmt.Sprintf("%s:image:%s", pudding.RedisNamespace, key))) 231 | if err != nil { 232 | return nil, err 233 | } 234 | 235 | img := &pudding.Image{} 236 | err = redis.ScanStruct(reply, img) 237 | if err != nil { 238 | return nil, err 239 | } 240 | 241 | 
failedChecks := 0 242 | for key, value := range f { 243 | switch key { 244 | case "active": 245 | if img.Active != (value == "true") { 246 | failedChecks++ 247 | } 248 | case "role": 249 | if img.Role != value { 250 | failedChecks++ 251 | } 252 | } 253 | } 254 | 255 | if failedChecks == 0 { 256 | images = append(images, img) 257 | } 258 | } 259 | 260 | return images, nil 261 | } 262 | 263 | // StoreImages stores the ec2 representation of an image 264 | // given a redis conn and slice of ec2 images, as well as an 265 | // expiry integer that is used to to run EXPIRE on all sets and 266 | // hashes involved 267 | func StoreImages(conn redis.Conn, images map[string]ec2.Image, expiry int) error { 268 | err := conn.Send("MULTI") 269 | if err != nil { 270 | return err 271 | } 272 | 273 | imageSetKey := fmt.Sprintf("%s:images", pudding.RedisNamespace) 274 | 275 | err = conn.Send("DEL", imageSetKey) 276 | if err != nil { 277 | conn.Do("DISCARD") 278 | return err 279 | } 280 | 281 | for ID, img := range images { 282 | imageAttrsKey := fmt.Sprintf("%s:image:%s", pudding.RedisNamespace, ID) 283 | 284 | err = conn.Send("SADD", imageSetKey, ID) 285 | if err != nil { 286 | conn.Do("DISCARD") 287 | return err 288 | } 289 | 290 | hmSet := []interface{}{ 291 | imageAttrsKey, 292 | "image_id", img.Id, 293 | "name", img.Name, 294 | "state", img.State, 295 | } 296 | 297 | for _, tag := range img.Tags { 298 | switch tag.Key { 299 | case "role": 300 | hmSet = append(hmSet, tag.Key, tag.Value) 301 | case "active": 302 | hmSet = append(hmSet, tag.Key, true) 303 | } 304 | } 305 | 306 | err = conn.Send("HMSET", hmSet...) 
307 | if err != nil { 308 | conn.Do("DISCARD") 309 | return err 310 | } 311 | 312 | err = conn.Send("EXPIRE", imageAttrsKey, expiry) 313 | if err != nil { 314 | conn.Do("DISCARD") 315 | return err 316 | } 317 | } 318 | 319 | err = conn.Send("EXPIRE", imageSetKey, expiry) 320 | if err != nil { 321 | conn.Do("DISCARD") 322 | return err 323 | } 324 | 325 | _, err = conn.Do("EXEC") 326 | return err 327 | } 328 | 329 | // RemoveImages removes the given images from the image 330 | // set 331 | func RemoveImages(conn redis.Conn, IDs []string) error { 332 | err := conn.Send("MULTI") 333 | if err != nil { 334 | return err 335 | } 336 | 337 | imageSetKey := fmt.Sprintf("%s:images", pudding.RedisNamespace) 338 | 339 | for _, ID := range IDs { 340 | err = conn.Send("SREM", imageSetKey, ID) 341 | if err != nil { 342 | conn.Do("DISCARD") 343 | return err 344 | } 345 | } 346 | 347 | _, err = conn.Do("EXEC") 348 | return err 349 | } 350 | 351 | // StoreInstanceLifecycleAction stores a pudding.AutoscalingLifecycleAction in a transition-specific set and hash 352 | func StoreInstanceLifecycleAction(conn redis.Conn, a *pudding.AutoscalingLifecycleAction) error { 353 | err := conn.Send("MULTI") 354 | if err != nil { 355 | return err 356 | } 357 | 358 | transition := strings.ToLower(strings.Replace(a.LifecycleTransition, "autoscaling:EC2_INSTANCE_", "", 1)) 359 | instSetKey := fmt.Sprintf("%s:instance_%s", pudding.RedisNamespace, transition) 360 | hashKey := fmt.Sprintf("%s:instance_%s:%s", pudding.RedisNamespace, transition, a.EC2InstanceID) 361 | 362 | err = conn.Send("SADD", instSetKey, a.EC2InstanceID) 363 | if err != nil { 364 | conn.Do("DISCARD") 365 | return err 366 | } 367 | 368 | hmSet := []interface{}{ 369 | hashKey, 370 | "lifecycle_action_token", a.LifecycleActionToken, 371 | "auto_scaling_group_name", a.AutoScalingGroupName, 372 | "lifecycle_hook_name", a.LifecycleHookName, 373 | } 374 | 375 | err = conn.Send("HMSET", hmSet...) 
376 | if err != nil { 377 | conn.Do("DISCARD") 378 | return err 379 | } 380 | 381 | _, err = conn.Do("EXEC") 382 | return err 383 | } 384 | 385 | // FetchInstanceLifecycleAction retrieves a pudding.AutoscalingLifecycleAction 386 | func FetchInstanceLifecycleAction(conn redis.Conn, transition, instanceID string) (*pudding.AutoscalingLifecycleAction, error) { 387 | exists, err := redis.Bool(conn.Do("SISMEMBER", fmt.Sprintf("%s:instance_%s", pudding.RedisNamespace, transition), instanceID)) 388 | if !exists { 389 | return nil, nil 390 | } 391 | 392 | attrs, err := redis.Values(conn.Do("HGETALL", fmt.Sprintf("%s:instance_%s:%s", pudding.RedisNamespace, transition, instanceID))) 393 | if err != nil { 394 | return nil, err 395 | } 396 | 397 | ala := &pudding.AutoscalingLifecycleAction{} 398 | err = redis.ScanStruct(attrs, ala) 399 | return ala, err 400 | } 401 | 402 | // WipeInstanceLifecycleAction cleans up the keys for a given lifecycle action 403 | func WipeInstanceLifecycleAction(conn redis.Conn, transition, instanceID string) error { 404 | err := conn.Send("MULTI") 405 | if err != nil { 406 | return err 407 | } 408 | 409 | err = conn.Send("SREM", fmt.Sprintf("%s:instance_%s", pudding.RedisNamespace, transition), instanceID) 410 | if err != nil { 411 | conn.Do("DISCARD") 412 | return err 413 | } 414 | 415 | err = conn.Send("DEL", fmt.Sprintf("%s:instance_%s:%s", pudding.RedisNamespace, transition, instanceID)) 416 | if err != nil { 417 | conn.Do("DISCARD") 418 | return err 419 | } 420 | 421 | _, err = conn.Do("EXEC") 422 | return err 423 | } 424 | -------------------------------------------------------------------------------- /db/db_test.go: -------------------------------------------------------------------------------- 1 | package db 2 | 3 | import "testing" 4 | 5 | func TestNothing(t *testing.T) { 6 | if 1 != 1 { 7 | t.Fail() 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /db/images.go: 
-------------------------------------------------------------------------------- 1 | package db 2 | 3 | import ( 4 | "github.com/Sirupsen/logrus" 5 | "github.com/garyburd/redigo/redis" 6 | "github.com/goamz/goamz/ec2" 7 | "github.com/travis-ci/pudding" 8 | ) 9 | 10 | // ImageFetcherStorer defines the interface for fetching and 11 | // storing the internal image representation 12 | type ImageFetcherStorer interface { 13 | Fetch(map[string]string) ([]*pudding.Image, error) 14 | Store(map[string]ec2.Image) error 15 | } 16 | 17 | // Images represents the instance collection 18 | type Images struct { 19 | Expiry int 20 | r *redis.Pool 21 | log *logrus.Logger 22 | } 23 | 24 | // NewImages creates a new Images collection 25 | func NewImages(r *redis.Pool, log *logrus.Logger, expiry int) (*Images, error) { 26 | return &Images{ 27 | Expiry: expiry, 28 | r: r, 29 | log: log, 30 | }, nil 31 | } 32 | 33 | // Fetch returns a slice of images, optionally with filter params 34 | func (i *Images) Fetch(f map[string]string) ([]*pudding.Image, error) { 35 | conn := i.r.Get() 36 | defer conn.Close() 37 | 38 | return FetchImages(conn, f) 39 | } 40 | 41 | // Store accepts the ec2 representation of an image and stores it 42 | func (i *Images) Store(images map[string]ec2.Image) error { 43 | conn := i.r.Get() 44 | defer conn.Close() 45 | 46 | return StoreImages(conn, images, i.Expiry) 47 | } 48 | -------------------------------------------------------------------------------- /db/init_scripts.go: -------------------------------------------------------------------------------- 1 | package db 2 | 3 | import ( 4 | "bytes" 5 | "compress/gzip" 6 | "crypto/subtle" 7 | "encoding/base64" 8 | "fmt" 9 | "io/ioutil" 10 | "strings" 11 | 12 | "github.com/Sirupsen/logrus" 13 | "github.com/garyburd/redigo/redis" 14 | "github.com/travis-ci/pudding" 15 | ) 16 | 17 | // InstanceBuildAuther is the interface used to authenticate 18 | // against temporary auth creds for download of init scripts via 19 | // 
cloud-init on the remote instance 20 | type InstanceBuildAuther interface { 21 | HasValidAuth(string, string) bool 22 | } 23 | 24 | // InitScriptGetterAuther is the extension of InstanceBuildAuther 25 | // that performs the fetching of the init script for cloud-init 26 | type InitScriptGetterAuther interface { 27 | InstanceBuildAuther 28 | Get(string) (string, error) 29 | } 30 | 31 | // InitScripts represents the internal init scripts collection 32 | type InitScripts struct { 33 | r *redis.Pool 34 | log *logrus.Logger 35 | } 36 | 37 | // NewInitScripts creates a new *InitScripts 38 | func NewInitScripts(r *redis.Pool, log *logrus.Logger) (*InitScripts, error) { 39 | return &InitScripts{ 40 | r: r, 41 | log: log, 42 | }, nil 43 | } 44 | 45 | // Get retrieves a given init script by ID, which is expected to be 46 | // a uuid, although it really doesn't matter ☃ 47 | func (is *InitScripts) Get(ID string) (string, error) { 48 | conn := is.r.Get() 49 | defer conn.Close() 50 | 51 | b64Script, err := redis.String(conn.Do("HGET", fmt.Sprintf("%s:init-scripts", pudding.RedisNamespace), ID)) 52 | if err != nil { 53 | return "", err 54 | } 55 | 56 | b, err := base64.StdEncoding.DecodeString(string(b64Script)) 57 | if err != nil { 58 | return "", err 59 | } 60 | 61 | zr, err := gzip.NewReader(bytes.NewReader(b)) 62 | if err != nil { 63 | return "", err 64 | } 65 | defer zr.Close() 66 | 67 | script, err := ioutil.ReadAll(zr) 68 | if err != nil { 69 | return "", err 70 | } 71 | 72 | return string(script), nil 73 | } 74 | 75 | // HasValidAuth checks the provided temporary auth creds against 76 | // what is stored in redis for the given init script id 77 | func (is *InitScripts) HasValidAuth(ID, auth string) bool { 78 | conn := is.r.Get() 79 | defer conn.Close() 80 | 81 | hKey := fmt.Sprintf("%s:auths", pudding.RedisNamespace) 82 | dbAuth, err := redis.String(conn.Do("HGET", hKey, ID)) 83 | if err != nil { 84 | is.log.WithFields(logrus.Fields{ 85 | "err": err, 86 | "hash": hKey, 87 
| "key": ID, 88 | }).Error("failed to fetch auth from database") 89 | return false 90 | } 91 | 92 | is.log.WithFields(logrus.Fields{ 93 | "instance_build_id": ID, 94 | "auth": auth, 95 | "db_auth": dbAuth, 96 | }).Debug("comparing auths") 97 | 98 | return 0 == subtle.ConstantTimeCompare( 99 | []byte(strings.TrimSpace(dbAuth)), 100 | []byte(strings.TrimSpace(auth)), 101 | ) 102 | } 103 | -------------------------------------------------------------------------------- /db/instances.go: -------------------------------------------------------------------------------- 1 | package db 2 | 3 | import ( 4 | "github.com/Sirupsen/logrus" 5 | "github.com/garyburd/redigo/redis" 6 | "github.com/goamz/goamz/ec2" 7 | "github.com/travis-ci/pudding" 8 | ) 9 | 10 | // InstanceFetcherStorer defines the interface for fetching and 11 | // storing the internal instance representation 12 | type InstanceFetcherStorer interface { 13 | Fetch(map[string]string) ([]*pudding.Instance, error) 14 | Store(map[string]ec2.Instance) error 15 | } 16 | 17 | // Instances represents the instance collection 18 | type Instances struct { 19 | Expiry int 20 | r *redis.Pool 21 | log *logrus.Logger 22 | } 23 | 24 | // NewInstances creates a new Instances collection 25 | func NewInstances(r *redis.Pool, log *logrus.Logger, expiry int) (*Instances, error) { 26 | return &Instances{ 27 | Expiry: expiry, 28 | r: r, 29 | log: log, 30 | }, nil 31 | } 32 | 33 | // Fetch returns a slice of instances, optionally with filter params 34 | func (i *Instances) Fetch(f map[string]string) ([]*pudding.Instance, error) { 35 | conn := i.r.Get() 36 | defer conn.Close() 37 | 38 | return FetchInstances(conn, f) 39 | } 40 | 41 | // Store accepts the ec2 representation of an instance and stores it 42 | func (i *Instances) Store(instances map[string]ec2.Instance) error { 43 | conn := i.r.Get() 44 | defer conn.Close() 45 | 46 | return StoreInstances(conn, instances, i.Expiry) 47 | } 48 | 
-------------------------------------------------------------------------------- /db/jobs.go: -------------------------------------------------------------------------------- 1 | package db 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/garyburd/redigo/redis" 7 | "github.com/travis-ci/pudding" 8 | ) 9 | 10 | // EnqueueJob pushes a given payload onto the given queue name to 11 | // be consumed by the workers 12 | func EnqueueJob(conn redis.Conn, queueName, payload string) error { 13 | err := conn.Send("MULTI") 14 | if err != nil { 15 | return err 16 | } 17 | err = conn.Send("SADD", fmt.Sprintf("%s:queues", pudding.RedisNamespace), queueName) 18 | if err != nil { 19 | conn.Send("DISCARD") 20 | return err 21 | } 22 | 23 | err = conn.Send("LPUSH", fmt.Sprintf("%s:queue:%s", pudding.RedisNamespace, queueName), payload) 24 | if err != nil { 25 | conn.Send("DISCARD") 26 | return err 27 | } 28 | 29 | _, err = conn.Do("EXEC") 30 | return err 31 | } 32 | -------------------------------------------------------------------------------- /ec2helpers.go: -------------------------------------------------------------------------------- 1 | package pudding 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | 7 | "github.com/goamz/goamz/ec2" 8 | ) 9 | 10 | var ( 11 | errNoLatestImage = fmt.Errorf("no latest image available matching filter") 12 | ) 13 | 14 | // ResolveAMI attempts to get an ec2.Image by id, falling back to 15 | // fetching the most recently provisioned worker ami via 16 | // FetchLatestWorkerAMI 17 | func ResolveAMI(conn *ec2.EC2, ID string, f *ec2.Filter) (*ec2.Image, error) { 18 | if ID != "" { 19 | resp, err := conn.Images([]string{ID}, ec2.NewFilter()) 20 | if err != nil { 21 | return nil, err 22 | } 23 | for _, img := range resp.Images { 24 | if img.Id == ID { 25 | return &img, nil 26 | } 27 | } 28 | } 29 | 30 | return FetchLatestAMIWithFilter(conn, f) 31 | } 32 | 33 | // FetchLatestAMIWithFilter looks up all images matching the given 34 | // filter (with `tag:active=true` 
added), then sorts by the image 35 | // name which is assumed to contain a timestamp, then returns the 36 | // most recent image. 37 | func FetchLatestAMIWithFilter(conn *ec2.EC2, f *ec2.Filter) (*ec2.Image, error) { 38 | f.Add("tag-key", "active") 39 | 40 | allImages, err := conn.Images([]string{}, f) 41 | if err != nil { 42 | return nil, err 43 | } 44 | 45 | if len(allImages.Images) == 0 { 46 | return nil, errNoLatestImage 47 | } 48 | 49 | imgNames := []string{} 50 | imgMap := map[string]ec2.Image{} 51 | 52 | for _, img := range allImages.Images { 53 | imgNames = append(imgNames, img.Name) 54 | imgMap[img.Name] = img 55 | } 56 | 57 | sort.Strings(imgNames) 58 | img := imgMap[imgNames[len(imgNames)-1]] 59 | return &img, nil 60 | } 61 | 62 | // GetInstancesWithFilter fetches all instances that match the 63 | // given filter 64 | func GetInstancesWithFilter(conn *ec2.EC2, f *ec2.Filter) (map[string]ec2.Instance, error) { 65 | resp, err := conn.DescribeInstances([]string{}, f) 66 | 67 | if err != nil { 68 | return nil, err 69 | } 70 | 71 | instances := map[string]ec2.Instance{} 72 | 73 | for _, res := range resp.Reservations { 74 | for _, inst := range res.Instances { 75 | instances[inst.InstanceId] = inst 76 | } 77 | } 78 | 79 | return instances, nil 80 | } 81 | 82 | // GetImagesWithFilter fetches all images that match the 83 | // given filter 84 | func GetImagesWithFilter(conn *ec2.EC2, f *ec2.Filter) (map[string]ec2.Image, error) { 85 | resp, err := conn.Images([]string{}, f) 86 | 87 | if err != nil { 88 | return nil, err 89 | } 90 | 91 | images := map[string]ec2.Image{} 92 | 93 | for _, img := range resp.Images { 94 | images[img.Id] = img 95 | } 96 | 97 | return images, nil 98 | } 99 | -------------------------------------------------------------------------------- /errors.go: -------------------------------------------------------------------------------- 1 | package pudding 2 | 3 | import "fmt" 4 | 5 | var ( 6 | errEmptyEnv = fmt.Errorf("empty \"env\" param") 7 
| errEmptyInstanceID = fmt.Errorf("empty \"instance_id\" param") 8 | errEmptyInstanceType = fmt.Errorf("empty \"instance_type\" param") 9 | errEmptyQueue = fmt.Errorf("empty \"queue\" param") 10 | errEmptyRole = fmt.Errorf("empty \"role\" param") 11 | errEmptyRoleARN = fmt.Errorf("empty \"role_arn\" param") 12 | errEmptySite = fmt.Errorf("empty \"site\" param") 13 | errEmptyTopicARN = fmt.Errorf("empty \"topic_arn\" param") 14 | 15 | errInvalidInstanceCount = fmt.Errorf("count must be more than 0") 16 | errInvalidState = fmt.Errorf("state must be pending, started, or finished") 17 | ) 18 | -------------------------------------------------------------------------------- /examples/simple/iam-autoscaling.json: -------------------------------------------------------------------------------- 1 | { 2 | "Statement": [ 3 | { 4 | "Effect": "Allow", 5 | "Action": [ 6 | "autoscaling:*" 7 | ], 8 | "Resource": "*" 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /examples/simple/iam-ec2.json: -------------------------------------------------------------------------------- 1 | { 2 | "Statement": [ 3 | { 4 | "Effect": "Allow", 5 | "Action": [ 6 | "ec2:AttachVolume", 7 | "ec2:AuthorizeSecurityGroupIngress", 8 | "ec2:CreateImage", 9 | "ec2:CreateKeyPair", 10 | "ec2:CreateSecurityGroup", 11 | "ec2:CreateSnapshot", 12 | "ec2:CreateTags", 13 | "ec2:CreateVolume", 14 | "ec2:DeleteKeyPair", 15 | "ec2:DeleteSecurityGroup", 16 | "ec2:DeleteSnapshot", 17 | "ec2:DeleteVolume", 18 | "ec2:DescribeImages", 19 | "ec2:DescribeInstances", 20 | "ec2:DescribeSnapshots", 21 | "ec2:DescribeSecurityGroups", 22 | "ec2:DescribeVolumes", 23 | "ec2:DescribeTags", 24 | "ec2:DetachVolume", 25 | "ec2:ModifyImageAttribute", 26 | "ec2:RegisterImage", 27 | "ec2:RunInstances", 28 | "ec2:StopInstances", 29 | "ec2:TerminateInstances" 30 | ], 31 | "Resource": "*" 32 | } 33 | ] 34 | } 35 | 
-------------------------------------------------------------------------------- /examples/simple/iam-misc.json: -------------------------------------------------------------------------------- 1 | { 2 | "Statement": [ 3 | { 4 | "Effect": "Allow", 5 | "Action": [ 6 | "cloudwatch:PutMetricAlarm", 7 | "iam:PassRole" 8 | ], 9 | "Resource": "*" 10 | } 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /examples/simple/init-script.tmpl.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o errexit 4 | 5 | export INSTANCE_ID="$(curl -s 'http://169.254.169.254/latest/meta-data/instance-id')" 6 | 7 | mkdir -p /app 8 | cd /app 9 | 10 | cat > id_rsa < start-hook < stop-hook < os.Getenv(`API_HOSTNAME_ORG_PROD`) 105 | func MakeInstanceBuildEnvForFunc(b *InstanceBuild) func(string, ...string) string { 106 | return func(key string, filters ...string) string { 107 | for _, filter := range filters { 108 | v := "" 109 | switch filter { 110 | case "site": 111 | v = b.Site 112 | case "env": 113 | v = b.Env 114 | case "queue": 115 | v = b.Queue 116 | case "role": 117 | v = b.Role 118 | } 119 | 120 | if v == "" { 121 | continue 122 | } 123 | 124 | key = fmt.Sprintf("%s_%s", key, strings.ToUpper(v)) 125 | } 126 | return os.Getenv(key) 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /instance_build_payload.go: -------------------------------------------------------------------------------- 1 | package pudding 2 | 3 | // InstanceBuildPayload is the InstanceBuild representation sent to 4 | // the background workers 5 | type InstanceBuildPayload struct { 6 | Args []*InstanceBuild `json:"args"` 7 | Queue string `json:"queue,omitempty"` 8 | JID string `json:"jid,omitempty"` 9 | Retry bool `json:"retry,omitempty"` 10 | EnqueuedAt float64 `json:"enqueued_at,omitempty"` 11 | } 12 | 13 | // InstanceBuild returns the inner instance 
build from the Args 14 | // slice 15 | func (ibp *InstanceBuildPayload) InstanceBuild() *InstanceBuild { 16 | if len(ibp.Args) < 1 { 17 | return nil 18 | } 19 | 20 | return ibp.Args[0] 21 | } 22 | -------------------------------------------------------------------------------- /instance_lifecycle_transition.go: -------------------------------------------------------------------------------- 1 | package pudding 2 | 3 | // InstanceLifecycleTransition is an event received from instances when launching and terminating 4 | type InstanceLifecycleTransition struct { 5 | ID string `json:"id,omitempty"` 6 | InstanceID string `json:"instance_id"` 7 | Transition string `json:"transition"` 8 | } 9 | -------------------------------------------------------------------------------- /instance_lifecycle_transition_payload.go: -------------------------------------------------------------------------------- 1 | package pudding 2 | 3 | // InstanceLifecycleTransitionPayload is the background job payload for instance lifecycle transitions 4 | type InstanceLifecycleTransitionPayload struct { 5 | Args []*InstanceLifecycleTransition `json:"args"` 6 | Queue string `json:"queue,omitempty"` 7 | JID string `json:"jid,omitempty"` 8 | Retry bool `json:"retry,omitempty"` 9 | EnqueuedAt float64 `json:"enqueued_at,omitempty"` 10 | } 11 | 12 | // InstanceLifecycleTransition returns the inner *InstanceLifecycleTransition if available 13 | func (iltp *InstanceLifecycleTransitionPayload) InstanceLifecycleTransition() *InstanceLifecycleTransition { 14 | if len(iltp.Args) < 1 { 15 | return nil 16 | } 17 | 18 | return iltp.Args[0] 19 | } 20 | -------------------------------------------------------------------------------- /instance_rsa.go: -------------------------------------------------------------------------------- 1 | package pudding 2 | 3 | import ( 4 | "io/ioutil" 5 | "os" 6 | "path/filepath" 7 | ) 8 | 9 | // GetInstanceRSAKey attempts to retrieve the instance rsa private 10 | // key from 
// getInstanceRSAKeyFromFile returns the contents of the file named
// "instance_rsa" in the current working directory. It returns an empty
// string when the working directory cannot be determined or the file
// cannot be read.
func getInstanceRSAKeyFromFile() string {
	cwd, cwdErr := os.Getwd()
	if cwdErr != nil {
		return ""
	}

	keyPath := filepath.Join(cwd, "instance_rsa")
	contents, readErr := ioutil.ReadFile(keyPath)
	if readErr == nil {
		return string(contents)
	}

	return ""
}
for env. 17 | type MetaYML struct { 18 | AMQP map[string]map[string]*amqpConfig `yaml:"amqp"` 19 | Build map[string]map[string]*buildConfig `yaml:"build"` 20 | Librato map[string]*libratoConfig `yaml:"librato"` 21 | Cache map[string]map[string]*cacheConfig `yaml:"cache"` 22 | Papertrail map[string]string `yaml:"papertrail"` 23 | } 24 | 25 | type amqpConfig struct { 26 | Host string `yaml:"host"` 27 | Port int `yaml:"port"` 28 | Username string `yaml:"username"` 29 | Password string `yaml:"password"` 30 | Vhost string `yaml:"vhost"` 31 | TLS string `yaml:"tls,omitempty"` 32 | } 33 | 34 | type buildConfig struct { 35 | APIToken string `yaml:"api_token"` 36 | URL string `yaml:"url"` 37 | } 38 | 39 | type libratoConfig struct { 40 | Email string `yaml:"email"` 41 | Token string `yaml:"token"` 42 | } 43 | 44 | type cacheConfig struct { 45 | Type string `yaml:"type"` 46 | S3 *s3config `yaml:"s3"` 47 | FetchTimeout int `yaml:"fetch_timeout"` 48 | PushTimeout int `yaml:"push_timeout"` 49 | } 50 | 51 | type s3config struct { 52 | AccessKeyID string `yaml:"access_key_id"` 53 | SecretAccessKey string `yaml:"secret_access_key"` 54 | Bucket string `yaml:"bucket"` 55 | } 56 | 57 | // InstanceSpecificYML is the instance-specific configuration 58 | // generated from a MetaYML 59 | type InstanceSpecificYML struct { 60 | Env string `yaml:"env"` 61 | LinuxConfig *instanceEnvConfig `yaml:"linux,omitempty"` 62 | PapertrailSite string `yaml:"papertrail_site,omitempty"` 63 | } 64 | 65 | func (isy *InstanceSpecificYML) String() (string, error) { 66 | out, err := yaml.Marshal(isy) 67 | if out == nil { 68 | out = []byte{} 69 | } 70 | return string(out), err 71 | } 72 | 73 | type instanceEnvConfig struct { 74 | Host string `yaml:"host"` 75 | LogLevel string `yaml:"log_level"` 76 | Queue string `yaml:"queue"` 77 | AMQP *amqpConfig `yaml:"amqp"` 78 | VMs *vmsConfig `yaml:"vms"` 79 | Build *buildConfig `yaml:"build"` 80 | // FIXME: rename the docker bits to "instance" ? 
81 | Docker *dockerConfig `yaml:"docker"` 82 | Paranoid bool `yaml:"paranoid"` 83 | SkipResolvUpdates bool `yaml:"skip_resolv_updates"` 84 | SkipEtcHostsFix bool `yaml:"skip_etc_hosts_fix"` 85 | Librato *libratoConfig `yaml:"librato"` 86 | LanguageMappings map[string]string `yaml:"language_mappings"` 87 | CacheOptions *cacheConfig `yaml:"cache_options"` 88 | Timeouts *timeoutsConfig `yaml:"timeouts"` 89 | } 90 | 91 | type vmsConfig struct { 92 | Provider string `yaml:"provider"` 93 | Count int `yaml:"count"` 94 | } 95 | 96 | type dockerConfig struct { 97 | PrivateKeyPath string `yaml:"private_key_path"` 98 | } 99 | 100 | type timeoutsConfig struct { 101 | HardLimit int `yaml:"hard_limit"` 102 | } 103 | 104 | // BuildInstanceSpecificYML accepts a string form of MetaYML, site, 105 | // env, queue, and count, and constructs a instance-specific 106 | // configuration 107 | func BuildInstanceSpecificYML(site, env, rawYML, queue string, count int) (*InstanceSpecificYML, error) { 108 | multiYML := &MetaYML{ 109 | AMQP: map[string]map[string]*amqpConfig{}, 110 | Build: map[string]map[string]*buildConfig{}, 111 | Librato: map[string]*libratoConfig{}, 112 | Cache: map[string]map[string]*cacheConfig{}, 113 | Papertrail: map[string]string{}, 114 | } 115 | 116 | err := yaml.Unmarshal([]byte(rawYML), multiYML) 117 | if err != nil { 118 | return nil, err 119 | } 120 | 121 | amqpSite, ok := multiYML.AMQP[site] 122 | if !ok { 123 | return nil, errMissingSiteConfig 124 | } 125 | 126 | amqp, ok := amqpSite[env] 127 | if !ok { 128 | return nil, errMissingEnvConfig 129 | } 130 | 131 | buildSite, ok := multiYML.Build[site] 132 | if !ok { 133 | return nil, errMissingSiteConfig 134 | } 135 | 136 | build, ok := buildSite[env] 137 | if !ok { 138 | return nil, errMissingEnvConfig 139 | } 140 | 141 | librato, ok := multiYML.Librato[site] 142 | if !ok { 143 | return nil, errMissingSiteConfig 144 | } 145 | 146 | cacheSite, ok := multiYML.Cache[site] 147 | if !ok { 148 | return nil, 
errMissingSiteConfig 149 | } 150 | 151 | cache, ok := cacheSite[env] 152 | if !ok { 153 | return nil, errMissingEnvConfig 154 | } 155 | 156 | ps, ok := multiYML.Papertrail[site] 157 | if !ok { 158 | return nil, errMissingSiteConfig 159 | } 160 | 161 | isy := &InstanceSpecificYML{ 162 | Env: "linux", 163 | LinuxConfig: &instanceEnvConfig{ 164 | Host: "$INSTANCE_HOST_NAME", 165 | LogLevel: "info", 166 | Queue: fmt.Sprintf("builds.%s", queue), 167 | AMQP: amqp, 168 | VMs: &vmsConfig{ 169 | Provider: "docker", 170 | Count: count, 171 | }, 172 | Build: build, 173 | Docker: &dockerConfig{ 174 | PrivateKeyPath: "/home/deploy/.ssh/docker_rsa", 175 | }, 176 | Paranoid: true, 177 | SkipResolvUpdates: true, 178 | SkipEtcHostsFix: true, 179 | Librato: librato, 180 | LanguageMappings: map[string]string{ 181 | "clojure": "jvm", 182 | "elixir": "erlang", 183 | "groovy": "jvm", 184 | "java": "jvm", 185 | "scala": "jvm", 186 | }, 187 | CacheOptions: cache, 188 | Timeouts: &timeoutsConfig{ 189 | HardLimit: 7200, 190 | }, 191 | }, 192 | PapertrailSite: ps, 193 | } 194 | 195 | return isy, err 196 | } 197 | 198 | // GetInstanceYML attempts to look up the MetaYML 199 | // string as a compressed env var at both INSTANCE_YML and 200 | // PUDDING_INSTANCE_YML. 
// MultiError contains a slice of errors and implements the error
// interface.
type MultiError struct {
	Errors []error
}

// Error provides a string that is the combination of all errors in
// the internal error slice, joined with ", ". Nil entries are skipped
// and a nil receiver yields the empty string, so calling Error never
// panics.
func (m *MultiError) Error() string {
	if m == nil {
		return ""
	}

	// The final length is bounded by len(m.Errors), so size the slice once.
	msgs := make([]string, 0, len(m.Errors))
	for _, err := range m.Errors {
		if err == nil {
			continue
		}
		msgs = append(msgs, err.Error())
	}

	return strings.Join(msgs, ", ")
}
/script/Gemfile.sidekiq-web.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | celluloid (0.15.2) 5 | timers (~> 1.1.0) 6 | connection_pool (2.0.0) 7 | json (1.8.1) 8 | puma (2.8.2) 9 | rack (>= 1.1, < 2.0) 10 | rack (1.5.2) 11 | rack-protection (1.5.3) 12 | rack 13 | redis (3.1.0) 14 | redis-namespace (1.5.1) 15 | redis (~> 3.0, >= 3.0.4) 16 | sidekiq (3.2.6) 17 | celluloid (= 0.15.2) 18 | connection_pool (>= 2.0.0) 19 | json 20 | redis (>= 3.0.6) 21 | redis-namespace (>= 1.3.1) 22 | sinatra (1.4.5) 23 | rack (~> 1.4) 24 | rack-protection (~> 1.4) 25 | tilt (~> 1.3, >= 1.3.4) 26 | tilt (1.4.1) 27 | timers (1.1.0) 28 | 29 | PLATFORMS 30 | ruby 31 | 32 | DEPENDENCIES 33 | puma 34 | sidekiq 35 | sinatra 36 | -------------------------------------------------------------------------------- /script/server: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd "$(dirname "$0")/.." 3 | : ${PORT:=3000} 4 | 5 | cmd="pudding-server" 6 | if [[ ! 
$DYNO ]] ; then 7 | exec rerun -p '**/*.{js,css,go}' "make && $cmd" 8 | fi 9 | exec $cmd 10 | -------------------------------------------------------------------------------- /script/sidekiq-web: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | ENV['BUNDLE_GEMFILE'] = File.expand_path('../Gemfile.sidekiq-web', __FILE__) 4 | 5 | require 'bundler/setup' 6 | require 'sinatra' 7 | require 'sidekiq' 8 | require 'sidekiq/web' 9 | 10 | Sidekiq.redis = { 11 | namespace: 'pudding', 12 | size: 30, 13 | url: ENV[ENV['REDIS_PROVIDER'] || 'REDIS_URL'] || 'redis://localhost:6379/0' 14 | } 15 | 16 | configure do 17 | use Rack::Auth::Basic do |username, password| 18 | ENV['SIDEKIQ_WEB_AUTH_BASIC'] && ( 19 | "#{username}:#{password}" == ENV['SIDEKIQ_WEB_AUTH_BASIC'] 20 | ) 21 | end if ENV['DYNO'] 22 | end 23 | 24 | class App < Sidekiq::Web 25 | run! 26 | end 27 | -------------------------------------------------------------------------------- /script/workers: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd "$(dirname "$0")/.." 3 | 4 | if [[ $PORT ]] ; then 5 | : ${PUDDING_WEB_HOSTNAME:=http://localhost:5000} 6 | fi 7 | 8 | export PUDDING_WEB_HOSTNAME 9 | 10 | cmd="pudding-workers" 11 | if [[ ! 
$DYNO ]] ; then 12 | exec rerun -p '**/*.{js,css,go}' "make && $cmd" 13 | fi 14 | exec $cmd 15 | -------------------------------------------------------------------------------- /sentry.go: -------------------------------------------------------------------------------- 1 | package pudding 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/Sirupsen/logrus" 7 | "github.com/getsentry/raven-go" 8 | ) 9 | 10 | var ( 11 | // SentryTags are the tags provided to each sentry client and are applied to 12 | // each packet sent to sentry 13 | SentryTags = map[string]string{ 14 | "level": "panic", 15 | "logger": "root", 16 | "dyno": os.Getenv("DYNO"), 17 | "hostname": os.Getenv("HOSTNAME"), 18 | "revision": RevisionString, 19 | "version": VersionString, 20 | } 21 | ) 22 | 23 | // SendRavenPacket encapsulates the raven packet send, plus logging 24 | // around errors and such 25 | func SendRavenPacket(packet *raven.Packet, cl *raven.Client, log *logrus.Logger, tags map[string]string) error { 26 | log.WithFields(logrus.Fields{ 27 | "packet": packet, 28 | }).Info("sending sentry packet") 29 | 30 | eventID, ch := cl.Capture(packet, tags) 31 | err := <-ch 32 | if err != nil { 33 | log.WithFields(logrus.Fields{ 34 | "event_id": eventID, 35 | "err": err, 36 | }).Error("problem sending sentry packet") 37 | } else { 38 | log.WithFields(logrus.Fields{ 39 | "event_id": eventID, 40 | }).Info("successfully sent sentry packet") 41 | } 42 | 43 | return err 44 | } 45 | -------------------------------------------------------------------------------- /server/auther.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "crypto/subtle" 5 | "encoding/base64" 6 | "net/http" 7 | "regexp" 8 | "strings" 9 | 10 | "github.com/Sirupsen/logrus" 11 | "github.com/garyburd/redigo/redis" 12 | "github.com/gorilla/feeds" 13 | "github.com/gorilla/mux" 14 | "github.com/travis-ci/pudding/db" 15 | ) 16 | 17 | const ( 18 | internalAuthHeader = 
"Pudding-Internal-Is-Authorized" 19 | ) 20 | 21 | var ( 22 | basicAuthValueRegexp = regexp.MustCompile("(?i:^basic[= ])") 23 | uuidPathRegexp = regexp.MustCompile("(?:instance-builds|instance-launches|instance-terminations|init-scripts)/(.*)") 24 | ) 25 | 26 | type serverAuther struct { 27 | Token string 28 | is db.InstanceBuildAuther 29 | log *logrus.Logger 30 | rt string 31 | } 32 | 33 | func newServerAuther(token string, r *redis.Pool, log *logrus.Logger) (*serverAuther, error) { 34 | sa := &serverAuther{ 35 | Token: token, 36 | log: log, 37 | rt: feeds.NewUUID().String(), 38 | } 39 | 40 | is, err := db.NewInitScripts(r, log) 41 | if err != nil { 42 | return nil, err 43 | } 44 | 45 | sa.is = is 46 | return sa, nil 47 | } 48 | 49 | func (sa *serverAuther) Authenticate(w http.ResponseWriter, req *http.Request) bool { 50 | vars := mux.Vars(req) 51 | 52 | sa.log.WithFields(logrus.Fields{ 53 | "path": req.URL.Path, 54 | "vars": vars, 55 | }).Debug("extracting instance build id if present") 56 | 57 | instanceBuildID, ok := vars["uuid"] 58 | if !ok { 59 | matches := uuidPathRegexp.FindStringSubmatch(req.URL.Path) 60 | if len(matches) > 1 { 61 | instanceBuildID = matches[1] 62 | } 63 | } 64 | 65 | authHeader := req.Header.Get("Authorization") 66 | sa.log.WithField("authorization", authHeader).Debug("raw authorization header") 67 | 68 | if authHeader != "" && (sa.hasValidTokenAuth(authHeader) || sa.hasValidInstanceBuildBasicAuth(authHeader, instanceBuildID)) { 69 | req.Header.Set(internalAuthHeader, sa.rt) 70 | sa.log.WithFields(logrus.Fields{ 71 | "request_id": req.Header.Get("X-Request-ID"), 72 | "instance_build_id": instanceBuildID, 73 | }).Debug("allowing authorized request yey") 74 | return true 75 | } 76 | 77 | if authHeader == "" { 78 | w.Header().Set("WWW-Authenticate", "token") 79 | sa.log.WithFields(logrus.Fields{ 80 | "request_id": req.Header.Get("X-Request-ID"), 81 | }).Debug("responding 401 due to empty Authorization header") 82 | http.Error(w, "NO", 
http.StatusUnauthorized) 83 | return false 84 | } 85 | 86 | http.Error(w, "NO", http.StatusForbidden) 87 | return false 88 | } 89 | 90 | func (sa *serverAuther) hasValidTokenAuth(authHeader string) bool { 91 | authHeaderBytes := []byte(authHeader) 92 | if subtle.ConstantTimeCompare(authHeaderBytes, []byte("token "+sa.Token)) == 0 || subtle.ConstantTimeCompare(authHeaderBytes, []byte("token="+sa.Token)) == 0 { 93 | sa.log.Debug("token auth matches yey") 94 | return true 95 | } 96 | 97 | sa.log.Debug("token auth does not match") 98 | return false 99 | } 100 | 101 | func (sa *serverAuther) hasValidInstanceBuildBasicAuth(authHeader, instanceBuildID string) bool { 102 | if !basicAuthValueRegexp.MatchString(authHeader) { 103 | return false 104 | } 105 | 106 | b64Auth := basicAuthValueRegexp.ReplaceAllString(authHeader, "") 107 | decoded, err := base64.StdEncoding.DecodeString(b64Auth) 108 | if err != nil { 109 | sa.log.WithField("err", err).Error("failed to base64 decade basic auth header") 110 | return false 111 | } 112 | 113 | authParts := strings.Split(string(decoded), ":") 114 | if len(authParts) != 2 { 115 | sa.log.Error("basic auth does not contain two parts") 116 | return false 117 | } 118 | 119 | sa.log.WithFields(logrus.Fields{ 120 | "basic_auth": authParts[1], 121 | "instance_build_id": instanceBuildID, 122 | }).Debug("checking basic auth against database") 123 | return sa.is.HasValidAuth(instanceBuildID, authParts[1]) 124 | } 125 | -------------------------------------------------------------------------------- /server/autoscaling_group_builds.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "encoding/json" 5 | "time" 6 | 7 | "github.com/garyburd/redigo/redis" 8 | "github.com/travis-ci/pudding" 9 | "github.com/travis-ci/pudding/db" 10 | ) 11 | 12 | type autoscalingGroupBuilder struct { 13 | QueueName string 14 | r *redis.Pool 15 | } 16 | 17 | func newAutoscalingGroupBuilder(r 
*redis.Pool, queueName string) (*autoscalingGroupBuilder, error) { 18 | return &autoscalingGroupBuilder{ 19 | QueueName: queueName, 20 | 21 | r: r, 22 | }, nil 23 | } 24 | 25 | func (asgb *autoscalingGroupBuilder) Build(b *pudding.AutoscalingGroupBuild) (*pudding.AutoscalingGroupBuild, error) { 26 | conn := asgb.r.Get() 27 | defer func() { _ = conn.Close() }() 28 | 29 | buildPayload := &pudding.AutoscalingGroupBuildPayload{ 30 | Args: []*pudding.AutoscalingGroupBuild{b}, 31 | Queue: asgb.QueueName, 32 | JID: b.ID, 33 | Retry: false, 34 | EnqueuedAt: float64(time.Now().UTC().Unix()), 35 | } 36 | 37 | buildPayloadJSON, err := json.Marshal(buildPayload) 38 | if err != nil { 39 | return nil, err 40 | } 41 | 42 | err = db.EnqueueJob(conn, asgb.QueueName, string(buildPayloadJSON)) 43 | return b, err 44 | } 45 | -------------------------------------------------------------------------------- /server/config.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | // Config is everything needed to run the server 4 | type Config struct { 5 | Addr string 6 | AuthToken string 7 | Debug bool 8 | 9 | RedisURL string 10 | 11 | SlackHookPath string 12 | SlackUsername string 13 | SlackIcon string 14 | DefaultSlackChannel string 15 | 16 | SentryDSN string 17 | 18 | InstanceExpiry int 19 | ImageExpiry int 20 | 21 | QueueNames map[string]string 22 | } 23 | -------------------------------------------------------------------------------- /server/instance_builder.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "encoding/json" 5 | "time" 6 | 7 | "github.com/garyburd/redigo/redis" 8 | "github.com/travis-ci/pudding" 9 | "github.com/travis-ci/pudding/db" 10 | ) 11 | 12 | type instanceBuilder struct { 13 | QueueName string 14 | r *redis.Pool 15 | } 16 | 17 | func newInstanceBuilder(r *redis.Pool, queueName string) (*instanceBuilder, error) { 18 | return 
&instanceBuilder{ 19 | QueueName: queueName, 20 | 21 | r: r, 22 | }, nil 23 | } 24 | 25 | func (ib *instanceBuilder) Build(b *pudding.InstanceBuild) (*pudding.InstanceBuild, error) { 26 | conn := ib.r.Get() 27 | defer func() { _ = conn.Close() }() 28 | 29 | buildPayload := &pudding.InstanceBuildPayload{ 30 | Args: []*pudding.InstanceBuild{b}, 31 | Queue: ib.QueueName, 32 | JID: b.ID, 33 | Retry: true, 34 | EnqueuedAt: float64(time.Now().UTC().Unix()), 35 | } 36 | 37 | buildPayloadJSON, err := json.Marshal(buildPayload) 38 | if err != nil { 39 | return nil, err 40 | } 41 | 42 | err = db.EnqueueJob(conn, ib.QueueName, string(buildPayloadJSON)) 43 | return b, err 44 | } 45 | -------------------------------------------------------------------------------- /server/instance_lifecycle_transition_handler.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "encoding/json" 5 | "time" 6 | 7 | "github.com/garyburd/redigo/redis" 8 | "github.com/travis-ci/pudding" 9 | "github.com/travis-ci/pudding/db" 10 | ) 11 | 12 | type instanceLifecycleTransitionHandler struct { 13 | QueueName string 14 | r *redis.Pool 15 | } 16 | 17 | func newInstanceLifecycleTransitionHandler(r *redis.Pool, queueName string) (*instanceLifecycleTransitionHandler, error) { 18 | return &instanceLifecycleTransitionHandler{ 19 | QueueName: queueName, 20 | r: r, 21 | }, nil 22 | } 23 | 24 | func (th *instanceLifecycleTransitionHandler) Handle(t *pudding.InstanceLifecycleTransition) (*pudding.InstanceLifecycleTransition, error) { 25 | conn := th.r.Get() 26 | defer func() { _ = conn.Close() }() 27 | 28 | messagePayload := &pudding.InstanceLifecycleTransitionPayload{ 29 | Args: []*pudding.InstanceLifecycleTransition{t}, 30 | Queue: th.QueueName, 31 | JID: t.ID, 32 | Retry: true, 33 | EnqueuedAt: float64(time.Now().UTC().Unix()), 34 | } 35 | 36 | messagePayloadJSON, err := json.Marshal(messagePayload) 37 | if err != nil { 38 | return nil, err 
// errResponse is the JSON envelope used for error responses.
type errResponse struct {
	Errors []*jsonError `json:"errors"`
}

// jsonError is a single entry in an errResponse.
type jsonError struct {
	Details string `json:"details"`
}

// newErrResponse converts a slice of errors into an errResponse with
// one entry per error, carrying that error's message.
func newErrResponse(errors []error) *errResponse {
	r := &errResponse{Errors: make([]*jsonError, 0, len(errors))}
	for _, err := range errors {
		r.Errors = append(r.Errors, &jsonError{Details: err.Error()})
	}

	return r
}

// setContentType marks the response as application/vnd.api+json.
func setContentType(w http.ResponseWriter) {
	w.Header().Set("Content-Type", "application/vnd.api+json")
}

// Error takes a singular error and responds appropriately
func Error(w http.ResponseWriter, err error, st int) {
	Errors(w, []error{err}, st)
}

// Errors takes an array of errors and responds with status st and an
// indented JSON body listing each error's message. If the body cannot
// be marshalled it responds with a plain 500 instead.
func Errors(w http.ResponseWriter, errors []error, st int) {
	// NOTE(review): the indent string is reproduced as it appears in
	// the reviewed text; confirm its width against the repository.
	b, err := json.MarshalIndent(newErrResponse(errors), "", " ")
	if err != nil {
		http.Error(w, "BOOM", http.StatusInternalServerError)
		return
	}

	setContentType(w)
	w.WriteHeader(st)
	// The payload is passed as an argument, never as the format
	// string, so "%" characters in it are written verbatim.
	fmt.Fprintf(w, "%s\n", b)
}

// Respond takes an arbitrary thing and `json.MarshalIndent`s it,
// writing the result with status st. If marshalling fails it responds
// with a 500 error document instead.
func Respond(w http.ResponseWriter, thing interface{}, st int) {
	// NOTE(review): the indent string is reproduced as it appears in
	// the reviewed text; confirm its width against the repository.
	b, err := json.MarshalIndent(thing, "", " ")
	if err != nil {
		Error(w, err, http.StatusInternalServerError)
		return
	}

	setContentType(w)
	w.WriteHeader(st)
	// The payload is passed as an argument, never as the format
	// string, so "%" characters in it are written verbatim.
	fmt.Fprintf(w, "%s\n", b)
}
-------------------------------------------------------------------------------- 1 | package negroniraven 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "net/http" 7 | 8 | "github.com/Sirupsen/logrus" 9 | "github.com/getsentry/raven-go" 10 | "github.com/travis-ci/pudding" 11 | ) 12 | 13 | type Middleware struct { 14 | cl *raven.Client 15 | log *logrus.Logger 16 | } 17 | 18 | func NewMiddleware(sentryDSN string) (*Middleware, error) { 19 | cl, err := raven.NewClient(sentryDSN, pudding.SentryTags) 20 | if err != nil { 21 | return nil, err 22 | } 23 | 24 | return &Middleware{cl: cl, log: logrus.New()}, nil 25 | } 26 | 27 | func (mw *Middleware) ServeHTTP(w http.ResponseWriter, req *http.Request, next http.HandlerFunc) { 28 | defer func() { 29 | var packet *raven.Packet 30 | 31 | p := recover() 32 | switch rval := p.(type) { 33 | case nil: 34 | return 35 | case error: 36 | packet = raven.NewPacket(rval.Error(), raven.NewException(rval, raven.NewStacktrace(2, 3, nil)), raven.NewHttp(req)) 37 | case *logrus.Entry: 38 | entryErrInterface, ok := rval.Data["err"] 39 | if !ok { 40 | entryErrInterface = fmt.Errorf(rval.Message) 41 | } 42 | 43 | entryErr, ok := entryErrInterface.(error) 44 | if !ok { 45 | entryErr = fmt.Errorf(rval.Message) 46 | } 47 | 48 | packet = raven.NewPacket(rval.Message, raven.NewException(entryErr, raven.NewStacktrace(2, 3, nil)), raven.NewHttp(req)) 49 | default: 50 | rvalStr := fmt.Sprint(rval) 51 | packet = raven.NewPacket(rvalStr, raven.NewException(errors.New(rvalStr), raven.NewStacktrace(2, 3, nil)), raven.NewHttp(req)) 52 | } 53 | 54 | pudding.SendRavenPacket(packet, mw.cl, mw.log, nil) 55 | panic(p) 56 | }() 57 | 58 | next(w, req) 59 | } 60 | -------------------------------------------------------------------------------- /server/negroniraven/negroniraven_test.go: -------------------------------------------------------------------------------- 1 | package negroniraven 2 | 3 | import "testing" 4 | 5 | func TestNothing(t *testing.T) { 6 | if 1 != 1 
{ 7 | t.Fail() 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /server/server.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "net/http" 7 | 8 | "github.com/Sirupsen/logrus" 9 | "github.com/braintree/manners" 10 | "github.com/codegangsta/negroni" 11 | "github.com/gorilla/feeds" 12 | "github.com/gorilla/mux" 13 | "github.com/meatballhat/expvarplus" 14 | "github.com/meatballhat/negroni-logrus" 15 | "github.com/phyber/negroni-gzip/gzip" 16 | "github.com/travis-ci/pudding" 17 | "github.com/travis-ci/pudding/db" 18 | "github.com/travis-ci/pudding/server/jsonapi" 19 | "github.com/travis-ci/pudding/server/negroniraven" 20 | ) 21 | 22 | var ( 23 | errMissingInstanceBuildID = fmt.Errorf("missing instance build id") 24 | errMissingInstanceID = fmt.Errorf("missing instance id") 25 | errKaboom = fmt.Errorf("simulated kaboom ʕノ•ᴥ•ʔノ ︵ ┻━┻") 26 | // errNotImplemented = fmt.Errorf("not implemented nope nope nope") 27 | errUnknownInstance = fmt.Errorf("unknown instance") 28 | ) 29 | 30 | const ( 31 | stateOutOfServiceMsg = "is out of service :arrow_down:" 32 | stateInServiceMsg = "is in service :arrow_up:" 33 | ) 34 | 35 | func init() { 36 | expvarplus.AddToEnvWhitelist("BUILDPACK_URL", 37 | "DEBUG", 38 | "DYNO", 39 | "GENERATED", 40 | "HOSTNAME", 41 | "PORT", 42 | "QUEUES", 43 | "REVISION", 44 | "VERSION", 45 | 46 | "PUDDING_DEFAULT_SLACK_CHANNEL", 47 | "PUDDING_INIT_SCRIPT_TEMPLATE", 48 | "PUDDING_INSTANCE_BUILDS_QUEUE_NAME", 49 | "PUDDING_INSTANCE_EXPIRY", 50 | "PUDDING_INSTANCE_RSA", 51 | "PUDDING_INSTANCE_TERMINATIONS_QUEUE_NAME", 52 | "PUDDING_INSTANCE_YML", 53 | "PUDDING_MINI_WORKER_INTERVAL", 54 | "PUDDING_PROCESS_ID", 55 | "PUDDING_REDIS_POOL_SIZE", 56 | "PUDDING_REDIS_URL", 57 | "PUDDING_SENTRY_DSN", 58 | "PUDDING_SLACK_TEAM", 59 | "PUDDING_TEMPORARY_INIT_EXPIRY", 60 | "PUDDING_WEB_HOSTNAME") 61 | } 62 | 63 | type 
server struct { 64 | addr, authToken, slackHookPath, slackUsername, slackIcon, slackChannel, sentryDSN string 65 | 66 | log *logrus.Logger 67 | builder *instanceBuilder 68 | asgBuilder *autoscalingGroupBuilder 69 | snsHandler *snsHandler 70 | iltHandler *instanceLifecycleTransitionHandler 71 | terminator *instanceTerminator 72 | auther *serverAuther 73 | is db.InitScriptGetterAuther 74 | i db.InstanceFetcherStorer 75 | img db.ImageFetcherStorer 76 | 77 | skipGracefulClose bool 78 | 79 | n *negroni.Negroni 80 | r *mux.Router 81 | s *manners.GracefulServer 82 | } 83 | 84 | func newServer(cfg *Config) (*server, error) { 85 | log := logrus.New() 86 | if cfg.Debug { 87 | log.Level = logrus.DebugLevel 88 | } 89 | 90 | r, err := db.BuildRedisPool(cfg.RedisURL) 91 | if err != nil { 92 | return nil, err 93 | } 94 | 95 | builder, err := newInstanceBuilder(r, cfg.QueueNames["instance-builds"]) 96 | if err != nil { 97 | return nil, err 98 | } 99 | 100 | asgBuilder, err := newAutoscalingGroupBuilder(r, cfg.QueueNames["autoscaling-group-builds"]) 101 | if err != nil { 102 | return nil, err 103 | } 104 | 105 | snsHandler, err := newSNSHandler(r, cfg.QueueNames["sns-messages"]) 106 | if err != nil { 107 | return nil, err 108 | } 109 | 110 | iltHandler, err := newInstanceLifecycleTransitionHandler(r, cfg.QueueNames["instance-lifecycle-transitions"]) 111 | if err != nil { 112 | return nil, err 113 | } 114 | 115 | terminator, err := newInstanceTerminator(r, cfg.QueueNames["instance-terminations"]) 116 | if err != nil { 117 | return nil, err 118 | } 119 | 120 | i, err := db.NewInstances(r, log, cfg.InstanceExpiry) 121 | if err != nil { 122 | return nil, err 123 | } 124 | 125 | img, err := db.NewImages(r, log, cfg.ImageExpiry) 126 | if err != nil { 127 | return nil, err 128 | } 129 | 130 | is, err := db.NewInitScripts(r, log) 131 | if err != nil { 132 | return nil, err 133 | } 134 | 135 | auther, err := newServerAuther(cfg.AuthToken, r, log) 136 | if err != nil { 137 | return nil, err 
138 | } 139 | 140 | srv := &server{ 141 | addr: cfg.Addr, 142 | authToken: cfg.AuthToken, 143 | auther: auther, 144 | 145 | slackHookPath: cfg.SlackHookPath, 146 | slackUsername: cfg.SlackUsername, 147 | slackIcon: cfg.SlackIcon, 148 | slackChannel: cfg.DefaultSlackChannel, 149 | 150 | sentryDSN: cfg.SentryDSN, 151 | 152 | builder: builder, 153 | asgBuilder: asgBuilder, 154 | snsHandler: snsHandler, 155 | iltHandler: iltHandler, 156 | terminator: terminator, 157 | is: is, 158 | i: i, 159 | img: img, 160 | log: log, 161 | 162 | skipGracefulClose: false, 163 | 164 | n: negroni.New(), 165 | r: mux.NewRouter(), 166 | } 167 | 168 | srv.s = manners.NewWithServer(&http.Server{ 169 | Addr: cfg.Addr, 170 | Handler: srv.n, 171 | }) 172 | 173 | return srv, nil 174 | } 175 | 176 | func (srv *server) Setup() { 177 | srv.setupRoutes() 178 | srv.setupMiddleware() 179 | } 180 | 181 | func (srv *server) Run() { 182 | srv.log.WithField("addr", srv.addr).Info("Listening") 183 | _ = srv.s.ListenAndServe() 184 | } 185 | 186 | func (srv *server) ServeHTTP(w http.ResponseWriter, req *http.Request) { 187 | srv.r.ServeHTTP(w, req) 188 | } 189 | 190 | func (srv *server) setupRoutes() { 191 | srv.r.HandleFunc(`/`, srv.handleGetRoot).Methods("GET").Name("ohai") 192 | srv.r.HandleFunc(`/`, srv.ifAuth(srv.handleDeleteRoot)).Methods("DELETE").Name("shutdown") 193 | srv.r.HandleFunc(`/debug/vars`, srv.ifAuth(expvarplus.HandleExpvars)).Methods("GET").Name("expvars") 194 | srv.r.HandleFunc(`/kaboom`, srv.ifAuth(srv.handleKaboom)).Methods("POST").Name("kaboom") 195 | 196 | srv.r.HandleFunc(`/autoscaling-group-builds`, srv.ifAuth(srv.handleAutoscalingGroupBuildsCreate)).Methods("POST").Name("autoscaling-group-builds-create") 197 | 198 | srv.r.HandleFunc(`/instances`, srv.ifAuth(srv.handleInstances)).Methods("GET").Name("instances") 199 | srv.r.HandleFunc(`/instances/{instance_id}`, srv.ifAuth(srv.handleInstanceByIDFetch)).Methods("GET").Name("instances-by-id") 200 | 
srv.r.HandleFunc(`/instances/{instance_id}`, srv.ifAuth(srv.handleInstanceByIDTerminate)).Methods("DELETE").Name("delete-instances-by-id") 201 | 202 | srv.r.HandleFunc(`/instance-builds`, srv.ifAuth(srv.handleInstanceBuildsCreate)).Methods("POST").Name("instance-builds-create") 203 | srv.r.HandleFunc(`/instance-builds/{uuid}`, srv.ifAuth(srv.handleInstanceBuildUpdateByID)).Methods("PATCH").Name("instance-builds-update-by-id") 204 | 205 | srv.r.HandleFunc(`/instance-launches/{uuid}`, srv.ifAuth(srv.handleInstanceLaunchesCreate)).Methods("POST").Name("instance-launches-create") 206 | 207 | srv.r.HandleFunc(`/instance-terminations/{uuid}`, srv.ifAuth(srv.handleInstanceTerminationsCreate)).Methods("POST").Name("instance-terminations-create") 208 | 209 | srv.r.HandleFunc(`/instance-heartbeats/{uuid}`, srv.ifAuth(srv.handleInstanceHeartbeat)).Methods("POST").Name("instance-heartbeats") 210 | 211 | srv.r.HandleFunc(`/init-scripts/{uuid}`, srv.ifAuth(srv.handleInitScripts)).Methods("GET").Name("init-scripts") 212 | 213 | srv.r.HandleFunc(`/sns-messages`, srv.handleSNSMessages).Name("sns-messages") 214 | 215 | srv.r.HandleFunc(`/images`, srv.ifAuth(srv.handleImages)).Methods("GET").Name("images") 216 | } 217 | 218 | func (srv *server) setupMiddleware() { 219 | srv.n.Use(negroni.NewRecovery()) 220 | srv.n.Use(negronilogrus.NewMiddleware()) 221 | srv.n.Use(gzip.Gzip(gzip.DefaultCompression)) 222 | nr, err := negroniraven.NewMiddleware(srv.sentryDSN) 223 | if err != nil { 224 | panic(err) 225 | } 226 | srv.n.Use(nr) 227 | srv.n.UseHandler(srv.r) 228 | } 229 | 230 | func (srv *server) ifAuth(f func(http.ResponseWriter, *http.Request)) func(http.ResponseWriter, *http.Request) { 231 | return func(w http.ResponseWriter, req *http.Request) { 232 | if !srv.auther.Authenticate(w, req) { 233 | return 234 | } 235 | 236 | f(w, req) 237 | } 238 | } 239 | func (srv *server) handleGetRoot(w http.ResponseWriter, req *http.Request) { 240 | w.Header().Set("Content-Type", "text-plain; 
charset=utf-8")
	w.WriteHeader(http.StatusOK)
	fmt.Fprintf(w, "ohai\n")
}

// handleDeleteRoot replies 204 No Content and then closes the underlying
// server, unless skipGracefulClose is set.
func (srv *server) handleDeleteRoot(w http.ResponseWriter, req *http.Request) {
	w.WriteHeader(http.StatusNoContent)
	if !srv.skipGracefulClose {
		srv.s.Close()
	}
}

// handleKaboom panics with errKaboom on purpose.
func (srv *server) handleKaboom(w http.ResponseWriter, req *http.Request) {
	panic(errKaboom)
}

// handleInstances lists instances, optionally filtered by the non-empty
// "env", "site", "role" and "queue" form values.
func (srv *server) handleInstances(w http.ResponseWriter, req *http.Request) {
	filter := map[string]string{}
	for _, key := range []string{"env", "site", "role", "queue"} {
		if v := req.FormValue(key); v != "" {
			filter[key] = v
		}
	}

	instances, err := srv.i.Fetch(filter)
	if err != nil {
		jsonapi.Error(w, err, http.StatusInternalServerError)
		return
	}

	jsonapi.Respond(w, map[string][]*pudding.Instance{
		"instances": instances,
	}, http.StatusOK)
}

// handleInstanceByIDFetch responds with the instances matching the
// "instance_id" route variable. A fetch failure is logged and reported as a
// 500.
func (srv *server) handleInstanceByIDFetch(w http.ResponseWriter, req *http.Request) {
	vars := mux.Vars(req)
	instances, err := srv.i.Fetch(map[string]string{"instance_id": vars["instance_id"]})
	if err != nil {
		srv.log.WithFields(logrus.Fields{
			"err":         err,
			"instance_id": vars["instance_id"],
		}).Error("failed to fetch instance")
		jsonapi.Error(w, err, http.StatusInternalServerError)
		return
	}

	jsonapi.Respond(w, map[string][]*pudding.Instance{
		"instances": instances,
	}, http.StatusOK)
}

// handleInstanceByIDTerminate asks the terminator to terminate the instance
// named by the "instance_id" route variable and replies 202 Accepted. The
// optional "slack-channel" form value is passed through to the terminator.
func (srv *server) handleInstanceByIDTerminate(w http.ResponseWriter, req *http.Request) {
	vars := mux.Vars(req)
	instanceID, ok := vars["instance_id"]
	if !ok {
		jsonapi.Error(w, errMissingInstanceID, http.StatusBadRequest)
		return
	}

	err := srv.terminator.Terminate(instanceID, req.FormValue("slack-channel"))
	if err != nil {
		jsonapi.Error(w, err, http.StatusInternalServerError)
		return
	}

	jsonapi.Respond(w, map[string]string{"ok": "working on that"}, http.StatusAccepted)
}

// handleInstanceBuildsCreate decodes an instance build from the request body,
// fills in defaults (ID, "pending" state, slack channel), validates it and
// hands it to the builder. It replies 400 on a malformed or invalid payload,
// 500 when the builder fails, and 202 with the build otherwise.
func (srv *server) handleInstanceBuildsCreate(w http.ResponseWriter, req *http.Request) {
	payload := &pudding.InstanceBuildsCollectionSingular{
		InstanceBuilds: pudding.NewInstanceBuild(),
	}
	err := json.NewDecoder(req.Body).Decode(payload)
	if err != nil {
		jsonapi.Error(w, err, http.StatusBadRequest)
		return
	}

	build := payload.InstanceBuilds
	if build == nil {
		// An explicit JSON null replaces the pre-populated default with nil;
		// reject it instead of dereferencing a nil build below.
		jsonapi.Error(w, fmt.Errorf("missing instance_builds"), http.StatusBadRequest)
		return
	}

	if build.ID == "" {
		build.ID = feeds.NewUUID().String()
	}

	if build.State == "" {
		build.State = "pending"
	}

	// The query/form value wins over the payload; the server default is the
	// last resort.
	if v := req.FormValue("slack-channel"); v != "" {
		build.SlackChannel = v
	}

	if build.SlackChannel == "" {
		build.SlackChannel = srv.slackChannel
	}

	validationErrors := build.Validate()
	if len(validationErrors) > 0 {
		jsonapi.Errors(w, validationErrors, http.StatusBadRequest)
		return
	}

	build, err = srv.builder.Build(build)
	if err != nil {
		jsonapi.Error(w, err, http.StatusInternalServerError)
		return
	}

	jsonapi.Respond(w, &pudding.InstanceBuildsCollection{
		InstanceBuilds: []*pudding.InstanceBuild{build},
	}, http.StatusAccepted)
}

// handleInstanceHeartbeat looks up the instance named by the "instance-id"
// form value and responds with it, defaulting an empty ExpectedState to "up".
// It replies 400 without an instance id and 404 when no instance matches.
func (srv *server) handleInstanceHeartbeat(w http.ResponseWriter, req *http.Request) {
	instanceID := req.FormValue("instance-id")
	if instanceID == "" {
		jsonapi.Error(w, errMissingInstanceID, http.StatusBadRequest)
		return
	}

	instances, err := srv.i.Fetch(map[string]string{"instance_id": instanceID})
	if err != nil {
		jsonapi.Error(w, err, http.StatusInternalServerError)
		return
	}

	if len(instances) < 1 {
		jsonapi.Error(w, errUnknownInstance, http.StatusNotFound)
		return
	}

	instance := instances[0]

	if instance.ExpectedState == "" {
		instance.ExpectedState = "up"
	}

	// XXX: the response format isn't really jsonapi, but I don't want the remote to have specific knowledge of
	// "instances" (grumble)
	jsonapi.Respond(w, instance, http.StatusOK)
}

// handleInstanceBuildUpdateByID reacts to a state update for the instance
// build named by the "uuid" route variable. Every state other than "finished"
// is a no-op; for "finished" a slack notification is sent when a hook path, a
// channel and a matching instance are all available.
func (srv *server) handleInstanceBuildUpdateByID(w http.ResponseWriter, req *http.Request) {
	vars := mux.Vars(req)
	instanceBuildID, ok := vars["uuid"]
	if !ok {
		jsonapi.Error(w, errMissingInstanceBuildID, http.StatusBadRequest)
		return
	}

	state := req.FormValue("state")
	if state != "finished" {
		srv.log.WithField("state", state).Debug("no-op state")
		jsonapi.Respond(w, map[string]string{"no": "op"}, http.StatusOK)
		return
	}

	slackChannel := req.FormValue("slack-channel")
	if slackChannel == "" {
		slackChannel = srv.slackChannel
	}

	instanceID := req.FormValue("instance-id")
	instances, err := srv.i.Fetch(map[string]string{"instance_id": instanceID})
	if err != nil {
		srv.log.WithFields(logrus.Fields{
			"err":         err,
			"instance_id": instanceID,
		}).Error("failed to fetch instance details")
		jsonapi.Error(w, errUnknownInstance, http.StatusNotFound)
		return
	}

	// FIXME: extract this bit for other notification types?
	if srv.slackHookPath != "" && slackChannel != "" && len(instances) > 0 {
		srv.log.Debug("sending slack notification!")
		inst := instances[0]

		notifier := pudding.NewSlackNotifier(srv.slackHookPath, srv.slackUsername, srv.slackIcon)
		err := notifier.Notify(slackChannel,
			fmt.Sprintf("Finished starting instance `%s` for instance build *%s* %s",
				instanceID, instanceBuildID, pudding.NotificationInstanceSummary(inst)))
		if err != nil {
			// A failed notification does not fail the request.
			srv.log.WithField("err", err).Error("failed to send slack notification")
		}
	} else {
		srv.log.WithFields(logrus.Fields{
			"slack_hook_path": srv.slackHookPath,
		}).Debug("slack fields empty or no matching instances?")
	}

	jsonapi.Respond(w, map[string]string{"sure": "why not"}, http.StatusOK)
}

// handleAutoscalingGroupBuildsCreate decodes an autoscaling group build from
// the request body, fills in defaults (ID, slack channel), validates it and
// hands it to the autoscaling group builder. It replies 400 on a malformed,
// missing or invalid payload, 500 when the builder fails, and 202 with the
// build otherwise.
func (srv *server) handleAutoscalingGroupBuildsCreate(w http.ResponseWriter, req *http.Request) {
	payload := &pudding.AutoscalingGroupBuildsCollectionSingular{}
	err := json.NewDecoder(req.Body).Decode(payload)
	if err != nil {
		jsonapi.Error(w, err, http.StatusBadRequest)
		return
	}

	build := payload.AutoscalingGroupBuilds
	if build == nil {
		// A well-formed body without the "autoscaling_group_builds" key
		// (e.g. `{}`) leaves the field nil; reject it instead of panicking.
		jsonapi.Error(w, fmt.Errorf("missing autoscaling_group_builds"), http.StatusBadRequest)
		return
	}

	if build.ID == "" {
		build.ID = feeds.NewUUID().String()
	}

	if v := req.FormValue("slack-channel"); v != "" {
		build.SlackChannel = v
	}

	if build.SlackChannel == "" {
		build.SlackChannel = srv.slackChannel
	}

	validationErrors := build.Validate()
	if len(validationErrors) > 0 {
		jsonapi.Errors(w, validationErrors, http.StatusBadRequest)
		return
	}

	build, err = srv.asgBuilder.Build(build)
	if err != nil {
		jsonapi.Error(w, err, http.StatusInternalServerError)
		return
	}

	jsonapi.Respond(w, &pudding.AutoscalingGroupBuildsCollection{
		AutoscalingGroupBuilds: []*pudding.AutoscalingGroupBuild{build},
	}, http.StatusAccepted)
}

473 | func (srv *server) handleInstanceLaunchesCreate(w http.ResponseWriter, req *http.Request) { 474 | srv.handleInstanceLifecycleTransition("launching", w, req) 475 | } 476 | 477 | func (srv *server) handleInstanceTerminationsCreate(w http.ResponseWriter, req *http.Request) { 478 | srv.handleInstanceLifecycleTransition("terminating", w, req) 479 | } 480 | 481 | func (srv *server) handleInstanceLifecycleTransition(transition string, w http.ResponseWriter, req *http.Request) { 482 | t := &pudding.InstanceLifecycleTransition{} 483 | 484 | err := json.NewDecoder(req.Body).Decode(t) 485 | if err != nil { 486 | jsonapi.Error(w, err, http.StatusBadRequest) 487 | return 488 | } 489 | 490 | t.Transition = transition 491 | t.ID = feeds.NewUUID().String() 492 | 493 | _, err = srv.iltHandler.Handle(t) 494 | if err != nil { 495 | jsonapi.Error(w, err, http.StatusInternalServerError) 496 | return 497 | } 498 | 499 | slackChannel := req.FormValue("slack-channel") 500 | if slackChannel == "" { 501 | slackChannel = srv.slackChannel 502 | } 503 | 504 | instances, _ := srv.i.Fetch(map[string]string{"instance_id": t.InstanceID}) 505 | 506 | if srv.slackHookPath != "" && slackChannel != "" && instances != nil && len(instances) > 0 { 507 | srv.log.Debug("sending slack notification!") 508 | inst := instances[0] 509 | notifier := pudding.NewSlackNotifier(srv.slackHookPath, srv.slackUsername, srv.slackIcon) 510 | stateMsg := "" 511 | switch transition { 512 | case "terminating": 513 | stateMsg = stateOutOfServiceMsg 514 | case "launching": 515 | stateMsg = stateInServiceMsg 516 | } 517 | if stateMsg != "" { 518 | err := notifier.Notify(slackChannel, fmt.Sprintf("Instance `%s` is %s %s", 519 | t.InstanceID, stateMsg, pudding.NotificationInstanceSummary(inst))) 520 | if err != nil { 521 | srv.log.WithField("err", err).Error("failed to send slack notification") 522 | } 523 | } 524 | } 525 | 526 | jsonapi.Respond(w, map[string]string{"yay": t.InstanceID}, http.StatusOK) 527 | } 528 | 529 | 
func (srv *server) handleInitScripts(w http.ResponseWriter, req *http.Request) { 530 | vars := mux.Vars(req) 531 | instanceBuildID, ok := vars["uuid"] 532 | if !ok { 533 | jsonapi.Error(w, errMissingInstanceBuildID, http.StatusBadRequest) 534 | return 535 | } 536 | 537 | srv.sendInitScript(w, instanceBuildID) 538 | } 539 | 540 | func (srv *server) sendInitScript(w http.ResponseWriter, ID string) { 541 | script, err := srv.is.Get(ID) 542 | if err != nil { 543 | srv.log.WithFields(logrus.Fields{ 544 | "err": err, 545 | "id": ID, 546 | }).Error("failed to get init script") 547 | jsonapi.Error(w, err, http.StatusInternalServerError) 548 | return 549 | } 550 | 551 | w.Header().Set("Content-Type", "text/x-shellscript; charset=utf-8") 552 | w.WriteHeader(http.StatusOK) 553 | fmt.Fprintf(w, script) 554 | } 555 | 556 | func (srv *server) handleSNSMessages(w http.ResponseWriter, req *http.Request) { 557 | msg := pudding.NewSNSMessage() 558 | 559 | err := json.NewDecoder(req.Body).Decode(&msg) 560 | if err != nil { 561 | jsonapi.Error(w, err, http.StatusBadRequest) 562 | return 563 | } 564 | 565 | _, err = srv.snsHandler.Handle(msg) 566 | if err != nil { 567 | jsonapi.Error(w, err, http.StatusInternalServerError) 568 | return 569 | } 570 | 571 | jsonapi.Respond(w, map[string][]*pudding.SNSMessage{ 572 | "sns_messages": []*pudding.SNSMessage{msg}, 573 | }, http.StatusOK) 574 | } 575 | 576 | func (srv *server) handleImages(w http.ResponseWriter, req *http.Request) { 577 | f := map[string]string{} 578 | for _, qv := range []string{"active", "role"} { 579 | v := req.FormValue(qv) 580 | if v != "" { 581 | f[qv] = v 582 | } 583 | } 584 | 585 | images, err := srv.img.Fetch(f) 586 | if err != nil { 587 | jsonapi.Error(w, err, http.StatusInternalServerError) 588 | return 589 | } 590 | 591 | jsonapi.Respond(w, map[string][]*pudding.Image{ 592 | "images": images, 593 | }, http.StatusOK) 594 | } 595 | -------------------------------------------------------------------------------- 
/server/server_test.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "io" 8 | "net/http" 9 | "net/http/httptest" 10 | "net/url" 11 | "os" 12 | "regexp" 13 | "strings" 14 | "testing" 15 | 16 | "github.com/garyburd/redigo/redis" 17 | "github.com/goamz/goamz/ec2" 18 | "github.com/travis-ci/pudding" 19 | "github.com/travis-ci/pudding/db" 20 | ) 21 | 22 | var ( 23 | defaultTestAuthToken = "swordfish" 24 | defaultTestInstanceID = "i-abcd123" 25 | defaultTestInstanceBuildUUID = "abcd1234-abcd-abcd-abcd-abcd12345678" 26 | defaultTestInstanceBuildAuth = "swordfish-9000" 27 | ) 28 | 29 | func init() { 30 | pudding.RedisNamespace = "pudding-test" 31 | } 32 | 33 | func buildTestConfig() *Config { 34 | return &Config{ 35 | Addr: ":17321", 36 | AuthToken: defaultTestAuthToken, 37 | Debug: true, 38 | RedisURL: func() string { 39 | v := os.Getenv("REDIS_URL") 40 | if v == "" { 41 | v = "redis://localhost:6379/0" 42 | } 43 | return v 44 | }(), 45 | } 46 | } 47 | 48 | func collapsedJSON(s string) string { 49 | out := []string{} 50 | for _, part := range strings.Split(s, "\n") { 51 | for _, subpart := range strings.Split(part, " ") { 52 | out = append(out, strings.TrimSpace(subpart)) 53 | } 54 | } 55 | return strings.Join(out, "") 56 | } 57 | 58 | func ensureExampleDataPresent(redisURL string) { 59 | u, err := url.Parse(redisURL) 60 | if err != nil { 61 | panic(err) 62 | } 63 | 64 | conn, err := redis.Dial("tcp", u.Host) 65 | if err != nil { 66 | panic(err) 67 | } 68 | 69 | err = db.StoreInstances(conn, map[string]ec2.Instance{ 70 | defaultTestInstanceID: ec2.Instance{ 71 | InstanceId: defaultTestInstanceID, 72 | InstanceType: "c3.2xlarge", 73 | ImageId: "ami-abcd123", 74 | IPAddress: "", 75 | PrivateIPAddress: "10.0.0.1", 76 | LaunchTime: "1955-11-05T21:30:19+0800", 77 | }, 78 | }, 300) 79 | if err != nil { 80 | panic(err) 81 | } 82 | 83 | err = conn.Send("HSET", 
fmt.Sprintf("%s:auths", pudding.RedisNamespace), defaultTestInstanceBuildUUID, defaultTestInstanceBuildAuth) 84 | if err != nil { 85 | panic(err) 86 | } 87 | } 88 | 89 | func buildTestServer(cfg *Config) *server { 90 | if cfg == nil { 91 | cfg = buildTestConfig() 92 | } 93 | 94 | srv, err := newServer(cfg) 95 | if err != nil { 96 | panic(err) 97 | } 98 | 99 | srv.Setup() 100 | srv.skipGracefulClose = true 101 | 102 | ensureExampleDataPresent(cfg.RedisURL) 103 | return srv 104 | } 105 | 106 | func makeRequest(method, path string, body io.Reader) *httptest.ResponseRecorder { 107 | return makeRequestWithHeaders(method, path, body, map[string]string{}) 108 | } 109 | 110 | func makeAuthenticatedRequest(method, path string, body io.Reader) *httptest.ResponseRecorder { 111 | return makeRequestWithHeaders(method, path, body, 112 | map[string]string{"Authorization": fmt.Sprintf("token %s", defaultTestAuthToken)}) 113 | } 114 | 115 | func makeRequestWithHeaders(method, path string, body io.Reader, headers map[string]string) *httptest.ResponseRecorder { 116 | srv := buildTestServer(nil) 117 | 118 | if body == nil { 119 | body = bytes.NewReader([]byte("")) 120 | } 121 | req, err := http.NewRequest(method, fmt.Sprintf("http://example.com%s", path), body) 122 | if err != nil { 123 | panic(err) 124 | } 125 | 126 | for key, value := range headers { 127 | req.Header.Set(key, value) 128 | } 129 | 130 | w := httptest.NewRecorder() 131 | srv.ServeHTTP(w, req) 132 | 133 | return w 134 | } 135 | 136 | func makeTestAutoscalingGroupBuildRequest() io.Reader { 137 | return strings.NewReader(`{ 138 | "autoscaling_group_builds": { 139 | "site": "com", 140 | "env": "prod", 141 | "queue": "fancy", 142 | "role": "worky", 143 | "instance_id": "i-abcd123", 144 | "role_arn": "arn:aws:iam::1234567899:role/pudding-test-foo", 145 | "topic_arn": "arn:aws:sns:us-east-1::1234567899:pudding-test-foo", 146 | "min_size": 1, 147 | "max_size": 10, 148 | "desired_capacity": 1, 149 | "default_cooldown": 1200, 
150 | "instance_type": "c3.4xlarge" 151 | } 152 | }`) 153 | } 154 | 155 | func makeTestInstanceBuildsRequest() io.Reader { 156 | return strings.NewReader(`{ 157 | "instance_builds": { 158 | "count": 1, 159 | "site": "org", 160 | "env": "test", 161 | "queue": "docker", 162 | "role": "worker", 163 | "instance_type": "c3.4xlarge", 164 | "boot_instance": true 165 | } 166 | }`) 167 | } 168 | 169 | func makeTestInstanceLaunchesRequest() io.Reader { 170 | return strings.NewReader(fmt.Sprintf(`{ 171 | "id": "whatever", 172 | "instance_id": "%s", 173 | "transition": "launching" 174 | }`, defaultTestInstanceID)) 175 | } 176 | 177 | func makeTestInstanceTerminationsRequest() io.Reader { 178 | return strings.NewReader(fmt.Sprintf(`{ 179 | "id": "whatever", 180 | "instance_id": "%s", 181 | "transition": "terminating" 182 | }`, defaultTestInstanceID)) 183 | } 184 | 185 | func assertStatus(t *testing.T, expected, actual int) { 186 | if actual != expected { 187 | t.Errorf("response status %v != %v", actual, expected) 188 | } 189 | } 190 | 191 | func assertBody(t *testing.T, expected, actual string) { 192 | if actual != expected { 193 | t.Errorf("response body %q != %q", actual, expected) 194 | } 195 | } 196 | 197 | func assertBodyMatches(t *testing.T, expected, actual string) { 198 | re := regexp.MustCompile(expected) 199 | if !re.MatchString(actual) { 200 | t.Errorf("response body %q !~ %q", expected, actual) 201 | } 202 | } 203 | 204 | func assertNotBody(t *testing.T, notExpected, actual string) { 205 | if actual == notExpected { 206 | t.Errorf("response body %q == %q", actual, notExpected) 207 | } 208 | } 209 | 210 | func TestGetOhai(t *testing.T) { 211 | w := makeRequest("GET", "/", nil) 212 | assertStatus(t, 200, w.Code) 213 | assertBody(t, "ohai\n", w.Body.String()) 214 | } 215 | 216 | func TestShutdown(t *testing.T) { 217 | w := makeAuthenticatedRequest("DELETE", "/", nil) 218 | assertStatus(t, 204, w.Code) 219 | } 220 | 221 | func TestExpvars(t *testing.T) { 222 | w := 
makeAuthenticatedRequest("GET", "/debug/vars", nil) 223 | assertStatus(t, 200, w.Code) 224 | } 225 | 226 | func TestKaboom(t *testing.T) { 227 | defer func() { 228 | if recover() == nil { 229 | t.Fatalf("kaboom did not panic") 230 | } 231 | }() 232 | makeAuthenticatedRequest("POST", "/kaboom", nil) 233 | } 234 | 235 | func TestCreateAutoscalingGroupBuild(t *testing.T) { 236 | w := makeAuthenticatedRequest("POST", "/autoscaling-group-builds", nil) 237 | assertStatus(t, 400, w.Code) 238 | 239 | w = makeAuthenticatedRequest("POST", "/autoscaling-group-builds", makeTestAutoscalingGroupBuildRequest()) 240 | assertStatus(t, 202, w.Code) 241 | } 242 | 243 | func TestGetInstances(t *testing.T) { 244 | w := makeAuthenticatedRequest("GET", "/instances", nil) 245 | assertStatus(t, 200, w.Code) 246 | assertNotBody(t, `{"instances":[]}`, collapsedJSON(w.Body.String())) 247 | } 248 | 249 | func TestGetInstanceByID(t *testing.T) { 250 | w := makeAuthenticatedRequest("GET", "/instances/i-bogus123", nil) 251 | assertStatus(t, 200, w.Code) 252 | assertBody(t, `{"instances":[]}`, collapsedJSON(w.Body.String())) 253 | 254 | w = makeAuthenticatedRequest("GET", fmt.Sprintf("/instances/%s", defaultTestInstanceID), nil) 255 | assertStatus(t, 200, w.Code) 256 | assertNotBody(t, `{"instances":[]}`, collapsedJSON(w.Body.String())) 257 | } 258 | 259 | func TestDeleteInstanceByID(t *testing.T) { 260 | w := makeAuthenticatedRequest("DELETE", "/instances/i-bogus123", nil) 261 | assertStatus(t, 202, w.Code) 262 | assertBody(t, `{"ok":"workingonthat"}`, collapsedJSON(w.Body.String())) 263 | 264 | w = makeAuthenticatedRequest("DELETE", fmt.Sprintf("/instances/%s", defaultTestInstanceID), nil) 265 | assertStatus(t, 202, w.Code) 266 | assertBody(t, `{"ok":"workingonthat"}`, collapsedJSON(w.Body.String())) 267 | } 268 | 269 | func TestInstanceBuildsCreate(t *testing.T) { 270 | w := makeAuthenticatedRequest("POST", "/instance-builds", nil) 271 | assertStatus(t, 400, w.Code) 272 | 273 | w = 
makeAuthenticatedRequest("POST", "/instance-builds", makeTestInstanceBuildsRequest()) 274 | assertStatus(t, 202, w.Code) 275 | assertBodyMatches(t, `^{"instance_builds":\[{"role":"worker","site":"org","env":"test","ami":"",`+ 276 | `"instance_type":"c3.4xlarge","slack_channel":"","count":1,"queue":"docker",`+ 277 | `"state":"pending","id":"[^"]{36}","boot_instance":true}\]}$`, collapsedJSON(w.Body.String())) 278 | } 279 | 280 | func TestInstancebuildsUpdate(t *testing.T) { 281 | w := makeAuthenticatedRequest("POST", "/instance-builds", makeTestInstanceBuildsRequest()) 282 | assertStatus(t, 202, w.Code) 283 | body := w.Body.String() 284 | assertBodyMatches(t, `^{"instance_builds":\[{"role":"worker","site":"org","env":"test","ami":"",`+ 285 | `"instance_type":"c3.4xlarge","slack_channel":"","count":1,"queue":"docker",`+ 286 | `"state":"pending","id":"[^"]{36}","boot_instance":true}\]}$`, collapsedJSON(body)) 287 | 288 | bodyMap := map[string][]map[string]interface{}{} 289 | err := json.Unmarshal([]byte(body), &bodyMap) 290 | if err != nil { 291 | t.Error(err) 292 | } 293 | 294 | fmt.Fprintf(os.Stderr, "%#v\n", bodyMap) 295 | id := bodyMap["instance_builds"][0]["id"].(string) 296 | 297 | basePath := fmt.Sprintf("/instance-builds/%s?instance_id=%s&state=", id, defaultTestInstanceID) 298 | w = makeAuthenticatedRequest("PATCH", basePath+"started", nil) 299 | assertStatus(t, 200, w.Code) 300 | assertBody(t, `{"no":"op"}`, collapsedJSON(w.Body.String())) 301 | 302 | w = makeAuthenticatedRequest("PATCH", basePath+"almost-there", nil) 303 | assertStatus(t, 200, w.Code) 304 | assertBody(t, `{"no":"op"}`, collapsedJSON(w.Body.String())) 305 | 306 | w = makeAuthenticatedRequest("PATCH", basePath+"finished", nil) 307 | assertStatus(t, 200, w.Code) 308 | assertBody(t, `{"sure":"whynot"}`, collapsedJSON(w.Body.String())) 309 | } 310 | 311 | func TestInstanceLaunchesCreate(t *testing.T) { 312 | w := makeAuthenticatedRequest("POST", fmt.Sprintf("/instance-launches/%s", 
defaultTestInstanceBuildUUID), strings.NewReader("{")) 313 | assertStatus(t, 400, w.Code) 314 | 315 | w = makeAuthenticatedRequest("POST", fmt.Sprintf("/instance-launches/%s", defaultTestInstanceBuildUUID), 316 | makeTestInstanceLaunchesRequest()) 317 | assertStatus(t, 200, w.Code) 318 | assertBody(t, fmt.Sprintf(`{"yay":"%s"}`, defaultTestInstanceID), collapsedJSON(w.Body.String())) 319 | } 320 | 321 | func TestInstanceTerminationsCreate(t *testing.T) { 322 | w := makeAuthenticatedRequest("POST", fmt.Sprintf("/instance-terminations/%s", defaultTestInstanceBuildUUID), strings.NewReader("{")) 323 | assertStatus(t, 400, w.Code) 324 | 325 | w = makeAuthenticatedRequest("POST", fmt.Sprintf("/instance-terminations/%s", defaultTestInstanceBuildUUID), 326 | makeTestInstanceTerminationsRequest()) 327 | assertStatus(t, 200, w.Code) 328 | assertBody(t, fmt.Sprintf(`{"yay":"%s"}`, defaultTestInstanceID), collapsedJSON(w.Body.String())) 329 | } 330 | -------------------------------------------------------------------------------- /server/sns_handler.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "encoding/json" 5 | "time" 6 | 7 | "github.com/garyburd/redigo/redis" 8 | "github.com/travis-ci/pudding" 9 | "github.com/travis-ci/pudding/db" 10 | ) 11 | 12 | type snsHandler struct { 13 | QueueName string 14 | r *redis.Pool 15 | } 16 | 17 | func newSNSHandler(r *redis.Pool, queueName string) (*snsHandler, error) { 18 | return &snsHandler{ 19 | QueueName: queueName, 20 | r: r, 21 | }, nil 22 | } 23 | 24 | func (sh *snsHandler) Handle(msg *pudding.SNSMessage) (*pudding.SNSMessage, error) { 25 | conn := sh.r.Get() 26 | defer func() { _ = conn.Close() }() 27 | 28 | messagePayload := &pudding.SNSMessagePayload{ 29 | Args: []*pudding.SNSMessage{msg}, 30 | Queue: sh.QueueName, 31 | JID: msg.MessageID, 32 | Retry: true, 33 | EnqueuedAt: float64(time.Now().UTC().Unix()), 34 | } 35 | 36 | messagePayloadJSON, err := 
json.Marshal(messagePayload) 37 | if err != nil { 38 | return nil, err 39 | } 40 | 41 | err = db.EnqueueJob(conn, sh.QueueName, string(messagePayloadJSON)) 42 | return msg, err 43 | } 44 | -------------------------------------------------------------------------------- /slack_notifier.go: -------------------------------------------------------------------------------- 1 | package pudding 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "net/http" 8 | "strings" 9 | ) 10 | 11 | var ( 12 | errBadSlackResponse = fmt.Errorf("received a response status > 299 from slack") 13 | ) 14 | 15 | const ( 16 | instSummaryFmt = "_(site=*%s* env=*%s* queue=*%s* role=*%s*)_" 17 | ) 18 | 19 | // SlackNotifier notifies on slack omgeeeee! ☃ 20 | type SlackNotifier struct { 21 | hookPath, username, icon string 22 | } 23 | 24 | // NewSlackNotifier creates a new *SlackNotifier given a team and 25 | // token 26 | func NewSlackNotifier(hookPath, username, icon string) *SlackNotifier { 27 | return &SlackNotifier{ 28 | hookPath: hookPath, 29 | username: username, 30 | icon: func() string { 31 | if icon == "" { 32 | return ":travis:" 33 | } 34 | return icon 35 | }(), 36 | } 37 | } 38 | 39 | // Notify sends a notification message (msg) to the given channel, 40 | // which may or may not begin with `#` 41 | func (sn *SlackNotifier) Notify(channel, msg string) error { 42 | if !strings.HasPrefix("#", channel) { 43 | channel = fmt.Sprintf("#%s", channel) 44 | } 45 | 46 | bodyMap := map[string]string{ 47 | "text": msg, 48 | "channel": channel, 49 | "username": sn.username, 50 | "icon_emoji": sn.icon, 51 | } 52 | 53 | b, err := json.Marshal(bodyMap) 54 | if err != nil { 55 | return err 56 | } 57 | 58 | u := fmt.Sprintf("https://hooks.slack.com/services/%s", sn.hookPath) 59 | resp, err := http.Post(u, "application/x-www-form-urlencoded", bytes.NewReader(b)) 60 | if err != nil { 61 | return err 62 | } 63 | 64 | if resp.StatusCode > 299 { 65 | return errBadSlackResponse 66 | } 67 | 68 | 
return nil 69 | } 70 | 71 | // NotificationInstanceSummary returns either an empty string or a summary of relevant bits for use in a 72 | // notification 73 | func NotificationInstanceSummary(inst *Instance) string { 74 | if inst.Site == "" && inst.Env == "" && inst.Queue == "" && inst.Role == "" { 75 | return "" 76 | } 77 | 78 | return fmt.Sprintf(instSummaryFmt, inst.Site, inst.Env, inst.Queue, inst.Role) 79 | } 80 | 81 | // NotificationInstanceBuildSummary returns either an empty string or a summary of relevant bits for use in a 82 | // notification 83 | func NotificationInstanceBuildSummary(ib *InstanceBuild) string { 84 | if ib.Site == "" && ib.Env == "" && ib.Queue == "" && ib.Role == "" { 85 | return "" 86 | } 87 | 88 | return fmt.Sprintf(instSummaryFmt, ib.Site, ib.Env, ib.Queue, ib.Role) 89 | } 90 | -------------------------------------------------------------------------------- /sns_message.go: -------------------------------------------------------------------------------- 1 | package pudding 2 | 3 | import "encoding/json" 4 | 5 | // SNSMessage is totally an SNS message, eh 6 | type SNSMessage struct { 7 | Message string 8 | MessageID string `json:"MessageId"` 9 | Signature string 10 | SignatureVersion string 11 | SigningCertURL string 12 | Subject string 13 | SubscribeURL string 14 | Timestamp string 15 | Token string 16 | TopicARN string `json:"TopicArn"` 17 | Type string 18 | UnsubscribeURL string 19 | MessageAttributes map[string]*SNSMessageAttribute 20 | } 21 | 22 | // SNSMessageAttribute is what shows up in MessageAttributes 23 | type SNSMessageAttribute struct { 24 | Type string 25 | Value string 26 | } 27 | 28 | // NewSNSMessage makes a new SNSMessage with empty MessageAttributes map 29 | func NewSNSMessage() *SNSMessage { 30 | return &SNSMessage{ 31 | MessageAttributes: map[string]*SNSMessageAttribute{}, 32 | } 33 | } 34 | 35 | // AutoscalingLifecycleAction attempts to unmarshal the message payload into an *AutoscalingLifecycleAction 36 | func 
(m *SNSMessage) AutoscalingLifecycleAction() (*AutoscalingLifecycleAction, error) { 37 | a := &AutoscalingLifecycleAction{} 38 | err := json.Unmarshal([]byte(m.Message), a) 39 | if err != nil { 40 | return nil, err 41 | } 42 | 43 | return a, nil 44 | } 45 | -------------------------------------------------------------------------------- /sns_message_payload.go: -------------------------------------------------------------------------------- 1 | package pudding 2 | 3 | // SNSMessagePayload is the raw SNS message representation sent to the 4 | // background workers 5 | type SNSMessagePayload struct { 6 | Args []*SNSMessage `json:"args"` 7 | Queue string `json:"queue,omitempty"` 8 | JID string `json:"jid,omitempty"` 9 | Retry bool `json:"retry,omitempty"` 10 | EnqueuedAt float64 `json:"enqueued_at,omitempty"` 11 | } 12 | 13 | // SNSMessage returns the SNS message from the args array 14 | func (smp *SNSMessagePayload) SNSMessage() *SNSMessage { 15 | if len(smp.Args) < 1 { 16 | return nil 17 | } 18 | 19 | return smp.Args[0] 20 | } 21 | -------------------------------------------------------------------------------- /version.go: -------------------------------------------------------------------------------- 1 | package pudding 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "time" 7 | 8 | "github.com/codegangsta/cli" 9 | ) 10 | 11 | const ( 12 | generatedTimeFormat = "2006-01-02T15:04:05-0700" 13 | ) 14 | 15 | var ( 16 | // VersionString is the git describe version set at build time 17 | VersionString = "?" 18 | // RevisionString is the git revision set at build time 19 | RevisionString = "?" 20 | // GeneratedString is the build date set at build time 21 | GeneratedString = "?" 
22 | ) 23 | 24 | func init() { 25 | cli.VersionPrinter = customVersionPrinter 26 | _ = os.Setenv("VERSION", VersionString) 27 | _ = os.Setenv("REVISION", RevisionString) 28 | _ = os.Setenv("GENERATED", GeneratedString) 29 | } 30 | 31 | func customVersionPrinter(c *cli.Context) { 32 | fmt.Printf("%v v=%v rev=%v d=%v\n", 33 | c.App.Name, c.App.Version, RevisionString, c.App.Compiled.Format(generatedTimeFormat)) 34 | } 35 | 36 | // GeneratedTime returns the parsed GeneratedString if it isn't `?` 37 | func GeneratedTime() time.Time { 38 | if GeneratedString != "?" { 39 | t, err := time.Parse(generatedTimeFormat, GeneratedString) 40 | if err == nil { 41 | return t 42 | } 43 | } 44 | 45 | info, err := os.Stat(os.Args[0]) 46 | if err != nil { 47 | return time.Now().UTC() 48 | } 49 | return info.ModTime() 50 | } 51 | -------------------------------------------------------------------------------- /workers/autoscaling_group_builds.go: -------------------------------------------------------------------------------- 1 | package workers 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "html/template" 8 | 9 | "github.com/Sirupsen/logrus" 10 | "github.com/garyburd/redigo/redis" 11 | "github.com/goamz/goamz/autoscaling" 12 | "github.com/goamz/goamz/cloudwatch" 13 | "github.com/goamz/goamz/ec2" 14 | "github.com/jrallison/go-workers" 15 | "github.com/travis-ci/pudding" 16 | ) 17 | 18 | func init() { 19 | defaultQueueFuncs["autoscaling-group-builds"] = autoscalingGroupBuildsMain 20 | } 21 | 22 | func autoscalingGroupBuildsMain(cfg *internalConfig, msg *workers.Msg) { 23 | buildPayloadJSON := []byte(msg.OriginalJson()) 24 | buildPayload := &pudding.AutoscalingGroupBuildPayload{ 25 | Args: []*pudding.AutoscalingGroupBuild{}, 26 | } 27 | 28 | err := json.Unmarshal(buildPayloadJSON, buildPayload) 29 | if err != nil { 30 | log.WithField("err", err).Panic("failed to deserialize message") 31 | } 32 | 33 | b := buildPayload.AutoscalingGroupBuild() 34 | b.Hydrate() 35 | 36 | w, 
err := newAutoscalingGroupBuilderWorker(b, cfg, msg.Jid(), workers.Config.Pool.Get()) 37 | if err != nil { 38 | log.WithField("err", err).Panic("autoscaling group build worker creation failed") 39 | } 40 | 41 | err = w.Build() 42 | if err != nil { 43 | log.WithField("err", err).Panic("autoscaling group build failed") 44 | } 45 | } 46 | 47 | type autoscalingGroupBuilderWorker struct { 48 | rc redis.Conn 49 | n []pudding.Notifier 50 | jid string 51 | cfg *internalConfig 52 | ec2 *ec2.EC2 53 | as *autoscaling.AutoScaling 54 | cw *cloudwatch.CloudWatch 55 | b *pudding.AutoscalingGroupBuild 56 | name string 57 | sopARN string 58 | sipARN string 59 | } 60 | 61 | func newAutoscalingGroupBuilderWorker(b *pudding.AutoscalingGroupBuild, cfg *internalConfig, jid string, redisConn redis.Conn) (*autoscalingGroupBuilderWorker, error) { 62 | notifier := pudding.NewSlackNotifier(cfg.SlackHookPath, cfg.SlackUsername, cfg.SlackIcon) 63 | 64 | cw, err := cloudwatch.NewCloudWatch(cfg.AWSAuth, cfg.AWSRegion.CloudWatchServicepoint) 65 | if err != nil { 66 | return nil, err 67 | } 68 | 69 | return &autoscalingGroupBuilderWorker{ 70 | rc: redisConn, 71 | jid: jid, 72 | cfg: cfg, 73 | n: []pudding.Notifier{notifier}, 74 | b: b, 75 | ec2: ec2.New(cfg.AWSAuth, cfg.AWSRegion), 76 | as: autoscaling.New(cfg.AWSAuth, cfg.AWSRegion), 77 | cw: cw, 78 | }, nil 79 | } 80 | 81 | func (asgbw *autoscalingGroupBuilderWorker) Build() error { 82 | asg, err := asgbw.createAutoscalingGroup() 83 | if err != nil { 84 | log.WithFields(logrus.Fields{ 85 | "err": err, 86 | "jid": asgbw.jid, 87 | }).Error("failed to create autoscaling group") 88 | return err 89 | } 90 | 91 | sopARN, err := asgbw.createScaleOutPolicy() 92 | if err != nil { 93 | log.WithFields(logrus.Fields{ 94 | "err": err, 95 | "name": asg.AutoScalingGroupName, 96 | "jid": asgbw.jid, 97 | }).Error("failed to create scale out policy") 98 | return err 99 | } 100 | 101 | asgbw.sopARN = sopARN 102 | 103 | sipARN, err := asgbw.createScaleInPolicy() 
104 | if err != nil { 105 | log.WithFields(logrus.Fields{ 106 | "err": err, 107 | "name": asg.AutoScalingGroupName, 108 | "jid": asgbw.jid, 109 | }).Error("failed to create scale in policy") 110 | return err 111 | } 112 | 113 | asgbw.sipARN = sipARN 114 | 115 | err = asgbw.createScaleOutMetricAlarm() 116 | if err != nil { 117 | log.WithFields(logrus.Fields{ 118 | "err": err, 119 | "name": asg.AutoScalingGroupName, 120 | "jid": asgbw.jid, 121 | }).Error("failed to create scale out metric alarm") 122 | return err 123 | } 124 | 125 | err = asgbw.createScaleInMetricAlarm() 126 | if err != nil { 127 | log.WithFields(logrus.Fields{ 128 | "err": err, 129 | "name": asg.AutoScalingGroupName, 130 | "jid": asgbw.jid, 131 | }).Error("failed to create scale in metric alarm") 132 | return err 133 | } 134 | 135 | err = asgbw.createLaunchingLifecycleHook() 136 | if err != nil { 137 | log.WithFields(logrus.Fields{ 138 | "err": err, 139 | "name": asg.AutoScalingGroupName, 140 | "jid": asgbw.jid, 141 | }).Error("failed to create launching lifecycle hook") 142 | return err 143 | } 144 | 145 | err = asgbw.createTerminatingLifecycleHook() 146 | if err != nil { 147 | log.WithFields(logrus.Fields{ 148 | "err": err, 149 | "name": asg.AutoScalingGroupName, 150 | "jid": asgbw.jid, 151 | }).Error("failed to create terminating lifecycle hook") 152 | return err 153 | } 154 | 155 | log.WithField("jid", asgbw.jid).Debug("all done") 156 | return nil 157 | } 158 | 159 | func (asgbw *autoscalingGroupBuilderWorker) createAutoscalingGroup() (*autoscaling.CreateAutoScalingGroupParams, error) { 160 | b := asgbw.b 161 | 162 | nameTmpl, err := template.New(fmt.Sprintf("name-template-%s", asgbw.jid)).Parse(b.NameTemplate) 163 | if err != nil { 164 | return nil, err 165 | } 166 | 167 | var nameBuf bytes.Buffer 168 | err = nameTmpl.Execute(&nameBuf, b) 169 | if err != nil { 170 | return nil, err 171 | } 172 | 173 | asgbw.name = nameBuf.String() 174 | 175 | tags := []autoscaling.Tag{ 176 | autoscaling.Tag{ 
177 | Key: "role", Value: b.Role, PropagateAtLaunch: true, 178 | }, 179 | autoscaling.Tag{ 180 | Key: "queue", Value: b.Queue, PropagateAtLaunch: true, 181 | }, 182 | autoscaling.Tag{ 183 | Key: "site", Value: b.Site, PropagateAtLaunch: true, 184 | }, 185 | autoscaling.Tag{ 186 | Key: "env", Value: b.Env, PropagateAtLaunch: true, 187 | }, 188 | autoscaling.Tag{ 189 | Key: "Name", Value: asgbw.name, PropagateAtLaunch: true, 190 | }, 191 | } 192 | 193 | asg := &autoscaling.CreateAutoScalingGroupParams{ 194 | AutoScalingGroupName: asgbw.name, 195 | InstanceId: b.InstanceID, 196 | MinSize: b.MinSize, 197 | MaxSize: b.MaxSize, 198 | DesiredCapacity: b.DesiredCapacity, 199 | DefaultCooldown: b.DefaultCooldown, 200 | Tags: tags, 201 | } 202 | 203 | log.WithFields(logrus.Fields{ 204 | "jid": asgbw.jid, 205 | "asg": fmt.Sprintf("%#v", asg), 206 | }).Debug("creating autoscaling group") 207 | 208 | _, err = asgbw.as.CreateAutoScalingGroup(asg) 209 | return asg, err 210 | } 211 | 212 | func (asgbw *autoscalingGroupBuilderWorker) createScaleOutPolicy() (string, error) { 213 | log.WithFields(logrus.Fields{ 214 | "jid": asgbw.jid, 215 | "name": asgbw.name, 216 | }).Debug("creating scale out policy") 217 | 218 | sop := &autoscaling.PutScalingPolicyParams{ 219 | PolicyName: fmt.Sprintf("%s-sop", asgbw.name), 220 | AutoScalingGroupName: asgbw.name, 221 | AdjustmentType: "ChangeInCapacity", 222 | Cooldown: asgbw.b.ScaleOutCooldown, 223 | ScalingAdjustment: asgbw.b.ScaleOutAdjustment, 224 | } 225 | 226 | resp, err := asgbw.as.PutScalingPolicy(sop) 227 | if err != nil { 228 | return "", err 229 | } 230 | 231 | return resp.PolicyARN, nil 232 | } 233 | 234 | func (asgbw *autoscalingGroupBuilderWorker) createScaleInPolicy() (string, error) { 235 | log.WithFields(logrus.Fields{ 236 | "jid": asgbw.jid, 237 | "name": asgbw.name, 238 | }).Debug("creating scale in policy") 239 | 240 | sip := &autoscaling.PutScalingPolicyParams{ 241 | PolicyName: fmt.Sprintf("%s-sip", asgbw.name), 242 | 
AutoScalingGroupName: asgbw.name, 243 | AdjustmentType: "ChangeInCapacity", 244 | Cooldown: asgbw.b.ScaleInCooldown, 245 | ScalingAdjustment: asgbw.b.ScaleInAdjustment, 246 | } 247 | 248 | resp, err := asgbw.as.PutScalingPolicy(sip) 249 | if err != nil { 250 | return "", err 251 | } 252 | 253 | return resp.PolicyARN, nil 254 | } 255 | 256 | func (asgbw *autoscalingGroupBuilderWorker) createScaleOutMetricAlarm() error { 257 | log.WithFields(logrus.Fields{ 258 | "jid": asgbw.jid, 259 | "name": asgbw.name, 260 | }).Debug("creating scale out metric alarm") 261 | 262 | ma := &cloudwatch.MetricAlarm{ 263 | AlarmName: fmt.Sprintf("%s-add-capacity", asgbw.name), 264 | MetricName: asgbw.b.ScaleOutMetricName, 265 | Namespace: asgbw.b.ScaleOutMetricNamespace, 266 | Statistic: asgbw.b.ScaleOutMetricStatistic, 267 | Period: asgbw.b.ScaleOutMetricPeriod, 268 | Threshold: asgbw.b.ScaleOutMetricThreshold, 269 | ComparisonOperator: asgbw.b.ScaleOutMetricComparisonOperator, 270 | EvaluationPeriods: asgbw.b.ScaleOutMetricEvaluationPeriods, 271 | AlarmActions: []cloudwatch.AlarmAction{ 272 | cloudwatch.AlarmAction{ 273 | ARN: asgbw.sopARN, 274 | }, 275 | }, 276 | // Dimensions: []cloudwatch.Dimension{ 277 | // cloudwatch.Dimension{ 278 | // Name: "AutoScalingGroupName", 279 | // Value: asgbw.name, 280 | // }, 281 | // }, 282 | } 283 | 284 | _, err := asgbw.cw.PutMetricAlarm(ma) 285 | return err 286 | } 287 | 288 | func (asgbw *autoscalingGroupBuilderWorker) createScaleInMetricAlarm() error { 289 | log.WithFields(logrus.Fields{ 290 | "jid": asgbw.jid, 291 | "name": asgbw.name, 292 | }).Debug("creating scale in metric alarm") 293 | 294 | ma := &cloudwatch.MetricAlarm{ 295 | AlarmName: fmt.Sprintf("%s-remove-capacity", asgbw.name), 296 | MetricName: asgbw.b.ScaleInMetricName, 297 | Namespace: asgbw.b.ScaleInMetricNamespace, 298 | Statistic: asgbw.b.ScaleInMetricStatistic, 299 | Period: asgbw.b.ScaleInMetricPeriod, 300 | Threshold: asgbw.b.ScaleInMetricThreshold, 301 | ComparisonOperator: 
asgbw.b.ScaleInMetricComparisonOperator, 302 | EvaluationPeriods: asgbw.b.ScaleInMetricEvaluationPeriods, 303 | AlarmActions: []cloudwatch.AlarmAction{ 304 | cloudwatch.AlarmAction{ 305 | ARN: asgbw.sipARN, 306 | }, 307 | }, 308 | // Dimensions: []cloudwatch.Dimension{ 309 | // cloudwatch.Dimension{ 310 | // Name: "AutoScalingGroupName", 311 | // Value: asgbw.name, 312 | // }, 313 | // }, 314 | } 315 | 316 | _, err := asgbw.cw.PutMetricAlarm(ma) 317 | return err 318 | } 319 | 320 | func (asgbw *autoscalingGroupBuilderWorker) createLaunchingLifecycleHook() error { 321 | log.WithFields(logrus.Fields{ 322 | "jid": asgbw.jid, 323 | "name": asgbw.name, 324 | }).Debug("creating launching lifecycle hook") 325 | 326 | llch := &autoscaling.PutLifecycleHookParams{ 327 | AutoScalingGroupName: asgbw.name, 328 | DefaultResult: asgbw.b.LifecycleDefaultResult, 329 | HeartbeatTimeout: asgbw.b.LifecycleHeartbeatTimeout, 330 | LifecycleHookName: fmt.Sprintf("%s-lch-launching", asgbw.name), 331 | LifecycleTransition: "autoscaling:EC2_INSTANCE_LAUNCHING", 332 | NotificationTargetARN: asgbw.b.TopicARN, 333 | RoleARN: asgbw.b.RoleARN, 334 | } 335 | 336 | _, err := asgbw.as.PutLifecycleHook(llch) 337 | return err 338 | } 339 | 340 | func (asgbw *autoscalingGroupBuilderWorker) createTerminatingLifecycleHook() error { 341 | log.WithFields(logrus.Fields{ 342 | "jid": asgbw.jid, 343 | "name": asgbw.name, 344 | }).Debug("creating terminating lifecycle hook") 345 | 346 | tlch := &autoscaling.PutLifecycleHookParams{ 347 | AutoScalingGroupName: asgbw.name, 348 | DefaultResult: asgbw.b.LifecycleDefaultResult, 349 | HeartbeatTimeout: asgbw.b.LifecycleHeartbeatTimeout, 350 | LifecycleHookName: fmt.Sprintf("%s-lch-terminating", asgbw.name), 351 | LifecycleTransition: "autoscaling:EC2_INSTANCE_TERMINATING", 352 | NotificationTargetARN: asgbw.b.TopicARN, 353 | RoleARN: asgbw.b.RoleARN, 354 | } 355 | 356 | _, err := asgbw.as.PutLifecycleHook(tlch) 357 | return err 358 | } 359 | 
-------------------------------------------------------------------------------- /workers/config.go: -------------------------------------------------------------------------------- 1 | package workers 2 | 3 | // Config is everything needed to run the workers 4 | type Config struct { 5 | ProcessID string 6 | WebHostname string 7 | Debug bool 8 | 9 | Queues string 10 | RedisPoolSize string 11 | RedisURL string 12 | 13 | AWSKey string 14 | AWSSecret string 15 | AWSRegion string 16 | 17 | InstanceRSA string 18 | InstanceYML string 19 | InstanceTagRetries int 20 | 21 | InitScriptTemplate string 22 | MiniWorkerInterval int 23 | InstanceExpiry int 24 | ImageExpiry int 25 | 26 | SlackHookPath string 27 | SlackUsername string 28 | SlackIcon string 29 | 30 | SentryDSN string 31 | } 32 | -------------------------------------------------------------------------------- /workers/ec2_syncer.go: -------------------------------------------------------------------------------- 1 | package workers 2 | 3 | import ( 4 | "net" 5 | "net/url" 6 | 7 | "github.com/Sirupsen/logrus" 8 | "github.com/garyburd/redigo/redis" 9 | "github.com/goamz/goamz/ec2" 10 | "github.com/travis-ci/pudding" 11 | "github.com/travis-ci/pudding/db" 12 | ) 13 | 14 | type ec2Syncer struct { 15 | cfg *internalConfig 16 | ec2 *ec2.EC2 17 | log *logrus.Logger 18 | i db.InstanceFetcherStorer 19 | img db.ImageFetcherStorer 20 | } 21 | 22 | func newEC2Syncer(cfg *internalConfig, r *redis.Pool, log *logrus.Logger) (*ec2Syncer, error) { 23 | i, err := db.NewInstances(r, log, cfg.InstanceStoreExpiry) 24 | if err != nil { 25 | return nil, err 26 | } 27 | 28 | img, err := db.NewImages(r, log, cfg.ImageStoreExpiry) 29 | if err != nil { 30 | return nil, err 31 | } 32 | 33 | return &ec2Syncer{ 34 | cfg: cfg, 35 | log: log, 36 | i: i, 37 | img: img, 38 | ec2: ec2.New(cfg.AWSAuth, cfg.AWSRegion), 39 | }, nil 40 | } 41 | 42 | func (es *ec2Syncer) Sync() error { 43 | var ( 44 | instances map[string]ec2.Instance 45 | images 
map[string]ec2.Image 46 | err error 47 | ) 48 | 49 | es.log.Debug("ec2 syncer fetching instances") 50 | for i := 3; i > 0; i-- { 51 | instances, err = es.fetchInstances() 52 | if err == nil { 53 | break 54 | } 55 | } 56 | 57 | if err != nil { 58 | panic(err) 59 | } 60 | 61 | if instances == nil { 62 | es.log.Debug("ec2 syncer failed to get any instances; assuming temporary network error") 63 | return nil 64 | } 65 | 66 | es.log.Debug("ec2 syncer storing instances") 67 | err = es.i.Store(instances) 68 | if err != nil { 69 | panic(err) 70 | } 71 | 72 | es.log.Debug("ec2 syncer fetching images") 73 | for i := 3; i > 0; i-- { 74 | images, err = es.fetchImages() 75 | if err == nil { 76 | break 77 | } 78 | } 79 | 80 | if err != nil { 81 | panic(err) 82 | } 83 | 84 | if images == nil { 85 | es.log.Debug("ec2 syncer failed to get any images; assuming temporary network error") 86 | return nil 87 | } 88 | 89 | es.log.Debug("ec2 syncer storing images") 90 | err = es.img.Store(images) 91 | if err != nil { 92 | panic(err) 93 | } 94 | 95 | return nil 96 | } 97 | 98 | func (es *ec2Syncer) fetchInstances() (map[string]ec2.Instance, error) { 99 | f := ec2.NewFilter() 100 | f.Add("instance-state-name", "running") 101 | instances, err := pudding.GetInstancesWithFilter(es.ec2, f) 102 | if err == nil { 103 | return instances, nil 104 | } 105 | 106 | switch err.(type) { 107 | case *url.Error, *net.OpError: 108 | log.WithFields(logrus.Fields{"err": err}).Warn("network error while fetching ec2 instances") 109 | return nil, nil 110 | default: 111 | return nil, err 112 | } 113 | } 114 | 115 | func (es *ec2Syncer) fetchImages() (map[string]ec2.Image, error) { 116 | f := ec2.NewFilter() 117 | f.Add("tag-key", "role") 118 | images, err := pudding.GetImagesWithFilter(es.ec2, f) 119 | if err == nil { 120 | return images, nil 121 | } 122 | 123 | switch err.(type) { 124 | case *url.Error, *net.OpError: 125 | log.WithFields(logrus.Fields{"err": err}).Warn("network error while fetching ec2 images") 
126 | return nil, nil 127 | default: 128 | return nil, err 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /workers/init_script_context.go: -------------------------------------------------------------------------------- 1 | package workers 2 | 3 | type initScriptContext struct { 4 | Env string 5 | Site string 6 | Queue string 7 | Role string 8 | AMI string 9 | Count int 10 | InstanceType string 11 | InstanceRSA string 12 | SlackChannel string 13 | PapertrailSite string 14 | InstanceYML string 15 | InstanceBuildID string 16 | InstanceBuildURL string 17 | InstanceLaunchURL string 18 | InstanceTerminateURL string 19 | InstanceHeartbeatURL string 20 | } 21 | -------------------------------------------------------------------------------- /workers/instance_builds.go: -------------------------------------------------------------------------------- 1 | package workers 2 | 3 | import ( 4 | "bytes" 5 | "compress/gzip" 6 | "encoding/base64" 7 | "encoding/json" 8 | "fmt" 9 | "io" 10 | "net/url" 11 | "os" 12 | "text/template" 13 | "time" 14 | 15 | "github.com/Sirupsen/logrus" 16 | "github.com/garyburd/redigo/redis" 17 | "github.com/goamz/goamz/ec2" 18 | "github.com/gorilla/feeds" 19 | "github.com/jrallison/go-workers" 20 | "github.com/travis-ci/pudding" 21 | ) 22 | 23 | func init() { 24 | defaultQueueFuncs["instance-builds"] = instanceBuildsMain 25 | } 26 | 27 | func instanceBuildsMain(cfg *internalConfig, msg *workers.Msg) { 28 | buildPayloadJSON := []byte(msg.OriginalJson()) 29 | buildPayload := &pudding.InstanceBuildPayload{ 30 | Args: []*pudding.InstanceBuild{ 31 | pudding.NewInstanceBuild(), 32 | }, 33 | } 34 | 35 | err := json.Unmarshal(buildPayloadJSON, buildPayload) 36 | if err != nil { 37 | log.WithField("err", err).Panic("failed to deserialize message") 38 | } 39 | 40 | b := buildPayload.InstanceBuild() 41 | b.Hydrate() 42 | 43 | ibw, err := newInstanceBuilderWorker(b, cfg, msg.Jid(), workers.Config.Pool.Get()) 
44 | if err != nil { 45 | log.WithField("err", err).Panic("failed to make an instance build worker") 46 | } 47 | 48 | if b.BootInstance { 49 | ibw.Build() 50 | } else { 51 | ibw.CreateUserData() 52 | } 53 | 54 | if err != nil { 55 | log.WithField("err", err).Panic("instance build failed") 56 | } 57 | } 58 | 59 | type instanceBuilderWorker struct { 60 | rc redis.Conn 61 | n []pudding.Notifier 62 | jid string 63 | cfg *internalConfig 64 | ec2 *ec2.EC2 65 | sg *ec2.SecurityGroup 66 | sgName string 67 | ami *ec2.Image 68 | b *pudding.InstanceBuild 69 | i *ec2.Instance 70 | t *template.Template 71 | } 72 | 73 | func newInstanceBuilderWorker(b *pudding.InstanceBuild, cfg *internalConfig, jid string, redisConn redis.Conn) (*instanceBuilderWorker, error) { 74 | var err error 75 | notifier := pudding.NewSlackNotifier(cfg.SlackHookPath, cfg.SlackUsername, cfg.SlackIcon) 76 | 77 | t := template.New("init-script") 78 | t.Funcs(template.FuncMap{ 79 | "env_for": pudding.MakeInstanceBuildEnvForFunc(b), 80 | "env": os.Getenv, 81 | "uncompress": pudding.MakeTemplateUncompressFunc(log), 82 | }) 83 | 84 | t, err = t.Parse(cfg.InitScriptTemplateString) 85 | if err != nil { 86 | return nil, err 87 | } 88 | 89 | ibw := &instanceBuilderWorker{ 90 | rc: redisConn, 91 | jid: jid, 92 | cfg: cfg, 93 | n: []pudding.Notifier{notifier}, 94 | b: b, 95 | ec2: ec2.New(cfg.AWSAuth, cfg.AWSRegion), 96 | t: t, 97 | } 98 | 99 | ibw.sgName = fmt.Sprintf("pudding-%d-%p", time.Now().UTC().Unix(), ibw) 100 | return ibw, nil 101 | } 102 | 103 | func (ibw *instanceBuilderWorker) Build() error { 104 | var err error 105 | 106 | f := ec2.NewFilter() 107 | if ibw.b.Role != "" { 108 | f.Add("tag:role", ibw.b.Role) 109 | } 110 | 111 | log.WithFields(logrus.Fields{ 112 | "jid": ibw.jid, 113 | "filter": f, 114 | }).Debug("resolving ami") 115 | 116 | ibw.ami, err = pudding.ResolveAMI(ibw.ec2, ibw.b.AMI, f) 117 | if err != nil { 118 | log.WithFields(logrus.Fields{ 119 | "jid": ibw.jid, 120 | "ami_id": ibw.b.AMI, 121 
| "err": err, 122 | }).Error("failed to resolve ami") 123 | return err 124 | } 125 | 126 | if ibw.b.SecurityGroupID != "" { 127 | ibw.sg = &ec2.SecurityGroup{Id: ibw.b.SecurityGroupID} 128 | } else { 129 | log.WithField("jid", ibw.jid).Debug("creating security group") 130 | err = ibw.createSecurityGroup() 131 | if err != nil { 132 | log.WithFields(logrus.Fields{ 133 | "jid": ibw.jid, 134 | "security_group_name": ibw.sgName, 135 | "err": err, 136 | }).Error("failed to create security group") 137 | return err 138 | } 139 | } 140 | 141 | log.WithField("jid", ibw.jid).Debug("creating instance") 142 | err = ibw.createInstance() 143 | if err != nil { 144 | log.WithFields(logrus.Fields{ 145 | "err": err, 146 | "err_details": fmt.Sprintf("%#v", err), 147 | "jid": ibw.jid, 148 | }).Error("failed to create instance(s)") 149 | return err 150 | } 151 | 152 | ibw.b.InstanceID = ibw.i.InstanceId 153 | 154 | for i := ibw.cfg.InstanceTagRetries; i > 0; i-- { 155 | log.WithField("jid", ibw.jid).Debug("tagging instance") 156 | err = ibw.tagInstance() 157 | if err == nil { 158 | break 159 | } 160 | time.Sleep(3 * time.Second) 161 | } 162 | 163 | if err != nil { 164 | log.WithFields(logrus.Fields{ 165 | "err": err, 166 | "jid": ibw.jid, 167 | }).Error("failed to tag instance(s)") 168 | return err 169 | } 170 | 171 | ibw.notifyInstanceLaunched() 172 | 173 | log.WithField("jid", ibw.jid).Debug("all done") 174 | return nil 175 | } 176 | 177 | func (ibw *instanceBuilderWorker) CreateUserData() ([]byte, error) { 178 | log.WithFields(logrus.Fields{ 179 | "jid": ibw.jid, 180 | "instance_type": ibw.b.InstanceType, 181 | }).Info("creating user data") 182 | 183 | return ibw.buildUserData() 184 | } 185 | 186 | func (ibw *instanceBuilderWorker) createSecurityGroup() error { 187 | newSg := ec2.SecurityGroup{ 188 | Name: ibw.sgName, 189 | Description: "custom security group", 190 | } 191 | 192 | log.WithFields(logrus.Fields{ 193 | "jid": ibw.jid, 194 | "security_group_name": ibw.sgName, 195 | 
}).Debug("creating security group") 196 | 197 | resp, err := ibw.ec2.CreateSecurityGroup(newSg) 198 | if err != nil { 199 | log.WithFields(logrus.Fields{ 200 | "err": err, 201 | "jid": ibw.jid, 202 | }).Error("failed to create security group") 203 | return err 204 | } 205 | 206 | ibw.sg = &resp.SecurityGroup 207 | 208 | log.WithFields(logrus.Fields{ 209 | "jid": ibw.jid, 210 | "security_group_name": ibw.sgName, 211 | }).Debug("authorizing port 22 on security group") 212 | 213 | _, err = ibw.ec2.AuthorizeSecurityGroup(*ibw.sg, []ec2.IPPerm{ 214 | ec2.IPPerm{ 215 | Protocol: "tcp", 216 | FromPort: 22, 217 | ToPort: 22, 218 | SourceIPs: []string{"0.0.0.0/0"}, 219 | }, 220 | }) 221 | if err != nil { 222 | log.WithFields(logrus.Fields{ 223 | "err": err, 224 | "jid": ibw.jid, 225 | "security_group_name": ibw.sgName, 226 | }).Error("failed to authorize port 22") 227 | return err 228 | } 229 | 230 | return nil 231 | } 232 | 233 | func (ibw *instanceBuilderWorker) createInstance() error { 234 | log.WithFields(logrus.Fields{ 235 | "jid": ibw.jid, 236 | "instance_type": ibw.b.InstanceType, 237 | "ami.id": ibw.ami.Id, 238 | "ami.name": ibw.ami.Name, 239 | "count": ibw.b.Count, 240 | }).Info("booting instance") 241 | 242 | userData, err := ibw.buildUserData() 243 | if err != nil { 244 | return err 245 | } 246 | 247 | resp, err := ibw.ec2.RunInstances(&ec2.RunInstancesOptions{ 248 | ImageId: ibw.ami.Id, 249 | UserData: userData, 250 | InstanceType: ibw.b.InstanceType, 251 | SecurityGroups: []ec2.SecurityGroup{*ibw.sg}, 252 | SubnetId: ibw.b.SubnetID, 253 | }) 254 | if err != nil { 255 | return err 256 | } 257 | 258 | ibw.i = &resp.Instances[0] 259 | return nil 260 | } 261 | 262 | func (ibw *instanceBuilderWorker) tagInstance() error { 263 | nameTmpl, err := template.New(fmt.Sprintf("name-template-%s", ibw.jid)).Parse(ibw.b.NameTemplate) 264 | if err != nil { 265 | return err 266 | } 267 | 268 | var nameBuf bytes.Buffer 269 | err = nameTmpl.Execute(&nameBuf, ibw.b) 270 | if err 
// buildUserData renders the init script template for this build, stores the
// gzipped, base64-encoded result and a freshly generated auth token in
// redis, and returns a one-line "#include <init script URL>" payload.
//
// It returns an error when the web host URL cannot be parsed, when the
// instance-specific YML cannot be built, when the template fails to execute,
// or when any of the gzip or redis operations fail.
func (ibw *instanceBuilderWorker) buildUserData() ([]byte, error) {
	webURL, err := url.Parse(ibw.cfg.WebHost)
	if err != nil {
		return nil, err
	}

	// A new UUID is used as the password part of every URL built below and
	// is also what gets stored in the auths hash at the end.
	instAuth := feeds.NewUUID().String()
	webURL.User = url.UserPassword("x", instAuth)

	// The same URL value is reused; only the path changes between the
	// String() calls.
	webURL.Path = fmt.Sprintf("/instance-launches/%s", ibw.b.ID)
	instanceLaunchURL := webURL.String()

	webURL.Path = fmt.Sprintf("/instance-terminations/%s", ibw.b.ID)
	instanceTerminateURL := webURL.String()

	webURL.Path = fmt.Sprintf("/instance-builds/%s", ibw.b.ID)
	instanceBuildURL := webURL.String()

	webURL.Path = fmt.Sprintf("/init-scripts/%s", ibw.b.ID)
	initScriptURL := webURL.String()

	webURL.Path = fmt.Sprintf("/instance-heartbeats/%s", ibw.b.ID)
	instanceHeartbeatURL := webURL.String()

	// buf receives the gzip-compressed script that is stored in redis.
	buf := &bytes.Buffer{}
	gzw, err := gzip.NewWriterLevel(buf, gzip.BestCompression)
	if err != nil {
		return nil, err
	}

	// tw receives an uncompressed copy of the script, used only for the
	// debug log entry further down.
	tw := &bytes.Buffer{}
	w := io.MultiWriter(tw, gzw)

	yml, err := pudding.BuildInstanceSpecificYML(ibw.b.Site, ibw.b.Env, ibw.cfg.InstanceYML, ibw.b.Queue, ibw.b.Count)
	if err != nil {
		return nil, err
	}

	ymlString, err := yml.String()
	if err != nil {
		return nil, err
	}

	err = ibw.t.Execute(w, &initScriptContext{
		Env:                  ibw.b.Env,
		Site:                 ibw.b.Site,
		Queue:                ibw.b.Queue,
		Role:                 ibw.b.Role,
		AMI:                  ibw.b.AMI,
		Count:                ibw.b.Count,
		SlackChannel:         ibw.b.SlackChannel,
		InstanceType:         ibw.b.InstanceType,
		InstanceBuildID:      ibw.b.ID,
		InstanceBuildURL:     instanceBuildURL,
		InstanceLaunchURL:    instanceLaunchURL,
		InstanceTerminateURL: instanceTerminateURL,
		InstanceHeartbeatURL: instanceHeartbeatURL,

		// TODO: extract InstanceRSA key via `env` func
		InstanceRSA: ibw.cfg.InstanceRSA,
		// TODO: extract PapertrailSite key via `instance_env` func
		PapertrailSite: yml.PapertrailSite,
		// TODO: extract InstanceYML key via an `instance_env` func
		InstanceYML: ymlString,
	})
	if err != nil {
		return nil, err
	}

	// NOTE(review): the full rendered script is logged at debug level; if
	// the template emits InstanceRSA or the authenticated URLs, those end
	// up in the log output -- confirm this is acceptable.
	log.WithFields(logrus.Fields{
		"jid":    ibw.jid,
		"script": tw.String(),
	}).Debug("rendered init script")

	// Closing the gzip writer must happen before buf is read so that the
	// compressed stream is complete.
	err = gzw.Close()
	if err != nil {
		return nil, err
	}

	initScriptB64 := base64.StdEncoding.EncodeToString(buf.Bytes())

	// Queue both hash writes inside a MULTI block and run them with EXEC.
	err = ibw.rc.Send("MULTI")
	if err != nil {
		return nil, err
	}

	err = ibw.rc.Send("HSET", fmt.Sprintf("%s:init-scripts", pudding.RedisNamespace), ibw.b.ID, initScriptB64)
	if err != nil {
		// NOTE(review): the result of this DISCARD is ignored and nothing
		// visible here flushes it -- verify against the redis client docs.
		ibw.rc.Send("DISCARD")
		return nil, err
	}

	err = ibw.rc.Send("HSET", fmt.Sprintf("%s:auths", pudding.RedisNamespace), ibw.b.ID, instAuth)
	if err != nil {
		ibw.rc.Send("DISCARD")
		return nil, err
	}

	_, err = ibw.rc.Do("EXEC")
	if err != nil {
		return nil, err
	}

	// The returned payload only points at the stored script; presumably the
	// instance fetches it from the init-scripts URL on boot -- TODO confirm.
	return []byte(fmt.Sprintf("#include %s\n", initScriptURL)), nil
}
pudding.NotificationInstanceBuildSummary(ibw.b))) 403 | } 404 | } 405 | -------------------------------------------------------------------------------- /workers/instance_lifecycle_transitions.go: -------------------------------------------------------------------------------- 1 | package workers 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | 7 | "github.com/Sirupsen/logrus" 8 | "github.com/garyburd/redigo/redis" 9 | "github.com/goamz/goamz/autoscaling" 10 | "github.com/jrallison/go-workers" 11 | "github.com/travis-ci/pudding" 12 | "github.com/travis-ci/pudding/db" 13 | ) 14 | 15 | var ( 16 | errMissingInstanceLifecycleTransition = fmt.Errorf("missing instance lifecycle transition") 17 | ) 18 | 19 | func init() { 20 | defaultQueueFuncs["instance-lifecycle-transitions"] = instanceLifecycleTransitionsMain 21 | } 22 | 23 | func instanceLifecycleTransitionsMain(cfg *internalConfig, msg *workers.Msg) { 24 | iltPayloadJSON := []byte(msg.OriginalJson()) 25 | iltPayload := &pudding.InstanceLifecycleTransitionPayload{ 26 | Args: []*pudding.InstanceLifecycleTransition{}, 27 | } 28 | 29 | err := json.Unmarshal(iltPayloadJSON, iltPayload) 30 | if err != nil { 31 | log.WithField("err", err).Panic("failed to deserialize instance lifecycle transition") 32 | } 33 | 34 | ilt := iltPayload.InstanceLifecycleTransition() 35 | if ilt == nil { 36 | log.WithField("err", errMissingInstanceLifecycleTransition).Panic("no instance lifecycle transition available") 37 | return 38 | } 39 | 40 | err = handleInstanceLifecycleTransition(cfg, workers.Config.Pool.Get(), msg.Jid(), ilt) 41 | if err != nil { 42 | switch err.(type) { 43 | case *autoscaling.Error: 44 | log.WithField("err", err).Error("discarding autoscaling error") 45 | default: 46 | log.WithField("err", err).Panic("instance lifecycle transition handler returned an error") 47 | } 48 | } 49 | } 50 | 51 | func handleInstanceLifecycleTransition(cfg *internalConfig, rc redis.Conn, jid string, ilt *pudding.InstanceLifecycleTransition) 
error { 52 | ala, err := db.FetchInstanceLifecycleAction(rc, ilt.Transition, ilt.InstanceID) 53 | if err != nil { 54 | log.WithFields(logrus.Fields{ 55 | "err": err, 56 | "jid": jid, 57 | "transition": ilt.Transition, 58 | "instance": ilt.InstanceID, 59 | }).Error("failed to fetch instance lifecycle action") 60 | return err 61 | } 62 | 63 | if ala == nil { 64 | log.WithFields(logrus.Fields{ 65 | "jid": jid, 66 | "transition": ilt.Transition, 67 | "instance": ilt.InstanceID, 68 | }).Warn("discarding unknown lifecycle transition") 69 | return nil 70 | } 71 | 72 | as := autoscaling.New(cfg.AWSAuth, cfg.AWSRegion) 73 | 74 | cla := &autoscaling.CompleteLifecycleActionParams{ 75 | AutoScalingGroupName: ala.AutoScalingGroupName, 76 | LifecycleActionResult: "CONTINUE", 77 | LifecycleActionToken: ala.LifecycleActionToken, 78 | LifecycleHookName: ala.LifecycleHookName, 79 | } 80 | 81 | log.WithFields(logrus.Fields{ 82 | "jid": jid, 83 | "transition": ilt.Transition, 84 | "instance": ilt.InstanceID, 85 | "params": fmt.Sprintf("%#v", cla), 86 | }).Info("completing lifecycle action") 87 | 88 | _, err = as.CompleteLifecycleAction(cla) 89 | if err != nil { 90 | log.WithFields(logrus.Fields{ 91 | "err": err, 92 | "jid": jid, 93 | "transition": ilt.Transition, 94 | "instance": ilt.InstanceID, 95 | "params": fmt.Sprintf("%#v", cla), 96 | }).Error("failed to complete lifecycle action") 97 | return err 98 | } 99 | 100 | err = db.WipeInstanceLifecycleAction(rc, ilt.Transition, ilt.InstanceID) 101 | if err != nil { 102 | log.WithField("err", err).Warn("failed to clean up lifecycle action bits") 103 | } 104 | 105 | return nil 106 | } 107 | -------------------------------------------------------------------------------- /workers/instance_terminations.go: -------------------------------------------------------------------------------- 1 | package workers 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | 7 | "github.com/Sirupsen/logrus" 8 | "github.com/garyburd/redigo/redis" 9 | 
"github.com/goamz/goamz/ec2" 10 | "github.com/jrallison/go-workers" 11 | "github.com/travis-ci/pudding" 12 | "github.com/travis-ci/pudding/db" 13 | ) 14 | 15 | func init() { 16 | defaultQueueFuncs["instance-terminations"] = instanceTerminationsMain 17 | } 18 | 19 | func instanceTerminationsMain(cfg *internalConfig, msg *workers.Msg) { 20 | log.WithFields(logrus.Fields{ 21 | "jid": msg.Jid(), 22 | }).Debug("starting processing of termination job") 23 | 24 | buildPayloadJSON := []byte(msg.OriginalJson()) 25 | buildPayload := &pudding.InstanceTerminationPayload{} 26 | 27 | err := json.Unmarshal(buildPayloadJSON, buildPayload) 28 | if err != nil { 29 | log.WithField("err", err).Panic("failed to deserialize message") 30 | } 31 | 32 | err = newInstanceTerminatorWorker(buildPayload.InstanceID, buildPayload.SlackChannel, 33 | cfg, msg.Jid(), workers.Config.Pool.Get()).Terminate() 34 | if err != nil { 35 | log.WithField("err", err).Panic("instance termination failed") 36 | } 37 | } 38 | 39 | type instanceTerminatorWorker struct { 40 | rc redis.Conn 41 | jid string 42 | nc string 43 | n []pudding.Notifier 44 | iid string 45 | cfg *internalConfig 46 | ec2 *ec2.EC2 47 | } 48 | 49 | func newInstanceTerminatorWorker(instanceID, slackChannel string, cfg *internalConfig, jid string, redisConn redis.Conn) *instanceTerminatorWorker { 50 | notifier := pudding.NewSlackNotifier(cfg.SlackHookPath, cfg.SlackUsername, cfg.SlackIcon) 51 | 52 | return &instanceTerminatorWorker{ 53 | rc: redisConn, 54 | jid: jid, 55 | cfg: cfg, 56 | nc: slackChannel, 57 | n: []pudding.Notifier{notifier}, 58 | iid: instanceID, 59 | ec2: ec2.New(cfg.AWSAuth, cfg.AWSRegion), 60 | } 61 | } 62 | 63 | func (itw *instanceTerminatorWorker) Terminate() error { 64 | _, err := itw.ec2.TerminateInstances([]string{itw.iid}) 65 | if err != nil { 66 | return err 67 | } 68 | 69 | instances, _ := db.FetchInstances(itw.rc, map[string]string{"instance_id": itw.iid}) 70 | 71 | err = db.RemoveInstances(itw.rc, []string{itw.iid}) 
72 | if err != nil && instances != nil && len(instances) > 0 { 73 | inst := instances[0] 74 | for _, notifier := range itw.n { 75 | notifier.Notify(itw.nc, 76 | fmt.Sprintf("Failed to terminate *%s* :scream_cat: _(%s)_ %s", 77 | itw.iid, err, pudding.NotificationInstanceSummary(inst))) 78 | } 79 | return err 80 | } 81 | 82 | if instances != nil && len(instances) > 0 { 83 | inst := instances[0] 84 | for _, notifier := range itw.n { 85 | notifier.Notify(itw.nc, fmt.Sprintf("Terminating *%s* :boom: %s", 86 | itw.iid, pudding.NotificationInstanceSummary(inst))) 87 | } 88 | } 89 | return nil 90 | } 91 | -------------------------------------------------------------------------------- /workers/internal_config.go: -------------------------------------------------------------------------------- 1 | package workers 2 | 3 | import ( 4 | "net/url" 5 | "text/template" 6 | 7 | "github.com/goamz/goamz/aws" 8 | "github.com/jrallison/go-workers" 9 | ) 10 | 11 | type internalConfig struct { 12 | AWSAuth aws.Auth 13 | AWSRegion aws.Region 14 | 15 | RedisURL *url.URL 16 | RedisPoolSize string 17 | 18 | SlackHookPath string 19 | SlackUsername string 20 | SlackIcon string 21 | 22 | SentryDSN string 23 | 24 | WebHost string 25 | ProcessID string 26 | 27 | InstanceRSA string 28 | InstanceYML string 29 | InstanceTagRetries int 30 | 31 | Queues []string 32 | QueueFuncs map[string]func(*internalConfig, *workers.Msg) 33 | QueueConcurrencies map[string]int 34 | 35 | MiniWorkerInterval int 36 | InstanceStoreExpiry int 37 | ImageStoreExpiry int 38 | 39 | InitScriptTemplate *template.Template 40 | InitScriptTemplateString string 41 | } 42 | -------------------------------------------------------------------------------- /workers/main.go: -------------------------------------------------------------------------------- 1 | package workers 2 | 3 | import ( 4 | "net/url" 5 | "os" 6 | "strconv" 7 | "strings" 8 | "time" 9 | 10 | "github.com/Sirupsen/logrus" 11 | "github.com/goamz/goamz/aws" 12 | ) 13 | 
14 | // Main is the whole shebang 15 | func Main(cfg *Config) { 16 | if cfg.Debug { 17 | log.Level = logrus.DebugLevel 18 | } 19 | 20 | ic := &internalConfig{ 21 | RedisPoolSize: cfg.RedisPoolSize, 22 | 23 | SlackHookPath: cfg.SlackHookPath, 24 | SlackUsername: cfg.SlackUsername, 25 | SlackIcon: cfg.SlackIcon, 26 | 27 | SentryDSN: cfg.SentryDSN, 28 | 29 | WebHost: cfg.WebHostname, 30 | ProcessID: cfg.ProcessID, 31 | 32 | InstanceRSA: cfg.InstanceRSA, 33 | InstanceYML: cfg.InstanceYML, 34 | InstanceTagRetries: cfg.InstanceTagRetries, 35 | 36 | Queues: []string{}, 37 | QueueConcurrencies: map[string]int{}, 38 | QueueFuncs: defaultQueueFuncs, 39 | 40 | MiniWorkerInterval: cfg.MiniWorkerInterval, 41 | InstanceStoreExpiry: cfg.InstanceExpiry, 42 | ImageStoreExpiry: cfg.ImageExpiry, 43 | 44 | InitScriptTemplateString: cfg.InitScriptTemplate, 45 | } 46 | 47 | auth, err := aws.GetAuth(cfg.AWSKey, cfg.AWSSecret, "", time.Now().UTC().Add(8766*time.Hour)) 48 | if err != nil { 49 | log.WithField("err", err).Fatal("failed to load aws auth") 50 | os.Exit(1) 51 | } 52 | 53 | region, ok := aws.Regions[cfg.AWSRegion] 54 | if !ok { 55 | log.WithField("region", cfg.AWSRegion).Fatal("invalid region") 56 | os.Exit(1) 57 | } 58 | ic.AWSAuth = auth 59 | ic.AWSRegion = region 60 | 61 | if ic.InstanceRSA == "" { 62 | log.Fatal("missing instance rsa key") 63 | os.Exit(1) 64 | } 65 | 66 | for _, queue := range strings.Split(cfg.Queues, ",") { 67 | concurrency := 10 68 | qParts := strings.Split(queue, ":") 69 | if len(qParts) == 2 { 70 | queue = qParts[0] 71 | parsedConcurrency, err := strconv.ParseUint(qParts[1], 10, 64) 72 | if err != nil { 73 | log.WithFields(logrus.Fields{ 74 | "err": err, 75 | "queue": queue, 76 | }).Warn("failed to parse concurrency for queue, defaulting to 10") 77 | concurrency = 10 78 | } else { 79 | concurrency = int(parsedConcurrency) 80 | } 81 | } 82 | queue = strings.TrimSpace(queue) 83 | ic.QueueConcurrencies[queue] = concurrency 84 | ic.Queues = 
append(ic.Queues, queue) 85 | } 86 | 87 | redisURL, err := url.Parse(cfg.RedisURL) 88 | if err != nil { 89 | log.WithField("err", err).Fatal("failed to parse redis url") 90 | os.Exit(1) 91 | } 92 | 93 | ic.RedisURL = redisURL 94 | 95 | err = runWorkers(ic, log) 96 | if err != nil { 97 | log.WithField("err", err).Fatal("failed to start workers") 98 | os.Exit(1) 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /workers/middleware_raven.go: -------------------------------------------------------------------------------- 1 | package workers 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/Sirupsen/logrus" 7 | "github.com/getsentry/raven-go" 8 | "github.com/jrallison/go-workers" 9 | "github.com/travis-ci/pudding" 10 | ) 11 | 12 | // MiddlewareRaven is the go-workers compatible middleware for 13 | // sentry integration 14 | type MiddlewareRaven struct { 15 | cl *raven.Client 16 | } 17 | 18 | // Call is what does stuff in the middleware stack yey. 19 | // It is largely a copy-pasta of the raven CapturePanic func, fwiw. 
20 | func (r *MiddlewareRaven) Call(queue string, message *workers.Msg, next func() bool) (ack bool) { 21 | defer func() { 22 | var packet *raven.Packet 23 | p := recover() 24 | 25 | switch rval := p.(type) { 26 | case nil: 27 | ack = true 28 | return 29 | case error: 30 | packet = raven.NewPacket(rval.Error(), raven.NewException(rval, raven.NewStacktrace(2, 3, nil))) 31 | case *logrus.Entry: 32 | entryErrInterface, ok := rval.Data["err"] 33 | if !ok { 34 | entryErrInterface = fmt.Errorf(rval.Message) 35 | } 36 | 37 | entryErr, ok := entryErrInterface.(error) 38 | if !ok { 39 | entryErr = fmt.Errorf(rval.Message) 40 | } 41 | 42 | packet = raven.NewPacket(rval.Message, raven.NewException(entryErr, raven.NewStacktrace(2, 3, nil))) 43 | default: 44 | rvalStr := fmt.Sprint(rval) 45 | packet = raven.NewPacket(rvalStr, raven.NewException(fmt.Errorf(rvalStr), raven.NewStacktrace(2, 3, nil))) 46 | } 47 | 48 | pudding.SendRavenPacket(packet, r.cl, log, nil) 49 | panic(p) 50 | }() 51 | 52 | ack = next() 53 | return 54 | } 55 | 56 | // Do is a simplified interface used by the mini workers 57 | func (r *MiddlewareRaven) Do(fn func() error) error { 58 | defer func() { 59 | var packet *raven.Packet 60 | p := recover() 61 | 62 | switch rval := p.(type) { 63 | case nil: 64 | return 65 | case error: 66 | errMsg := rval.Error() 67 | if errMsg == "" { 68 | errMsg = "generic worker error (?)" 69 | } 70 | packet = raven.NewPacket(errMsg, raven.NewException(rval, raven.NewStacktrace(2, 3, nil))) 71 | case *logrus.Entry: 72 | entryErrInterface, ok := rval.Data["err"] 73 | if !ok { 74 | entryErrInterface = fmt.Errorf(rval.Message) 75 | } 76 | 77 | entryErr, ok := entryErrInterface.(error) 78 | if !ok { 79 | entryErr = fmt.Errorf(rval.Message) 80 | } 81 | 82 | packet = raven.NewPacket(rval.Message, raven.NewException(entryErr, raven.NewStacktrace(2, 3, nil))) 83 | default: 84 | rvalStr := fmt.Sprint(rval) 85 | if rvalStr == "" { 86 | rvalStr = "generic worker error (?)" 87 | } 88 | packet 
= raven.NewPacket(rvalStr, raven.NewException(fmt.Errorf(rvalStr), raven.NewStacktrace(2, 3, nil))) 89 | } 90 | 91 | pudding.SendRavenPacket(packet, r.cl, log, nil) 92 | panic(p) 93 | }() 94 | 95 | return fn() 96 | } 97 | 98 | // NewMiddlewareRaven builds a *MiddlewareRaven given a sentry DSN 99 | func NewMiddlewareRaven(sentryDSN string) (*MiddlewareRaven, error) { 100 | cl, err := raven.NewClient(sentryDSN, pudding.SentryTags) 101 | if err != nil { 102 | return nil, err 103 | } 104 | return &MiddlewareRaven{cl: cl}, nil 105 | } 106 | -------------------------------------------------------------------------------- /workers/mini_workers.go: -------------------------------------------------------------------------------- 1 | package workers 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/Sirupsen/logrus" 7 | ) 8 | 9 | type miniWorkers struct { 10 | cfg *internalConfig 11 | log *logrus.Logger 12 | r *MiddlewareRaven 13 | w map[string]func() error 14 | } 15 | 16 | func newMiniWorkers(cfg *internalConfig, log *logrus.Logger, r *MiddlewareRaven) *miniWorkers { 17 | return &miniWorkers{ 18 | cfg: cfg, 19 | log: log, 20 | r: r, 21 | w: map[string]func() error{}, 22 | } 23 | } 24 | 25 | func (mw *miniWorkers) Register(name string, f func() error) { 26 | mw.w[name] = f 27 | } 28 | 29 | func (mw *miniWorkers) Run() { 30 | mw.log.Debug("entering mini worker run loop") 31 | for { 32 | mw.runTick() 33 | } 34 | } 35 | 36 | func (mw *miniWorkers) runTick() { 37 | defer func() { 38 | if err := recover(); err != nil { 39 | mw.log.WithField("err", err).Error("recovered from panic, sleeping anyway") 40 | time.Sleep(time.Duration(int32(mw.cfg.MiniWorkerInterval)) * time.Second) 41 | } 42 | }() 43 | 44 | for name, f := range mw.w { 45 | mw.log.WithField("job", name).Debug("running mini worker job") 46 | 47 | err := mw.r.Do(f) 48 | if err != nil { 49 | mw.log.WithFields(logrus.Fields{ 50 | "err": err, 51 | "job": name, 52 | }).Error("mini worker job failed") 53 | } 54 | } 55 | 56 | 
mw.log.WithField("seconds", mw.cfg.MiniWorkerInterval).Info("mini workers sleeping") 57 | time.Sleep(time.Duration(int32(mw.cfg.MiniWorkerInterval)) * time.Second) 58 | } 59 | -------------------------------------------------------------------------------- /workers/sns_messages.go: -------------------------------------------------------------------------------- 1 | package workers 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "os" 7 | "strconv" 8 | 9 | "github.com/aws/aws-sdk-go/aws" 10 | "github.com/aws/aws-sdk-go/aws/session" 11 | "github.com/aws/aws-sdk-go/service/sns" 12 | "github.com/garyburd/redigo/redis" 13 | "github.com/jrallison/go-workers" 14 | "github.com/travis-ci/pudding" 15 | "github.com/travis-ci/pudding/db" 16 | ) 17 | 18 | var ( 19 | errMissingSNSMessage = fmt.Errorf("missing sns message") 20 | snsMessageHandlers = map[string]func(redis.Conn, *pudding.SNSMessage) error{ 21 | "SubscriptionConfirmation": handleSNSConfirmation, 22 | "Notification": handleSNSNotification, 23 | } 24 | ) 25 | 26 | func init() { 27 | defaultQueueFuncs["sns-messages"] = snsMessagesMain 28 | } 29 | 30 | func snsMessagesMain(cfg *internalConfig, msg *workers.Msg) { 31 | snsMessagePayloadJSON := []byte(msg.OriginalJson()) 32 | snsMessagePayload := &pudding.SNSMessagePayload{ 33 | Args: []*pudding.SNSMessage{}, 34 | } 35 | 36 | err := json.Unmarshal(snsMessagePayloadJSON, snsMessagePayload) 37 | if err != nil { 38 | log.WithField("err", err).Panic("failed to deserialize message") 39 | } 40 | 41 | snsMsg := snsMessagePayload.SNSMessage() 42 | if snsMsg == nil { 43 | log.WithField("err", errMissingSNSMessage).Panic("no sns message available") 44 | return 45 | } 46 | 47 | handlerFunc, ok := snsMessageHandlers[snsMsg.Type] 48 | if !ok { 49 | log.WithField("type", snsMsg.Type).Warn("no handler available for message type") 50 | return 51 | } 52 | 53 | err = handlerFunc(workers.Config.Pool.Get(), snsMsg) 54 | if err != nil { 55 | log.WithField("err", err).Panic("sns handler 
returned an error") 56 | } 57 | } 58 | 59 | // http://docs.aws.amazon.com/sns/latest/dg/SendMessageToHttp.html 60 | func handleSNSConfirmation(rc redis.Conn, msg *pudding.SNSMessage) error { 61 | if v, _ := strconv.ParseBool(os.Getenv("SNS_CONFIRMATION")); v { 62 | log.WithField("msg", msg).Info("handling subscription confirmation") 63 | 64 | svc := sns.New(session.New(), &aws.Config{Region: aws.String(os.Getenv("SNS_REGION"))}) 65 | 66 | params := &sns.ConfirmSubscriptionInput{ 67 | Token: aws.String(msg.Token), 68 | TopicArn: aws.String(msg.TopicARN), 69 | } 70 | resp, err := svc.ConfirmSubscription(params) 71 | if err != nil { 72 | return err 73 | } 74 | 75 | log.WithField("subscription", resp.String()).Info("confirmed subscription") 76 | 77 | return nil 78 | } 79 | 80 | log.WithField("msg", msg).Info("subscription confirmation not really being handled") 81 | 82 | return nil 83 | } 84 | 85 | func handleSNSNotification(rc redis.Conn, msg *pudding.SNSMessage) error { 86 | log.WithField("msg", msg).Debug("received an SNS notification") 87 | 88 | a, err := msg.AutoscalingLifecycleAction() 89 | if err != nil { 90 | log.WithField("err", err).Warn("unable to handle notification") 91 | return nil 92 | } 93 | 94 | if a.Event == "autoscaling:TEST_NOTIFICATION" { 95 | log.WithField("event", a.Event).Info("ignoring") 96 | return nil 97 | } 98 | 99 | switch a.LifecycleTransition { 100 | case "autoscaling:EC2_INSTANCE_LAUNCHING": 101 | log.WithField("action", a).Debug("storing instance launching lifecycle action") 102 | return db.StoreInstanceLifecycleAction(rc, a) 103 | case "autoscaling:EC2_INSTANCE_TERMINATING": 104 | log.WithField("action", a).Debug("setting expected_state to down") 105 | err = db.SetInstanceAttributes(rc, a.EC2InstanceID, map[string]string{"expected_state": "down"}) 106 | if err != nil { 107 | return err 108 | } 109 | log.WithField("action", a).Debug("storing instance terminating lifecycle action") 110 | return db.StoreInstanceLifecycleAction(rc, a) 111 
| default: 112 | log.WithField("action", a).Warn("unable to handle unknown lifecycle transition") 113 | } 114 | 115 | return nil 116 | } 117 | -------------------------------------------------------------------------------- /workers/workers.go: -------------------------------------------------------------------------------- 1 | package workers 2 | 3 | import ( 4 | "net/http" 5 | "strings" 6 | 7 | "github.com/Sirupsen/logrus" 8 | "github.com/garyburd/redigo/redis" 9 | "github.com/jrallison/go-workers" 10 | ) 11 | 12 | var ( 13 | log = logrus.New() 14 | 15 | defaultQueueFuncs = map[string]func(*internalConfig, *workers.Msg){} 16 | ) 17 | 18 | func runWorkers(cfg *internalConfig, log *logrus.Logger) error { 19 | workers.Logger = log 20 | workers.Configure(optsFromConfig(cfg)) 21 | 22 | rm, err := NewMiddlewareRaven(cfg.SentryDSN) 23 | if err != nil { 24 | log.WithFields(logrus.Fields{ 25 | "sentry_dsn": cfg.SentryDSN, 26 | "err": err, 27 | }).Error("failed to build sentry middleware") 28 | return err 29 | } 30 | 31 | workers.Middleware.Prepend(rm) 32 | 33 | for _, queue := range cfg.Queues { 34 | registered, ok := cfg.QueueFuncs[queue] 35 | if !ok { 36 | log.WithField("queue", queue).Warn("no worker func available for queue") 37 | continue 38 | } 39 | 40 | workers.Process(queue, func(msg *workers.Msg) { 41 | registered(cfg, msg) 42 | }, cfg.QueueConcurrencies[queue]) 43 | } 44 | 45 | go setupMiniWorkers(cfg, workers.Config.Pool, log, rm).Run() 46 | 47 | log.Info("starting go-workers") 48 | workers.Run() 49 | return nil 50 | } 51 | 52 | func setupMiniWorkers(cfg *internalConfig, r *redis.Pool, log *logrus.Logger, rm *MiddlewareRaven) *miniWorkers { 53 | mw := newMiniWorkers(cfg, log, rm) 54 | mw.Register("ec2-sync", func() error { 55 | syncer, err := newEC2Syncer(cfg, r, log) 56 | if err != nil { 57 | log.WithField("err", err).Error("failed to build syncer") 58 | return err 59 | } 60 | 61 | return syncer.Sync() 62 | }) 63 | 64 | mw.Register("keepalive", func() error { 
65 | _, err := http.Get(cfg.WebHost) 66 | if err != nil { 67 | log.WithField("err", err).Panic("failed to hit web host") 68 | } 69 | 70 | return nil 71 | }) 72 | 73 | return mw 74 | } 75 | 76 | func optsFromConfig(cfg *internalConfig) map[string]string { 77 | opts := map[string]string{ 78 | "server": cfg.RedisURL.Host, 79 | "database": strings.TrimLeft(cfg.RedisURL.Path, "/"), 80 | "pool": cfg.RedisPoolSize, 81 | "process": cfg.ProcessID, 82 | "namespace": "pudding", 83 | } 84 | 85 | if cfg.RedisURL.User != nil { 86 | if p, ok := cfg.RedisURL.User.Password(); ok { 87 | opts["password"] = p 88 | } 89 | } 90 | 91 | return opts 92 | } 93 | -------------------------------------------------------------------------------- /workers/workers_test.go: -------------------------------------------------------------------------------- 1 | package workers 2 | 3 | import "testing" 4 | 5 | func TestNothing(t *testing.T) { 6 | if 1 != 1 { 7 | t.Fail() 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /zomg_globals.go: -------------------------------------------------------------------------------- 1 | package pudding 2 | 3 | // TELL IT LIKE IT IS 4 | 5 | var ( 6 | // RedisNamespace is used throughout as the namespace for all 7 | // redis keys 8 | RedisNamespace = "pudding" 9 | ) 10 | --------------------------------------------------------------------------------