├── .gitignore ├── .travis.yml ├── LICENSE ├── LICENSE.txt ├── NOTICE.txt ├── README.md ├── checkpoints.go ├── checkpoints_test.go ├── clients.go ├── cmd └── noopkinsumer │ ├── README.md │ └── main.go ├── config.go ├── config_test.go ├── errors.go ├── go.mod ├── go.sum ├── kinsumer.go ├── kinsumer.goconvey ├── kinsumer_test.go ├── leader.go ├── logger.go ├── mocks ├── dynamo.go └── dynamo_test.go ├── noopstatreceiver.go ├── shard_consumer.go ├── statreceiver.go └── statsd └── statsd.go /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/go,osx 3 | 4 | ### Go ### 5 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 6 | *.o 7 | *.a 8 | *.so 9 | 10 | # Folders 11 | _obj 12 | _test 13 | 14 | # Architecture specific extensions/prefixes 15 | *.[568vq] 16 | [568vq].out 17 | 18 | *.cgo1.go 19 | *.cgo2.c 20 | _cgo_defun.c 21 | _cgo_gotypes.go 22 | _cgo_export.* 23 | 24 | _testmain.go 25 | 26 | *.exe 27 | *.test 28 | *.prof 29 | 30 | 31 | ### OSX ### 32 | .DS_Store 33 | .AppleDouble 34 | .LSOverride 35 | 36 | # Icon must end with two \r 37 | Icon 38 | 39 | 40 | # Thumbnails 41 | ._* 42 | 43 | # Files that might appear in the root of a volume 44 | .DocumentRevisions-V100 45 | .fseventsd 46 | .Spotlight-V100 47 | .TemporaryItems 48 | .Trashes 49 | .VolumeIcon.icns 50 | 51 | # Directories potentially created on remote AFP share 52 | .AppleDB 53 | .AppleDesktop 54 | Network Trash Folder 55 | Temporary Items 56 | .apdisk 57 | 58 | .arcconfig 59 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: go 3 | go: 4 | - "1.11.13" 5 | - "1.12" 6 | - "1.13" 7 | go_import_path: github.com/twitchscience/kinsumer 8 | install: 9 | - curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | bash -s -- -b 
$GOPATH/bin v1.21.0 10 | 11 | script: 12 | - GO111MODULE=on go test -short ./... 13 | - GO111MODULE=on golangci-lint run 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Amazon Software License 3 | 4 | This Amazon Software License (“License”) governs your use, reproduction, and distribution of the accompanying software as specified below. 5 | 1. Definitions 6 | 7 | “Licensor” means any person or entity that distributes its Work. 8 | 9 | “Software” means the original work of authorship made available under this License. 10 | 11 | “Work” means the Software and any additions to or derivative works of the Software that are made available under this License. 12 | 13 | The terms “reproduce,” “reproduction,” “derivative works,” and “distribution” have the meaning as provided under U.S. copyright law; provided, however, that for the purposes of this License, derivative works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work. 14 | 15 | Works, including the Software, are “made available” under this License by including in or with the Work either (a) a copyright notice referencing the applicability of this License to the Work, or (b) a copy of this License. 16 | 2. License Grants 17 | 18 | 2.1 Copyright Grant. Subject to the terms and conditions of this License, each Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free, copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense and distribute its Work and any resulting derivative works in any form. 19 | 20 | 2.2 Patent Grant. 
Subject to the terms and conditions of this License, each Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free patent license to make, have made, use, sell, offer for sale, import, and otherwise transfer its Work, in whole or in part. The foregoing license applies only to the patent claims licensable by Licensor that would be infringed by Licensor’s Work (or portion thereof) individually and excluding any combinations with any other materials or technology. 21 | 3. Limitations 22 | 23 | 3.1 Redistribution. You may reproduce or distribute the Work only if (a) you do so under this License, (b) you include a complete copy of this License with your distribution, and (c) you retain without modification any copyright, patent, trademark, or attribution notices that are present in the Work. 24 | 25 | 3.2 Derivative Works. You may specify that additional or different terms apply to the use, reproduction, and distribution of your derivative works of the Work (“Your Terms”) only if (a) Your Terms provide that the use limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works that are subject to Your Terms. Notwithstanding Your Terms, this License (including the redistribution requirements in Section 3.1) will continue to apply to the Work itself. 26 | 27 | 3.3 Use Limitation. The Work and any derivative works thereof only may be used or intended for use with the web services, computing platforms or applications provided by Amazon.com, Inc. or its affiliates, including Amazon Web Services, Inc. 28 | 29 | 3.4 Patent Claims. If you bring or threaten to bring a patent claim against any Licensor (including any claim, cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then your rights under this License from such Licensor (including the grants in Sections 2.1 and 2.2) will terminate immediately. 30 | 31 | 3.5 Trademarks. 
This License does not grant any rights to use any Licensor’s or its affiliates’ names, logos, or trademarks, except as necessary to reproduce the notices described in this License. 32 | 33 | 3.6 Termination. If you violate any term of this License, then your rights under this License (including the grants in Sections 2.1 and 2.2) will terminate immediately. 34 | 4. Disclaimer of Warranty. 35 | 36 | THE WORK IS PROVIDED “AS IS” WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF M ERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE. SOME STATES’ CONSUMER LAWS DO NOT ALLOW EXCLUSION OF AN IMPLIED WARRANTY, SO THIS DISCLAIMER MAY NOT APPLY TO YOU. 37 | 5. Limitation of Liability. 38 | 39 | EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER COMM ERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 40 | 41 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Amazon Software License 3 | 4 | This Amazon Software License (“License”) governs your use, reproduction, and distribution of the accompanying software as specified below. 5 | 1. Definitions 6 | 7 | “Licensor” means any person or entity that distributes its Work. 8 | 9 | “Software” means the original work of authorship made available under this License. 
10 | 11 | “Work” means the Software and any additions to or derivative works of the Software that are made available under this License. 12 | 13 | The terms “reproduce,” “reproduction,” “derivative works,” and “distribution” have the meaning as provided under U.S. copyright law; provided, however, that for the purposes of this License, derivative works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work. 14 | 15 | Works, including the Software, are “made available” under this License by including in or with the Work either (a) a copyright notice referencing the applicability of this License to the Work, or (b) a copy of this License. 16 | 2. License Grants 17 | 18 | 2.1 Copyright Grant. Subject to the terms and conditions of this License, each Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free, copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense and distribute its Work and any resulting derivative works in any form. 19 | 20 | 2.2 Patent Grant. Subject to the terms and conditions of this License, each Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free patent license to make, have made, use, sell, offer for sale, import, and otherwise transfer its Work, in whole or in part. The foregoing license applies only to the patent claims licensable by Licensor that would be infringed by Licensor’s Work (or portion thereof) individually and excluding any combinations with any other materials or technology. 21 | 3. Limitations 22 | 23 | 3.1 Redistribution. You may reproduce or distribute the Work only if (a) you do so under this License, (b) you include a complete copy of this License with your distribution, and (c) you retain without modification any copyright, patent, trademark, or attribution notices that are present in the Work. 24 | 25 | 3.2 Derivative Works. 
You may specify that additional or different terms apply to the use, reproduction, and distribution of your derivative works of the Work (“Your Terms”) only if (a) Your Terms provide that the use limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works that are subject to Your Terms. Notwithstanding Your Terms, this License (including the redistribution requirements in Section 3.1) will continue to apply to the Work itself. 26 | 27 | 3.3 Use Limitation. The Work and any derivative works thereof only may be used or intended for use with the web services, computing platforms or applications provided by Amazon.com, Inc. or its affiliates, including Amazon Web Services, Inc. 28 | 29 | 3.4 Patent Claims. If you bring or threaten to bring a patent claim against any Licensor (including any claim, cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then your rights under this License from such Licensor (including the grants in Sections 2.1 and 2.2) will terminate immediately. 30 | 31 | 3.5 Trademarks. This License does not grant any rights to use any Licensor’s or its affiliates’ names, logos, or trademarks, except as necessary to reproduce the notices described in this License. 32 | 33 | 3.6 Termination. If you violate any term of this License, then your rights under this License (including the grants in Sections 2.1 and 2.2) will terminate immediately. 34 | 4. Disclaimer of Warranty. 35 | 36 | THE WORK IS PROVIDED “AS IS” WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF M ERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE. SOME STATES’ CONSUMER LAWS DO NOT ALLOW EXCLUSION OF AN IMPLIED WARRANTY, SO THIS DISCLAIMER MAY NOT APPLY TO YOU. 37 | 5. Limitation of Liability. 
38 | 39 | EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER COMM ERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 40 | 41 | -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | Kinsumer 2 | Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Kinsumer 2 | 3 | Native Go consumer for AWS Kinesis streams. 4 | 5 | [![Build Status](https://travis-ci.org/twitchscience/kinsumer.svg?branch=master)](https://travis-ci.org/TwitchScience/kinsumer) [![Go Report Card](https://goreportcard.com/badge/github.com/twitchscience/kinsumer)](https://goreportcard.com/report/github.com/twitchscience/kinsumer) 6 | 7 | ## Rationale 8 | There are several very good ways to consume Kinesis streams, primarily [The Amazon Kinesis Client Library](http://docs.aws.amazon.com/kinesis/latest/dev/developing-consumers-with-kcl.html), and it is recommended that be investigated as an option. 9 | 10 | Kinsumer is designed for a cluster of Go clients that want each client to consume from multiple shards. Kinsumer is designed to be at-least-once with a strong effort to be exactly-once. Kinsumer by design does not attempt to keep shards on a specific client and will shuffle them around as needed. 
11 | 12 | ## Behavior 13 | Kinsumer is designed to suit a specific use case of kinesis consuming, specifically when you need to have multiple clients each handling multiple shards and you do not care which shard is being consumed by which client. 14 | 15 | Kinsumer will rebalance shards to each client whenever it detects the list of shards or list of clients has changed, and does not attempt to keep shards on the same client. 16 | 17 | If you are running multiple Kinsumer apps against a single stream, make sure to increase the throttleDelay to at least `50ms + (200ms * )`. Note that Kinesis does not support more than two readers per writer on a fully utilized stream, so make sure you have enough stream capacity. 18 | 19 | ## Example 20 | See `cmd/noopkinsumer` for a fully working example of a kinsumer client. 21 | 22 | ## Testing 23 | 24 | ### Testing with local test servers 25 | By default the tests look for a dynamodb server at `localhost:4567` and kinesis server at `localhost:4568` 26 | 27 | 28 | For example using [kinesalite](https://github.com/mhart/kinesalite) and [dynalite](https://github.com/mhart/dynalite) 29 | ``` 30 | kinesalite --port 4568 --createStreamMs 1 --deleteStreamMs 1 --updateStreamMs 1 --shardLimit 1000 & 31 | dynalite --port 4567 --createTableMs 1 --deleteTableMs 1 --updateTableMs 1 & 32 | ``` 33 | Then `go test ./...` 34 | 35 | ### Testing with real aws resources 36 | It's possible to run the test against real AWS resources, but the tests create and destroy resources, which can be finicky, and potentially expensive. 37 | 38 | Make sure you have your credentials setup in a way that [aws-sdk-go](https://github.com/aws/aws-sdk-go) is happy with, or be running on an EC2 instance. 39 | 40 | Then `go test . 
-dynamo_endpoint= -kinesis_endpoint= -resource_change_timeout=30s` 41 | -------------------------------------------------------------------------------- /checkpoints.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Twitch Interactive 2 | 3 | package kinsumer 4 | 5 | import ( 6 | "fmt" 7 | "sync" 8 | "time" 9 | 10 | "github.com/aws/aws-sdk-go/aws" 11 | "github.com/aws/aws-sdk-go/aws/awserr" 12 | "github.com/aws/aws-sdk-go/service/dynamodb" 13 | "github.com/aws/aws-sdk-go/service/dynamodb/dynamodbattribute" 14 | "github.com/aws/aws-sdk-go/service/dynamodb/dynamodbiface" 15 | ) 16 | 17 | // Note: Not thread safe! 18 | 19 | type checkpointer struct { 20 | shardID string 21 | tableName string 22 | dynamodb dynamodbiface.DynamoDBAPI 23 | sequenceNumber string 24 | ownerName string 25 | ownerID string 26 | maxAgeForClientRecord time.Duration 27 | stats StatReceiver 28 | captured bool 29 | dirty bool 30 | mutex sync.Mutex 31 | finished bool 32 | finalSequenceNumber string 33 | } 34 | 35 | type checkpointRecord struct { 36 | Shard string 37 | SequenceNumber *string // last read sequence number, null if the shard has never been consumed 38 | LastUpdate int64 // timestamp of last commit/ownership change 39 | OwnerName *string // uuid of owning client, null if the shard is unowned 40 | Finished *int64 // timestamp of when the shard was fully consumed, null if it's active 41 | 42 | // Columns added to the table that are never used for decision making in the 43 | // library, rather they are useful for manual troubleshooting 44 | OwnerID *string 45 | LastUpdateRFC string 46 | FinishedRFC *string 47 | } 48 | 49 | // capture is a non-blocking call that attempts to capture the given shard/checkpoint. 
50 | // It returns a checkpointer on success, or nil if it fails to capture the checkpoint 51 | func capture( 52 | shardID string, 53 | tableName string, 54 | dynamodbiface dynamodbiface.DynamoDBAPI, 55 | ownerName string, 56 | ownerID string, 57 | maxAgeForClientRecord time.Duration, 58 | stats StatReceiver) (*checkpointer, error) { 59 | 60 | cutoff := time.Now().Add(-maxAgeForClientRecord).UnixNano() 61 | 62 | // Grab the entry from dynamo assuming there is one 63 | resp, err := dynamodbiface.GetItem(&dynamodb.GetItemInput{ 64 | TableName: aws.String(tableName), 65 | ConsistentRead: aws.Bool(true), 66 | Key: map[string]*dynamodb.AttributeValue{ 67 | "Shard": {S: aws.String(shardID)}, 68 | }, 69 | }) 70 | 71 | if err != nil { 72 | return nil, fmt.Errorf("error calling GetItem on shard checkpoint: %v", err) 73 | } 74 | 75 | // Convert to struct so we can work with the values 76 | var record checkpointRecord 77 | if err = dynamodbattribute.UnmarshalMap(resp.Item, &record); err != nil { 78 | return nil, err 79 | } 80 | 81 | // If the record is marked as owned by someone else, and has not expired 82 | if record.OwnerID != nil && record.LastUpdate > cutoff { 83 | // We fail to capture it 84 | return nil, nil 85 | } 86 | 87 | // Make sure the Shard is set in case there was no record 88 | record.Shard = shardID 89 | 90 | // Mark us as the owners 91 | record.OwnerID = &ownerID 92 | record.OwnerName = &ownerName 93 | 94 | // Update timestamp 95 | now := time.Now() 96 | record.LastUpdate = now.UnixNano() 97 | record.LastUpdateRFC = now.UTC().Format(time.RFC1123Z) 98 | 99 | item, err := dynamodbattribute.MarshalMap(record) 100 | if err != nil { 101 | return nil, err 102 | } 103 | 104 | attrVals, err := dynamodbattribute.MarshalMap(map[string]interface{}{ 105 | ":cutoff": aws.Int64(cutoff), 106 | ":nullType": aws.String("NULL"), 107 | }) 108 | if err != nil { 109 | return nil, err 110 | } 111 | if _, err = dynamodbiface.PutItem(&dynamodb.PutItemInput{ 112 | TableName: 
aws.String(tableName), 113 | Item: item, 114 | // The OwnerID doesn't exist if the entry doesn't exist, but PutItem with a marshaled 115 | // checkpointRecord sets a nil OwnerID to the NULL type. 116 | ConditionExpression: aws.String( 117 | "attribute_not_exists(OwnerID) OR attribute_type(OwnerID, :nullType) OR LastUpdate <= :cutoff"), 118 | ExpressionAttributeValues: attrVals, 119 | }); err != nil { 120 | if awsErr, ok := err.(awserr.Error); ok && awsErr.Code() == "ConditionalCheckFailedException" { 121 | // We failed to capture it 122 | return nil, nil 123 | } 124 | return nil, err 125 | } 126 | 127 | checkpointer := &checkpointer{ 128 | shardID: shardID, 129 | tableName: tableName, 130 | dynamodb: dynamodbiface, 131 | ownerName: ownerName, 132 | ownerID: ownerID, 133 | stats: stats, 134 | sequenceNumber: aws.StringValue(record.SequenceNumber), 135 | maxAgeForClientRecord: maxAgeForClientRecord, 136 | captured: true, 137 | } 138 | 139 | return checkpointer, nil 140 | } 141 | 142 | // commit writes the latest SequenceNumber consumed to dynamo and updates LastUpdate. 143 | // Returns true if we set Finished in dynamo because the library user finished consuming the shard. 144 | // Once that has happened, the checkpointer should be released and never grabbed again. 
145 | func (cp *checkpointer) commit() (bool, error) { 146 | cp.mutex.Lock() 147 | defer cp.mutex.Unlock() 148 | if !cp.dirty && !cp.finished { 149 | return false, nil 150 | } 151 | now := time.Now() 152 | 153 | sn := &cp.sequenceNumber 154 | if cp.sequenceNumber == "" { 155 | // We are not allowed to pass empty strings to dynamo, so instead pass a nil *string 156 | // to 'unset' it 157 | sn = nil 158 | } 159 | 160 | record := checkpointRecord{ 161 | Shard: cp.shardID, 162 | SequenceNumber: sn, 163 | LastUpdate: now.UnixNano(), 164 | LastUpdateRFC: now.UTC().Format(time.RFC1123Z), 165 | } 166 | finished := false 167 | if cp.finished && (cp.sequenceNumber == cp.finalSequenceNumber || cp.finalSequenceNumber == "") { 168 | record.Finished = aws.Int64(now.UnixNano()) 169 | record.FinishedRFC = aws.String(now.UTC().Format(time.RFC1123Z)) 170 | finished = true 171 | } 172 | record.OwnerID = &cp.ownerID 173 | record.OwnerName = &cp.ownerName 174 | 175 | item, err := dynamodbattribute.MarshalMap(&record) 176 | if err != nil { 177 | return false, err 178 | } 179 | 180 | attrVals, err := dynamodbattribute.MarshalMap(map[string]interface{}{ 181 | ":ownerID": aws.String(cp.ownerID), 182 | }) 183 | if err != nil { 184 | return false, err 185 | } 186 | if _, err = cp.dynamodb.PutItem(&dynamodb.PutItemInput{ 187 | TableName: aws.String(cp.tableName), 188 | Item: item, 189 | ConditionExpression: aws.String("OwnerID = :ownerID"), 190 | ExpressionAttributeValues: attrVals, 191 | }); err != nil { 192 | return false, fmt.Errorf("error committing checkpoint: %s", err) 193 | } 194 | 195 | if sn != nil { 196 | cp.stats.Checkpoint() 197 | } 198 | cp.dirty = false 199 | return finished, nil 200 | } 201 | 202 | // release releases our ownership of the checkpoint in dynamo so another client can take it 203 | func (cp *checkpointer) release() error { 204 | now := time.Now() 205 | 206 | attrVals, err := dynamodbattribute.MarshalMap(map[string]interface{}{ 207 | ":ownerID": 
aws.String(cp.ownerID), 208 | ":sequenceNumber": aws.String(cp.sequenceNumber), 209 | ":lastUpdate": aws.Int64(now.UnixNano()), 210 | ":lastUpdateRFC": aws.String(now.UTC().Format(time.RFC1123Z)), 211 | }) 212 | if err != nil { 213 | return err 214 | } 215 | if _, err = cp.dynamodb.UpdateItem(&dynamodb.UpdateItemInput{ 216 | TableName: aws.String(cp.tableName), 217 | Key: map[string]*dynamodb.AttributeValue{ 218 | "Shard": {S: aws.String(cp.shardID)}, 219 | }, 220 | UpdateExpression: aws.String("REMOVE OwnerID, OwnerName " + 221 | "SET LastUpdate = :lastUpdate, LastUpdateRFC = :lastUpdateRFC, " + 222 | "SequenceNumber = :sequenceNumber"), 223 | ConditionExpression: aws.String("OwnerID = :ownerID"), 224 | ExpressionAttributeValues: attrVals, 225 | }); err != nil { 226 | return fmt.Errorf("error releasing checkpoint: %s", err) 227 | } 228 | 229 | if cp.sequenceNumber != "" { 230 | cp.stats.Checkpoint() 231 | } 232 | 233 | cp.captured = false 234 | 235 | return nil 236 | } 237 | 238 | // update updates the current sequenceNumber of the checkpoint, marking it dirty if necessary 239 | func (cp *checkpointer) update(sequenceNumber string) { 240 | cp.mutex.Lock() 241 | defer cp.mutex.Unlock() 242 | cp.dirty = cp.dirty || cp.sequenceNumber != sequenceNumber 243 | cp.sequenceNumber = sequenceNumber 244 | } 245 | 246 | // finish marks the given sequence number as the final one for the shard. 247 | // sequenceNumber is the empty string if we never read anything from the shard. 248 | func (cp *checkpointer) finish(sequenceNumber string) { 249 | cp.mutex.Lock() 250 | defer cp.mutex.Unlock() 251 | cp.finalSequenceNumber = sequenceNumber 252 | cp.finished = true 253 | } 254 | 255 | // loadCheckpoints returns checkpoint records from dynamo mapped by shard id. 
256 | func loadCheckpoints(db dynamodbiface.DynamoDBAPI, tableName string) (map[string]*checkpointRecord, error) { 257 | params := &dynamodb.ScanInput{ 258 | TableName: aws.String(tableName), 259 | ConsistentRead: aws.Bool(true), 260 | } 261 | 262 | var records []*checkpointRecord 263 | var innerError error 264 | err := db.ScanPages(params, func(p *dynamodb.ScanOutput, lastPage bool) (shouldContinue bool) { 265 | for _, item := range p.Items { 266 | var record checkpointRecord 267 | innerError = dynamodbattribute.UnmarshalMap(item, &record) 268 | if innerError != nil { 269 | return false 270 | } 271 | records = append(records, &record) 272 | } 273 | 274 | return !lastPage 275 | }) 276 | 277 | if innerError != nil { 278 | return nil, innerError 279 | } 280 | 281 | if err != nil { 282 | return nil, err 283 | } 284 | 285 | checkpointMap := make(map[string]*checkpointRecord, len(records)) 286 | for _, checkpoint := range records { 287 | checkpointMap[checkpoint.Shard] = checkpoint 288 | } 289 | return checkpointMap, nil 290 | } 291 | -------------------------------------------------------------------------------- /checkpoints_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Twitch Interactive. 
2 | 3 | package kinsumer 4 | 5 | import ( 6 | "testing" 7 | "time" 8 | 9 | "github.com/twitchscience/kinsumer/mocks" 10 | ) 11 | 12 | func TestCheckpointer(t *testing.T) { 13 | table := "checkpoints" 14 | mock := mocks.NewMockDynamo([]string{table}) 15 | stats := &NoopStatReceiver{} 16 | 17 | cp, err := capture("shard", table, mock, "ownerName", "ownerId", 3*time.Minute, stats) 18 | 19 | // Initially, we expect that there is no record, so our new record should have no sequence number 20 | if err != nil { 21 | t.Errorf("current 1 err=%q", err) 22 | } 23 | if cp == nil { 24 | t.Errorf("Should always be able to capture the shard if there is no entry in dynamo") 25 | } 26 | if cp.sequenceNumber != "" { 27 | t.Errorf("sequence number should initially be an empty string") 28 | } 29 | 30 | // Update the sequence number. This shouldn't cause any external request. 31 | mocks.AssertNoRequestsMade(t, mock.(*mocks.MockDynamo), "update(seq1)", func() { 32 | cp.update("seq1") 33 | }) 34 | 35 | // Now actually commit. 36 | mocks.AssertRequestMade(t, mock.(*mocks.MockDynamo), "commit(seq1)", func() { 37 | if _, err = cp.commit(); err != nil { 38 | t.Errorf("commit seq1 err=%q", err) 39 | } 40 | }) 41 | 42 | // Call update, but keep the same sequence number 43 | cp.update("seq1") 44 | 45 | // Since the sequence number hasn't changed, committing shouldn't make a request. 
46 | mocks.AssertNoRequestsMade(t, mock.(*mocks.MockDynamo), "commit unchanged sequence number", func() { 47 | if _, err = cp.commit(); err != nil { 48 | t.Errorf("commit unchanged err=%q", err) 49 | } 50 | }) 51 | 52 | // Call update again with a new value 53 | cp.update("seq2") 54 | 55 | // committing should trigger a request 56 | mocks.AssertRequestMade(t, mock.(*mocks.MockDynamo), "commit(seq2)", func() { 57 | if _, err = cp.commit(); err != nil { 58 | t.Errorf("commit seq2 err=%q", err) 59 | } 60 | }) 61 | 62 | // Call update with a new value twice in a row 63 | cp.update("seq3") 64 | cp.update("seq3") 65 | 66 | // This should still trigger an update 67 | mocks.AssertRequestMade(t, mock.(*mocks.MockDynamo), "commit(seq3)", func() { 68 | if _, err = cp.commit(); err != nil { 69 | t.Errorf("commit seq3 err=%q", err) 70 | } 71 | }) 72 | 73 | // Try to get another checkpointer for this shard, should not succeed but not error 74 | cp2, err := capture("shard", table, mock, "differentOwner", "differentOwnerId", 3*time.Minute, stats) 75 | if err != nil { 76 | t.Errorf("cp2 first attempt err=%q", err) 77 | } 78 | if cp2 != nil { 79 | t.Errorf("Should not be able to steal shard") 80 | } 81 | 82 | cp.update("lastseq") 83 | 84 | // release should trigger an update 85 | mocks.AssertRequestMade(t, mock.(*mocks.MockDynamo), "cp.release()", func() { 86 | if err = cp.release(); err != nil { 87 | t.Errorf("release err=%q", err) 88 | } 89 | }) 90 | 91 | //TODO: Test fails because dynamo mock does not handle replacing records in put, need to resolve that 92 | /* 93 | // Now that we have released the shard, we should be able to grab it 94 | cp2, err = newCheckpointer(aws.String("shard"), table, mock, "differentOwner", "differentOwnerId", 3*time.Minute) 95 | if err != nil { 96 | t.Errorf("cp2 second attempt err=%q", err) 97 | } 98 | if cp2 == nil { 99 | t.Errorf("The shard should be ours!") 100 | } 101 | 102 | if cp2.sequenceNumber != "lastseq" { 103 | t.Errorf("Release should have 
committed `lastseq` but new checkpointer got %s!", cp2.sequenceNumber) 104 | } 105 | */ 106 | } 107 | -------------------------------------------------------------------------------- /clients.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Twitch Interactive 2 | 3 | package kinsumer 4 | 5 | //TODO: The filename is bad 6 | 7 | import ( 8 | "sort" 9 | "strconv" 10 | "time" 11 | 12 | "github.com/aws/aws-sdk-go/aws" 13 | "github.com/aws/aws-sdk-go/service/dynamodb" 14 | "github.com/aws/aws-sdk-go/service/dynamodb/dynamodbattribute" 15 | "github.com/aws/aws-sdk-go/service/dynamodb/dynamodbiface" 16 | ) 17 | 18 | const clientReapAge = 48 * time.Hour 19 | 20 | type clientRecord struct { 21 | ID string 22 | LastUpdate int64 23 | 24 | // Columns added to the table that are never used for decision making in the 25 | // library, rather they are useful for manual troubleshooting 26 | Name string 27 | LastUpdateRFC string 28 | } 29 | 30 | type sortableClients []clientRecord 31 | 32 | func (sc sortableClients) Len() int { 33 | return len(sc) 34 | } 35 | 36 | func (sc sortableClients) Less(left, right int) bool { 37 | return sc[left].ID < sc[right].ID 38 | } 39 | 40 | func (sc sortableClients) Swap(left, right int) { 41 | sc[left], sc[right] = sc[right], sc[left] 42 | } 43 | 44 | // registerWithClientsTable adds or updates our client with a current LastUpdate in dynamo 45 | func registerWithClientsTable(db dynamodbiface.DynamoDBAPI, id, name, tableName string) error { 46 | now := time.Now() 47 | item, err := dynamodbattribute.MarshalMap(clientRecord{ 48 | ID: id, 49 | Name: name, 50 | LastUpdate: now.UnixNano(), 51 | LastUpdateRFC: now.UTC().Format(time.RFC1123Z), 52 | }) 53 | 54 | if err != nil { 55 | return err 56 | } 57 | 58 | if _, err = db.PutItem(&dynamodb.PutItemInput{ 59 | TableName: aws.String(tableName), 60 | Item: item, 61 | }); err != nil { 62 | return err 63 | } 64 | 65 | return nil 66 | } 67 | 68 | // 
deregisterWithClientsTable deletes our client from dynamo 69 | func deregisterFromClientsTable(db dynamodbiface.DynamoDBAPI, id, tableName string) error { 70 | idStruct := struct{ ID string }{ID: id} 71 | item, err := dynamodbattribute.MarshalMap(idStruct) 72 | 73 | if err != nil { 74 | return err 75 | } 76 | 77 | if _, err = db.DeleteItem(&dynamodb.DeleteItemInput{ 78 | TableName: aws.String(tableName), 79 | Key: item, 80 | }); err != nil { 81 | return err 82 | } 83 | 84 | return nil 85 | } 86 | 87 | // getClients returns a sorted list of all recently-updated clients in dynamo 88 | func getClients(db dynamodbiface.DynamoDBAPI, name string, tableName string, maxAgeForClientRecord time.Duration) (clients []clientRecord, err error) { 89 | filterExpression := "LastUpdate > :cutoff" 90 | cutoff := strconv.FormatInt(time.Now().Add(-maxAgeForClientRecord).UnixNano(), 10) 91 | 92 | params := &dynamodb.ScanInput{ 93 | TableName: aws.String(tableName), 94 | ConsistentRead: aws.Bool(true), 95 | FilterExpression: aws.String(filterExpression), 96 | ExpressionAttributeValues: map[string]*dynamodb.AttributeValue{ 97 | ":cutoff": {N: &cutoff}, 98 | }, 99 | } 100 | 101 | var innerError error 102 | err = db.ScanPages(params, func(p *dynamodb.ScanOutput, lastPage bool) (shouldContinue bool) { 103 | for _, item := range p.Items { 104 | var record clientRecord 105 | innerError = dynamodbattribute.UnmarshalMap(item, &record) 106 | if innerError != nil { 107 | return false 108 | } 109 | clients = append(clients, record) 110 | } 111 | 112 | return !lastPage 113 | }) 114 | 115 | if innerError != nil { 116 | return nil, innerError 117 | } 118 | 119 | if err != nil { 120 | return nil, err 121 | } 122 | 123 | sort.Sort(sortableClients(clients)) 124 | return clients, nil 125 | } 126 | 127 | // reapClients deletes any sufficiently old clients from dynamo 128 | func reapClients(db dynamodbiface.DynamoDBAPI, tableName string) error { 129 | filterExpression := "LastUpdate < :cutoff" 130 | cutoff 
:= strconv.FormatInt(time.Now().Add(-clientReapAge).UnixNano(), 10) 131 | 132 | params := &dynamodb.ScanInput{ 133 | TableName: aws.String(tableName), 134 | ConsistentRead: aws.Bool(true), 135 | FilterExpression: aws.String(filterExpression), 136 | ExpressionAttributeValues: map[string]*dynamodb.AttributeValue{ 137 | ":cutoff": {N: &cutoff}, 138 | }, 139 | } 140 | 141 | var clients []clientRecord 142 | var innerError error 143 | err := db.ScanPages(params, func(p *dynamodb.ScanOutput, lastPage bool) (shouldContinue bool) { 144 | for _, item := range p.Items { 145 | var record clientRecord 146 | innerError = dynamodbattribute.UnmarshalMap(item, &record) 147 | if innerError != nil { 148 | return false 149 | } 150 | clients = append(clients, record) 151 | } 152 | 153 | return !lastPage 154 | }) 155 | 156 | if innerError != nil { 157 | return innerError 158 | } 159 | 160 | if err != nil { 161 | return err 162 | } 163 | 164 | for _, client := range clients { 165 | idStruct := struct{ ID string }{ID: client.ID} 166 | item, err := dynamodbattribute.MarshalMap(idStruct) 167 | if err != nil { 168 | return err 169 | } 170 | if _, err = db.DeleteItem(&dynamodb.DeleteItemInput{ 171 | TableName: aws.String(tableName), 172 | Key: item, 173 | ConditionExpression: aws.String(filterExpression), 174 | ExpressionAttributeValues: map[string]*dynamodb.AttributeValue{ 175 | ":cutoff": {N: &cutoff}, 176 | }, 177 | }); err != nil { 178 | return err 179 | } 180 | } 181 | return nil 182 | } 183 | -------------------------------------------------------------------------------- /cmd/noopkinsumer/README.md: -------------------------------------------------------------------------------- 1 | # noopkinsumer 2 | 3 | noopkinsumer is a bare-bones kinesis consumer using the kinsumer library. It consumes all the events on a 4 | stream and does nothing with their data. 5 | 6 | In addition to being a minimal example, noopkinsumer is useful for its side effect of writing stats to statsd. 
7 | 8 | ## Resources 9 | 10 | To use noopkinsumer you need a kinesis stream with data on it or being written to it, and three dynamo tables with 11 | the following HASH keys 12 | 13 | |TableName|DistKey| 14 | |---------|-------| 15 | |noopkinsumer_clients|ID (String)| 16 | |noopkinsumer_checkpoints|Shard (String)| 17 | |noopkinsumer_metadata|Key (String)| 18 | 19 | The dynamo tables can be created programmatically by running the example with flag `createTables` set to true 20 | 21 | -------------------------------------------------------------------------------- /cmd/noopkinsumer/main.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Twitch Interactive 2 | 3 | package main 4 | 5 | import ( 6 | "flag" 7 | "fmt" 8 | "log" 9 | "os" 10 | "os/signal" 11 | "sync" 12 | "syscall" 13 | "time" 14 | 15 | "github.com/aws/aws-sdk-go/aws" 16 | "github.com/aws/aws-sdk-go/aws/session" 17 | "github.com/google/uuid" 18 | "github.com/twitchscience/kinsumer" 19 | "github.com/twitchscience/kinsumer/statsd" 20 | ) 21 | 22 | var ( 23 | statsdHostPort string 24 | statsdPrefix string 25 | kinesisStreamName string 26 | createDynamoTables bool 27 | ) 28 | 29 | func init() { 30 | flag.StringVar(&statsdHostPort, "statsdHostPort", "", "host:port of statsd server") 31 | flag.StringVar(&statsdPrefix, "statsdPrefix", "", "statsd prefix") 32 | flag.StringVar(&kinesisStreamName, "stream", "", "name of kinesis stream") 33 | flag.BoolVar(&createDynamoTables, "createTables", false, "create dynamo db tables") 34 | } 35 | 36 | var ( 37 | records chan []byte 38 | wg sync.WaitGroup 39 | k *kinsumer.Kinsumer 40 | ) 41 | 42 | func initKinsumer() { 43 | var ( 44 | stats kinsumer.StatReceiver = &kinsumer.NoopStatReceiver{} 45 | err error 46 | ) 47 | 48 | if len(kinesisStreamName) == 0 { 49 | log.Fatalln("stream name commandline parameter is required") 50 | } 51 | 52 | if len(statsdHostPort) > 0 && len(statsdPrefix) > 0 { 53 | stats, err = 
statsd.New(statsdHostPort, statsdPrefix) 54 | if err != nil { 55 | log.Fatalf("Error creating statsd object: %v", err) 56 | } 57 | } 58 | 59 | config := kinsumer.NewConfig().WithStats(stats) 60 | session := session.Must(session.NewSession(aws.NewConfig())) 61 | 62 | // kinsumer needs a way to differentiate between running clients, generally you want to use information 63 | // about the machine it is running on like ip. For this example we'll use a uuid 64 | name := uuid.New().String() 65 | 66 | k, err = kinsumer.NewWithSession(session, kinesisStreamName, "noopkinsumer", name, config) 67 | if err != nil { 68 | log.Fatalf("Error creating kinsumer: %v", err) 69 | } 70 | 71 | if createDynamoTables { 72 | err = k.CreateRequiredTables() 73 | if err != nil { 74 | log.Fatalf("Error creating kinsumer dynamo db tables: %v", err) 75 | } 76 | } 77 | } 78 | 79 | func runKinsumer() { 80 | err := k.Run() 81 | if err != nil { 82 | log.Fatalf("kinsumer.Kinsumer.Run() returned error %v", err) 83 | } 84 | } 85 | 86 | func consumeRecords() { 87 | wg.Add(1) 88 | go func() { 89 | defer wg.Done() 90 | for { 91 | record, err := k.Next() 92 | if err != nil { 93 | log.Fatalf("k.Next returned error %v", err) 94 | } 95 | if record != nil { 96 | records <- record 97 | } else { 98 | return 99 | } 100 | } 101 | }() 102 | } 103 | 104 | func runLoop() { 105 | var totalConsumed int64 106 | 107 | t := time.NewTicker(3 * time.Second) 108 | defer t.Stop() 109 | 110 | sigc := make(chan os.Signal, 1) 111 | signal.Notify(sigc, syscall.SIGINT) 112 | 113 | defer func() { 114 | log.Println("Total records consumed", totalConsumed) 115 | }() 116 | 117 | for { 118 | select { 119 | case <-sigc: 120 | return 121 | case <-records: 122 | totalConsumed++ 123 | case <-t.C: 124 | fmt.Printf("Consumed %v\r", totalConsumed) 125 | } 126 | } 127 | } 128 | 129 | func main() { 130 | flag.Parse() 131 | 132 | records = make(chan []byte) 133 | initKinsumer() 134 | runKinsumer() 135 | consumeRecords() 136 | 137 | runLoop() 138 
| 139 | k.Stop() 140 | wg.Wait() 141 | } 142 | -------------------------------------------------------------------------------- /config.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Twitch Interactive 2 | 3 | package kinsumer 4 | 5 | import ( 6 | "time" 7 | ) 8 | 9 | //TODO: Update documentation to include the defaults 10 | //TODO: Update the 'with' methods' comments to be less ridiculous 11 | 12 | // Config holds all configuration values for a single Kinsumer instance 13 | type Config struct { 14 | stats StatReceiver 15 | logger Logger 16 | 17 | // ---------- [ Per Shard Worker ] ---------- 18 | // Time to sleep if no records are found 19 | throttleDelay time.Duration 20 | 21 | // Delay between commits to the checkpoint database 22 | commitFrequency time.Duration 23 | 24 | // Delay between tests for the client or shard numbers changing 25 | shardCheckFrequency time.Duration 26 | // ---------- [ For the leader (first client alphabetically) ] ---------- 27 | // Time between leader actions 28 | leaderActionFrequency time.Duration 29 | 30 | // ---------- [ For the entire Kinsumer ] ---------- 31 | // Size of the buffer for the combined records channel. When the channel fills up 32 | // the workers will stop adding new elements to the queue, so a slow client will 33 | // potentially fall behind the kinesis stream. 34 | bufferSize int 35 | 36 | // ---------- [ For the Dynamo DB tables ] ---------- 37 | // Read and write capacity for the Dynamo DB tables when created 38 | // with CreateRequiredTables() call. If tables already exist because they were 39 | // created on a prevoius run or created manually, these parameters will not be used. 
40 | dynamoReadCapacity int64 41 | dynamoWriteCapacity int64 42 | // Time to wait between attempts to verify tables were created/deleted completely 43 | dynamoWaiterDelay time.Duration 44 | 45 | // use ListShards to avoid LimitExceedException from DescribeStream 46 | useListShardsForKinesisStreamReady bool 47 | } 48 | 49 | // NewConfig returns a default Config struct 50 | func NewConfig() Config { 51 | return Config{ 52 | throttleDelay: 250 * time.Millisecond, 53 | commitFrequency: 1000 * time.Millisecond, 54 | shardCheckFrequency: 1 * time.Minute, 55 | leaderActionFrequency: 1 * time.Minute, 56 | bufferSize: 100, 57 | stats: &NoopStatReceiver{}, 58 | dynamoReadCapacity: 10, 59 | dynamoWriteCapacity: 10, 60 | dynamoWaiterDelay: 3 * time.Second, 61 | logger: &DefaultLogger{}, 62 | } 63 | } 64 | 65 | // WithThrottleDelay returns a Config with a modified throttle delay 66 | func (c Config) WithThrottleDelay(delay time.Duration) Config { 67 | c.throttleDelay = delay 68 | return c 69 | } 70 | 71 | // WithCommitFrequency returns a Config with a modified commit frequency 72 | func (c Config) WithCommitFrequency(commitFrequency time.Duration) Config { 73 | c.commitFrequency = commitFrequency 74 | return c 75 | } 76 | 77 | // WithShardCheckFrequency returns a Config with a modified shard check frequency 78 | func (c Config) WithShardCheckFrequency(shardCheckFrequency time.Duration) Config { 79 | c.shardCheckFrequency = shardCheckFrequency 80 | return c 81 | } 82 | 83 | // WithLeaderActionFrequency returns a Config with a modified leader action frequency 84 | func (c Config) WithLeaderActionFrequency(leaderActionFrequency time.Duration) Config { 85 | c.leaderActionFrequency = leaderActionFrequency 86 | return c 87 | } 88 | 89 | // WithBufferSize returns a Config with a modified buffer size 90 | func (c Config) WithBufferSize(bufferSize int) Config { 91 | c.bufferSize = bufferSize 92 | return c 93 | } 94 | 95 | // WithStats returns a Config with a modified stats 96 | func (c 
Config) WithStats(stats StatReceiver) Config { 97 | c.stats = stats 98 | return c 99 | } 100 | 101 | // WithDynamoReadCapacity returns a Config with a modified dynamo read capacity 102 | func (c Config) WithDynamoReadCapacity(readCapacity int64) Config { 103 | c.dynamoReadCapacity = readCapacity 104 | return c 105 | } 106 | 107 | // WithDynamoWriteCapacity returns a Config with a modified dynamo write capacity 108 | func (c Config) WithDynamoWriteCapacity(writeCapacity int64) Config { 109 | c.dynamoWriteCapacity = writeCapacity 110 | return c 111 | } 112 | 113 | // WithDynamoWaiterDelay returns a Config with a modified dynamo waiter delay 114 | func (c Config) WithDynamoWaiterDelay(delay time.Duration) Config { 115 | c.dynamoWaiterDelay = delay 116 | return c 117 | } 118 | 119 | // WithLogger returns a Config with a modified logger 120 | func (c Config) WithLogger(logger Logger) Config { 121 | c.logger = logger 122 | return c 123 | } 124 | 125 | // WithUseListShardsForKinesisStreamReady returns a config with a modified useListShardsForKinesisStreamReady toggle 126 | func (c Config) WithUseListShardsForKinesisStreamReady(shouldUse bool) Config { 127 | c.useListShardsForKinesisStreamReady = shouldUse 128 | return c 129 | } 130 | 131 | // Verify that a config struct has sane and valid values 132 | func validateConfig(c *Config) error { 133 | if c.throttleDelay < 200*time.Millisecond { 134 | return ErrConfigInvalidThrottleDelay 135 | } 136 | 137 | if c.commitFrequency == 0 { 138 | return ErrConfigInvalidCommitFrequency 139 | } 140 | 141 | if c.shardCheckFrequency == 0 { 142 | return ErrConfigInvalidShardCheckFrequency 143 | } 144 | 145 | if c.leaderActionFrequency == 0 { 146 | return ErrConfigInvalidLeaderActionFrequency 147 | } 148 | 149 | if c.shardCheckFrequency > c.leaderActionFrequency { 150 | return ErrConfigInvalidLeaderActionFrequency 151 | } 152 | 153 | if c.bufferSize == 0 { 154 | return ErrConfigInvalidBufferSize 155 | } 156 | 157 | if c.stats == nil { 158 | 
return ErrConfigInvalidStats 159 | } 160 | 161 | if c.dynamoReadCapacity == 0 || c.dynamoWriteCapacity == 0 { 162 | return ErrConfigInvalidDynamoCapacity 163 | } 164 | 165 | if c.logger == nil { 166 | return ErrConfigInvalidLogger 167 | } 168 | 169 | return nil 170 | } 171 | -------------------------------------------------------------------------------- /config_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Twitch Interactive 2 | 3 | package kinsumer 4 | 5 | import ( 6 | "testing" 7 | "time" 8 | 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | func TestConfigDefault(t *testing.T) { 13 | config := NewConfig() 14 | err := validateConfig(&config) 15 | require.NoError(t, err) 16 | } 17 | 18 | func TestConfigErrors(t *testing.T) { 19 | var ( 20 | config Config 21 | err error 22 | ) 23 | config = NewConfig().WithBufferSize(0) 24 | err = validateConfig(&config) 25 | require.EqualError(t, err, ErrConfigInvalidBufferSize.Error()) 26 | 27 | config = NewConfig().WithThrottleDelay(0) 28 | err = validateConfig(&config) 29 | require.EqualError(t, err, ErrConfigInvalidThrottleDelay.Error()) 30 | 31 | config = NewConfig().WithCommitFrequency(0) 32 | err = validateConfig(&config) 33 | require.EqualError(t, err, ErrConfigInvalidCommitFrequency.Error()) 34 | 35 | config = NewConfig().WithShardCheckFrequency(0) 36 | err = validateConfig(&config) 37 | require.EqualError(t, err, ErrConfigInvalidShardCheckFrequency.Error()) 38 | 39 | config = NewConfig().WithLeaderActionFrequency(0) 40 | err = validateConfig(&config) 41 | require.EqualError(t, err, ErrConfigInvalidLeaderActionFrequency.Error()) 42 | 43 | config = NewConfig().WithLeaderActionFrequency(time.Second).WithShardCheckFrequency(time.Minute) 44 | err = validateConfig(&config) 45 | require.EqualError(t, err, ErrConfigInvalidLeaderActionFrequency.Error()) 46 | 47 | config = NewConfig().WithBufferSize(0) 48 | err = validateConfig(&config) 49 | 
require.EqualError(t, err, ErrConfigInvalidBufferSize.Error()) 50 | 51 | config = NewConfig().WithStats(nil) 52 | err = validateConfig(&config) 53 | require.EqualError(t, err, ErrConfigInvalidStats.Error()) 54 | } 55 | 56 | func TestConfigWithMethods(t *testing.T) { 57 | stats := &NoopStatReceiver{} 58 | config := NewConfig(). 59 | WithBufferSize(1). 60 | WithCommitFrequency(1 * time.Second). 61 | WithShardCheckFrequency(1 * time.Second). 62 | WithLeaderActionFrequency(1 * time.Second). 63 | WithThrottleDelay(1 * time.Second). 64 | WithStats(stats). 65 | WithUseListShardsForKinesisStreamReady(true) 66 | 67 | err := validateConfig(&config) 68 | require.NoError(t, err) 69 | 70 | require.Equal(t, 1, config.bufferSize) 71 | require.Equal(t, 1*time.Second, config.throttleDelay) 72 | require.Equal(t, 1*time.Second, config.commitFrequency) 73 | require.Equal(t, 1*time.Second, config.shardCheckFrequency) 74 | require.Equal(t, 1*time.Second, config.leaderActionFrequency) 75 | require.Equal(t, stats, config.stats) 76 | require.Equal(t, true, config.useListShardsForKinesisStreamReady) 77 | } 78 | -------------------------------------------------------------------------------- /errors.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Twitch Interactive 2 | 3 | package kinsumer 4 | 5 | import "errors" 6 | 7 | var ( 8 | // ErrRunTwice - Run() can only ever be run once 9 | ErrRunTwice = errors.New("run() can only ever be run once") 10 | // ErrNoKinesisInterface - Need a kinesis instance 11 | ErrNoKinesisInterface = errors.New("need a kinesis instance") 12 | // ErrNoDynamoInterface - Need a dynamodb instance 13 | ErrNoDynamoInterface = errors.New("need a dynamodb instance") 14 | // ErrNoStreamName - Need a kinesis stream name 15 | ErrNoStreamName = errors.New("need a kinesis stream name") 16 | // ErrNoApplicationName - Need an application name for the dynamo table names 17 | ErrNoApplicationName = errors.New("need an 
application name for the dynamo table names") 18 | // ErrThisClientNotInDynamo - Unable to find this client in the client list 19 | ErrThisClientNotInDynamo = errors.New("unable to find this client in the client list") 20 | // ErrNoShardsAssigned - We found shards, but got assigned none 21 | ErrNoShardsAssigned = errors.New("we found shards, but got assigned none") 22 | 23 | // ErrConfigInvalidThrottleDelay - ThrottleDelay config value must be at least 200ms 24 | ErrConfigInvalidThrottleDelay = errors.New("throttleDelay config value must be at least 200ms (preferably 250ms)") 25 | // ErrConfigInvalidCommitFrequency - CommitFrequency config value is mandatory 26 | ErrConfigInvalidCommitFrequency = errors.New("commitFrequency config value is mandatory") 27 | // ErrConfigInvalidShardCheckFrequency - ShardCheckFrequency config value is mandatory 28 | ErrConfigInvalidShardCheckFrequency = errors.New("shardCheckFrequency config value is mandatory") 29 | // ErrConfigInvalidLeaderActionFrequency - LeaderActionFrequency config value is mandatory 30 | ErrConfigInvalidLeaderActionFrequency = errors.New("leaderActionFrequency config value is mandatory and must be at least as long as ShardCheckFrequency") 31 | // ErrConfigInvalidBufferSize - BufferSize config value is mandatory 32 | ErrConfigInvalidBufferSize = errors.New("bufferSize config value is mandatory") 33 | // ErrConfigInvalidStats - Stats cannot be nil 34 | ErrConfigInvalidStats = errors.New("stats cannot be nil") 35 | // ErrConfigInvalidDynamoCapacity - Dynamo read/write capacity cannot be 0 36 | ErrConfigInvalidDynamoCapacity = errors.New("dynamo read/write capacity cannot be 0") 37 | // ErrConfigInvalidLogger - Logger cannot be nil 38 | ErrConfigInvalidLogger = errors.New("logger cannot be nil") 39 | 40 | // ErrStreamBusy - Stream is busy 41 | ErrStreamBusy = errors.New("stream is busy") 42 | // ErrNoSuchStream - No such stream 43 | ErrNoSuchStream = errors.New("no such stream") 44 | ) 45 | 
-------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/twitchscience/kinsumer 2 | 3 | require ( 4 | github.com/aws/aws-sdk-go v1.25.19 5 | github.com/cactus/go-statsd-client/statsd v0.0.0-20190922113730-52b467de415c 6 | github.com/google/uuid v1.1.1 7 | github.com/stretchr/testify v1.4.0 8 | golang.org/x/net v0.0.0-20191028085509-fe3aa8a45271 // indirect 9 | golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e 10 | ) -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/aws/aws-sdk-go v1.25.19 h1:sp3xP91qIAVhWufyn9qM6Zhhn6kX06WJQcmhRj7QTXc= 2 | github.com/aws/aws-sdk-go v1.25.19/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= 3 | github.com/cactus/go-statsd-client/statsd v0.0.0-20190922113730-52b467de415c h1:rjNo46GktWW4T9RFL1Gx+rubFI+KkPTuvrRBbbovv+g= 4 | github.com/cactus/go-statsd-client/statsd v0.0.0-20190922113730-52b467de415c/go.mod h1:D4RDtP0MffJ3+R36OkGul0LwJLIN8nRb0Ac6jZmJCmo= 5 | github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= 6 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 7 | github.com/google/uuid v1.1.1 h1:Gkbcsh/GbpXz7lPftLA3P6TYMwjCLYm83jiFQZF/3gY= 8 | github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 9 | github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af h1:pmfjZENx5imkbgOkpRUYLnmbU7UEFbjtDA2hxJ1ichM= 10 | github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= 11 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 12 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 13 | github.com/stretchr/objx v0.1.0 
h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4= 14 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 15 | github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= 16 | github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= 17 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 h1:VklqNMn3ovrHsnt90PveolxSbWFaJdECFbxSq0Mqo2M= 18 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 19 | golang.org/x/net v0.0.0-20191028085509-fe3aa8a45271 h1:N66aaryRB3Ax92gH0v3hp1QYZ3zWWCCUR/j8Ifh45Ss= 20 | golang.org/x/net v0.0.0-20191028085509-fe3aa8a45271/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 21 | golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e h1:vcxGaoTs7kV8m5Np9uUNQin4BrLOthgV7252N8V+FwY= 22 | golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 23 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a h1:1BGLXjeY4akVXGgbC9HugT3Jv3hCI0z56oJR5vAMgBU= 24 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 25 | golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= 26 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 27 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 28 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 29 | gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= 30 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 31 | -------------------------------------------------------------------------------- /kinsumer.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Twitch Interactive 2 | 3 | package kinsumer 4 | 5 
| import ( 6 | "fmt" 7 | "sync" 8 | "sync/atomic" 9 | "time" 10 | 11 | "github.com/aws/aws-sdk-go/aws" 12 | "github.com/aws/aws-sdk-go/aws/request" 13 | "github.com/aws/aws-sdk-go/aws/session" 14 | "github.com/aws/aws-sdk-go/service/dynamodb" 15 | "github.com/aws/aws-sdk-go/service/dynamodb/dynamodbiface" 16 | "github.com/aws/aws-sdk-go/service/kinesis" 17 | "github.com/aws/aws-sdk-go/service/kinesis/kinesisiface" 18 | "github.com/google/uuid" 19 | "golang.org/x/sync/errgroup" 20 | ) 21 | 22 | type shardConsumerError struct { 23 | shardID string 24 | action string 25 | err error 26 | } 27 | 28 | type consumedRecord struct { 29 | record *kinesis.Record // Record retrieved from kinesis 30 | checkpointer *checkpointer // Object that will store the checkpoint back to the database 31 | retrievedAt time.Time // Time the record was retrieved from Kinesis 32 | } 33 | 34 | // Kinsumer is a Kinesis Consumer that tries to reduce duplicate reads while allowing for multiple 35 | // clients each processing multiple shards 36 | type Kinsumer struct { 37 | kinesis kinesisiface.KinesisAPI // interface to the kinesis service 38 | dynamodb dynamodbiface.DynamoDBAPI // interface to the dynamodb service 39 | streamName string // name of the kinesis stream to consume from 40 | shardIDs []string // all the shards in the stream, for detecting when the shards change 41 | stop chan struct{} // channel used to signal to all the go routines that we want to stop consuming 42 | stoprequest chan bool // channel used internally to signal to the main go routine to stop processing 43 | records chan *consumedRecord // channel for the go routines to put the consumed records on 44 | output chan *consumedRecord // unbuffered channel used to communicate from the main loop to the Next() method 45 | errors chan error // channel used to communicate errors back to the caller 46 | waitGroup sync.WaitGroup // waitGroup to sync the consumers go routines on 47 | mainWG sync.WaitGroup // WaitGroup for the 
mainLoop 48 | shardErrors chan shardConsumerError // all the errors found by the consumers that were not handled 49 | clientsTableName string // dynamo table of info about each client 50 | checkpointTableName string // dynamo table of the checkpoints for each shard 51 | metadataTableName string // dynamo table of metadata about the leader and shards 52 | clientID string // identifier to differentiate between the running clients 53 | clientName string // display name of the client - used just for debugging 54 | totalClients int // The number of clients that are currently working on this stream 55 | thisClient int // The (sorted by name) index of this client in the total list 56 | config Config // configuration struct 57 | numberOfRuns int32 // Used to atomically make sure we only ever allow one Run() to be called 58 | isLeader bool // Whether this client is the leader 59 | leaderLost chan bool // Channel that receives an event when the node loses leadership 60 | leaderWG sync.WaitGroup // waitGroup for the leader loop 61 | maxAgeForClientRecord time.Duration // Cutoff for client/checkpoint records we read from dynamodb before we assume the record is stale 62 | maxAgeForLeaderRecord time.Duration // Cutoff for leader/shard cache records we read from dynamodb before we assume the record is stale 63 | } 64 | 65 | // New returns a Kinsumer Interface with default kinesis and dynamodb instances, to be used in ec2 instances to get default auth and config 66 | func New(streamName, applicationName, clientName string, config Config) (*Kinsumer, error) { 67 | s, err := session.NewSession() 68 | if err != nil { 69 | return nil, err 70 | } 71 | return NewWithSession(s, streamName, applicationName, clientName, config) 72 | } 73 | 74 | // NewWithSession should be used if you want to override the Kinesis and Dynamo instances with a non-default aws session 75 | func NewWithSession(session *session.Session, streamName, applicationName, clientName string, config Config) (*Kinsumer, 
error) { 76 | k := kinesis.New(session) 77 | d := dynamodb.New(session) 78 | 79 | return NewWithInterfaces(k, d, streamName, applicationName, clientName, config) 80 | } 81 | 82 | // NewWithInterfaces allows you to override the Kinesis and Dynamo instances for mocking or using a local set of servers 83 | func NewWithInterfaces(kinesis kinesisiface.KinesisAPI, dynamodb dynamodbiface.DynamoDBAPI, streamName, applicationName, clientName string, config Config) (*Kinsumer, error) { 84 | if kinesis == nil { 85 | return nil, ErrNoKinesisInterface 86 | } 87 | if dynamodb == nil { 88 | return nil, ErrNoDynamoInterface 89 | } 90 | if streamName == "" { 91 | return nil, ErrNoStreamName 92 | } 93 | if applicationName == "" { 94 | return nil, ErrNoApplicationName 95 | } 96 | if err := validateConfig(&config); err != nil { 97 | return nil, err 98 | } 99 | 100 | consumer := &Kinsumer{ 101 | streamName: streamName, 102 | kinesis: kinesis, 103 | dynamodb: dynamodb, 104 | stoprequest: make(chan bool), 105 | records: make(chan *consumedRecord, config.bufferSize), 106 | output: make(chan *consumedRecord), 107 | errors: make(chan error, 10), 108 | shardErrors: make(chan shardConsumerError, 10), 109 | checkpointTableName: applicationName + "_checkpoints", 110 | clientsTableName: applicationName + "_clients", 111 | metadataTableName: applicationName + "_metadata", 112 | clientID: uuid.New().String(), 113 | clientName: clientName, 114 | config: config, 115 | maxAgeForClientRecord: config.shardCheckFrequency * 5, 116 | maxAgeForLeaderRecord: config.leaderActionFrequency * 5, 117 | } 118 | return consumer, nil 119 | } 120 | 121 | // refreshShards registers our client, refreshes the lists of clients and shards, checks if we 122 | // have become/unbecome the leader, and returns whether the shards/clients changed. 
123 | //TODO: Write unit test - needs dynamo _and_ kinesis mocking 124 | func (k *Kinsumer) refreshShards() (bool, error) { 125 | var shardIDs []string 126 | 127 | if err := registerWithClientsTable(k.dynamodb, k.clientID, k.clientName, k.clientsTableName); err != nil { 128 | return false, err 129 | } 130 | 131 | //TODO: Move this out of refreshShards and into refreshClients 132 | clients, err := getClients(k.dynamodb, k.clientID, k.clientsTableName, k.maxAgeForClientRecord) 133 | if err != nil { 134 | return false, err 135 | } 136 | 137 | totalClients := len(clients) 138 | thisClient := 0 139 | 140 | found := false 141 | for i, c := range clients { 142 | if c.ID == k.clientID { 143 | thisClient = i 144 | found = true 145 | break 146 | } 147 | } 148 | 149 | if !found { 150 | return false, ErrThisClientNotInDynamo 151 | } 152 | 153 | if thisClient == 0 && !k.isLeader { 154 | k.becomeLeader() 155 | } else if thisClient != 0 && k.isLeader { 156 | k.unbecomeLeader() 157 | } 158 | 159 | shardIDs, err = loadShardIDsFromDynamo(k.dynamodb, k.metadataTableName) 160 | 161 | if err != nil { 162 | return false, err 163 | } 164 | 165 | if len(shardIDs) == 0 { 166 | shardIDs, err = loadShardIDsFromKinesis(k.kinesis, k.streamName) 167 | if err == nil { 168 | err = k.setCachedShardIDs(shardIDs) 169 | } 170 | } 171 | 172 | if err != nil { 173 | return false, err 174 | } 175 | 176 | changed := (totalClients != k.totalClients) || 177 | (thisClient != k.thisClient) || 178 | (len(k.shardIDs) != len(shardIDs)) 179 | 180 | if !changed { 181 | for idx := range shardIDs { 182 | if shardIDs[idx] != k.shardIDs[idx] { 183 | changed = true 184 | break 185 | } 186 | } 187 | } 188 | 189 | if changed { 190 | k.shardIDs = shardIDs 191 | } 192 | 193 | k.thisClient = thisClient 194 | k.totalClients = totalClients 195 | 196 | return changed, nil 197 | } 198 | 199 | // startConsumers launches a shard consumer for each shard we should own 200 | // TODO: Can we unit test this at all? 
201 | func (k *Kinsumer) startConsumers() error { 202 | k.stop = make(chan struct{}) 203 | assigned := false 204 | 205 | if k.thisClient >= len(k.shardIDs) { 206 | return nil 207 | } 208 | 209 | for i, shard := range k.shardIDs { 210 | if (i % k.totalClients) == k.thisClient { 211 | k.waitGroup.Add(1) 212 | assigned = true 213 | go k.consume(shard) 214 | } 215 | } 216 | if len(k.shardIDs) != 0 && !assigned { 217 | return ErrNoShardsAssigned 218 | } 219 | return nil 220 | } 221 | 222 | // stopConsumers stops all our shard consumers 223 | func (k *Kinsumer) stopConsumers() { 224 | close(k.stop) 225 | k.waitGroup.Wait() 226 | DrainLoop: 227 | for { 228 | select { 229 | case <-k.records: 230 | default: 231 | break DrainLoop 232 | } 233 | } 234 | } 235 | 236 | // dynamoTableReady returns an error if the given table is not ACTIVE or UPDATING 237 | func (k *Kinsumer) dynamoTableReady(name string) error { 238 | out, err := k.dynamodb.DescribeTable(&dynamodb.DescribeTableInput{ 239 | TableName: aws.String(name), 240 | }) 241 | if err != nil { 242 | return fmt.Errorf("error describing table %s: %v", name, err) 243 | } 244 | status := aws.StringValue(out.Table.TableStatus) 245 | if status != "ACTIVE" && status != "UPDATING" { 246 | return fmt.Errorf("table %s exists but state '%s' is not 'ACTIVE' or 'UPDATING'", 247 | name, status) 248 | } 249 | return nil 250 | } 251 | 252 | // dynamoTableExists returns an true if the given table exists 253 | func (k *Kinsumer) dynamoTableExists(name string) bool { 254 | _, err := k.dynamodb.DescribeTable(&dynamodb.DescribeTableInput{ 255 | TableName: aws.String(name), 256 | }) 257 | return err == nil 258 | } 259 | 260 | // dynamoCreateTableIfNotExists creates a table with the given name and distKey 261 | // if it doesn't exist and will wait until it is created 262 | func (k *Kinsumer) dynamoCreateTableIfNotExists(name, distKey string) error { 263 | if k.dynamoTableExists(name) { 264 | return nil 265 | } 266 | 267 | _, err := 
k.dynamodb.CreateTable(&dynamodb.CreateTableInput{
		// Single string hash key named after distKey; no range key.
		AttributeDefinitions: []*dynamodb.AttributeDefinition{{
			AttributeName: aws.String(distKey),
			AttributeType: aws.String(dynamodb.ScalarAttributeTypeS),
		}},
		KeySchema: []*dynamodb.KeySchemaElement{{
			AttributeName: aws.String(distKey),
			KeyType:       aws.String(dynamodb.KeyTypeHash),
		}},
		ProvisionedThroughput: &dynamodb.ProvisionedThroughput{
			ReadCapacityUnits:  aws.Int64(k.config.dynamoReadCapacity),
			WriteCapacityUnits: aws.Int64(k.config.dynamoWriteCapacity),
		},
		TableName: aws.String(name),
	})
	if err != nil {
		return err
	}
	// Block until dynamo reports the table as existing, polling at the
	// configured waiter delay.
	return k.dynamodb.WaitUntilTableExistsWithContext(
		aws.BackgroundContext(),
		&dynamodb.DescribeTableInput{
			TableName: aws.String(name),
		},
		request.WithWaiterDelay(request.ConstantWaiterDelay(k.config.dynamoWaiterDelay)),
	)
}

// dynamoDeleteTableIfExists deletes the table with the given name, if it
// exists, and blocks until dynamo reports the deletion as complete.
// Deleting a table that does not exist is a no-op.
func (k *Kinsumer) dynamoDeleteTableIfExists(name string) error {
	if !k.dynamoTableExists(name) {
		return nil
	}
	if _, err := k.dynamodb.DeleteTable(&dynamodb.DeleteTableInput{
		TableName: aws.String(name),
	}); err != nil {
		return err
	}
	return k.dynamodb.WaitUntilTableNotExistsWithContext(
		aws.BackgroundContext(),
		&dynamodb.DescribeTableInput{
			TableName: aws.String(name),
		},
		request.WithWaiterDelay(request.ConstantWaiterDelay(k.config.dynamoWaiterDelay)),
	)
}

// kinesisStreamReady returns an error if the stream we consume is not usable.
// Depending on configuration this is checked with either ListShards (succeeds
// for any reachable stream) or DescribeStream (additionally requires the
// stream to be in the ACTIVE or UPDATING state).
func (k *Kinsumer) kinesisStreamReady() error {
	if k.config.useListShardsForKinesisStreamReady {
		if _, err := k.kinesis.ListShards(&kinesis.ListShardsInput{
			StreamName: aws.String(k.streamName),
		}); err != nil {
			return fmt.Errorf("error listing shards for stream %s: %v", k.streamName, err)
		}
		return nil
	}
	out, err := k.kinesis.DescribeStream(&kinesis.DescribeStreamInput{
		StreamName: aws.String(k.streamName),
	})
	if err != nil {
		return fmt.Errorf("error describing stream %s: %v", k.streamName, err)
	}
	status := aws.StringValue(out.StreamDescription.StreamStatus)
	if status != "ACTIVE" && status != "UPDATING" {
		return fmt.Errorf("stream %s exists but state '%s' is not 'ACTIVE' or 'UPDATING'", k.streamName, status)
	}
	return nil
}

// Run runs the main kinesis consumer process. This is a non-blocking call, use Stop() to force it to return.
// This goroutine is responsible for starting/stopping consumers, aggregating all consumers' records,
// updating checkpointers as records are consumed, and refreshing our shard/client list and leadership
//TODO: Can we unit test this at all?
346 | func (k *Kinsumer) Run() error { 347 | if err := k.dynamoTableReady(k.checkpointTableName); err != nil { 348 | return err 349 | } 350 | if err := k.dynamoTableReady(k.clientsTableName); err != nil { 351 | return err 352 | } 353 | if err := k.kinesisStreamReady(); err != nil { 354 | return err 355 | } 356 | 357 | allowRun := atomic.CompareAndSwapInt32(&k.numberOfRuns, 0, 1) 358 | if !allowRun { 359 | return ErrRunTwice 360 | } 361 | 362 | if _, err := k.refreshShards(); err != nil { 363 | deregErr := deregisterFromClientsTable(k.dynamodb, k.clientID, k.clientsTableName) 364 | if deregErr != nil { 365 | return fmt.Errorf("error in kinsumer Run initial refreshShards: (%v); "+ 366 | "error deregistering from clients table: (%v)", err, deregErr) 367 | } 368 | return fmt.Errorf("error in kinsumer Run initial refreshShards: %v", err) 369 | } 370 | 371 | k.mainWG.Add(1) 372 | go func() { 373 | defer k.mainWG.Done() 374 | 375 | defer func() { 376 | // Deregister is a nice to have but clients also time out if they 377 | // fail to deregister, so ignore error here. 378 | err := deregisterFromClientsTable(k.dynamodb, k.clientID, k.clientsTableName) 379 | if err != nil { 380 | k.errors <- fmt.Errorf("error deregistering client: %s", err) 381 | } 382 | k.unbecomeLeader() 383 | // Do this outside the k.isLeader check in case k.isLeader was false because 384 | // we lost leadership but haven't had time to shutdown the goroutine yet. 
385 | k.leaderWG.Wait() 386 | }() 387 | 388 | // We close k.output so that Next() stops, this is also the reason 389 | // we can't allow Run() to be called after Stop() has happened 390 | defer close(k.output) 391 | 392 | shardChangeTicker := time.NewTicker(k.config.shardCheckFrequency) 393 | defer func() { 394 | shardChangeTicker.Stop() 395 | }() 396 | 397 | var record *consumedRecord 398 | if err := k.startConsumers(); err != nil { 399 | k.errors <- fmt.Errorf("error starting consumers: %s", err) 400 | } 401 | defer k.stopConsumers() 402 | 403 | for { 404 | var ( 405 | input chan *consumedRecord 406 | output chan *consumedRecord 407 | ) 408 | 409 | // We only want to be handing one record from the consumers 410 | // to the user of kinsumer at a time. We do this by only reading 411 | // one record off the records queue if we do not already have a 412 | // record to give away 413 | if record != nil { 414 | output = k.output 415 | } else { 416 | input = k.records 417 | } 418 | 419 | select { 420 | case <-k.stoprequest: 421 | return 422 | case record = <-input: 423 | case output <- record: 424 | record.checkpointer.update(aws.StringValue(record.record.SequenceNumber)) 425 | record = nil 426 | case se := <-k.shardErrors: 427 | k.errors <- fmt.Errorf("shard error (%s) in %s: %s", se.shardID, se.action, se.err) 428 | case <-shardChangeTicker.C: 429 | changed, err := k.refreshShards() 430 | if err != nil { 431 | k.errors <- fmt.Errorf("error refreshing shards: %s", err) 432 | } else if changed { 433 | shardChangeTicker.Stop() 434 | k.stopConsumers() 435 | record = nil 436 | if err := k.startConsumers(); err != nil { 437 | k.errors <- fmt.Errorf("error restarting consumers: %s", err) 438 | } 439 | // We create a new shardChangeTicker here so that the time it takes to stop and 440 | // start the consumers is not included in the wait for the next tick. 
441 | shardChangeTicker = time.NewTicker(k.config.shardCheckFrequency) 442 | } 443 | } 444 | } 445 | }() 446 | 447 | return nil 448 | } 449 | 450 | // Stop stops the consumption of kinesis events 451 | //TODO: Can we unit test this at all? 452 | func (k *Kinsumer) Stop() { 453 | k.stoprequest <- true 454 | k.mainWG.Wait() 455 | } 456 | 457 | // Next is a blocking function used to get the next record from the kinesis queue, or errors that 458 | // occurred during the processing of kinesis. It's up to the caller to stop processing by calling 'Stop()' 459 | // 460 | // if err is non nil an error occurred in the system. 461 | // if err is nil and data is nil then kinsumer has been stopped 462 | func (k *Kinsumer) Next() (data []byte, err error) { 463 | select { 464 | case err = <-k.errors: 465 | return nil, err 466 | case record, ok := <-k.output: 467 | if ok { 468 | k.config.stats.EventToClient(*record.record.ApproximateArrivalTimestamp, record.retrievedAt) 469 | data = record.record.Data 470 | } 471 | } 472 | 473 | return data, err 474 | } 475 | 476 | // CreateRequiredTables will create the required dynamodb tables 477 | // based on the applicationName 478 | func (k *Kinsumer) CreateRequiredTables() error { 479 | g := &errgroup.Group{} 480 | 481 | g.Go(func() error { 482 | return k.dynamoCreateTableIfNotExists(k.clientsTableName, "ID") 483 | }) 484 | g.Go(func() error { 485 | return k.dynamoCreateTableIfNotExists(k.checkpointTableName, "Shard") 486 | }) 487 | g.Go(func() error { 488 | return k.dynamoCreateTableIfNotExists(k.metadataTableName, "Key") 489 | }) 490 | 491 | return g.Wait() 492 | } 493 | 494 | // DeleteTables will delete the dynamodb tables that were created 495 | // based on the applicationName 496 | func (k *Kinsumer) DeleteTables() error { 497 | g := &errgroup.Group{} 498 | 499 | g.Go(func() error { 500 | return k.dynamoDeleteTableIfExists(k.clientsTableName) 501 | }) 502 | g.Go(func() error { 503 | return 
k.dynamoDeleteTableIfExists(k.checkpointTableName) 504 | }) 505 | g.Go(func() error { 506 | return k.dynamoDeleteTableIfExists(k.metadataTableName) 507 | }) 508 | 509 | return g.Wait() 510 | } 511 | -------------------------------------------------------------------------------- /kinsumer.goconvey: -------------------------------------------------------------------------------- 1 | -short 2 | -------------------------------------------------------------------------------- /kinsumer_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Twitch Interactive 2 | package kinsumer 3 | 4 | import ( 5 | "flag" 6 | "fmt" 7 | "math/rand" 8 | "sort" 9 | "strconv" 10 | "sync" 11 | "testing" 12 | "time" 13 | 14 | "github.com/aws/aws-sdk-go/aws" 15 | "github.com/aws/aws-sdk-go/aws/awserr" 16 | "github.com/aws/aws-sdk-go/aws/session" 17 | "github.com/aws/aws-sdk-go/service/dynamodb" 18 | "github.com/aws/aws-sdk-go/service/dynamodb/dynamodbattribute" 19 | "github.com/aws/aws-sdk-go/service/dynamodb/dynamodbiface" 20 | "github.com/aws/aws-sdk-go/service/kinesis" 21 | "github.com/aws/aws-sdk-go/service/kinesis/kinesisiface" 22 | 23 | "github.com/stretchr/testify/assert" 24 | "github.com/stretchr/testify/require" 25 | ) 26 | 27 | var ( 28 | awsRegion = flag.String("region", "us-west-2", "Region to run tests in") 29 | dynamoEndpoint = flag.String("dynamo_endpoint", "http://localhost:4567", "Endpoint for dynamo test server") 30 | kinesisEndpoint = flag.String("kinesis_endpoint", "http://localhost:4568", "Endpoint for kinesis test server") 31 | resourceChangeTimeout = flag.Duration("resource_change_timeout", 50*time.Millisecond, "Timeout between changes to the resource infrastructure") 32 | streamName = flag.String("stream_name", "kinsumer_test", "Name of kinesis stream to use for tests") 33 | applicationName = flag.String("application_name", "kinsumer_test", "Name of the application, will impact dynamo table names") 34 | ) 35 
| 36 | const ( 37 | shardCount int64 = 10 38 | shardLimit int64 = 100 39 | ) 40 | 41 | func TestNewWithInterfaces(t *testing.T) { 42 | s := session.Must(session.NewSession()) 43 | k := kinesis.New(s) 44 | d := dynamodb.New(s) 45 | 46 | // No kinesis 47 | _, err := NewWithInterfaces(nil, d, "stream", "app", "client", NewConfig()) 48 | assert.NotEqual(t, err, nil) 49 | 50 | // No dynamodb 51 | _, err = NewWithInterfaces(k, nil, "stream", "app", "client", NewConfig()) 52 | assert.NotEqual(t, err, nil) 53 | 54 | // No streamName 55 | _, err = NewWithInterfaces(k, d, "", "app", "client", NewConfig()) 56 | assert.NotEqual(t, err, nil) 57 | 58 | // No applicationName 59 | _, err = NewWithInterfaces(k, d, "stream", "", "client", NewConfig()) 60 | assert.NotEqual(t, err, nil) 61 | 62 | // Invalid config 63 | _, err = NewWithInterfaces(k, d, "stream", "app", "client", Config{}) 64 | assert.NotEqual(t, err, nil) 65 | 66 | // All ok 67 | kinsumer, err := NewWithInterfaces(k, d, "stream", "app", "client", NewConfig()) 68 | assert.Equal(t, err, nil) 69 | assert.NotEqual(t, kinsumer, nil) 70 | } 71 | 72 | func CreateFreshStream(t *testing.T, k kinesisiface.KinesisAPI) error { 73 | _, err := k.DeleteStream(&kinesis.DeleteStreamInput{ 74 | StreamName: streamName, 75 | }) 76 | if err != nil { 77 | if awsErr, ok := err.(awserr.Error); ok { 78 | if awsErr.Code() != "ResourceNotFoundException" { 79 | return err 80 | } 81 | } 82 | } else { 83 | // Wait for the stream to be deleted 84 | time.Sleep(*resourceChangeTimeout) 85 | } 86 | 87 | _, err = k.CreateStream(&kinesis.CreateStreamInput{ 88 | ShardCount: aws.Int64(shardCount), 89 | StreamName: streamName, 90 | }) 91 | 92 | if err != nil { 93 | return err 94 | } 95 | time.Sleep(*resourceChangeTimeout) 96 | 97 | return nil 98 | } 99 | 100 | func SetupTestEnvironment(t *testing.T, k kinesisiface.KinesisAPI, d dynamodbiface.DynamoDBAPI) error { 101 | err := CreateFreshStream(t, k) 102 | if err != nil { 103 | return fmt.Errorf("Error 
creating fresh stream: %s", err)
	}

	testConf := NewConfig().WithDynamoWaiterDelay(*resourceChangeTimeout)
	client, _ := NewWithInterfaces(k, d, "N/A", *applicationName, "N/A", testConf)

	err = client.DeleteTables()
	if err != nil {
		return fmt.Errorf("Error deleting tables: %s", err)
	}

	err = client.CreateRequiredTables()
	if err != nil {
		return fmt.Errorf("Error creating fresh tables: %s", err)
	}
	return nil
}

// ignoreResourceNotFound swallows ResourceNotFoundException errors; on
// success it sleeps to let the infrastructure change settle.
func ignoreResourceNotFound(err error) error {
	if err != nil {
		if awsErr, ok := err.(awserr.Error); ok {
			if awsErr.Code() != "ResourceNotFoundException" {
				return err
			}
		}
	} else {
		time.Sleep(*resourceChangeTimeout)
	}

	return nil
}

// CleanupTestEnvironment tears down the kinesis stream and dynamo tables.
func CleanupTestEnvironment(t *testing.T, k kinesisiface.KinesisAPI, d dynamodbiface.DynamoDBAPI) error {
	_, err := k.DeleteStream(&kinesis.DeleteStreamInput{
		StreamName: streamName,
	})

	if e := ignoreResourceNotFound(err); e != nil {
		return fmt.Errorf("Error deleting kinesis stream: %s", e)
	}

	testConf := NewConfig().WithDynamoWaiterDelay(*resourceChangeTimeout)
	client, _ := NewWithInterfaces(k, d, "N/A", *applicationName, "", testConf)

	err = client.DeleteTables()
	if err != nil {
		return fmt.Errorf("Error deleting tables: %s", err)
	}
	return nil
}

const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

// randStringBytes returns a random string of n ASCII letters.
func randStringBytes(n int) string {
	b := make([]byte, n)
	for i := range b {
		b[i] = letterBytes[rand.Intn(len(letterBytes))]
	}
	return string(b)
}

// SpamStream writes numEvents records to the test stream in batches of 100.
// Each record's payload is its decimal sequence index; partition keys are
// random so records spread across shards.
func SpamStream(t *testing.T, k kinesisiface.KinesisAPI, numEvents int64) error {
	var (
		records []*kinesis.PutRecordsRequestEntry
		counter int64
	)

	for counter = 0; counter < numEvents; counter++ {
		records = append(records, &kinesis.PutRecordsRequestEntry{
			Data:         []byte(strconv.FormatInt(counter, 10)),
			PartitionKey: aws.String(randStringBytes(10)),
		})

		if len(records) == 100 {
			pro, err := k.PutRecords(&kinesis.PutRecordsInput{
				StreamName: streamName,
				Records:    records,
			})
			if err != nil {
				return fmt.Errorf("Error putting records onto stream: %s", err)
			}
			failed := aws.Int64Value(pro.FailedRecordCount)
			require.EqualValues(t, 0, failed)
			records = nil
		}
	}
	// Flush the final partial batch.
	if len(records) > 0 {
		pro, err := k.PutRecords(&kinesis.PutRecordsInput{
			StreamName: streamName,
			Records:    records,
		})
		if err != nil {
			return fmt.Errorf("Error putting records onto stream: %s", err)
		}
		failed := aws.Int64Value(pro.FailedRecordCount)
		require.EqualValues(t, 0, failed)
	}

	return nil
}

// KinesisAndDynamoInstances builds kinesis and dynamo clients pointed at the
// configured (usually local) endpoints.
func KinesisAndDynamoInstances() (kinesisiface.KinesisAPI, dynamodbiface.DynamoDBAPI) {
	kc := aws.NewConfig().WithRegion(*awsRegion).WithLogLevel(3)
	if len(*kinesisEndpoint) > 0 {
		kc = kc.WithEndpoint(*kinesisEndpoint)
	}

	dc := aws.NewConfig().WithRegion(*awsRegion).WithLogLevel(3)
	if len(*dynamoEndpoint) > 0 {
		dc = dc.WithEndpoint(*dynamoEndpoint)
	}

	k := kinesis.New(session.Must(session.NewSession(kc)))
	d := dynamodb.New(session.Must(session.NewSession(dc)))

	return k, d
}

// TestSetup is a smoke test of the environment setup/teardown helpers.
func TestSetup(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping test in short mode.")
	}

	k, d := KinesisAndDynamoInstances()

	defer func() {
		err := CleanupTestEnvironment(t, k, d)
		require.NoError(t, err, "Problems cleaning up the test environment")
	}()

	err := SetupTestEnvironment(t, k, d)
	require.NoError(t, err, "Problems setting up the test environment")
| 237 | err = SpamStream(t, k, 233) 238 | require.NoError(t, err, "Problems spamming stream with events") 239 | 240 | } 241 | 242 | // This is not a real final test. It's just a harness for development and to kind of think through the interface 243 | func TestKinsumer(t *testing.T) { 244 | const ( 245 | numberOfEventsToTest = 4321 246 | numberOfClients = 3 247 | ) 248 | 249 | if testing.Short() { 250 | t.Skip("skipping test in short mode.") 251 | } 252 | 253 | k, d := KinesisAndDynamoInstances() 254 | 255 | defer func() { 256 | err := CleanupTestEnvironment(t, k, d) 257 | require.NoError(t, err, "Problems cleaning up the test environment") 258 | }() 259 | 260 | err := SetupTestEnvironment(t, k, d) 261 | require.NoError(t, err, "Problems setting up the test environment") 262 | 263 | clients := make([]*Kinsumer, numberOfClients) 264 | eventsPerClient := make([]int, numberOfClients) 265 | 266 | output := make(chan int, numberOfClients) 267 | var waitGroup sync.WaitGroup 268 | 269 | config := NewConfig().WithBufferSize(numberOfEventsToTest) 270 | config = config.WithShardCheckFrequency(500 * time.Millisecond) 271 | config = config.WithLeaderActionFrequency(500 * time.Millisecond) 272 | 273 | for i := 0; i < numberOfClients; i++ { 274 | if i > 0 { 275 | time.Sleep(50 * time.Millisecond) // Add the clients slowly 276 | } 277 | 278 | clients[i], err = NewWithInterfaces(k, d, *streamName, *applicationName, fmt.Sprintf("test_%d", i), config) 279 | require.NoError(t, err, "NewWithInterfaces() failed") 280 | 281 | err = clients[i].Run() 282 | require.NoError(t, err, "kinsumer.Run() failed") 283 | err = clients[i].Run() 284 | assert.Error(t, err, "second time calling kinsumer.Run() should fail") 285 | 286 | waitGroup.Add(1) 287 | go func(client *Kinsumer, ci int) { 288 | defer waitGroup.Done() 289 | for { 290 | data, innerError := client.Next() 291 | require.NoError(t, innerError, "kinsumer.Next() failed") 292 | if data == nil { 293 | return 294 | } 295 | idx, _ := 
strconv.Atoi(string(data)) 296 | output <- idx 297 | eventsPerClient[ci]++ 298 | } 299 | }(clients[i], i) 300 | defer func(ci int) { 301 | if clients[ci] != nil { 302 | clients[ci].Stop() 303 | } 304 | }(i) 305 | } 306 | 307 | err = SpamStream(t, k, numberOfEventsToTest) 308 | require.NoError(t, err, "Problems spamming stream with events") 309 | 310 | readEvents(t, output, numberOfEventsToTest) 311 | 312 | for ci, client := range clients { 313 | client.Stop() 314 | clients[ci] = nil 315 | } 316 | 317 | drain(t, output) 318 | 319 | // Make sure the go routines have finished 320 | waitGroup.Wait() 321 | } 322 | 323 | // TestLeader is an integration test of leadership claiming and deleting old clients. 324 | func TestLeader(t *testing.T) { 325 | const ( 326 | numberOfEventsToTest = 4321 327 | numberOfClients = 2 328 | ) 329 | 330 | if testing.Short() { 331 | t.Skip("skipping test in short mode.") 332 | } 333 | 334 | k, d := KinesisAndDynamoInstances() 335 | 336 | defer func() { 337 | err := CleanupTestEnvironment(t, k, d) 338 | require.NoError(t, err, "Problems cleaning up the test environment") 339 | }() 340 | 341 | err := SetupTestEnvironment(t, k, d) 342 | require.NoError(t, err, "Problems setting up the test environment") 343 | 344 | clients := make([]*Kinsumer, numberOfClients) 345 | 346 | output := make(chan int, numberOfClients) 347 | var waitGroup sync.WaitGroup 348 | 349 | // Put an old client that should be deleted. 
350 | now := time.Now().Add(-time.Hour * 24 * 7) 351 | item, err := dynamodbattribute.MarshalMap(clientRecord{ 352 | ID: "Old", 353 | Name: "Old", 354 | LastUpdate: now.UnixNano(), 355 | LastUpdateRFC: now.UTC().Format(time.RFC1123Z), 356 | }) 357 | require.NoError(t, err, "Problems converting old client") 358 | 359 | clientsTableName := aws.String(*streamName + "_clients") 360 | _, err = d.PutItem(&dynamodb.PutItemInput{ 361 | TableName: clientsTableName, 362 | Item: item, 363 | }) 364 | require.NoError(t, err, "Problems putting old client") 365 | 366 | config := NewConfig().WithBufferSize(numberOfEventsToTest) 367 | config = config.WithShardCheckFrequency(500 * time.Millisecond) 368 | config = config.WithLeaderActionFrequency(500 * time.Millisecond) 369 | 370 | for i := 0; i < numberOfClients; i++ { 371 | if i > 0 { 372 | time.Sleep(50 * time.Millisecond) // Add the clients slowly 373 | } 374 | 375 | clients[i], err = NewWithInterfaces(k, d, *streamName, *applicationName, fmt.Sprintf("test_%d", i), config) 376 | require.NoError(t, err, "NewWithInterfaces() failed") 377 | clients[i].clientID = strconv.Itoa(i + 1) 378 | 379 | err = clients[i].Run() 380 | require.NoError(t, err, "kinsumer.Run() failed") 381 | 382 | waitGroup.Add(1) 383 | go func(client *Kinsumer, ci int) { 384 | defer waitGroup.Done() 385 | for { 386 | data, innerError := client.Next() 387 | require.NoError(t, innerError, "kinsumer.Next() failed") 388 | if data == nil { 389 | return 390 | } 391 | idx, _ := strconv.Atoi(string(data)) 392 | output <- idx 393 | } 394 | }(clients[i], i) 395 | defer func(ci int) { 396 | if clients[ci] != nil { 397 | clients[ci].Stop() 398 | } 399 | }(i) 400 | } 401 | 402 | err = SpamStream(t, k, numberOfEventsToTest) 403 | require.NoError(t, err, "Problems spamming stream with events") 404 | 405 | readEvents(t, output, numberOfEventsToTest) 406 | 407 | resp, err := d.GetItem(&dynamodb.GetItemInput{ 408 | TableName: clientsTableName, 409 | ConsistentRead: aws.Bool(true), 
410 | Key: map[string]*dynamodb.AttributeValue{ 411 | "ID": {S: aws.String("Old")}, 412 | }, 413 | }) 414 | require.NoError(t, err, "Problem getting old client") 415 | require.Equal(t, 0, len(resp.Item), "Old client was not deleted") 416 | 417 | assert.Equal(t, true, clients[0].isLeader, "First client is not leader") 418 | assert.Equal(t, false, clients[1].isLeader, "Second leader is also leader") 419 | 420 | c, err := NewWithInterfaces(k, d, *streamName, *applicationName, fmt.Sprintf("_test_%d", numberOfClients), config) 421 | require.NoError(t, err, "NewWithInterfaces() failed") 422 | c.clientID = "0" 423 | err = c.Run() 424 | require.NoError(t, err, "kinsumer.Run() failed") 425 | require.Equal(t, true, c.isLeader, "New client is not leader") 426 | _, err = clients[0].refreshShards() 427 | require.NoError(t, err, "Problem refreshing shards of original leader") 428 | require.Equal(t, false, clients[0].isLeader, "Original leader is still leader") 429 | c.Stop() 430 | 431 | for ci, client := range clients { 432 | client.Stop() 433 | clients[ci] = nil 434 | } 435 | 436 | drain(t, output) 437 | // Make sure the go routines have finished 438 | waitGroup.Wait() 439 | } 440 | 441 | // TestSplit is an integration test of merging shards, checking the closed and new shards are handled correctly. 
442 | func TestSplit(t *testing.T) { 443 | const ( 444 | numberOfEventsToTest = 4321 445 | numberOfClients = 3 446 | ) 447 | 448 | if testing.Short() { 449 | t.Skip("skipping test in short mode.") 450 | } 451 | 452 | k, d := KinesisAndDynamoInstances() 453 | 454 | defer func() { 455 | err := CleanupTestEnvironment(t, k, d) 456 | require.NoError(t, err, "Problems cleaning up the test environment") 457 | }() 458 | 459 | err := SetupTestEnvironment(t, k, d) 460 | require.NoError(t, err, "Problems setting up the test environment") 461 | 462 | clients := make([]*Kinsumer, numberOfClients) 463 | 464 | output := make(chan int, numberOfClients) 465 | var waitGroup sync.WaitGroup 466 | 467 | config := NewConfig().WithBufferSize(numberOfEventsToTest) 468 | config = config.WithShardCheckFrequency(500 * time.Millisecond) 469 | config = config.WithLeaderActionFrequency(500 * time.Millisecond) 470 | config = config.WithCommitFrequency(50 * time.Millisecond) 471 | 472 | for i := 0; i < numberOfClients; i++ { 473 | if i > 0 { 474 | time.Sleep(50 * time.Millisecond) // Add the clients slowly 475 | } 476 | 477 | clients[i], err = NewWithInterfaces(k, d, *streamName, *applicationName, fmt.Sprintf("test_%d", i), config) 478 | require.NoError(t, err, "NewWithInterfaces() failed") 479 | clients[i].clientID = strconv.Itoa(i + 1) 480 | 481 | err = clients[i].Run() 482 | require.NoError(t, err, "kinsumer.Run() failed") 483 | 484 | waitGroup.Add(1) 485 | go func(client *Kinsumer, ci int) { 486 | defer waitGroup.Done() 487 | for { 488 | data, innerError := client.Next() 489 | require.NoError(t, innerError, "kinsumer.Next() failed") 490 | if data == nil { 491 | return 492 | } 493 | idx, _ := strconv.Atoi(string(data)) 494 | output <- idx 495 | } 496 | }(clients[i], i) 497 | defer func(ci int) { 498 | if clients[ci] != nil { 499 | clients[ci].Stop() 500 | } 501 | }(i) 502 | } 503 | 504 | err = SpamStream(t, k, numberOfEventsToTest) 505 | require.NoError(t, err, "Problems spamming stream with 
events") 506 | 507 | readEvents(t, output, numberOfEventsToTest) 508 | 509 | desc, err := k.DescribeStream(&kinesis.DescribeStreamInput{ 510 | StreamName: streamName, 511 | Limit: aws.Int64(shardLimit), 512 | }) 513 | require.NoError(t, err, "Error describing stream") 514 | shards := desc.StreamDescription.Shards 515 | shardMap := make(map[string]*kinesis.Shard) 516 | for _, shard := range shards { 517 | shardMap[*shard.ShardId] = shard 518 | } 519 | 520 | require.True(t, len(shards) >= 2, "Fewer than 2 shards") 521 | 522 | _, err = k.MergeShards(&kinesis.MergeShardsInput{ 523 | StreamName: streamName, 524 | ShardToMerge: aws.String(*shards[0].ShardId), 525 | AdjacentShardToMerge: aws.String(*shards[1].ShardId), 526 | }) 527 | require.NoError(t, err, "Problem merging shards") 528 | 529 | require.True(t, shardCount <= shardLimit, "Too many shards") 530 | timeout := time.After(time.Second) 531 | for { 532 | desc, err = k.DescribeStream(&kinesis.DescribeStreamInput{ 533 | StreamName: streamName, 534 | Limit: aws.Int64(shardLimit), 535 | }) 536 | require.NoError(t, err, "Error describing stream") 537 | if *desc.StreamDescription.StreamStatus == "ACTIVE" { 538 | break 539 | } 540 | select { 541 | case <-timeout: 542 | require.FailNow(t, "Timedout after merging shards") 543 | default: 544 | time.Sleep(*resourceChangeTimeout) 545 | } 546 | } 547 | newShards := desc.StreamDescription.Shards 548 | require.Equal(t, shardCount+1, int64(len(newShards)), "Wrong number of shards after merging") 549 | 550 | err = SpamStream(t, k, numberOfEventsToTest) 551 | require.NoError(t, err, "Problems spamming stream with events") 552 | 553 | readEvents(t, output, numberOfEventsToTest) 554 | 555 | // Sleep here to wait for stuff to calm down. When running this test 556 | // by itself it passes without the sleep but when running all the tests 557 | // it fails. Since we delete all tables I suspect it's kinesalite having 558 | // issues. 
559 | time.Sleep(500 * time.Millisecond) 560 | // Validate finished shards are no longer in the cache 561 | var expectedShards []string 562 | for _, shard := range newShards { 563 | if *shard.ShardId != *shards[0].ShardId && *shard.ShardId != *shards[1].ShardId { 564 | expectedShards = append(expectedShards, *shard.ShardId) 565 | } 566 | } 567 | sort.Strings(expectedShards) 568 | cachedShards, err := loadShardIDsFromDynamo(d, clients[0].metadataTableName) 569 | require.NoError(t, err, "Error loading cached shard IDs") 570 | require.Equal(t, expectedShards, cachedShards, "Finished shards are still in the cache") 571 | 572 | for ci, client := range clients { 573 | client.Stop() 574 | clients[ci] = nil 575 | } 576 | 577 | drain(t, output) 578 | // Make sure the go routines have finished 579 | waitGroup.Wait() 580 | } 581 | 582 | func drain(t *testing.T, output chan int) { 583 | extraEvents := 0 584 | // Drain in case events duplicated, so we don't hang. 585 | DrainLoop: 586 | for { 587 | select { 588 | case <-output: 589 | extraEvents++ 590 | default: 591 | break DrainLoop 592 | } 593 | } 594 | assert.Equal(t, 0, extraEvents, "Got %d extra events afterwards", extraEvents) 595 | } 596 | 597 | func readEvents(t *testing.T, output chan int, numberOfEventsToTest int) { 598 | eventsFound := make([]bool, numberOfEventsToTest) 599 | total := 0 600 | 601 | ProcessLoop: 602 | for { 603 | select { 604 | case idx := <-output: 605 | assert.Equal(t, false, eventsFound[idx], "Got duplicate event %d", idx) 606 | eventsFound[idx] = true 607 | total++ 608 | if total == numberOfEventsToTest { 609 | break ProcessLoop 610 | } 611 | case <-time.After(3 * time.Second): 612 | break ProcessLoop 613 | } 614 | } 615 | 616 | t.Logf("Got all %d out of %d events\n", total, numberOfEventsToTest) 617 | } 618 | -------------------------------------------------------------------------------- /leader.go: -------------------------------------------------------------------------------- 1 | // Copyright 
(c) 2016 Twitch Interactive 2 | 3 | package kinsumer 4 | 5 | import ( 6 | "fmt" 7 | "sort" 8 | "time" 9 | 10 | "github.com/aws/aws-sdk-go/aws" 11 | "github.com/aws/aws-sdk-go/aws/awserr" 12 | "github.com/aws/aws-sdk-go/service/dynamodb" 13 | "github.com/aws/aws-sdk-go/service/dynamodb/dynamodbattribute" 14 | "github.com/aws/aws-sdk-go/service/dynamodb/dynamodbiface" 15 | "github.com/aws/aws-sdk-go/service/kinesis" 16 | "github.com/aws/aws-sdk-go/service/kinesis/kinesisiface" 17 | ) 18 | 19 | const ( 20 | leaderKey = "Leader" 21 | shardCacheKey = "ShardCache" 22 | conditionalFail = "ConditionalCheckFailedException" 23 | ) 24 | 25 | type shardCacheRecord struct { 26 | Key string // must be "ShardCache" 27 | ShardIDs []string // Slice of unfinished shard IDs 28 | LastUpdate int64 // timestamp of last update 29 | 30 | // Debug versions of LastUpdate 31 | LastUpdateRFC string 32 | } 33 | 34 | // becomeLeader starts the leadership goroutine with a channel to stop it. 35 | // TODO(dwe): Factor out dependencies and unit test 36 | func (k *Kinsumer) becomeLeader() { 37 | if k.isLeader { 38 | return 39 | } 40 | k.leaderLost = make(chan bool) 41 | k.leaderWG.Add(1) 42 | go func() { 43 | defer k.leaderWG.Done() 44 | leaderActions := time.NewTicker(k.config.leaderActionFrequency) 45 | defer func() { 46 | leaderActions.Stop() 47 | err := k.deregisterLeadership() 48 | if err != nil { 49 | k.errors <- fmt.Errorf("error deregistering leadership: %v", err) 50 | } 51 | }() 52 | ok, err := k.registerLeadership() 53 | if err != nil { 54 | k.errors <- fmt.Errorf("error registering initial leadership: %v", err) 55 | } 56 | // Perform leadership actions immediately if we became leader. If we didn't 57 | // become leader yet, wait until the first tick to try again. 
58 | if ok { 59 | err = k.performLeaderActions() 60 | if err != nil { 61 | k.errors <- fmt.Errorf("error performing initial leader actions: %v", err) 62 | } 63 | } 64 | for { 65 | select { 66 | case <-leaderActions.C: 67 | ok, err := k.registerLeadership() 68 | if err != nil { 69 | k.errors <- fmt.Errorf("error registering leadership: %v", err) 70 | } 71 | if !ok { 72 | continue 73 | } 74 | err = k.performLeaderActions() 75 | if err != nil { 76 | k.errors <- fmt.Errorf("error performing repeated leader actions: %v", err) 77 | } 78 | case <-k.leaderLost: 79 | return 80 | } 81 | } 82 | }() 83 | k.isLeader = true 84 | } 85 | 86 | // unbecomeLeader stops the leadership goroutine. 87 | func (k *Kinsumer) unbecomeLeader() { 88 | if !k.isLeader { 89 | return 90 | } 91 | if k.leaderLost == nil { 92 | k.config.logger.Log("Lost leadership but k.leaderLost was nil") 93 | } else { 94 | close(k.leaderLost) 95 | k.leaderWG.Wait() 96 | k.leaderLost = nil 97 | } 98 | k.isLeader = false 99 | } 100 | 101 | // performLeaderActions updates the shard ID cache and reaps old clients 102 | // TODO(dwe): Factor out dependencies and unit test 103 | func (k *Kinsumer) performLeaderActions() error { 104 | shardCache, err := loadShardCacheFromDynamo(k.dynamodb, k.metadataTableName) 105 | if err != nil { 106 | return fmt.Errorf("error loading shard cache from dynamo: %v", err) 107 | } 108 | cachedShardIDs := shardCache.ShardIDs 109 | now := time.Now().UnixNano() 110 | if now-shardCache.LastUpdate < k.config.leaderActionFrequency.Nanoseconds() { 111 | return nil 112 | } 113 | curShardIDs, err := loadShardIDsFromKinesis(k.kinesis, k.streamName) 114 | if err != nil { 115 | return fmt.Errorf("error loading shard IDs from kinesis: %v", err) 116 | } 117 | 118 | checkpoints, err := loadCheckpoints(k.dynamodb, k.checkpointTableName) 119 | if err != nil { 120 | return fmt.Errorf("error loading shard IDs from dynamo: %v", err) 121 | } 122 | 123 | updatedShardIDs, changed := diffShardIDs(curShardIDs, 
cachedShardIDs, checkpoints)
	if changed {
		err = k.setCachedShardIDs(updatedShardIDs)
		if err != nil {
			return fmt.Errorf("error caching shard IDs to dynamo: %v", err)
		}
	}

	err = reapClients(k.dynamodb, k.clientsTableName)
	if err != nil {
		return fmt.Errorf("error reaping old clients: %v", err)
	}

	return nil
}

// setCachedShardIDs updates the shard ID cache in dynamo.
// An empty shardIDs slice is a no-op (the existing cache is left untouched).
func (k *Kinsumer) setCachedShardIDs(shardIDs []string) error {
	if len(shardIDs) == 0 {
		return nil
	}
	now := time.Now()
	item, err := dynamodbattribute.MarshalMap(&shardCacheRecord{
		Key:           shardCacheKey,
		ShardIDs:      shardIDs,
		LastUpdate:    now.UnixNano(),
		LastUpdateRFC: now.UTC().Format(time.RFC1123Z),
	})
	if err != nil {
		return fmt.Errorf("error marshalling map: %v", err)
	}

	_, err = k.dynamodb.PutItem(&dynamodb.PutItemInput{
		TableName: aws.String(k.metadataTableName),
		Item:      item,
	})
	if err != nil {
		return fmt.Errorf("error updating shard cache: %v", err)
	}
	return nil
}

// diffShardIDs takes the current shard IDs and cached shards and returns the new sorted cache, ignoring
// finished shards correctly.
func diffShardIDs(curShardIDs, cachedShardIDs []string, checkpoints map[string]*checkpointRecord) (updatedShardIDs []string, changed bool) {
	// Look for differences, ignoring Finished shards.
	cur := make(map[string]bool)
	for _, s := range curShardIDs {
		cur[s] = true
	}
	for _, s := range cachedShardIDs {
		if cur[s] {
			delete(cur, s)
			// Drop the shard if it's been finished.
			if c, ok := checkpoints[s]; ok && c.Finished != nil {
				changed = true
			} else {
				updatedShardIDs = append(updatedShardIDs, s)
			}
		} else {
			// If a shard is no longer returned by ListShards, drop it.
			changed = true
		}
	}
	// Whatever is left in cur is new to us: keep it unless already Finished.
	for s := range cur {
		if c, ok := checkpoints[s]; !ok || c.Finished == nil {
			updatedShardIDs = append(updatedShardIDs, s)
			changed = true
		}
	}
	sort.Strings(updatedShardIDs)
	return
}

// deregisterLeadership marks us as no longer the leader in dynamo.
// The removal is conditional on our client ID still owning the leader record,
// so losing an election we never won is not an error.
func (k *Kinsumer) deregisterLeadership() error {
	now := time.Now()
	attrVals, err := dynamodbattribute.MarshalMap(map[string]interface{}{
		":ID":            aws.String(k.clientID),
		":lastUpdate":    aws.Int64(now.UnixNano()),
		":lastUpdateRFC": aws.String(now.UTC().Format(time.RFC1123Z)),
	})
	if err != nil {
		return fmt.Errorf("error marshaling deregisterLeadership ExpressionAttributeValues: %v", err)
	}
	_, err = k.dynamodb.UpdateItem(&dynamodb.UpdateItemInput{
		TableName: aws.String(k.metadataTableName),
		Key: map[string]*dynamodb.AttributeValue{
			"Key": {S: aws.String(leaderKey)},
		},
		ConditionExpression:       aws.String("ID = :ID"),
		UpdateExpression:          aws.String("REMOVE ID SET LastUpdate = :lastUpdate, LastUpdateRFC = :lastUpdateRFC"),
		ExpressionAttributeValues: attrVals,
	})
	if err != nil {
		// It's ok if we never actually became leader.
		if awsErr, ok := err.(awserr.Error); ok && awsErr.Code() == conditionalFail {
			return nil
		}
	}
	return err
}

// registerLeadership marks us as the leader or just refreshes LastUpdate in dynamo, returning false if
// another node is the leader.
func (k *Kinsumer) registerLeadership() (bool, error) {
	now := time.Now()
	cutoff := now.Add(-k.maxAgeForLeaderRecord).UnixNano()
	attrVals, err := dynamodbattribute.MarshalMap(map[string]interface{}{
		":ID":     aws.String(k.clientID),
		":cutoff": aws.Int64(cutoff),
	})
	if err != nil {
		return false, fmt.Errorf("error marshaling registerLeadership ExpressionAttributeValues: %v", err)
	}
	item, err := dynamodbattribute.MarshalMap(map[string]interface{}{
		"Key":           aws.String(leaderKey),
		"ID":            aws.String(k.clientID),
		"Name":          aws.String(k.clientName),
		"LastUpdate":    aws.Int64(now.UnixNano()),
		"LastUpdateRFC": aws.String(now.UTC().Format(time.RFC1123Z)),
	})
	if err != nil {
		return false, fmt.Errorf("error marshaling registerLeadership Item: %v", err)
	}
	// Claim (or renew) the leader record only if we already own it, nobody
	// owns it, or the current owner's record is stale.
	_, err = k.dynamodb.PutItem(&dynamodb.PutItemInput{
		TableName:                 aws.String(k.metadataTableName),
		Item:                      item,
		ConditionExpression:       aws.String("ID = :ID OR attribute_not_exists(ID) OR LastUpdate <= :cutoff"),
		ExpressionAttributeValues: attrVals,
	})
	if err != nil {
		if awsErr, ok := err.(awserr.Error); ok && awsErr.Code() == conditionalFail {
			// Someone else is (still) the leader.
			return false, nil
		}
		return false, err
	}
	return true, nil
}

// loadShardIDsFromKinesis returns a sorted slice of shardIDs from kinesis.
// This function used to use kinesis.DescribeStream, which has a very low throttling limit of 10/s per account.
// As such, the leader is responsible for caching the shard list.
// Now that it uses ListShards, you could potentially query the shard list directly from all clients.
268 | //TODO: Write unit test - needs kinesis mocking 269 | func loadShardIDsFromKinesis(kin kinesisiface.KinesisAPI, streamName string) ([]string, error) { 270 | var innerError error 271 | 272 | shardIDs := make([]string, 0) 273 | var token *string 274 | 275 | // Manually page the results since aws-sdk-go has no ListShardsPages. 276 | for { 277 | inputParams := kinesis.ListShardsInput{} 278 | if token != nil { 279 | inputParams.NextToken = token 280 | } else { 281 | inputParams.StreamName = aws.String(streamName) 282 | } 283 | res, err := kin.ListShards(&inputParams) 284 | 285 | if err != nil { 286 | if e, ok := err.(awserr.Error); ok { 287 | switch e.Code() { 288 | case "ResourceInUseException": 289 | innerError = ErrStreamBusy 290 | case "ResourceNotFoundException": 291 | innerError = ErrNoSuchStream 292 | } 293 | } 294 | } 295 | 296 | if innerError != nil { 297 | return nil, innerError 298 | } 299 | 300 | if err != nil { 301 | return nil, err 302 | } 303 | 304 | for _, s := range res.Shards { 305 | shardIDs = append(shardIDs, aws.StringValue(s.ShardId)) 306 | } 307 | if res.NextToken == nil { 308 | break 309 | } 310 | token = res.NextToken 311 | } 312 | sort.Strings(shardIDs) 313 | 314 | return shardIDs, nil 315 | } 316 | 317 | // loadShardIDsFromDynamo returns the sorted slice of shardIDs from the metadata table in dynamo. 318 | func loadShardIDsFromDynamo(db dynamodbiface.DynamoDBAPI, tableName string) ([]string, error) { 319 | record, err := loadShardCacheFromDynamo(db, tableName) 320 | if err != nil { 321 | return nil, err 322 | } 323 | if record == nil { 324 | return nil, nil 325 | } 326 | return record.ShardIDs, nil 327 | } 328 | 329 | // loadShardCacheFromDynamo returns the ShardCache record from the metadata table in dynamo. 
330 | func loadShardCacheFromDynamo(db dynamodbiface.DynamoDBAPI, tableName string) (*shardCacheRecord, error) { 331 | resp, err := db.GetItem(&dynamodb.GetItemInput{ 332 | TableName: aws.String(tableName), 333 | ConsistentRead: aws.Bool(true), 334 | Key: map[string]*dynamodb.AttributeValue{ 335 | "Key": {S: aws.String(shardCacheKey)}, 336 | }, 337 | }) 338 | if err != nil { 339 | if awsErr, ok := err.(awserr.Error); ok && awsErr.Code() == "ResourceNotFoundException" { 340 | return nil, nil 341 | } 342 | return nil, err 343 | } 344 | var record shardCacheRecord 345 | if err = dynamodbattribute.UnmarshalMap(resp.Item, &record); err != nil { 346 | return nil, err 347 | } 348 | return &record, nil 349 | } 350 | -------------------------------------------------------------------------------- /logger.go: -------------------------------------------------------------------------------- 1 | package kinsumer 2 | 3 | import "log" 4 | 5 | // Logger is a minimal interface to allow custom loggers to be used 6 | type Logger interface { 7 | Log(string, ...interface{}) 8 | } 9 | 10 | // DefaultLogger is a logger that will log using the 11 | // standard golang log library 12 | type DefaultLogger struct{} 13 | 14 | // Log implementation that uses golang log library 15 | func (*DefaultLogger) Log(format string, v ...interface{}) { 16 | log.Printf(format, v...) 
17 | } 18 | -------------------------------------------------------------------------------- /mocks/dynamo.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Twitch Interactive 2 | 3 | package mocks 4 | 5 | import ( 6 | "bytes" 7 | "fmt" 8 | "strconv" 9 | "strings" 10 | "testing" 11 | 12 | "github.com/aws/aws-sdk-go/aws" 13 | "github.com/aws/aws-sdk-go/aws/awserr" 14 | "github.com/aws/aws-sdk-go/service/dynamodb" 15 | "github.com/aws/aws-sdk-go/service/dynamodb/dynamodbiface" 16 | ) 17 | 18 | var ( 19 | // If mockDynamoErrorTrigger is passed in as the table name in a MockDynamo 20 | // request, then the MockDynamo will respond with nil output and 21 | // mockDynamoError. 22 | mockDynamoErrorTrigger = "error-trigger" 23 | 24 | // items per page when scanning 25 | mockDynamoPageSize = 5 26 | ) 27 | 28 | func errInternalError() error { 29 | return awserr.New("InternalFailure", "triggered error", nil) 30 | } 31 | 32 | func errMissingParameter(param string) error { 33 | return awserr.New("MissingParameter", fmt.Sprintf("missing required parameter %s", param), nil) 34 | } 35 | 36 | func errTableNotFound(tableName string) error { 37 | return awserr.New("ResourceNotFoundException", fmt.Sprintf("table %q not found", tableName), nil) 38 | } 39 | 40 | // Record of a call to MockDynamo. Stores a string name of the API endpoint 41 | // ("PutItem", "GetItem", etc), the input struct received, the output struct 42 | // sent back (if any), and the error sent back. The input and output are stored 43 | // in interface{} so you'll need to do type assertions to pull out meaningful 44 | // values. 45 | type mockDynamoCallRecord struct { 46 | operation string 47 | input interface{} 48 | output interface{} 49 | err error 50 | } 51 | 52 | // MockDynamo mocks the DynamoDB API in memory. It only supports GetItem, 53 | // PutItem, and ScanPages. 
It only supports the most simple filter expressions: 54 | // they must be of the form :, and operator must be 55 | // =, <, <=, >, >=, or <>. 56 | type MockDynamo struct { 57 | dynamodbiface.DynamoDBAPI 58 | 59 | // Stored data 60 | tables map[string][]mockDynamoItem 61 | 62 | // Diagnostic tools 63 | requests []mockDynamoCallRecord 64 | } 65 | 66 | // NewMockDynamo gets a dynamo interface for testing 67 | func NewMockDynamo(tables []string) dynamodbiface.DynamoDBAPI { 68 | d := &MockDynamo{ 69 | tables: make(map[string][]mockDynamoItem), 70 | requests: make([]mockDynamoCallRecord, 0), 71 | } 72 | for _, t := range tables { 73 | d.addTable(t) 74 | } 75 | return d 76 | } 77 | 78 | func (d *MockDynamo) addTable(name string) { 79 | d.tables[name] = make([]mockDynamoItem, 0) 80 | } 81 | 82 | func (d *MockDynamo) recordCall(operation string, in, out interface{}, err error) { 83 | d.requests = append(d.requests, mockDynamoCallRecord{ 84 | operation: operation, 85 | input: in, 86 | output: out, 87 | err: err, 88 | }) 89 | } 90 | 91 | // PutItem mocks the dynamo PutItem method 92 | func (d *MockDynamo) PutItem(in *dynamodb.PutItemInput) (out *dynamodb.PutItemOutput, err error) { 93 | defer d.recordCall("PutItem", in, out, err) 94 | if in.TableName == nil { 95 | return nil, errMissingParameter("TableName") 96 | } 97 | if in.Item == nil { 98 | return nil, errMissingParameter("Item") 99 | } 100 | 101 | if aws.StringValue(in.TableName) == mockDynamoErrorTrigger { 102 | return nil, errInternalError() 103 | } 104 | 105 | tableName := aws.StringValue(in.TableName) 106 | if _, ok := d.tables[tableName]; !ok { 107 | return nil, errTableNotFound(tableName) 108 | } 109 | 110 | d.tables[tableName] = append(d.tables[tableName], in.Item) 111 | return &dynamodb.PutItemOutput{}, nil 112 | } 113 | 114 | // UpdateItem mocks the dynamo UpdateItem method 115 | func (d *MockDynamo) UpdateItem(in *dynamodb.UpdateItemInput) (out *dynamodb.UpdateItemOutput, err error) { 116 | defer 
d.recordCall("UpdateItem", in, out, err) 117 | if in.TableName == nil { 118 | return nil, errMissingParameter("TableName") 119 | } 120 | if in.Key == nil { 121 | return nil, errMissingParameter("Key") 122 | } 123 | 124 | return &dynamodb.UpdateItemOutput{}, nil 125 | } 126 | 127 | // GetItem mocks the dynamo GetItem method 128 | func (d *MockDynamo) GetItem(in *dynamodb.GetItemInput) (out *dynamodb.GetItemOutput, err error) { 129 | defer d.recordCall("GetItem", in, out, err) 130 | 131 | if in.TableName == nil { 132 | return nil, errMissingParameter("TableName") 133 | } 134 | if in.Key == nil { 135 | return nil, errMissingParameter("Key") 136 | } 137 | if aws.StringValue(in.TableName) == mockDynamoErrorTrigger { 138 | return nil, errInternalError() 139 | } 140 | 141 | tableName := aws.StringValue(in.TableName) 142 | if _, ok := d.tables[tableName]; !ok { 143 | return nil, errTableNotFound(tableName) 144 | } 145 | 146 | var filters []dynamoFilter 147 | for col, operand := range in.Key { 148 | filters = append(filters, dynamoFilter{ 149 | col: col, 150 | comp: attrEqual, 151 | operand: operand, 152 | }) 153 | } 154 | 155 | var match map[string]*dynamodb.AttributeValue 156 | ItemLoop: 157 | for _, item := range d.tables[tableName] { 158 | for _, filter := range filters { 159 | if !item.applyFilter(filter) { 160 | continue ItemLoop 161 | } 162 | } 163 | match = item 164 | break 165 | } 166 | 167 | return &dynamodb.GetItemOutput{Item: match}, nil 168 | } 169 | 170 | // ScanPages mocks the dynamo ScanPages method 171 | func (d *MockDynamo) ScanPages(in *dynamodb.ScanInput, pager func(*dynamodb.ScanOutput, bool) bool) (err error) { 172 | defer d.recordCall("ScanPages", in, nil, err) 173 | 174 | if in.TableName == nil { 175 | return errMissingParameter("TableName") 176 | } 177 | if aws.StringValue(in.TableName) == mockDynamoErrorTrigger { 178 | return errInternalError() 179 | } 180 | 181 | filter, err := parseFilter(aws.StringValue(in.FilterExpression), 
in.ExpressionAttributeValues) 182 | if err != nil { 183 | return err 184 | } 185 | 186 | table, ok := d.tables[aws.StringValue(in.TableName)] 187 | if !ok { 188 | return errTableNotFound(aws.StringValue(in.TableName)) 189 | } 190 | 191 | var items []mockDynamoItem 192 | for _, item := range table { 193 | if item.applyFilter(filter) { 194 | items = append(items, item) 195 | } 196 | } 197 | 198 | var pages []*dynamodb.ScanOutput 199 | for i := 0; i < len(items); i += mockDynamoPageSize { 200 | end := i + mockDynamoPageSize 201 | if end > len(items) { 202 | end = len(items) 203 | } 204 | pageItems := make([]map[string]*dynamodb.AttributeValue, end-i) 205 | for j := range pageItems { 206 | pageItems[j] = (map[string]*dynamodb.AttributeValue)(items[i+j]) 207 | } 208 | 209 | page := &dynamodb.ScanOutput{ 210 | Count: aws.Int64(int64(end - i)), 211 | Items: pageItems, 212 | } 213 | pages = append(pages, page) 214 | } 215 | 216 | for i, p := range pages { 217 | if !pager(p, i == len(pages)-1) { 218 | break 219 | } 220 | } 221 | return nil 222 | } 223 | 224 | type mockDynamoItem map[string]*dynamodb.AttributeValue 225 | 226 | type attrType int 227 | 228 | const ( 229 | unknownAttr attrType = iota 230 | binaryAttr 231 | boolAttr 232 | binarySetAttr 233 | listAttr 234 | mapAttr 235 | numberAttr 236 | numberSetAttr 237 | nullAttr 238 | stringAttr 239 | stringSetAttr 240 | ) 241 | 242 | func typeOfAttr(v *dynamodb.AttributeValue) attrType { 243 | switch { 244 | case v.B != nil: 245 | return binaryAttr 246 | case v.BOOL != nil: 247 | return boolAttr 248 | case v.BS != nil: 249 | return binarySetAttr 250 | case v.L != nil: 251 | return listAttr 252 | case v.M != nil: 253 | return mapAttr 254 | case v.N != nil: 255 | return numberAttr 256 | case v.NS != nil: 257 | return numberSetAttr 258 | case v.NULL != nil: 259 | return nullAttr 260 | case v.S != nil: 261 | return stringAttr 262 | case v.SS != nil: 263 | return stringSetAttr 264 | default: 265 | return unknownAttr 266 | } 267 | 
} 268 | 269 | func attrEqual(l, r *dynamodb.AttributeValue) bool { 270 | if typeOfAttr(l) != typeOfAttr(r) { 271 | return false 272 | } 273 | 274 | // value equality 275 | if !bytes.Equal(l.B, r.B) || 276 | aws.BoolValue(l.BOOL) != aws.BoolValue(r.BOOL) || 277 | aws.BoolValue(l.NULL) != aws.BoolValue(r.NULL) || 278 | parseNum(l.N) != parseNum(r.N) || 279 | aws.StringValue(l.S) != aws.StringValue(r.S) { 280 | return false 281 | } 282 | 283 | // list equality 284 | if l.L != nil { 285 | if len(l.L) != len(r.L) { 286 | return false 287 | } 288 | for i, lv := range l.L { 289 | if !attrEqual(lv, r.L[i]) { 290 | return false 291 | } 292 | } 293 | } 294 | 295 | // map equality 296 | if l.M != nil { 297 | if len(l.M) != len(r.M) { 298 | return false 299 | } 300 | for k, lv := range l.M { 301 | if !attrEqual(lv, r.M[k]) { 302 | return false 303 | } 304 | } 305 | } 306 | 307 | // binary set equality 308 | if l.BS != nil { 309 | if len(l.BS) != len(r.BS) { 310 | return false 311 | } 312 | lSet := make(map[string]struct{}) 313 | for _, k := range l.BS { 314 | lSet[string(k)] = struct{}{} 315 | } 316 | for _, k := range r.BS { 317 | if _, ok := lSet[string(k)]; !ok { 318 | return false 319 | } 320 | } 321 | } 322 | 323 | // number set equality 324 | if l.NS != nil { 325 | if len(l.NS) != len(r.NS) { 326 | return false 327 | } 328 | lSet := make(map[float64]struct{}) 329 | for _, k := range l.NS { 330 | lSet[parseNum(k)] = struct{}{} 331 | } 332 | for _, k := range r.NS { 333 | if _, ok := lSet[parseNum(k)]; !ok { 334 | return false 335 | } 336 | } 337 | } 338 | 339 | // string set equality 340 | if l.SS != nil { 341 | if len(l.SS) != len(r.SS) { 342 | return false 343 | } 344 | lSet := make(map[string]struct{}) 345 | for _, k := range l.SS { 346 | lSet[aws.StringValue(k)] = struct{}{} 347 | } 348 | for _, k := range r.SS { 349 | if _, ok := lSet[aws.StringValue(k)]; !ok { 350 | return false 351 | } 352 | } 353 | } 354 | 355 | return true 356 | } 357 | 358 | func attrNotEqual(l, 
r *dynamodb.AttributeValue) bool { 359 | return !attrEqual(l, r) 360 | } 361 | 362 | func attrLessThan(l, r *dynamodb.AttributeValue) bool { 363 | if typeOfAttr(l) != typeOfAttr(r) { 364 | return false 365 | } 366 | 367 | switch typeOfAttr(l) { 368 | case stringAttr: 369 | return aws.StringValue(l.S) < aws.StringValue(r.S) 370 | case numberAttr: 371 | return parseNum(l.N) < parseNum(r.N) 372 | default: 373 | return false 374 | } 375 | } 376 | 377 | func attrGreaterThan(l, r *dynamodb.AttributeValue) bool { 378 | return attrLessThan(r, l) 379 | } 380 | 381 | func attrLessThanOrEqual(l, r *dynamodb.AttributeValue) bool { 382 | return !attrLessThan(r, l) 383 | } 384 | 385 | func attrGreaterThanOrEqual(l, r *dynamodb.AttributeValue) bool { 386 | return !attrLessThan(l, r) 387 | } 388 | 389 | func parseNum(raw *string) float64 { 390 | if raw == nil { 391 | return 0 392 | } 393 | n, err := strconv.ParseFloat(*raw, 64) 394 | if err != nil { 395 | panic(err) 396 | } 397 | return n 398 | } 399 | 400 | type dynamoFilter struct { 401 | col string 402 | comp func(l, r *dynamodb.AttributeValue) bool 403 | operand *dynamodb.AttributeValue 404 | } 405 | 406 | // Parse a filter expression. Compound filter expressions with multiple 407 | // conditions aren't supported. Only filters that look like 408 | // 'column :value' work. 409 | func parseFilter(expr string, attrs map[string]*dynamodb.AttributeValue) (dynamoFilter, error) { 410 | out := dynamoFilter{} 411 | if len(expr) == 0 { 412 | out.comp = func(_, _ *dynamodb.AttributeValue) bool { return true } 413 | return out, nil 414 | } 415 | 416 | // parse out column 417 | splitExpr := strings.Split(expr, " ") 418 | if len(splitExpr) != 3 { 419 | return out, fmt.Errorf("unparseable filter, expected 'column cmp :val'. 
expr=%q", expr) 420 | } 421 | 422 | var rawComp, rawVal string 423 | out.col, rawComp, rawVal = splitExpr[0], splitExpr[1], splitExpr[2] 424 | 425 | // parse comparator 426 | switch rawComp { 427 | case "=": 428 | out.comp = attrEqual 429 | case "<>": 430 | out.comp = attrNotEqual 431 | case "<": 432 | out.comp = attrLessThan 433 | case "<=": 434 | out.comp = attrLessThanOrEqual 435 | case ">": 436 | out.comp = attrGreaterThan 437 | case ">=": 438 | out.comp = attrGreaterThanOrEqual 439 | default: 440 | return out, fmt.Errorf("unknown comparator %q", rawComp) 441 | } 442 | 443 | // parse operand 444 | if !strings.HasPrefix(rawVal, ":") { 445 | return out, fmt.Errorf("unparseable filter, expected 'column cmp :val' style. expr=%q", expr) 446 | } 447 | var ok bool 448 | out.operand, ok = attrs[rawVal] 449 | if !ok { 450 | return out, fmt.Errorf("missing filter argument %q", rawVal) 451 | } 452 | return out, nil 453 | } 454 | 455 | func (i mockDynamoItem) applyFilter(f dynamoFilter) bool { 456 | // Special case: an empty string is a filter which always returns true 457 | if f.col == "" { 458 | return true 459 | } 460 | 461 | itemVal, ok := i[f.col] 462 | if !ok { 463 | return false 464 | } 465 | 466 | if typeOfAttr(itemVal) != typeOfAttr(f.operand) { 467 | return false 468 | } 469 | 470 | return f.comp(itemVal, f.operand) 471 | 472 | } 473 | 474 | // AssertNoRequestsMade will Execute a function, asserting that no requests 475 | // are made over the course of the function. 476 | func AssertNoRequestsMade(t *testing.T, mock *MockDynamo, msg string, f func()) { 477 | nStart := len(mock.requests) 478 | f() 479 | nEnd := len(mock.requests) 480 | if nEnd > nStart { 481 | for i := nStart; i < nEnd; i++ { 482 | t.Errorf("%s: unexpected %s request made to dynamo", msg, mock.requests[i].operation) 483 | } 484 | } 485 | } 486 | 487 | // AssertRequestMade will Execute a function, asserting that at least one request 488 | // is made over the course of the function. 
489 | func AssertRequestMade(t *testing.T, mock *MockDynamo, msg string, f func()) { 490 | nStart := len(mock.requests) 491 | f() 492 | nEnd := len(mock.requests) 493 | if nEnd == nStart { 494 | t.Errorf("%s: expected a call to be made to dynamo, but didn't see one", msg) 495 | } 496 | } 497 | -------------------------------------------------------------------------------- /mocks/dynamo_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Twitch Interactive 2 | 3 | package mocks 4 | 5 | import ( 6 | "reflect" 7 | "testing" 8 | 9 | "github.com/aws/aws-sdk-go/aws" 10 | "github.com/aws/aws-sdk-go/service/dynamodb" 11 | "github.com/aws/aws-sdk-go/service/dynamodb/dynamodbattribute" 12 | ) 13 | 14 | func TestMockDynamo(t *testing.T) { 15 | var table = "users" 16 | 17 | mock := NewMockDynamo([]string{table}) 18 | 19 | // Make a few objects storable in dynamo 20 | type user struct { 21 | Name string 22 | ID int64 23 | } 24 | 25 | ken := user{ 26 | Name: "Ken Thompson", 27 | ID: 1, 28 | } 29 | user1, err := dynamodbattribute.MarshalMap(ken) 30 | if err != nil { 31 | t.Fatalf("MarshalMap(user1) err=%q", err) 32 | } 33 | rob := user{ 34 | Name: "Rob Pike", 35 | ID: 2, 36 | } 37 | user2, err := dynamodbattribute.MarshalMap(rob) 38 | if err != nil { 39 | t.Fatalf("MarshalMap(user2) err=%q", err) 40 | } 41 | 42 | // Put the objects in 43 | if _, err = mock.PutItem(&dynamodb.PutItemInput{ 44 | TableName: aws.String(table), 45 | Item: user1, 46 | }); err != nil { 47 | t.Errorf("PutItem(user1) err=%q", err) 48 | } 49 | 50 | if _, err = mock.PutItem(&dynamodb.PutItemInput{ 51 | TableName: aws.String(table), 52 | Item: user2, 53 | }); err != nil { 54 | t.Errorf("PutItem(user2) err=%q", err) 55 | } 56 | 57 | // Try putting one into a nonexistent table - this should error 58 | if _, err = mock.PutItem(&dynamodb.PutItemInput{ 59 | TableName: aws.String("nonexistent table"), 60 | Item: user1, 61 | }); err == nil { 62 | 
t.Errorf("Writing to a nonexistent table should error") 63 | } 64 | 65 | // Get user1 back out 66 | resp, err := mock.GetItem(&dynamodb.GetItemInput{ 67 | TableName: aws.String(table), 68 | Key: map[string]*dynamodb.AttributeValue{ 69 | "ID": {N: aws.String("1")}, 70 | }, 71 | }) 72 | if err != nil { 73 | t.Errorf("GetItem(key1) err=%q", err) 74 | } 75 | 76 | var returnedUser user 77 | if err = dynamodbattribute.UnmarshalMap(resp.Item, &returnedUser); err != nil { 78 | t.Fatalf("UnmarshalMap(GetItem response) err=%q", err) 79 | } 80 | 81 | if !reflect.DeepEqual(ken, returnedUser) { 82 | t.Errorf("Unexpected response from GetItem call. have=%+v want=%+v", returnedUser, ken) 83 | } 84 | 85 | // Scan users with a filter 86 | scanInput := &dynamodb.ScanInput{ 87 | TableName: aws.String(table), 88 | FilterExpression: aws.String("ID > :id"), 89 | ExpressionAttributeValues: map[string]*dynamodb.AttributeValue{ 90 | ":id": {N: aws.String("1")}, 91 | }, 92 | } 93 | 94 | var result []user 95 | err = mock.ScanPages(scanInput, func(page *dynamodb.ScanOutput, last bool) bool { 96 | for _, item := range page.Items { 97 | var u user 98 | err = dynamodbattribute.UnmarshalMap(item, &u) 99 | if err != nil { 100 | t.Fatalf("UnmarshalMap(Scan response) err=%q", err) 101 | } 102 | result = append(result, u) 103 | } 104 | return !last 105 | }) 106 | if err != nil { 107 | t.Errorf("ScanPages err=%q", err) 108 | } 109 | 110 | if len(result) == 1 { 111 | if !reflect.DeepEqual(result[0], rob) { 112 | t.Errorf("Unexpected result in scan response. 
have=%+v want=%+v", result[0], rob) 113 | } 114 | } else { 115 | t.Errorf("Unexpected number of results from scan, have=%d want=%d", len(result), 1) 116 | } 117 | 118 | } 119 | -------------------------------------------------------------------------------- /noopstatreceiver.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Twitch Interactive 2 | 3 | package kinsumer 4 | 5 | import "time" 6 | 7 | // NoopStatReceiver is a statreceiver that doesn't do anything, use it if you do not want to collect 8 | // stats, or as a base if you want to just collect a subset of stats 9 | type NoopStatReceiver struct { 10 | } 11 | 12 | // Checkpoint implementation that doesn't do anything 13 | func (*NoopStatReceiver) Checkpoint() {} 14 | 15 | // EventToClient implementation that doesn't do anything 16 | func (*NoopStatReceiver) EventToClient(inserted, retrieved time.Time) {} 17 | 18 | // EventsFromKinesis implementation that doesn't do anything 19 | func (*NoopStatReceiver) EventsFromKinesis(num int, shardID string, lag time.Duration) {} 20 | -------------------------------------------------------------------------------- /shard_consumer.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Twitch Interactive 2 | 3 | package kinsumer 4 | 5 | import ( 6 | "fmt" 7 | "time" 8 | 9 | "github.com/aws/aws-sdk-go/aws" 10 | "github.com/aws/aws-sdk-go/aws/awserr" 11 | "github.com/aws/aws-sdk-go/aws/request" 12 | "github.com/aws/aws-sdk-go/service/kinesis" 13 | "github.com/aws/aws-sdk-go/service/kinesis/kinesisiface" 14 | ) 15 | 16 | const ( 17 | // getRecordsLimit is the max number of records in a single request. 
This effectively limits the 18 | // total processing speed to getRecordsLimit*5/n where n is the number of parallel clients trying 19 | // to consume from the same kinesis stream 20 | getRecordsLimit = 10000 // 10,000 is the max according to the docs 21 | 22 | // maxErrorRetries is how many times we will retry on a shard error 23 | maxErrorRetries = 3 24 | 25 | // errorSleepDuration is how long we sleep when an error happens, this is multiplied by the number 26 | // of retries to give a minor backoff behavior 27 | errorSleepDuration = 1 * time.Second 28 | ) 29 | 30 | // getShardIterator gets a shard iterator after the last sequence number we read or at the start of the stream 31 | func getShardIterator(k kinesisiface.KinesisAPI, streamName string, shardID string, sequenceNumber string) (string, error) { 32 | shardIteratorType := kinesis.ShardIteratorTypeAfterSequenceNumber 33 | 34 | // If we do not have a sequenceNumber yet we need to get a shardIterator 35 | // from the horizon 36 | ps := aws.String(sequenceNumber) 37 | if sequenceNumber == "" { 38 | shardIteratorType = kinesis.ShardIteratorTypeTrimHorizon 39 | ps = nil 40 | } else if sequenceNumber == "LATEST" { 41 | shardIteratorType = kinesis.ShardIteratorTypeLatest 42 | ps = nil 43 | } 44 | 45 | resp, err := k.GetShardIterator(&kinesis.GetShardIteratorInput{ 46 | ShardId: aws.String(shardID), 47 | ShardIteratorType: &shardIteratorType, 48 | StartingSequenceNumber: ps, 49 | StreamName: aws.String(streamName), 50 | }) 51 | return aws.StringValue(resp.ShardIterator), err 52 | } 53 | 54 | // getRecords returns the next records and shard iterator from the given shard iterator 55 | func getRecords(k kinesisiface.KinesisAPI, iterator string) (records []*kinesis.Record, nextIterator string, lag time.Duration, err error) { 56 | params := &kinesis.GetRecordsInput{ 57 | Limit: aws.Int64(getRecordsLimit), 58 | ShardIterator: aws.String(iterator), 59 | } 60 | 61 | output, err := k.GetRecords(params) 62 | 63 | if err != nil 
{ 64 | return nil, "", 0, err 65 | } 66 | 67 | records = output.Records 68 | nextIterator = aws.StringValue(output.NextShardIterator) 69 | lag = time.Duration(aws.Int64Value(output.MillisBehindLatest)) * time.Millisecond 70 | 71 | return records, nextIterator, lag, nil 72 | } 73 | 74 | // captureShard blocks until we capture the given shardID 75 | func (k *Kinsumer) captureShard(shardID string) (*checkpointer, error) { 76 | // Attempt to capture the shard in dynamo 77 | for { 78 | // Ask the checkpointer to capture the shard 79 | checkpointer, err := capture( 80 | shardID, 81 | k.checkpointTableName, 82 | k.dynamodb, 83 | k.clientName, 84 | k.clientID, 85 | k.maxAgeForClientRecord, 86 | k.config.stats) 87 | if err != nil { 88 | return nil, err 89 | } 90 | 91 | if checkpointer != nil { 92 | return checkpointer, nil 93 | } 94 | 95 | // Throttle requests so that we don't hammer dynamo 96 | select { 97 | case <-k.stop: 98 | // If we are told to stop consuming we should stop attempting to capture 99 | return nil, nil 100 | case <-time.After(k.config.throttleDelay): 101 | } 102 | } 103 | } 104 | 105 | // consume is a blocking call that captures then consumes the given shard in a loop. 106 | // It is also responsible for writing out the checkpoint updates to dynamo. 107 | // TODO: There are no tests for this file. Not sure how to even unit test this. 
// consume is a long-lived worker goroutine that owns a single kinesis shard:
// it captures the shard's checkpointer, pulls records in a throttled loop,
// hands them to the client via k.records, and periodically commits progress
// to dynamo. It exits on k.stop, on any unrecoverable error (reported through
// k.shardErrors), or once the shard is fully consumed and that fact has been
// committed.
func (k *Kinsumer) consume(shardID string) {
	defer k.waitGroup.Done()

	// commitTicker is used to periodically commit, so that we don't hammer dynamo every time
	// a shard wants to be check pointed
	commitTicker := time.NewTicker(k.config.commitFrequency)
	defer commitTicker.Stop()

	// capture the checkpointer (exclusive ownership of this shard's checkpoint row)
	checkpointer, err := k.captureShard(shardID)
	if err != nil {
		k.shardErrors <- shardConsumerError{shardID: shardID, action: "captureShard", err: err}
		return
	}

	// if we failed to capture the checkpointer but there was no errors
	// we must have stopped, so don't process this shard at all
	if checkpointer == nil {
		return
	}

	sequenceNumber := checkpointer.sequenceNumber

	// finished means we have reached the end of the shard but haven't necessarily processed/committed everything
	finished := false
	// Make sure we release the shard when we are done.
	defer func() {
		innerErr := checkpointer.release()
		if innerErr != nil {
			k.shardErrors <- shardConsumerError{shardID: shardID, action: "checkpointer.release", err: innerErr}
			return
		}
	}()

	// Get the starting shard iterator (resumes from the captured sequence number)
	iterator, err := getShardIterator(k.kinesis, k.streamName, shardID, sequenceNumber)
	if err != nil {
		k.shardErrors <- shardConsumerError{shardID: shardID, action: "getShardIterator", err: err}
		return
	}

	// no throttle on the first request.
	nextThrottle := time.After(0)

	retryCount := 0

	// lastSeqNum tracks the newest sequence number we have seen, so we can
	// mark the shard finished at the right position when the iterator runs out.
	var lastSeqNum string
mainloop:
	for {
		// We have reached the end of the shard's data. Set Finished in dynamo and stop processing.
		if iterator == "" && !finished {
			checkpointer.finish(lastSeqNum)
			finished = true
		}

		// Handle async actions, and throttle requests to keep kinesis happy
		select {
		case <-k.stop:
			return
		case <-commitTicker.C:
			finishCommitted, err := checkpointer.commit()
			if err != nil {
				k.shardErrors <- shardConsumerError{shardID: shardID, action: "checkpointer.commit", err: err}
				return
			}
			// A committed "finished" state means the shard is fully done; exit.
			if finishCommitted {
				return
			}
			// Go back to waiting for a throttle/stop.
			continue mainloop
		case <-nextThrottle:
		}

		// Reset the nextThrottle
		nextThrottle = time.After(k.config.throttleDelay)

		// Once finished we only keep servicing the ticker/stop cases above;
		// there is nothing left to read from kinesis.
		if finished {
			continue mainloop
		}

		// Get records from kinesis
		records, next, lag, err := getRecords(k.kinesis, iterator)

		if err != nil {
			if awsErr, ok := err.(awserr.Error); ok {
				origErrStr := ""
				if awsErr.OrigErr() != nil {
					origErrStr = fmt.Sprintf("(%s) ", awsErr.OrigErr())
				}
				k.config.logger.Log("Got error: %s %s %sretry count is %d / %d", awsErr.Code(), awsErr.Message(), origErrStr, retryCount, maxErrorRetries)
				// Only retry for errors that should be retried; notably, don't retry serialization errors because something bad is happening
				shouldRetry := request.IsErrorRetryable(err) || request.IsErrorThrottle(err)
				if shouldRetry && retryCount < maxErrorRetries {
					retryCount++

					// casting retryCount here to time.Duration purely for the multiplication, there is
					// no meaning to retryCount nanoseconds
					time.Sleep(errorSleepDuration * time.Duration(retryCount))
					continue mainloop
				}
			}
			k.shardErrors <- shardConsumerError{shardID: shardID, action: "getRecords", err: err}
			return
		}
		// A successful call resets the linear-backoff retry counter.
		retryCount = 0

		// Put all the records we got onto the channel
		k.config.stats.EventsFromKinesis(len(records), shardID, lag)
		if len(records) > 0 {
			retrievedAt := time.Now()
			for _, record := range records {
			RecordLoop:
				// Loop until we stop or the record is consumed, checkpointing if necessary.
				// Keeping the ticker and stop cases in this inner select means a slow
				// client cannot block commits or shutdown.
				for {
					select {
					case <-commitTicker.C:
						finishCommitted, err := checkpointer.commit()
						if err != nil {
							k.shardErrors <- shardConsumerError{shardID: shardID, action: "checkpointer.commit", err: err}
							return
						}
						if finishCommitted {
							return
						}
					case <-k.stop:
						return
					case k.records <- &consumedRecord{
						record:       record,
						checkpointer: checkpointer,
						retrievedAt:  retrievedAt,
					}:
						break RecordLoop
					}
				}
			}

			// Update the last sequence number we saw, in case we reached the end of the stream.
			lastSeqNum = aws.StringValue(records[len(records)-1].SequenceNumber)
		}
		iterator = next
	}
}
--------------------------------------------------------------------------------
/statreceiver.go:
--------------------------------------------------------------------------------
// Copyright (c) 2016 Twitch Interactive

package kinsumer

import "time"

// A StatReceiver will have its methods called as operations
// happen inside a running kinsumer, and is useful for tracking
// the operation of the consumer.
//
// The methods will get called from multiple goroutines and it is
// the implementor's responsibility to handle thread synchronization.
type StatReceiver interface {
	// Dynamo operations

	// Checkpoint is called every time a checkpoint is written to dynamodb
	Checkpoint()

	// EventToClient is called every time a record is returned to the client
	// `inserted` is the approximate time the record was inserted into kinesis
	// `retrieved` is the time when kinsumer retrieved the record from kinesis
	EventToClient(inserted, retrieved time.Time)

	// EventsFromKinesis is called every time a bunch of records is retrieved from
	// a kinesis shard.
	// `num` Number of records retrieved.
	// `shardID` ID of the shard that the records were retrieved from
	// `lag` How far the records are from the tip of the stream.
	EventsFromKinesis(num int, shardID string, lag time.Duration)
}
--------------------------------------------------------------------------------
/statsd/statsd.go:
--------------------------------------------------------------------------------
// Copyright (c) 2016 Twitch Interactive

package statsd

import (
	"fmt"
	"time"

	"github.com/cactus/go-statsd-client/statsd"
)

// Statsd is a statreceiver that writes stats to a statsd endpoint
type Statsd struct {
	client statsd.StatSender
}

// New creates a new Statsd statreceiver with a new instance of a cactus statter
// pointed at `addr`, prefixing every metric name with `prefix`.
func New(addr, prefix string) (*Statsd, error) {
	sd, err := statsd.NewClientWithConfig(&statsd.ClientConfig{
		Address: addr,
		Prefix:  prefix,
	})

	if err != nil {
		return nil, err
	}
	return &Statsd{
		client: sd,
	}, nil
}

// NewWithStatter creates a new statreceiver wrapping an existing statter
func NewWithStatter(client statsd.StatSender) *Statsd {
	return &Statsd{
		client: client,
	}
}

// Checkpoint implementation that writes to statsd
// (increments a counter; send errors are deliberately ignored since stats
// delivery is best-effort).
func (s *Statsd) Checkpoint() {
	_ = s.client.Inc("kinsumer.checkpoints", 1, 1.0)
}

// EventToClient implementation that writes to statsd metrics about a record
// that was consumed by the client
func (s *Statsd) EventToClient(inserted, retrieved time.Time) {
	now := time.Now()

	_ = s.client.Inc("kinsumer.consumed", 1, 1.0)
	// time spent sitting in the kinesis stream before retrieval
	_ = s.client.TimingDuration("kinsumer.in_stream", retrieved.Sub(inserted), 1.0)
	// total latency from kinesis insertion to client delivery
	_ = s.client.TimingDuration("kinsumer.end_to_end", now.Sub(inserted), 1.0)
	// time spent buffered inside kinsumer itself
	_ = s.client.TimingDuration("kinsumer.in_kinsumer", now.Sub(retrieved), 1.0)
}

// EventsFromKinesis implementation that writes to statsd metrics about records that
// were retrieved from kinesis (per-shard lag and retrieval count).
func (s *Statsd) EventsFromKinesis(num int, shardID string, lag time.Duration) {
	_ = s.client.TimingDuration(fmt.Sprintf("kinsumer.%s.lag", shardID), lag, 1.0)
	_ = s.client.Inc(fmt.Sprintf("kinsumer.%s.retrieved", shardID), int64(num), 1.0)
}
--------------------------------------------------------------------------------