├── .gitignore ├── LICENSE ├── README.md ├── consumer.example.properties └── consumer.go /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This software is licensed under the LGPLv3, included below. 2 | 3 | As a special exception to the GNU Lesser General Public License version 3 4 | ("LGPL3"), the copyright holders of this Library give you permission to 5 | convey to a third party a Combined Work that links statically or dynamically 6 | to this Library without providing any Minimal Corresponding Source or 7 | Minimal Application Code as set out in 4d or providing the installation 8 | information set out in section 4e, provided that you comply with the other 9 | provisions of LGPL3 and provided that you meet, for the Application the 10 | terms and conditions of the license(s) which apply to the Application. 11 | 12 | Except as stated in this special exception, the provisions of LGPL3 will 13 | continue to comply in full to this Library. If you modify this Library, you 14 | may apply this exception to your version of this Library, but you are not 15 | obliged to do so. If you do not wish to do so, delete this exception 16 | statement from your version. This exception does not (and cannot) modify any 17 | license terms which apply to the Application, with which you must still 18 | comply. 
19 | 20 | 21 | GNU LESSER GENERAL PUBLIC LICENSE 22 | Version 3, 29 June 2007 23 | 24 | Copyright (C) 2007 Free Software Foundation, Inc. 25 | Everyone is permitted to copy and distribute verbatim copies 26 | of this license document, but changing it is not allowed. 27 | 28 | 29 | This version of the GNU Lesser General Public License incorporates 30 | the terms and conditions of version 3 of the GNU General Public 31 | License, supplemented by the additional permissions listed below. 32 | 33 | 0. Additional Definitions. 34 | 35 | As used herein, "this License" refers to version 3 of the GNU Lesser 36 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 37 | General Public License. 38 | 39 | "The Library" refers to a covered work governed by this License, 40 | other than an Application or a Combined Work as defined below. 41 | 42 | An "Application" is any work that makes use of an interface provided 43 | by the Library, but which is not otherwise based on the Library. 44 | Defining a subclass of a class defined by the Library is deemed a mode 45 | of using an interface provided by the Library. 46 | 47 | A "Combined Work" is a work produced by combining or linking an 48 | Application with the Library. The particular version of the Library 49 | with which the Combined Work was made is also called the "Linked 50 | Version". 51 | 52 | The "Minimal Corresponding Source" for a Combined Work means the 53 | Corresponding Source for the Combined Work, excluding any source code 54 | for portions of the Combined Work that, considered in isolation, are 55 | based on the Application, and not on the Linked Version. 56 | 57 | The "Corresponding Application Code" for a Combined Work means the 58 | object code and/or source code for the Application, including any data 59 | and utility programs needed for reproducing the Combined Work from the 60 | Application, but excluding the System Libraries of the Combined Work. 61 | 62 | 1. 
Exception to Section 3 of the GNU GPL. 63 | 64 | You may convey a covered work under sections 3 and 4 of this License 65 | without being bound by section 3 of the GNU GPL. 66 | 67 | 2. Conveying Modified Versions. 68 | 69 | If you modify a copy of the Library, and, in your modifications, a 70 | facility refers to a function or data to be supplied by an Application 71 | that uses the facility (other than as an argument passed when the 72 | facility is invoked), then you may convey a copy of the modified 73 | version: 74 | 75 | a) under this License, provided that you make a good faith effort to 76 | ensure that, in the event an Application does not supply the 77 | function or data, the facility still operates, and performs 78 | whatever part of its purpose remains meaningful, or 79 | 80 | b) under the GNU GPL, with none of the additional permissions of 81 | this License applicable to that copy. 82 | 83 | 3. Object Code Incorporating Material from Library Header Files. 84 | 85 | The object code form of an Application may incorporate material from 86 | a header file that is part of the Library. You may convey such object 87 | code under terms of your choice, provided that, if the incorporated 88 | material is not limited to numerical parameters, data structure 89 | layouts and accessors, or small macros, inline functions and templates 90 | (ten or fewer lines in length), you do both of the following: 91 | 92 | a) Give prominent notice with each copy of the object code that the 93 | Library is used in it and that the Library and its use are 94 | covered by this License. 95 | 96 | b) Accompany the object code with a copy of the GNU GPL and this license 97 | document. 98 | 99 | 4. Combined Works. 
100 | 101 | You may convey a Combined Work under terms of your choice that, 102 | taken together, effectively do not restrict modification of the 103 | portions of the Library contained in the Combined Work and reverse 104 | engineering for debugging such modifications, if you also do each of 105 | the following: 106 | 107 | a) Give prominent notice with each copy of the Combined Work that 108 | the Library is used in it and that the Library and its use are 109 | covered by this License. 110 | 111 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 112 | document. 113 | 114 | c) For a Combined Work that displays copyright notices during 115 | execution, include the copyright notice for the Library among 116 | these notices, as well as a reference directing the user to the 117 | copies of the GNU GPL and this license document. 118 | 119 | d) Do one of the following: 120 | 121 | 0) Convey the Minimal Corresponding Source under the terms of this 122 | License, and the Corresponding Application Code in a form 123 | suitable for, and under terms that permit, the user to 124 | recombine or relink the Application with a modified version of 125 | the Linked Version to produce a modified Combined Work, in the 126 | manner specified by section 6 of the GNU GPL for conveying 127 | Corresponding Source. 128 | 129 | 1) Use a suitable shared library mechanism for linking with the 130 | Library. A suitable mechanism is one that (a) uses at run time 131 | a copy of the Library already present on the user's computer 132 | system, and (b) will operate properly with a modified version 133 | of the Library that is interface-compatible with the Linked 134 | Version. 
135 | 136 | e) Provide Installation Information, but only if you would otherwise 137 | be required to provide such information under section 6 of the 138 | GNU GPL, and only to the extent that such information is 139 | necessary to install and execute a modified version of the 140 | Combined Work produced by recombining or relinking the 141 | Application with a modified version of the Linked Version. (If 142 | you use option 4d0, the Installation Information must accompany 143 | the Minimal Corresponding Source and Corresponding Application 144 | Code. If you use option 4d1, you must provide the Installation 145 | Information in the manner specified by section 6 of the GNU GPL 146 | for conveying Corresponding Source.) 147 | 148 | 5. Combined Libraries. 149 | 150 | You may place library facilities that are a work based on the 151 | Library side by side in a single library together with other library 152 | facilities that are not Applications and are not covered by this 153 | License, and convey such a combined library under terms of your 154 | choice, if you do both of the following: 155 | 156 | a) Accompany the combined library with a copy of the same work based 157 | on the Library, uncombined with any other library facilities, 158 | conveyed under the terms of this License. 159 | 160 | b) Give prominent notice with the combined library that part of it 161 | is a work based on the Library, and explaining where to find the 162 | accompanying uncombined form of the same work. 163 | 164 | 6. Revised Versions of the GNU Lesser General Public License. 165 | 166 | The Free Software Foundation may publish revised and/or new versions 167 | of the GNU Lesser General Public License from time to time. Such new 168 | versions will be similar in spirit to the present version, but may 169 | differ in detail to address new problems or concerns. 170 | 171 | Each version is given a distinguishing version number. 
If the 172 | Library as you received it specifies that a certain numbered version 173 | of the GNU Lesser General Public License "or any later version" 174 | applies to it, you have the option of following the terms and 175 | conditions either of that published version or of any later version 176 | published by the Free Software Foundation. If the Library as you 177 | received it does not specify a version number of the GNU Lesser 178 | General Public License, you may choose any version of the GNU Lesser 179 | General Public License ever published by the Free Software Foundation. 180 | 181 | If the Library as you received it specifies that a proxy can decide 182 | whether future versions of the GNU Lesser General Public License shall 183 | apply, that proxy's public statement of acceptance of any version is 184 | permanent authorization for you to choose that version for the 185 | Library. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | kafka-s3-go-consumer 2 | ==================== 3 | 4 | A `golang` consumer of Kafka topics, with an S3 sink. 5 | 6 | Install 7 | -------------------- 8 | 9 | First, get all dependencies with `go get .` 10 | 11 | Don't forget to make your own config file, by copying and updating the example one, found at `consumer.example.properties`. 12 | 13 | Run 14 | -------------------- 15 | ```bash 16 | go run consumer.go -c conf.properties -k 17 | ``` 18 | 19 | * `-c` Path to the config file; defaults to conf.properties in the current working directory 20 | * `-k` Keep chunkbuffer files around for inspection instead of deleting them after upload; defaults to false 21 | 22 | Deployment 23 | -------------------- 24 | 25 | There's a Chef recipe to fetch and build this on any server, at https://github.com/crowdmob/chef-kafka-s3-consumer-cookbook 26 | 27 | This recipe also works with Amazon OpsWorks, and we've been using it in a production environment.
28 | 29 | 30 | Dependencies 31 | --------------------- 32 | Several dependencies: 33 | 34 | ``` 35 | github.com/crowdmob/kafka 36 | github.com/crowdmob/goconfig 37 | github.com/crowdmob/goamz/s3 38 | ``` 39 | 40 | But these can all be gotten by `go get .` 41 | 42 | 43 | License and Author 44 | =============================== 45 | Author:: Matthew Moore 46 | 47 | Copyright:: 2013, CrowdMob Inc. 48 | 49 | 50 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 51 | 52 | http://www.apache.org/licenses/LICENSE-2.0 53 | 54 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 55 | 56 | -------------------------------------------------------------------------------- /consumer.example.properties: -------------------------------------------------------------------------------- 1 | [default] 2 | debug=true 3 | filebufferpath=/mnt/tmp/kafka-s3-go-consumer 4 | maxchunksizebytes=1048576 5 | maxchunkagemins=5 6 | pollsleepmillis=10 7 | 8 | [kafka] 9 | host=127.0.0.1 10 | port=9092 11 | maxmessagesize=4096 12 | topics=mytopic1,mytopic2 13 | partitions=0,0 14 | 15 | [s3] 16 | bucket=my-sink-bucket-$(NUTTY_ENV)s 17 | region=us-east-1 18 | accesskey=$(AWS_ACCESS_KEY_ID)s 19 | secretkey=$(AWS_SECRET_ACCESS_KEY)s 20 | -------------------------------------------------------------------------------- /consumer.go: -------------------------------------------------------------------------------- 1 | /* 2 | Author: Matthew Moore, CrowdMob Inc. 
3 | */ 4 | 5 | package main 6 | 7 | import ( 8 | "flag" 9 | "fmt" 10 | "github.com/crowdmob/kafka" 11 | "os" 12 | "os/signal" 13 | "io/ioutil" 14 | "strings" 15 | "strconv" 16 | "time" 17 | "mime" 18 | "path/filepath" 19 | 20 | configfile "github.com/crowdmob/goconfig" 21 | "github.com/crowdmob/goamz/aws" 22 | "github.com/crowdmob/goamz/s3" 23 | ) 24 | 25 | var configFilename string 26 | var keepBufferFiles bool 27 | var debug bool 28 | var shouldOutputVersion bool 29 | const ( 30 | VERSION = "0.1" 31 | ONE_MINUTE_IN_NANOS = 60000000000 32 | S3_REWIND_IN_DAYS_BEFORE_LONG_LOOP = 14 33 | DAY_IN_SECONDS = 24 * 60 * 60 34 | ) 35 | 36 | func init() { 37 | flag.StringVar(&configFilename, "c", "conf.properties", "path to config file") 38 | flag.BoolVar(&keepBufferFiles, "k", false, "keep buffer files around for inspection") 39 | flag.BoolVar(&shouldOutputVersion, "v", false, "output the current version and quit") 40 | } 41 | 42 | 43 | type ChunkBuffer struct { 44 | File *os.File 45 | FilePath *string 46 | MaxAgeInMins int64 47 | MaxSizeInBytes int64 48 | Topic *string 49 | Partition int64 50 | Offset uint64 51 | expiresAt int64 52 | length int64 53 | } 54 | 55 | func (chunkBuffer *ChunkBuffer) BaseFilename() string { 56 | return fmt.Sprintf("kafka-s3-go-consumer-buffer-topic_%s-partition_%d-offset_%d-", *chunkBuffer.Topic, chunkBuffer.Partition, chunkBuffer.Offset) 57 | } 58 | 59 | func (chunkBuffer *ChunkBuffer) CreateBufferFileOrPanic() { 60 | tmpfile, err := ioutil.TempFile(*chunkBuffer.FilePath, chunkBuffer.BaseFilename()) 61 | chunkBuffer.File = tmpfile 62 | chunkBuffer.expiresAt = time.Now().UnixNano() + (chunkBuffer.MaxAgeInMins * ONE_MINUTE_IN_NANOS) 63 | chunkBuffer.length = 0 64 | if err != nil { 65 | fmt.Errorf("Error opening buffer file: %#v\n", err) 66 | panic(err) 67 | } 68 | } 69 | 70 | func (chunkBuffer *ChunkBuffer) TooBig() bool { 71 | return chunkBuffer.length >= chunkBuffer.MaxSizeInBytes 72 | } 73 | 74 | func (chunkBuffer *ChunkBuffer) TooOld() bool { 
75 | return time.Now().UnixNano() >= chunkBuffer.expiresAt 76 | } 77 | 78 | func (chunkBuffer *ChunkBuffer) NeedsRotation() bool { 79 | return chunkBuffer.TooBig() || chunkBuffer.TooOld() 80 | } 81 | 82 | func S3DatePrefix(t *time.Time) string { 83 | return fmt.Sprintf("%d/%d/%d/", t.Year(), t.Month(), t.Day()) 84 | } 85 | 86 | func S3TopicPartitionPrefix(topic *string, partition int64) string { 87 | return fmt.Sprintf("%s/p%d/", *topic, partition) 88 | } 89 | 90 | func KafkaMsgGuidPrefix(topic *string, partition int64) string { 91 | return fmt.Sprintf("t_%s-p_%d-o_", *topic, partition) 92 | } 93 | 94 | func (chunkBuffer *ChunkBuffer) PutMessage(msg *kafka.Message) { 95 | uuid := []byte(fmt.Sprintf("%s%d|", KafkaMsgGuidPrefix(chunkBuffer.Topic, chunkBuffer.Partition), msg.Offset())) 96 | lf := []byte("\n") 97 | chunkBuffer.Offset = msg.Offset() 98 | chunkBuffer.File.Write(uuid) 99 | chunkBuffer.File.Write(msg.Payload()) 100 | chunkBuffer.File.Write(lf) 101 | 102 | chunkBuffer.length += int64(len(uuid)) + int64(len(msg.Payload())) + int64(len(lf)) 103 | } 104 | 105 | 106 | func (chunkBuffer *ChunkBuffer) StoreToS3AndRelease(s3bucket *s3.Bucket) (bool, error) { 107 | var s3path string 108 | var err error 109 | 110 | if debug { 111 | fmt.Printf("Closing bufferfile: %s\n", chunkBuffer.File.Name()) 112 | } 113 | chunkBuffer.File.Close() 114 | 115 | contents, err := ioutil.ReadFile(chunkBuffer.File.Name()) 116 | if err != nil { 117 | return false, err 118 | } 119 | 120 | if len(contents) <= 0 { 121 | if debug { 122 | fmt.Printf("Nothing to store to s3 for bufferfile: %s\n", chunkBuffer.File.Name()) 123 | } 124 | } else { // Write to s3 in a new filename 125 | alreadyExists := true 126 | for alreadyExists { 127 | writeTime := time.Now() 128 | s3path = fmt.Sprintf("%s%s%d", S3TopicPartitionPrefix(chunkBuffer.Topic, chunkBuffer.Partition), S3DatePrefix(&writeTime), writeTime.UnixNano()) 129 | alreadyExists, err = s3bucket.Exists(s3path) 130 | if err != nil { 131 | 
panic(err) 132 | return false, err 133 | } 134 | } 135 | 136 | fmt.Printf("S3 Put Object: { Bucket: %s, Key: %s, MimeType:%s }\n", s3bucket.Name, s3path, mime.TypeByExtension(filepath.Ext(chunkBuffer.File.Name()))) 137 | 138 | err = s3bucket.Put(s3path, contents, mime.TypeByExtension(filepath.Ext(chunkBuffer.File.Name())), s3.Private, s3.Options{}) 139 | if err != nil { 140 | panic(err) 141 | } 142 | } 143 | 144 | if !keepBufferFiles { 145 | if debug { 146 | fmt.Printf("Deleting bufferfile: %s\n", chunkBuffer.File.Name()) 147 | } 148 | err = os.Remove(chunkBuffer.File.Name()) 149 | if err != nil { 150 | fmt.Errorf("Error deleting bufferfile %s: %#v", chunkBuffer.File.Name(), err) 151 | } 152 | } 153 | 154 | return true, nil 155 | } 156 | 157 | func LastS3KeyWithPrefix(bucket *s3.Bucket, prefix *string) (string, error) { 158 | narrowedPrefix := *prefix 159 | keyMarker := "" 160 | 161 | // First, do a few checks for shortcuts for checking backwards: focus in on the 14 days. 162 | // Otherwise just loop forward until there aren't any more results 163 | currentDay := time.Now() 164 | for i := 0; i < S3_REWIND_IN_DAYS_BEFORE_LONG_LOOP; i++ { 165 | testPrefix := fmt.Sprintf("%s%s", *prefix, S3DatePrefix(¤tDay)) 166 | results, err := bucket.List(narrowedPrefix, "", keyMarker, 0) 167 | if err != nil && len(results.Contents) > 0 { 168 | narrowedPrefix = testPrefix 169 | break 170 | } 171 | currentDay = currentDay.Add(-1 * time.Duration(DAY_IN_SECONDS) * time.Second) 172 | } 173 | 174 | lastKey := "" 175 | moreResults := true 176 | for moreResults { 177 | results, err := bucket.List(narrowedPrefix, "", keyMarker, 0) 178 | if err != nil { return lastKey, err } 179 | 180 | if len(results.Contents) == 0 { // empty request, return last found lastKey 181 | return lastKey, nil 182 | } 183 | 184 | lastKey = results.Contents[len(results.Contents)-1].Key 185 | keyMarker = lastKey 186 | moreResults = results.IsTruncated 187 | } 188 | return lastKey, nil 189 | } 190 | 191 | func main() 
{ 192 | flag.Parse() // Read argv 193 | 194 | if shouldOutputVersion { 195 | fmt.Printf("kafka-s3-consumer %s\n", VERSION) 196 | os.Exit(0) 197 | } 198 | 199 | config, err := configfile.ReadConfigFile(configFilename) 200 | if err != nil { 201 | fmt.Printf("Couldn't read config file %s because: %#v\n", configFilename, err) 202 | panic(err) 203 | } 204 | 205 | // Read configuration file 206 | host, _ := config.GetString("kafka", "host") 207 | debug, _ = config.GetBool("default", "debug") 208 | bufferMaxSizeInByes, _ := config.GetInt64("default", "maxchunksizebytes") 209 | bufferMaxAgeInMinutes, _ := config.GetInt64("default", "maxchunkagemins") 210 | port, _ := config.GetString("kafka", "port") 211 | hostname := fmt.Sprintf("%s:%s", host, port) 212 | awsKey, _ := config.GetString("s3", "accesskey") 213 | awsSecret, _ := config.GetString("s3", "secretkey") 214 | awsRegion, _ := config.GetString("s3", "region") 215 | s3BucketName, _ := config.GetString("s3", "bucket") 216 | s3bucket := s3.New(aws.Auth{AccessKey: awsKey, SecretKey: awsSecret}, aws.Regions[awsRegion]).Bucket(s3BucketName) 217 | 218 | kafkaPollSleepMilliSeconds, _ := config.GetInt64("default", "pollsleepmillis") 219 | maxSize, _ := config.GetInt64("kafka", "maxmessagesize") 220 | tempfilePath, _ := config.GetString("default", "filebufferpath") 221 | topicsRaw, _ := config.GetString("kafka", "topics") 222 | topics := strings.Split(topicsRaw, ",") 223 | for i, _ := range topics { topics[i] = strings.TrimSpace(topics[i]) } 224 | partitionsRaw, _ := config.GetString("kafka", "partitions") 225 | partitionStrings := strings.Split(partitionsRaw, ",") 226 | partitions := make([]int64, len(partitionStrings)) 227 | for i, _ := range partitionStrings { partitions[i], _ = strconv.ParseInt(strings.TrimSpace(partitionStrings[i]),10,64) } 228 | 229 | // Fetch Offsets from S3 (look for last written file and guid) 230 | if debug { 231 | fmt.Printf("Fetching offsets for each topic from s3 bucket %s ...\n", s3bucket.Name) 
232 | } 233 | offsets := make([]uint64, len(topics)) 234 | for i, _ := range offsets { 235 | prefix := S3TopicPartitionPrefix(&topics[i], partitions[i]) 236 | if debug { 237 | fmt.Printf(" Looking at %s object versions: ", prefix) 238 | } 239 | latestKey, err := LastS3KeyWithPrefix(s3bucket, &prefix) 240 | if err != nil { panic(err) } 241 | 242 | if debug { 243 | fmt.Printf("Got: %#v\n", latestKey) 244 | } 245 | 246 | if len(latestKey) == 0 { // no keys found, there aren't any files written, so start at 0 offset 247 | offsets[i] = 0 248 | if debug { 249 | fmt.Printf(" No s3 object found, assuming Offset:%d\n", offsets[i]) 250 | } 251 | } else { // if a key was found we have to open the object and find the last offset 252 | if debug { 253 | fmt.Printf(" Found s3 object %s, got: ", latestKey) 254 | } 255 | contentBytes, err := s3bucket.Get(latestKey) 256 | guidPrefix := KafkaMsgGuidPrefix(&topics[i], partitions[i]) 257 | lines := strings.Split(string(contentBytes), "\n") 258 | for l := len(lines)-1; l >= 0; l-- { 259 | if debug { 260 | fmt.Printf(" Looking at Line '%s'\n", lines[l]) 261 | } 262 | if strings.HasPrefix(lines[l], guidPrefix) { // found a line with a guid, extract offset and escape out 263 | guidSplits := strings.SplitN(strings.SplitN(lines[l], "|", 2)[0], guidPrefix, 2) 264 | offsetString := guidSplits[len(guidSplits)-1] 265 | offsets[i], err = strconv.ParseUint(offsetString, 10, 64) 266 | if err != nil { 267 | panic (err) 268 | } 269 | if debug { 270 | fmt.Printf("OffsetString:%s(L#%d), Offset:%d\n", offsetString, l, offsets[i]) 271 | } 272 | break 273 | } 274 | } 275 | } 276 | } 277 | 278 | 279 | 280 | if debug { 281 | fmt.Printf("Making sure chunkbuffer directory structure exists at %s\n", tempfilePath) 282 | } 283 | err = os.MkdirAll(tempfilePath, 0700) 284 | if err != nil { 285 | fmt.Errorf("Error ensuring chunkbuffer directory structure %s: %#v\n", tempfilePath, err) 286 | panic(err) 287 | } 288 | 289 | if debug { 290 | fmt.Printf("Watching %d 
topics, opening a chunkbuffer for each.\n", len(topics)) 291 | } 292 | buffers := make([]*ChunkBuffer, len(topics)) 293 | for i, _ := range topics { 294 | buffers[i] = &ChunkBuffer{FilePath: &tempfilePath, 295 | MaxSizeInBytes: bufferMaxSizeInByes, 296 | MaxAgeInMins: bufferMaxAgeInMinutes, 297 | Topic: &topics[i], 298 | Partition: partitions[i], 299 | Offset: offsets[i], 300 | } 301 | buffers[i].CreateBufferFileOrPanic() 302 | if debug { 303 | fmt.Printf("Consumer[%s#%d][chunkbuffer]: %s\n", hostname, i, buffers[i].File.Name()) 304 | } 305 | } 306 | 307 | 308 | if debug { 309 | fmt.Printf("Setting up a broker for each of the %d topics.\n", len(topics)) 310 | } 311 | brokers := make([]*kafka.BrokerConsumer, len(topics)) 312 | for i, _ := range partitionStrings { 313 | fmt.Printf("Setup Consumer[%s#%d]: { topic: %s, partition: %d, offset: %d, maxMessageSize: %d }\n", 314 | hostname, 315 | i, 316 | topics[i], 317 | partitions[i], 318 | offsets[i], 319 | maxSize, 320 | ) 321 | brokers[i] = kafka.NewBrokerConsumer(hostname, topics[i], int(partitions[i]), uint64(offsets[i]), uint32(maxSize)) 322 | } 323 | 324 | 325 | if debug { 326 | fmt.Printf("Brokers created, starting to listen with %d brokers...\n", len(brokers)) 327 | } 328 | 329 | 330 | brokerFinishes := make(chan bool, len(brokers)) 331 | for idx, currentBroker := range brokers { 332 | go func(i int, broker *kafka.BrokerConsumer) { 333 | quitSignal := make(chan os.Signal, 1) 334 | signal.Notify(quitSignal, os.Interrupt) 335 | consumedCount, skippedCount, err := broker.ConsumeUntilQuit(kafkaPollSleepMilliSeconds, quitSignal, func(msg *kafka.Message){ 336 | if msg != nil { 337 | if debug { 338 | fmt.Printf("`%s` { ", topics[i]) 339 | msg.Print() 340 | fmt.Printf("}\n") 341 | } 342 | buffers[i].PutMessage(msg) 343 | } 344 | 345 | // check for max size and max age ... if over, rotate 346 | // to new buffer file and upload the old one. 
347 | if buffers[i].NeedsRotation() { 348 | rotatedOutBuffer := buffers[i] 349 | 350 | if debug { 351 | fmt.Printf("Broker#%d: Log Rotation needed! Rotating out of %s\n", i, rotatedOutBuffer.File.Name()) 352 | } 353 | 354 | buffers[i] = &ChunkBuffer{FilePath: &tempfilePath, 355 | MaxSizeInBytes: bufferMaxSizeInByes, 356 | MaxAgeInMins: bufferMaxAgeInMinutes, 357 | Topic: &topics[i], 358 | Partition: partitions[i], 359 | Offset: msg.Offset(), 360 | } 361 | buffers[i].CreateBufferFileOrPanic() 362 | 363 | if debug { 364 | fmt.Printf("Broker#%d: Rotating into %s\n", i, buffers[i].File.Name()) 365 | } 366 | 367 | rotatedOutBuffer.StoreToS3AndRelease(s3bucket) 368 | } 369 | }) 370 | 371 | if err != nil { 372 | fmt.Printf("ERROR in Broker#%d:\n", i) 373 | panic(err) 374 | } 375 | 376 | if debug { 377 | fmt.Printf("Quit signal handled by Broker Consumer #%d (Topic `%s`)\n", i, topics[i]) 378 | fmt.Printf("%s Report: %d messages successfully consumed, %d messages skipped (typically corrupted, check logs)\n", topics[i], consumedCount, skippedCount) 379 | } 380 | 381 | // buffer stopped, let's clean up nicely 382 | buffers[i].StoreToS3AndRelease(s3bucket) 383 | 384 | brokerFinishes <- true 385 | }(idx, currentBroker) 386 | } 387 | 388 | <- brokerFinishes 389 | 390 | fmt.Printf("All %d brokers finished.\n", len(brokers)) 391 | } --------------------------------------------------------------------------------