├── README.md └── s3_output.go /README.md: -------------------------------------------------------------------------------- 1 | # heka-s3 2 | 3 | Heka output plugin for persisting messages from the data pipeline to AWS S3 buckets. It buffers logs to disk locally and uploads periodically to S3. It is currently running reliably in production at [Wego](http://www.wego.com). 4 | 5 | ## Installation 6 | 7 | Refer to: http://hekad.readthedocs.org/en/v0.9.2/installing.html#building-hekad-with-external-plugins 8 | 9 | Simply add this line in _{heka root}/cmake/plugin_loader.cmake_: 10 | 11 | add_external_plugin(git https://github.com/uohzxela/heka-s3 master) 12 | 13 | Then run build.sh as per the documentation. 14 | 15 | ## Configuration 16 | 17 | Sample TOML file (with Kafka as input source): 18 | 19 | ``` 20 | [error-logs-input-kafka] 21 | type = "KafkaInput" 22 | topic = "error-logs" 23 | addrs = ["kafka-a-1.test.org:9092"] 24 | 25 | [error-logs-output-s3] 26 | type = "S3Output" 27 | message_matcher = "Logger == 'error-logs-input-kafka'" 28 | secret_key = "SECRET_KEY" 29 | access_key = "ACCESS_KEY" 30 | bucket = "logs" 31 | prefix = "/error-logs" 32 | region = "ap-southeast-1" 33 | ticker_interval = 3600 34 | compression = true 35 | buffer_path = "/var/log/heka/buffer/s3" 36 | buffer_chunk_limit = 1000000 37 | encoder = "PayloadEncoder" 38 | 39 | [PayloadEncoder] 40 | append_newlines = false 41 | ``` 42 | 43 | | Attributes | Type | Default | Remarks | 44 | | ------------- |------------- | ----- | --------- | 45 | | secret_key | string | nil | needed for S3 authentication | 46 | | access_key | string | nil | needed for S3 authentication | 47 | | bucket | string | nil | specifies bucket in S3 | 48 | | prefix | string | nil | specifies path in bucket | 49 | | region | string | nil | e.g. "ap-southeast-1"| 50 | | ticker_interval | int (seconds) | nil | specifies buffering time before every upload | 51 | | compression | boolean | true | only gzip is supported for now | 52 | | buffer_path | string | nil | defines path to store buffer file locally | 53 | | buffer_chunk_limit| int (bytes) | 1000000 | defines buffer size limit in memory before flushing to disk| 54 | 55 | Logs are saved to S3 as _{bucket}/{prefix}/{current date}/{current time stamp}.gz_, if compression is enabled. 56 | 57 | In this example, an S3 object will be saved hourly as `All Buckets/logs/error-logs/2015-05-18/20150518174140.gz` 58 | 59 | Regardless of the ticker_interval, buffer on the local disk is automatically uploaded by midnight to the previous day's folder so as to deal with timestamp issue. 60 | 61 | ## Contributing 62 | 63 | 1. Fork it 64 | 2. Create your feature branch (`git checkout -b my-new-feature`) 65 | 3. Commit your changes (`git commit -am 'Added some feature'`) 66 | 4. Push to the branch (`git push origin my-new-feature`) 67 | 5. Create new Pull Request 68 | 69 | ## License 70 | 71 | MIT 72 | -------------------------------------------------------------------------------- /s3_output.go: -------------------------------------------------------------------------------- 1 | package s3 2 | 3 | import ( 4 | "fmt" 5 | "errors" 6 | "io" 7 | "bufio" 8 | "bytes" 9 | "time" 10 | "os" 11 | "os/exec" 12 | "strings" 13 | "github.com/mozilla-services/heka/message" 14 | . "github.com/mozilla-services/heka/pipeline" 15 | "github.com/AdRoll/goamz/aws" 16 | "github.com/AdRoll/goamz/s3" 17 | ) 18 | 19 | const INTERVAL_PERIOD time.Duration = 24 * time.Hour 20 | const HOUR_TO_TICK int = 00 21 | const MINUTE_TO_TICK int = 00 22 | const SECOND_TO_TICK int = 00 23 | 24 | type S3OutputConfig struct { 25 | SecretKey string `toml:"secret_key"` 26 | AccessKey string `toml:"access_key"` 27 | Region string `toml:"region"` 28 | Bucket string `toml:"bucket"` 29 | Prefix string `toml:"prefix"` 30 | TickerInterval uint `toml:"ticker_interval"` 31 | Compression bool `toml:"compression"` 32 | BufferPath string `toml:"buffer_path"` 33 | BufferChunkLimit int `toml:"buffer_chunk_limit"` 34 | } 35 | 36 | type S3Output struct { 37 | config *S3OutputConfig 38 | client *s3.S3 39 | bucket *s3.Bucket 40 | bufferFilePath string 41 | } 42 | 43 | func midnightTickerUpdate() *time.Ticker { 44 | nextTick := time.Date(time.Now().Year(), time.Now().Month(), time.Now().Day(), HOUR_TO_TICK, MINUTE_TO_TICK, SECOND_TO_TICK, 0, time.Local) 45 | if !nextTick.After(time.Now()) { 46 | nextTick = nextTick.Add(INTERVAL_PERIOD) 47 | } 48 | diff := nextTick.Sub(time.Now()) 49 | return time.NewTicker(diff) 50 | } 51 | 52 | func (so *S3Output) ConfigStruct() interface{} { 53 | return &S3OutputConfig{Compression: true, BufferChunkLimit: 1000000} 54 | } 55 | 56 | func (so *S3Output) Init(config interface{}) (err error) { 57 | so.config = config.(*S3OutputConfig) 58 | auth, err := aws.GetAuth(so.config.AccessKey, so.config.SecretKey, "", time.Now()) 59 | if err != nil { 60 | return 61 | } 62 | region, ok := aws.Regions[so.config.Region] 63 | if !ok { 64 | err = errors.New("Region of that name not found.") 65 | return 66 | } 67 | so.client = s3.New(auth, region) 68 | so.bucket = so.client.Bucket(so.config.Bucket) 69 | 70 | prefixList := strings.Split(so.config.Prefix, "/") 71 | bufferFileName := so.config.Bucket + strings.Join(prefixList, "_") 72 | so.bufferFilePath = so.config.BufferPath + "/" + bufferFileName 73 | return 74 | } 75 | 76 | func (so *S3Output) Run(or OutputRunner, h PluginHelper) (err error) { 77 | inChan := or.InChan() 78 | tickerChan := or.Ticker() 79 | buffer := bytes.NewBuffer(nil) 80 | midnightTicker := midnightTickerUpdate() 81 | 82 | var ( 83 | pack *PipelinePack 84 | msg *message.Message 85 | ok = true 86 | ) 87 | 88 | for ok { 89 | select { 90 | case pack, ok = <- inChan: 91 | if !ok { 92 | break 93 | } 94 | msg = pack.Message 95 | err := so.WriteToBuffer(buffer, msg, or) 96 | if err != nil { 97 | or.LogMessage(fmt.Sprintf("Warning, unable to write to buffer: %s", err)) 98 | err = nil 99 | continue 100 | } 101 | pack.Recycle(nil) 102 | case <- tickerChan: 103 | or.LogMessage(fmt.Sprintf("Ticker fired, uploading payload.")) 104 | err := so.Upload(buffer, or, false) 105 | if err != nil { 106 | or.LogMessage(fmt.Sprintf("Warning, unable to upload payload: %s", err)) 107 | err = nil 108 | continue 109 | } 110 | or.LogMessage(fmt.Sprintf("Payload uploaded successfully.")) 111 | buffer.Reset() 112 | case <- midnightTicker.C: 113 | midnightTicker = midnightTickerUpdate() 114 | or.LogMessage(fmt.Sprintf("Midnight ticker fired, uploading payload.")) 115 | err := so.Upload(buffer, or, true) 116 | if err != nil { 117 | or.LogMessage(fmt.Sprintf("Warning, unable to upload payload: %s", err)) 118 | err = nil 119 | continue 120 | } 121 | or.LogMessage(fmt.Sprintf("Payload uploaded successfully.")) 122 | buffer.Reset() 123 | } 124 | } 125 | 126 | or.LogMessage(fmt.Sprintf("Shutting down S3 output runner.")) 127 | return 128 | } 129 | 130 | func (so *S3Output) WriteToBuffer(buffer *bytes.Buffer, msg *message.Message, or OutputRunner) (err error) { 131 | _, err = buffer.Write([]byte(msg.GetPayload())) 132 | if err != nil { 133 | return 134 | } 135 | if buffer.Len() > so.config.BufferChunkLimit { 136 | err = so.SaveToDisk(buffer, or) 137 | } 138 | return 139 | } 140 | 141 | func (so *S3Output) SaveToDisk(buffer *bytes.Buffer, or OutputRunner) (err error) { 142 | _, err = os.Stat(so.config.BufferPath) 143 | if os.IsNotExist(err) { 144 | err = os.MkdirAll(so.config.BufferPath, 0666) 145 | if err != nil { return } 146 | } 147 | 148 | err = os.Chdir(so.config.BufferPath) 149 | if err != nil { return } 150 | 151 | _, err = os.Stat(so.bufferFilePath) 152 | if os.IsNotExist(err) { 153 | or.LogMessage("Creating buffer file: " + so.bufferFilePath) 154 | w, err := os.Create(so.bufferFilePath) 155 | w.Close() 156 | if err != nil { return err } 157 | } 158 | 159 | f, err := os.OpenFile(so.bufferFilePath, os.O_APPEND|os.O_WRONLY, 0666) 160 | if err != nil { return } 161 | 162 | _, err = f.Write(buffer.Bytes()) 163 | if err != nil { return } 164 | 165 | f.Close() 166 | buffer.Reset() 167 | 168 | return 169 | } 170 | 171 | func (so *S3Output) ReadFromDisk(or OutputRunner) (buffer *bytes.Buffer, err error) { 172 | if so.config.Compression { 173 | or.LogMessage("Compressing buffer file...") 174 | cmd := exec.Command("gzip", so.bufferFilePath) 175 | err = cmd.Run() 176 | if err != nil { 177 | return nil, err 178 | } 179 | // rename to original filename without .gz extension 180 | cmd = exec.Command("mv", so.bufferFilePath + ".gz", so.bufferFilePath) 181 | err = cmd.Run() 182 | if err != nil { 183 | return nil, err 184 | } 185 | } 186 | 187 | or.LogMessage("Uploading, reading from buffer file.") 188 | fi, err := os.Open(so.bufferFilePath) 189 | if err != nil { return } 190 | 191 | r := bufio.NewReader(fi) 192 | buffer = bytes.NewBuffer(nil) 193 | 194 | buf := make([]byte, 1024) 195 | for { 196 | n, err := r.Read(buf) 197 | if err != nil && err != io.EOF { 198 | break 199 | } 200 | if n == 0 { 201 | break 202 | } 203 | _, err = buffer.Write(buf[:n]) 204 | if err != nil { 205 | break 206 | } 207 | } 208 | 209 | fi.Close() 210 | return buffer, err 211 | } 212 | 213 | func (so *S3Output) Upload(buffer *bytes.Buffer, or OutputRunner, isMidnight bool) (err error) { 214 | _, err = os.Stat(so.bufferFilePath) 215 | if buffer.Len() == 0 && os.IsNotExist(err) { 216 | err = errors.New("Nothing to upload.") 217 | return 218 | } 219 | 220 | err = so.SaveToDisk(buffer, or) 221 | if err != nil { return } 222 | 223 | buffer, err = so.ReadFromDisk(or) 224 | if err != nil { return } 225 | 226 | var ( 227 | currentTime = time.Now().Local().Format("20060102150405") 228 | currentDate = "" 229 | ext = "" 230 | contentType = "text/plain" 231 | ) 232 | 233 | if isMidnight { 234 | currentDate = time.Now().Local().AddDate(0, 0, -1).Format("2006-01-02 15:00:00 +0800")[0:10] 235 | } else { 236 | currentDate = time.Now().Local().Format("2006-01-02 15:00:00 +0800")[0:10] 237 | } 238 | 239 | if so.config.Compression { 240 | ext = ".gz" 241 | contentType = "multipart/x-gzip" 242 | } 243 | 244 | path := so.config.Prefix + "/" + currentDate + "/" + currentTime + ext 245 | err = so.bucket.Put(path, buffer.Bytes(), contentType, "public-read", s3.Options{}) 246 | 247 | or.LogMessage("Upload finished, removing buffer file on disk.") 248 | if err == nil { 249 | err = os.Remove(so.bufferFilePath) 250 | } 251 | 252 | return 253 | } 254 | 255 | func init() { 256 | RegisterPlugin("S3Output", func() interface{} { 257 | return new(S3Output) 258 | }) 259 | } 260 | --------------------------------------------------------------------------------