├── README.md ├── http.go ├── LICENSE └── nsq_to_elasticsearch.go /README.md: -------------------------------------------------------------------------------- 1 | nsq-to-es 2 | ========= 3 | 4 | NSQ to Elasticsearch transport utility. 5 | -------------------------------------------------------------------------------- /http.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "net/http" 7 | 8 | "github.com/bitly/go-nsq" 9 | "github.com/bitly/nsq/util" 10 | ) 11 | 12 | var httpclient *http.Client 13 | var userAgent string 14 | 15 | func init() { 16 | httpclient = &http.Client{Transport: nsq.NewDeadlineTransport(*httpTimeout)} 17 | userAgent = fmt.Sprintf("nsq_to_elasticsearch v%s", util.BINARY_VERSION) 18 | } 19 | 20 | func HttpGet(endpoint string) (*http.Response, error) { 21 | req, err := http.NewRequest("GET", endpoint, nil) 22 | if err != nil { 23 | return nil, err 24 | } 25 | req.Header.Set("User-Agent", userAgent) 26 | return httpclient.Do(req) 27 | } 28 | 29 | func HttpPost(endpoint string, body *bytes.Buffer) (*http.Response, error) { 30 | req, err := http.NewRequest("POST", endpoint, body) 31 | if err != nil { 32 | return nil, err 33 | } 34 | req.Header.Set("User-Agent", userAgent) 35 | req.Header.Set("Content-Type", *contentType) 36 | return httpclient.Do(req) 37 | } 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Seth Hall 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /nsq_to_elasticsearch.go: -------------------------------------------------------------------------------- 1 | // This is an NSQ client that reads the specified topic/channel 2 | // and performs HTTP requests (GET/POST) to the specified endpoints 3 | 4 | package main 5 | 6 | import ( 7 | "bytes" 8 | "errors" 9 | "flag" 10 | "fmt" 11 | "log" 12 | "math" 13 | //"math/rand" 14 | //"net/url" 15 | "os" 16 | "os/signal" 17 | "sort" 18 | "strings" 19 | "syscall" 20 | "time" 21 | 22 | "github.com/bitly/go-hostpool" 23 | "github.com/bitly/go-nsq" 24 | "github.com/bitly/nsq/util" 25 | ) 26 | 27 | const ( 28 | ModeAll = iota 29 | ModeRoundRobin 30 | ModeHostPool 31 | ) 32 | 33 | var ( 34 | showVersion = flag.Bool("version", false, "print version string") 35 | 36 | topic = flag.String("topic", "bro_logs", "nsq topic") 37 | channel = flag.String("channel", "nsq_to_elasticsearch", "nsq channel") 38 | maxInFlight = flag.Int("max-in-flight", 10, "max number of messages to allow in flight") 39 | 40 | numPublishers = flag.Int("n", 1, "number of concurrent publishers") 41 | mode = flag.String("mode", "round-robin", "the upstream request mode options: round-robin or hostpool") 42 | httpTimeout = flag.Duration("http-timeout", 20*time.Second, "timeout for HTTP connect/read/write (each)") 43 | statusEvery = flag.Int("status-every", 250, "the # of requests between logging status (per handler), 0 disables") 44 | contentType = flag.String("content-type", "text/json", "the Content-Type used for POST requests") 45 | 46 | readerOpts = util.StringArray{} 47 | postAddrs = util.StringArray{} 48 | nsqdTCPAddrs = util.StringArray{} 49 | lookupdHTTPAddrs = util.StringArray{} 50 | 51 | maxBackoffDuration = flag.Duration("max-backoff-duration", 120*time.Second, "(deprecated) use --reader-opt=max_backoff_duration=X, the maximum backoff duration") 52 | bulkSize = flag.Int("bulk-size", 1000, "the # of messages to forward to elasticsearch in each bulk transfer") 53 | maxBulkTime = flag.Duration("max-bulk-time", 60*time.Second, "the maximum number of seconds between bulk inserts") 54 | 55 | queuedItems []string 56 | lastBulkTime = time.Now() 57 | queuedItemsNum int = 0 58 | ) 59 | 60 | func init() { 61 | flag.Var(&readerOpts, "reader-opt", "option to passthrough to nsq.Reader (may be given multiple times)") 62 | flag.Var(&postAddrs, "post", "URL to make a POST request to. data will be in the body (may be given multiple times)") 63 | flag.Var(&nsqdTCPAddrs, "nsqd-tcp-address", "nsqd TCP address (may be given multiple times)") 64 | flag.Var(&lookupdHTTPAddrs, "lookupd-http-address", "lookupd HTTP address (may be given multiple times)") 65 | 66 | queuedItems = make([]string, *bulkSize) 67 | } 68 | 69 | type Durations []time.Duration 70 | 71 | func (s Durations) Len() int { 72 | return len(s) 73 | } 74 | 75 | func (s Durations) Swap(i, j int) { 76 | s[i], s[j] = s[j], s[i] 77 | } 78 | 79 | func (s Durations) Less(i, j int) bool { 80 | return s[i] < s[j] 81 | } 82 | 83 | type Publisher interface { 84 | Publish(string, []byte) error 85 | } 86 | 87 | type PublishHandler struct { 88 | Publisher 89 | addresses util.StringArray 90 | counter uint64 91 | mode int 92 | hostPool hostpool.HostPool 93 | reqs Durations 94 | id int 95 | } 96 | 97 | func (ph *PublishHandler) HandleMessage(m *nsq.Message) error { 98 | var startTime time.Time 99 | 100 | if *statusEvery > 0 { 101 | startTime = time.Now() 102 | } 103 | 104 | endOfCommand := strings.Index(string(m.Body), "}}") + 2 105 | CommandString := m.Body[0:endOfCommand] 106 | LogString := m.Body[endOfCommand:] 107 | 108 | //log.Printf("stuff: %s", m.Body) 109 | //log.Printf("stuff: %s", CommandString) 110 | //log.Printf("stuff: %s", LogString) 111 | //tmp := fmt.Sprintf("%s\n%s\n", CommandString, ) 112 | queuedItems = append(queuedItems, string(CommandString)) 113 | queuedItems = append(queuedItems, string(LogString)) 114 | queuedItemsNum = queuedItemsNum + 1 115 | 116 | //log.Printf("seconds? %d", lastBulkTime.Unix()) 117 | sinceLastBulk := time.Now().Sub(lastBulkTime) 118 | 119 | //fmt.Printf("sincelastbulk: %s maxbulksecs: %s\n", sinceLastBulk, *maxBulkTime) 120 | //fmt.Printf("bulksize: %d\n", *bulkSize) 121 | fmt.Printf("Length of queued items: %d\n", queuedItemsNum) 122 | if queuedItemsNum >= *bulkSize || (queuedItemsNum > 0 && sinceLastBulk > *maxBulkTime) { 123 | log.Printf("last batch time: %s number of items %d\n", sinceLastBulk, queuedItemsNum) 124 | lastBulkTime = time.Now() 125 | 126 | log.Printf("begin preparing string\n") 127 | 128 | //out := string("") 129 | out := strings.Join(queuedItems[2:queuedItemsNum*2], "\n") + "\n" 130 | //for e := queuedItems.Front(); e != nil; e = e.Next() { 131 | // out = fmt.Sprintf("%s%s\n", out, e.Value) 132 | //} 133 | 134 | // If the data sent, flush the queuedItems 135 | queuedItemsNum = 0 136 | queuedItems = make([]string, *bulkSize) 137 | 138 | log.Printf("done preparing string\n") 139 | 140 | switch ph.mode { 141 | case ModeAll: 142 | for _, addr := range ph.addresses { 143 | err := ph.Publish(addr, []byte(out)) 144 | if err != nil { 145 | return err 146 | } 147 | } 148 | case ModeRoundRobin: 149 | idx := ph.counter % uint64(len(ph.addresses)) 150 | err := ph.Publish(ph.addresses[idx], []byte(out)) 151 | if err != nil { 152 | return err 153 | } 154 | ph.counter++ 155 | case ModeHostPool: 156 | hostPoolResponse := ph.hostPool.Get() 157 | err := ph.Publish(hostPoolResponse.Host(), []byte(out)) 158 | hostPoolResponse.Mark(err) 159 | if err != nil { 160 | return err 161 | } 162 | } 163 | 164 | log.Printf("finished pushing logs\n") 165 | 166 | if *statusEvery > 0 { 167 | duration := time.Now().Sub(startTime) 168 | ph.reqs = append(ph.reqs, duration) 169 | } 170 | 171 | if *statusEvery > 0 && len(ph.reqs) >= *statusEvery { 172 | var total time.Duration 173 | for _, v := range ph.reqs { 174 | total += v 175 | } 176 | avgMs := (total.Seconds() * 1000) / float64(len(ph.reqs)) 177 | 178 | sort.Sort(ph.reqs) 179 | p95Ms := percentile(95.0, ph.reqs, len(ph.reqs)).Seconds() * 1000 180 | p99Ms := percentile(99.0, ph.reqs, len(ph.reqs)).Seconds() * 1000 181 | 182 | log.Printf("handler(%d): finished %d requests - 99th: %.02fms - 95th: %.02fms - avg: %.02fms", 183 | ph.id, *statusEvery, p99Ms, p95Ms, avgMs) 184 | 185 | ph.reqs = ph.reqs[:0] 186 | } 187 | } 188 | 189 | return nil 190 | } 191 | 192 | func percentile(perc float64, arr []time.Duration, length int) time.Duration { 193 | indexOfPerc := int(math.Ceil(((perc / 100.0) * float64(length)) + 0.5)) 194 | if indexOfPerc >= length { 195 | indexOfPerc = length - 1 196 | } 197 | return arr[indexOfPerc] 198 | } 199 | 200 | type PostPublisher struct{} 201 | 202 | func (p *PostPublisher) Publish(addr string, msg []byte) error { 203 | buf := bytes.NewBuffer(msg) 204 | resp, err := HttpPost(addr, buf) 205 | if err != nil { 206 | return err 207 | } 208 | resp.Body.Close() 209 | if resp.StatusCode != 200 { 210 | return errors.New(fmt.Sprintf("got status code %d", resp.StatusCode)) 211 | } 212 | return nil 213 | } 214 | 215 | func hasArg(s string) bool { 216 | for _, arg := range os.Args { 217 | if strings.Contains(arg, s) { 218 | return true 219 | } 220 | } 221 | return false 222 | } 223 | 224 | func main() { 225 | var publisher Publisher 226 | var addresses util.StringArray 227 | var selectedMode int 228 | 229 | flag.Parse() 230 | 231 | if *showVersion { 232 | fmt.Printf("nsq_to_elasticsearch v%s\n", util.BINARY_VERSION) 233 | return 234 | } 235 | 236 | if *topic == "" || *channel == "" { 237 | log.Fatalf("--topic and --channel are required") 238 | } 239 | 240 | if *contentType != flag.Lookup("content-type").DefValue { 241 | if len(postAddrs) == 0 { 242 | log.Fatalf("--content-type only used with --post") 243 | } 244 | if len(*contentType) == 0 { 245 | log.Fatalf("--content-type requires a value when used") 246 | } 247 | } 248 | 249 | if len(nsqdTCPAddrs) == 0 && len(lookupdHTTPAddrs) == 0 { 250 | log.Fatalf("--nsqd-tcp-address or --lookupd-http-address required") 251 | } 252 | if len(nsqdTCPAddrs) > 0 && len(lookupdHTTPAddrs) > 0 { 253 | log.Fatalf("use --nsqd-tcp-address or --lookupd-http-address not both") 254 | } 255 | 256 | switch *mode { 257 | case "round-robin": 258 | selectedMode = ModeRoundRobin 259 | case "hostpool": 260 | selectedMode = ModeHostPool 261 | } 262 | 263 | termChan := make(chan os.Signal, 1) 264 | signal.Notify(termChan, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP) 265 | 266 | if len(postAddrs) > 0 { 267 | publisher = &PostPublisher{} 268 | addresses = postAddrs 269 | } 270 | 271 | r, err := nsq.NewReader(*topic, *channel) 272 | if err != nil { 273 | log.Fatalf(err.Error()) 274 | } 275 | err = util.ParseReaderOpts(r, readerOpts) 276 | if err != nil { 277 | log.Fatalf(err.Error()) 278 | } 279 | r.SetMaxInFlight(*maxInFlight) 280 | 281 | for i := 0; i < *numPublishers; i++ { 282 | handler := &PublishHandler{ 283 | Publisher: publisher, 284 | addresses: addresses, 285 | mode: selectedMode, 286 | reqs: make(Durations, 0, *statusEvery), 287 | id: i, 288 | hostPool: hostpool.New(addresses), 289 | } 290 | r.AddHandler(handler) 291 | } 292 | 293 | for _, addrString := range nsqdTCPAddrs { 294 | err := r.ConnectToNSQ(addrString) 295 | if err != nil { 296 | log.Fatalf(err.Error()) 297 | } 298 | } 299 | 300 | for _, addrString := range lookupdHTTPAddrs { 301 | log.Printf("lookupd addr %s", addrString) 302 | err := r.ConnectToLookupd(addrString) 303 | if err != nil { 304 | log.Fatalf(err.Error()) 305 | } 306 | } 307 | 308 | for { 309 | select { 310 | case <-r.ExitChan: 311 | return 312 | case <-termChan: 313 | r.Stop() 314 | } 315 | } 316 | } 317 | --------------------------------------------------------------------------------