├── .circleci └── config.yml ├── .gitattributes ├── .github ├── CODEOWNERS └── workflows │ └── semantic.yml ├── LICENSE ├── README.md ├── cmd ├── lpverify │ └── main.go └── verify-lines │ ├── README.md │ └── verify-lines.go ├── go.mod ├── go.sum ├── line-protocol.ebnf └── lineprotocol ├── byteset.go ├── corpus_test.go ├── decoder.go ├── decoder_test.go ├── doc.go ├── encoder.go ├── encoder_test.go ├── escape.go ├── example_test.go ├── precision.go ├── section_string.go ├── strconv.go ├── testdata └── corpus.json ├── value.go ├── value_test.go └── valuekind.go /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | jobs: 3 | build: 4 | docker: 5 | - image: circleci/golang:1.17.5 6 | 7 | working_directory: /go/src/github.com/influxdata/line-protocol 8 | steps: 9 | - checkout 10 | - run: go get honnef.co/go/tools/... 11 | - run: go test -v ./... 12 | - run: go vet -v ./... 13 | - run: staticcheck ./... 14 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | go.sum linguist-generated=true 2 | 3 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # This file allows teams or people to be assigned as 2 | # automatic code-reviewers for files or directories. 3 | # 4 | # Here is information about how to configure this file: 5 | # https://help.github.com/en/articles/about-code-owners 6 | 7 | * @influxdata/cloud-2-storage-engineers 8 | -------------------------------------------------------------------------------- /.github/workflows/semantic.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: "Semantic PR and Commit Messages" 3 | 4 | on: 5 | pull_request: 6 | types: [opened, reopened, synchronize, edited] 7 | 8 | jobs: 9 | semantic: 10 | uses: influxdata/validate-semantic-github-messages/.github/workflows/semantic.yml@main 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2021 InfluxData Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # InfluxDB line-protocol codec
2 | 
3 | This module implements a high-performance Go codec for the line-protocol syntax as accepted by InfluxDB.
4 | Currently the API is low level - it's intended for converting line-protocol to some chosen concrete
5 | types that aren't specified here. (In future work, we'll define a `Point` type that provides a convenient
6 | but less performant way to encode and decode points.)
7 | 
8 | The API documentation is here: https://pkg.go.dev/github.com/influxdata/line-protocol/v2/lineprotocol
9 | 
--------------------------------------------------------------------------------
/cmd/lpverify/main.go:
--------------------------------------------------------------------------------
1 | package main
2 | 
3 | import (
4 |     "fmt"
5 |     "os"
6 | 
7 |     "github.com/influxdata/line-protocol/v2/lineprotocol"
8 | )
9 | 
10 | func main() {
11 |     dec := lineprotocol.NewDecoder(os.Stdin)
12 |     if !verify(dec) {
13 |         os.Exit(1)
14 |     }
15 | }
16 | 
17 | func verify(dec *lineprotocol.Decoder) (ok bool) {
18 |     logErr := func(err error) {
19 |         fmt.Fprintf(os.Stderr, "%v\n", err)
20 |         ok = false
21 |     }
22 | nextLine:
23 |     for dec.Next() {
24 |         _, err := dec.Measurement()
25 |         if err != nil {
26 |             logErr(err)
27 |             continue nextLine
28 |         }
29 |         for {
30 |             key, _, err := dec.NextTag()
31 |             if err != nil {
32 |                 logErr(err)
33 |                 continue nextLine
34 |             }
35 |             if key == nil {
36 |                 break
37 |             }
38 |         }
39 |         for {
40 |             key, _, err := dec.NextField()
41 |             if err != nil {
42 |                 logErr(err)
43 |                 continue nextLine
44 |             }
45 |             if key == nil {
46 |                 break
47 |             }
48 |         }
49 |         // TODO precision flag so we can check time bounds.
50 |         if _, err := dec.TimeBytes(); err != nil {
51 |             logErr(err)
52 |             continue nextLine
53 |         }
54 |     }
55 |     return ok
56 | }
57 | 
--------------------------------------------------------------------------------
/cmd/verify-lines/README.md:
--------------------------------------------------------------------------------
1 | ## Verify your Line Protocol
2 | 
3 | verify-lines.go is provided to allow calling the LP decoder from Python to check line protocol for errors.
4 | 
5 | To do so, first build a shared object library for your system from the root of this project.
6 | 
7 | ```bash
8 | go build -buildmode=c-shared -o verify-lines.so ./cmd/verify-lines/verify-lines.go
9 | ```
10 | 
11 | Then, within your Python script or program, load the library:
12 | 
13 | ```python
14 | import ctypes
15 | so = ctypes.cdll.LoadLibrary('./verify-lines.so')
16 | verifyLines = so.verifyLines
17 | ```
18 | 
19 | Then you can check single lines of line protocol or batches of them. The input must be encoded as UTF-8.
20 | 
21 | ```python
22 | >>> verifyLines('foo,tag1=val1,tag2=val2 x=1,y="hello" 1625823259000000'.encode('utf-8'))
23 | 0
24 | >>> verifyLines('foo,,,, 1625823259000000'.encode('utf-8'))
25 | at line 1:5: expected tag key or field but found ',' instead
26 | 1
27 | >>> batch="""##comment
28 | ... "foo",tag1=val1,"tag2"="tag
29 | ... " x2=1,y="hel
30 | ... lo" 1625823259000000
31 | ... _bar enabled=true
32 | ... foo,bar=a\\ x=1 x=1"""
33 | >>> verifyLines(batch.encode('utf-8'))
34 | at line 2:28: expected tag key or field but found '\n' instead
35 | at line 6:13: empty tag key
36 | 1
37 | ```
38 | 
39 | The error messages report where the first error in each line occurred; there may be more than one error in a line.
40 | Line numbers are 1-indexed.
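41 | 
42 | If you'd rather check (or construct) line protocol directly from Go rather than through the shared library, you can use the module in-process. The sketch below is illustrative only: the API calls are the ones used elsewhere in this repository (see `lineprotocol/corpus_test.go` and `cmd/lpverify/main.go`), while the measurement, tag, field and timestamp values are made up.
43 | 
44 | ```go
45 | package main
46 | 
47 | import (
48 |     "fmt"
49 |     "log"
50 |     "time"
51 | 
52 |     "github.com/influxdata/line-protocol/v2/lineprotocol"
53 | )
54 | 
55 | func main() {
56 |     // Encode a single point with the low-level Encoder API.
57 |     var enc lineprotocol.Encoder
58 |     enc.SetPrecision(lineprotocol.Nanosecond)
59 |     enc.StartLineRaw([]byte("foo"))
60 |     enc.AddTagRaw([]byte("tag1"), []byte("val1"))
61 |     v, ok := lineprotocol.NewValue("hello")
62 |     if !ok {
63 |         log.Fatal("value cannot be represented in line protocol")
64 |     }
65 |     enc.AddFieldRaw([]byte("y"), v)
66 |     enc.EndLine(time.Unix(0, 1625823259000000000))
67 |     if err := enc.Err(); err != nil {
68 |         log.Fatalf("encode error: %v", err)
69 |     }
70 | 
71 |     // Decode it again; the loop mirrors the one in verify-lines.go below.
72 |     dec := lineprotocol.NewDecoderWithBytes(enc.Bytes())
73 |     for dec.Next() {
74 |         m, err := dec.Measurement()
75 |         if err != nil {
76 |             log.Fatal(err)
77 |         }
78 |         fmt.Printf("measurement %s\n", m)
79 |         for {
80 |             key, val, err := dec.NextTag()
81 |             if err != nil {
82 |                 log.Fatal(err)
83 |             }
84 |             if key == nil {
85 |                 break
86 |             }
87 |             fmt.Printf("tag %s=%s\n", key, val)
88 |         }
89 |         for {
90 |             key, val, err := dec.NextField()
91 |             if err != nil {
92 |                 log.Fatal(err)
93 |             }
94 |             if key == nil {
95 |                 break
96 |             }
97 |             fmt.Printf("field %s=%v\n", key, val.Interface())
98 |         }
99 |         ts, err := dec.Time(lineprotocol.Nanosecond, time.Now())
100 |         if err != nil {
101 |             log.Fatal(err)
102 |         }
103 |         fmt.Printf("time %v\n", ts)
104 |     }
105 | }
106 | ```
107 | 
108 | As the `Decoder` documentation notes, the byte slices returned by its methods are only valid until the next call on the decoder, so copy anything you need to keep.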
-------------------------------------------------------------------------------- /cmd/verify-lines/verify-lines.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "C" 4 | import ( 5 | "fmt" 6 | "os" 7 | 8 | "github.com/influxdata/line-protocol/v2/lineprotocol" 9 | ) 10 | 11 | func main() {} 12 | 13 | // verifyLines provides a cgo hook to verify line protocol strings. 14 | // The method will print encountered decode errors to stderr and returns 15 | // a boolean to indicate if any errors where encountered following the 16 | // unix standard of 0 for success and 1 for error conditions. 17 | // It will continue to decode and verify all lines even after encountering 18 | // an error. The line immediately after an error may verify but not be 19 | // what was intended. 20 | //export verifyLines 21 | func verifyLines(lines *C.char) C.int { 22 | dec := lineprotocol.NewDecoderWithBytes([]byte(C.GoString(lines))) 23 | var failure bool 24 | logErr := func(err error) { 25 | fmt.Fprintf(os.Stderr, "%v\n", err) 26 | failure = true 27 | } 28 | nextLine: 29 | for dec.Next() { 30 | _, err := dec.Measurement() 31 | if err != nil { 32 | logErr(err) 33 | continue nextLine 34 | } 35 | for { 36 | key, _, err := dec.NextTag() 37 | if err != nil { 38 | logErr(err) 39 | continue nextLine 40 | } 41 | if key == nil { 42 | break 43 | } 44 | } 45 | for { 46 | key, _, err := dec.NextField() 47 | if err != nil { 48 | logErr(err) 49 | continue nextLine 50 | } 51 | if key == nil { 52 | break 53 | } 54 | } 55 | if _, err := dec.TimeBytes(); err != nil { 56 | logErr(err) 57 | continue nextLine 58 | } 59 | } 60 | if failure { 61 | return 1 62 | } else { 63 | return 0 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/influxdata/line-protocol/v2 2 | 3 | go 1.15 4 | 5 | require ( 6 | github.com/frankban/quicktest v1.13.0 7 | github.com/google/go-cmp v0.5.5 8 | github.com/influxdata/line-protocol-corpus v0.0.0-20210922080147-aa28ccfb8937 9 | gopkg.in/yaml.v3 v3.0.1 // indirect 10 | ) 11 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= 2 | github.com/frankban/quicktest v1.11.0/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s= 3 | github.com/frankban/quicktest v1.11.2 h1:mjwHjStlXWibxOohM7HYieIViKyh56mmt3+6viyhDDI= 4 | github.com/frankban/quicktest v1.11.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s= 5 | github.com/frankban/quicktest v1.13.0 h1:yNZif1OkDfNoDfb9zZa9aXIpejNR4F23Wely0c+Qdqk= 6 | github.com/frankban/quicktest v1.13.0/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/r/VLSOOIySU= 7 | github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 8 | github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= 9 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 10 | github.com/influxdata/line-protocol-corpus v0.0.0-20210519164801-ca6fa5da0184 h1:modYba1g1we+YJf0yGTwmohVWVAxcAch18nPg3e24OY= 11 | github.com/influxdata/line-protocol-corpus v0.0.0-20210519164801-ca6fa5da0184/go.mod h1:03nmhxzZ7Xk2pdG+lmMd7mHDfeVOYFyhOgwO61qWU98= 12 | github.com/influxdata/line-protocol-corpus 
v0.0.0-20210922080147-aa28ccfb8937 h1:MHJNQ+p99hFATQm6ORoLmpUCF7ovjwEFshs/NHzAbig= 13 | github.com/influxdata/line-protocol-corpus v0.0.0-20210922080147-aa28ccfb8937/go.mod h1:BKR9c0uHSmRgM/se9JhFHtTT7JTO67X23MtKMHtZcpo= 14 | github.com/influxdata/line-protocol/v2 v2.0.0-20210312151457-c52fdecb625a/go.mod h1:6+9Xt5Sq1rWx+glMgxhcg2c0DUaehK+5TDcPZ76GypY= 15 | github.com/influxdata/line-protocol/v2 v2.1.0/go.mod h1:QKw43hdUBg3GTk2iC3iyCxksNj7PX9aUSeYOYE/ceHY= 16 | github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= 17 | github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= 18 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 19 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 20 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 21 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 22 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= 23 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= 24 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= 25 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 26 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 27 | gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU= 28 | gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 29 | gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776 h1:tQIYjPdBoyREyB9XMu+nnTclpTYkz2zFM+lzLJFO4gQ= 30 | gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 31 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 32 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 33 | -------------------------------------------------------------------------------- /line-protocol.ebnf: -------------------------------------------------------------------------------- 1 | // This file holds a grammar for the line-protocol syntax. 2 | // The grammar is in EBNF format as used in the Go specification. 3 | 4 | lines = line { [ "\r" ] "\n" line } [ "\r" ] . 5 | 6 | space_char = " " . 7 | whitespace = space_char { space_char } . 8 | nonprintable_char = "\u0000"…"\u001f" | "\u007f" . 9 | 10 | line = { space_char } [ point | comment ] . 11 | point = measurement { "," tag } whitespace field { "," field } [ whitespace timestamp ] { space_char } . 12 | comment = "#" { not(nonprintable_char) | "\t" } . 13 | 14 | measurement = measurement_start { measurement_elem } . 15 | // Note: the start character is different from other measurement characters 16 | // because it can't be a # character (otherwise it would match a comment). 17 | measurement_start = not(nonprintable_char | space_char | `\` | "," | "#" ) | measurement_escape_seq . 18 | measurement_elem = measurement_regular_char | measurement_escape_seq . 19 | measurement_regular_char = not(nonprintable_char | space_char | `\` | "," ) . 20 | measurement_escape_seq = `\` { `\` } not ( `\` | nonprintable_char ). 21 | 22 | key = key_elem { key_elem } . 23 | key_elem = key_regular_char | key_escape_seq . 
24 | key_regular_char = not(nonprintable_char | space_char | `\` | "," | "=" ) .
25 | key_escape_seq = `\` { `\` } not ( `\` | nonprintable_char ) .
26 | 
27 | tag = key "=" key .
28 | 
29 | field = key "=" fieldval .
30 | 
31 | fieldval = boolfield | stringfield | intfield | uintfield | floatfield .
32 | decimal_digits = decimal_digit { decimal_digit } .
33 | decimal_digit = "0" … "9" .
34 | 
35 | boolfield = "t" | "T" | "true" | "True" | "TRUE" | "f" | "F" | "false" | "False" | "FALSE" .
36 | intfield = [ "-" ] decimal_digits "i" .
37 | uintfield = decimal_digits "u" .
38 | 
39 | floatfield = [ "-" ] non_negative_float .
40 | non_negative_float = decimal_digits [ "." [ decimal_digits ] [ decimal_exponent ] ] |
41 |     decimal_digits decimal_exponent |
42 |     "." decimal_digits [ decimal_exponent ] .
43 | 
44 | decimal_exponent = ( "e" | "E" ) [ "+" | "-" ] decimal_digits .
45 | 
46 | stringfield = `"` { not(`"` | `\`) | `\` any_char } `"` .
47 | any_char = "\u0000" … "\U0010FFFF" .
48 | 
49 | timestamp = [ "-" ] decimal_digits .
50 | 
--------------------------------------------------------------------------------
/lineprotocol/byteset.go:
--------------------------------------------------------------------------------
1 | package lineprotocol
2 | 
3 | // newByteSet returns a set representation
4 | // of the bytes in the given string.
5 | func newByteSet(s string) *byteSet {
6 |     var set byteSet
7 |     for i := 0; i < len(s); i++ {
8 |         set.set(s[i])
9 |     }
10 |     return &set
11 | }
12 | 
13 | func newByteSetRange(i0, i1 uint8) *byteSet {
14 |     var set byteSet
15 |     for i := i0; i <= i1; i++ {
16 |         set.set(i)
17 | 
18 |     }
19 |     return &set
20 | }
21 | 
22 | type byteSet [256]bool
23 | 
24 | // get reports whether b holds the byte x.
25 | func (b *byteSet) get(x uint8) bool {
26 |     return b[x]
27 | }
28 | 
29 | // set ensures that x is in the set.
30 | func (b *byteSet) set(x uint8) {
31 |     b[x] = true
32 | }
33 | 
34 | // union returns the union of b and b1.
35 | func (b *byteSet) union(b1 *byteSet) *byteSet {
36 |     r := *b
37 |     for i := range r {
38 |         r[i] = r[i] || b1[i]
39 |     }
40 |     return &r
41 | }
42 | 
43 | // intersect returns the intersection of b and b1.
44 | func (b *byteSet) intersect(b1 *byteSet) *byteSet {
45 |     r := *b
46 |     for i := range r {
47 |         r[i] = r[i] && b1[i]
48 |     }
49 |     return &r
50 | }
51 | 
52 | func (b *byteSet) without(b1 *byteSet) *byteSet {
53 |     return b.intersect(b1.invert())
54 | }
55 | 
56 | // invert returns everything not in b.
57 | func (b *byteSet) invert() *byteSet { 58 | r := *b 59 | for i := range r { 60 | r[i] = !r[i] 61 | } 62 | return &r 63 | } 64 | -------------------------------------------------------------------------------- /lineprotocol/corpus_test.go: -------------------------------------------------------------------------------- 1 | package lineprotocol 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "sort" 7 | "sync" 8 | "testing" 9 | "time" 10 | 11 | qt "github.com/frankban/quicktest" 12 | "github.com/google/go-cmp/cmp/cmpopts" 13 | "github.com/influxdata/line-protocol-corpus/lpcorpus" 14 | ) 15 | 16 | func TestCorpusDecode(t *testing.T) { 17 | c := qt.New(t) 18 | corpus, err := readCorpusDecodeResults() 19 | c.Assert(err, qt.IsNil) 20 | for _, test := range corpus { 21 | c.Run(test.Input.Key, func(c *qt.C) { 22 | precision := fromCorpusPrecision(test.Input.Precision) 23 | ps, err := decodeToCorpusPoints(test.Input.Text, precision, test.Input.DefaultTime) 24 | // We'll treat it as success if we match any of the result 25 | if test.Output.Error != "" { 26 | c.Assert(err, qt.Not(qt.IsNil)) 27 | // Check exact error? 28 | return 29 | } 30 | c.Assert(err, qt.IsNil) 31 | c.Assert(ps, qt.CmpEquals(cmpopts.EquateEmpty()), test.Output.Result) 32 | }) 33 | } 34 | } 35 | 36 | func TestCorpusEncode(t *testing.T) { 37 | c := qt.New(t) 38 | corpus, err := readCorpusEncodeResults() 39 | c.Assert(err, qt.IsNil) 40 | for _, test := range corpus { 41 | c.Run(test.Input.Key, func(c *qt.C) { 42 | precision := fromCorpusPrecision(test.Input.Precision) 43 | data, err := encodeWithCorpusInput(test.Input.Point, precision) 44 | if err == nil { 45 | // The encoding succeeded. Check that we can round-trip back to the 46 | // original values. 47 | ms, err := decodeToCorpusPoints(data, precision, 0) 48 | if c.Check(err, qt.IsNil) { 49 | c.Check(ms, qt.HasLen, 1) 50 | c.Check(ms[0], qt.DeepEquals, test.Input.Point) 51 | } 52 | } 53 | if test.Output.Error != "" { 54 | c.Assert(err, qt.IsNotNil) 55 | // assert exact string? 
56 | return 57 | } 58 | c.Assert(err, qt.IsNil) 59 | c.Assert(string(data), qt.Equals, string(test.Output.Result)) 60 | }) 61 | } 62 | } 63 | 64 | func encodeWithCorpusInput(m *lpcorpus.Point, precision Precision) ([]byte, error) { 65 | var e Encoder 66 | e.SetPrecision(precision) 67 | e.StartLineRaw(m.Name) 68 | for _, tag := range m.Tags { 69 | e.AddTagRaw(tag.Key, tag.Value) 70 | } 71 | for _, field := range m.Fields { 72 | v, ok := NewValue(field.Value.Interface()) 73 | if !ok { 74 | return nil, fmt.Errorf("invalid value for encoding %v", field.Value) 75 | } 76 | e.AddFieldRaw(field.Key, v) 77 | } 78 | e.EndLine(time.Unix(0, m.Time)) 79 | return bytes.TrimSuffix(e.Bytes(), []byte("\n")), e.Err() 80 | } 81 | 82 | func decodeToCorpusPoints(text []byte, precision Precision, defaultTime int64) ([]*lpcorpus.Point, error) { 83 | dec := NewDecoderWithBytes(text) 84 | ms := []*lpcorpus.Point{} 85 | for dec.Next() { 86 | m, err := decodeToCorpusPoint(dec, precision, defaultTime) 87 | if err != nil { 88 | return nil, fmt.Errorf("cannot get metric for point %d: %v", len(ms), err) 89 | } 90 | ms = append(ms, m) 91 | } 92 | return ms, nil 93 | } 94 | 95 | func decodeToCorpusPoint(dec *Decoder, precision Precision, defaultTime int64) (*lpcorpus.Point, error) { 96 | m := lpcorpus.Point{ 97 | Tags: []lpcorpus.Tag{}, 98 | Fields: []lpcorpus.Field{}, 99 | } 100 | var err error 101 | m.Name, err = dec.Measurement() 102 | if err != nil { 103 | return nil, fmt.Errorf("cannot get measurement: %v", err) 104 | } 105 | for { 106 | key, val, err := dec.NextTag() 107 | if err != nil { 108 | return nil, fmt.Errorf("cannot get tag %v: %v", len(m.Tags), err) 109 | } 110 | if key == nil { 111 | break 112 | } 113 | m.Tags = append(m.Tags, lpcorpus.Tag{ 114 | Key: dupBytes(key), 115 | Value: dupBytes(val), 116 | }) 117 | } 118 | sort.Slice(m.Tags, func(i, j int) bool { 119 | return bytes.Compare(m.Tags[i].Key, m.Tags[j].Key) < 0 120 | }) 121 | for i := range m.Tags { 122 | if i > 0 && bytes.Equal(m.Tags[i-1].Key, m.Tags[i].Key) { 123 | return nil, fmt.Errorf("duplicate key %q", m.Tags[i].Key) 124 | } 125 | } 126 | for { 127 | key, val, err := dec.NextField() 128 | if err != nil { 129 | return nil, fmt.Errorf("cannot get field %d: %v", len(m.Fields), err) 130 | } 131 | if key == nil { 132 | break 133 | } 134 | m.Fields = append(m.Fields, lpcorpus.Field{ 135 | Key: dupBytes(key), 136 | Value: lpcorpus.MustNewValue(val.Interface()), 137 | }) 138 | } 139 | 140 | timestamp, err := dec.Time(precision, time.Unix(0, defaultTime)) 141 | if err != nil { 142 | return nil, fmt.Errorf("cannot get time: %v", err) 143 | } 144 | m.Time = timestamp.UnixNano() 145 | return &m, nil 146 | } 147 | 148 | func fromCorpusPrecision(precision lpcorpus.Precision) Precision { 149 | switch precision.Duration { 150 | case time.Nanosecond: 151 | return Nanosecond 152 | case time.Microsecond: 153 | return Microsecond 154 | case time.Millisecond: 155 | return Millisecond 156 | case time.Second: 157 | return Second 158 | default: 159 | panic(fmt.Errorf("unknown precision in test corpus %q", precision)) 160 | } 161 | } 162 | 163 | func dupBytes(b []byte) []byte { 164 | return append([]byte(nil), b...) 
165 | } 166 | 167 | var corpus struct { 168 | once sync.Once 169 | decode []*lpcorpus.DecodeCorpusEntry 170 | encode []*lpcorpus.EncodeCorpusEntry 171 | err error 172 | } 173 | 174 | func readCorpusOnce() { 175 | corpus.once.Do(func() { 176 | corp, err := lpcorpus.ReadCorpusJSON("testdata/corpus.json") 177 | if err != nil { 178 | corpus.err = err 179 | return 180 | } 181 | // Create slices rather than using the maps directly so that 182 | // it's easy to execute the tests in deterministic order. 183 | drs := make([]*lpcorpus.DecodeCorpusEntry, 0, len(corp.Decode)) 184 | for _, d := range corp.Decode { 185 | drs = append(drs, d) 186 | } 187 | sort.Slice(drs, func(i, j int) bool { 188 | return drs[i].Input.Key < drs[j].Input.Key 189 | }) 190 | corpus.decode = drs 191 | 192 | ers := make([]*lpcorpus.EncodeCorpusEntry, 0, len(corp.Encode)) 193 | for _, e := range corp.Encode { 194 | ers = append(ers, e) 195 | } 196 | sort.Slice(ers, func(i, j int) bool { 197 | return ers[i].Input.Key < ers[j].Input.Key 198 | }) 199 | corpus.encode = ers 200 | }) 201 | } 202 | 203 | func readCorpusDecodeResults() ([]*lpcorpus.DecodeCorpusEntry, error) { 204 | readCorpusOnce() 205 | return corpus.decode, corpus.err 206 | } 207 | 208 | func readCorpusEncodeResults() ([]*lpcorpus.EncodeCorpusEntry, error) { 209 | readCorpusOnce() 210 | return corpus.encode, corpus.err 211 | } 212 | -------------------------------------------------------------------------------- /lineprotocol/decoder.go: -------------------------------------------------------------------------------- 1 | package lineprotocol 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io" 7 | "time" 8 | "unicode/utf8" 9 | ) 10 | 11 | const ( 12 | // When the buffer is grown, it will be grown by a minimum of 8K. 13 | minGrow = 8192 14 | 15 | // The buffer will be grown if there's less than minRead space available 16 | // to read into. 17 | minRead = minGrow / 2 18 | 19 | // maxSlide is the maximum number of bytes that will 20 | // be copied to the start of the buffer when reset is called. 21 | // This is a trade-off between copy overhead and the likelihood 22 | // that a complete line-protocol entry will fit into this size. 23 | maxSlide = 256 24 | ) 25 | 26 | var ( 27 | // Note: in some places we hard-code a single space in the source for efficency. 28 | fieldSeparatorSpace = newByteSet(" ") 29 | 30 | whitespace = fieldSeparatorSpace.union(newByteSet("\r\n")) 31 | tagKeyChars = newByteSet(",=").union(whitespace).union(nonPrintable).invert() 32 | tagKeyEscapes = newEscaper(",= ") 33 | nonPrintable = newByteSetRange(0, 31).union(newByteSet("\x7f")) 34 | eolChars = newByteSet("\r\n") 35 | measurementChars = newByteSet(", ").union(nonPrintable).invert() 36 | measurementEscapes = newEscaper(" ,") 37 | tagValChars = newByteSet(",=").union(whitespace).union(nonPrintable).invert() 38 | tagValEscapes = newEscaper(", =") 39 | fieldKeyChars = tagKeyChars 40 | fieldKeyEscapes = tagKeyEscapes 41 | fieldStringValChars = newByteSet(`"`).invert() 42 | fieldStringValEscapes = newEscaper("\\\"\n\r\t") 43 | fieldValChars = newByteSet(",").union(whitespace).invert() 44 | timeChars = newByteSet("-0123456789") 45 | commentChars = nonPrintable.invert().without(eolChars) 46 | notEOL = eolChars.invert() 47 | notNewline = newByteSet("\n").invert() 48 | ) 49 | 50 | // Decoder implements low level parsing of a set of line-protocol entries. 51 | // 52 | // Decoder methods must be called in the same order that their respective 53 | // sections appear in a line-protocol entry. 
See the documentation on the 54 | // Decoder.Next method for details. 55 | type Decoder struct { 56 | // rd holds the reader, if any. If there is no reader, 57 | // complete will be true. 58 | rd io.Reader 59 | 60 | // buf holds data that's been read. 61 | buf []byte 62 | 63 | // r0 holds the earliest read position in buf. 64 | // Data in buf[0:r0] is considered to be discarded. 65 | r0 int 66 | 67 | // r1 holds the read position in buf. Data in buf[r1:] is 68 | // next to be read. Data in buf[len(buf):cap(buf)] is 69 | // available for reading into. 70 | r1 int 71 | 72 | // complete holds whether the data in buffer 73 | // is known to be all the data that's available. 74 | complete bool 75 | 76 | // section holds the current section of the entry that's being 77 | // read. 78 | section section 79 | 80 | // skipping holds whether we will need 81 | // to return the values that we're decoding. 82 | skipping bool 83 | 84 | // escBuf holds a buffer for unescaped characters. 85 | escBuf []byte 86 | 87 | // line holds the line number corresponding to the 88 | // character at buf[r1]. 89 | line int64 90 | 91 | // err holds any non-EOF error that was returned from rd. 92 | err error 93 | } 94 | 95 | // NewDecoder returns a decoder that splits the line-protocol text 96 | // inside buf. 97 | func NewDecoderWithBytes(buf []byte) *Decoder { 98 | return &Decoder{ 99 | buf: buf, 100 | complete: true, 101 | section: endSection, 102 | line: 1, 103 | } 104 | } 105 | 106 | // NewDecoder returns a decoder that reads from the given reader. 107 | func NewDecoder(r io.Reader) *Decoder { 108 | return &Decoder{ 109 | rd: r, 110 | escBuf: make([]byte, 0, 512), 111 | section: endSection, 112 | line: 1, 113 | } 114 | } 115 | 116 | // Next advances to the next entry, and reports whether there is an 117 | // entry available. Syntax errors on individual lines do not cause this 118 | // to return false (the decoder attempts to recover from badly 119 | // formatted lines), but I/O errors do. Call d.Err to discover if there 120 | // was any I/O error. Syntax errors are returned as *DecoderError 121 | // errors from Decoder methods. 122 | // 123 | // After calling Next, the various components of a line can be retrieved 124 | // by calling Measurement, NextTag, NextField and Time in that order 125 | // (the same order that the components are held in the entry). 126 | // 127 | // IMPORTANT NOTE: the byte slices returned by the Decoder methods are 128 | // only valid until the next call to any other Decode method. 129 | // 130 | // Decoder will skip earlier components if a later method is called, 131 | // but it doesn't retain the entire entry, so it cannot go backwards. 132 | // 133 | // For example, to retrieve only the timestamp of all lines, this suffices: 134 | // 135 | // for d.Next() { 136 | // timestamp, err := d.TimeBytes() 137 | // } 138 | // 139 | func (d *Decoder) Next() bool { 140 | if _, err := d.advanceToSection(endSection); err != nil { 141 | // There was a syntax error and the line might not be 142 | // fully consumed, so make sure that we do actually 143 | // consume the rest of the line. This relies on the fact 144 | // that when we return a syntax error, we abandon the 145 | // rest of the line by going to newlineSection. If we 146 | // changed that behaviour (for example to allow obtaining 147 | // multiple errors per line), then we might need to loop here. 
148 | d.advanceToSection(endSection) 149 | } 150 | d.skipEmptyLines() 151 | d.section = measurementSection 152 | return d.ensure(1) 153 | } 154 | 155 | // Err returns any I/O error encountered when reading 156 | // entries. If d was created with NewDecoderWithBytes, 157 | // Err will always return nil. 158 | func (d *Decoder) Err() error { 159 | return d.err 160 | } 161 | 162 | // Measurement returns the measurement name. It returns nil 163 | // unless called before NextTag, NextField or Time. 164 | func (d *Decoder) Measurement() ([]byte, error) { 165 | if ok, err := d.advanceToSection(measurementSection); err != nil { 166 | return nil, err 167 | } else if !ok { 168 | return nil, nil 169 | } 170 | d.reset() 171 | measure, i0, err := d.takeEsc(measurementChars, &measurementEscapes.revTable) 172 | if err != nil { 173 | return nil, err 174 | } 175 | if len(measure) == 0 { 176 | if !d.ensure(1) { 177 | return nil, d.syntaxErrorf(i0, "no measurement name found") 178 | } 179 | return nil, d.syntaxErrorf(i0, "invalid character %q found at start of measurement name", d.at(0)) 180 | } 181 | if measure[0] == '#' { 182 | // Comments are usually skipped earlier but if a comment contains invalid white space, 183 | // there's no way for the comment-parsing code to return an error, so instead 184 | // the read point is set to the start of the comment and we hit this case. 185 | // TODO find the actual invalid character to give a more accurate position. 186 | return nil, d.syntaxErrorf(i0, "invalid character found in comment line") 187 | } 188 | if err := d.advanceTagComma(); err != nil { 189 | return nil, err 190 | } 191 | d.section = tagSection 192 | return measure, nil 193 | } 194 | 195 | // NextTag returns the next tag in the entry. 196 | // If there are no more tags, it returns nil, nil, nil. 197 | // Note that this must be called before NextField because 198 | // tags precede fields in the line-protocol entry. 199 | func (d *Decoder) NextTag() (key, value []byte, err error) { 200 | if ok, err := d.advanceToSection(tagSection); err != nil { 201 | return nil, nil, err 202 | } else if !ok { 203 | return nil, nil, nil 204 | } 205 | if d.ensure(1) && d.at(0) == ' ' { 206 | d.discardc(' ') 207 | d.section = fieldSection 208 | return nil, nil, nil 209 | } 210 | tagKey, i0, err := d.takeEsc(tagKeyChars, &tagKeyEscapes.revTable) 211 | if err != nil { 212 | return nil, nil, err 213 | } 214 | if len(tagKey) == 0 || !d.ensure(1) || d.at(0) != '=' { 215 | hasKey := len(tagKey) != 0 216 | eof := !d.ensure(1) 217 | hasEquals := !eof && d.at(0) == '=' 218 | switch { 219 | case eof && !hasKey: 220 | return nil, nil, d.syntaxErrorf(i0, "expected tag key or field but found end of input instead") 221 | case eof: 222 | return nil, nil, d.syntaxErrorf(i0, "expected '=' after tag key %q, but got end of input instead", tagKey) 223 | case hasKey: 224 | return nil, nil, d.syntaxErrorf(i0, "expected '=' after tag key %q, but got %q instead", tagKey, d.at(0)) 225 | case hasEquals: 226 | return nil, nil, d.syntaxErrorf(i0, "empty tag key") 227 | default: 228 | return nil, nil, d.syntaxErrorf(i0, "expected tag key or field but found %q instead", d.at(0)) 229 | } 230 | } 231 | d.advance(1) 232 | tagVal, i0, err := d.takeEsc(tagValChars, &tagValEscapes.revTable) 233 | if err != nil { 234 | return nil, nil, err 235 | } 236 | if len(tagVal) == 0 { 237 | return nil, nil, d.syntaxErrorf(i0, "expected tag value after tag key %q, but none found", tagKey) 238 | } 239 | if !d.ensure(1) { 240 | // There's no more data after the tag value. 
Instead of returning an error 241 | // immediately, advance to the field section and return the tag and value. 242 | // This means that we'll see all the tags even when there's no value, 243 | // and it also allows a client to parse the tags in isolation even when there 244 | // are no keys. We'll return an error if the client tries to read values from here. 245 | d.section = fieldSection 246 | return tagKey, tagVal, nil 247 | } 248 | if err := d.advanceTagComma(); err != nil { 249 | return nil, nil, err 250 | } 251 | return tagKey, tagVal, nil 252 | } 253 | 254 | // advanceTagComma consumes a comma after a measurement 255 | // or a tag value, making sure it's not followed by whitespace. 256 | func (d *Decoder) advanceTagComma() error { 257 | if !d.ensure(1) { 258 | return nil 259 | } 260 | if d.at(0) != ',' { 261 | return nil 262 | } 263 | // If there's a comma, there's a tag, so check that there's the start 264 | // of a tag name there. 265 | d.advance(1) 266 | if !d.ensure(1) { 267 | return d.syntaxErrorf(d.r1-d.r0, "expected tag key after comma; got end of input") 268 | } 269 | if whitespace.get(d.at(0)) { 270 | return d.syntaxErrorf(d.r1-d.r0, "expected tag key after comma; got white space instead") 271 | } 272 | return nil 273 | } 274 | 275 | // NextFieldBytes returns the next field in the entry. 276 | // If there are no more fields, it returns all zero values. 277 | // Note that this must be called before Time because 278 | // fields precede the timestamp in the line-protocol entry. 279 | // 280 | // The returned value slice may not be valid: to 281 | // check its validity, use NewValueFromBytes(kind, value), or use NextField. 282 | func (d *Decoder) NextFieldBytes() (key []byte, kind ValueKind, value []byte, err error) { 283 | if ok, err := d.advanceToSection(fieldSection); err != nil { 284 | return nil, Unknown, nil, err 285 | } else if !ok { 286 | return nil, Unknown, nil, nil 287 | } 288 | fieldKey, i0, err := d.takeEsc(fieldKeyChars, &fieldKeyEscapes.revTable) 289 | if err != nil { 290 | return nil, Unknown, nil, err 291 | } 292 | if len(fieldKey) == 0 { 293 | if !d.ensure(1) { 294 | return nil, Unknown, nil, d.syntaxErrorf(i0, "expected field key but none found") 295 | } 296 | return nil, Unknown, nil, d.syntaxErrorf(i0, "invalid character %q found at start of field key", d.at(0)) 297 | } 298 | if !d.ensure(1) { 299 | return nil, Unknown, nil, d.syntaxErrorf(d.r1-d.r0, "want '=' after field key %q, found end of input", fieldKey) 300 | } 301 | if nextc := d.at(0); nextc != '=' { 302 | return nil, Unknown, nil, d.syntaxErrorf(d.r1-d.r0, "want '=' after field key %q, found %q", fieldKey, nextc) 303 | } 304 | d.advance(1) 305 | if !d.ensure(1) { 306 | return nil, Unknown, nil, d.syntaxErrorf(d.r1-d.r0, "expected value for field %q, found end of input", fieldKey) 307 | } 308 | var fieldVal []byte 309 | var fieldKind ValueKind 310 | switch d.at(0) { 311 | case '"': 312 | // Skip leading quote. 313 | d.advance(1) 314 | var err error 315 | fieldVal, i0, err = d.takeEsc(fieldStringValChars, &fieldStringValEscapes.revTable) 316 | if err != nil { 317 | return nil, Unknown, nil, err 318 | } 319 | fieldKind = String 320 | if !d.ensure(1) { 321 | return nil, Unknown, nil, d.syntaxErrorf(i0-1, "expected closing quote for string field %q, found end of input", fieldKey) 322 | } 323 | if d.at(0) != '"' { 324 | // This can't happen, as all characters are allowed in a string. 
325 | return nil, Unknown, nil, d.syntaxErrorf(i0-1, "unexpected string termination") 326 | } 327 | // Skip trailing quote 328 | d.advance(1) 329 | case 't', 'T', 'f', 'F': 330 | fieldVal = d.take(fieldValChars) 331 | fieldKind = Bool 332 | case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.': 333 | fieldVal = d.take(fieldValChars) 334 | switch fieldVal[len(fieldVal)-1] { 335 | case 'i': 336 | fieldVal = fieldVal[:len(fieldVal)-1] 337 | fieldKind = Int 338 | case 'u': 339 | fieldVal = fieldVal[:len(fieldVal)-1] 340 | fieldKind = Uint 341 | default: 342 | fieldKind = Float 343 | } 344 | case ' ', ',': 345 | return nil, Unknown, nil, d.syntaxErrorf(d.r1-d.r0, "missing field value for field %q", fieldKey) 346 | default: 347 | start := d.r1 - d.r0 348 | fieldVal := d.take(fieldValChars) 349 | return nil, Unknown, nil, d.syntaxErrorf(start, "value for field %q (%q) has unrecognized type", fieldKey, fieldVal) 350 | } 351 | if !d.ensure(1) { 352 | d.section = endSection 353 | return fieldKey, fieldKind, fieldVal, nil 354 | } 355 | nextc := d.at(0) 356 | if nextc == ',' { 357 | d.advance(1) 358 | return fieldKey, fieldKind, fieldVal, nil 359 | } 360 | if !whitespace.get(nextc) { 361 | return nil, Unknown, nil, d.syntaxErrorf(d.r1-d.r0, "unexpected character %q after field %q", nextc, fieldKey) 362 | } 363 | d.discardc(' ') 364 | if d.takeEOL() { 365 | d.section = endSection 366 | return fieldKey, fieldKind, fieldVal, nil 367 | } 368 | d.section = timeSection 369 | return fieldKey, fieldKind, fieldVal, nil 370 | } 371 | 372 | // takeEOL consumes input up until the next end of line. 373 | func (d *Decoder) takeEOL() bool { 374 | if !d.ensure(1) { 375 | // End of input. 376 | return true 377 | } 378 | switch d.at(0) { 379 | case '\n': 380 | // Regular NL. 381 | d.advance(1) 382 | d.line++ 383 | return true 384 | case '\r': 385 | if !d.ensure(2) { 386 | // CR at end of input. 387 | d.advance(1) 388 | return true 389 | } 390 | if d.at(1) == '\n' { 391 | // CR-NL 392 | d.advance(2) 393 | d.line++ 394 | return true 395 | } 396 | } 397 | return false 398 | } 399 | 400 | // NextField is a wrapper around NextFieldBytes that parses 401 | // the field value. Note: the returned value is only valid 402 | // until the next call method call on Decoder because when 403 | // it's a string, it refers to an internal buffer. 404 | // 405 | // If the value cannot be parsed because it's out of range 406 | // (as opposed to being syntactically invalid), 407 | // the errors.Is(err, ErrValueOutOfRange) will return true. 408 | func (d *Decoder) NextField() (key []byte, val Value, err error) { 409 | // Even though NextFieldBytes calls advanceToSection, 410 | // we need to call it here too so that we know exactly where 411 | // the field starts so that startIndex is accurate. 412 | if ok, err := d.advanceToSection(fieldSection); err != nil { 413 | return nil, Value{}, err 414 | } else if !ok { 415 | return nil, Value{}, nil 416 | } 417 | startIndex := d.r1 - d.r0 418 | key, kind, data, err := d.NextFieldBytes() 419 | if err != nil || key == nil { 420 | return nil, Value{}, err 421 | } 422 | 423 | v, err := newValueFromBytes(kind, data, false) 424 | if err != nil { 425 | // We want to produce an error that points to where the field 426 | // location, but NextFieldBytes has read past that. 427 | // However, we know the key length, and we can work out 428 | // the how many characters it took when escaped, so 429 | // we can reconstruct the index of the start of the field. 
430 | escLen, _ := tagKeyEscapes.escapedLen(unsafeBytesToString(key)) 431 | startIndex += escLen + len("=") 432 | return nil, Value{}, d.syntaxErrorf(startIndex, "cannot parse value for field key %q: %w", key, err) 433 | } 434 | return key, v, nil 435 | } 436 | 437 | // TimeBytes returns the timestamp of the entry as a byte slice. 438 | // If there is no timestamp, it returns nil, nil. 439 | func (d *Decoder) TimeBytes() ([]byte, error) { 440 | if ok, err := d.advanceToSection(timeSection); err != nil { 441 | return nil, err 442 | } else if !ok { 443 | return nil, nil 444 | } 445 | start := d.r1 - d.r0 446 | timeBytes := d.take(timeChars) 447 | if len(timeBytes) == 0 { 448 | d.section = endSection 449 | timeBytes = nil 450 | } 451 | if !d.ensure(1) { 452 | d.section = endSection 453 | return timeBytes, nil 454 | } 455 | if !whitespace.get(d.at(0)) { 456 | // Absorb the rest of the line so that we get a better error. 457 | d.take(notEOL) 458 | return nil, d.syntaxErrorf(start, "invalid timestamp (%q)", d.buf[d.r0+start:d.r1]) 459 | } 460 | d.discardc(' ') 461 | if !d.ensure(1) { 462 | d.section = endSection 463 | return timeBytes, nil 464 | } 465 | if !d.takeEOL() { 466 | start := d.r1 - d.r0 467 | extra := d.take(notEOL) 468 | return nil, d.syntaxErrorf(start, "unexpected text after timestamp (%q)", extra) 469 | } 470 | d.section = endSection 471 | return timeBytes, nil 472 | } 473 | 474 | // Time is a wrapper around TimeBytes that returns the timestamp 475 | // assuming the given precision. 476 | func (d *Decoder) Time(prec Precision, defaultTime time.Time) (time.Time, error) { 477 | // Even though TimeBytes calls advanceToSection, 478 | // we need to call it here too so that we know exactly where 479 | // the timestamp starts so that start is accurate. 480 | if ok, err := d.advanceToSection(timeSection); err != nil { 481 | return time.Time{}, err 482 | } else if !ok { 483 | return defaultTime.Truncate(prec.Duration()), err 484 | } 485 | start := d.r1 - d.r0 486 | data, err := d.TimeBytes() 487 | if err != nil { 488 | return time.Time{}, err 489 | } 490 | if data == nil { 491 | return defaultTime.Truncate(prec.Duration()), nil 492 | } 493 | ts, err := parseIntBytes(data, 10, 64) 494 | if err != nil { 495 | return time.Time{}, d.syntaxErrorf(start, "invalid timestamp (%q): %w", data, maybeOutOfRange(err, "invalid syntax")) 496 | } 497 | ns, ok := prec.asNanoseconds(ts) 498 | if !ok { 499 | return time.Time{}, d.syntaxErrorf(start, "invalid timestamp (%q): %w", data, ErrValueOutOfRange) 500 | } 501 | return time.Unix(0, ns), nil 502 | } 503 | 504 | // consumeLine is used to recover from errors by reading an entire 505 | // line even if it contains invalid characters. 506 | func (d *Decoder) consumeLine() { 507 | d.take(notNewline) 508 | if d.at(0) == '\n' { 509 | d.advance(1) 510 | d.line++ 511 | } 512 | d.reset() 513 | d.section = endSection 514 | } 515 | 516 | func (d *Decoder) skipEmptyLines() { 517 | for { 518 | startLine := d.r1 - d.r0 519 | d.discardc(' ') 520 | switch d.at(0) { 521 | case '#': 522 | // Found a comment. 523 | d.take(commentChars) 524 | if !d.takeEOL() { 525 | // Comment has invalid characters. 526 | // Rewind input to start of comment so 527 | // that next section will return the error. 528 | d.r1 = d.r0 + startLine 529 | return 530 | } 531 | case '\n': 532 | d.line++ 533 | d.advance(1) 534 | case '\r': 535 | if !d.takeEOL() { 536 | // Solitary carriage return. 537 | // Leave it there and next section will return an error. 
538 | return 539 | } 540 | default: 541 | return 542 | } 543 | } 544 | } 545 | 546 | func (d *Decoder) advanceToSection(section section) (bool, error) { 547 | if d.section == section { 548 | return true, nil 549 | } 550 | if d.section > section { 551 | return false, nil 552 | } 553 | // Enable skipping to avoid unnecessary unescaping work. 554 | d.skipping = true 555 | for d.section < section { 556 | if err := d.consumeSection(); err != nil { 557 | d.skipping = false 558 | return false, err 559 | } 560 | } 561 | d.skipping = false 562 | return d.section == section, nil 563 | } 564 | 565 | //go:generate stringer -type section 566 | 567 | // section represents one decoder section of a line-protocol entry. 568 | // An entry consists of a measurement (measurementSection), 569 | // an optional set of tags (tagSection), one or more fields (fieldSection) 570 | // and an option timestamp (timeSection). 571 | type section byte 572 | 573 | const ( 574 | measurementSection section = iota 575 | tagSection 576 | fieldSection 577 | timeSection 578 | 579 | // newlineSection represents the newline at the end of the line. 580 | // This section also absorbs any invalid characters at the end 581 | // of the line - it's used as a recovery state if we find an error 582 | // when parsing an earlier part of an entry. 583 | newlineSection 584 | 585 | // endSection represents the end of an entry. When we're at this 586 | // stage, calling More will cycle back to measurementSection again. 587 | endSection 588 | ) 589 | 590 | func (d *Decoder) consumeSection() error { 591 | switch d.section { 592 | case measurementSection: 593 | _, err := d.Measurement() 594 | return err 595 | case tagSection: 596 | for { 597 | key, _, err := d.NextTag() 598 | if err != nil || key == nil { 599 | return err 600 | } 601 | } 602 | case fieldSection: 603 | for { 604 | key, _, _, err := d.NextFieldBytes() 605 | if err != nil || key == nil { 606 | return err 607 | } 608 | } 609 | case timeSection: 610 | _, err := d.TimeBytes() 611 | return err 612 | case newlineSection: 613 | d.consumeLine() 614 | return nil 615 | default: 616 | return nil 617 | } 618 | } 619 | 620 | // take returns the next slice of bytes that are in the given set 621 | // reading more data as needed. It updates d.r1. 622 | // 623 | // Note: we assume that the set never contains the newline 624 | // character because newlines can only occur when explicitly 625 | // allowed (in string field values and at the end of an entry), 626 | // so we don't need to update d.line. 627 | func (d *Decoder) take(set *byteSet) []byte { 628 | // Note: use a relative index for start because absolute 629 | // indexes aren't stable (the contents of the buffer can be 630 | // moved when reading more data). 631 | start := d.r1 - d.r0 632 | outer: 633 | for { 634 | if !d.ensure(1) { 635 | break 636 | } 637 | buf := d.buf[d.r1:] 638 | for i, c := range buf { 639 | if !set.get(c) { 640 | d.r1 += i 641 | break outer 642 | } 643 | } 644 | d.r1 += len(buf) 645 | } 646 | return d.buf[d.r0+start : d.r1] 647 | } 648 | 649 | // discardc is similar to take but just discards all the next consecutive 650 | // occurrences of a single character. 651 | func (d *Decoder) discardc(dc byte) { 652 | // Note: use a relative index for start because absolute 653 | // indexes aren't stable (the contents of the buffer can be 654 | // moved when reading more data). 
655 | for { 656 | if !d.ensure(1) { 657 | break 658 | } 659 | buf := d.buf[d.r1:] 660 | for i, c := range buf { 661 | if c != dc { 662 | d.r1 += i 663 | return 664 | } 665 | } 666 | d.r1 += len(buf) 667 | } 668 | } 669 | 670 | // takeEsc is like take except that escaped characters also count as 671 | // part of the set. The escapeTable determines which characters 672 | // can be escaped. 673 | // 674 | // It returns the unescaped string (unless d.skipping is true, in which 675 | // case it doesn't need to go to the trouble of unescaping it), and the 676 | // index into buf that corresponds to the start of the taken bytes. 677 | // 678 | // takeEsc also returns the offset of the start of the taken bytes 679 | // relative to d.r0. 680 | // 681 | // It returns an error if the returned string contains an 682 | // invalid UTF-8 sequence. The other return parameters are unaffected by this. 683 | func (d *Decoder) takeEsc(set *byteSet, escapeTable *[256]byte) ([]byte, int, error) { 684 | // start holds the offset from r0 of the start of the taken slice. 685 | // Note that we can't use d.r1 directly, because the offsets can change 686 | // when the buffer is grown. 687 | start := d.r1 - d.r0 688 | 689 | // startUnesc holds the offset from t0 of the start of the most recent 690 | // unescaped segment. 691 | startUnesc := start 692 | 693 | // startEsc holds the index into r.escBuf of the start of the escape buffer. 694 | startEsc := len(d.escBuf) 695 | charBits := byte(0) 696 | outer: 697 | for { 698 | //if !d.ensure(1) { 699 | // break 700 | //} 701 | buf := d.buf[d.r1:] 702 | if len(buf) < 1 && !d.ensure1(1) { 703 | break 704 | } 705 | for i := 0; i < len(buf); i++ { 706 | c := buf[i] 707 | if c != '\\' { 708 | if !set.get(c) { 709 | // We've found the end, so we're done here. 710 | d.r1 += i 711 | break outer 712 | } 713 | charBits |= c 714 | continue 715 | } 716 | if i+1 >= len(buf) { 717 | // Not enough room in the buffer. Try reading more so that 718 | // we can see the next byte (note: ensure(i+2) is asking 719 | // for exactly one more character, because we know we already 720 | // have i+1 bytes in the buffer). 721 | if !d.ensure(i + 2) { 722 | // No character to escape, so leave the \ intact. 723 | d.r1 = len(d.buf) 724 | break outer 725 | } 726 | // Note that d.ensure can change d.buf, so we need to 727 | // update buf to point to the correct buffer. 728 | buf = d.buf[d.r1:] 729 | } 730 | replc := escapeTable[buf[i+1]] 731 | if replc == 0 { 732 | // The backslash doesn't precede a value escaped 733 | // character, so it stays intact. 734 | continue 735 | } 736 | if !d.skipping { 737 | d.escBuf = append(d.escBuf, d.buf[d.r0+startUnesc:d.r1+i]...) 738 | d.escBuf = append(d.escBuf, replc) 739 | startUnesc = d.r1 - d.r0 + i + 2 740 | } 741 | i++ 742 | } 743 | // We've consumed all the bytes in the buffer. Now continue 744 | // the loop, trying to acquire more. 745 | d.r1 += len(buf) 746 | } 747 | taken := d.buf[d.r0+start : d.r1] 748 | if set.get('\n') { 749 | d.line += int64(bytes.Count(taken, newlineBytes)) 750 | } 751 | if len(d.escBuf) > startEsc { 752 | // We've got an unescaped result: append any remaining unescaped bytes 753 | // and return the relevant portion of the escape buffer. 754 | d.escBuf = append(d.escBuf, d.buf[startUnesc+d.r0:d.r1]...) 755 | taken = d.escBuf[startEsc:] 756 | } 757 | // Fast-path check for valid UTF-8 - if no high bit is set, 758 | // then it can't be invalid UTF-8. 
759 | if (charBits&0x80) != 0 && !utf8.Valid(taken) { 760 | // TODO point directly to the offending sequence. 761 | return taken, start, d.syntaxErrorf(start, "invalid utf-8 sequence in token %q", taken) 762 | } 763 | return taken, start, nil 764 | } 765 | 766 | var newlineBytes = []byte{'\n'} 767 | 768 | // at returns the byte at i bytes after the current read position. 769 | // It assumes that the index has already been ensured. 770 | // If there's no byte there, it returns zero. 771 | func (d *Decoder) at(i int) byte { 772 | if d.r1+i < len(d.buf) { 773 | return d.buf[d.r1+i] 774 | } 775 | return 0 776 | } 777 | 778 | // reset discards all the data up to d.r1 and data in d.escBuf 779 | func (d *Decoder) reset() { 780 | if unread := len(d.buf) - d.r1; unread == 0 { 781 | // No bytes in the buffer, so we can start from the beginning without 782 | // needing to copy anything (and get better cache behaviour too). 783 | d.buf = d.buf[:0] 784 | d.r1 = 0 785 | } else if !d.complete && unread <= maxSlide { 786 | // Slide the unread portion of the buffer to the 787 | // start so that when we read more data, 788 | // there's less chance that we'll need to grow the buffer. 789 | copy(d.buf, d.buf[d.r1:]) 790 | d.r1 = 0 791 | d.buf = d.buf[:unread] 792 | } 793 | d.r0 = d.r1 794 | d.escBuf = d.escBuf[:0] 795 | } 796 | 797 | // advance advances the read point by n. 798 | // This should only be used when it's known that 799 | // there are already n bytes available in the buffer. 800 | func (d *Decoder) advance(n int) { 801 | d.r1 += n 802 | } 803 | 804 | // ensure ensures that there are at least n bytes available in 805 | // d.buf[d.r1:], reading more bytes if necessary. 806 | // It reports whether enough bytes are available. 807 | func (d *Decoder) ensure(n int) bool { 808 | if d.r1+n <= len(d.buf) { 809 | // There are enough bytes available. 810 | return true 811 | } 812 | return d.ensure1(n) 813 | } 814 | 815 | // ensure1 is factored out of ensure so that ensure 816 | // itself can be inlined. 817 | func (d *Decoder) ensure1(n int) bool { 818 | for { 819 | if d.complete { 820 | // No possibility of more data. 821 | return false 822 | } 823 | d.readMore() 824 | if d.r1+n <= len(d.buf) { 825 | // There are enough bytes available. 826 | return true 827 | } 828 | } 829 | } 830 | 831 | // readMore reads more data into d.buf. 832 | func (d *Decoder) readMore() { 833 | if d.complete { 834 | return 835 | } 836 | n := cap(d.buf) - len(d.buf) 837 | if n < minRead { 838 | // We need to grow the buffer. Note that we don't have to copy 839 | // the unused part of the buffer (d.buf[:d.r0]). 840 | // TODO provide a way to limit the maximum size that 841 | // the buffer can grow to. 842 | used := len(d.buf) - d.r0 843 | n1 := cap(d.buf) * 2 844 | if n1-used < minGrow { 845 | n1 = used + minGrow 846 | } 847 | buf1 := make([]byte, used, n1) 848 | copy(buf1, d.buf[d.r0:]) 849 | d.buf = buf1 850 | d.r1 -= d.r0 851 | d.r0 = 0 852 | } 853 | n, err := d.rd.Read(d.buf[len(d.buf):cap(d.buf)]) 854 | d.buf = d.buf[:len(d.buf)+n] 855 | if err == nil { 856 | return 857 | } 858 | d.complete = true 859 | if err != io.EOF { 860 | d.err = err 861 | } 862 | } 863 | 864 | // syntaxErrorf records a syntax error at the given offset from d.r0 865 | // and the using the given fmt.Sprintf-formatted message. 866 | func (d *Decoder) syntaxErrorf(offset int, f string, a ...interface{}) error { 867 | // Note: we only ever reset the buffer at the end of an entry, 868 | // so we can assume that d.r0 corresponds to column 1. 
869 | buf := d.buf[d.r0 : d.r0+offset] 870 | var columnBytes []byte 871 | if i := bytes.LastIndexByte(buf, '\n'); i >= 0 { 872 | columnBytes = buf[i+1:] 873 | } else { 874 | columnBytes = buf 875 | } 876 | column := len(columnBytes) + 1 877 | 878 | // Note: line corresponds to the current line at d.r1, so if 879 | // there are any newlines after the location of the error, we need to 880 | // reduce the line we report accordingly. 881 | remain := d.buf[d.r0+offset : d.r1] 882 | line := d.line - int64(bytes.Count(remain, newlineBytes)) 883 | 884 | // We'll recover from a syntax error by reading all bytes until 885 | // the next newline. We don't want to do that if we've already 886 | // just scanned the end of a line. 887 | if d.section != endSection { 888 | d.section = newlineSection 889 | } 890 | return &DecodeError{ 891 | Line: line, 892 | Column: column, 893 | Err: fmt.Errorf(f, a...), 894 | } 895 | } 896 | 897 | // DecodeError represents an error when decoding a line-protocol entry. 898 | type DecodeError struct { 899 | // Line holds the one-based index of the line where the error occurred. 900 | Line int64 901 | // Column holds the one-based index of the column (in bytes) where the error occurred. 902 | Column int 903 | // Err holds the underlying error. 904 | Err error 905 | } 906 | 907 | // Error implements the error interface. 908 | func (e *DecodeError) Error() string { 909 | return fmt.Sprintf("at line %d:%d: %s", e.Line, e.Column, e.Err.Error()) 910 | } 911 | 912 | // Unwrap implements error unwrapping so that the underlying 913 | // error can be retrieved. 914 | func (e *DecodeError) Unwrap() error { 915 | return e.Err 916 | } 917 | -------------------------------------------------------------------------------- /lineprotocol/decoder_test.go: -------------------------------------------------------------------------------- 1 | package lineprotocol 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "math/rand" 10 | "regexp" 11 | "strings" 12 | "testing" 13 | "testing/iotest" 14 | "time" 15 | "unicode/utf8" 16 | 17 | qt "github.com/frankban/quicktest" 18 | ) 19 | 20 | type TagKeyValue struct { 21 | Key, Value string 22 | Error string 23 | } 24 | 25 | type FieldKeyValue struct { 26 | Key string 27 | Value interface{} 28 | Error string 29 | } 30 | 31 | type Point struct { 32 | Measurement string 33 | MeasurementError string 34 | Tags []TagKeyValue 35 | Fields []FieldKeyValue 36 | Precision Precision 37 | DefaultTime time.Time 38 | Time time.Time 39 | TimeError string 40 | } 41 | 42 | func isDecodeError(err error) bool { 43 | return errors.As(err, new(*DecodeError)) 44 | } 45 | 46 | // sectionCheckers holds a function for each section that checks that the result of decoding 47 | // for that section is as expected. 
48 | var sectionCheckers = []func(c *qt.C, dec *Decoder, expect Point, errp errPositions){ 49 | measurementSection: func(c *qt.C, dec *Decoder, expect Point, errp errPositions) { 50 | m, err := dec.Measurement() 51 | if expect.MeasurementError != "" { 52 | c.Assert(err, qt.Satisfies, isDecodeError) 53 | c.Assert(err, qt.ErrorMatches, regexp.QuoteMeta(errp.makeErr(expect.MeasurementError)), qt.Commentf("measurement %q", m)) 54 | return 55 | } 56 | 57 | c.Assert(err, qt.IsNil) 58 | c.Assert(string(m), qt.Equals, expect.Measurement, qt.Commentf("runes: %x", []rune(string(m)))) 59 | }, 60 | tagSection: func(c *qt.C, dec *Decoder, expect Point, errp errPositions) { 61 | var tags []TagKeyValue 62 | for { 63 | key, value, err := dec.NextTag() 64 | if err != nil { 65 | c.Assert(key, qt.IsNil) 66 | c.Assert(value, qt.IsNil) 67 | c.Assert(err, qt.Satisfies, isDecodeError) 68 | tags = append(tags, TagKeyValue{ 69 | Error: err.Error(), 70 | }) 71 | continue 72 | } 73 | if key == nil { 74 | break 75 | } 76 | tags = append(tags, TagKeyValue{ 77 | Key: string(key), 78 | Value: string(value), 79 | }) 80 | } 81 | // Translate the positions in the expected errors. 82 | expectTags := append([]TagKeyValue(nil), expect.Tags...) 83 | for i := range expectTags { 84 | tag := &expectTags[i] 85 | tag.Error = errp.makeErr(tag.Error) 86 | } 87 | c.Assert(tags, qt.DeepEquals, expectTags) 88 | }, 89 | fieldSection: func(c *qt.C, dec *Decoder, expect Point, errp errPositions) { 90 | var fields []FieldKeyValue 91 | for { 92 | key, value, err := dec.NextField() 93 | if err != nil { 94 | if s := err.Error(); strings.Contains(s, "out of range") { 95 | if !errors.Is(err, ErrValueOutOfRange) { 96 | c.Errorf("out of range error not propagated to result error") 97 | } 98 | } 99 | c.Assert(err, qt.Satisfies, isDecodeError) 100 | c.Assert(key, qt.IsNil) 101 | fields = append(fields, FieldKeyValue{ 102 | Error: err.Error(), 103 | }) 104 | continue 105 | } 106 | if key == nil { 107 | break 108 | } 109 | fields = append(fields, FieldKeyValue{ 110 | Key: string(key), 111 | Value: value.Interface(), 112 | }) 113 | } 114 | // Translate the positions in the expected errors. 115 | expectFields := append([]FieldKeyValue(nil), expect.Fields...) 116 | for i := range expectFields { 117 | field := &expectFields[i] 118 | field.Error = errp.makeErr(field.Error) 119 | } 120 | c.Assert(fields, qt.DeepEquals, expectFields) 121 | }, 122 | timeSection: func(c *qt.C, dec *Decoder, expect Point, errp errPositions) { 123 | timestamp, err := dec.Time(expect.Precision, expect.DefaultTime) 124 | if expect.TimeError != "" { 125 | c.Assert(err, qt.Satisfies, isDecodeError) 126 | c.Assert(err, qt.ErrorMatches, regexp.QuoteMeta(errp.makeErr(expect.TimeError))) 127 | c.Assert(timestamp, qt.DeepEquals, time.Time{}) 128 | return 129 | } 130 | c.Assert(err, qt.IsNil) 131 | c.Assert(timestamp, qt.DeepEquals, expect.Time) 132 | }, 133 | } 134 | 135 | var decoderTests = []struct { 136 | testName string 137 | // text holds the text to be decoded. 138 | // the position of an error is marked by a ∑ character (the error 139 | // string contains a corresponding ∑ character, signifying that 140 | // it's expected to be a DecodeError at that error position. 
141 | text string 142 | expect []Point 143 | }{{ 144 | testName: "all-fields-present-no-escapes", 145 | text: ` 146 | # comment 147 | somename,tag1=val1,tag2=val2 floatfield=1,strfield="hello",intfield=-1i,uintfield=1u,boolfield=true 1602841605822791506 148 | `, 149 | expect: []Point{{ 150 | Measurement: "somename", 151 | Tags: []TagKeyValue{{ 152 | Key: "tag1", 153 | Value: "val1", 154 | }, { 155 | Key: "tag2", 156 | Value: "val2", 157 | }}, 158 | Fields: []FieldKeyValue{{ 159 | Key: "floatfield", 160 | Value: 1.0, 161 | }, { 162 | Key: "strfield", 163 | Value: "hello", 164 | }, { 165 | Key: "intfield", 166 | Value: int64(-1), 167 | }, { 168 | Key: "uintfield", 169 | Value: uint64(1), 170 | }, { 171 | Key: "boolfield", 172 | Value: true, 173 | }}, 174 | Time: time.Unix(0, 1602841605822791506), 175 | }}, 176 | }, { 177 | testName: "multiple-entries", 178 | text: ` 179 | # comment 180 | m1,tag1=val1 x="first" 1602841605822791506 181 | m2,foo=bar x="second" 1602841605822792000 182 | 183 | # last comment 184 | `, 185 | expect: []Point{{ 186 | Measurement: "m1", 187 | Tags: []TagKeyValue{{ 188 | Key: "tag1", 189 | Value: "val1", 190 | }}, 191 | Fields: []FieldKeyValue{{ 192 | Key: "x", 193 | Value: "first", 194 | }}, 195 | Time: time.Unix(0, 1602841605822791506), 196 | }, { 197 | Measurement: "m2", 198 | Tags: []TagKeyValue{{ 199 | Key: "foo", 200 | Value: "bar", 201 | }}, 202 | Fields: []FieldKeyValue{{ 203 | Key: "x", 204 | Value: "second", 205 | }}, 206 | Time: time.Unix(0, 1602841605822792000), 207 | }}, 208 | }, { 209 | testName: "multiple-entries-with-error#1", 210 | text: ` 211 | m1 value=12.0 212 | m2 value=∑¹2a.0 213 | m3 value=32.0 214 | m4 value=42.0 215 | `, 216 | expect: []Point{{ 217 | Measurement: "m1", 218 | Fields: []FieldKeyValue{{ 219 | Key: "value", 220 | Value: 12.0, 221 | }}, 222 | }, { 223 | Measurement: "m2", 224 | Fields: []FieldKeyValue{{ 225 | Error: `at line ∑¹: cannot parse value for field key "value": invalid float value syntax`, 226 | }}, 227 | }, { 228 | Measurement: "m3", 229 | Fields: []FieldKeyValue{{ 230 | Key: "value", 231 | Value: 32.0, 232 | }}, 233 | }, { 234 | Measurement: "m4", 235 | Fields: []FieldKeyValue{{ 236 | Key: "value", 237 | Value: 42.0, 238 | }}, 239 | }}, 240 | }, { 241 | testName: "multiple-entries-with-error#2", 242 | text: ` 243 | m1 value=12.0 244 | m2∑¹ 245 | m3 value=32.0 246 | `, 247 | expect: []Point{{ 248 | Measurement: "m1", 249 | Fields: []FieldKeyValue{{ 250 | Key: "value", 251 | Value: 12.0, 252 | }}, 253 | }, { 254 | Measurement: "m2", 255 | // DecoderSkipSection: expect to see no error on TagSection but got error 256 | Tags: []TagKeyValue{{ 257 | Error: `at line ∑¹: expected tag key or field but found '\n' instead`, 258 | }}, 259 | }, { 260 | Measurement: "m3", 261 | Fields: []FieldKeyValue{{ 262 | Key: "value", 263 | Value: 32.0, 264 | }}, 265 | }}, 266 | }, { 267 | testName: "escaped-values", 268 | text: ` 269 | comma\,1,equals\==e\,x,two=val2 field\=x="fir\" 270 | ,st\\" 1602841605822791506 271 | 272 | # last comment 273 | `, 274 | expect: []Point{{ 275 | Measurement: "comma,1", 276 | Tags: []TagKeyValue{{ 277 | Key: "equals=", 278 | Value: "e,x", 279 | }, { 280 | Key: "two", 281 | Value: "val2", 282 | }}, 283 | Fields: []FieldKeyValue{{ 284 | Key: "field=x", 285 | Value: "fir\"\n,st\\", 286 | }}, 287 | Time: time.Unix(0, 1602841605822791506), 288 | }}, 289 | }, { 290 | testName: "missing-quotes", 291 | text: `TestBucket FieldOné=∑¹Happy,FieldTwo=sad`, 292 | expect: []Point{{ 293 | Measurement: "TestBucket", 294 | Fields: 
[]FieldKeyValue{{ 295 | Error: `at line ∑¹: value for field "FieldOné" ("Happy") has unrecognized type`, 296 | }}, 297 | }}, 298 | }, { 299 | testName: "trailing-comma-after-measurement", 300 | text: `TestBuckét,∑¹ FieldOne=Happy 301 | next x=1`, 302 | expect: []Point{{ 303 | MeasurementError: "at line ∑¹: expected tag key after comma; got white space instead", 304 | }, { 305 | Measurement: "next", 306 | Fields: []FieldKeyValue{{ 307 | Key: "x", 308 | Value: 1.0, 309 | }}, 310 | }}, 311 | }, { 312 | testName: "missing-comma-after-field", 313 | text: `TestBuckét TagOné="Happy" ∑¹FieldOne=123.45`, 314 | expect: []Point{{ 315 | Measurement: "TestBuckét", 316 | Fields: []FieldKeyValue{{ 317 | Key: "TagOné", 318 | Value: "Happy", 319 | }}, 320 | TimeError: `at line ∑¹: invalid timestamp ("FieldOne=123.45")`, 321 | }}, 322 | }, { 323 | testName: "missing timestamp", 324 | text: "b f=1", 325 | expect: []Point{{ 326 | Measurement: "b", 327 | Fields: []FieldKeyValue{{ 328 | Key: "f", 329 | Value: 1.0, 330 | }}, 331 | }}, 332 | }, { 333 | testName: "missing timestamp with newline", 334 | text: "b f=1\n", 335 | expect: []Point{{ 336 | Measurement: "b", 337 | Fields: []FieldKeyValue{{ 338 | Key: "f", 339 | Value: 1.0, 340 | }}, 341 | }}, 342 | }, { 343 | testName: "out-of-range-timestamp", 344 | text: "b f=1 ∑¹9223372036854775808", 345 | expect: []Point{{ 346 | Measurement: "b", 347 | Fields: []FieldKeyValue{{ 348 | Key: "f", 349 | Value: 1.0, 350 | }}, 351 | TimeError: `at line ∑¹: invalid timestamp ("9223372036854775808"): line-protocol value out of range`, 352 | }}, 353 | }, { 354 | testName: "out-of-range-timestamp-due-to-precision", 355 | text: "b f=1 ∑¹200000000000000000", 356 | expect: []Point{{ 357 | Measurement: "b", 358 | Fields: []FieldKeyValue{{ 359 | Key: "f", 360 | Value: 1.0, 361 | }}, 362 | Precision: Second, 363 | TimeError: `at line ∑¹: invalid timestamp ("200000000000000000"): line-protocol value out of range`, 364 | }}, 365 | }, { 366 | testName: "negative-timestamp-just-in-range", 367 | text: "b f=1 ∑¹-9223372036854775808", 368 | expect: []Point{{ 369 | Measurement: "b", 370 | Fields: []FieldKeyValue{{ 371 | Key: "f", 372 | Value: 1.0, 373 | }}, 374 | Time: time.Unix(0, -9223372036854775808), 375 | }}, 376 | }, { 377 | testName: "negative-timestamp-just-out-of-range", 378 | text: "b f=1 ∑¹-9223372036854775809", 379 | expect: []Point{{ 380 | Measurement: "b", 381 | Fields: []FieldKeyValue{{ 382 | Key: "f", 383 | Value: 1.0, 384 | }}, 385 | TimeError: `at line ∑¹: invalid timestamp ("-9223372036854775809"): line-protocol value out of range`, 386 | }}, 387 | }, { 388 | testName: "missing-timestamp-with-default", 389 | text: "b f=1", 390 | expect: []Point{{ 391 | Measurement: "b", 392 | Fields: []FieldKeyValue{{ 393 | Key: "f", 394 | Value: 1.0, 395 | }}, 396 | DefaultTime: time.Unix(0, 1643971097811314803), 397 | Time: time.Unix(0, 1643971097811314803), 398 | }}, 399 | }, { 400 | testName: "field-with-space-and-no-timestamp", 401 | text: "9 f=-7 ", 402 | expect: []Point{{ 403 | Measurement: "9", 404 | Fields: []FieldKeyValue{{ 405 | Key: "f", 406 | Value: -7.0, 407 | }}, 408 | }}, 409 | }, { 410 | testName: "carriage-returns", 411 | text: "# foo\r\nm x=1\r\n\r\n", 412 | expect: []Point{{ 413 | Measurement: "m", 414 | Fields: []FieldKeyValue{{ 415 | Key: "x", 416 | Value: 1.0, 417 | }}, 418 | }}, 419 | }, { 420 | testName: "carriage-return-in-comment", 421 | text: "∑¹# foo\rxxx\nm x=1\r\n\r\n", 422 | expect: []Point{{ 423 | MeasurementError: "at line ∑¹: invalid character found in 
comment line", 424 | }, { 425 | Measurement: "m", 426 | Fields: []FieldKeyValue{{ 427 | Key: "x", 428 | Value: 1.0, 429 | }}, 430 | }}, 431 | }, { 432 | // This test ensures that the ErrValueOutOfRange error is 433 | // propagated correctly with errors.Is 434 | testName: "out-of-range-value", 435 | text: "mmmé é=∑¹1e9999999999999", 436 | expect: []Point{{ 437 | Measurement: "mmmé", 438 | Fields: []FieldKeyValue{{ 439 | Error: `at line ∑¹: cannot parse value for field key "é": line-protocol value out of range`, 440 | }}, 441 | }}, 442 | }, { 443 | testName: "field-key-error-after-newline-in-string", 444 | // Note: we've deliberately got two fields below so that 445 | // if we ever change error behaviour so that the caller 446 | // can see multiple errors on a single line, this test should 447 | // fail (see comment in the Next method). 448 | text: "m f=1,∑¹\x01=1,\x01=2", 449 | expect: []Point{{ 450 | Measurement: "m", 451 | Fields: []FieldKeyValue{{ 452 | Key: "f", 453 | Value: 1.0, 454 | }, { 455 | Error: `at line ∑¹: invalid character '\x01' found at start of field key`, 456 | }}, 457 | }}, 458 | }, { 459 | testName: "field-value-error-after-newline-in-string", 460 | text: "m f=\"hello\ngoodbye\nx\",gé=∑¹invalid", 461 | expect: []Point{{ 462 | Measurement: "m", 463 | Fields: []FieldKeyValue{{ 464 | Key: "f", 465 | Value: "hello\ngoodbye\nx", 466 | }, { 467 | Error: `at line ∑¹: value for field "gé" ("invalid") has unrecognized type`, 468 | }}, 469 | }}, 470 | }, { 471 | testName: "field-string-value-error-after-newline-in-string", 472 | text: "m f=\"a\nb\",g=∑¹\"c\nd", 473 | expect: []Point{{ 474 | Measurement: "m", 475 | Fields: []FieldKeyValue{{ 476 | Key: "f", 477 | Value: "a\nb", 478 | }, { 479 | Error: `at line ∑¹: expected closing quote for string field "g", found end of input`, 480 | }}, 481 | }}, 482 | }, { 483 | testName: "non-printable-ASCII-in-tag-key", 484 | text: "m foo∑¹\x01=bar x=1", 485 | expect: []Point{{ 486 | Measurement: "m", 487 | Fields: []FieldKeyValue{{ 488 | Error: `at line ∑¹: want '=' after field key "foo", found '\x01'`, 489 | }}, 490 | }}, 491 | }, { 492 | testName: "non-printable-ASCII-in-tag-key", 493 | text: "m,∑¹foo\x03=bar x=1", 494 | expect: []Point{{ 495 | Measurement: "m", 496 | Tags: []TagKeyValue{{ 497 | Error: `at line ∑¹: expected '=' after tag key "foo", but got '\x03' instead`, 498 | }}, 499 | }}, 500 | }, { 501 | testName: "non-printable-ASCII-in-tag-value", 502 | text: "m,foo=bar∑¹\x02 x=1", 503 | expect: []Point{{ 504 | Measurement: "m", 505 | Tags: []TagKeyValue{{ 506 | Key: "foo", 507 | Value: "bar", 508 | }, { 509 | Error: `at line ∑¹: expected tag key or field but found '\x02' instead`, 510 | }}, 511 | }}, 512 | }, { 513 | testName: "non-printable-ASCII-in-field-key", 514 | text: "m foo∑¹\x01=bar", 515 | expect: []Point{{ 516 | Measurement: "m", 517 | Fields: []FieldKeyValue{{ 518 | Error: `at line ∑¹: want '=' after field key "foo", found '\x01'`, 519 | }}, 520 | }}, 521 | }, { 522 | testName: "backslash-escapes-in-string-field", 523 | text: `m s="\t\r\n\v"`, 524 | expect: []Point{{ 525 | Measurement: "m", 526 | Fields: []FieldKeyValue{{ 527 | Key: "s", 528 | Value: "\t\r\n\\v", 529 | }}, 530 | }}, 531 | }, { 532 | testName: "backslash-escapes-in-tags", 533 | text: `m,s=\t\r\n\v x=1`, 534 | expect: []Point{{ 535 | Measurement: "m", 536 | Tags: []TagKeyValue{{ 537 | Key: "s", 538 | Value: `\t\r\n\v`, 539 | }}, 540 | Fields: []FieldKeyValue{{ 541 | Key: "x", 542 | Value: 1.0, 543 | }}, 544 | }}, 545 | }, { 546 | testName: "bad-tag-key-#1", 
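// The input is just a bare measurement, so the error reports end of input
// rather than an unexpected character.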
547 | text: "m∑¹", 548 | expect: []Point{{ 549 | Measurement: "m", 550 | Tags: []TagKeyValue{{ 551 | Error: `at line ∑¹: expected tag key or field but found end of input instead`, 552 | }}, 553 | }}, 554 | }, { 555 | testName: "bad-tag-key-#2", 556 | text: "m,∑¹=bar¹", 557 | expect: []Point{{ 558 | Measurement: "m", 559 | Tags: []TagKeyValue{{ 560 | Error: `at line ∑¹: empty tag key`, 561 | }}, 562 | }}, 563 | }, { 564 | testName: "bad-tag-key-#3", 565 | text: "m,∑¹x =y¹", 566 | expect: []Point{{ 567 | Measurement: "m", 568 | Tags: []TagKeyValue{{ 569 | Error: `at line ∑¹: expected '=' after tag key "x", but got ' ' instead`, 570 | }}, 571 | }}, 572 | }, { 573 | testName: "bad-tag-key-#4", 574 | text: "m,∑¹x", 575 | expect: []Point{{ 576 | Measurement: "m", 577 | Tags: []TagKeyValue{{ 578 | Error: `at line ∑¹: expected '=' after tag key "x", but got end of input instead`, 579 | }}, 580 | }}, 581 | }} 582 | 583 | func TestDecoder(t *testing.T) { 584 | c := qt.New(t) 585 | for _, test := range decoderTests { 586 | c.Run(test.testName, func(c *qt.C) { 587 | errp, text := makeErrPositions(test.text) 588 | dec := NewDecoderWithBytes([]byte(text)) 589 | assertDecodeResult(c, dec, test.expect, false, errp) 590 | }) 591 | } 592 | } 593 | 594 | // assertDecodeResult asserts that the entries from dec match 595 | // the expected points and returns the number of points 596 | // consumed. If allowMore is true, it's OK for there 597 | // to be more points than expected. 598 | func assertDecodeResult(c *qt.C, dec *Decoder, expect []Point, allowMore bool, errp errPositions) int { 599 | i := 0 600 | for { 601 | if i >= len(expect) && allowMore { 602 | return i 603 | } 604 | if !dec.Next() { 605 | break 606 | } 607 | if i >= len(expect) { 608 | c.Fatalf("too many points found") 609 | } 610 | for _, checkSection := range sectionCheckers { 611 | checkSection(c, dec, expect[i], errp) 612 | } 613 | i++ 614 | } 615 | c.Assert(i, qt.Equals, len(expect)) 616 | return i 617 | } 618 | 619 | func doSection(dec *Decoder, section section) error { 620 | switch section { 621 | case measurementSection: 622 | _, err := dec.Measurement() 623 | return err 624 | case tagSection: 625 | _, _, err := dec.NextTag() 626 | return err 627 | case fieldSection: 628 | _, _, _, err := dec.NextFieldBytes() 629 | return err 630 | case timeSection: 631 | _, err := dec.TimeBytes() 632 | return err 633 | } 634 | panic("unreachable") 635 | } 636 | 637 | // expectedSectionError returns the error that's expected when 638 | // reading any complete section up to and including 639 | // the given section. 640 | func expectedSectionError(p Point, section section) string { 641 | switch section { 642 | case measurementSection: 643 | if p.MeasurementError != "" { 644 | return p.MeasurementError 645 | } 646 | case tagSection: 647 | for _, tag := range p.Tags { 648 | if tag.Error != "" { 649 | return tag.Error 650 | } 651 | } 652 | case fieldSection: 653 | for _, field := range p.Fields { 654 | if field.Error != "" { 655 | return field.Error 656 | } 657 | } 658 | case timeSection: 659 | if p.TimeError != "" { 660 | return p.TimeError 661 | } 662 | default: 663 | return "" 664 | } 665 | return expectedSectionError(p, section-1) 666 | } 667 | 668 | func TestDecoderSkipSection(t *testing.T) { 669 | // This test tests that we can call an individual decoder method 670 | // without calling any of the others. The decoder logic 671 | // should skip the other parts. 
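// Errors that belong to an earlier, skipped section are still expected to
// surface when the chosen section is read; expectedSectionError works out
// which error that should be (field value parse errors are the exception,
// as noted inside the loop below).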
672 | c := qt.New(t) 673 | for _, test := range decoderTests { 674 | c.Run(test.testName, func(c *qt.C) { 675 | for secti := range sectionCheckers { 676 | sect := section(secti) 677 | c.Run(sect.String(), func(c *qt.C) { 678 | errp, text := makeErrPositions(test.text) 679 | dec := NewDecoderWithBytes([]byte(text)) 680 | i := 0 681 | for dec.Next() { 682 | if i >= len(test.expect) { 683 | continue 684 | } 685 | if e := expectedSectionError(test.expect[i], sect-1); e != "" && !strings.Contains(e, "cannot parse value for field key") { 686 | // If there's an error earlier in the line, it gets returned on the 687 | // later section (unless it's an error parsing a field value, in which case 688 | // the syntax checking is more lax). 689 | c.Assert(doSection(dec, sect), qt.ErrorMatches, regexp.QuoteMeta(errp.makeErr(e))) 690 | } else { 691 | sectionCheckers[sect](c, dec, test.expect[i], errp) 692 | } 693 | i++ 694 | } 695 | c.Assert(i, qt.Equals, len(test.expect)) 696 | }) 697 | } 698 | }) 699 | } 700 | } 701 | 702 | var decoderTakeTests = []struct { 703 | testName string 704 | newDecoder func(s string) *Decoder 705 | expectError string 706 | }{{ 707 | testName: "bytes", 708 | newDecoder: func(s string) *Decoder { 709 | return NewDecoderWithBytes([]byte(s)) 710 | }, 711 | }, { 712 | testName: "reader", 713 | newDecoder: func(s string) *Decoder { 714 | return NewDecoder(strings.NewReader(s)) 715 | }, 716 | }, { 717 | testName: "one-byte-reader", 718 | newDecoder: func(s string) *Decoder { 719 | return NewDecoder(iotest.OneByteReader(strings.NewReader(s))) 720 | }, 721 | }, { 722 | testName: "data-err-reader", 723 | newDecoder: func(s string) *Decoder { 724 | return NewDecoder(iotest.DataErrReader(strings.NewReader(s))) 725 | }, 726 | }, { 727 | testName: "error-reader", 728 | newDecoder: func(s string) *Decoder { 729 | return NewDecoder(&errorReader{ 730 | r: strings.NewReader(s), 731 | err: fmt.Errorf("some error"), 732 | }) 733 | }, 734 | expectError: "some error", 735 | }} 736 | 737 | // TestDecoderTake tests the internal Decoder.take method. 738 | func TestDecoderTake(t *testing.T) { 739 | c := qt.New(t) 740 | for _, test := range decoderTakeTests { 741 | c.Run(test.testName, func(c *qt.C) { 742 | s := "aabbcccddefga" 743 | dec := test.newDecoder(s) 744 | data1 := dec.take(newByteSet("abc")) 745 | c.Assert(string(data1), qt.Equals, "aabbccc") 746 | 747 | data2 := dec.take(newByteSet("d")) 748 | c.Assert(string(data2), qt.Equals, "dd") 749 | 750 | data3 := dec.take(newByteSet(" ").invert()) 751 | c.Assert(string(data3), qt.Equals, "efga") 752 | c.Assert(dec.complete, qt.Equals, true) 753 | 754 | data4 := dec.take(newByteSet(" ").invert()) 755 | c.Assert(string(data4), qt.Equals, "") 756 | 757 | // Check that none of them have been overwritten. 758 | c.Assert(string(data1), qt.Equals, "aabbccc") 759 | c.Assert(string(data2), qt.Equals, "dd") 760 | c.Assert(string(data3), qt.Equals, "efga") 761 | if test.expectError != "" { 762 | c.Assert(dec.err, qt.ErrorMatches, test.expectError) 763 | } else { 764 | c.Assert(dec.err, qt.IsNil) 765 | } 766 | }) 767 | } 768 | } 769 | 770 | func TestLongTake(t *testing.T) { 771 | c := qt.New(t) 772 | // Test that we can take segments that are longer than the 773 | // read buffer size. 
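// The input here is roughly three times minRead bytes long, so the decoder
// has to read more data several times while scanning this single token.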
774 | src := strings.Repeat("abcdefgh", (minRead*3)/8) 775 | dec := NewDecoder(strings.NewReader(src)) 776 | data := dec.take(newByteSet("abcdefgh")) 777 | c.Assert(string(data), qt.Equals, src) 778 | } 779 | 780 | func TestTakeWithReset(t *testing.T) { 781 | c := qt.New(t) 782 | // Test that we can take segments that are longer than the 783 | // read buffer size. 784 | lineCount := (minRead * 3) / 9 785 | src := strings.Repeat("abcdefgh\n", lineCount) 786 | dec := NewDecoder(strings.NewReader(src)) 787 | n := 0 788 | for { 789 | data := dec.take(newByteSet("abcdefgh")) 790 | if len(data) == 0 { 791 | break 792 | } 793 | n++ 794 | c.Assert(string(data), qt.Equals, "abcdefgh") 795 | b := dec.at(0) 796 | c.Assert(b, qt.Equals, byte('\n')) 797 | dec.advance(1) 798 | dec.reset() 799 | } 800 | c.Assert(n, qt.Equals, lineCount) 801 | } 802 | 803 | func TestDecoderTakeWithReset(t *testing.T) { 804 | c := qt.New(t) 805 | // With a byte-at-a-time reader, we won't read any more 806 | // than we absolutely need. 807 | dec := NewDecoder(iotest.OneByteReader(strings.NewReader("aabbcccddefg"))) 808 | data1 := dec.take(newByteSet("abc")) 809 | c.Assert(string(data1), qt.Equals, "aabbccc") 810 | c.Assert(dec.at(0), qt.Equals, byte('d')) 811 | dec.advance(1) 812 | dec.reset() 813 | c.Assert(dec.r0, qt.Equals, 0) 814 | c.Assert(dec.r1, qt.Equals, 0) 815 | } 816 | 817 | func TestDecoderTakeEsc(t *testing.T) { 818 | c := qt.New(t) 819 | for _, test := range decoderTakeTests { 820 | c.Run(test.testName, func(c *qt.C) { 821 | dec := test.newDecoder(`hello\ \t\\z\XY`) 822 | data, i, err := dec.takeEsc(newByteSet("X").invert(), &newEscaper(" \t").revTable) 823 | c.Assert(err, qt.IsNil) 824 | c.Assert(string(data), qt.Equals, "hello \t\\\\z\\") 825 | c.Assert(i, qt.Equals, 0) 826 | 827 | // Check that an escaped character will be included when 828 | // it's not part of the take set. 829 | dec = test.newDecoder(`hello\ \t\\z\XYX`) 830 | data1, i, err := dec.takeEsc(newByteSet("X").invert(), &newEscaper("X \t").revTable) 831 | c.Assert(err, qt.IsNil) 832 | c.Assert(string(data1), qt.Equals, "hello \t\\\\zXY") 833 | c.Assert(i, qt.Equals, 0) 834 | 835 | // Check that the next call to takeEsc continues where it left off. 836 | data2, i, err := dec.takeEsc(newByteSet(" ").invert(), &newEscaper(" ").revTable) 837 | c.Assert(err, qt.IsNil) 838 | c.Assert(string(data2), qt.Equals, "X") 839 | c.Assert(i, qt.Equals, 15) 840 | // Check that data1 hasn't been overwritten. 841 | c.Assert(string(data1), qt.Equals, "hello \t\\\\zXY") 842 | 843 | // Check that a backslash followed by EOF is taken as literal. 844 | dec = test.newDecoder(`x\`) 845 | data, i, err = dec.takeEsc(newByteSet("").invert(), &newEscaper(" ").revTable) 846 | c.Assert(err, qt.IsNil) 847 | c.Assert(i, qt.Equals, 0) 848 | c.Assert(string(data), qt.Equals, "x\\") 849 | }) 850 | } 851 | } 852 | 853 | func TestDecoderTakeEscSkipping(t *testing.T) { 854 | c := qt.New(t) 855 | dec := NewDecoder(strings.NewReader(`hello\ \t\\z\XY`)) 856 | dec.skipping = true 857 | data, i, err := dec.takeEsc(newByteSet("X").invert(), &newEscaper(" \t").revTable) 858 | c.Assert(err, qt.IsNil) 859 | // When skipping is true, the data isn't unquoted (that's just unnecessary extra work). 860 | c.Assert(string(data), qt.Equals, `hello\ \t\\z\`) 861 | c.Assert(i, qt.Equals, 0) 862 | } 863 | 864 | func TestDecoderTakeEscGrowBuffer(t *testing.T) { 865 | // This test tests the code path in Decoder.readMore 866 | // when the buffer needs to grow while we're reading a token. 
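// nbyteReader (defined below) returns the input in the exact chunk sizes
// listed in next, splitting it just after a backslash so that more data has
// to be read while an escape sequence is still being scanned.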
867 | c := qt.New(t) 868 | dec := NewDecoder(&nbyteReader{ 869 | buf: []byte(`hello\ \ \ \ foo`), 870 | next: []int{ 871 | len(`hello\`), 872 | len(` \ \ \`), 873 | len(` foo`), 874 | }, 875 | }) 876 | data, i, err := dec.takeEsc(newByteSet(" ").invert(), &newEscaper(" ").revTable) 877 | c.Assert(err, qt.IsNil) 878 | c.Assert(string(data), qt.Equals, `hello `) 879 | c.Assert(i, qt.Equals, 0) 880 | data = dec.take(newByteSet("").invert()) 881 | c.Assert(string(data), qt.Equals, ` foo`) 882 | } 883 | 884 | func TestDecoderTakeSlideBuffer(t *testing.T) { 885 | // This test tests the code path in Decoder.readMore 886 | // when the read buffer is large enough but the current 887 | // data is inconveniently in the wrong place, so 888 | // it gets slid to the front of the buffer. 889 | c := qt.New(t) 890 | // The first string that we'll read takes up almost all of the 891 | // initially added buffer, leaving just a little left at the end, 892 | // that will be moved to the front when we come to read that part. 893 | firstToken := strings.Repeat("a", minGrow-4) 894 | dec := NewDecoder(strings.NewReader(firstToken + ` helloworld there`)) 895 | data := dec.take(newByteSet(" ").invert()) 896 | c.Assert(string(data), qt.Equals, firstToken) 897 | data = dec.take(newByteSet(" ")) 898 | c.Assert(string(data), qt.Equals, " ") 899 | 900 | // Reset the buffer. There's still the data from `helloworld` onwards that will remain in the buffer. 901 | dec.reset() 902 | 903 | data = dec.take(newByteSet(" ").invert()) 904 | c.Assert(string(data), qt.Equals, "helloworld") 905 | data = dec.take(newByteSet(" ")) 906 | c.Assert(string(data), qt.Equals, " ") 907 | data = dec.take(newByteSet(" ").invert()) 908 | c.Assert(string(data), qt.Equals, "there") 909 | } 910 | 911 | type nbyteReader struct { 912 | // next holds the read counts for subsequent calls to Read. 913 | // If next is empty and buf is not empty, Read will panic. 914 | next []int 915 | // buf holds the data remaining to be read. 
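// Read serves buf in chunks whose sizes are taken, in order, from next,
// panicking if a chunk doesn't fit in the caller's buffer.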
916 | buf []byte 917 | } 918 | 919 | func (r *nbyteReader) Read(buf []byte) (int, error) { 920 | if len(r.buf) == 0 && len(r.next) == 0 { 921 | return 0, io.EOF 922 | } 923 | n := r.next[0] 924 | r.next = r.next[1:] 925 | nb := copy(buf, r.buf[:n]) 926 | if nb != n { 927 | panic(fmt.Errorf("read count for return (%d) is too large for provided buffer (%d)", n, len(r.buf))) 928 | } 929 | r.buf = r.buf[n:] 930 | return n, nil 931 | } 932 | 933 | type errorReader struct { 934 | r io.Reader 935 | err error 936 | } 937 | 938 | func (r *errorReader) Read(buf []byte) (int, error) { 939 | n, err := r.r.Read(buf) 940 | if err != nil { 941 | err = r.err 942 | } 943 | return n, err 944 | } 945 | 946 | var scanEntriesBenchmarks = []struct { 947 | name string 948 | makeData func() (data []byte, totalEntries int) 949 | expect Point 950 | }{{ 951 | name: "long-lines", 952 | makeData: func() (data []byte, totalEntries int) { 953 | entry := `name,tag1=baz,tag2=asdfvdfsvdsvdfs,tagvdsvd=hello field="` + strings.Repeat("a", 4500) + `" 1602841605822791506` 954 | var buf bytes.Buffer 955 | for buf.Len() < 25*1024*1024 { 956 | buf.WriteString(entry) 957 | buf.WriteByte('\n') 958 | totalEntries++ 959 | } 960 | return buf.Bytes(), totalEntries 961 | }, 962 | expect: Point{ 963 | Measurement: "name", 964 | Tags: []TagKeyValue{{ 965 | Key: "tag1", 966 | Value: "baz", 967 | }, { 968 | Key: "tag2", 969 | Value: "asdfvdfsvdsvdfs", 970 | }, { 971 | Key: "tagvdsvd", 972 | Value: "hello", 973 | }}, 974 | Fields: []FieldKeyValue{{ 975 | Key: "field", 976 | Value: strings.Repeat("a", 4500), 977 | }}, 978 | Time: time.Unix(0, 1602841605822791506), 979 | }, 980 | }, { 981 | name: "long-lines-with-escapes", 982 | makeData: func() (data []byte, totalEntries int) { 983 | entry := `name,ta\=g1=foo\ bar\,baz,tag2=asdfvdfsvdsvdfs,tag\=vdsvd=hello field="` + strings.Repeat(`\"`, 4500) + `" 1602841605822791506` 984 | var buf bytes.Buffer 985 | for buf.Len() < 25*1024*1024 { 986 | buf.WriteString(entry) 987 | buf.WriteByte('\n') 988 | totalEntries++ 989 | } 990 | return buf.Bytes(), totalEntries 991 | }, 992 | expect: Point{ 993 | Measurement: "name", 994 | Tags: []TagKeyValue{{ 995 | Key: "ta=g1", 996 | Value: "foo bar,baz", 997 | }, { 998 | Key: "tag2", 999 | Value: "asdfvdfsvdsvdfs", 1000 | }, { 1001 | Key: "tag=vdsvd", 1002 | Value: "hello", 1003 | }}, 1004 | Fields: []FieldKeyValue{{ 1005 | Key: "field", 1006 | Value: strings.Repeat(`"`, 4500), 1007 | }}, 1008 | Time: time.Unix(0, 1602841605822791506), 1009 | }, 1010 | }, { 1011 | name: "single-short-line", 1012 | makeData: func() ([]byte, int) { 1013 | return []byte(`x,t=y y=1 1602841605822791506`), 1 1014 | }, 1015 | expect: Point{ 1016 | Measurement: "x", 1017 | Tags: []TagKeyValue{{ 1018 | Key: "t", 1019 | Value: "y", 1020 | }}, 1021 | Fields: []FieldKeyValue{{ 1022 | Key: "y", 1023 | Value: 1.0, 1024 | }}, 1025 | Time: time.Unix(0, 1602841605822791506), 1026 | }, 1027 | }, { 1028 | name: "single-short-line-with-escapes", 1029 | makeData: singleEntry(`x,t=y\,y y=1 1602841605822791506`), 1030 | expect: Point{ 1031 | Measurement: "x", 1032 | Tags: []TagKeyValue{{ 1033 | Key: "t", 1034 | Value: "y,y", 1035 | }}, 1036 | Fields: []FieldKeyValue{{ 1037 | Key: "y", 1038 | Value: 1.0, 1039 | }}, 1040 | Time: time.Unix(0, 1602841605822791506), 1041 | }, 1042 | }, { 1043 | name: "many-short-lines", 1044 | makeData: func() (data []byte, totalEntries int) { 1045 | entry := `x,t=y y=1 1602841605822791506` 1046 | var buf bytes.Buffer 1047 | for buf.Len() < 25*1024*1024 { 1048 | 
buf.WriteString(entry) 1049 | buf.WriteByte('\n') 1050 | totalEntries++ 1051 | } 1052 | return buf.Bytes(), totalEntries 1053 | }, 1054 | expect: Point{ 1055 | Measurement: "x", 1056 | Tags: []TagKeyValue{{ 1057 | Key: "t", 1058 | Value: "y", 1059 | }}, 1060 | Fields: []FieldKeyValue{{ 1061 | Key: "y", 1062 | Value: 1.0, 1063 | }}, 1064 | Time: time.Unix(0, 1602841605822791506), 1065 | }, 1066 | }, { 1067 | name: "field-key-escape-not-escapable", 1068 | makeData: singleEntry(`cpu va\lue=42 1602841605822791506`), 1069 | expect: Point{ 1070 | Measurement: "cpu", 1071 | Fields: []FieldKeyValue{{ 1072 | Key: `va\lue`, 1073 | Value: 42.0, 1074 | }}, 1075 | Time: time.Unix(0, 1602841605822791506), 1076 | }, 1077 | }, { 1078 | name: "tag-value-triple-escape-space", 1079 | makeData: singleEntry(`cpu,host=two\\\ words value=42 1602841605822791506`), 1080 | expect: Point{ 1081 | Measurement: "cpu", 1082 | Tags: []TagKeyValue{{ 1083 | Key: "host", 1084 | Value: `two\\ words`, 1085 | }}, 1086 | Fields: []FieldKeyValue{{ 1087 | Key: `value`, 1088 | Value: 42.0, 1089 | }}, 1090 | Time: time.Unix(0, 1602841605822791506), 1091 | }, 1092 | }, { 1093 | name: "procstat", 1094 | makeData: singleEntry(`procstat,exe=bash,process_name=bash voluntary_context_switches=42i,memory_rss=5103616i,rlimit_memory_data_hard=2147483647i,cpu_time_user=0.02,rlimit_file_locks_soft=2147483647i,pid=29417i,cpu_time_nice=0,rlimit_memory_locked_soft=65536i,read_count=259i,rlimit_memory_vms_hard=2147483647i,memory_swap=0i,rlimit_num_fds_soft=1024i,rlimit_nice_priority_hard=0i,cpu_time_soft_irq=0,cpu_time=0i,rlimit_memory_locked_hard=65536i,realtime_priority=0i,signals_pending=0i,nice_priority=20i,cpu_time_idle=0,memory_stack=139264i,memory_locked=0i,rlimit_memory_stack_soft=8388608i,cpu_time_iowait=0,cpu_time_guest=0,cpu_time_guest_nice=0,rlimit_memory_data_soft=2147483647i,read_bytes=0i,rlimit_cpu_time_soft=2147483647i,involuntary_context_switches=2i,write_bytes=106496i,cpu_time_system=0,cpu_time_irq=0,cpu_usage=0,memory_vms=21659648i,memory_data=1576960i,rlimit_memory_stack_hard=2147483647i,num_threads=1i,rlimit_memory_rss_soft=2147483647i,rlimit_realtime_priority_soft=0i,num_fds=4i,write_count=35i,rlimit_signals_pending_soft=78994i,cpu_time_steal=0,rlimit_num_fds_hard=4096i,rlimit_file_locks_hard=2147483647i,rlimit_cpu_time_hard=2147483647i,rlimit_signals_pending_hard=78994i,rlimit_nice_priority_soft=0i,rlimit_memory_rss_hard=2147483647i,rlimit_memory_vms_soft=2147483647i,rlimit_realtime_priority_hard=0i 1517620624000000000`), 1095 | expect: Point{ 1096 | Measurement: "procstat", 1097 | Tags: []TagKeyValue{{ 1098 | Key: "exe", 1099 | Value: "bash", 1100 | }, { 1101 | Key: "process_name", 1102 | Value: "bash", 1103 | }}, 1104 | Fields: []FieldKeyValue{{ 1105 | Key: "voluntary_context_switches", 1106 | Value: int64(42), 1107 | }, { 1108 | Key: "memory_rss", 1109 | Value: int64(5103616), 1110 | }, { 1111 | Key: "rlimit_memory_data_hard", 1112 | Value: int64(2147483647), 1113 | }, { 1114 | Key: "cpu_time_user", 1115 | Value: 0.02, 1116 | }, { 1117 | Key: "rlimit_file_locks_soft", 1118 | Value: int64(2147483647), 1119 | }, { 1120 | Key: "pid", 1121 | Value: int64(29417), 1122 | }, { 1123 | Key: "cpu_time_nice", 1124 | Value: 0.0, 1125 | }, { 1126 | Key: "rlimit_memory_locked_soft", 1127 | Value: int64(65536), 1128 | }, { 1129 | Key: "read_count", 1130 | Value: int64(259), 1131 | }, { 1132 | Key: "rlimit_memory_vms_hard", 1133 | Value: int64(2147483647), 1134 | }, { 1135 | Key: "memory_swap", 1136 | Value: int64(0), 1137 | }, { 1138 | 
Key: "rlimit_num_fds_soft", 1139 | Value: int64(1024), 1140 | }, { 1141 | Key: "rlimit_nice_priority_hard", 1142 | Value: int64(0), 1143 | }, { 1144 | Key: "cpu_time_soft_irq", 1145 | Value: 0.0, 1146 | }, { 1147 | Key: "cpu_time", 1148 | Value: int64(0), 1149 | }, { 1150 | Key: "rlimit_memory_locked_hard", 1151 | Value: int64(65536), 1152 | }, { 1153 | Key: "realtime_priority", 1154 | Value: int64(0), 1155 | }, { 1156 | Key: "signals_pending", 1157 | Value: int64(0), 1158 | }, { 1159 | Key: "nice_priority", 1160 | Value: int64(20), 1161 | }, { 1162 | Key: "cpu_time_idle", 1163 | Value: 0.0, 1164 | }, { 1165 | Key: "memory_stack", 1166 | Value: int64(139264), 1167 | }, { 1168 | Key: "memory_locked", 1169 | Value: int64(0), 1170 | }, { 1171 | Key: "rlimit_memory_stack_soft", 1172 | Value: int64(8388608), 1173 | }, { 1174 | Key: "cpu_time_iowait", 1175 | Value: 0.0, 1176 | }, { 1177 | Key: "cpu_time_guest", 1178 | Value: 0.0, 1179 | }, { 1180 | Key: "cpu_time_guest_nice", 1181 | Value: 0.0, 1182 | }, { 1183 | Key: "rlimit_memory_data_soft", 1184 | Value: int64(2147483647), 1185 | }, { 1186 | Key: "read_bytes", 1187 | Value: int64(0), 1188 | }, { 1189 | Key: "rlimit_cpu_time_soft", 1190 | Value: int64(2147483647), 1191 | }, { 1192 | Key: "involuntary_context_switches", 1193 | Value: int64(2), 1194 | }, { 1195 | Key: "write_bytes", 1196 | Value: int64(106496), 1197 | }, { 1198 | Key: "cpu_time_system", 1199 | Value: 0.0, 1200 | }, { 1201 | Key: "cpu_time_irq", 1202 | Value: 0.0, 1203 | }, { 1204 | Key: "cpu_usage", 1205 | Value: 0.0, 1206 | }, { 1207 | Key: "memory_vms", 1208 | Value: int64(21659648), 1209 | }, { 1210 | Key: "memory_data", 1211 | Value: int64(1576960), 1212 | }, { 1213 | Key: "rlimit_memory_stack_hard", 1214 | Value: int64(2147483647), 1215 | }, { 1216 | Key: "num_threads", 1217 | Value: int64(1), 1218 | }, { 1219 | Key: "rlimit_memory_rss_soft", 1220 | Value: int64(2147483647), 1221 | }, { 1222 | Key: "rlimit_realtime_priority_soft", 1223 | Value: int64(0), 1224 | }, { 1225 | Key: "num_fds", 1226 | Value: int64(4), 1227 | }, { 1228 | Key: "write_count", 1229 | Value: int64(35), 1230 | }, { 1231 | Key: "rlimit_signals_pending_soft", 1232 | Value: int64(78994), 1233 | }, { 1234 | Key: "cpu_time_steal", 1235 | Value: 0.0, 1236 | }, { 1237 | Key: "rlimit_num_fds_hard", 1238 | Value: int64(4096), 1239 | }, { 1240 | Key: "rlimit_file_locks_hard", 1241 | Value: int64(2147483647), 1242 | }, { 1243 | Key: "rlimit_cpu_time_hard", 1244 | Value: int64(2147483647), 1245 | }, { 1246 | Key: "rlimit_signals_pending_hard", 1247 | Value: int64(78994), 1248 | }, { 1249 | Key: "rlimit_nice_priority_soft", 1250 | Value: int64(0), 1251 | }, { 1252 | Key: "rlimit_memory_rss_hard", 1253 | Value: int64(2147483647), 1254 | }, { 1255 | Key: "rlimit_memory_vms_soft", 1256 | Value: int64(2147483647), 1257 | }, { 1258 | Key: "rlimit_realtime_priority_hard", 1259 | Value: int64(0), 1260 | }}, 1261 | Time: time.Unix(0, 1517620624000000000), 1262 | }, 1263 | }} 1264 | 1265 | func singleEntry(s string) func() ([]byte, int) { 1266 | return func() ([]byte, int) { 1267 | return []byte(s), 1 1268 | } 1269 | } 1270 | 1271 | func BenchmarkDecodeEntriesSkipping(b *testing.B) { 1272 | for _, bench := range scanEntriesBenchmarks { 1273 | b.Run(bench.name, func(b *testing.B) { 1274 | data, total := bench.makeData() 1275 | c := qt.New(b) 1276 | // Sanity check that the decoder is doing what we're expecting. 1277 | // Only check the first entry because checking them all is slow. 
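// The timed loop below only calls Next, so it measures how quickly the
// decoder can skip over whole entries without decoding their sections.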
1278 | dec := NewDecoderWithBytes(data) 1279 | assertDecodeResult(c, dec, []Point{bench.expect}, true, errPositions{}) 1280 | b.ReportAllocs() 1281 | b.ResetTimer() 1282 | b.SetBytes(int64(len(data))) 1283 | for i := 0; i < b.N; i++ { 1284 | n := 0 1285 | dec := NewDecoderWithBytes(data) 1286 | for dec.Next() { 1287 | n++ 1288 | } 1289 | if n != total { 1290 | b.Fatalf("unexpected read count; got %v want %v", n, total) 1291 | } 1292 | } 1293 | }) 1294 | } 1295 | } 1296 | 1297 | func BenchmarkDecodeEntriesWithoutSkipping(b *testing.B) { 1298 | for _, bench := range scanEntriesBenchmarks { 1299 | b.Run(bench.name, func(b *testing.B) { 1300 | data, total := bench.makeData() 1301 | c := qt.New(b) 1302 | // Sanity check that the decoder is doing what we're expecting. 1303 | // Only check the first entry because checking them all is slow. 1304 | dec := NewDecoderWithBytes(data) 1305 | assertDecodeResult(c, dec, []Point{bench.expect}, true, errPositions{}) 1306 | b.ReportAllocs() 1307 | b.ResetTimer() 1308 | b.SetBytes(int64(len(data))) 1309 | for i := 0; i < b.N; i++ { 1310 | n := 0 1311 | dec := NewDecoderWithBytes(data) 1312 | for dec.Next() { 1313 | dec.Measurement() 1314 | for { 1315 | key, _, _ := dec.NextTag() 1316 | if key == nil { 1317 | break 1318 | } 1319 | } 1320 | for { 1321 | key, _, _ := dec.NextField() 1322 | if key == nil { 1323 | break 1324 | } 1325 | } 1326 | dec.TimeBytes() 1327 | n++ 1328 | } 1329 | if n != total { 1330 | b.Fatalf("unexpected read count; got %v want %v", n, total) 1331 | } 1332 | } 1333 | }) 1334 | } 1335 | } 1336 | 1337 | type errPos struct { 1338 | line int64 1339 | column int 1340 | } 1341 | 1342 | func (p errPos) String() string { 1343 | return fmt.Sprintf("%d:%d", p.line, p.column) 1344 | } 1345 | 1346 | // errPositions records error positions so that we can avoid 1347 | // mentioning them directly in test cases. 1348 | type errPositions struct { 1349 | repl *strings.Replacer 1350 | } 1351 | 1352 | // makeErr returns s with ∑ markers replaced with 1353 | // their line:column equivalents. 1354 | func (errp errPositions) makeErr(s string) string { 1355 | if errp.repl == nil { 1356 | return s 1357 | } 1358 | s1 := errp.repl.Replace(s) 1359 | if strings.Count(s1, "∑") > 0 { 1360 | panic(fmt.Errorf("no value for error marker found in %q (remaining %q)", s, s1)) 1361 | } 1362 | return s1 1363 | } 1364 | 1365 | // makeErrPositions returns the positions of all the ∑ markers 1366 | // in the text keyed by the character following the ∑. 1367 | // 1368 | // It also returns the text with the ∑ markers removed. 1369 | func makeErrPositions(text string) (errPositions, string) { 1370 | buf := make([]byte, 0, len(text)) 1371 | currPos := errPos{ 1372 | line: 1, 1373 | column: 1, 1374 | } 1375 | var repls []string 1376 | for len(text) > 0 { 1377 | r, size := utf8.DecodeRuneInString(text) 1378 | switch r { 1379 | case '\n': 1380 | currPos.line++ 1381 | currPos.column = 1 1382 | buf = append(buf, '\n') 1383 | case '∑': 1384 | _, size = utf8.DecodeRuneInString(text[size:]) 1385 | repls = append(repls, text[:len("∑")+size], currPos.String()) 1386 | text = text[len("∑"):] 1387 | default: 1388 | buf = append(buf, text[:size]...) 
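// Column positions are counted in bytes, not runes, to match
// DecodeError.Column.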
1389 | currPos.column += size 1390 | } 1391 | text = text[size:] 1392 | } 1393 | return errPositions{ 1394 | repl: strings.NewReplacer(repls...), 1395 | }, string(buf) 1396 | } 1397 | 1398 | func TestErrorPositions(t *testing.T) { 1399 | // Although this is just testing an internal testing function, 1400 | // that function itself is used as the basis for other tests, 1401 | // so we're providing some base assurance for those. 1402 | tests := []struct { 1403 | text string 1404 | err string 1405 | expectErr string 1406 | expectText string 1407 | }{{ 1408 | text: "", 1409 | err: "", 1410 | expectErr: "", 1411 | }, { 1412 | text: "∑¹", 1413 | err: "at line ∑¹: something", 1414 | expectErr: "at line 1:1: something", 1415 | }, { 1416 | text: "a\nbác∑¹helélo∑²blah\n∑³x\n", 1417 | err: "foo: at line ∑¹: blah", 1418 | expectErr: "foo: at line 2:5: blah", 1419 | expectText: "a\nbácheléloblah\nx\n", 1420 | }, { 1421 | text: "a\nbác∑¹helélo∑²blah\n∑³x\n", 1422 | err: "foo: at line ∑²: blah", 1423 | expectErr: "foo: at line 2:12: blah", 1424 | expectText: "a\nbácheléloblah\nx\n", 1425 | }, { 1426 | text: "a\nbác∑¹helélo∑²blah\n∑³x\n", 1427 | err: "foo: at line ∑³: blah", 1428 | expectErr: "foo: at line 3:1: blah", 1429 | expectText: "a\nbácheléloblah\nx\n", 1430 | }, { 1431 | text: "a∑¹x", 1432 | err: "at line ∑¹: blah", 1433 | expectErr: "at line 1:2: blah", 1434 | expectText: "ax", 1435 | }} 1436 | c := qt.New(t) 1437 | for _, test := range tests { 1438 | c.Run("", func(c *qt.C) { 1439 | eps, text := makeErrPositions(test.text) 1440 | c.Assert(text, qt.Equals, test.expectText) 1441 | c.Assert(eps.makeErr(test.err), qt.Equals, test.expectErr) 1442 | }) 1443 | } 1444 | } 1445 | 1446 | func TestDecodeLargeDataWithReader(t *testing.T) { 1447 | c := qt.New(t) 1448 | r, w := io.Pipe() 1449 | const maxTagCount = 9 1450 | const maxFieldCount = 4 1451 | const npoints = 2000 1452 | go func() { 1453 | defer w.Close() 1454 | bw := bufio.NewWriter(w) 1455 | defer bw.Flush() 1456 | g := newTokenGenerator() 1457 | var enc Encoder 1458 | enc.SetLax(true) // Allow out-of-order tag keys. 
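// The token generator is deterministically seeded (rand.NewSource(0) in
// newTokenGenerator), so the reading side below can regenerate exactly the
// same sequence of tokens and timestamps and compare them with what it
// decodes.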
1459 | for i := 0; i < npoints; i++ { 1460 | ntags := g.rand.Intn(maxTagCount + 1) 1461 | nfields := g.rand.Intn(maxFieldCount) + 1 1462 | timestamp := g.rand.Int63n(0xffff_ffff_ffff) 1463 | enc.StartLineRaw(g.token()) 1464 | for j := 0; j < ntags; j++ { 1465 | enc.AddTagRaw(g.token(), g.token()) 1466 | } 1467 | for j := 0; j < nfields; j++ { 1468 | key, val := g.token(), g.token() 1469 | v, err := NewValueFromBytes(String, val) 1470 | if err != nil { 1471 | panic(err) 1472 | } 1473 | enc.AddFieldRaw(key, v) 1474 | } 1475 | enc.EndLine(time.Unix(0, timestamp)) 1476 | bw.Write(enc.Bytes()) 1477 | enc.Reset() 1478 | } 1479 | }() 1480 | g := newTokenGenerator() 1481 | var wc writeCounter 1482 | dec := NewDecoder(io.TeeReader(r, &wc)) 1483 | n := 0 1484 | for ; dec.Next(); n++ { 1485 | if n >= npoints { 1486 | c.Fatalf("too many points decoded") 1487 | } 1488 | wantNtags := g.rand.Intn(maxTagCount + 1) 1489 | wantNfields := g.rand.Intn(maxFieldCount) + 1 1490 | wantTimestamp := g.rand.Int63n(0xffff_ffff_ffff) 1491 | m, err := dec.Measurement() 1492 | c.Assert(err, qt.IsNil) 1493 | c.Check(m, qt.DeepEquals, g.token(), qt.Commentf("n %d", n)) 1494 | tagi := 0 1495 | for { 1496 | key, val, err := dec.NextTag() 1497 | c.Assert(err, qt.IsNil) 1498 | if key == nil { 1499 | break 1500 | } 1501 | if tagi >= wantNtags { 1502 | c.Fatalf("too many tags found on entry %d", n) 1503 | } 1504 | wantKey, wantVal := g.token(), g.token() 1505 | c.Check(key, qt.DeepEquals, wantKey) 1506 | c.Check(val, qt.DeepEquals, wantVal) 1507 | tagi++ 1508 | } 1509 | c.Assert(tagi, qt.Equals, wantNtags) 1510 | fieldi := 0 1511 | for { 1512 | key, val, err := dec.NextField() 1513 | c.Assert(err, qt.IsNil) 1514 | if key == nil { 1515 | break 1516 | } 1517 | if fieldi >= wantNfields { 1518 | c.Fatalf("too many tags found on entry %d", n) 1519 | } 1520 | wantKey, wantVal := g.token(), g.token() 1521 | c.Check(key, qt.DeepEquals, wantKey) 1522 | c.Check(val.Interface(), qt.Equals, string(wantVal)) 1523 | fieldi++ 1524 | } 1525 | c.Assert(fieldi, qt.Equals, wantNfields) 1526 | t, err := dec.Time(Nanosecond, time.Time{}) 1527 | c.Check(err, qt.IsNil) 1528 | c.Check(t.UnixNano(), qt.Equals, wantTimestamp) 1529 | } 1530 | c.Assert(n, qt.Equals, npoints) 1531 | c.Logf("total bytes: %v", wc.n) 1532 | } 1533 | 1534 | type writeCounter struct { 1535 | n int 1536 | } 1537 | 1538 | func (w *writeCounter) Write(buf []byte) (int, error) { 1539 | w.n += len(buf) 1540 | return len(buf), nil 1541 | } 1542 | 1543 | func newTokenGenerator() *tokenGenerator { 1544 | return &tokenGenerator{ 1545 | rand: rand.New(rand.NewSource(0)), 1546 | } 1547 | } 1548 | 1549 | type tokenGenerator struct { 1550 | rand *rand.Rand 1551 | } 1552 | 1553 | const alphabet = "abcdefghijklmnopqrstuvwxyz =, =, =," 1554 | 1555 | func (g *tokenGenerator) token() []byte { 1556 | data := make([]byte, g.rand.Intn(40)+1) 1557 | for i := range data { 1558 | data[i] = alphabet[g.rand.Intn(len(alphabet))] 1559 | } 1560 | return data 1561 | } 1562 | -------------------------------------------------------------------------------- /lineprotocol/doc.go: -------------------------------------------------------------------------------- 1 | // Package lineprotocol implements a codec for the InfluxDB line-protocol syntax. 2 | // See the documentation of the Encoder and Decoder types for details. 
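//
// As a rough sketch of a round trip (the values are arbitrary and error
// handling is omitted):
//
//	var enc Encoder
//	enc.StartLine("weather")
//	enc.AddTag("location", "us-midwest")
//	enc.AddField("temperature", MustNewValue(int64(82)))
//	enc.EndLine(time.Unix(0, 1465839830100400200))
//
//	dec := NewDecoderWithBytes(enc.Bytes())
//	for dec.Next() {
//		measurement, err := dec.Measurement()
//		// ... read tags, fields and the timestamp in the same way.
//		_, _ = measurement, err
//	}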
3 | package lineprotocol 4 | 5 | -------------------------------------------------------------------------------- /lineprotocol/encoder.go: -------------------------------------------------------------------------------- 1 | package lineprotocol 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "strconv" 7 | "time" 8 | "unicode/utf8" 9 | ) 10 | 11 | // Encoder encapsulates the encoding part of the line protocol. 12 | // 13 | // The zero value of an Encoder is ready to use. 14 | // 15 | // It is associated with a []byte buffer which is appended to 16 | // each time a method is called. 17 | // 18 | // Methods must be called in the same order that their 19 | // respective data appears in the line-protocol point (Encoder 20 | // doesn't reorder anything). That is, for a given entry, methods 21 | // must be called in the following order: 22 | // 23 | // StartLine 24 | // AddTag (zero or more times) 25 | // AddField (one or more times) 26 | // EndLine (optional) 27 | // 28 | // When an error is encountered encoding a point, 29 | // the Err method returns it, and the erroneous point 30 | // is omitted from the result. 31 | // 32 | type Encoder struct { 33 | buf []byte 34 | prevTagKey []byte 35 | // lineStart holds the index of the start of the current line. 36 | lineStart int 37 | // section holds the section of line that's about to be added. 38 | section section 39 | // lax holds whether keys and values are checked for validity 40 | // when being encoded. 41 | lax bool 42 | // lineHasError records whether there's been an error encountered 43 | // on the current entry, in which case, no further data will be added 44 | // until the next entry. 45 | lineHasError bool 46 | // err holds the most recent error encountered when encoding. 47 | err error 48 | // pointIndex holds the index of the current point being encoded. 49 | pointIndex int 50 | // precisionMultiplier holds the timestamp precision. 51 | // Timestamps are divided by this when encoded. 52 | precisionMultiplier int64 53 | } 54 | 55 | // Bytes returns the current line buffer. 56 | func (e *Encoder) Bytes() []byte { 57 | return e.buf 58 | } 59 | 60 | // SetBuffer sets the buffer used for the line, 61 | // clears any current error and resets the line. 62 | // 63 | // Encoded data will be appended to buf. 64 | func (e *Encoder) SetBuffer(buf []byte) { 65 | e.buf = buf 66 | e.pointIndex = 0 67 | e.ClearErr() 68 | e.section = measurementSection 69 | } 70 | 71 | // SetPrecision sets the precision used to encode the time stamps 72 | // in the encoded messages. The default precision is Nanosecond. 73 | // Timestamps are truncated to this precision. 74 | func (e *Encoder) SetPrecision(p Precision) { 75 | e.precisionMultiplier = int64(p.Duration()) 76 | } 77 | 78 | // Reset resets the line, clears any error, and starts writing at the start 79 | // of the line buffer slice. 80 | func (e *Encoder) Reset() { 81 | e.SetBuffer(e.buf[:0]) 82 | } 83 | 84 | // SetLax sets whether the Encoder methods check fully for validity or not. 85 | // When Lax is true: 86 | // 87 | // - measurement names, tag and field keys aren't checked for invalid characters 88 | // - field values passed to AddRawField are not bounds or syntax checked 89 | // - tag keys are not checked to be in alphabetical order. 90 | // 91 | // This can be used to increase performance in 92 | // places where values are already known to be valid. 
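//
// For example, when re-encoding entries whose components are already known
// to be valid (say, values that have just been decoded by this package),
// the extra checks only add overhead.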
93 | func (e *Encoder) SetLax(lax bool) { 94 | e.lax = lax 95 | } 96 | 97 | // Err returns the first encoding error that's been encountered so far, 98 | // if any. 99 | // TODO define a type so that we can get access to the line where it happened. 100 | func (e *Encoder) Err() error { 101 | return e.err 102 | } 103 | 104 | // ClearErr clears any current encoding error. 105 | func (e *Encoder) ClearErr() { 106 | e.err = nil 107 | } 108 | 109 | // StartLine starts writing a line with the given measurement name. If this 110 | // is called when it's not possible to start a new entry, or the 111 | // measurement cannot be encoded, it will return an error. 112 | // 113 | // Starting a new entry is always allowed when there's been an error 114 | // encoding the previous entry. 115 | func (e *Encoder) StartLine(measurement string) { 116 | section := e.section 117 | e.pointIndex++ 118 | e.section = tagSection 119 | if section == tagSection { 120 | // This error is unusual, because it indicates an error on the previous 121 | // line, even though there's probably not an error on this line, so 122 | // don't return here. This means that unfortunately, if you 123 | // add a line with an invalid measurement immediately after 124 | // adding a line with no fields, you won't ever see the second 125 | // of those two errors. Clients can avoid that possibility by making 126 | // sure to call EndLine even if they don't wish to add a timestamp. 127 | e.setErrorf("cannot start line without adding at least one field to previous line") 128 | } 129 | e.prevTagKey = e.prevTagKey[:0] 130 | e.lineStart = len(e.buf) 131 | e.lineHasError = false 132 | if !e.lax { 133 | if !validMeasurementOrKey(measurement) { 134 | e.setErrorf("invalid measurement %q", measurement) 135 | return 136 | } 137 | } 138 | if section != measurementSection && section != endSection { 139 | // This isn't the first line, and EndLine hasn't been explicitly called, 140 | // so we need a newline separator. 141 | e.buf = append(e.buf, '\n') 142 | } 143 | e.buf = measurementEscapes.appendEscaped(e.buf, measurement) 144 | } 145 | 146 | // StartLineRaw is the same as Start except that it accepts a byte slice 147 | // instead of a string, which can save allocations. 148 | func (e *Encoder) StartLineRaw(name []byte) { 149 | e.StartLine(unsafeBytesToString(name)) 150 | } 151 | 152 | // AddTag adds a tag to the line. Tag keys must be added in lexical order 153 | // and AddTag must be called after StartLine and before AddField. 154 | // 155 | // Neither the key or the value may contain non-printable ASCII 156 | // characters (0x00 to 0x1f and 0x7f) or invalid UTF-8 or 157 | // a trailing backslash character. 158 | func (e *Encoder) AddTag(key, value string) { 159 | if e.section != tagSection { 160 | e.setErrorf("tag must be added after adding a measurement and before adding fields") 161 | return 162 | } 163 | if !e.lax { 164 | if !validMeasurementOrKey(key) { 165 | e.setErrorf("invalid tag key %q", key) 166 | return 167 | } 168 | if !validMeasurementOrKey(value) { 169 | e.setErrorf("invalid tag value %s=%q", key, value) 170 | return 171 | } 172 | if key <= string(e.prevTagKey) { 173 | e.setErrorf("tag key %q out of order (previous key %q)", key, e.prevTagKey) 174 | return 175 | } 176 | // We need to copy the tag key because AddTag can be called 177 | // by AddTagRaw with a slice of byte which might change from 178 | // call to call. 179 | e.prevTagKey = append(e.prevTagKey[:0], key...) 
180 | } 181 | if e.lineHasError { 182 | return 183 | } 184 | e.buf = append(e.buf, ',') 185 | e.buf = tagKeyEscapes.appendEscaped(e.buf, key) 186 | e.buf = append(e.buf, '=') 187 | e.buf = tagValEscapes.appendEscaped(e.buf, value) 188 | } 189 | 190 | // AddTagRaw is like AddTag except that it accepts byte slices 191 | // instead of strings, which can save allocations. Note that 192 | // AddRawTag _will_ escape metacharacters such as "=" 193 | // and "," when they're present. 194 | func (e *Encoder) AddTagRaw(key, value []byte) { 195 | e.AddTag(unsafeBytesToString(key), unsafeBytesToString(value)) 196 | } 197 | 198 | // AddField adds a field to the line. AddField must be called after AddTag 199 | // or AddMeasurement. At least one field must be added to each line. 200 | func (e *Encoder) AddField(key string, value Value) { 201 | if e.section != fieldSection && e.section != tagSection { 202 | e.setErrorf("field must be added after tag or measurement section") 203 | return 204 | } 205 | section := e.section 206 | e.section = fieldSection 207 | if !e.lax { 208 | if !validMeasurementOrKey(key) { 209 | e.setErrorf("invalid field key %q", key) 210 | return 211 | } 212 | } 213 | if e.lineHasError { 214 | return 215 | } 216 | if section == tagSection { 217 | e.buf = append(e.buf, ' ') 218 | } else { 219 | e.buf = append(e.buf, ',') 220 | } 221 | e.buf = fieldKeyEscapes.appendEscaped(e.buf, key) 222 | e.buf = append(e.buf, '=') 223 | e.buf = value.AppendBytes(e.buf) 224 | } 225 | 226 | // AddFieldRaw is like AddField except that the key is represented 227 | // as a byte slice instead of a string, which can save allocations. 228 | // TODO would it be better for this to be: 229 | // AddFieldRaw(key []byte, kind ValueKind, data []byte) error 230 | // so that we could respect lax and be more efficient when reading directly 231 | // from a Decoder? 232 | func (e *Encoder) AddFieldRaw(key []byte, value Value) { 233 | e.AddField(unsafeBytesToString(key), value) 234 | } 235 | 236 | var ( 237 | minTime = time.Unix(0, math.MinInt64) 238 | maxTime = time.Unix(0, math.MaxInt64) 239 | ) 240 | 241 | // EndLine adds the timestamp and newline at the end of the line. 242 | // If t is zero, no timestamp will written and this method will do nothing. 243 | // If the time is outside the maximum representable time range, 244 | // an ErrRange error will be returned. 245 | func (e *Encoder) EndLine(t time.Time) { 246 | if e.section != fieldSection { 247 | e.setErrorf("timestamp must be added after adding at least one field") 248 | return 249 | } 250 | e.section = endSection 251 | if t.IsZero() { 252 | // Zero timestamp. All we need is a newline. 253 | if !e.lineHasError { 254 | e.buf = append(e.buf, '\n') 255 | } 256 | return 257 | } 258 | if t.Before(minTime) || t.After(maxTime) { 259 | e.setErrorf("timestamp %s: %w", t.Format(time.RFC3339), ErrValueOutOfRange) 260 | return 261 | } 262 | if e.lineHasError { 263 | return 264 | } 265 | e.buf = append(e.buf, ' ') 266 | timestamp := t.UnixNano() 267 | if m := e.precisionMultiplier; m > 0 { 268 | timestamp /= m 269 | } 270 | e.buf = strconv.AppendInt(e.buf, timestamp, 10) 271 | e.buf = append(e.buf, '\n') 272 | } 273 | 274 | func (e *Encoder) setErrorf(format string, arg ...interface{}) { 275 | e.lineHasError = true 276 | if e.err == nil { 277 | if e.pointIndex <= 1 { 278 | e.err = fmt.Errorf(format, arg...) 
279 | } else { 280 | e.err = fmt.Errorf("encoding point %d: %w", e.pointIndex-1, fmt.Errorf(format, arg...)) 281 | } 282 | } 283 | // Remove the partially encoded part of the current line. 284 | e.buf = e.buf[:e.lineStart] 285 | if len(e.buf) == 0 { 286 | // Make sure the next entry doesn't add a newline. 287 | e.section = measurementSection 288 | } 289 | } 290 | 291 | // validMeasurementOrKey reports whether s can be 292 | // encoded as a valid measurement or key. 293 | func validMeasurementOrKey(s string) bool { 294 | if s == "" { 295 | return false 296 | } 297 | if !utf8.ValidString(s) { 298 | return false 299 | } 300 | for i := 0; i < len(s); i++ { 301 | if nonPrintable.get(s[i]) { 302 | return false 303 | } 304 | } 305 | //lint:ignore S1008 Leave my comment alone! 306 | if s[len(s)-1] == '\\' { 307 | // A trailing backslash can't be round-tripped. 308 | return false 309 | } 310 | return true 311 | } 312 | -------------------------------------------------------------------------------- /lineprotocol/encoder_test.go: -------------------------------------------------------------------------------- 1 | package lineprotocol 2 | 3 | import ( 4 | "sort" 5 | "testing" 6 | "time" 7 | 8 | qt "github.com/frankban/quicktest" 9 | ) 10 | 11 | func TestEncoderWithDecoderTests(t *testing.T) { 12 | c := qt.New(t) 13 | runTests := func(c *qt.C, lax bool) { 14 | for _, test := range decoderTests { 15 | if pointsHaveError(test.expect) { 16 | // Can't encode a test that results in an error. 17 | continue 18 | } 19 | c.Run(test.testName, func(c *qt.C) { 20 | // Always use sorted tags even though they might not 21 | // be sorted in the test case. 22 | points := append([]Point(nil), test.expect...) 23 | for i := range points { 24 | points[i] = pointWithSortedTags(points[i]) 25 | } 26 | var e Encoder 27 | e.SetLax(lax) 28 | for _, p := range points { 29 | encodePoint(&e, p) 30 | } 31 | c.Assert(e.Err(), qt.IsNil) 32 | data := e.Bytes() 33 | c.Logf("encoded: %q", data) 34 | // Check that the data round-trips OK 35 | dec := NewDecoderWithBytes(data) 36 | assertDecodeResult(c, dec, points, false, errPositions{}) 37 | }) 38 | } 39 | } 40 | c.Run("strict", func(c *qt.C) { 41 | runTests(c, false) 42 | }) 43 | c.Run("lax", func(c *qt.C) { 44 | runTests(c, true) 45 | }) 46 | } 47 | 48 | func TestEncoderErrorOmitsLine(t *testing.T) { 49 | c := qt.New(t) 50 | var e Encoder 51 | e.StartLine("m1") 52 | e.AddField("\xff", MustNewValue(int64(1))) 53 | c.Assert(e.Err(), qt.ErrorMatches, `invalid field key "\\xff"`) 54 | c.Assert(e.Bytes(), qt.HasLen, 0) 55 | e.ClearErr() 56 | 57 | // Check that an error after the first line doesn't erase 58 | // everything. 59 | e.StartLine("m1") 60 | e.AddField("f", MustNewValue(int64(1))) 61 | e.StartLine("m2") 62 | c.Assert(e.Err(), qt.IsNil) 63 | c.Assert(string(e.Bytes()), qt.Equals, "m1 f=1i\nm2") 64 | e.AddField("g", MustNewValue(int64(3))) 65 | e.AddField("\\", MustNewValue(int64(4))) 66 | c.Assert(e.Err(), qt.ErrorMatches, `encoding point 2: invalid field key "\\\\"`) 67 | c.Assert(string(e.Bytes()), qt.Equals, "m1 f=1i") 68 | 69 | // Check that we can add a new line while retaining the first error. 
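// (Err keeps returning the first error until ClearErr is called, even though
// later lines are encoded successfully.)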
70 | e.StartLine("m3") 71 | e.AddField("f", MustNewValue(int64(3))) 72 | c.Assert(string(e.Bytes()), qt.Equals, "m1 f=1i\nm3 f=3i") 73 | c.Assert(e.Err(), qt.ErrorMatches, `encoding point 2: invalid field key "\\\\"`) 74 | } 75 | 76 | func TestEncoderErrorWithOutOfOrderTags(t *testing.T) { 77 | c := qt.New(t) 78 | var e Encoder 79 | e.StartLine("m1") 80 | e.AddTag("b", "1") 81 | c.Assert(e.Err(), qt.IsNil) 82 | e.AddTag("a", "1") 83 | c.Assert(e.Err(), qt.ErrorMatches, `tag key "a" out of order \(previous key "b"\)`) 84 | } 85 | 86 | func TestEncoderAddFieldBeforeMeasurement(t *testing.T) { 87 | c := qt.New(t) 88 | var e Encoder 89 | e.AddField("hello", MustNewValue(int64(1))) 90 | c.Assert(e.Err(), qt.ErrorMatches, `field must be added after tag or measurement section`) 91 | } 92 | 93 | func TestEncoderEndLineWithNoField(t *testing.T) { 94 | c := qt.New(t) 95 | var e Encoder 96 | e.StartLine("hello") 97 | e.EndLine(time.Time{}) 98 | c.Assert(e.Err(), qt.ErrorMatches, `timestamp must be added after adding at least one field`) 99 | } 100 | 101 | func TestEncoderEndLineWithNoTime(t *testing.T) { 102 | c := qt.New(t) 103 | var e Encoder 104 | e.StartLine("m") 105 | e.AddField("f", MustNewValue(int64(3))) 106 | e.EndLine(time.Time{}) 107 | c.Assert(e.Err(), qt.IsNil) 108 | c.Assert(string(e.Bytes()), qt.Equals, "m f=3i\n") 109 | } 110 | 111 | func TestEncoderAddTagBeforeStartLine(t *testing.T) { 112 | c := qt.New(t) 113 | var e Encoder 114 | e.AddTag("a", "b") 115 | c.Assert(e.Err(), qt.ErrorMatches, `tag must be added after adding a measurement and before adding fields`) 116 | } 117 | 118 | func TestEncoderAddTagAfterAddField(t *testing.T) { 119 | c := qt.New(t) 120 | var e Encoder 121 | e.StartLine("m") 122 | e.AddField("f", MustNewValue(int64(12))) 123 | e.AddTag("a", "b") 124 | c.Assert(e.Err(), qt.ErrorMatches, `tag must be added after adding a measurement and before adding fields`) 125 | } 126 | 127 | func TestEncoderStartLineWithNoFieldsOnPreviousLine(t *testing.T) { 128 | c := qt.New(t) 129 | var e Encoder 130 | e.StartLine("m") 131 | e.StartLine("n") 132 | c.Assert(e.Err(), qt.ErrorMatches, `encoding point 1: cannot start line without adding at least one field to previous line`) 133 | } 134 | 135 | func TestEncoderStartLineWithInvalidMeasurementAndNoFieldsOnPreviousLine(t *testing.T) { 136 | c := qt.New(t) 137 | var e Encoder 138 | e.StartLine("m") 139 | e.StartLine("") 140 | c.Assert(e.Err(), qt.ErrorMatches, `encoding point 1: cannot start line without adding at least one field to previous line`) 141 | 142 | // The current line is now in error state, so fields won't be added. 143 | e.AddField("f", MustNewValue(int64(1))) 144 | c.Assert(e.Bytes(), qt.HasLen, 0) 145 | 146 | // The next line gets added OK though. 
147 | e.StartLine("m") 148 | e.AddField("f", MustNewValue(int64(1))) 149 | c.Assert(string(e.Bytes()), qt.Equals, "m f=1i") 150 | } 151 | 152 | func TestEncoderWithPrecision(t *testing.T) { 153 | c := qt.New(t) 154 | var e Encoder 155 | e.StartLine("x") 156 | e.SetPrecision(Second) 157 | e.AddField("f", MustNewValue(int64(1))) 158 | e.EndLine(time.Unix(0, 1615196563_299_053_942)) 159 | c.Assert(string(e.Bytes()), qt.Equals, "x f=1i 1615196563\n") 160 | 161 | e.Reset() 162 | e.SetPrecision(Microsecond) 163 | e.StartLine("x") 164 | e.AddField("f", MustNewValue(int64(1))) 165 | e.EndLine(time.Unix(0, 1615196563_299_053_942)) 166 | c.Assert(string(e.Bytes()), qt.Equals, "x f=1i 1615196563299053\n") 167 | } 168 | 169 | var encoderDataErrorTests = []struct { 170 | testName string 171 | point Point 172 | expectError string 173 | }{{ 174 | testName: "EmptyMeasurement", 175 | point: Point{ 176 | Measurement: "", 177 | Fields: []FieldKeyValue{{ 178 | Key: "f", 179 | Value: int64(1), 180 | }}, 181 | }, 182 | expectError: `invalid measurement ""`, 183 | }, { 184 | testName: "NonPrintableMeasurement", 185 | point: Point{ 186 | Measurement: "\x01", 187 | Fields: []FieldKeyValue{{ 188 | Key: "f", 189 | Value: int64(1), 190 | }}, 191 | }, 192 | expectError: `invalid measurement "\\x01"`, 193 | }, { 194 | testName: "NonUTF8Measurement", 195 | point: Point{ 196 | Measurement: "\xff", 197 | Fields: []FieldKeyValue{{ 198 | Key: "f", 199 | Value: int64(1), 200 | }}, 201 | }, 202 | expectError: `invalid measurement "\\xff"`, 203 | }, { 204 | testName: "MeasurementWithTrailingBackslash", 205 | point: Point{ 206 | Measurement: "x\\", 207 | Fields: []FieldKeyValue{{ 208 | Key: "f", 209 | Value: int64(1), 210 | }}, 211 | }, 212 | expectError: `invalid measurement "x\\\\"`, 213 | }, { 214 | testName: "InvalidTagKey", 215 | point: Point{ 216 | Measurement: "m", 217 | Tags: []TagKeyValue{{ 218 | Key: "", 219 | Value: "x", 220 | }, { 221 | Key: "b", 222 | Value: "x", 223 | }}, 224 | Fields: []FieldKeyValue{{ 225 | Key: "f", 226 | Value: int64(1), 227 | }}, 228 | }, 229 | expectError: `invalid tag key ""`, 230 | }, { 231 | testName: "InvalidTagValue", 232 | point: Point{ 233 | Measurement: "m", 234 | Tags: []TagKeyValue{{ 235 | Key: "x", 236 | Value: "", 237 | }}, 238 | Fields: []FieldKeyValue{{ 239 | Key: "f", 240 | Value: int64(1), 241 | }}, 242 | }, 243 | expectError: `invalid tag value x=""`, 244 | }, { 245 | testName: "OutOfOrderTag", 246 | point: Point{ 247 | Measurement: "m", 248 | Tags: []TagKeyValue{{ 249 | Key: "x", 250 | Value: "1", 251 | }, { 252 | Key: "a", 253 | Value: "1", 254 | }}, 255 | Fields: []FieldKeyValue{{ 256 | Key: "f", 257 | Value: int64(1), 258 | }}, 259 | }, 260 | expectError: `tag key "a" out of order \(previous key "x"\)`, 261 | }, { 262 | testName: "InvalidFieldKey", 263 | point: Point{ 264 | Measurement: "m", 265 | Fields: []FieldKeyValue{{ 266 | Key: "", 267 | Value: int64(1), 268 | }}, 269 | // Include an explicit timestamp so that we test the path 270 | // in Endline that checks lineHasError. 
271 | Time: time.Unix(0, 123456), 272 | }, 273 | expectError: `invalid field key ""`, 274 | }, { 275 | testName: "TimeStampTooEarly", 276 | point: Point{ 277 | Measurement: "m", 278 | Fields: []FieldKeyValue{{ 279 | Key: "x", 280 | Value: int64(1), 281 | }}, 282 | Time: mustParseTime("1000-01-01T12:00:00Z"), 283 | }, 284 | expectError: `timestamp 1000-01-01T12:00:00Z: line-protocol value out of range`, 285 | }, { 286 | testName: "TimeStampTooLate", 287 | point: Point{ 288 | Measurement: "m", 289 | Fields: []FieldKeyValue{{ 290 | Key: "x", 291 | Value: int64(1), 292 | }}, 293 | Time: mustParseTime("8888-01-01T12:00:00Z"), 294 | }, 295 | expectError: `timestamp 8888-01-01T12:00:00Z: line-protocol value out of range`, 296 | }} 297 | 298 | func TestEncoderDataError(t *testing.T) { 299 | c := qt.New(t) 300 | for _, test := range encoderDataErrorTests { 301 | c.Run(test.testName, func(c *qt.C) { 302 | var e Encoder 303 | e.StartLine("m") 304 | e.AddField("f", MustNewValue(int64(1))) 305 | e.EndLine(time.Time{}) 306 | c.Assert(e.Err(), qt.IsNil) 307 | initialBytes := string(e.Bytes()) 308 | encodePoint(&e, test.point) 309 | c.Assert(e.Err(), qt.ErrorMatches, "encoding point 1: "+test.expectError) 310 | 311 | // Check that the original line is still intact without any of 312 | // the new one. 313 | c.Assert(string(e.Bytes()), qt.Equals, initialBytes) 314 | 315 | // Check that we can add another line OK. 316 | e.ClearErr() 317 | e.StartLine("n") 318 | e.AddField("g", MustNewValue(int64(1))) 319 | c.Assert(e.Err(), qt.IsNil) 320 | c.Assert(string(e.Bytes()), qt.Equals, "m f=1i\nn g=1i") 321 | }) 322 | } 323 | } 324 | 325 | func BenchmarkEncode(b *testing.B) { 326 | ts := time.Now() 327 | field1Val := []byte("ds;livjdsflvkfesdvljkdsnbvlkdfsjbldfsjhbdfklsjbvkdsjhbv") 328 | field2Val := []byte("12343456") 329 | benchmarks := []struct { 330 | name string 331 | encode func(b *testing.B, encoder *Encoder) 332 | }{{ 333 | name: "100-points", 334 | encode: func(b *testing.B, e *Encoder) { 335 | for j := 0; j < 100; j++ { 336 | e.StartLine("measurement") 337 | e.AddTag("tagfffffffdsffsdfvgdsfvdsfvdsfvd1", "blahblahblah") 338 | e.AddTag("uag2", "dfsvdfsvbsdfvs") 339 | e.AddTag("zzzzzzzzzz", "fdbgfdbgf") 340 | v, err := NewValueFromBytes(String, field1Val) 341 | if err != nil { 342 | b.Fatal(err) 343 | } 344 | e.AddField("f", v) 345 | v, err = NewValueFromBytes(Int, field2Val) 346 | if err != nil { 347 | b.Fatal(err) 348 | } 349 | e.AddField("f2", v) 350 | e.EndLine(ts) 351 | } 352 | }, 353 | }, { 354 | name: "1-point", 355 | encode: func(b *testing.B, e *Encoder) { 356 | e.StartLine("measurement") 357 | e.AddTag("tagfffffffdsffsdfvgdsfvdsfvdsfvd1", "blahblahblah") 358 | e.AddTag("uag2", "dfsvdfsvbsdfvs") 359 | e.AddTag("zzzzzzzzzz", "fdbgfdbgf") 360 | v, err := NewValueFromBytes(String, field1Val) 361 | if err != nil { 362 | b.Fatal(err) 363 | } 364 | e.AddField("f", v) 365 | v, err = NewValueFromBytes(Int, field2Val) 366 | if err != nil { 367 | b.Fatal(err) 368 | } 369 | e.AddField("f2", v) 370 | e.EndLine(ts) 371 | }, 372 | }} 373 | runBench := func(b *testing.B, lax bool) { 374 | name := "strict" 375 | if lax { 376 | name = "lax" 377 | } 378 | b.Run(name, func(b *testing.B) { 379 | for _, benchmark := range benchmarks { 380 | b.Run(benchmark.name, func(b *testing.B) { 381 | b.ReportAllocs() 382 | var e Encoder 383 | e.SetLax(lax) 384 | benchmark.encode(b, &e) 385 | b.SetBytes(int64(len(e.Bytes()))) 386 | e.Reset() 387 | b.ResetTimer() 388 | for i := 0; i < b.N; i++ { 389 | benchmark.encode(b, &e) 390 | } 
391 | }) 392 | } 393 | }) 394 | } 395 | runBench(b, false) 396 | runBench(b, true) 397 | } 398 | 399 | func encodePoint(e *Encoder, p Point) { 400 | e.StartLine(p.Measurement) 401 | for _, tag := range p.Tags { 402 | e.AddTag(tag.Key, tag.Value) 403 | } 404 | for _, field := range p.Fields { 405 | e.AddField(field.Key, MustNewValue(field.Value)) 406 | } 407 | e.EndLine(p.Time) 408 | } 409 | 410 | func pointsHaveError(ps []Point) bool { 411 | for _, p := range ps { 412 | if p.MeasurementError != "" || p.TimeError != "" { 413 | return true 414 | } 415 | for _, tag := range p.Tags { 416 | if tag.Error != "" { 417 | return true 418 | } 419 | } 420 | for _, field := range p.Fields { 421 | if field.Error != "" { 422 | return true 423 | } 424 | } 425 | } 426 | return false 427 | } 428 | 429 | func pointWithSortedTags(p Point) Point { 430 | p.Tags = append([]TagKeyValue(nil), p.Tags...) 431 | sort.Slice(p.Tags, func(i, j int) bool { 432 | return p.Tags[i].Key < p.Tags[j].Key 433 | }) 434 | return p 435 | } 436 | 437 | func mustParseTime(ts string) time.Time { 438 | t, err := time.Parse(time.RFC3339, ts) 439 | if err != nil { 440 | panic(err) 441 | } 442 | return t 443 | } 444 | -------------------------------------------------------------------------------- /lineprotocol/escape.go: -------------------------------------------------------------------------------- 1 | package lineprotocol 2 | 3 | import ( 4 | "strconv" 5 | "strings" 6 | ) 7 | 8 | // escaper represents a set of characters that can be escaped. 9 | type escaper struct { 10 | // table maps from byte value to the byte used to escape that value. 11 | // If an entry is zero, it doesn't need to be escaped. 12 | table [256]byte 13 | 14 | // revTable holds the inverse of table - it maps 15 | // from escaped value to the unescaped value. 16 | revTable [256]byte 17 | 18 | // escapes holds all the characters that need to be escaped. 19 | escapes string 20 | } 21 | 22 | // newEscaper returns an escaper that escapes the 23 | // given characters. 24 | func newEscaper(escapes string) *escaper { 25 | var e escaper 26 | for _, b := range escapes { 27 | // Note that this works because the Go escaping rules 28 | // for white space are the same as line-protocol's. 29 | q := strconv.QuoteRune(b) 30 | q = q[1 : len(q)-1] // strip single quotes. 31 | q = strings.TrimPrefix(q, "\\") // remove backslash if present. 32 | e.table[byte(b)] = q[0] // use single remaining character. 33 | e.revTable[q[0]] = byte(b) 34 | } 35 | e.escapes = escapes 36 | return &e 37 | } 38 | 39 | // appendEscaped returns the escaped form of s appended to buf. 40 | func (e *escaper) appendEscaped(buf []byte, s string) []byte { 41 | newLen, startIndex := e.escapedLen(s) 42 | if newLen == len(s) { 43 | return append(buf, s...) 44 | } 45 | if cap(buf)-len(buf) < newLen { 46 | nBuf := make([]byte, len(buf), len(buf)+newLen) 47 | copy(nBuf, buf) 48 | buf = nBuf 49 | } 50 | e._escape(buf[len(buf):len(buf)+newLen], s, startIndex) 51 | return buf[:len(buf)+newLen] 52 | } 53 | 54 | // escaped returns the length that s will be after escaping 55 | // and the index of the first character in s that needs escaping. 
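// For instance (an illustrative sketch, assuming the escaper was
// built with newEscaper(", ")): for s = "a b,c" each escaped byte
// gains one leading backslash, so escapedLen returns 7 and the
// returned startIndex is 1, the position of the first space.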
56 | func (e *escaper) escapedLen(s string) (escLen, startIndex int) { 57 | startIndex = len(s) 58 | n := len(s) 59 | for i := 0; i < len(e.escapes); i++ { 60 | k := strings.IndexByte(s, e.escapes[i]) 61 | if k == -1 { 62 | continue 63 | } 64 | if k < startIndex { 65 | startIndex = k 66 | } 67 | n += 1 + strings.Count(s[k+1:], e.escapes[i:i+1]) 68 | } 69 | return n, startIndex 70 | } 71 | 72 | // _escape writes the escaped form of s into buf. It 73 | // assumes buf is the correct length (as determined 74 | // by escapedLen). 75 | // This method should be treated as private to escaper. 76 | func (e *escaper) _escape(buf []byte, s string, escIndex int) { 77 | copy(buf, s[:escIndex]) 78 | j := escIndex 79 | for i := escIndex; i < len(s); i++ { 80 | b := s[i] 81 | if r := e.table[b]; r != 0 { 82 | buf[j] = '\\' 83 | buf[j+1] = r 84 | j += 2 85 | } else { 86 | buf[j] = b 87 | j++ 88 | } 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /lineprotocol/example_test.go: -------------------------------------------------------------------------------- 1 | package lineprotocol_test 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | 7 | "github.com/influxdata/line-protocol/v2/lineprotocol" 8 | ) 9 | 10 | func ExampleDecoder() { 11 | data := []byte(` 12 | foo,tag1=val1,tag2=val2 x=1,y="hello" 1625823259000000 13 | bar enabled=true 14 | `) 15 | dec := lineprotocol.NewDecoderWithBytes(data) 16 | for dec.Next() { 17 | fmt.Printf("\nstart entry\n") 18 | m, err := dec.Measurement() 19 | if err != nil { 20 | panic(err) 21 | } 22 | fmt.Printf("measurement %s\n", m) 23 | for { 24 | key, val, err := dec.NextTag() 25 | if err != nil { 26 | panic(err) 27 | } 28 | if key == nil { 29 | break 30 | } 31 | fmt.Printf("tag %s=%s\n", key, val) 32 | } 33 | for { 34 | key, val, err := dec.NextField() 35 | if err != nil { 36 | panic(err) 37 | } 38 | if key == nil { 39 | break 40 | } 41 | fmt.Printf("field %s=%v\n", key, val) 42 | } 43 | t, err := dec.Time(lineprotocol.Microsecond, time.Time{}) 44 | if err != nil { 45 | panic(err) 46 | } 47 | if t.IsZero() { 48 | fmt.Printf("no timestamp\n") 49 | } else { 50 | fmt.Printf("timestamp %s\n", t.UTC().Format(time.RFC3339Nano)) 51 | } 52 | } 53 | // Note: because we're decoding from a slice of bytes, dec.Error can't return 54 | // an error. If we were decoding from an io.Reader, we'd need to check dec.Error 55 | // here. 
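// A minimal sketch of that check (r stands for a hypothetical
// io.Reader such as a network connection or file):
//
//	dec := lineprotocol.NewDecoder(r)
//	for dec.Next() {
//		// ... process each entry as above ...
//	}
//	if err := dec.Error(); err != nil {
//		panic(err)
//	}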
56 | 57 | // Output: 58 | // 59 | // start entry 60 | // measurement foo 61 | // tag tag1=val1 62 | // tag tag2=val2 63 | // field x=1 64 | // field y="hello" 65 | // timestamp 2021-07-09T09:34:19Z 66 | // 67 | // start entry 68 | // measurement bar 69 | // field enabled=true 70 | // no timestamp 71 | } 72 | 73 | func ExampleEncoder() { 74 | var enc lineprotocol.Encoder 75 | enc.SetPrecision(lineprotocol.Microsecond) 76 | enc.StartLine("foo") 77 | enc.AddTag("tag1", "val1") 78 | enc.AddTag("tag2", "val2") 79 | enc.AddField("x", lineprotocol.MustNewValue(1.0)) 80 | enc.AddField("y", lineprotocol.MustNewValue("hello")) 81 | enc.EndLine(time.Unix(0, 1625823259000000000)) 82 | enc.StartLine("bar") 83 | enc.AddField("enabled", lineprotocol.BoolValue(true)) 84 | enc.EndLine(time.Time{}) 85 | if err := enc.Err(); err != nil { 86 | panic(fmt.Errorf("encoding error: %v", err)) 87 | } 88 | fmt.Printf("%s", enc.Bytes()) 89 | // Output: 90 | // foo,tag1=val1,tag2=val2 x=1,y="hello" 1625823259000000 91 | // bar enabled=true 92 | } 93 | -------------------------------------------------------------------------------- /lineprotocol/precision.go: -------------------------------------------------------------------------------- 1 | package lineprotocol 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | ) 7 | 8 | // Precision specifies the scale at which a line-protocol timestamp 9 | // is encoded. 10 | type Precision byte 11 | 12 | const ( 13 | Nanosecond Precision = iota 14 | Microsecond 15 | Millisecond 16 | Second 17 | ) 18 | 19 | // asNanoseconds returns x multiplied by p.Duration. 20 | // It reports whether the multiplication succeeded without 21 | // overflow. 22 | func (p Precision) asNanoseconds(x int64) (int64, bool) { 23 | if p == Nanosecond { 24 | return x, true 25 | } 26 | d := int64(p.Duration()) 27 | // Note: because p has a limited number of values, we don't have 28 | // to worry about edge cases like x being the most negative number. 29 | if c := x * d; c/d == x { 30 | return c, true 31 | } 32 | return 0, false 33 | } 34 | 35 | // Duration returns the time duration for the given precision. 36 | // For example, Second.Duration() is time.Second. 37 | func (p Precision) Duration() time.Duration { 38 | switch p { 39 | case Nanosecond: 40 | return time.Nanosecond 41 | case Microsecond: 42 | return time.Microsecond 43 | case Millisecond: 44 | return time.Millisecond 45 | case Second: 46 | return time.Second 47 | } 48 | panic(fmt.Errorf("unknown precision %d", p)) 49 | } 50 | 51 | // String returns p as a string (ns, µs, ms or s). 52 | func (p Precision) String() string { 53 | switch p { 54 | case Nanosecond: 55 | return "ns" 56 | case Microsecond: 57 | return "µs" 58 | case Millisecond: 59 | return "ms" 60 | case Second: 61 | return "s" 62 | } 63 | panic(fmt.Errorf("unknown precision %d", p)) 64 | } 65 | -------------------------------------------------------------------------------- /lineprotocol/section_string.go: -------------------------------------------------------------------------------- 1 | // Code generated by "stringer -type section"; DO NOT EDIT. 2 | 3 | package lineprotocol 4 | 5 | import "strconv" 6 | 7 | func _() { 8 | // An "invalid array index" compiler error signifies that the constant values have changed. 9 | // Re-run the stringer command to generate them again. 
10 | var x [1]struct{} 11 | _ = x[measurementSection-0] 12 | _ = x[tagSection-1] 13 | _ = x[fieldSection-2] 14 | _ = x[timeSection-3] 15 | _ = x[newlineSection-4] 16 | _ = x[endSection-5] 17 | } 18 | 19 | const _section_name = "measurementSectiontagSectionfieldSectiontimeSectionnewlineSectionendSection" 20 | 21 | var _section_index = [...]uint8{0, 18, 28, 40, 51, 65, 75} 22 | 23 | func (i section) String() string { 24 | if i >= section(len(_section_index)-1) { 25 | return "section(" + strconv.FormatInt(int64(i), 10) + ")" 26 | } 27 | return _section_name[_section_index[i]:_section_index[i+1]] 28 | } 29 | -------------------------------------------------------------------------------- /lineprotocol/strconv.go: -------------------------------------------------------------------------------- 1 | package lineprotocol 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "strconv" 7 | "unsafe" 8 | ) 9 | 10 | // parseIntBytes is a zero-alloc wrapper around strconv.ParseInt. 11 | func parseIntBytes(b []byte, base int, bitSize int) (i int64, err error) { 12 | return strconv.ParseInt(unsafeBytesToString(b), base, bitSize) 13 | } 14 | 15 | // parseUintBytes is a zero-alloc wrapper around strconv.ParseUint. 16 | func parseUintBytes(b []byte, base int, bitSize int) (i uint64, err error) { 17 | return strconv.ParseUint(unsafeBytesToString(b), base, bitSize) 18 | } 19 | 20 | // parseFloatBytes is a zero-alloc wrapper around strconv.ParseFloat. 21 | func parseFloatBytes(b []byte, bitSize int) (float64, error) { 22 | return strconv.ParseFloat(unsafeBytesToString(b), bitSize) 23 | } 24 | 25 | var errInvalidBool = fmt.Errorf("invalid boolean value") 26 | 27 | // parseBoolBytes doesn't bother wrapping strconv.ParseBool because 28 | // it's not quite the same, so simple and faster this way. 29 | func parseBoolBytes(s []byte) (byte, error) { 30 | switch string(s) { 31 | case "t", "T", "true", "True", "TRUE": 32 | return 1, nil 33 | case "f", "F", "false", "False", "FALSE": 34 | return 0, nil 35 | } 36 | return 0, errInvalidBool 37 | } 38 | 39 | // unsafeBytesToString converts a []byte to a string without a heap allocation. 40 | // 41 | // It is unsafe, and is intended to prepare input to short-lived functions 42 | // that require strings. 43 | func unsafeBytesToString(data []byte) string { 44 | dataHeader := (*reflect.SliceHeader)(unsafe.Pointer(&data)) 45 | var str string 46 | stringHeader := (*reflect.StringHeader)(unsafe.Pointer(&str)) 47 | stringHeader.Data = dataHeader.Data 48 | stringHeader.Len = dataHeader.Len 49 | return str 50 | } 51 | -------------------------------------------------------------------------------- /lineprotocol/value.go: -------------------------------------------------------------------------------- 1 | package lineprotocol 2 | 3 | import ( 4 | "bytes" 5 | "errors" 6 | "fmt" 7 | "math" 8 | "strconv" 9 | "unicode/utf8" 10 | ) 11 | 12 | // ErrValueOutOfRange signals that a value is out of the acceptable numeric range. 13 | var ErrValueOutOfRange = errors.New("line-protocol value out of range") 14 | 15 | // Value holds one of the possible line-protocol field values. 16 | type Value struct { 17 | // number covers: 18 | // - signed integer 19 | // - unsigned integer 20 | // - bool 21 | // - float 22 | number uint64 23 | // bytes holds the string bytes or a sentinel (see below) 24 | // when the value's not holding a string. 
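// The sentinel is one of the single-byte slices defined just below
// (intSentinel, uintSentinel, floatSentinel or boolSentinel); Kind
// tells the kinds apart by comparing the address of the sentinel's
// first byte rather than its contents.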
25 | bytes []byte 26 | } 27 | 28 | var ( 29 | intSentinel = [1]byte{'i'} 30 | uintSentinel = [1]byte{'u'} 31 | floatSentinel = [1]byte{'f'} 32 | boolSentinel = [1]byte{'b'} 33 | ) 34 | 35 | // MustNewValue is like NewValue except that it panics on failure. 36 | func MustNewValue(x interface{}) Value { 37 | v, ok := NewValue(x) 38 | if !ok { 39 | panic(fmt.Errorf("invalid value for NewValue: %T (%#v)", x, x)) 40 | } 41 | return v 42 | } 43 | 44 | // Equal reports whether v1 is equal to v2. 45 | func (v1 Value) Equal(v2 Value) bool { 46 | k := v1.Kind() 47 | if v2.Kind() != k { 48 | return false 49 | } 50 | if k != Float { 51 | return v1.number == v2.number && bytes.Equal(v1.bytes, v2.bytes) 52 | } 53 | // Floats can't be compared bitwise. 54 | return v1.FloatV() == v2.FloatV() 55 | } 56 | 57 | // NewValueFromBytes creates a value of the given kind with the 58 | // given data, as returned from Decoder.NextFieldBytes. 59 | // 60 | // If the value is out of range, errors.Is(err, ErrValueOutOfRange) will return true. 61 | // 62 | // The data for Int and Uint kinds should not include 63 | // the type suffixes present in the line-protocol field values. 64 | // For example, the data for the zero Int should be "0" not "0i". 65 | // 66 | // The data for String should not include the surrounding quotes, 67 | // should be unescaped already and should not contain invalid 68 | // utf-8. The returned value will contain a reference to data - it does not make a copy. 69 | func NewValueFromBytes(kind ValueKind, data []byte) (Value, error) { 70 | return newValueFromBytes(kind, data, true) 71 | } 72 | 73 | func newValueFromBytes(kind ValueKind, data []byte, checkUTF8 bool) (Value, error) { 74 | switch kind { 75 | case Int: 76 | x, err := parseIntBytes(data, 10, 64) 77 | if err != nil { 78 | return Value{}, maybeOutOfRange(err, "invalid integer value syntax") 79 | } 80 | return Value{ 81 | number: uint64(x), 82 | bytes: intSentinel[:], 83 | }, nil 84 | case Uint: 85 | x, err := parseUintBytes(data, 10, 64) 86 | if err != nil { 87 | return Value{}, maybeOutOfRange(err, "invalid unsigned integer value syntax") 88 | } 89 | return Value{ 90 | number: x, 91 | bytes: uintSentinel[:], 92 | }, nil 93 | case Float: 94 | x, err := parseFloatBytes(data, 64) 95 | if err != nil { 96 | return Value{}, maybeOutOfRange(err, "invalid float value syntax") 97 | } 98 | if math.IsInf(x, 0) || math.IsNaN(x) { 99 | return Value{}, fmt.Errorf("non-number %q cannot be represented as a line-protocol field value", data) 100 | } 101 | return Value{ 102 | number: math.Float64bits(x), 103 | bytes: floatSentinel[:], 104 | }, nil 105 | case Bool: 106 | x, err := parseBoolBytes(data) 107 | if err != nil { 108 | return Value{}, fmt.Errorf("invalid bool value %q", data) 109 | } 110 | return Value{ 111 | number: uint64(x), 112 | bytes: boolSentinel[:], 113 | }, nil 114 | case String: 115 | if checkUTF8 && !utf8.Valid(data) { 116 | return Value{}, fmt.Errorf("invalid utf-8 found in value %q", data) 117 | } 118 | return Value{ 119 | bytes: data, 120 | }, nil 121 | case Unknown: 122 | return Value{}, fmt.Errorf("cannot parse value %q with unknown kind", data) 123 | default: 124 | return Value{}, fmt.Errorf("unexpected value kind %d (value %q)", kind, data) 125 | } 126 | } 127 | 128 | // NewValue returns a Value containing the value of x, which must 129 | // be of type int64 (Int), uint64 (Uint), float64 (Float), bool (Bool), 130 | // string (String) or []byte (String). 
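// For example, NewValue(int64(1)) yields an Int value and
// NewValue("x") yields a String value, whereas NewValue(1)
// (an untyped int rather than int64) is rejected.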
131 | // 132 | // Unlike NewValueFromBytes, NewValue will make a copy of the byte 133 | // slice if x is []byte - use NewValueFromBytes if you require zero-copy 134 | // semantics. 135 | // 136 | // NewValue will fail and return false if x isn't a recognized 137 | // type or if it's a non-finite float64, or if a string or byte slice contains 138 | // invalid utf-8. 139 | func NewValue(x interface{}) (Value, bool) { 140 | switch x := x.(type) { 141 | case int64: 142 | return IntValue(x), true 143 | case uint64: 144 | return UintValue(x), true 145 | case float64: 146 | return FloatValue(x) 147 | case bool: 148 | return BoolValue(x), true 149 | case string: 150 | return StringValue(x) 151 | case []byte: 152 | return StringValueFromBytes(x) 153 | } 154 | return Value{}, false 155 | } 156 | 157 | // IntValue returns a Value containing the value of x. 158 | func IntValue(x int64) Value { 159 | return Value{ 160 | number: uint64(x), 161 | bytes: intSentinel[:], 162 | } 163 | } 164 | 165 | // UintValue returns a Value containing the value of x. 166 | func UintValue(x uint64) Value { 167 | return Value{ 168 | number: uint64(x), 169 | bytes: uintSentinel[:], 170 | } 171 | } 172 | 173 | // FloatValue returns a Value containing the value of x. 174 | // 175 | // FloatValue will fail and return false if x is non-finite. 176 | func FloatValue(x float64) (Value, bool) { 177 | if math.IsInf(x, 0) || math.IsNaN(x) { 178 | return Value{}, false 179 | } 180 | return Value{ 181 | number: math.Float64bits(x), 182 | bytes: floatSentinel[:], 183 | }, true 184 | } 185 | 186 | // BoolValue returns a Value containing the value of x. 187 | func BoolValue(x bool) Value { 188 | n := uint64(0) 189 | if x { 190 | n = 1 191 | } 192 | return Value{ 193 | number: uint64(n), 194 | bytes: boolSentinel[:], 195 | } 196 | } 197 | 198 | // StringValue returns a Value containing the value of x. 199 | // 200 | // StringValue will fail and return false if x contains invalid utf-8. 201 | func StringValue(x string) (Value, bool) { 202 | if !utf8.ValidString(x) { 203 | return Value{}, false 204 | } 205 | return Value{ 206 | bytes: []byte(x), 207 | }, true 208 | } 209 | 210 | // StringValueFromBytes returns a Value containing the value of x. 211 | // 212 | // StringValueFromBytes will fail and return false if x contains invalid utf-8. 213 | // 214 | // Unlike NewValueFromBytes, StringValueFromBytes will make a copy of the byte 215 | // slice - use NewValueFromBytes if you require zero-copy semantics. 216 | func StringValueFromBytes(x []byte) (Value, bool) { 217 | if !utf8.Valid(x) { 218 | return Value{}, false 219 | } 220 | return Value{ 221 | bytes: append([]byte(nil), x...), 222 | }, true 223 | } 224 | 225 | // IntV returns the value as an int64. It panics if v.Kind is not Int. 226 | func (v Value) IntV() int64 { 227 | v.mustBe(Int) 228 | return int64(v.number) 229 | } 230 | 231 | // UintV returns the value as a uint64. It panics if v.Kind is not Uint. 232 | func (v Value) UintV() uint64 { 233 | v.mustBe(Uint) 234 | return v.number 235 | } 236 | 237 | // FloatV returns the value as a float64. It panics if v.Kind is not Float. 238 | func (v Value) FloatV() float64 { 239 | v.mustBe(Float) 240 | return math.Float64frombits(v.number) 241 | } 242 | 243 | // StringV returns the value as a string. It panics if v.Kind is not String. 244 | func (v Value) StringV() string { 245 | v.mustBe(String) 246 | return string(v.bytes) 247 | } 248 | 249 | // BytesV returns the value as a []byte. It panics if v.Kind is not String. 
250 | // Note that this may return a direct reference to the byte slice within the 251 | // value - modifying the returned byte slice may mutate the contents 252 | // of the Value. 253 | func (v Value) BytesV() []byte { 254 | v.mustBe(String) 255 | return v.bytes 256 | } 257 | 258 | // BoolV returns the value as a bool. It panics if v.Kind is not Bool. 259 | func (v Value) BoolV() bool { 260 | v.mustBe(Bool) 261 | return v.number != 0 262 | } 263 | 264 | // Interface returns the value as an interface. The returned value 265 | // will have a different dynamic type depending on the value kind; 266 | // one of int64 (Int), uint64 (Uint), float64 (Float), string (String), bool (Bool). 267 | func (v Value) Interface() interface{} { 268 | switch v.Kind() { 269 | case Int: 270 | return v.IntV() 271 | case Uint: 272 | return v.UintV() 273 | case String: 274 | return v.StringV() 275 | case Bool: 276 | return v.BoolV() 277 | case Float: 278 | return v.FloatV() 279 | default: 280 | // Shouldn't be able to happen. 281 | panic("unknown value kind") 282 | } 283 | } 284 | 285 | func (v Value) mustBe(k ValueKind) { 286 | if v.Kind() != k { 287 | panic(fmt.Errorf("value has unexpected kind; got %v want %v", v.Kind(), k)) 288 | } 289 | } 290 | 291 | func (v Value) Kind() ValueKind { 292 | if len(v.bytes) != 1 { 293 | return String 294 | } 295 | switch &v.bytes[0] { 296 | case &intSentinel[0]: 297 | return Int 298 | case &uintSentinel[0]: 299 | return Uint 300 | case &floatSentinel[0]: 301 | return Float 302 | case &boolSentinel[0]: 303 | return Bool 304 | } 305 | return String 306 | } 307 | 308 | // String returns the value as it would be encoded in a line-protocol entry. 309 | func (v Value) String() string { 310 | return string(v.AppendBytes(nil)) 311 | } 312 | 313 | // AppendTo appends the encoded value of v to buf. 314 | func (v Value) AppendBytes(dst []byte) []byte { 315 | switch v.Kind() { 316 | case Float: 317 | return strconv.AppendFloat(dst, v.FloatV(), 'g', -1, 64) 318 | case Int: 319 | dst = strconv.AppendInt(dst, v.IntV(), 10) 320 | dst = append(dst, 'i') 321 | return dst 322 | case Uint: 323 | dst = strconv.AppendUint(dst, v.UintV(), 10) 324 | dst = append(dst, 'u') 325 | return dst 326 | case Bool: 327 | if v.BoolV() { 328 | return append(dst, "true"...) 329 | } 330 | return append(dst, "false"...) 
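// String values are written as a double-quoted string with
// line-protocol escaping applied to the contents.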
331 | case String: 332 | dst = append(dst, '"') 333 | dst = fieldStringValEscapes.appendEscaped(dst, unsafeBytesToString(v.bytes)) 334 | dst = append(dst, '"') 335 | return dst 336 | default: 337 | panic("unknown kind") 338 | } 339 | } 340 | 341 | func maybeOutOfRange(err error, s string) error { 342 | if err, ok := err.(*strconv.NumError); ok && err.Err == strconv.ErrRange { 343 | return ErrValueOutOfRange 344 | } 345 | return errors.New(s) 346 | } 347 | -------------------------------------------------------------------------------- /lineprotocol/value_test.go: -------------------------------------------------------------------------------- 1 | package lineprotocol 2 | 3 | import ( 4 | "math" 5 | "testing" 6 | 7 | qt "github.com/frankban/quicktest" 8 | ) 9 | 10 | var parseValueTests = []struct { 11 | testName string 12 | kind ValueKind 13 | data string 14 | expectError string 15 | expectInterface interface{} 16 | expectString string 17 | }{{ 18 | testName: "int", 19 | kind: Int, 20 | data: "1234", 21 | expectInterface: int64(1234), 22 | expectString: "1234i", 23 | }, { 24 | testName: "uint", 25 | kind: Uint, 26 | data: "1234", 27 | expectInterface: uint64(1234), 28 | expectString: "1234u", 29 | }, { 30 | testName: "float", 31 | kind: Float, 32 | data: "1e3", 33 | expectInterface: float64(1000), 34 | expectString: "1000", 35 | }, { 36 | testName: "bool-true", 37 | kind: Bool, 38 | data: "true", 39 | expectInterface: true, 40 | expectString: "true", 41 | }, { 42 | testName: "bool-false", 43 | kind: Bool, 44 | data: "false", 45 | expectInterface: false, 46 | expectString: "false", 47 | }, { 48 | testName: "string", 49 | kind: String, 50 | data: "hello world", 51 | expectInterface: "hello world", 52 | expectString: `"hello world"`, 53 | }, { 54 | testName: "invalid-int", 55 | kind: Int, 56 | data: "1e3", 57 | expectError: `invalid integer value syntax`, 58 | }, { 59 | testName: "invalid-uint", 60 | kind: Uint, 61 | data: "1e3", 62 | expectError: `invalid unsigned integer value syntax`, 63 | }, { 64 | testName: "invalid-float", 65 | kind: Float, 66 | data: "1e3a", 67 | expectError: `invalid float value syntax`, 68 | }, { 69 | testName: "NaN", 70 | kind: Float, 71 | data: "NaN", 72 | expectError: `non-number "NaN" cannot be represented as a line-protocol field value`, 73 | }, { 74 | testName: "-Inf", 75 | kind: Float, 76 | data: "-Inf", 77 | expectError: `non-number "-Inf" cannot be represented as a line-protocol field value`, 78 | }, { 79 | testName: "invalid-bool", 80 | kind: Bool, 81 | data: "truE", 82 | expectError: `invalid bool value "truE"`, 83 | }, { 84 | testName: "unknown-kind", 85 | kind: Unknown, 86 | data: "nope", 87 | expectError: `cannot parse value "nope" with unknown kind`, 88 | }, { 89 | testName: "invalid-kind", 90 | kind: 125, 91 | data: "nope", 92 | expectError: `unexpected value kind 125 \(value "nope"\)`, 93 | }, { 94 | testName: "out-of-range-int", 95 | kind: Int, 96 | data: "18446744073709552000", 97 | expectError: `line-protocol value out of range`, 98 | }, { 99 | testName: "out-of-range-uint", 100 | kind: Uint, 101 | data: "18446744073709552000", 102 | expectError: `line-protocol value out of range`, 103 | }, { 104 | testName: "out-of-range-float", 105 | kind: Float, 106 | data: "1e18446744073709552000", 107 | expectError: `line-protocol value out of range`, 108 | }} 109 | 110 | func TestValueCreation(t *testing.T) { 111 | c := qt.New(t) 112 | for _, test := range parseValueTests { 113 | c.Run(test.testName, func(c *qt.C) { 114 | v, err := 
NewValueFromBytes(test.kind, []byte(test.data)) 115 | if test.expectError != "" { 116 | c.Assert(err, qt.ErrorMatches, test.expectError) 117 | } else { 118 | c.Assert(v.Kind(), qt.Equals, test.kind) 119 | c.Assert(v.Interface(), qt.Equals, test.expectInterface) 120 | c.Assert(v.String(), qt.Equals, test.expectString) 121 | 122 | // Check that we can create the same value with NewValue 123 | v1, ok := NewValue(v.Interface()) 124 | c.Assert(ok, qt.IsTrue) 125 | c.Assert(v1.Kind(), qt.Equals, v.Kind()) 126 | c.Assert(v1, qt.DeepEquals, v) 127 | c.Assert(v1.Interface(), qt.Equals, v.Interface()) 128 | v2 := MustNewValue(v.Interface()) 129 | c.Assert(v2, qt.DeepEquals, v1) 130 | if test.kind == String { 131 | // Check we can use bytes values too. 132 | v3, ok := NewValue(v.BytesV()) 133 | c.Assert(ok, qt.IsTrue) 134 | c.Assert(v3.Kind(), qt.Equals, v.Kind()) 135 | c.Assert(v3, qt.DeepEquals, v) 136 | c.Assert(v3.Interface(), qt.Equals, v.Interface()) 137 | } 138 | } 139 | }) 140 | } 141 | } 142 | 143 | // Note: many NewValue inputs are tested in TestValueCreation above. 144 | // This test just tests values that can be represented as Go values 145 | // but not as valid Values. 146 | var newValueInvalidTests = []struct { 147 | testName string 148 | value interface{} 149 | }{{ 150 | testName: "NaN", 151 | value: math.NaN(), 152 | }, { 153 | testName: "Inf", 154 | value: math.Inf(1), 155 | }, { 156 | testName: "unknown-type", 157 | value: new(int), 158 | }} 159 | 160 | func TestNewValueInvalid(t *testing.T) { 161 | c := qt.New(t) 162 | for _, test := range newValueInvalidTests { 163 | c.Run(test.testName, func(c *qt.C) { 164 | _, ok := NewValue(test.value) 165 | c.Assert(ok, qt.IsFalse) 166 | }) 167 | } 168 | } 169 | 170 | var valueEqualTests = []struct { 171 | testName string 172 | v1, v2 Value 173 | expect bool 174 | }{{ 175 | testName: "SameString", 176 | v1: MustNewValue("hello"), 177 | v2: MustNewValue("hello"), 178 | expect: true, 179 | }, { 180 | testName: "SameInt", 181 | v1: MustNewValue(int64(12345)), 182 | v2: MustNewValue(int64(12345)), 183 | expect: true, 184 | }, { 185 | testName: "SameBool", 186 | v1: MustNewValue(true), 187 | v2: MustNewValue(true), 188 | expect: true, 189 | }, { 190 | testName: "SameFloat", 191 | v1: MustNewValue(1234.5), 192 | v2: MustNewValue(1234.5), 193 | expect: true, 194 | }, { 195 | testName: "SameUint", 196 | v1: MustNewValue(uint64(43323)), 197 | v2: MustNewValue(uint64(43323)), 198 | expect: true, 199 | }, { 200 | testName: "DifferentFloat", 201 | v1: MustNewValue(0.1), 202 | v2: MustNewValue(0.2), 203 | expect: false, 204 | }, { 205 | testName: "DifferentTypesSameBits", 206 | v1: MustNewValue("i"), 207 | v2: MustNewValue(int64(0)), 208 | }, { 209 | testName: "DifferentZeros", 210 | v1: MustNewValue(zero), 211 | v2: MustNewValue(-zero), 212 | expect: true, 213 | }} 214 | 215 | var zero = 0.0 216 | 217 | func TestValueEqual(t *testing.T) { 218 | c := qt.New(t) 219 | for _, test := range valueEqualTests { 220 | c.Run(test.testName, func(c *qt.C) { 221 | c.Assert(test.v1.Equal(test.v2), qt.Equals, test.expect) 222 | }) 223 | } 224 | } 225 | -------------------------------------------------------------------------------- /lineprotocol/valuekind.go: -------------------------------------------------------------------------------- 1 | package lineprotocol 2 | 3 | import "fmt" 4 | 5 | // ValueKind represents the type of a field value. 
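// The zero value is Unknown, which cannot be marshalled as text.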
6 | type ValueKind uint8 7 | 8 | const ( 9 | Unknown ValueKind = iota 10 | String 11 | Int 12 | Uint 13 | Float 14 | Bool 15 | ) 16 | 17 | var kinds = []string{ 18 | Unknown: "unknown", 19 | String: "string", 20 | Int: "int", 21 | Uint: "uint", 22 | Float: "float", 23 | Bool: "bool", 24 | } 25 | 26 | // String returns k as a string. It panics if k isn't one of the 27 | // enumerated ValueKind constants. The string form is 28 | // the lower-case form of the constant. 29 | func (k ValueKind) String() string { 30 | return kinds[k] 31 | } 32 | 33 | // MarshalText implements encoding.TextMarshaler for ValueKind. 34 | // It returns an error if k is Unknown. 35 | func (k ValueKind) MarshalText() ([]byte, error) { 36 | if k == Unknown { 37 | return nil, fmt.Errorf("cannot marshal 'unknown' value type") 38 | } 39 | return []byte(k.String()), nil 40 | } 41 | 42 | // UnmarshalText implements encoding.TextUnmarshaler for ValueKind. 43 | func (k *ValueKind) UnmarshalText(data []byte) error { 44 | s := string(data) 45 | for i, kstr := range kinds { 46 | if i > 0 && kstr == s { 47 | *k = ValueKind(i) 48 | return nil 49 | } 50 | } 51 | return fmt.Errorf("unknown Value kind %q", s) 52 | } 53 | --------------------------------------------------------------------------------