├── .gitignore ├── .revive.toml ├── .travis.yml ├── CHANGELOG.md ├── LICENSE ├── Makefile ├── README.md ├── bench ├── encjson_test.go └── jsonlex_test.go ├── cursor.go ├── cursor_test.go ├── go.mod ├── go.sum ├── lexer.go ├── lexer_test.go ├── reader_support.go ├── reader_support_test.go └── testdata ├── 2000kB.json ├── 200kB.json ├── 20kB.json └── 2kB.json /.gitignore: -------------------------------------------------------------------------------- 1 | .* 2 | _* 3 | *.out 4 | *.html 5 | *.test 6 | *.prof 7 | cmd/ 8 | bin/ 9 | makefile 10 | -------------------------------------------------------------------------------- /.revive.toml: -------------------------------------------------------------------------------- 1 | ignoreGeneratedHeader = false 2 | severity = "warning" 3 | confidence = 0.8 4 | errorCode = 0 5 | warningCode = 0 6 | 7 | [rule.file-header] 8 | arguments = ["MIT license.*"] 9 | [rule.blank-imports] 10 | [rule.context-as-argument] 11 | [rule.context-keys-type] 12 | [rule.dot-imports] 13 | [rule.empty-block] 14 | [rule.error-naming] 15 | [rule.error-return] 16 | [rule.error-strings] 17 | [rule.errorf] 18 | [rule.exported] 19 | [rule.if-return] 20 | [rule.increment-decrement] 21 | [rule.indent-error-flow] 22 | [rule.package-comments] 23 | [rule.range] 24 | [rule.receiver-naming] 25 | [rule.redefines-builtin-id] 26 | [rule.superfluous-else] 27 | [rule.time-naming] 28 | [rule.unexported-return] 29 | [rule.unreachable-code] 30 | [rule.unused-parameter] 31 | [rule.var-declaration] 32 | [rule.var-naming] 33 | 34 | [rule.argument-limit] 35 | arguments = [5] 36 | [rule.atomic] 37 | [rule.bare-return] 38 | [rule.bool-literal-in-expr] 39 | [rule.call-to-gc] 40 | [rule.confusing-naming] 41 | [rule.confusing-results] 42 | [rule.constant-logical-expr] 43 | [rule.cyclomatic] 44 | arguments = [67] # this is sick :) 45 | [rule.deep-exit] 46 | [rule.duplicated-imports] 47 | [rule.flag-parameter] 48 | [rule.function-result-limit] 49 | arguments = [4] 50 | 
[rule.import-shadowing] 51 | [rule.line-length-limit] 52 | arguments = [120] 53 | [rule.modifies-parameter] 54 | [rule.modifies-value-receiver] 55 | [rule.range-val-in-closure] 56 | [rule.struct-tag] 57 | [rule.unhandled-error] 58 | [rule.unnecessary-stmt] 59 | [rule.unused-receiver] 60 | [rule.waitgroup-by-value] 61 | 62 | # [rule.add-constant] 63 | # arguments = [{maxLitCount = "5",allowStrs ="\"\"",allowInts="0,1,2,3,4,5",allowFloats="0.0,0.,1.0,1.,2.0,2."}] 64 | # [rule.max-public-structs] 65 | # arguments =[5] 66 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | go: 4 | - "1.13" 5 | - "1.14" 6 | - "1.15" 7 | - "1.16" 8 | - "1.17" 9 | 10 | env: 11 | - GO111MODULE=auto 12 | 13 | before_install: 14 | - go get github.com/mattn/goveralls 15 | 16 | script: 17 | - make .travis 18 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | #### v0.4.0 2 | * Added UnreadableReader interface which allows the Lexer to unread bytes if necessary. 3 | Along comes a new option LexerOptEnableUnreadBuffer for the NewLexer() function, which enables 4 | the UnreadableReader. Kudos: Gregor Noczinski 5 | 6 | #### v0.3.1 7 | * Test and documentation improvements 8 | 9 | #### v0.3.0 10 | * Lexer.Scan() is now reentrant. Depending on the outcome of the Yield callback, the Scan() function will terminate and can be invoked again. 11 | * Added a Cursor which provides iteration over tokens using Curr(), Peek(), Next() and Last() methods 12 | * Added a Filter, an optional callback for the Cursor constructor, which allows to filter the lexer output and transparently skip tokens. 
13 | * API break: 14 | * Old: Yield func(token Token, load []byte, pos uint) 15 | * New: Yield func(kind TokenKind, load []byte, pos uint) bool 16 | 17 | #### v0.2.4 18 | * First milestone 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright © Daniel T. Gorski et al. 4 | 5 | Permission is hereby granted, free of charge, to any person 6 | obtaining a copy of this software and associated documentation 7 | files (the "Software"), to deal in the Software without restriction, 8 | including without limitation the rights to use, copy, modify, merge, 9 | publish, distribute, sublicense, and/or sell copies of the Software, 10 | and to permit persons to whom the Software is furnished to do so, 11 | subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: help clean test bench prof tidy sniff .travis 2 | 3 | help: # Displays this list 4 | @echo; grep "^[a-z][a-zA-Z0-9_<> -]\+:" Makefile | sed -E "s/:[^#]*?#?(.*)?/\r\t\t\1/" | sed "s/^/ make /"; echo 5 | 6 | clean: # Removes build/test artifacts 7 | @find . 
-type f | grep "\.out$$" | xargs -I{} rm {}; 8 | @find . -type f | grep "\.html$$" | xargs -I{} rm {}; 9 | @find . -type f | grep "\.test$$" | xargs -I{} rm {}; 10 | @find . -type f | grep "\.prof$$" | xargs -I{} rm {}; 11 | 12 | test: clean # Runs integrity test with -race 13 | CGO_ENABLED=1 go test -v -count=1 -race -covermode=atomic -coverprofile=./coverage.out . 14 | @go tool cover -html=./coverage.out -o ./coverage.html && echo "coverage: " 15 | 16 | bench: clean # Executes artificial benchmarks 17 | CGO_ENABLED=0 GOMAXPROCS=1 GOGC=off go test -run=^$$ -bench=. ./bench 18 | 19 | prof-cpu: clean # Creates CPU profiler output 20 | CGO_ENABLED=0 GOMAXPROCS=1 GOGC=off go test -cpuprofile=cpu.prof -bench=cursor.*2000kB ./bench 21 | @echo "\nCPU --------------------------------------" 22 | go tool pprof -top cpu.prof | sed "s/^/ /" 23 | 24 | prof-mem: clean # Creates memory profiler output 25 | CGO_ENABLED=0 GOMAXPROCS=1 GOGC=off go test -memprofile=mem.prof -bench=cursor.*2000kB ./bench 26 | @echo "\nMEM --------------------------------------" 27 | go tool pprof -top mem.prof | sed "s/^/ /" 28 | 29 | sniff: # Checks format and runs linter (void on success) 30 | @find . -type f -not -path "*/\.*" -name "*.go" | xargs -I{} gofmt -d {} 31 | @go vet ./... || true 32 | @>/dev/null which revive || (echo "Missing a linter, install with: go install github.com/mgechev/revive" && false) 33 | @revive -config .revive.toml ./... 34 | 35 | tidy: # Formats source files, cleans go.mod 36 | @gofmt -w . 
37 | @go mod tidy 38 | 39 | .travis: # Travis CI (see .travis.yml), runs tests 40 | ifndef TRAVIS 41 | @echo "Fail: requires Travis runtime" 42 | else 43 | @$(MAKE) test --no-print-directory && \ 44 | goveralls -coverprofile=./coverage.out -service=travis-ci 45 | endif 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://app.travis-ci.com/dtgorski/jsonlex.svg?branch=master)](https://app.travis-ci.com/dtgorski/jsonlex) 2 | [![Coverage Status](https://coveralls.io/repos/github/dtgorski/jsonlex/badge.svg?branch=master)](https://coveralls.io/github/dtgorski/jsonlex?branch=master) 3 | [![Open Issues](https://img.shields.io/github/issues/dtgorski/jsonlex.svg)](https://github.com/dtgorski/jsonlex/issues) 4 | [![Report Card](https://goreportcard.com/badge/github.com/dtgorski/jsonlex)](https://goreportcard.com/report/github.com/dtgorski/jsonlex) 5 | [![PkgGoDev](https://pkg.go.dev/badge/github.com/dtgorski/jsonlex)](https://pkg.go.dev/github.com/dtgorski/jsonlex) 6 | 7 | ## jsonlex 8 | 9 | Fast JSON lexer (tokenizer) with no memory footprint and no garbage collector pressure (zero heap allocations). 10 | 11 | ### Installation 12 | ``` 13 | go get -u github.com/dtgorski/jsonlex 14 | ``` 15 | 16 | ### Important 17 | Using an ```io.Reader``` that directly uses system calls (e.g. ```os.File```) will result in poor performance. Wrap your input reader with ```bufio.Reader``` or better ```bytes.Reader``` to achieve best results. 
18 | 19 | ### Usage A - iterating behaviour (Cursor) 20 | ``` 21 | package main 22 | 23 | import ( 24 | "bytes" 25 | "github.com/dtgorski/jsonlex" 26 | ) 27 | 28 | func main() { 29 | reader := bytes.NewReader( 30 | []byte(`{ "foo": "bar", "baz": 42 }`), 31 | ) 32 | 33 | cursor := jsonlex.NewCursor(reader, nil) 34 | 35 | println(cursor.Curr().String()) 36 | println(cursor.Next().String()) 37 | 38 | if !cursor.Next().Is(jsonlex.TokenEOF) { 39 | println("there is more ...") 40 | } 41 | } 42 | ``` 43 | 44 | ### Usage B - emitting behaviour (Yield) 45 | ``` 46 | package main 47 | 48 | import ( 49 | "bytes" 50 | "github.com/dtgorski/jsonlex" 51 | ) 52 | 53 | func main() { 54 | reader := bytes.NewReader( 55 | []byte(`{ "foo": "bar", "baz": 42 }`), 56 | ) 57 | 58 | lexer := jsonlex.NewLexer( 59 | func(kind jsonlex.TokenKind, load []byte, pos uint) bool { 60 | 61 | save := make([]byte, len(load)) 62 | copy(save, load) 63 | 64 | println(pos, kind, string(save)) 65 | return true 66 | }, 67 | ) 68 | 69 | lexer.Scan(reader) 70 | } 71 | ``` 72 | 73 | Please note, that the ```Scan()``` function is reentrant and subsequent invocations will continue to consume the available byte stream _as long as you provide_ a reader that implements an ```UnreadByte() error``` interface, and you [configure the Lexer with the ```LexerOptEnableUnreadBuffer``` option](https://pkg.go.dev/github.com/dtgorski/jsonlex#NewLexer) activated. 74 | 75 | ### Emitted tokens 76 | | [```jsonlex```](https://pkg.go.dev/github.com/dtgorski/jsonlex) | Representation 77 | | --- | --- 78 | |```TokenEOF``` | signals end of file/stream 79 | |```TokenERR``` | error string (other than EOF) 80 | |```TokenLIT``` | literal (```true```, ```false```, ```null```) 81 | |```TokenNUM``` | float number 82 | |```TokenSTR``` | "...\\"..." 
83 | |```TokenCOL``` | : colon 84 | |```TokenCOM``` | , comma 85 | |```TokenLSB``` | [ left square bracket 86 | |```TokenRSB``` | ] right square bracket 87 | |```TokenLCB``` | { left curly brace 88 | |```TokenRCB``` | } right curly brace 89 | 90 | ### Artificial benchmarks 91 | 92 | Each benchmark consists of complete tokenization of a JSON document of a given size (2kB, 20kB, 200kB and 2000kB) using one CPU core. The unit ```doc/s``` means _tokenized documents per second_, so more is better. 93 | The comparison candidate is Go's [encoding/json.Decoder.Token()](https://golang.org/pkg/encoding/json/#Decoder.Token) implementation. 94 | 95 | | |2kB|20kB|200kb|2000kB 96 | | --- | --- | --- | --- | --- 97 | |```encoding/json```|```9910 doc/s```|```1152 doc/s```|```126 doc/s```|```14 doc/s``` 98 | |```dtgorski/jsonlex```|**```71880 doc/s```**|**```7341 doc/s```**|**```753 doc/s```**|**```85 doc/s```** 99 | 100 | ``` 101 | cpus: 1 core (~8000 BogoMIPS) 102 | goos: linux 103 | goarch: amd64 104 | pkg: github.com/dtgorski/jsonlex/bench 105 | 106 | Benchmark_encjson_2kB 9910 120475 ns/op 36528 B/op 1963 allocs/op 107 | Benchmark_encjson_20kB 1152 1040771 ns/op 318432 B/op 18231 allocs/op 108 | Benchmark_encjson_200kB 126 9494534 ns/op 2877968 B/op 164401 allocs/op 109 | Benchmark_encjson_2000kB 14 77593586 ns/op 23355856 B/op 1319126 allocs/op 110 | 111 | Benchmark_jsonlex_lexer_2kB 71880 16691 ns/op 0 B/op 0 allocs/op 112 | Benchmark_jsonlex_lexer_20kB 7341 163210 ns/op 0 B/op 0 allocs/op 113 | Benchmark_jsonlex_lexer_200kB 753 1594025 ns/op 0 B/op 0 allocs/op 114 | Benchmark_jsonlex_lexer_2000kB 85 14107866 ns/op 0 B/op 0 allocs/op 115 | 116 | Benchmark_jsonlex_cursor_2kB 38002 31776 ns/op 3680 B/op 592 allocs/op 117 | Benchmark_jsonlex_cursor_20kB 4058 300490 ns/op 25168 B/op 5446 allocs/op 118 | Benchmark_jsonlex_cursor_200kB 422 2777058 ns/op 248816 B/op 49141 allocs/op 119 | Benchmark_jsonlex_cursor_2000kB 50 23559879 ns/op 2254896 B/op 396298 allocs/op 120 | ``` 121 | 
122 | ### Disclaimer 123 | The implementation and features of ```jsonlex``` follow the [YAGNI](https://en.wikipedia.org/wiki/You_aren%27t_gonna_need_it) principle. 124 | There is no claim for completeness or reliability. 125 | 126 | ### @dev 127 | Try ```make```: 128 | ``` 129 | $ make 130 | 131 | make help Displays this list 132 | make clean Removes build/test artifacts 133 | make test Runs integrity test with -race 134 | make bench Executes artificial benchmarks 135 | make prof-cpu Creates CPU profiler output 136 | make prof-mem Creates memory profiler output 137 | make sniff Checks format and runs linter (void on success) 138 | make tidy Formats source files, cleans go.mod 139 | ``` 140 | 141 | ### License 142 | [MIT](https://opensource.org/licenses/MIT) - © dtg [at] lengo [dot] org 143 | -------------------------------------------------------------------------------- /bench/encjson_test.go: -------------------------------------------------------------------------------- 1 | // MIT license · Daniel T. 
Gorski · dtg [at] lengo [dot] org · 10/2020 2 | 3 | package bench 4 | 5 | import ( 6 | "bytes" 7 | "encoding/json" 8 | "io" 9 | "io/ioutil" 10 | "os" 11 | "testing" 12 | ) 13 | 14 | func Benchmark_encjson_2kB(b *testing.B) { 15 | runDecoder(b, "../testdata/2kB.json") 16 | } 17 | 18 | func Benchmark_encjson_20kB(b *testing.B) { 19 | runDecoder(b, "../testdata/20kB.json") 20 | } 21 | 22 | func Benchmark_encjson_200kB(b *testing.B) { 23 | runDecoder(b, "../testdata/200kB.json") 24 | } 25 | 26 | func Benchmark_encjson_2000kB(b *testing.B) { 27 | runDecoder(b, "../testdata/2000kB.json") 28 | } 29 | 30 | func runDecoder(b *testing.B, file string) { 31 | b.ReportAllocs() 32 | 33 | f, _ := os.Open(file) 34 | defer func() { _ = f.Close() }() 35 | buf, _ := ioutil.ReadAll(f) 36 | 37 | b.ResetTimer() 38 | for n := 0; n < b.N; n++ { 39 | d := json.NewDecoder(bytes.NewReader(buf)) 40 | for { 41 | _, err := d.Token() 42 | if err == io.EOF { 43 | break 44 | } 45 | if err != nil { 46 | b.Error(err) 47 | } 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /bench/jsonlex_test.go: -------------------------------------------------------------------------------- 1 | // MIT license · Daniel T. Gorski · dtg [at] lengo [dot] org · 10/2020 2 | 3 | package bench 4 | 5 | import ( 6 | "io" 7 | "io/ioutil" 8 | "os" 9 | "testing" 10 | 11 | . 
"github.com/dtgorski/jsonlex" 12 | ) 13 | 14 | func Benchmark_jsonlex_lexer_2kB(b *testing.B) { 15 | runLexer(b, "../testdata/2kB.json") 16 | } 17 | 18 | func Benchmark_jsonlex_lexer_20kB(b *testing.B) { 19 | runLexer(b, "../testdata/20kB.json") 20 | } 21 | 22 | func Benchmark_jsonlex_lexer_200kB(b *testing.B) { 23 | runLexer(b, "../testdata/200kB.json") 24 | } 25 | 26 | func Benchmark_jsonlex_lexer_2000kB(b *testing.B) { 27 | runLexer(b, "../testdata/2000kB.json") 28 | } 29 | 30 | func Benchmark_jsonlex_cursor_2kB(b *testing.B) { 31 | runCursor(b, "../testdata/2kB.json") 32 | } 33 | 34 | func Benchmark_jsonlex_cursor_20kB(b *testing.B) { 35 | runCursor(b, "../testdata/20kB.json") 36 | } 37 | 38 | func Benchmark_jsonlex_cursor_200kB(b *testing.B) { 39 | runCursor(b, "../testdata/200kB.json") 40 | } 41 | 42 | func Benchmark_jsonlex_cursor_2000kB(b *testing.B) { 43 | runCursor(b, "../testdata/2000kB.json") 44 | } 45 | 46 | func runLexer(b *testing.B, file string) { 47 | b.ReportAllocs() 48 | 49 | f, _ := os.Open(file) 50 | defer func() { _ = f.Close() }() 51 | buf, _ := ioutil.ReadAll(f) 52 | 53 | r := newReader(buf) 54 | 55 | lexer := NewLexer( 56 | func(kind TokenKind, load []byte, pos uint) bool { 57 | if kind == TokenERR { 58 | b.Fatal(kind) 59 | } 60 | return true 61 | }, 62 | ) 63 | 64 | b.ResetTimer() 65 | for n := 0; n < b.N; n++ { 66 | r.Reset() 67 | lexer.Scan(r) 68 | } 69 | } 70 | 71 | func runCursor(b *testing.B, file string) { 72 | b.ReportAllocs() 73 | 74 | f, _ := os.Open(file) 75 | defer func() { _ = f.Close() }() 76 | buf, _ := ioutil.ReadAll(f) 77 | 78 | b.ResetTimer() 79 | for n := 0; n < b.N; n++ { 80 | r := newReader(buf) 81 | cursor := NewCursor(r, nil) 82 | 83 | for ; ; cursor.Next() { 84 | if cursor.Curr().Is(TokenERR) { 85 | b.Errorf("%s", cursor.Curr().Load) 86 | break 87 | } 88 | if cursor.Curr().Is(TokenEOF) { 89 | break 90 | } 91 | } 92 | } 93 | } 94 | 95 | type ( 96 | reader struct { 97 | buf []byte 98 | pos int 99 | len int 100 | } 101 
| ) 102 | 103 | func newReader(b []byte) *reader { 104 | return &reader{buf: b, len: len(b)} 105 | } 106 | 107 | func (r *reader) Read(b []byte) (n int, err error) { 108 | if r.pos == r.len { 109 | return 0, io.EOF 110 | } 111 | b[0] = r.buf[r.pos] 112 | r.pos++ 113 | return 1, nil 114 | } 115 | 116 | func (r *reader) Reset() { 117 | r.pos = 0 118 | } 119 | -------------------------------------------------------------------------------- /cursor.go: -------------------------------------------------------------------------------- 1 | // MIT license · Daniel T. Gorski · dtg [at] lengo [dot] org · 10/2020 2 | 3 | package jsonlex 4 | 5 | import ( 6 | "io" 7 | ) 8 | 9 | type ( 10 | // Cursor allows traversing the token stream. 11 | Cursor struct { 12 | reader io.Reader 13 | filter Filter 14 | lexer *Lexer 15 | lastTok Token 16 | currTok Token 17 | nextTok Token 18 | } 19 | 20 | // Token is a container for token information. 21 | Token struct { 22 | Kind TokenKind 23 | Load []byte 24 | Pos uint 25 | } 26 | 27 | // TokenKind denotes the type of token. 28 | TokenKind uint8 29 | 30 | // Filter is a callback function. It will be invoked when 31 | // the cursor is advanced. The callback must return whether 32 | // the token is accepted (true) or should be dropped (false). 33 | // After a token is dropped, the scan for a next token continues. 34 | Filter func(kind TokenKind, load []byte) bool 35 | ) 36 | 37 | // NewCursor creates and prepares a Cursor.
38 | func NewCursor(r io.Reader, f Filter) *Cursor { 39 | c := &Cursor{ 40 | reader: r, 41 | filter: f, 42 | } 43 | 44 | yield := func(kind TokenKind, load []byte, pos uint) bool { 45 | if c.currTok.Is(TokenERR) { 46 | return false 47 | } 48 | if c.filter != nil && !c.filter(kind, load) { 49 | return true 50 | } 51 | 52 | val := make([]byte, len(load)) 53 | copy(val, load) 54 | 55 | c.lastTok = c.currTok 56 | c.currTok = c.nextTok 57 | c.nextTok = Token{kind, val, pos} 58 | 59 | return false 60 | } 61 | 62 | c.lexer = NewLexer(yield) 63 | c.Next() 64 | c.Next() 65 | 66 | return c 67 | } 68 | 69 | // Last returns the previous Token in stream. 70 | // The underlying scanner position is not modified. 71 | func (c *Cursor) Last() Token { 72 | return c.lastTok 73 | } 74 | 75 | // Curr function returns the current Token in stream. 76 | // The underlying scanner position is not modified. 77 | func (c *Cursor) Curr() Token { 78 | return c.currTok 79 | } 80 | 81 | // Peek returns the next Token in stream. 82 | // The underlying scanner position is not modified. 83 | func (c *Cursor) Peek() Token { 84 | return c.nextTok 85 | } 86 | 87 | // Next returns the next Token in stream. In contrast to 88 | // the other methods, the underlying scanner position is 89 | // modified. 90 | func (c *Cursor) Next() Token { 91 | c.lexer.Scan(c.reader) 92 | return c.currTok 93 | } 94 | 95 | // Is is a convenience function. 96 | func (t Token) Is(kind TokenKind) bool { 97 | return t.Kind == kind 98 | } 99 | 100 | func (t Token) String() string { 101 | return string(t.Load) 102 | } 103 | 104 | // Kinds of tokens emitted by the lexer. 105 | const ( 106 | TokenEOF TokenKind = iota // signals end of file/stream 107 | TokenERR // error string (other than EOF) 108 | TokenLIT // literal (true, false, null) 109 | TokenNUM // float number 110 | TokenSTR // "...\"..." 
111 | TokenCOL // : colon 112 | TokenCOM // , comma 113 | TokenLSB // [ left square bracket 114 | TokenRSB // ] right square bracket 115 | TokenLCB // { left curly brace 116 | TokenRCB // } right curly brace 117 | 118 | scanning 119 | ) 120 | 121 | // Is is a convenience function. 122 | func (k TokenKind) Is(kind TokenKind) bool { 123 | return k == kind 124 | } 125 | -------------------------------------------------------------------------------- /cursor_test.go: -------------------------------------------------------------------------------- 1 | // MIT license · Daniel T. Gorski · dtg [at] lengo [dot] org · 10/2020 2 | 3 | package jsonlex 4 | 5 | import ( 6 | "bytes" 7 | "io" 8 | "testing" 9 | ) 10 | 11 | func TestCursor_1(t *testing.T) { 12 | s := `{ "foo": -1 }` 13 | r := bytes.NewReader([]byte(s)) 14 | c := NewCursor(r, nil) 15 | 16 | if n := c.Curr(); !n.Is(TokenLCB) { 17 | t.Errorf("unexpected") 18 | } 19 | if n := c.Peek(); !n.Is(TokenSTR) { 20 | t.Errorf("unexpected") 21 | } 22 | if n := c.Next(); !n.Is(TokenSTR) { 23 | t.Errorf("unexpected") 24 | } 25 | if n := c.Next(); !n.Is(TokenCOL) { 26 | t.Errorf("unexpected") 27 | } 28 | if n := c.Next(); !n.Is(TokenNUM) { 29 | t.Errorf("unexpected") 30 | } 31 | if n := c.Next(); !n.Is(TokenRCB) { 32 | t.Errorf("unexpected") 33 | } 34 | if n := c.Last(); !n.Is(TokenNUM) { 35 | t.Errorf("unexpected") 36 | } 37 | if n := c.Next(); !n.Is(TokenEOF) { 38 | t.Errorf("unexpected") 39 | } 40 | if n := c.Next(); !n.Is(TokenEOF) { 41 | t.Errorf("unexpected") 42 | } 43 | } 44 | 45 | func TestCursor_2(t *testing.T) { 46 | s := `{ "foo": -1 }` 47 | r := bytes.NewReader([]byte(s)) 48 | f := func(k TokenKind, l []byte) bool { 49 | return !k.Is(TokenLCB) && !k.Is(TokenRCB) && !k.Is(TokenCOL) 50 | } 51 | c := NewCursor(r, f) 52 | 53 | if n := c.Curr(); !n.Is(TokenSTR) { 54 | t.Errorf("unexpected") 55 | } 56 | if n := c.Next(); !n.Is(TokenNUM) { 57 | t.Errorf("unexpected") 58 | } 59 | if n := c.Next(); !n.Is(TokenEOF) { 60 | 
t.Errorf("unexpected") 61 | } 62 | } 63 | 64 | func TestCursor_3(t *testing.T) { 65 | r := &FaultyReader{} 66 | c := NewCursor(r, nil) 67 | 68 | if n := c.Next(); !n.Is(TokenERR) { 69 | t.Errorf("unexpected") 70 | } 71 | if c.Next().String() != io.ErrUnexpectedEOF.Error() { 72 | t.Errorf("unexpected") 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/dtgorski/jsonlex 2 | 3 | go 1.14 4 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dtgorski/jsonlex/11d151ebbef040f91442c5aa677e6a0afce981b1/go.sum -------------------------------------------------------------------------------- /lexer.go: -------------------------------------------------------------------------------- 1 | // MIT license · Daniel T. Gorski · dtg [at] lengo [dot] org · 10/2020 2 | // Gregor Noczinski · gregor [at] noczinski [dot] eu · 12/2021 3 | 4 | package jsonlex 5 | 6 | import ( 7 | "errors" 8 | "fmt" 9 | "io" 10 | ) 11 | 12 | type ( 13 | // Lexer splits JSON byte stream into tokens. 14 | Lexer struct { 15 | yield Yield // callback function 16 | area []byte // pre-allocated space 17 | buff [1]byte // read-in buffer 18 | bpos uint // byte position in stream 19 | tpos uint // token position in stream 20 | hold bool // whether to advance reader 21 | frac bool // number fraction mode 22 | expo bool // number exponent mode 23 | sign bool // exponent sign 24 | esc bool // string escaping mode 25 | burd bool // is true if buffer was unread 26 | burde bool // unread feature (if supported) enabled 27 | 28 | } 29 | 30 | // Yield is a callback function. It will be invoked 31 | // by the Scan() function for each token to be found. 
32 | // The callback must return whether the scan process 33 | // should continue (true) or stop (false). 34 | Yield func(kind TokenKind, load []byte, pos uint) bool 35 | ) 36 | 37 | // NewLexer takes a callback (yield) function as parameter. 38 | // This yield function will be invoked for each token 39 | // consumed from the byte stream by Scan(). 40 | func NewLexer(yield Yield, opts ...lexerOpt) *Lexer { 41 | l := &Lexer{ 42 | yield: yield, 43 | area: make([]byte, 0, 1024), 44 | buff: [1]byte{0}, 45 | } 46 | for _, opt := range opts { 47 | opt(l) 48 | } 49 | return l 50 | } 51 | 52 | type lexerOpt func(*Lexer) 53 | 54 | var ( 55 | // LexerOptEnableUnreadBuffer enables the unread feature: if the given 56 | // io.Reader implements UnreadableReader, its UnreadableReader.UnreadByte 57 | // will be called if the Lexer reads one byte more to ensure that 58 | // a literal or number was ended. This ensures this Lexer never reads 59 | // more bytes than it is currently processing. 60 | LexerOptEnableUnreadBuffer lexerOpt = func(l *Lexer) { 61 | l.burde = true 62 | } 63 | ) 64 | 65 | // Scan reads and tokenizes the byte stream. 66 | // The yield function is invoked for each token found. 67 | // 68 | // The Scan() function terminates in the following cases: 69 | // a) when the yield function returns false 70 | // b) after emitting a jsonlex.TokenEOF or jsonlex.TokenERR 71 | // 72 | // Important: The Scan() function is reentrant, subsequent invocations will 73 | // continue to consume the available byte stream as long as you provide 74 | // a reader that implements an UnreadByte() interface, and you configure 75 | // the Lexer with the LexerOptEnableUnreadBuffer option activated.
76 | func (l *Lexer) Scan(r io.Reader) { 77 | var ( 78 | b byte // byte under scrutiny 79 | n int // number of bytes read 80 | t TokenKind // current token or state 81 | err error // ordinary error holder 82 | ) 83 | 84 | nextToken: 85 | l.esc, l.frac = false, false 86 | l.expo, l.sign = false, false 87 | load := l.area[:0] 88 | t = scanning 89 | 90 | if l.burd { 91 | if _, ok := r.(UnreadableReader); ok { 92 | expb := l.buff[0] 93 | n, err = r.Read(l.buff[:]) 94 | if n == 0 || err == io.EOF { 95 | err = io.ErrUnexpectedEOF 96 | goto emitErrToken 97 | } 98 | if err != nil { 99 | goto emitErrToken 100 | } 101 | if expb != l.buff[0] { 102 | err = errUnexpectedByte 103 | } 104 | l.burd = false 105 | } 106 | } 107 | 108 | nextByte: 109 | if l.hold { 110 | l.hold = false 111 | } else { 112 | n, err = r.Read(l.buff[:]) 113 | l.burd = false 114 | l.bpos += uint(n) 115 | } 116 | 117 | if err != nil { 118 | if err == io.EOF && len(load) > 0 && t.Is(TokenNUM) { 119 | goto emitNumToken 120 | } 121 | if err == io.EOF && len(load) > 0 && t.Is(TokenLIT) { 122 | goto emitLitToken 123 | } 124 | if err == io.EOF && len(load) > 0 { 125 | goto emitToken 126 | } 127 | if err == io.EOF { 128 | l.tpos = l.bpos 129 | goto emitEofToken 130 | } 131 | goto emitErrToken 132 | } 133 | 134 | if b = l.buff[0]; t != scanning { 135 | if t.Is(TokenSTR) { 136 | goto scanStr 137 | } 138 | if t.Is(TokenNUM) { 139 | goto scanNum 140 | } 141 | if t.Is(TokenLIT) { 142 | goto scanLit 143 | } 144 | goto emitTokenHold 145 | } 146 | 147 | if b == 0x20 || b == '\n' || b == '\r' || b == '\t' { 148 | goto nextByte 149 | } 150 | if b > 0x7F || b < 0x20 { 151 | goto emitUnexpErrToken 152 | } 153 | 154 | l.tpos = l.bpos - 1 155 | if s := states[b]; s != 0 { 156 | t = s 157 | if b == '"' { 158 | goto nextByte 159 | } 160 | goto consume 161 | } 162 | 163 | emitUnexpErrToken: 164 | if m := fmt.Sprintf("unexpected %q (0x%X)", b, b); true { 165 | l.yield(TokenERR, []byte(m), l.tpos) 166 | } 167 | return 168 | 169 | 
emitErrToken: 170 | l.yield(TokenERR, []byte(err.Error()), l.tpos) 171 | return 172 | 173 | emitEofToken: 174 | l.yield(TokenEOF, nil, l.tpos) 175 | return 176 | 177 | emitNumToken: 178 | if b := load[len(load)-1]; b == '.' || b == '-' || 179 | b == 'e' || b == 'E' { 180 | goto emitUnexpErrToken 181 | } 182 | if len(load) >= 3 { 183 | if s := string(load[:3]); s == "-.e" || s == "-.E" { 184 | goto emitUnexpErrToken 185 | } 186 | } 187 | goto emitTokenHold 188 | 189 | emitLitToken: 190 | if s := string(load); true { 191 | if s != "null" && s != "true" && s != "false" { 192 | goto emitUnexpErrToken 193 | } 194 | } 195 | 196 | emitTokenHold: 197 | if err == nil { 198 | l.hold = true 199 | } 200 | 201 | emitToken: 202 | if l.yield(t, load, l.tpos) { 203 | goto nextToken 204 | } 205 | return 206 | 207 | scanStr: 208 | if l.esc { 209 | l.esc = false 210 | goto consume 211 | } else if b == '\\' { 212 | l.esc = true 213 | } 214 | if !l.esc && b == '"' { 215 | goto emitToken 216 | } 217 | goto consume 218 | 219 | scanNum: 220 | if b >= '0' && b <= '9' { 221 | l.sign = false 222 | goto consume 223 | } 224 | if !l.frac && b == '.' 
{ 225 | l.frac = true 226 | goto consume 227 | } 228 | if !l.expo && (b == 'e' || b == 'E') { 229 | l.frac, l.expo, l.sign = true, true, true 230 | goto consume 231 | } 232 | if l.sign && (b == '+' || b == '-') { 233 | l.sign = false 234 | goto consume 235 | } 236 | 237 | if l.burde { 238 | if ur, ok := r.(UnreadableReader); ok { 239 | if err = ur.UnreadByte(); err != nil { 240 | goto emitErrToken 241 | } 242 | l.burd = true 243 | } 244 | } 245 | 246 | goto emitNumToken 247 | 248 | scanLit: 249 | if b >= 'a' && b <= 'z' { 250 | goto consume 251 | } 252 | 253 | if l.burde { 254 | if ur, ok := r.(UnreadableReader); ok { 255 | if err = ur.UnreadByte(); err != nil { 256 | goto emitErrToken 257 | } 258 | l.burd = true 259 | } 260 | } 261 | 262 | goto emitLitToken 263 | 264 | consume: 265 | load = append(load, b) 266 | switch t { 267 | case TokenLSB, TokenRSB, 268 | TokenLCB, TokenRCB, 269 | TokenCOL, TokenCOM: 270 | goto emitToken 271 | } 272 | goto nextByte 273 | } 274 | 275 | var states = [0x80]TokenKind{ 276 | ' ': 0, // 0x20 space 277 | '!': 0, // 0x21 exclamation mark 278 | '"': TokenSTR, // 0x22 quotation mark 279 | '#': 0, // 0x23 number sign 280 | '$': 0, // 0x24 dollar sign 281 | '%': 0, // 0x25 percent sign 282 | '&': 0, // 0x26 ampersand 283 | '\'': 0, // 0x27 apostrophe 284 | '(': 0, // 0x28 left parenthesis 285 | ')': 0, // 0x29 right parenthesis 286 | '*': 0, // 0x2A asterisk 287 | '+': 0, // 0x2B plus sign 288 | ',': TokenCOM, // 0x2C comma 289 | '-': TokenNUM, // 0x2D minus sign 290 | '.': 0, // 0x2E full stop 291 | '/': 0, // 0x2F forward slash 292 | '0': TokenNUM, // 0x30 digit 293 | '1': TokenNUM, // 0x31 digit 294 | '2': TokenNUM, // 0x32 digit 295 | '3': TokenNUM, // 0x33 digit 296 | '4': TokenNUM, // 0x34 digit 297 | '5': TokenNUM, // 0x35 digit 298 | '6': TokenNUM, // 0x36 digit 299 | '7': TokenNUM, // 0x37 digit 300 | '8': TokenNUM, // 0x38 digit 301 | '9': TokenNUM, // 0x39 digit 302 | ':': TokenCOL, // 0x3A colon 303 | ';': 0, // 0x3B semicolon 
304 | '<': 0, // 0x3C less-than sign 305 | '=': 0, // 0x3D equals sign 306 | '>': 0, // 0x3E greater-than sign 307 | '?': 0, // 0x3F question mark 308 | '@': 0, // 0x40 commercial at 309 | 'A': 0, // 0x41 capital letter 310 | 'B': 0, // 0x42 capital letter 311 | 'C': 0, // 0x43 capital letter 312 | 'D': 0, // 0x44 capital letter 313 | 'E': 0, // 0x45 capital letter 314 | 'F': 0, // 0x46 capital letter 315 | 'G': 0, // 0x47 capital letter 316 | 'H': 0, // 0x48 capital letter 317 | 'I': 0, // 0x49 capital letter 318 | 'J': 0, // 0x4A capital letter 319 | 'K': 0, // 0x4B capital letter 320 | 'L': 0, // 0x4C capital letter 321 | 'M': 0, // 0x4D capital letter 322 | 'N': 0, // 0x4E capital letter 323 | 'O': 0, // 0x4F capital letter 324 | 'P': 0, // 0x50 capital letter 325 | 'Q': 0, // 0x51 capital letter 326 | 'R': 0, // 0x52 capital letter 327 | 'S': 0, // 0x53 capital letter 328 | 'T': 0, // 0x54 capital letter 329 | 'U': 0, // 0x55 capital letter 330 | 'V': 0, // 0x56 capital letter 331 | 'W': 0, // 0x57 capital letter 332 | 'X': 0, // 0x58 capital letter 333 | 'Y': 0, // 0x59 capital letter 334 | 'Z': 0, // 0x5A capital letter 335 | '[': TokenLSB, // 0x5B left square bracket 336 | '\\': 0, // 0x5C reverse slash 337 | ']': TokenRSB, // 0x5D right square bracket 338 | '^': 0, // 0x5E circumflex accent 339 | '_': 0, // 0x5F low line 340 | '`': 0, // 0x60 grave accent 341 | 'a': 0, // 0x61 small letter 342 | 'b': 0, // 0x62 small letter 343 | 'c': 0, // 0x63 small letter 344 | 'd': 0, // 0x64 small letter 345 | 'e': 0, // 0x65 small letter 346 | 'f': TokenLIT, // 0x66 small letter 347 | 'g': 0, // 0x67 small letter 348 | 'h': 0, // 0x68 small letter 349 | 'i': 0, // 0x69 small letter 350 | 'j': 0, // 0x6A small letter 351 | 'k': 0, // 0x6B small letter 352 | 'l': 0, // 0x6C small letter 353 | 'm': 0, // 0x6D small letter 354 | 'n': TokenLIT, // 0x6E small letter 355 | 'o': 0, // 0x6F small letter 356 | 'p': 0, // 0x70 small letter 357 | 'q': 0, // 0x71 small letter 358 
// UnreadableReader is an io.Reader which is additionally able to unread
// the byte it delivered last.
type UnreadableReader interface {
	io.Reader

	// UnreadByte unreads the last byte read by this reader.
	UnreadByte() error
}

// errUnexpectedByte is the package-private error value carrying the
// message "unexpected byte".
var errUnexpectedByte = errors.New("unexpected byte")
Gorski · dtg [at] lengo [dot] org · 10/2020 2 | // Gregor Noczinski · gregor [at] noczinski [dot] eu · 12/2021 3 | 4 | package jsonlex 5 | 6 | import ( 7 | "bytes" 8 | "io" 9 | "strings" 10 | "testing" 11 | ) 12 | 13 | // expect EOF 14 | func TestLexer_Scan_1(t *testing.T) { 15 | s := `` 16 | i := 0 17 | y := func(kind TokenKind, load []byte, pos uint) bool { 18 | i++ 19 | if !kind.Is(TokenEOF) { 20 | t.Errorf("unexpected %q", load) 21 | } 22 | return true 23 | } 24 | l := NewLexer(y) 25 | r := bytes.NewReader([]byte(s)) 26 | l.Scan(r) 27 | 28 | if i != 1 { 29 | t.Error("unexpected") 30 | } 31 | } 32 | 33 | // expect error, unexpected input 34 | func TestLexer_Scan_2(t *testing.T) { 35 | s := ` * ` 36 | i := 0 37 | y := func(kind TokenKind, load []byte, pos uint) bool { 38 | i++ 39 | if !kind.Is(TokenERR) { 40 | t.Errorf("unexpected %q", load) 41 | } 42 | return true 43 | } 44 | l := NewLexer(y) 45 | r := bytes.NewReader([]byte(s)) 46 | l.Scan(r) 47 | 48 | if i != 1 { 49 | t.Error("unexpected") 50 | } 51 | } 52 | 53 | // expect standard functionality 54 | func TestLexer_Scan_3(t *testing.T) { 55 | s := ` { "foo": "bar", "b\"az": [ null, true, false, -42, "false" ] } ` 56 | 57 | e := []struct { 58 | kind TokenKind 59 | load []byte 60 | }{ 61 | {kind: TokenLCB, load: []byte(`{`)}, 62 | {kind: TokenSTR, load: []byte(`foo`)}, 63 | {kind: TokenCOL, load: []byte(`:`)}, 64 | {kind: TokenSTR, load: []byte(`bar`)}, 65 | {kind: TokenCOM, load: []byte(`,`)}, 66 | {kind: TokenSTR, load: []byte(`b\"az`)}, 67 | {kind: TokenCOL, load: []byte(`:`)}, 68 | {kind: TokenLSB, load: []byte(`[`)}, 69 | {kind: TokenLIT, load: []byte(`null`)}, 70 | {kind: TokenCOM, load: []byte(`,`)}, 71 | {kind: TokenLIT, load: []byte(`true`)}, 72 | {kind: TokenCOM, load: []byte(`,`)}, 73 | {kind: TokenLIT, load: []byte(`false`)}, 74 | {kind: TokenCOM, load: []byte(`,`)}, 75 | {kind: TokenNUM, load: []byte(`-42`)}, 76 | {kind: TokenCOM, load: []byte(`,`)}, 77 | {kind: TokenSTR, load: []byte(`false`)}, 78 
| {kind: TokenRSB, load: []byte(`]`)}, 79 | {kind: TokenRCB, load: []byte(`}`)}, 80 | {kind: TokenEOF, load: nil}, 81 | } 82 | 83 | i := 0 84 | y := func(kind TokenKind, load []byte, pos uint) bool { 85 | if !e[i].kind.Is(kind) { 86 | t.Errorf("unexpected %q", kind) 87 | } 88 | if !bytes.Equal(e[i].load, load) { 89 | t.Errorf("unexpected %q", load) 90 | } 91 | i++ 92 | return true 93 | } 94 | l := NewLexer(y) 95 | r := bytes.NewReader([]byte(s)) 96 | l.Scan(r) 97 | } 98 | 99 | // expect no errors while tokenizing floats and other valid literals 100 | func TestLexer_Scan_4(t *testing.T) { 101 | s := []string{ 102 | "-0", 103 | "-1", 104 | "0.1e-20", 105 | "1.e+5", 106 | "1.0", 107 | "1e+1", 108 | "-.0E+0", 109 | "1E-0", 110 | "1E-1", 111 | ":", 112 | "true", 113 | "false", 114 | "null", 115 | } 116 | 117 | i := 0 118 | y := func(kind TokenKind, load []byte, pos uint) bool { 119 | i++ 120 | if kind.Is(TokenERR) { 121 | t.Errorf("unexpected %q", load) 122 | return false 123 | } 124 | return true 125 | } 126 | for _, v := range s { 127 | i = 0 128 | l := NewLexer(y) 129 | r := bytes.NewReader([]byte(v)) 130 | l.Scan(r) 131 | if i != 2 { 132 | t.Error("unexpected") 133 | } 134 | } 135 | } 136 | 137 | // expect errors while tokenizing broken floats 138 | func TestLexer_Scan_5(t *testing.T) { 139 | s := []string{ 140 | "-", 141 | "--", 142 | "+1", 143 | ".", 144 | "-0.", 145 | "-E", 146 | "-e", 147 | ".E", 148 | ".e", 149 | "-.E", 150 | "-.e", 151 | "1e", 152 | "-.e0", 153 | ".e0", 154 | "1E-+0", 155 | "1e.", 156 | } 157 | 158 | i := 0 159 | y := func(kind TokenKind, load []byte, pos uint) bool { 160 | i++ 161 | if !kind.Is(TokenERR) { 162 | t.Errorf("unexpected %q %q", kind, load) 163 | } 164 | return true 165 | } 166 | for _, v := range s { 167 | i = 0 168 | l := NewLexer(y) 169 | r := bytes.NewReader([]byte(v)) 170 | l.Scan(r) 171 | if i != 1 { 172 | t.Error("unexpected") 173 | } 174 | } 175 | for _, v := range s { 176 | i = 0 177 | l := NewLexer(y) 178 | v += " " // 
ws after token 179 | r := bytes.NewReader([]byte(v)) 180 | l.Scan(r) 181 | if i != 1 { 182 | t.Error("unexpected") 183 | } 184 | } 185 | } 186 | 187 | // expect error when byte stream contains illegal values 188 | func TestLexer_Scan_6(t *testing.T) { 189 | s := []byte{0x05, 0x7F, 0x80} 190 | 191 | i := 0 192 | y := func(kind TokenKind, load []byte, pos uint) bool { 193 | i++ 194 | if !kind.Is(TokenERR) { 195 | t.Errorf("unexpected %q", load) 196 | } 197 | return true 198 | } 199 | for _, v := range s { 200 | i = 0 201 | l := NewLexer(y) 202 | r := bytes.NewReader([]byte{v}) 203 | l.Scan(r) 204 | if i != 1 { 205 | t.Error("unexpected") 206 | } 207 | } 208 | } 209 | 210 | // expect error when malformed tokens found 211 | func TestLexer_Scan_7(t *testing.T) { 212 | s := `frue nalse tull` 213 | 214 | i := 0 215 | y := func(kind TokenKind, load []byte, pos uint) bool { 216 | i++ 217 | if !kind.Is(TokenERR) { 218 | t.Errorf("unexpected %d %q", kind, load) 219 | } 220 | return true 221 | } 222 | for _, v := range strings.Split(s, " ") { 223 | i = 0 224 | l := NewLexer(y) 225 | r := bytes.NewReader([]byte(v)) 226 | l.Scan(r) 227 | if i != 1 { 228 | t.Error("unexpected") 229 | } 230 | } 231 | for _, v := range strings.Split(s, " ") { 232 | i = 0 233 | l := NewLexer(y) 234 | v += " " // ws after token 235 | r := bytes.NewReader([]byte(v)) 236 | l.Scan(r) 237 | if i != 1 { 238 | t.Error("unexpected") 239 | } 240 | } 241 | } 242 | 243 | // re-entrance 244 | func TestLexer_Scan_8(t *testing.T) { 245 | s := ` { } ` 246 | i := 0 247 | y := func(kind TokenKind, load []byte, pos uint) bool { 248 | i++ 249 | if i == 1 && !kind.Is(TokenLCB) { 250 | t.Errorf("unexpected %q", load) 251 | } 252 | if i == 2 && !kind.Is(TokenRCB) { 253 | t.Errorf("unexpected %q", load) 254 | } 255 | if i == 3 && !kind.Is(TokenEOF) { 256 | t.Errorf("unexpected %q", load) 257 | } 258 | if i == 4 && !kind.Is(TokenEOF) { 259 | t.Errorf("unexpected %q", load) 260 | } 261 | return false 262 | } 263 | l := 
NewLexer(y) 264 | r := bytes.NewReader([]byte(s)) 265 | 266 | l.Scan(r) 267 | l.Scan(r) 268 | l.Scan(r) 269 | l.Scan(r) 270 | } 271 | 272 | // expect error when reader fails with io.ErrUnexpectedEOF 273 | func TestLexer_Scan_9(t *testing.T) { 274 | y := func(kind TokenKind, load []byte, pos uint) bool { 275 | if !kind.Is(TokenERR) { 276 | t.Errorf("unexpected %q", load) 277 | } 278 | return true 279 | } 280 | l := NewLexer(y) 281 | r := &FaultyReader{} 282 | l.Scan(r) 283 | } 284 | 285 | type FaultyReader struct{} 286 | 287 | func (*FaultyReader) Read([]byte) (int, error) { 288 | return 0, io.ErrUnexpectedEOF 289 | } 290 | 291 | // ensure LexerOptEnableUnreadBuffer is working with objects 292 | func TestLexer_Scan_10(t *testing.T) { 293 | s := []byte(`{"hello": "world", "a": 1, "b": false}`) 294 | r := bytes.NewBuffer(s) 295 | 296 | steps := []struct { 297 | kind TokenKind 298 | load string 299 | left string 300 | }{{ 301 | kind: TokenLCB, //0 302 | load: `{`, 303 | left: `"hello": "world", "a": 1, "b": false}`, 304 | }, { 305 | kind: TokenSTR, //1 306 | load: `hello`, 307 | left: `: "world", "a": 1, "b": false}`, 308 | }, { 309 | kind: TokenCOL, //2 310 | load: `:`, 311 | left: ` "world", "a": 1, "b": false}`, 312 | }, { 313 | kind: TokenSTR, //3 314 | load: `world`, 315 | left: `, "a": 1, "b": false}`, 316 | }, { 317 | kind: TokenCOM, //4 318 | load: `,`, 319 | left: ` "a": 1, "b": false}`, 320 | }, { 321 | kind: TokenSTR, //5 322 | load: `a`, 323 | left: `: 1, "b": false}`, 324 | }, { 325 | kind: TokenCOL, //6 326 | load: `:`, 327 | left: ` 1, "b": false}`, 328 | }, { 329 | kind: TokenNUM, //7 330 | load: `1`, 331 | left: `, "b": false}`, 332 | }, { 333 | kind: TokenCOM, //8 334 | load: `,`, 335 | left: ` "b": false}`, 336 | }, { 337 | kind: TokenSTR, //9 338 | load: `b`, 339 | left: `: false}`, 340 | }, { 341 | kind: TokenCOL, //10 342 | load: `:`, 343 | left: ` false}`, 344 | }, { 345 | kind: TokenLIT, //11 346 | load: `false`, 347 | left: `}`, 348 | }, { 349 
| kind: TokenRCB, //12 350 | load: `}`, 351 | left: ``, 352 | }} 353 | 354 | var i int 355 | y := func(kind TokenKind, load []byte, pos uint) bool { 356 | if i >= len(steps) { 357 | panic("once too often called") 358 | } 359 | 360 | step := steps[i] 361 | if !kind.Is(step.kind) { 362 | t.Errorf("%d: unexpected token %q", i, load) 363 | } else if string(load) != step.load { 364 | t.Errorf("%d: uexpected load %q", i, load) 365 | } else if r.String() != step.left { 366 | t.Errorf("%d: uexpected left content to parse (%d != %d) %q", i, r.Len(), len(step.left), r.String()) 367 | } 368 | i++ 369 | return false 370 | } 371 | 372 | l := NewLexer(y, LexerOptEnableUnreadBuffer) 373 | 374 | for range steps { 375 | l.Scan(r) 376 | } 377 | } 378 | 379 | // ensure LexerOptEnableUnreadBuffer is working with arrays 380 | func TestLexer_Scan_11(t *testing.T) { 381 | s := []byte(`["hello", 1, false]`) 382 | r := bytes.NewBuffer(s) 383 | 384 | steps := []struct { 385 | kind TokenKind 386 | load string 387 | left string 388 | }{{ 389 | kind: TokenLSB, //0 390 | load: `[`, 391 | left: `"hello", 1, false]`, 392 | }, { 393 | kind: TokenSTR, //1 394 | load: `hello`, 395 | left: `, 1, false]`, 396 | }, { 397 | kind: TokenCOM, //2 398 | load: `,`, 399 | left: ` 1, false]`, 400 | }, { 401 | kind: TokenNUM, //3 402 | load: `1`, 403 | left: `, false]`, 404 | }, { 405 | kind: TokenCOM, //4 406 | load: `,`, 407 | left: ` false]`, 408 | }, { 409 | kind: TokenLIT, //5 410 | load: `false`, 411 | left: `]`, 412 | }, { 413 | kind: TokenRSB, //6 414 | load: `]`, 415 | left: ``, 416 | }} 417 | 418 | var i int 419 | y := func(kind TokenKind, load []byte, pos uint) bool { 420 | if i >= len(steps) { 421 | panic("once too often called") 422 | } 423 | 424 | step := steps[i] 425 | if !kind.Is(step.kind) { 426 | t.Errorf("%d: unexpected token %q", i, load) 427 | } else if string(load) != step.load { 428 | t.Errorf("%d: uexpected load %q", i, load) 429 | } else if r.String() != step.left { 430 | t.Errorf("%d: 
uexpected left content to parse (%d != %d) %q", i, r.Len(), len(step.left), r.String()) 431 | } 432 | i++ 433 | return false 434 | } 435 | 436 | l := NewLexer(y, LexerOptEnableUnreadBuffer) 437 | 438 | for range steps { 439 | l.Scan(r) 440 | } 441 | } 442 | -------------------------------------------------------------------------------- /reader_support.go: -------------------------------------------------------------------------------- 1 | // MIT license · Gregor Noczinski · gregor [at] noczinski [dot] eu · 12/2021 2 | 3 | package jsonlex 4 | 5 | import ( 6 | "fmt" 7 | "io" 8 | ) 9 | 10 | // EnsureAtLeastSingleByteUnreadableReader will provide an instance of the 11 | // provided io.Reader which can at least unread a single byte. 12 | func EnsureAtLeastSingleByteUnreadableReader(r io.Reader) UnreadableReader { 13 | if ur, ok := r.(UnreadableReader); ok { 14 | return ur 15 | } 16 | return &singleByteUnreadableReader{ 17 | delegate: r, 18 | } 19 | } 20 | 21 | type singleByteUnreadableReader struct { 22 | delegate io.Reader 23 | lastByte byte 24 | state singleByteUnreadableReaderState 25 | } 26 | 27 | func (r *singleByteUnreadableReader) Read(p []byte) (int, error) { 28 | if len(p) == 0 { 29 | return 0, nil 30 | } 31 | 32 | switch r.state { 33 | case singleByteUnreadableReaderStateBuffered, singleByteUnreadableReaderStateEmpty: 34 | n, err := r.delegate.Read(p) 35 | if n > 0 { 36 | r.lastByte = p[n-1] 37 | r.state = singleByteUnreadableReaderStateBuffered 38 | } 39 | return n, err 40 | 41 | case singleByteUnreadableReaderStateRewind: 42 | p[0] = r.lastByte 43 | r.state = singleByteUnreadableReaderStateEmpty 44 | if len(p) == 1 { 45 | return 1, nil 46 | } 47 | n, err := r.delegate.Read(p[1:]) 48 | return n + 1, err 49 | 50 | default: 51 | panic(fmt.Sprintf("unknown state: %d", r.state)) 52 | } 53 | } 54 | 55 | func (r *singleByteUnreadableReader) UnreadByte() error { 56 | switch r.state { 57 | case singleByteUnreadableReaderStateBuffered: 58 | r.state = 
// singleByteUnreadableReader adds single-byte unread support on top of a
// plain io.Reader by remembering the byte it handed out last.
type singleByteUnreadableReader struct {
	// delegate is the wrapped reader all fresh data is taken from.
	delegate io.Reader
	// lastByte is the byte most recently handed to the caller.
	lastByte byte
	// state tells whether lastByte may be unread or is pending replay.
	state singleByteUnreadableReaderState
}

// Read implements io.Reader.
//
// When a byte has been unread, it is placed into p[0] and the remainder of p
// is filled from the delegate. In every case the last byte handed to the
// caller is remembered, so UnreadByte succeeds after any Read that delivered
// at least one byte. A Read with an empty p returns 0, nil and changes
// nothing. It panics if the internal state holds an unknown value.
func (r *singleByteUnreadableReader) Read(p []byte) (int, error) {
	if len(p) == 0 {
		return 0, nil
	}

	switch r.state {
	case singleByteUnreadableReaderStateBuffered, singleByteUnreadableReaderStateEmpty:
		n, err := r.delegate.Read(p)
		if n > 0 {
			r.lastByte = p[n-1]
			r.state = singleByteUnreadableReaderStateBuffered
		}
		return n, err

	case singleByteUnreadableReaderStateRewind:
		p[0] = r.lastByte
		// The replayed byte is once more the last byte delivered, hence it
		// may be unread again. Falling back to the empty state here would
		// make UnreadByte fail right after a successful Read.
		r.state = singleByteUnreadableReaderStateBuffered
		if len(p) == 1 {
			return 1, nil
		}
		n, err := r.delegate.Read(p[1:])
		if n > 0 {
			// p[1:] received n bytes, so the last delivered byte is p[n].
			r.lastByte = p[n]
		}
		return n + 1, err

	default:
		panic(fmt.Sprintf("unknown state: %d", r.state))
	}
}

// UnreadByte marks the last delivered byte for replay by the next Read.
//
// It returns io.ErrShortBuffer when no byte has been delivered yet or when
// the last byte has already been unread. It panics if the internal state
// holds an unknown value.
func (r *singleByteUnreadableReader) UnreadByte() error {
	switch r.state {
	case singleByteUnreadableReaderStateBuffered:
		r.state = singleByteUnreadableReaderStateRewind
		return nil

	case singleByteUnreadableReaderStateEmpty, singleByteUnreadableReaderStateRewind:
		return io.ErrShortBuffer

	default:
		panic(fmt.Sprintf("unknown state: %d", r.state))
	}
}

// singleByteUnreadableReaderState enumerates the unread states of a
// singleByteUnreadableReader.
type singleByteUnreadableReaderState uint8

const (
	// singleByteUnreadableReaderStateEmpty: no byte is available for unread.
	singleByteUnreadableReaderStateEmpty singleByteUnreadableReaderState = iota
	// singleByteUnreadableReaderStateBuffered: lastByte may be unread.
	singleByteUnreadableReaderStateBuffered
	// singleByteUnreadableReaderStateRewind: lastByte was unread and is
	// delivered first by the next Read.
	singleByteUnreadableReaderStateRewind
)
instance := &singleByteUnreadableReader{delegate: delegate} 42 | 43 | steps := []struct { 44 | doUnread bool 45 | amount int 46 | expected string 47 | expectedError error 48 | left string 49 | }{{ //0 50 | amount: 2, 51 | expected: `01`, 52 | left: `23456789`, 53 | }, { //1 54 | amount: 1, 55 | expected: `2`, 56 | left: `3456789`, 57 | }, { //2 58 | doUnread: true, 59 | left: `3456789`, 60 | }, { //3 61 | amount: 1, 62 | expected: `2`, 63 | left: `3456789`, 64 | }, { //4 65 | amount: 3, 66 | expected: `345`, 67 | left: `6789`, 68 | }, { //5 69 | doUnread: true, 70 | left: `6789`, 71 | }, { //6 72 | doUnread: true, 73 | expectedError: io.ErrShortBuffer, 74 | left: `6789`, 75 | }, { //7 76 | amount: 3, 77 | expected: `567`, 78 | left: `89`, 79 | }, { //8 80 | amount: 0, 81 | expected: ``, 82 | left: `89`, 83 | }} 84 | 85 | for i, step := range steps { 86 | if step.doUnread { 87 | actualErr := instance.UnreadByte() 88 | if actualErr != step.expectedError { 89 | t.Errorf("%d: instance.UnreadByte() = %v, want: %v", i, actualErr, step.expectedError) 90 | } 91 | } else { 92 | actual := make([]byte, step.amount) 93 | actualN, actualErr := instance.Read(actual) 94 | if actualErr != step.expectedError { 95 | t.Errorf("%d: instance.Read() err = %v, want: %v", i, actualErr, step.expectedError) 96 | } 97 | if actualN != len(step.expected) { 98 | t.Errorf("%d: instance.Read() n = %d, want: %d", i, actualN, len(step.expected)) 99 | } 100 | if string(actual) != step.expected { 101 | t.Errorf("%d: actual = %q, want: %q", i, actual, step.expected) 102 | } 103 | } 104 | if delegate.String() != step.left { 105 | t.Errorf("%d: left = %q, want: %q", i, delegate.String(), step.left) 106 | } 107 | } 108 | } 109 | 110 | type uselessTestReader struct{} 111 | 112 | func (*uselessTestReader) Read([]byte) (int, error) { 113 | panic("should never be called") 114 | } 115 | -------------------------------------------------------------------------------- /testdata/20kB.json: 
-------------------------------------------------------------------------------- 1 | {"tree":{"name":"/","kids":[{"name":"go","kids":[{"name":"src","kids":[{"name":"pkg","kids":[{"name":"exp","kids":[{"name":"draw","kids":[{"name":"Makefile","kids":[],"cl_weight":1,"touches":1,"min_t":1258062920,"max_t":1258062920,"mean_t":1258062920}],"cl_weight":1,"touches":1,"min_t":1258062920,"max_t":1258062920,"mean_t":1258062920}],"cl_weight":2,"touches":2,"min_t":1258062920,"max_t":1316289444,"mean_t":1287176182},{"name":"crypto","kids":[{"name":"cipher","kids":[{"name":"ocfb.go","kids":[],"cl_weight":1.7833333333333334,"touches":4,"min_t":1290201478,"max_t":1306956202,"mean_t":1296843222},{"name":"ctr.go","kids":[],"cl_weight":1.125,"touches":2,"min_t":1290193927,"max_t":1301428055,"mean_t":1295810991},{"name":"ocfb_test.go","kids":[],"cl_weight":0.7,"touches":2,"min_t":1290201478,"max_t":1296742662,"mean_t":1293472070},{"name":"ofb.go","kids":[],"cl_weight":0.5,"touches":1,"min_t":1295392566,"max_t":1295392566,"mean_t":1295392566},{"name":"ofb_test.go","kids":[],"cl_weight":0.5,"touches":1,"min_t":1295392566,"max_t":1295392566,"mean_t":1295392566},{"name":"cfb.go","kids":[],"cl_weight":0.2,"touches":1,"min_t":1290201478,"max_t":1290201478,"mean_t":1290201478},{"name":"cfb_test.go","kids":[],"cl_weight":0.2,"touches":1,"min_t":1290201478,"max_t":1290201478,"mean_t":1290201478},{"name":"Makefile","kids":[],"cl_weight":0.325,"touches":2,"min_t":1290193927,"max_t":1290201478,"mean_t":1290197702},{"name":"cbc.go","kids":[],"cl_weight":0.125,"touches":1,"min_t":1290193927,"max_t":1290193927,"mean_t":1290193927},{"name":"cbc_aes_test.go","kids":[],"cl_weight":0.125,"touches":1,"min_t":1290193927,"max_t":1290193927,"mean_t":1290193927},{"name":"cipher.go","kids":[],"cl_weight":0.125,"touches":1,"min_t":1290193927,"max_t":1290193927,"mean_t":1290193927},{"name":"common_test.go","kids":[],"cl_weight":0.125,"touches":1,"min_t":1290193927,"max_t":1290193927,"mean_t":1290193927},{"name"
:"ctr_aes_test.go","kids":[],"cl_weight":0.125,"touches":1,"min_t":1290193927,"max_t":1290193927,"mean_t":1290193927},{"name":"io.go","kids":[],"cl_weight":0.125,"touches":1,"min_t":1290193927,"max_t":1290193927,"mean_t":1290193927}],"cl_weight":6.083333333333334,"touches":7,"min_t":1290193927,"max_t":1306956202,"mean_t":1296341062},{"name":"des","kids":[{"name":"block.go","kids":[],"cl_weight":0.2,"touches":1,"min_t":1302032448,"max_t":1302032448,"mean_t":1302032448},{"name":"cipher.go","kids":[],"cl_weight":0.2,"touches":1,"min_t":1302032448,"max_t":1302032448,"mean_t":1302032448},{"name":"const.go","kids":[],"cl_weight":0.2,"touches":1,"min_t":1302032448,"max_t":1302032448,"mean_t":1302032448},{"name":"des_test.go","kids":[],"cl_weight":0.2,"touches":1,"min_t":1302032448,"max_t":1302032448,"mean_t":1302032448}],"cl_weight":0.8,"touches":1,"min_t":1302032448,"max_t":1302032448,"mean_t":1302032448},{"name":"block","kids":[{"name":"Makefile","kids":[],"cl_weight":0.05555555555555555,"touches":1,"min_t":1302031420,"max_t":1302031420,"mean_t":1302031420},{"name":"cbc.go","kids":[],"cl_weight":0.12698412698412698,"touches":2,"min_t":1288795423,"max_t":1302031420,"mean_t":1295413421},{"name":"cfb.go","kids":[],"cl_weight":0.12698412698412698,"touches":2,"min_t":1288795423,"max_t":1302031420,"mean_t":1295413421},{"name":"cfb_aes_test.go","kids":[],"cl_weight":0.05555555555555555,"touches":1,"min_t":1302031420,"max_t":1302031420,"mean_t":1302031420},{"name":"cipher.go","kids":[],"cl_weight":1.126984126984127,"touches":3,"min_t":1288795423,"max_t":1302031420,"mean_t":1295087303},{"name":"cmac.go","kids":[],"cl_weight":0.12698412698412698,"touches":2,"min_t":1288795423,"max_t":1302031420,"mean_t":1295413421},{"name":"cmac_aes_test.go","kids":[],"cl_weight":0.05555555555555555,"touches":1,"min_t":1302031420,"max_t":1302031420,"mean_t":1302031420},{"name":"ctr.go","kids":[],"cl_weight":0.12698412698412698,"touches":2,"min_t":1288795423,"max_t":1302031420,"mean_t":1295413421},
{"name":"eax.go","kids":[],"cl_weight":0.05555555555555555,"touches":1,"min_t":1302031420,"max_t":1302031420,"mean_t":1302031420},{"name":"eax_aes_test.go","kids":[],"cl_weight":0.05555555555555555,"touches":1,"min_t":1302031420,"max_t":1302031420,"mean_t":1302031420},{"name":"ecb.go","kids":[],"cl_weight":0.05555555555555555,"touches":1,"min_t":1302031420,"max_t":1302031420,"mean_t":1302031420},{"name":"ecb_aes_test.go","kids":[],"cl_weight":0.05555555555555555,"touches":1,"min_t":1302031420,"max_t":1302031420,"mean_t":1302031420},{"name":"ecb_test.go","kids":[],"cl_weight":0.12698412698412698,"touches":2,"min_t":1288795423,"max_t":1302031420,"mean_t":1295413421},{"name":"ofb.go","kids":[],"cl_weight":0.05555555555555555,"touches":1,"min_t":1302031420,"max_t":1302031420,"mean_t":1302031420},{"name":"ofb_aes_test.go","kids":[],"cl_weight":0.05555555555555555,"touches":1,"min_t":1302031420,"max_t":1302031420,"mean_t":1302031420},{"name":"xor.go","kids":[],"cl_weight":0.05555555555555555,"touches":1,"min_t":1302031420,"max_t":1302031420,"mean_t":1302031420},{"name":"xor_test.go","kids":[],"cl_weight":0.05555555555555555,"touches":1,"min_t":1302031420,"max_t":1302031420,"mean_t":1302031420},{"name":"cbc_aes_test.go","kids":[],"cl_weight":0.07142857142857142,"touches":1,"min_t":1291154383,"max_t":1291154383,"mean_t":1291154383},{"name":"ctr_aes_test.go","kids":[],"cl_weight":0.07142857142857142,"touches":1,"min_t":1291154383,"max_t":1291154383,"mean_t":1291154383}],"cl_weight":2.5158730158730176,"touches":4,"min_t":1288795423,"max_t":1302031420,"mean_t":1294104073},{"name":"ecdsa","kids":[{"name":"ecdsa.go","kids":[],"cl_weight":1.25,"touches":2,"min_t":1299768154,"max_t":1300286561,"mean_t":1300027357},{"name":"Makefile","kids":[],"cl_weight":0.25,"touches":1,"min_t":1299768154,"max_t":1299768154,"mean_t":1299768154},{"name":"ecdsa_test.go","kids":[],"cl_weight":0.25,"touches":1,"min_t":1299768154,"max_t":1299768154,"mean_t":1299768154}],"cl_weight":1.75,"touches":2,"m
in_t":1299768154,"max_t":1300286561,"mean_t":1300027357},{"name":"Makefile","kids":[],"cl_weight":0.0625,"touches":1,"min_t":1296576168,"max_t":1296576168,"mean_t":1296576168},{"name":"crypto.go","kids":[],"cl_weight":0.0625,"touches":1,"min_t":1296576168,"max_t":1296576168,"mean_t":1296576168},{"name":"md4","kids":[{"name":"md4.go","kids":[],"cl_weight":0.0625,"touches":1,"min_t":1296576168,"max_t":1296576168,"mean_t":1296576168}],"cl_weight":0.0625,"touches":1,"min_t":1296576168,"max_t":1296576168,"mean_t":1296576168},{"name":"md5","kids":[{"name":"md5.go","kids":[],"cl_weight":1.0625,"touches":2,"min_t":1258322446,"max_t":1296576168,"mean_t":1277449307}],"cl_weight":1.0625,"touches":2,"min_t":1258322446,"max_t":1296576168,"mean_t":1277449307},{"name":"ripemd160","kids":[{"name":"ripemd160.go","kids":[],"cl_weight":0.0625,"touches":1,"min_t":1296576168,"max_t":1296576168,"mean_t":1296576168}],"cl_weight":0.0625,"touches":1,"min_t":1296576168,"max_t":1296576168,"mean_t":1296576168},{"name":"sha1","kids":[{"name":"sha1.go","kids":[],"cl_weight":0.0625,"touches":1,"min_t":1296576168,"max_t":1296576168,"mean_t":1296576168}],"cl_weight":0.0625,"touches":1,"min_t":1296576168,"max_t":1296576168,"mean_t":1296576168},{"name":"sha256","kids":[{"name":"sha256.go","kids":[],"cl_weight":0.0625,"touches":1,"min_t":1296576168,"max_t":1296576168,"mean_t":1296576168}],"cl_weight":0.0625,"touches":1,"min_t":1296576168,"max_t":1296576168,"mean_t":1296576168},{"name":"sha512","kids":[{"name":"sha512.go","kids":[],"cl_weight":0.0625,"touches":1,"min_t":1296576168,"max_t":1296576168,"mean_t":1296576168}],"cl_weight":0.0625,"touches":1,"min_t":1296576168,"max_t":1296576168,"mean_t":1296576168},{"name":"rc4","kids":[{"name":"rc4.go","kids":[],"cl_weight":0.27692307692307694,"touches":2,"min_t":1254251724,"max_t":1292431795,"mean_t":1273341759},{"name":"rc4_test.go","kids":[],"cl_weight":0.27692307692307694,"touches":2,"min_t":1254251724,"max_t":1292431795,"mean_t":1273341759},{"name":"Ma
kefile","kids":[],"cl_weight":0.2,"touches":1,"min_t":1254251724,"max_t":1254251724,"mean_t":1254251724}],"cl_weight":0.7538461538461538,"touches":2,"min_t":1254251724,"max_t":1292431795,"mean_t":1273341759},{"name":"aes","kids":[{"name":"aes_test.go","kids":[],"cl_weight":0.07142857142857142,"touches":1,"min_t":1288795423,"max_t":1288795423,"mean_t":1288795423},{"name":"block.go","kids":[],"cl_weight":0.07142857142857142,"touches":1,"min_t":1288795423,"max_t":1288795423,"mean_t":1288795423},{"name":"cipher.go","kids":[],"cl_weight":0.07142857142857142,"touches":1,"min_t":1288795423,"max_t":1288795423,"mean_t":1288795423}],"cl_weight":0.21428571428571427,"touches":1,"min_t":1288795423,"max_t":1288795423,"mean_t":1288795423},{"name":"blowfish","kids":[{"name":"blowfish_test.go","kids":[],"cl_weight":0.07142857142857142,"touches":1,"min_t":1288795423,"max_t":1288795423,"mean_t":1288795423},{"name":"cipher.go","kids":[],"cl_weight":0.07142857142857142,"touches":1,"min_t":1288795423,"max_t":1288795423,"mean_t":1288795423}],"cl_weight":0.14285714285714285,"touches":1,"min_t":1288795423,"max_t":1288795423,"mean_t":1288795423},{"name":"xtea","kids":[{"name":"block.go","kids":[],"cl_weight":0.07142857142857142,"touches":1,"min_t":1288795423,"max_t":1288795423,"mean_t":1288795423},{"name":"cipher.go","kids":[],"cl_weight":0.07142857142857142,"touches":1,"min_t":1288795423,"max_t":1288795423,"mean_t":1288795423},{"name":"xtea_test.go","kids":[],"cl_weight":0.07142857142857142,"touches":1,"min_t":1288795423,"max_t":1288795423,"mean_t":1288795423}],"cl_weight":0.21428571428571427,"touches":1,"min_t":1288795423,"max_t":1288795423,"mean_t":1288795423},{"name":"cast5","kids":[{"name":"Makefile","kids":[],"cl_weight":0.25,"touches":1,"min_t":1288795155,"max_t":1288795155,"mean_t":1288795155},{"name":"cast5.go","kids":[],"cl_weight":0.25,"touches":1,"min_t":1288795155,"max_t":1288795155,"mean_t":1288795155},{"name":"cast5_test.go","kids":[],"cl_weight":0.25,"touches":1,"min_t":12887
95155,"max_t":1288795155,"mean_t":1288795155}],"cl_weight":0.75,"touches":1,"min_t":1288795155,"max_t":1288795155,"mean_t":1288795155},{"name":"subtle","kids":[{"name":"Makefile","kids":[],"cl_weight":0.125,"touches":1,"min_t":1257189127,"max_t":1257189127,"mean_t":1257189127},{"name":"constant_time.go","kids":[],"cl_weight":0.125,"touches":1,"min_t":1257189127,"max_t":1257189127,"mean_t":1257189127},{"name":"constant_time_test.go","kids":[],"cl_weight":0.125,"touches":1,"min_t":1257189127,"max_t":1257189127,"mean_t":1257189127}],"cl_weight":0.375,"touches":1,"min_t":1257189127,"max_t":1257189127,"mean_t":1257189127}],"cl_weight":112.46350177600193,"touches":124,"min_t":1254251724,"max_t":1316028739,"mean_t":1286826165},{"name":"html","kids":[{"name":"escape.go","kids":[],"cl_weight":0.07142857142857142,"touches":1,"min_t":1291154383,"max_t":1291154383,"mean_t":1291154383}],"cl_weight":0.07142857142857142,"touches":1,"min_t":1291154383,"max_t":1291154383,"mean_t":1291154383},{"name":"json","kids":[{"name":"decode.go","kids":[],"cl_weight":0.07142857142857142,"touches":1,"min_t":1291154383,"max_t":1291154383,"mean_t":1291154383},{"name":"struct.go","kids":[],"cl_weight":2,"touches":3,"min_t":1258140553,"max_t":1258153144,"mean_t":1258146580},{"name":"struct_test.go","kids":[],"cl_weight":1,"touches":2,"min_t":1258140553,"max_t":1258146045,"mean_t":1258143299},{"name":"generic_test.go","kids":[],"cl_weight":0.5,"touches":1,"min_t":1257988394,"max_t":1257988394,"mean_t":1257988394},{"name":"parse.go","kids":[],"cl_weight":0.5,"touches":1,"min_t":1257988394,"max_t":1257988394,"mean_t":1257988394}],"cl_weight":4.071428571428571,"touches":5,"min_t":1257988394,"max_t":1291154383,"mean_t":1264716503},{"name":"regexp","kids":[{"name":"regexp.go","kids":[],"cl_weight":0.07142857142857142,"touches":1,"min_t":1291154383,"max_t":1291154383,"mean_t":1291154383}],"cl_weight":0.07142857142857142,"touches":1,"min_t":1291154383,"max_t":1291154383,"mean_t":1291154383},{"name":"strings
","kids":[{"name":"strings.go","kids":[],"cl_weight":0.07142857142857142,"touches":1,"min_t":1291154383,"max_t":1291154383,"mean_t":1291154383}],"cl_weight":0.07142857142857142,"touches":1,"min_t":1291154383,"max_t":1291154383,"mean_t":1291154383},{"name":"utf8","kids":[{"name":"utf8.go","kids":[],"cl_weight":0.07142857142857142,"touches":1,"min_t":1291154383,"max_t":1291154383,"mean_t":1291154383},{"name":"utf8_test.go","kids":[],"cl_weight":0.07142857142857142,"touches":1,"min_t":1291154383,"max_t":1291154383,"mean_t":1291154383}],"cl_weight":0.14285714285714285,"touches":1,"min_t":1291154383,"max_t":1291154383,"mean_t":1291154383},{"name":"time","kids":[{"name":"format.go","kids":[],"cl_weight":0.5,"touches":1,"min_t":1279552084,"max_t":1279552084,"mean_t":1279552084},{"name":"time_test.go","kids":[],"cl_weight":0.5,"touches":1,"min_t":1279552084,"max_t":1279552084,"mean_t":1279552084},{"name":"time.go","kids":[],"cl_weight":1,"touches":1,"min_t":1254848499,"max_t":1254848499,"mean_t":1254848499}],"cl_weight":2,"touches":2,"min_t":1254848499,"max_t":1279552084,"mean_t":1267200291},{"name":"runtime","kids":[{"name":"chan.c","kids":[],"cl_weight":3.25,"touches":4,"min_t":1256779433,"max_t":1261167953,"mean_t":1259465671},{"name":"runtime.h","kids":[],"cl_weight":0.45,"touches":2,"min_t":1258135731,"max_t":1261167953,"mean_t":1259651842},{"name":"linux","kids":[{"name":"386","kids":[{"name":"sys.s","kids":[],"cl_weight":0.2,"touches":1,"min_t":1258135731,"max_t":1258135731,"mean_t":1258135731}],"cl_weight":0.2,"touches":1,"min_t":1258135731,"max_t":1258135731,"mean_t":1258135731},{"name":"amd64","kids":[{"name":"sys.s","kids":[],"cl_weight":0.2,"touches":1,"min_t":1258135731,"max_t":1258135731,"mean_t":1258135731}],"cl_weight":0.2,"touches":1,"min_t":1258135731,"max_t":1258135731,"mean_t":1258135731}],"cl_weight":0.4,"touches":1,"min_t":1258135731,"max_t":1258135731,"mean_t":1258135731},{"name":"malloc.cgo","kids":[],"cl_weight":0.2,"touches":1,"min_t":1258135731,"m
ax_t":1258135731,"mean_t":1258135731},{"name":"mem.c","kids":[],"cl_weight":0.2,"touches":1,"min_t":1258135731,"max_t":1258135731,"mean_t":1258135731}],"cl_weight":4.500000000000001,"touches":5,"min_t":1256779433,"max_t":1261167953,"mean_t":1259199683},{"name":"net","kids":[{"name":"fd.go","kids":[],"cl_weight":1,"touches":1,"min_t":1258579114,"max_t":1258579114,"mean_t":1258579114}],"cl_weight":1,"touches":1,"min_t":1258579114,"max_t":1258579114,"mean_t":1258579114},{"name":"unsafe","kids":[{"name":"unsafe.go","kids":[],"cl_weight":1,"touches":1,"min_t":1258414744,"max_t":1258414744,"mean_t":1258414744}],"cl_weight":1,"touches":1,"min_t":1258414744,"max_t":1258414744,"mean_t":1258414744},{"name":"Make.deps","kids":[],"cl_weight":2.0861111111111112,"touches":11,"min_t":1254251724,"max_t":1257974497,"mean_t":1256728540},{"name":"bignum","kids":[{"name":"bignum.go","kids":[],"cl_weight":0.225,"touches":2,"min_t":1257971686,"max_t":1257974497,"mean_t":1257973091}],"cl_weight":0.225,"touches":2,"min_t":1257971686,"max_t":1257974497,"mean_t":1257973091},{"name":"testing","kids":[{"name":"script","kids":[{"name":"Makefile","kids":[],"cl_weight":0.2,"touches":1,"min_t":1257281226,"max_t":1257281226,"mean_t":1257281226},{"name":"script.go","kids":[],"cl_weight":0.2,"touches":1,"min_t":1257281226,"max_t":1257281226,"mean_t":1257281226},{"name":"script_test.go","kids":[],"cl_weight":0.2,"touches":1,"min_t":1257281226,"max_t":1257281226,"mean_t":1257281226}],"cl_weight":0.6000000000000001,"touches":1,"min_t":1257281226,"max_t":1257281226,"mean_t":1257281226},{"name":"quick","kids":[{"name":"Makefile","kids":[],"cl_weight":0.2,"touches":1,"min_t":1256768180,"max_t":1256768180,"mean_t":1256768180},{"name":"quick.go","kids":[],"cl_weight":0.2,"touches":1,"min_t":1256768180,"max_t":1256768180,"mean_t":1256768180},{"name":"quick_test.go","kids":[],"cl_weight":0.2,"touches":1,"min_t":1256768180,"max_t":1256768180,"mean_t":1256768180}],"cl_weight":0.6000000000000001,"touches":1,"min_
t":1256768180,"max_t":1256768180,"mean_t":1256768180}],"cl_weight":1.2,"touches":2,"min_t":1256768180,"max_t":1257281226,"mean_t":1257024703},{"name":"debug","kids":[{"name":"elf","kids":[{"name":"testdata","kids":[{"name":"go-relocation-test-gcc424-x86-64.o","kids":[],"cl_weight":0.14285714285714285,"touches":1,"min_t":1257192136,"max_t":1257192136,"mean_t":1257192136},{"name":"go-relocation-test-gcc441-x86-64.o","kids":[],"cl_weight":0.14285714285714285,"touches":1,"min_t":1257192136,"max_t":1257192136,"mean_t":1257192136},{"name":"go-relocation-test-gcc441-x86.o","kids":[],"cl_weight":0.14285714285714285,"touches":1,"min_t":1257192136,"max_t":1257192136,"mean_t":1257192136}],"cl_weight":0.42857142857142855,"touches":1,"min_t":1257192136,"max_t":1257192136,"mean_t":1257192136},{"name":"file.go","kids":[],"cl_weight":0.14285714285714285,"touches":1,"min_t":1257192136,"max_t":1257192136,"mean_t":1257192136},{"name":"file_test.go","kids":[],"cl_weight":0.14285714285714285,"touches":1,"min_t":1257192136,"max_t":1257192136,"mean_t":1257192136}],"cl_weight":0.7142857142857142,"touches":1,"min_t":1257192136,"max_t":1257192136,"mean_t":1257192136}],"cl_weight":0.7142857142857142,"touches":1,"min_t":1257192136,"max_t":1257192136,"mean_t":1257192136},{"name":"reflect","kids":[{"name":"all_test.go","kids":[],"cl_weight":0.5,"touches":1,"min_t":1256179887,"max_t":1256179887,"mean_t":1256179887},{"name":"value.go","kids":[],"cl_weight":0.5,"touches":1,"min_t":1256179887,"max_t":1256179887,"mean_t":1256179887}],"cl_weight":1,"touches":1,"min_t":1256179887,"max_t":1256179887,"mean_t":1256179887}],"cl_weight":172.302597402597,"touches":174,"min_t":1254251724,"max_t":1316289444,"mean_t":1283150599},{"name":"Make.pkg","kids":[],"cl_weight":0.5,"touches":1,"min_t":1263339800,"max_t":1263339800,"mean_t":1263339800},{"name":"make.bash","kids":[],"cl_weight":2.1,"touches":3,"min_t":1257974497,"max_t":1257980535,"mean_t":1257976693}],"cl_weight":176.4999999999996,"touches":177,"min_t":1
254251724,"max_t":1316289444,"mean_t":1282723881},{"name":"misc","kids":[{"name":"dashboard","kids":[{"name":"README","kids":[],"cl_weight":0.2777777777777778,"touches":2,"min_t":1262918745,"max_t":1264539389,"mean_t":1263729067},{"name":"buildcontrol.py","kids":[],"cl_weight":0.2777777777777778,"touches":2,"min_t":1262918745,"max_t":1264539389,"mean_t":1263729067},{"name":"builder.sh","kids":[],"cl_weight":0.2777777777777778,"touches":2,"min_t":1262918745,"max_t":1264539389,"mean_t":1263729067},{"name":"godashboard","kids":[{"name":"gobuild.py","kids":[],"cl_weight":0.2777777777777778,"touches":2,"min_t":1262918745,"max_t":1264539389,"mean_t":1263729067},{"name":"index.yaml","kids":[],"cl_weight":0.2777777777777778,"touches":2,"min_t":1262918745,"max_t":1264539389,"mean_t":1263729067},{"name":"key.py","kids":[],"cl_weight":0.2777777777777778,"touches":2,"min_t":1262918745,"max_t":1264539389,"mean_t":1263729067},{"name":"_multiprocessing.py","kids":[],"cl_weight":0.1111111111111111,"touches":1,"min_t":1262918745,"max_t":1262918745,"mean_t":1262918745},{"name":"app.yaml","kids":[],"cl_weight":0.1111111111111111,"touches":1,"min_t":1262918745,"max_t":1262918745,"mean_t":1262918745},{"name":"main.html","kids":[],"cl_weight":0.1111111111111111,"touches":1,"min_t":1262918745,"max_t":1262918745,"mean_t":1262918745}],"cl_weight":1.166666666666667,"touches":2,"min_t":1262918745,"max_t":1264539389,"mean_t":1263729067}],"cl_weight":2,"touches":2,"min_t":1262918745,"max_t":1264539389,"mean_t":1263729067},{"name":"cgo","kids":[{"name":"gmp","kids":[{"name":"gmp.go","kids":[],"cl_weight":1,"touches":1,"min_t":1255542979,"max_t":1255542979,"mean_t":1255542979}],"cl_weight":1,"touches":1,"min_t":1255542979,"max_t":1255542979,"mean_t":1255542979}],"cl_weight":1,"touches":1,"min_t":1255542979,"max_t":1255542979,"mean_t":1255542979}],"cl_weight":3,"touches":3,"min_t":1255542979,"max_t":1264539389,"mean_t":1261000371},{"name":"test","kids":[{"name":"chan","kids":[{"name":"doubleselect
.go","kids":[],"cl_weight":0.25,"touches":1,"min_t":1261167953,"max_t":1261167953,"mean_t":1261167953}],"cl_weight":0.25,"touches":1,"min_t":1261167953,"max_t":1261167953,"mean_t":1261167953},{"name":"golden.out","kids":[],"cl_weight":0.25,"touches":1,"min_t":1261167953,"max_t":1261167953,"mean_t":1261167953}],"cl_weight":0.5,"touches":1,"min_t":1261167953,"max_t":1261167953,"mean_t":1261167953},{"name":"doc","kids":[{"name":"effective_go.html","kids":[],"cl_weight":1,"touches":1,"min_t":1258401378,"max_t":1258401378,"mean_t":1258401378},{"name":"install.html","kids":[],"cl_weight":1,"touches":1,"min_t":1257967097,"max_t":1257967097,"mean_t":1257967097},{"name":"go-logo-black.png","kids":[],"cl_weight":0.2,"touches":1,"min_t":1257452334,"max_t":1257452334,"mean_t":1257452334},{"name":"video-snap.jpg","kids":[],"cl_weight":0.2,"touches":1,"min_t":1257452334,"max_t":1257452334,"mean_t":1257452334},{"name":"root.html","kids":[],"cl_weight":0.45,"touches":2,"min_t":1257307185,"max_t":1257452334,"mean_t":1257379759},{"name":"style.css","kids":[],"cl_weight":0.45,"touches":2,"min_t":1257307185,"max_t":1257452334,"mean_t":1257379759},{"name":"go-logo-blue.png","kids":[],"cl_weight":0.25,"touches":1,"min_t":1257307185,"max_t":1257307185,"mean_t":1257307185}],"cl_weight":3.5500000000000007,"touches":4,"min_t":1257307185,"max_t":1258401378,"mean_t":1257781998},{"name":"lib","kids":[{"name":"godoc","kids":[{"name":"godoc.html","kids":[],"cl_weight":0.45,"touches":2,"min_t":1257307185,"max_t":1257452334,"mean_t":1257379759}],"cl_weight":0.45,"touches":2,"min_t":1257307185,"max_t":1257452334,"mean_t":1257379759}],"cl_weight":0.45,"touches":2,"min_t":1257307185,"max_t":1257452334,"mean_t":1257379759}],"cl_weight":0,"touches":0,"min_t":0,"max_t":0,"mean_t":0}],"cl_weight":0,"touches":0,"min_t":0,"max_t":0,"mean_t":0},"username":"agl"} 2 | -------------------------------------------------------------------------------- /testdata/2kB.json: 
-------------------------------------------------------------------------------- 1 | {"tree":{"name":true,"kids":[{"name":false,"kids":[{"name":null,"kids":[{"name":"pkg","kids":[{"name":"exp","kids":[{"name":"draw","kids":[{"name":"Makefile","kids":[],"cl_weight":1,"touches":1,"min_t":1258062920,"max_t":1258062920,"mean_t":1258062920}],"cl_weight":1,"touches":1,"min_t":1258062920,"max_t":1258062920,"mean_t":1258062920}],"cl_weight":2,"touches":2,"min_t":1258062920,"max_t":1316289444,"mean_t":1287176182}],"cl_weight":172.302597402597,"touches":174,"min_t":1254251724,"max_t":1316289444,"mean_t":1283150599}],"cl_weight":176.4999999999996,"touches":177,"min_t":1254251724,"max_t":1316289444,"mean_t":1282723881},{"name":"misc","kids":[],"cl_weight":3,"touches":3,"min_t":1255542979,"max_t":1264539389,"mean_t":1261000371},{"name":"doc","kids":[{"name":"effective_go.html","kids":[],"cl_weight":1,"touches":1,"min_t":1258401378,"max_t":1258401378,"mean_t":1258401378},{"name":"install.html","kids":[],"cl_weight":1,"touches":1,"min_t":1257967097,"max_t":1257967097,"mean_t":1257967097},{"name":"go-logo-black.png","kids":[],"cl_weight":0.2,"touches":1,"min_t":1257452334,"max_t":1257452334,"mean_t":1257452334},{"name":"video-snap.jpg","kids":[],"cl_weight":0.2,"touches":1,"min_t":1257452334,"max_t":1257452334,"mean_t":1257452334},{"name":"root.html","kids":[],"cl_weight":0.45,"touches":2,"min_t":1257307185,"max_t":1257452334,"mean_t":1257379759},{"name":"style.css","kids":[],"cl_weight":0.45,"touches":2,"min_t":1257307185,"max_t":1257452334,"mean_t":1257379759},{"name":"go-logo-blue.png","kids":[],"cl_weight":0.25,"touches":1,"min_t":1257307185,"max_t":1257307185,"mean_t":1257307185}],"cl_weight":3.5500000000000007,"touches":4,"min_t":1257307185,"max_t":1258401378,"mean_t":1257781998},{"name":"lib","kids":[{"name":"godoc","kids":[{"name":"godoc.html","kids":[],"cl_weight":0.45,"touches":2,"min_t":1257307185,"max_t":1257452334,"mean_t":1257379759}],"cl_weight":0.45,"touches":2,"min
_t":1257307185,"max_t":1257452334,"mean_t":1257379759}],"cl_weight":0.45,"touches":2,"min_t":1257307185,"max_t":1257452334,"mean_t":1257379759}],"cl_weight":0,"touches":0,"min_t":0,"max_t":0,"mean_t":0}],"cl_weight":0,"touches":0,"min_t":0,"max_t":0,"mean_t":0},"username":"agl"} 2 | --------------------------------------------------------------------------------