├── .gitignore ├── LICENSE ├── README.md ├── jsonarray.go ├── jsonarray_test.go └── stackreader.go /.gitignore: -------------------------------------------------------------------------------- 1 | *.test 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014 Tommi Virtanen 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # `jsonarray` -- Streaming decoder for JSON arrays 2 | 3 | Go library for decoding very large or streaming JSON arrays. 4 | 5 | Many streaming JSON APIs give you newline-separated JSON. 
That's easy 6 | to parse, just keep calling 7 | [`json.Decoder.Decode`](http://golang.org/pkg/encoding/json/#Decoder.Decode). 8 | 9 | Sometimes, streaming APIs, and especially JSON databases, just return 10 | a very large JSON array as their result. This is not as easy to 11 | handle. `jsonarray` makes it easy. 12 | 13 | (If the large array isn't the outermost JSON object, that's still 14 | harder to get right. Ideas for API that can handle that are welcome.) 15 | 16 | Use the Go import path 17 | 18 | github.com/tv42/jsonarray 19 | 20 | Documentation at http://godoc.org/github.com/tv42/jsonarray 21 | -------------------------------------------------------------------------------- /jsonarray.go: -------------------------------------------------------------------------------- 1 | package jsonarray 2 | 3 | import ( 4 | "bufio" 5 | "encoding/json" 6 | "fmt" 7 | "io" 8 | ) 9 | 10 | // NewDecoder returns a new decoder that reads items of a JSON array 11 | // from r. Only one item is held in memory at a time, and items are 12 | // decoded as soon as they are completed, without waiting for the 13 | // whole array. 14 | func NewDecoder(r io.Reader) *Decoder { 15 | b := bufio.NewReader(r) 16 | dec := &Decoder{ 17 | state: start, 18 | r: &stackReader{readers: []reader{b}}, 19 | } 20 | return dec 21 | } 22 | 23 | type state int 24 | 25 | const ( 26 | start state = iota 27 | after state = iota 28 | broken state = iota 29 | ) 30 | 31 | // A Decoder reads and decodes JSON array items from an input stream. 32 | type Decoder struct { 33 | state state 34 | r *stackReader 35 | err error 36 | } 37 | 38 | // ErrNotArray is the type of an error returned when the stream did not 39 | // contain a JSON array. 
40 | type ErrNotArray struct { 41 | Bad byte 42 | } 43 | 44 | func (n *ErrNotArray) Error() string { 45 | return fmt.Sprintf("not an array: starts with %q", n.Bad) 46 | } 47 | 48 | // ErrNotCommaSeparated is the type of an error returned when the array 49 | // items in the stream were not comma separated. 50 | type ErrNotCommaSeparated struct { 51 | Bad byte 52 | } 53 | 54 | func (n *ErrNotCommaSeparated) Error() string { 55 | return fmt.Sprintf("not comma-separated: %q", n.Bad) 56 | } 57 | 58 | // Decode unmarshals the next item in the array. 59 | // 60 | // On reaching the end of the array, Decode returns io.EOF. This does 61 | // not mean that the underlying stream would have reached EOF. 62 | // 63 | // If EOF is seen before the JSON array closes, returns 64 | // io.ErrUnexpectedEOF. 65 | func (d *Decoder) Decode(v interface{}) error { 66 | switch d.state { 67 | case broken: 68 | return d.err 69 | 70 | case start: 71 | c, err := d.readNonWhitespace() 72 | if err != nil { 73 | return d.breaks(err) 74 | } 75 | switch c { 76 | case '[': 77 | // nothing 78 | default: 79 | return d.breaks(&ErrNotArray{Bad: c}) 80 | } 81 | d.state = after 82 | 83 | case after: 84 | c, err := d.readNonWhitespace() 85 | if err == io.EOF { 86 | // did not see closing `]` 87 | return d.breaks(io.ErrUnexpectedEOF) 88 | } 89 | if err != nil { 90 | return d.breaks(err) 91 | } 92 | switch c { 93 | case ',': 94 | // nothing 95 | case ']': 96 | // end of array 97 | return d.breaks(io.EOF) 98 | default: 99 | return d.breaks(&ErrNotCommaSeparated{Bad: c}) 100 | } 101 | } 102 | 103 | dec := json.NewDecoder(d.r) 104 | err := dec.Decode(v) 105 | if err == io.EOF { 106 | // did not see closing `]` 107 | return d.breaks(io.ErrUnexpectedEOF) 108 | } 109 | if err != nil { 110 | return d.breaks(err) 111 | } 112 | // patch the parts already buffered back into the reader 113 | d.r.Insert(dec.Buffered()) 114 | return nil 115 | } 116 | 117 | // Read a non-whitespace byte. 
118 | func (d *Decoder) readNonWhitespace() (byte, error) { 119 | for { 120 | c, err := d.r.ReadByte() 121 | if err != nil { 122 | return c, err 123 | } 124 | switch c { 125 | // http://tools.ietf.org/html/rfc7159#section-2 126 | case 0x20, 0x09, 0x0A, 0x0D: 127 | continue 128 | } 129 | return c, nil 130 | } 131 | } 132 | 133 | func (d *Decoder) breaks(err error) error { 134 | d.err = err 135 | d.state = broken 136 | return err 137 | } 138 | -------------------------------------------------------------------------------- /jsonarray_test.go: -------------------------------------------------------------------------------- 1 | package jsonarray_test 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io" 7 | "strings" 8 | "testing" 9 | "testing/iotest" 10 | 11 | "github.com/tv42/jsonarray" 12 | ) 13 | 14 | func Example() { 15 | // simulate streaming by serving reads one byte at a time 16 | stream := iotest.OneByteReader( 17 | strings.NewReader(`[{"Greeting": "hell"},{"Greeting": "o, w"},{"Greeting": "orld"}]`), 18 | ) 19 | 20 | type Message struct { 21 | Greeting string 22 | } 23 | dec := jsonarray.NewDecoder(stream) 24 | for { 25 | var msg Message 26 | if err := dec.Decode(&msg); err != nil { 27 | if err == io.EOF { 28 | break 29 | } 30 | fmt.Printf("decode error: %v\n", err) 31 | return 32 | } 33 | fmt.Printf("%s", msg.Greeting) 34 | } 35 | fmt.Printf("\nbye!\n") 36 | 37 | // Output: 38 | // hello, world 39 | // bye! 
40 | } 41 | 42 | type T struct { 43 | X int 44 | } 45 | 46 | func decode(t *testing.T, dec *jsonarray.Decoder, want T) { 47 | var got T 48 | if err := dec.Decode(&got); err != nil { 49 | t.Fatalf("decode error: %v", err) 50 | } 51 | if got != want { 52 | t.Fatalf("decode error: %#v != %#v", got, want) 53 | } 54 | } 55 | 56 | func eof(t *testing.T, dec *jsonarray.Decoder) { 57 | var got T 58 | err := dec.Decode(&got) 59 | if err == nil { 60 | t.Fatalf("expected EOF, got: %#v", got) 61 | } 62 | if err != io.EOF { 63 | t.Fatalf("unexpected decode error: %v", err) 64 | } 65 | } 66 | 67 | func erring(t *testing.T, dec *jsonarray.Decoder, want error) { 68 | var got T 69 | err := dec.Decode(&got) 70 | if err == nil { 71 | t.Fatalf("expected %v, got: %#v", want, got) 72 | } 73 | if err != want { 74 | t.Fatalf("unexpected decode error: %v != %v", err, want) 75 | } 76 | } 77 | 78 | func TestSimple(t *testing.T) { 79 | const input = `[{"X":1}, {"X" :2 } , { "X" : 3 } ] ` 80 | r := strings.NewReader(input) 81 | dec := jsonarray.NewDecoder(r) 82 | decode(t, dec, T{X: 1}) 83 | decode(t, dec, T{X: 2}) 84 | decode(t, dec, T{X: 3}) 85 | eof(t, dec) 86 | } 87 | 88 | func TestLong(t *testing.T) { 89 | var buf bytes.Buffer 90 | buf.Write([]byte(`[`)) 91 | for i := 0; i < 999; i++ { 92 | fmt.Fprintf(&buf, `{"X":%d},`, i) 93 | } 94 | buf.Write([]byte(`{"X":999}]`)) 95 | dec := jsonarray.NewDecoder(&buf) 96 | for i := 0; i < 1000; i++ { 97 | decode(t, dec, T{X: i}) 98 | } 99 | eof(t, dec) 100 | } 101 | 102 | func TestBadEarlyEOF(t *testing.T) { 103 | const input = `[{"X":1},` 104 | r := strings.NewReader(input) 105 | dec := jsonarray.NewDecoder(r) 106 | decode(t, dec, T{X: 1}) 107 | erring(t, dec, io.ErrUnexpectedEOF) 108 | } 109 | -------------------------------------------------------------------------------- /stackreader.go: -------------------------------------------------------------------------------- 1 | package jsonarray 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "io" 7 | ) 8 | 
// reader is what stackReader requires of each of its sources: bulk
// reads as well as byte-at-a-time reads.
type reader interface {
	io.Reader
	io.ByteReader
}

// stackReader is sort of like io.MultiReader, but it insists on
// io.ByteReader and supports inserting readers in front of the queue.
type stackReader struct {
	// front of the queue is highest index, for easy insertion at head
	readers []reader
}

var _ io.Reader = (*stackReader)(nil)
var _ io.ByteReader = (*stackReader)(nil)

// pop removes the reader at the front of the queue. The vacated slot is
// cleared so the backing array does not keep the drained reader alive.
func (mr *stackReader) pop() {
	last := len(mr.readers) - 1
	mr.readers[last] = nil
	mr.readers = mr.readers[:last]
}

// Read reads from the reader at the front of the queue, moving on to
// the next one when the front is exhausted. It returns io.EOF only
// when every reader has been drained.
func (mr *stackReader) Read(p []byte) (n int, err error) {
	for len(mr.readers) > 0 {
		top := mr.readers[len(mr.readers)-1]
		n, err = top.Read(p)
		// Pop readers that have become empty. Strive to do this one
		// round earlier than when we'd see io.EOF; that way the stack
		// remains at max 2 entries in practice.
		if err == io.EOF || isEmpty(top) {
			mr.pop()
		}
		if n > 0 || err != io.EOF {
			if err == io.EOF {
				// Don't return io.EOF yet. There may be more bytes
				// in the remaining readers.
				err = nil
			}
			return n, err
		}
	}
	return 0, io.EOF
}

// ReadByte returns the next byte from the front-most reader that still
// has data, discarding exhausted readers along the way. It returns
// io.EOF only when every reader has been drained.
func (mr *stackReader) ReadByte() (c byte, err error) {
	for len(mr.readers) > 0 {
		c, err = mr.readers[len(mr.readers)-1].ReadByte()
		if err == io.EOF {
			mr.pop()
			continue
		}
		return c, err
	}
	return 0, io.EOF
}

// isEmpty peeks inside a bytes.Reader to see if it has been drained.
// Any other kind of reader is reported as not empty.
func isEmpty(r io.Reader) bool {
	if br, ok := r.(*bytes.Reader); ok {
		if br.Len() == 0 {
			return true
		}
	}
	return false
}

// Insert reader in front of the queue. If the reader does not
// implement io.ByteReader, it will be wrapped in a bufio.Reader.
// A nil reader or an already drained bytes.Reader is ignored.
func (mr *stackReader) Insert(r io.Reader) {
	if r == nil || isEmpty(r) {
		return
	}
	rr, ok := r.(reader)
	if !ok {
		rr = bufio.NewReader(r)
	}
	mr.readers = append(mr.readers, rr)
}