├── test.sh
├── sample.json
├── LICENSE
├── scratch.go
├── README.md
├── jsparser_test.go
└── jsparser.go
/test.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Runs the test suite three times: with the default settings (sample.json read
# from disk, results consumed through Stream), with --minify (the tests use the
# inline minified document instead of sample.json) and with --parseall (the
# tests collect results through Parse instead of Stream).

# Stop at the first failing run; without this the script's exit status would
# only reflect the last "go test" invocation.
set -e

go test jsparser.go scratch.go jsparser_test.go -v

go test jsparser.go scratch.go jsparser_test.go -v --minify

go test jsparser.go scratch.go jsparser_test.go -v --parseall
--------------------------------------------------------------------------------
/sample.json:
--------------------------------------------------------------------------------
1 | {
2 | "nu": null,
3 | "b": true,
4 | "b1": false,
5 | "n": 2323,
6 | "n1": 23.23,
7 | "n2": 23.23e-6,
8 | "s": "sstring",
9 | "s1": "s1tring",
10 | "s2": "s2tr\\ing\"\u849c",
11 | "o": {
12 | "o1": "o1string" ,
13 | "o2": "o2string" ,
14 | "o3": true ,
15 | "o4": ["o4string", {
16 | "o41": "o41string"
17 | },
18 | ["o4nestedarray item 1" , "o4nestedarray item 1 item 2" , true, 99 , null, 90.98]
19 | ],
20 | "o5": 98.21,
21 | "o6": null,
22 | "o7": {
23 | "o71": "o71string",
24 | "o72": ["o72string", null, false, 98, {}],
25 | "o73": true,
26 | "o74": 98
27 | }
28 | },
29 | "a": [{
30 | "a11": "o71string\\",
31 | "a12": ["o72string", null, false, 98, {}],
32 | "a13": true,
33 | "a14": 98
34 | },
35 | {
36 | "a11": "o71string",
37 | "a12": ["o72string", null, false, 98, {}],
38 | "a13": true,
39 | "a14": 98
40 | },
41 | "astringinside", false, 99, null, 433.33e-6
42 | ]
43 | }
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Tamer Gür
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/scratch.go:
--------------------------------------------------------------------------------
1 | package jsparser
2 |
3 | import "unicode/utf8"
4 |
5 | // based on https://github.com/bcicen/jstream
6 |
7 | type scratch struct {
8 | data []byte
9 | dataRes []*JSON
10 | fill int
11 | fillRes int
12 | }
13 |
14 | // reset scratch buffer
15 | func (s *scratch) reset() { s.fill = 0 }
16 |
17 | // bytes returns the written contents of scratch buffer
18 | func (s *scratch) bytes() []byte { return s.data[0:s.fill] }
19 |
20 | // string returns the written contents of scratch buffer
21 | func (s *scratch) string() string { return string(s.data[0:s.fill]) }
22 |
23 | // grow scratch buffer
24 | func (s *scratch) grow() {
25 | ndata := make([]byte, cap(s.data)*2)
26 | copy(ndata, s.data[:])
27 | s.data = ndata
28 | }
29 |
30 | // append single byte to scratch buffer
31 | func (s *scratch) add(c byte) {
32 | if s.fill+1 >= cap(s.data) {
33 | s.grow()
34 | }
35 |
36 | s.data[s.fill] = c
37 | s.fill++
38 | }
39 |
40 | // append encoded rune to scratch buffer
41 | func (s *scratch) addRune(r rune) int {
42 | if s.fill+utf8.UTFMax >= cap(s.data) {
43 | s.grow()
44 | }
45 |
46 | n := utf8.EncodeRune(s.data[s.fill:], r)
47 | s.fill += n
48 | return n
49 | }
50 |
51 | // grow result buffer
52 | func (s *scratch) growRes() {
53 | ndata := make([]*JSON, cap(s.dataRes)*2)
54 | copy(ndata, s.dataRes[:])
55 | s.dataRes = ndata
56 | }
57 |
58 | // add result
59 | func (s *scratch) addRes(res *JSON) {
60 | if s.fillRes+1 >= cap(s.dataRes) {
61 | s.growRes()
62 | }
63 |
64 | s.dataRes[s.fillRes] = res
65 | s.fillRes++
66 | }
67 |
68 | func (s *scratch) allRes() []*JSON {
69 | return s.dataRes[0:s.fillRes]
70 | }
71 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## jsparser
2 |
3 | jsparser is a JSON parser for Go. It parses large JSON data efficiently in a streaming fashion.
4 |
5 | ### Usage
6 | ```json
7 | {
8 | "books": [{
9 | "title": "The Iliad and The Odyssey",
10 | "price": 12.95,
11 | "comments": [{
12 | "rating": 4,
13 | "comment": "Best translation I've read."
14 | }, {
15 | "rating": 2,
16 | "comment": "I like other versions better."
17 | }]
18 | },
19 | {
20 | "title": "Anthology of World Literature",
21 | "price": 24.95,
22 | "comments": [{
23 | "rating": 4,
24 | "comment": "Excellent overview of world literature."
25 | }, {
26 | "rating": 3,
27 | "comment": "Needs more modern literature."
28 | }]
29 | }
30 | ]
31 | }
32 | ```
33 | Stream over books
34 |
35 | ```go
36 | f, _ := os.Open("input.json")
37 | br := bufio.NewReaderSize(f,65536)
38 | parser := jsparser.NewJSONParser(br, "books")
39 |
40 | for json:= range parser.Stream() {
41 | fmt.Println(json.ObjectVals["title"])
42 | fmt.Println(json.ObjectVals["price"])
43 | fmt.Println(json.ObjectVals["comments"].(*jsparser.JSON).ArrayVals[0].(*jsparser.JSON).ObjectVals["rating"])
44 | }
45 |
46 | // for relatively small JSON, get all the results as a slice
47 | for _, json := range parser.Parse() {
48 | }
49 |
50 | ```
51 |
52 | Skip props for efficiency
53 |
54 | ```go
55 | parser := jsparser.NewJSONParser(br, "books").SkipProps([]string{"comments", "price"})
56 | ```
57 |
58 | Error handling
59 |
60 | ```go
61 | for json:= range parser.Stream() {
62 | if json.Err !=nil {
63 | // handle error
64 | }
65 | }
66 | ```
67 |
68 | Progress of parsing
69 | ```go
70 | // total number of bytes read so far, useful to calculate the progress of parsing
71 | parser.TotalReadSize
72 | ```
73 |
74 |
75 | If you are interested, also check out the [xml parser](https://github.com/tamerh/xml-stream-parser), which works similarly.
76 |
--------------------------------------------------------------------------------
/jsparser_test.go:
--------------------------------------------------------------------------------
1 | package jsparser
2 |
3 | import (
4 | "bufio"
5 | "bytes"
6 | "flag"
7 | "os"
8 | "strings"
9 | "testing"
10 | )
11 |
// Command-line switches for the test binary, registered in TestMain.
var (
	// minify makes getparser use the inline minified document instead of
	// sample.json.
	minify bool
	// parseall makes allResult collect results with Parse instead of Stream.
	parseall bool
)
14 |
15 | func TestMain(m *testing.M) {
16 | // call flag.Parse() here if TestMain uses flags
17 |
18 | flag.BoolVar(&minify, "minify", false, "Minify")
19 |
20 | flag.BoolVar(&parseall, "parseall", false, "ParseAll")
21 |
22 | flag.Parse()
23 |
24 | os.Exit(m.Run())
25 | }
26 |
27 | func getparser(prop string) *JsonParser {
28 |
29 | if minify {
30 | // todo add some space after some values
31 | const minijson string = `{"nu":null,"b":true,"b1":false,"n":2323,"n1":23.23,"n2":23.23e-6 ,"s":"sstring","s1":"s1tring","s2":"s2tr\\ing\"蒜","o":{"o1":"o1string","o2":"o2string","o3":true,"o4":["o4string",{"o41":"o41string"},["o4nestedarray item 1","o4nestedarray item 1 item 2",true,99,null,90.98]],"o5":98.21,"o6":null,"o7":{"o71":"o71string","o72":["o72string",null,false,98,{}],"o73":true,"o74":98}},"a":[{"a11":"o71string\\","a12":["o72string",null,false,98,{}],"a13":true,"a14":98},{"a11":"o71string","a12":["o72string",null,false,98,{}],"a13":true,"a14":98},"astringinside",false,99,null,0.00043333]}`
32 |
33 | br := bufio.NewReader(strings.NewReader(minijson))
34 |
35 | p := NewJSONParser(br, prop)
36 |
37 | return p
38 | }
39 |
40 | file, _ := os.Open("sample.json")
41 |
42 | br := bufio.NewReader(file)
43 |
44 | p := NewJSONParser(br, prop)
45 |
46 | return p
47 |
48 | }
49 |
50 | func allResult(p *JsonParser) []*JSON {
51 |
52 | if parseall {
53 | return p.Parse()
54 |
55 | }
56 | var res []*JSON
57 | for json := range p.Stream() {
58 | res = append(res, json)
59 | }
60 | return res
61 |
62 | }
63 | func TestString(t *testing.T) {
64 |
65 | var js JSON
66 |
67 | p := getparser("s")
68 | resultCount := 0
69 |
70 | for _, json := range allResult(p) {
71 |
72 | if json.Err != nil {
73 | panic(json.Err)
74 | }
75 | js = *json
76 | resultCount++
77 |
78 | }
79 |
80 | if resultCount != 1 {
81 | panic("result count must 1")
82 | }
83 |
84 | if js.StringVal != "sstring" {
85 | panic("invalid result string")
86 | }
87 |
88 | if js.ValueType != String {
89 | panic("Value type must be string")
90 | }
91 |
92 | p = getparser("s2")
93 |
94 | for _, json := range allResult(p) {
95 |
96 | if json.Err != nil {
97 | panic(json.Err)
98 | }
99 | js = *json
100 |
101 | }
102 |
103 | if js.StringVal != "s2tr\\ing\"蒜" {
104 | panic("invalid result string")
105 | }
106 |
107 | // Skip
108 |
109 | }
110 |
111 | func TestBoolean(t *testing.T) {
112 |
113 | p := getparser("b")
114 |
115 | resultCount := 0
116 | var js JSON
117 |
118 | for _, json := range allResult(p) {
119 |
120 | if json.Err != nil {
121 | panic(json.Err)
122 | }
123 | js = *json
124 | resultCount++
125 |
126 | }
127 |
128 | if resultCount != 1 {
129 | panic("result count must 1")
130 | }
131 |
132 | if !js.BoolVal {
133 | panic("invalid result boolean")
134 | }
135 |
136 | if js.ValueType != Boolean {
137 | panic("Value type must be boolean")
138 | }
139 |
140 | }
141 |
142 | func TestNumber(t *testing.T) {
143 |
144 | p := getparser("n2")
145 |
146 | resultCount := 0
147 | var js JSON
148 |
149 | for _, json := range allResult(p) {
150 |
151 | if json.Err != nil {
152 | panic(json.Err)
153 | }
154 | js = *json
155 | resultCount++
156 |
157 | }
158 |
159 | if resultCount != 1 {
160 | panic("result count must 1")
161 | }
162 |
163 | if js.StringVal != "23.23e-6" {
164 | panic("invalid result")
165 | }
166 |
167 | if js.ValueType != Number {
168 | panic("Value type must be boolean")
169 | }
170 |
171 | }
172 |
173 | func TestNull(t *testing.T) {
174 |
175 | p := getparser("nu")
176 |
177 | resultCount := 0
178 | var js JSON
179 |
180 | for _, json := range allResult(p) {
181 |
182 | if json.Err != nil {
183 | panic(json.Err)
184 | }
185 | js = *json
186 | resultCount++
187 |
188 | }
189 |
190 | if resultCount != 1 {
191 | panic("result count must 1")
192 | }
193 |
194 | if js.StringVal != "" {
195 | panic("invalid result")
196 | }
197 |
198 | if js.ValueType != Null {
199 | panic("Value type must be null")
200 | }
201 |
202 | }
203 |
204 | func TestObject(t *testing.T) {
205 |
206 | p := getparser("o")
207 |
208 | resultCount := 0
209 | var js JSON
210 |
211 | for _, json := range allResult(p) {
212 |
213 | if json.Err != nil {
214 | panic(json.Err)
215 | }
216 | js = *json
217 | resultCount++
218 |
219 | }
220 |
221 | if resultCount != 1 {
222 | panic("result count must 1")
223 | }
224 |
225 | if js.ValueType != Object {
226 | panic("Value type must be object")
227 | }
228 |
229 | if val, ok := js.ObjectVals["o1"]; !ok || val.(string) != "o1string" {
230 | panic("Test failed")
231 | }
232 |
233 | if val, ok := js.ObjectVals["o2"]; !ok || val.(string) != "o2string" {
234 | panic("Test failed")
235 | }
236 |
237 | if val, ok := js.ObjectVals["o3"]; !ok || !val.(bool) {
238 | panic("Test failed")
239 | }
240 |
241 | if val, ok := js.ObjectVals["o4"]; !ok || len(val.(*JSON).ArrayVals) != 3 {
242 | panic("Test failed")
243 | }
244 |
245 | if val, ok := js.ObjectVals["o4"]; !ok || len(val.(*JSON).ArrayVals[2].(*JSON).ArrayVals) != 6 {
246 | panic("Test failed")
247 | }
248 |
249 | // Skip test
250 | p = getparser("o").SkipProps([]string{"o1", "o2", "o4", "o5", "o6", "o7"})
251 |
252 | for _, json := range allResult(p) {
253 |
254 | if json.Err != nil {
255 | panic(json.Err)
256 | }
257 | js = *json
258 | resultCount++
259 | }
260 |
261 | if _, ok := js.ObjectVals["o1"]; ok {
262 | panic("Test failed")
263 | }
264 |
265 | if _, ok := js.ObjectVals["o2"]; ok {
266 | panic("Test failed")
267 | }
268 |
269 | if _, ok := js.ObjectVals["o4"]; ok {
270 | panic("Test failed")
271 | }
272 |
273 | if _, ok := js.ObjectVals["o5"]; ok {
274 | panic("Test failed")
275 | }
276 |
277 | if _, ok := js.ObjectVals["o6"]; ok {
278 | panic("Test failed")
279 | }
280 |
281 | if _, ok := js.ObjectVals["o7"]; ok {
282 | panic("Test failed")
283 | }
284 |
285 | if val, ok := js.ObjectVals["o3"]; !ok || !val.(bool) {
286 | panic("Test failed")
287 | }
288 |
289 | }
290 |
291 | func TestArray(t *testing.T) {
292 |
293 | p := getparser("a")
294 |
295 | var results []*JSON
296 |
297 | for _, json := range allResult(p) {
298 |
299 | if json.Err != nil {
300 | panic(json.Err)
301 | }
302 | results = append(results, json)
303 | }
304 |
305 | if len(results) != 7 {
306 | panic("result count must 7")
307 | }
308 |
309 | if results[0].ValueType != Object {
310 | panic("Value type must be object")
311 | }
312 | if results[1].ValueType != Object {
313 | panic("Value type must be object")
314 | }
315 |
316 | if results[2].ValueType != String {
317 | panic("Value type must be string")
318 | }
319 |
320 | if results[3].ValueType != Boolean {
321 | panic("Value type must be bool")
322 | }
323 |
324 | if results[4].ValueType != Number {
325 | panic("Value type must be bool")
326 | }
327 |
328 | if results[5].ValueType != Null {
329 | panic("Value type must be null")
330 | }
331 |
332 | if results[6].ValueType != Number {
333 | panic("Value type must be bool")
334 | }
335 |
336 | // Skip test
337 | p = getparser("a").SkipProps([]string{"a11", "a12", "a13"})
338 |
339 | for _, json := range allResult(p) {
340 |
341 | if json.Err != nil {
342 | panic(json.Err)
343 | }
344 |
345 | if json.ValueType == Object {
346 |
347 | if _, ok := json.ObjectVals["a11"]; ok {
348 | panic("Test failed")
349 | }
350 |
351 | if _, ok := json.ObjectVals["a12"]; ok {
352 | panic("Test failed")
353 | }
354 |
355 | if _, ok := json.ObjectVals["a13"]; ok {
356 | panic("Test failed")
357 | }
358 |
359 | }
360 |
361 | }
362 |
363 | }
364 |
365 | func TestArrayOnly(t *testing.T) {
366 |
367 | jsonArrays := [2]string{}
368 | jsonArrays[0] = `
369 | {"list":[
370 | {"Name": "Ed", "Text": "Knock knock."},
371 | {"Name": "Sam", "Text": "Who's there?"},
372 | {"Name": "Ed", "Text": "Go fmt."},
373 | {"Name": "Sam", "Text": "Go fmt ?"},
374 | {"Name": "Ed", "Text": "Go fmt !"}
375 | ]}
376 | `
377 | jsonArrays[1] = "[" + jsonArrays[0] + "]"
378 |
379 | for _, jsarray := range jsonArrays {
380 | br := bufio.NewReader(bytes.NewReader([]byte(jsarray)))
381 | p := NewJSONParser(br, "list")
382 | var results []*JSON
383 | for _, json := range allResult(p) {
384 |
385 | if json.Err != nil {
386 | t.Fatal(" Test failed")
387 | }
388 | results = append(results, json)
389 | }
390 | if results[0].ObjectVals["Text"].(string) != "Knock knock." {
391 | t.Fatal("results[0] Test failed ")
392 | }
393 |
394 | if results[1].ObjectVals["Name"].(string) != "Sam" {
395 | t.Fatal("results[0] Test failed ")
396 | }
397 |
398 | if results[4].ObjectVals["Name"].(string) != "Ed" {
399 | t.Fatal("results[0] Test failed ")
400 | }
401 | }
402 | }
403 |
404 | func TestRootArray(t *testing.T) {
405 |
406 | jsarray := `
407 | [
408 | {"Name": "Ed", "Text": "Knock knock."},
409 | {"Name": "Sam", "Text": "Who's there?"},
410 | {"Name": "Ed", "Text": "Go fmt."},
411 | {"Name": "Sam", "Text": "Go fmt ?"},
412 | {"Name": "Ed", "Text": "Go fmt !"},
413 | "Hello World",
414 | 666,
415 | null,
416 | true
417 | ]`
418 |
419 | br := bufio.NewReader(bytes.NewReader([]byte(jsarray)))
420 | p := NewJSONParser(br, "")
421 | var results []*JSON
422 | for _, json := range allResult(p) {
423 |
424 | if json.Err != nil {
425 | t.Fatal(" Test failed")
426 | }
427 | results = append(results, json)
428 | }
429 | if results[0].ObjectVals["Text"].(string) != "Knock knock." {
430 | t.Fatal("results[0] Test failed ")
431 | }
432 |
433 | if results[1].ObjectVals["Name"].(string) != "Sam" {
434 | t.Fatal("results[0] Test failed ")
435 | }
436 |
437 | if results[4].ObjectVals["Name"].(string) != "Ed" {
438 | t.Fatal("results[0] Test failed ")
439 | }
440 |
441 | if results[5].StringVal != "Hello World" {
442 | t.Fatal("results[0] Test failed ")
443 | }
444 |
445 | if results[6].StringVal != "666" {
446 | t.Fatal("results[0] Test failed ")
447 | }
448 |
449 | if results[7].StringVal != "" {
450 | t.Fatal("results[0] Test failed ")
451 | }
452 |
453 | if !results[8].BoolVal {
454 | t.Fatal("results[0] Test failed ")
455 | }
456 |
457 | }
458 |
459 | func TestInvalid(t *testing.T) {
460 |
461 | invalidStart := `{{"Name": "Ed", "Text": "Go fmt."},"s":"valid","s2":in"valid"}`
462 |
463 | br := bufio.NewReader(bytes.NewReader([]byte(invalidStart)))
464 | p := NewJSONParser(br, "s2")
465 |
466 | for _, json := range allResult(p) {
467 |
468 | if json.Err == nil {
469 | t.Fatal("Invalid error expected")
470 | }
471 |
472 | }
473 |
474 | invalidStart2 := `{{"Name": "Ed", "Text": "Go fmt."},"s":in"valid","s2":"valid"}` // invalid in non loop property
475 |
476 | br = bufio.NewReader(bytes.NewReader([]byte(invalidStart2)))
477 | p = NewJSONParser(br, "s2")
478 |
479 | for _, json := range allResult(p) {
480 |
481 | if json.Err == nil {
482 | t.Fatal("Invalid error expected")
483 | }
484 |
485 | }
486 |
487 | invalidEnd := `{"list":[{"Name": "Ed" , "Text": "Go fmt."} , {"Name": "Sam" , "Text": "Go fm"t who?"}]}`
488 |
489 | br = bufio.NewReader(bytes.NewReader([]byte(invalidEnd)))
490 | p = NewJSONParser(br, "list")
491 | index := 0
492 | for _, json := range allResult(p) {
493 |
494 | if index == 1 && json.Err == nil {
495 | t.Fatal("Invalid error expected")
496 | }
497 | index++
498 | }
499 |
500 | }
501 |
502 | func Benchmark1(b *testing.B) {
503 |
504 | for n := 0; n < b.N; n++ {
505 | p := getparser("a").SkipProps([]string{"a11"})
506 | for json := range p.Stream() {
507 | nothing(json)
508 | }
509 | }
510 | }
511 |
512 | func Benchmark2(b *testing.B) {
513 |
514 | for n := 0; n < b.N; n++ {
515 | p := getparser("a").SkipProps([]string{"a11"})
516 | for _, json := range p.Parse() {
517 | nothing(json)
518 | }
519 | }
520 | }
521 |
522 | func nothing(j *JSON) {
523 |
524 | }
525 |
--------------------------------------------------------------------------------
/jsparser.go:
--------------------------------------------------------------------------------
1 | package jsparser
2 |
3 | import (
4 | "bufio"
5 | "bytes"
6 | "fmt"
7 | "unicode/utf16"
8 | )
9 |
// JsonParser reads JSON from a buffered reader and emits the value(s) found
// under a single "loop" property, either through a channel (Stream) or as a
// slice (Parse).
type JsonParser struct {
	// reader is the input source handed to NewJSONParser.
	reader *bufio.Reader
	// loopProp is the name of the property whose value is emitted. When it is
	// empty the top-level value must be an array and its elements are emitted.
	loopProp []byte
	// resChan is the channel returned by Stream; parse closes it on return.
	resChan chan *JSON
	// isResArr is set by Parse and makes sendRes store results in scratch
	// instead of sending them on resChan.
	isResArr bool
	// skipProps is the set of property names registered through SkipProps;
	// getObjectTree leaves these out of the objects it builds.
	skipProps map[string]bool
	// TotalReadSize is described in the README as the total number of bytes
	// read so far. NOTE(review): it is updated outside this view — confirm.
	TotalReadSize uint64
	// NOTE(review): lastReadSize is not used in the visible code; presumably
	// it supports unreading a byte — confirm.
	lastReadSize int
	// scratch holds the token buffer and the result buffer used by Parse.
	scratch *scratch
}
20 |
// JSON is a single parsed result. Which fields are meaningful depends on
// ValueType.
type JSON struct {
	// StringVal holds the text of a String result and the original textual
	// form of a Number result.
	StringVal string
	// BoolVal holds the value of a Boolean result.
	BoolVal bool
	// ArrayVals holds the elements of an Array result. Elements are stored as
	// string (strings and numbers), "" (null), bool, or *JSON (nested arrays
	// and objects).
	ArrayVals []interface{}
	// ObjectVals holds the properties of an Object result, using the same
	// element representation as ArrayVals.
	ObjectVals map[string]interface{}
	// ValueType tells which kind of JSON value this result represents.
	ValueType ValueType
	// Err is non-nil when parsing failed.
	Err error
}
30 |
// ValueType identifies the kind of JSON value held by a JSON result.
type ValueType int8

// JSON value types. Invalid is the zero value and is used on results that
// report an error.
const (
	Invalid ValueType = iota
	Null
	String
	Number
	Boolean
	Array
	Object
)
44 |
45 | func NewJSONParser(reader *bufio.Reader, loopProp string) *JsonParser {
46 |
47 | j := &JsonParser{
48 | reader: reader,
49 | loopProp: []byte(loopProp),
50 | resChan: make(chan *JSON, 256),
51 | skipProps: map[string]bool{},
52 | scratch: &scratch{data: make([]byte, 2048), dataRes: make([]*JSON, 2048)},
53 | }
54 | return j
55 | }
56 |
57 | func (j *JsonParser) SkipProps(skipProps []string) *JsonParser {
58 |
59 | if len(skipProps) > 0 {
60 | for _, s := range skipProps {
61 | j.skipProps[s] = true
62 | }
63 | }
64 | return j
65 |
66 | }
67 |
68 | func (j *JsonParser) Stream() chan *JSON {
69 |
70 | go j.parse()
71 |
72 | return j.resChan
73 |
74 | }
75 |
76 | func (j *JsonParser) Parse() []*JSON {
77 |
78 | j.isResArr = true
79 | j.parse()
80 | return j.scratch.allRes()
81 |
82 | }
83 |
84 | func (j *JsonParser) parse() {
85 |
86 | defer close(j.resChan)
87 |
88 | var b byte
89 | var err error
90 |
91 |
92 | if len(j.loopProp)==0{ // expecting top level json is an Array
93 | for {
94 | b, err = j.readByte()
95 |
96 | if err != nil {
97 | return
98 | }
99 |
100 | if j.isWS(b) {
101 | continue
102 | }
103 |
104 | if b == '[' {
105 |
106 | j.loopArray()
107 | return
108 |
109 | }
110 |
111 | j.sendErrorStr("Check your json. When loop property is empty top level json must be an Array")
112 | return
113 |
114 | }
115 | }else{
116 |
117 | for {
118 | b, err = j.readByte()
119 |
120 | if err != nil {
121 | return
122 | }
123 |
124 | if j.isWS(b) {
125 | continue
126 | }
127 |
128 | if b == '"' { // begining of possible json property
129 |
130 | isprop, err := j.getPropName()
131 |
132 | if err != nil {
133 | j.sendError()
134 | return
135 | }
136 |
137 | if isprop {
138 |
139 | b, err = j.skipWS()
140 | if err != nil {
141 | j.sendError()
142 | return
143 | }
144 |
145 | valType, typeErr := j.getValueType(b)
146 |
147 | if typeErr != nil {
148 | j.sendError()
149 | return
150 | }
151 |
152 | if bytes.Equal(j.loopProp, j.scratch.bytes()) {
153 |
154 | switch valType {
155 | case String:
156 |
157 | err = j.string()
158 |
159 | if err != nil {
160 | j.sendError()
161 | return
162 | }
163 | j.sendRes(&JSON{StringVal: j.scratch.string(), ValueType: String})
164 |
165 | case Array:
166 |
167 | success := j.loopArray()
168 | if !success {
169 | return
170 | }
171 |
172 | case Object:
173 |
174 | res := &JSON{ObjectVals: map[string]interface{}{}, ValueType: Object}
175 | j.getObjectTree(res)
176 | j.sendRes(res)
177 | if res.Err != nil {
178 | return
179 | }
180 |
181 | case Boolean:
182 |
183 | b, err := j.boolean()
184 | if err != nil {
185 | j.sendError()
186 | return
187 | }
188 | j.sendRes(&JSON{BoolVal: b, ValueType: Boolean})
189 |
190 | case Number:
191 |
192 | err = j.number(b)
193 |
194 | if err != nil {
195 | j.sendError()
196 | return
197 | }
198 | j.sendRes(&JSON{StringVal: j.scratch.string(), ValueType: Number})
199 |
200 | case Null:
201 |
202 | err := j.null()
203 |
204 | if err != nil {
205 | j.sendError()
206 | return
207 | }
208 | j.sendRes(&JSON{ValueType: Null})
209 |
210 | }
211 |
212 | } else {
213 |
214 | if valType == String { // if valtype is string just skip it otherwise continue looking loopProp.
215 | err = j.skipString()
216 | if err != nil {
217 | j.sendError()
218 | return
219 | }
220 | }
221 |
222 | }
223 | }
224 | }
225 | }
226 |
227 | }
228 |
229 |
230 |
231 | }
232 |
233 | func (j *JsonParser) sendRes(res *JSON) {
234 | if j.isResArr {
235 | j.scratch.addRes(res)
236 | } else {
237 | j.resChan <- res
238 | }
239 | }
240 |
241 | func (j *JsonParser) loopArray() bool {
242 |
243 | var b byte
244 | var err error
245 |
246 | for {
247 |
248 | b, err = j.skipWS()
249 |
250 | if err != nil {
251 | j.sendError()
252 | return false
253 | }
254 |
255 | if b == ']' {
256 | return true
257 | }
258 |
259 | if b == ',' {
260 | continue
261 | }
262 |
263 | valType, err := j.getValueType(b)
264 |
265 | if err != nil {
266 | j.sendError()
267 | return false
268 | }
269 |
270 | switch valType {
271 | case String:
272 |
273 | err = j.string()
274 |
275 | if err != nil {
276 | j.sendRes(&JSON{Err: err, ValueType: Invalid})
277 | return false
278 | }
279 | j.sendRes(&JSON{StringVal: j.scratch.string(), ValueType: String})
280 | case Array:
281 |
282 | res := &JSON{ObjectVals: map[string]interface{}{}, ValueType: Array}
283 | j.getArrayTree(res)
284 | j.sendRes(res)
285 |
286 | case Object:
287 |
288 | res := &JSON{ObjectVals: map[string]interface{}{}, ValueType: Object}
289 | j.getObjectTree(res)
290 | j.sendRes(res)
291 |
292 | case Boolean:
293 |
294 | b, err := j.boolean()
295 | if err != nil {
296 | j.sendError()
297 | return false
298 | }
299 | j.sendRes(&JSON{BoolVal: b, ValueType: Boolean})
300 |
301 | case Number:
302 |
303 | err = j.number(b)
304 | if err != nil {
305 | return false
306 | }
307 | j.sendRes(&JSON{StringVal: j.scratch.string(), ValueType: Number})
308 |
309 | case Null:
310 |
311 | err := j.null()
312 |
313 | if err != nil {
314 | return false
315 | }
316 | j.sendRes(&JSON{ValueType: Null})
317 |
318 | }
319 |
320 | }
321 |
322 | }
323 |
324 | func (j *JsonParser) getObjectTree(res *JSON) {
325 |
326 | if res.Err != nil {
327 | return
328 | }
329 |
330 | var b byte
331 | var err error
332 | for {
333 |
334 | b, err = j.readByte()
335 |
336 | if err != nil {
337 | res.Err = err
338 | return
339 | }
340 |
341 | if j.isWS(b) {
342 | continue
343 | }
344 |
345 | if b == '"' { // begining of json property
346 |
347 | _, err := j.getPropName() // first variable ommited because inside object there can't be string item
348 | prop := j.scratch.string()
349 |
350 | if err != nil {
351 | res.Err = err
352 | return
353 | }
354 |
355 | b, err = j.skipWS()
356 | if err != nil {
357 | res.Err = j.defaultError()
358 | return
359 | }
360 |
361 | valType, err := j.getValueType(b)
362 |
363 | if err != nil {
364 | res.Err = err
365 | return
366 | }
367 |
368 | switch valType {
369 | case String:
370 |
371 | if ok := j.skipProps[prop]; ok {
372 | err = j.skipString()
373 |
374 | if err != nil {
375 | res.Err = err
376 | return
377 | }
378 | break
379 | }
380 |
381 | err = j.string()
382 |
383 | if err != nil {
384 | res.Err = err
385 | return
386 | }
387 |
388 | res.ObjectVals[prop] = j.scratch.string()
389 |
390 | case Array:
391 |
392 | if ok := j.skipProps[prop]; ok {
393 | err = j.skipArrayOrObject('[', ']')
394 |
395 | if err != nil {
396 | res.Err = err
397 | return
398 | }
399 | break
400 | }
401 | r := &JSON{ValueType: Array}
402 | j.getArrayTree(r)
403 | if r.Err != nil {
404 | res.Err = r.Err
405 | return
406 | }
407 | res.ObjectVals[prop] = r
408 |
409 | case Object:
410 |
411 | if ok := j.skipProps[prop]; ok {
412 | err = j.skipArrayOrObject('{', '}')
413 |
414 | if err != nil {
415 | res.Err = err
416 | return
417 | }
418 | break
419 | }
420 | r := &JSON{ObjectVals: map[string]interface{}{}, ValueType: Object}
421 | j.getObjectTree(r)
422 |
423 | if r.Err != nil {
424 | res.Err = r.Err
425 | return
426 | }
427 | res.ObjectVals[prop] = r
428 |
429 | case Boolean:
430 |
431 | b, err := j.boolean()
432 |
433 | if err != nil {
434 | res.Err = err
435 | return
436 | }
437 |
438 | // rest of the skip since they are small we just don't include in the result
439 | if ok := j.skipProps[prop]; !ok {
440 | res.ObjectVals[prop] = b
441 | }
442 |
443 | case Number:
444 |
445 | err = j.number(b)
446 |
447 | if err != nil {
448 | res.Err = err
449 | return
450 | }
451 |
452 | if ok := j.skipProps[prop]; !ok {
453 | res.ObjectVals[prop] = j.scratch.string()
454 | }
455 |
456 | case Null:
457 |
458 | err = j.null()
459 | if err != nil {
460 | res.Err = err
461 | return
462 | }
463 |
464 | if ok := j.skipProps[prop]; !ok {
465 | res.ObjectVals[prop] = ""
466 | }
467 |
468 | }
469 |
470 | } else if b == ',' {
471 |
472 | continue
473 |
474 | } else if b == '}' { // completion of current object
475 |
476 | return
477 |
478 | } else { // invalid end
479 |
480 | res.Err = j.defaultError()
481 | return
482 |
483 | }
484 |
485 | }
486 |
487 | }
488 |
489 | func (j *JsonParser) getArrayTree(res *JSON) {
490 |
491 | if res.Err != nil {
492 | return
493 | }
494 |
495 | var b byte
496 | var err error
497 |
498 | for {
499 |
500 | b, err = j.readByte()
501 |
502 | if err != nil {
503 | res.Err = err
504 | return
505 | }
506 |
507 | if j.isWS(b) {
508 | continue
509 | }
510 |
511 | if b == ',' {
512 | continue
513 | }
514 |
515 | if b == ']' { // means complete of current array
516 | return
517 | }
518 |
519 | valType, err := j.getValueType(b)
520 |
521 | if err != nil {
522 | res.Err = err
523 | return
524 | }
525 | switch valType {
526 | case String:
527 |
528 | err = j.string()
529 |
530 | if err != nil {
531 | res.Err = err
532 | return
533 | }
534 | res.ArrayVals = append(res.ArrayVals, j.scratch.string())
535 |
536 | case Array:
537 |
538 | r := &JSON{ValueType: Array}
539 | j.getArrayTree(r)
540 | if r.Err != nil {
541 | res.Err = r.Err
542 | return
543 | }
544 | res.ArrayVals = append(res.ArrayVals, r)
545 |
546 | case Object:
547 |
548 | r := &JSON{ObjectVals: map[string]interface{}{}, ValueType: Object}
549 | j.getObjectTree(r)
550 | if r.Err != nil {
551 | res.Err = r.Err
552 | return
553 | }
554 | res.ArrayVals = append(res.ArrayVals, r)
555 |
556 | case Boolean:
557 |
558 | b, err := j.boolean()
559 | if err != nil {
560 | res.Err = err
561 | return
562 | }
563 |
564 | res.ArrayVals = append(res.ArrayVals, b)
565 |
566 | case Number:
567 |
568 | err = j.number(b)
569 | if err != nil {
570 | res.Err = err
571 | return
572 | }
573 | res.ArrayVals = append(res.ArrayVals, j.scratch.string())
574 |
575 | case Null:
576 |
577 | err = j.null()
578 |
579 | if err != nil {
580 | res.Err = err
581 | return
582 | }
583 |
584 | res.ArrayVals = append(res.ArrayVals, "")
585 |
586 | }
587 |
588 | }
589 |
590 | }
591 |
592 | func (j *JsonParser) number(first byte) error {
593 |
594 | var c byte
595 | var err error
596 | j.scratch.reset()
597 | j.scratch.add(first)
598 |
599 | for {
600 |
601 | c, err = j.readByte()
602 |
603 | if err != nil {
604 | return j.defaultError()
605 | }
606 |
607 | if j.isWS(c) {
608 |
609 | c, err = j.skipWS()
610 |
611 | if err != nil {
612 | return j.defaultError()
613 | }
614 |
615 | if !(c == ',' || c == '}' || c == ']') {
616 | return j.defaultError()
617 | }
618 | err := j.unreadByte()
619 | if err != nil {
620 | return j.defaultError()
621 | }
622 |
623 | return nil
624 | }
625 |
626 | if c == ',' || c == '}' || c == ']' {
627 |
628 | err := j.unreadByte()
629 | if err != nil {
630 | return j.defaultError()
631 | }
632 |
633 | return nil
634 | }
635 |
636 | j.scratch.add(c)
637 |
638 | }
639 |
640 | }
641 |
642 | func (j *JsonParser) boolean() (bool, error) {
643 |
644 | var c byte
645 | var err error
646 |
647 | c, err = j.readByte()
648 |
649 | if err != nil {
650 | return false, j.defaultError()
651 | }
652 |
653 | // true
654 | if c == 'r' {
655 | c, err = j.readByte()
656 |
657 | if err != nil {
658 | return false, j.defaultError()
659 | }
660 | if c == 'u' {
661 | c, err = j.readByte()
662 |
663 | if err != nil {
664 | return false, j.defaultError()
665 | }
666 | if c == 'e' {
667 | // check last
668 | c, err = j.skipWS()
669 | if err != nil {
670 | return false, j.defaultError()
671 | }
672 | if !(c == ',' || c == '}' || c == ']') {
673 | return false, j.defaultError()
674 | }
675 | err := j.unreadByte()
676 | if err != nil {
677 | return false, j.defaultError()
678 | }
679 |
680 | return true, nil
681 | }
682 | }
683 | }
684 |
685 | // false
686 | if c == 'a' {
687 | c, err = j.readByte()
688 |
689 | if err != nil {
690 | return false, j.defaultError()
691 | }
692 | if c == 'l' {
693 | c, err = j.readByte()
694 |
695 | if err != nil {
696 | return false, j.defaultError()
697 | }
698 | if c == 's' {
699 | c, err = j.readByte()
700 |
701 | if err != nil {
702 | return false, j.defaultError()
703 | }
704 | if c == 'e' {
705 | // check last
706 | c, err = j.skipWS()
707 | if err != nil {
708 | return false, j.defaultError()
709 | }
710 | if !(c == ',' || c == '}' || c == ']') {
711 | return false, j.defaultError()
712 | }
713 | err := j.unreadByte()
714 | if err != nil {
715 | return false, j.defaultError()
716 | }
717 |
718 | return false, nil
719 | }
720 | }
721 | }
722 | }
723 |
724 | return false, j.defaultError()
725 |
726 | }
727 |
728 | func (j *JsonParser) null() error {
729 |
730 | var c byte
731 | var err error
732 |
733 | c, err = j.readByte()
734 |
735 | if err != nil {
736 | return j.defaultError()
737 | }
738 |
739 | // true
740 | if c == 'u' {
741 | c, err = j.readByte()
742 |
743 | if err != nil {
744 | return j.defaultError()
745 | }
746 |
747 | if c == 'l' {
748 | c, err = j.readByte()
749 |
750 | if err != nil {
751 | return j.defaultError()
752 | }
753 | if c == 'l' {
754 | // check last
755 | c, err = j.skipWS()
756 | if err != nil {
757 | return j.defaultError()
758 | }
759 |
760 | if !(c == ',' || c == '}' || c == ']') {
761 | return j.defaultError()
762 | }
763 |
764 | err := j.unreadByte()
765 | if err != nil {
766 | return j.defaultError()
767 | }
768 |
769 | return nil
770 | }
771 | }
772 | }
773 |
774 | return j.defaultError()
775 | }
776 |
777 | func (j *JsonParser) skipString() error {
778 |
779 | var c byte
780 | var prev byte
781 | var prevPrev byte
782 | var err error
783 | for {
784 |
785 | c, err = j.readByte()
786 |
787 | if err != nil {
788 | return j.defaultError()
789 | }
790 |
791 | if c == '"' {
792 |
793 | if !(prev == '\\' && prevPrev != '\\') { // escape check
794 | return nil
795 | }
796 |
797 | }
798 |
799 | prevPrev = prev
800 | prev = c
801 |
802 | }
803 |
804 | }
805 |
806 | func (j *JsonParser) skipArrayOrObject(start byte, end byte) error {
807 |
808 | var c byte
809 | var err error
810 | var depth = 1
811 | for {
812 |
813 | c, err = j.readByte()
814 |
815 | if err != nil {
816 | return j.defaultError()
817 | }
818 |
819 | switch c {
820 | case '"':
821 | err = j.skipString() // this is needed because string can contain [ or ]
822 | if err != nil {
823 | return err
824 | }
825 | case start:
826 | depth++
827 | case end:
828 | depth--
829 | if depth == 0 {
830 | return nil
831 | }
832 |
833 | }
834 |
835 | }
836 |
837 | }
838 |
839 | func (j *JsonParser) getValueType(c byte) (ValueType, error) {
840 |
841 | switch c {
842 | case '"':
843 | return String, nil
844 | case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-':
845 | return Number, nil
846 | case 'f':
847 | return Boolean, nil
848 | case 't':
849 | return Boolean, nil
850 | case 'n':
851 | return Null, nil
852 | case '[':
853 | return Array, nil
854 | case '{':
855 | return Object, nil
856 | }
857 |
858 | return Invalid, j.defaultError()
859 |
860 | }
861 |
862 | // first return type is checking if it is property or just an array item
863 | func (j *JsonParser) getPropName() (bool, error) {
864 |
865 | err := j.string()
866 |
867 | if err != nil {
868 | return false, err
869 | }
870 |
871 | b, err := j.skipWS()
872 |
873 | if err != nil {
874 | return false, err
875 | }
876 |
877 | if b == ':' { // end of property name
878 | return true, nil
879 | }
880 |
881 | err = j.unreadByte()
882 |
883 | return false, err
884 |
885 | }
886 |
887 | func (j *JsonParser) isWS(in byte) bool {
888 |
889 | if in == ' ' || in == '\n' || in == '\t' || in == '\r' {
890 | return true
891 | }
892 |
893 | return false
894 |
895 | }
896 |
897 | // skips WS and read first non WS
898 | func (j *JsonParser) skipWS() (byte, error) {
899 |
900 | var b byte
901 | var err error
902 | for {
903 | b, err = j.readByte()
904 | if err != nil {
905 | return 0, err
906 | }
907 | if b == ' ' || b == '\n' || b == '\t' || b == '\r' {
908 | continue
909 | } else {
910 | return b, nil
911 | }
912 | }
913 |
914 | }
915 |
916 | func (j *JsonParser) readByte() (byte, error) {
917 |
918 | by, err := j.reader.ReadByte()
919 |
920 | j.TotalReadSize = j.TotalReadSize + 1
921 |
922 | j.lastReadSize = 1
923 |
924 | if err != nil {
925 | return 0, err
926 | }
927 | return by, nil
928 |
929 | }
930 |
931 | func (j *JsonParser) unreadByte() error {
932 |
933 | err := j.reader.UnreadByte()
934 | if err != nil {
935 | return err
936 | }
937 | j.TotalReadSize = j.TotalReadSize - 1
938 | return nil
939 |
940 | }
941 |
942 | func (j *JsonParser) sendError() {
943 | err := fmt.Errorf("Invalid json")
944 | if j.isResArr {
945 | j.scratch.addRes(&JSON{Err: err, ValueType: Invalid})
946 | } else {
947 | j.resChan <- &JSON{Err: err, ValueType: Invalid}
948 | }
949 | }
950 |
951 | func (j *JsonParser) sendErrorStr(s string) {
952 | err := fmt.Errorf(s)
953 | if j.isResArr {
954 | j.scratch.addRes(&JSON{Err: err, ValueType: Invalid})
955 | } else {
956 | j.resChan <- &JSON{Err: err, ValueType: Invalid}
957 | }
958 | }
959 |
960 | func (j *JsonParser) resultError() *JSON {
961 |
962 | return &JSON{Err: j.defaultError(), ValueType: Invalid}
963 |
964 | }
965 |
966 | func (j *JsonParser) defaultError() error {
967 | err := fmt.Errorf("Invalid json")
968 | return err
969 | }
970 |
971 | // based on https://github.com/bcicen/jstream
972 | func (j *JsonParser) string() error {
973 |
974 | j.scratch.reset()
975 |
976 | var err error
977 | var c byte
978 |
979 | c, err = j.readByte()
980 | if err != nil {
981 | if err != nil {
982 | return j.defaultError()
983 | }
984 | }
985 |
986 | scan:
987 | for {
988 | switch {
989 | case c == '"':
990 | return nil
991 | case c == '\\':
992 | c, err = j.readByte()
993 | if err != nil {
994 | if err != nil {
995 | return j.defaultError()
996 | }
997 | }
998 | goto scan_esc
999 | case c < 0x20:
1000 | return j.defaultError()
1001 | // Coerce to well-formed UTF-8.
1002 |
1003 | }
1004 | j.scratch.add(c)
1005 | c, err = j.readByte()
1006 | if err != nil {
1007 | if err != nil {
1008 | return j.defaultError()
1009 | }
1010 | }
1011 | }
1012 |
1013 | scan_esc:
1014 | switch c {
1015 | case '"', '\\', '/', '\'':
1016 | j.scratch.add(c)
1017 | case 'u':
1018 | goto scan_u
1019 | case 'b':
1020 | j.scratch.add('\b')
1021 | case 'f':
1022 | j.scratch.add('\f')
1023 | case 'n':
1024 | j.scratch.add('\n')
1025 | case 'r':
1026 | j.scratch.add('\r')
1027 | case 't':
1028 | j.scratch.add('\t')
1029 | default:
1030 | //err := fmt.Errorf("error in string escape code")
1031 | return j.defaultError()
1032 | }
1033 |
1034 | c, err = j.readByte()
1035 | if err != nil {
1036 | if err != nil {
1037 | return j.defaultError()
1038 | }
1039 | }
1040 |
1041 | goto scan
1042 |
1043 | scan_u:
1044 | r := j.u4()
1045 | if r < 0 {
1046 | //err := fmt.Errorf("in unicode escape sequence")
1047 | return j.defaultError()
1048 | }
1049 |
1050 | // check for proceeding surrogate pair
1051 | c, err = j.readByte()
1052 | if err != nil {
1053 | if err != nil {
1054 | return j.defaultError()
1055 | }
1056 | }
1057 |
1058 | if !utf16.IsSurrogate(r) || c != '\\' {
1059 | j.scratch.addRune(r)
1060 | goto scan
1061 | }
1062 |
1063 | c, err = j.readByte()
1064 | if err != nil {
1065 | if err != nil {
1066 | return j.defaultError()
1067 | }
1068 | }
1069 |
1070 | if c != 'u' {
1071 | j.scratch.addRune(r)
1072 | goto scan_esc
1073 | }
1074 |
1075 | r2 := j.u4()
1076 | if r2 < 0 {
1077 | return j.defaultError()
1078 | }
1079 |
1080 | // write surrogate pair
1081 | j.scratch.addRune(utf16.DecodeRune(r, r2))
1082 |
1083 | c, err = j.readByte()
1084 | if err != nil {
1085 | if err != nil {
1086 | return j.defaultError()
1087 | }
1088 | }
1089 |
1090 | goto scan
1091 | }
1092 |
1093 | // u4 reads four bytes following a \u escape
1094 | func (j *JsonParser) u4() rune {
1095 | // logic taken from:
1096 | // github.com/buger/jsonparser/blob/master/escape.go#L20
1097 |
1098 | var c byte
1099 | var err error
1100 | var h [4]int
1101 | for i := 0; i < 4; i++ {
1102 |
1103 | c, err = j.readByte()
1104 | if err != nil {
1105 | if err != nil {
1106 | return -1
1107 | }
1108 | }
1109 | switch {
1110 | case c >= '0' && c <= '9':
1111 | h[i] = int(c - '0')
1112 | case c >= 'A' && c <= 'F':
1113 | h[i] = int(c - 'A' + 10)
1114 | case c >= 'a' && c <= 'f':
1115 | h[i] = int(c - 'a' + 10)
1116 | default:
1117 | return -1
1118 | }
1119 | }
1120 | return rune(h[0]<<12 + h[1]<<8 + h[2]<<4 + h[3])
1121 | }
1122 |
--------------------------------------------------------------------------------