├── test.sh ├── sample.json ├── LICENSE ├── scratch.go ├── README.md ├── jsparser_test.go └── jsparser.go /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | go test jsparser.go scratch.go jsparser_test.go -v 4 | 5 | go test jsparser.go scratch.go jsparser_test.go -v --minify 6 | 7 | go test jsparser.go scratch.go jsparser_test.go -v --parseall -------------------------------------------------------------------------------- /sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "nu": null, 3 | "b": true, 4 | "b1": false, 5 | "n": 2323, 6 | "n1": 23.23, 7 | "n2": 23.23e-6, 8 | "s": "sstring", 9 | "s1": "s1tring", 10 | "s2": "s2tr\\ing\"\u849c", 11 | "o": { 12 | "o1": "o1string" , 13 | "o2": "o2string" , 14 | "o3": true , 15 | "o4": ["o4string", { 16 | "o41": "o41string" 17 | }, 18 | ["o4nestedarray item 1" , "o4nestedarray item 1 item 2" , true, 99 , null, 90.98] 19 | ], 20 | "o5": 98.21, 21 | "o6": null, 22 | "o7": { 23 | "o71": "o71string", 24 | "o72": ["o72string", null, false, 98, {}], 25 | "o73": true, 26 | "o74": 98 27 | } 28 | }, 29 | "a": [{ 30 | "a11": "o71string\\", 31 | "a12": ["o72string", null, false, 98, {}], 32 | "a13": true, 33 | "a14": 98 34 | }, 35 | { 36 | "a11": "o71string", 37 | "a12": ["o72string", null, false, 98, {}], 38 | "a13": true, 39 | "a14": 98 40 | }, 41 | "astringinside", false, 99, null, 433.33e-6 42 | ] 43 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Tamer Gür 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /scratch.go: -------------------------------------------------------------------------------- 1 | package jsparser 2 | 3 | import "unicode/utf8" 4 | 5 | // based on https://github.com/bcicen/jstream 6 | 7 | type scratch struct { 8 | data []byte 9 | dataRes []*JSON 10 | fill int 11 | fillRes int 12 | } 13 | 14 | // reset scratch buffer 15 | func (s *scratch) reset() { s.fill = 0 } 16 | 17 | // bytes returns the written contents of scratch buffer 18 | func (s *scratch) bytes() []byte { return s.data[0:s.fill] } 19 | 20 | // string returns the written contents of scratch buffer 21 | func (s *scratch) string() string { return string(s.data[0:s.fill]) } 22 | 23 | // grow scratch buffer 24 | func (s *scratch) grow() { 25 | ndata := make([]byte, cap(s.data)*2) 26 | copy(ndata, s.data[:]) 27 | s.data = ndata 28 | } 29 | 30 | // append single byte to scratch buffer 31 | func (s *scratch) add(c byte) { 32 | if s.fill+1 >= cap(s.data) { 33 | s.grow() 34 | } 35 | 36 | s.data[s.fill] = c 37 | s.fill++ 38 | } 39 | 40 | // append encoded rune to scratch buffer 41 | func (s *scratch) addRune(r rune) int { 42 | if s.fill+utf8.UTFMax >= cap(s.data) { 43 | s.grow() 44 | } 45 | 46 | n := utf8.EncodeRune(s.data[s.fill:], r) 47 | s.fill += n 48 | return n 49 | } 50 | 51 | // grow result buffer 52 | func (s *scratch) growRes() { 53 | ndata := make([]*JSON, cap(s.dataRes)*2) 54 | copy(ndata, s.dataRes[:]) 55 | s.dataRes = ndata 56 | } 57 | 58 | // add result 59 | func (s *scratch) addRes(res *JSON) { 60 | if s.fillRes+1 >= cap(s.dataRes) { 61 | s.growRes() 62 | } 63 | 64 | s.dataRes[s.fillRes] = res 65 | s.fillRes++ 66 | } 67 | 68 | func (s *scratch) allRes() []*JSON { 69 | return s.dataRes[0:s.fillRes] 70 | } 71 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## jsparser 2 | 3 | jsparser is a json parser for GO. It is efficient to parse large json data with streaming fashion. 4 | 5 | ### Usage 6 | ```json 7 | { 8 | "books": [{ 9 | "title": "The Iliad and The Odyssey", 10 | "price": 12.95, 11 | "comments": [{ 12 | "rating": 4, 13 | "comment": "Best translation I've read." 14 | }, { 15 | "rating": 2, 16 | "comment": "I like other versions better." 17 | }] 18 | }, 19 | { 20 | "title": "Anthology of World Literature", 21 | "price": 24.95, 22 | "comments": [{ 23 | "rating": 4, 24 | "comment": "Excellent overview of world literature." 25 | }, { 26 | "rating": 3, 27 | "comment": "Needs more modern literature." 28 | }] 29 | } 30 | ] 31 | } 32 | ``` 33 | Stream over books 34 | 35 | ```go 36 | f, _ := os.Open("input.json") 37 | br := bufio.NewReaderSize(f,65536) 38 | parser := jsparser.NewJSONParser(br, "books") 39 | 40 | for json:= range parser.Stream() { 41 | fmt.Println(json.ObjectVals["title"]) 42 | fmt.Println(json.ObjectVals["price"]) 43 | fmt.Println(json.ObjectVals["comments"].(*jsparser.JSON).ArrayVals[0].(*jsparser.JSON).ObjectVals["rating"]) 44 | } 45 | 46 | // for relatively small size json. get all the results as slice 47 | for json:= range parser.Parse() { 48 | } 49 | 50 | ``` 51 | 52 | Skip props for efficiency 53 | 54 | ```go 55 | parser := pr.NewJSONParser(br, "books").SkipProps([]string{"comments", "price"}) 56 | ``` 57 | 58 | Error handling 59 | 60 | ```go 61 | for json:= range parser.Stream() { 62 | if json.Err !=nil { 63 | // handle error 64 | } 65 | } 66 | ``` 67 | 68 | Progress of parsing 69 | ```go 70 | // total byte read to calculate the progress of parsing 71 | parser.TotalReadSize 72 | ``` 73 | 74 | 75 | If you are interested check also [xml parser](https://github.com/tamerh/xml-stream-parser) which works similarly. 76 | -------------------------------------------------------------------------------- /jsparser_test.go: -------------------------------------------------------------------------------- 1 | package jsparser 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "flag" 7 | "os" 8 | "strings" 9 | "testing" 10 | ) 11 | 12 | var minify bool 13 | var parseall bool 14 | 15 | func TestMain(m *testing.M) { 16 | // call flag.Parse() here if TestMain uses flags 17 | 18 | flag.BoolVar(&minify, "minify", false, "Minify") 19 | 20 | flag.BoolVar(&parseall, "parseall", false, "ParseAll") 21 | 22 | flag.Parse() 23 | 24 | os.Exit(m.Run()) 25 | } 26 | 27 | func getparser(prop string) *JsonParser { 28 | 29 | if minify { 30 | // todo add some space after some values 31 | const minijson string = `{"nu":null,"b":true,"b1":false,"n":2323,"n1":23.23,"n2":23.23e-6 ,"s":"sstring","s1":"s1tring","s2":"s2tr\\ing\"蒜","o":{"o1":"o1string","o2":"o2string","o3":true,"o4":["o4string",{"o41":"o41string"},["o4nestedarray item 1","o4nestedarray item 1 item 2",true,99,null,90.98]],"o5":98.21,"o6":null,"o7":{"o71":"o71string","o72":["o72string",null,false,98,{}],"o73":true,"o74":98}},"a":[{"a11":"o71string\\","a12":["o72string",null,false,98,{}],"a13":true,"a14":98},{"a11":"o71string","a12":["o72string",null,false,98,{}],"a13":true,"a14":98},"astringinside",false,99,null,0.00043333]}` 32 | 33 | br := bufio.NewReader(strings.NewReader(minijson)) 34 | 35 | p := NewJSONParser(br, prop) 36 | 37 | return p 38 | } 39 | 40 | file, _ := os.Open("sample.json") 41 | 42 | br := bufio.NewReader(file) 43 | 44 | p := NewJSONParser(br, prop) 45 | 46 | return p 47 | 48 | } 49 | 50 | func allResult(p *JsonParser) []*JSON { 51 | 52 | if parseall { 53 | return p.Parse() 54 | 55 | } 56 | var res []*JSON 57 | for json := range p.Stream() { 58 | res = append(res, json) 59 | } 60 | return res 61 | 62 | } 63 | func TestString(t *testing.T) { 64 | 65 | var js JSON 66 | 67 | p := getparser("s") 68 | resultCount := 0 69 | 70 | for _, json := range allResult(p) { 71 | 72 | if json.Err != nil { 73 | panic(json.Err) 74 | } 75 | js = *json 76 | resultCount++ 77 | 78 | } 79 | 80 | if resultCount != 1 { 81 | panic("result count must 1") 82 | } 83 | 84 | if js.StringVal != "sstring" { 85 | panic("invalid result string") 86 | } 87 | 88 | if js.ValueType != String { 89 | panic("Value type must be string") 90 | } 91 | 92 | p = getparser("s2") 93 | 94 | for _, json := range allResult(p) { 95 | 96 | if json.Err != nil { 97 | panic(json.Err) 98 | } 99 | js = *json 100 | 101 | } 102 | 103 | if js.StringVal != "s2tr\\ing\"蒜" { 104 | panic("invalid result string") 105 | } 106 | 107 | // Skip 108 | 109 | } 110 | 111 | func TestBoolean(t *testing.T) { 112 | 113 | p := getparser("b") 114 | 115 | resultCount := 0 116 | var js JSON 117 | 118 | for _, json := range allResult(p) { 119 | 120 | if json.Err != nil { 121 | panic(json.Err) 122 | } 123 | js = *json 124 | resultCount++ 125 | 126 | } 127 | 128 | if resultCount != 1 { 129 | panic("result count must 1") 130 | } 131 | 132 | if !js.BoolVal { 133 | panic("invalid result boolean") 134 | } 135 | 136 | if js.ValueType != Boolean { 137 | panic("Value type must be boolean") 138 | } 139 | 140 | } 141 | 142 | func TestNumber(t *testing.T) { 143 | 144 | p := getparser("n2") 145 | 146 | resultCount := 0 147 | var js JSON 148 | 149 | for _, json := range allResult(p) { 150 | 151 | if json.Err != nil { 152 | panic(json.Err) 153 | } 154 | js = *json 155 | resultCount++ 156 | 157 | } 158 | 159 | if resultCount != 1 { 160 | panic("result count must 1") 161 | } 162 | 163 | if js.StringVal != "23.23e-6" { 164 | panic("invalid result") 165 | } 166 | 167 | if js.ValueType != Number { 168 | panic("Value type must be boolean") 169 | } 170 | 171 | } 172 | 173 | func TestNull(t *testing.T) { 174 | 175 | p := getparser("nu") 176 | 177 | resultCount := 0 178 | var js JSON 179 | 180 | for _, json := range allResult(p) { 181 | 182 | if json.Err != nil { 183 | panic(json.Err) 184 | } 185 | js = *json 186 | resultCount++ 187 | 188 | } 189 | 190 | if resultCount != 1 { 191 | panic("result count must 1") 192 | } 193 | 194 | if js.StringVal != "" { 195 | panic("invalid result") 196 | } 197 | 198 | if js.ValueType != Null { 199 | panic("Value type must be null") 200 | } 201 | 202 | } 203 | 204 | func TestObject(t *testing.T) { 205 | 206 | p := getparser("o") 207 | 208 | resultCount := 0 209 | var js JSON 210 | 211 | for _, json := range allResult(p) { 212 | 213 | if json.Err != nil { 214 | panic(json.Err) 215 | } 216 | js = *json 217 | resultCount++ 218 | 219 | } 220 | 221 | if resultCount != 1 { 222 | panic("result count must 1") 223 | } 224 | 225 | if js.ValueType != Object { 226 | panic("Value type must be object") 227 | } 228 | 229 | if val, ok := js.ObjectVals["o1"]; !ok || val.(string) != "o1string" { 230 | panic("Test failed") 231 | } 232 | 233 | if val, ok := js.ObjectVals["o2"]; !ok || val.(string) != "o2string" { 234 | panic("Test failed") 235 | } 236 | 237 | if val, ok := js.ObjectVals["o3"]; !ok || !val.(bool) { 238 | panic("Test failed") 239 | } 240 | 241 | if val, ok := js.ObjectVals["o4"]; !ok || len(val.(*JSON).ArrayVals) != 3 { 242 | panic("Test failed") 243 | } 244 | 245 | if val, ok := js.ObjectVals["o4"]; !ok || len(val.(*JSON).ArrayVals[2].(*JSON).ArrayVals) != 6 { 246 | panic("Test failed") 247 | } 248 | 249 | // Skip test 250 | p = getparser("o").SkipProps([]string{"o1", "o2", "o4", "o5", "o6", "o7"}) 251 | 252 | for _, json := range allResult(p) { 253 | 254 | if json.Err != nil { 255 | panic(json.Err) 256 | } 257 | js = *json 258 | resultCount++ 259 | } 260 | 261 | if _, ok := js.ObjectVals["o1"]; ok { 262 | panic("Test failed") 263 | } 264 | 265 | if _, ok := js.ObjectVals["o2"]; ok { 266 | panic("Test failed") 267 | } 268 | 269 | if _, ok := js.ObjectVals["o4"]; ok { 270 | panic("Test failed") 271 | } 272 | 273 | if _, ok := js.ObjectVals["o5"]; ok { 274 | panic("Test failed") 275 | } 276 | 277 | if _, ok := js.ObjectVals["o6"]; ok { 278 | panic("Test failed") 279 | } 280 | 281 | if _, ok := js.ObjectVals["o7"]; ok { 282 | panic("Test failed") 283 | } 284 | 285 | if val, ok := js.ObjectVals["o3"]; !ok || !val.(bool) { 286 | panic("Test failed") 287 | } 288 | 289 | } 290 | 291 | func TestArray(t *testing.T) { 292 | 293 | p := getparser("a") 294 | 295 | var results []*JSON 296 | 297 | for _, json := range allResult(p) { 298 | 299 | if json.Err != nil { 300 | panic(json.Err) 301 | } 302 | results = append(results, json) 303 | } 304 | 305 | if len(results) != 7 { 306 | panic("result count must 7") 307 | } 308 | 309 | if results[0].ValueType != Object { 310 | panic("Value type must be object") 311 | } 312 | if results[1].ValueType != Object { 313 | panic("Value type must be object") 314 | } 315 | 316 | if results[2].ValueType != String { 317 | panic("Value type must be string") 318 | } 319 | 320 | if results[3].ValueType != Boolean { 321 | panic("Value type must be bool") 322 | } 323 | 324 | if results[4].ValueType != Number { 325 | panic("Value type must be bool") 326 | } 327 | 328 | if results[5].ValueType != Null { 329 | panic("Value type must be null") 330 | } 331 | 332 | if results[6].ValueType != Number { 333 | panic("Value type must be bool") 334 | } 335 | 336 | // Skip test 337 | p = getparser("a").SkipProps([]string{"a11", "a12", "a13"}) 338 | 339 | for _, json := range allResult(p) { 340 | 341 | if json.Err != nil { 342 | panic(json.Err) 343 | } 344 | 345 | if json.ValueType == Object { 346 | 347 | if _, ok := json.ObjectVals["a11"]; ok { 348 | panic("Test failed") 349 | } 350 | 351 | if _, ok := json.ObjectVals["a12"]; ok { 352 | panic("Test failed") 353 | } 354 | 355 | if _, ok := json.ObjectVals["a13"]; ok { 356 | panic("Test failed") 357 | } 358 | 359 | } 360 | 361 | } 362 | 363 | } 364 | 365 | func TestArrayOnly(t *testing.T) { 366 | 367 | jsonArrays := [2]string{} 368 | jsonArrays[0] = ` 369 | {"list":[ 370 | {"Name": "Ed", "Text": "Knock knock."}, 371 | {"Name": "Sam", "Text": "Who's there?"}, 372 | {"Name": "Ed", "Text": "Go fmt."}, 373 | {"Name": "Sam", "Text": "Go fmt ?"}, 374 | {"Name": "Ed", "Text": "Go fmt !"} 375 | ]} 376 | ` 377 | jsonArrays[1] = "[" + jsonArrays[0] + "]" 378 | 379 | for _, jsarray := range jsonArrays { 380 | br := bufio.NewReader(bytes.NewReader([]byte(jsarray))) 381 | p := NewJSONParser(br, "list") 382 | var results []*JSON 383 | for _, json := range allResult(p) { 384 | 385 | if json.Err != nil { 386 | t.Fatal(" Test failed") 387 | } 388 | results = append(results, json) 389 | } 390 | if results[0].ObjectVals["Text"].(string) != "Knock knock." { 391 | t.Fatal("results[0] Test failed ") 392 | } 393 | 394 | if results[1].ObjectVals["Name"].(string) != "Sam" { 395 | t.Fatal("results[0] Test failed ") 396 | } 397 | 398 | if results[4].ObjectVals["Name"].(string) != "Ed" { 399 | t.Fatal("results[0] Test failed ") 400 | } 401 | } 402 | } 403 | 404 | func TestRootArray(t *testing.T) { 405 | 406 | jsarray := ` 407 | [ 408 | {"Name": "Ed", "Text": "Knock knock."}, 409 | {"Name": "Sam", "Text": "Who's there?"}, 410 | {"Name": "Ed", "Text": "Go fmt."}, 411 | {"Name": "Sam", "Text": "Go fmt ?"}, 412 | {"Name": "Ed", "Text": "Go fmt !"}, 413 | "Hello World", 414 | 666, 415 | null, 416 | true 417 | ]` 418 | 419 | br := bufio.NewReader(bytes.NewReader([]byte(jsarray))) 420 | p := NewJSONParser(br, "") 421 | var results []*JSON 422 | for _, json := range allResult(p) { 423 | 424 | if json.Err != nil { 425 | t.Fatal(" Test failed") 426 | } 427 | results = append(results, json) 428 | } 429 | if results[0].ObjectVals["Text"].(string) != "Knock knock." { 430 | t.Fatal("results[0] Test failed ") 431 | } 432 | 433 | if results[1].ObjectVals["Name"].(string) != "Sam" { 434 | t.Fatal("results[0] Test failed ") 435 | } 436 | 437 | if results[4].ObjectVals["Name"].(string) != "Ed" { 438 | t.Fatal("results[0] Test failed ") 439 | } 440 | 441 | if results[5].StringVal != "Hello World" { 442 | t.Fatal("results[0] Test failed ") 443 | } 444 | 445 | if results[6].StringVal != "666" { 446 | t.Fatal("results[0] Test failed ") 447 | } 448 | 449 | if results[7].StringVal != "" { 450 | t.Fatal("results[0] Test failed ") 451 | } 452 | 453 | if !results[8].BoolVal { 454 | t.Fatal("results[0] Test failed ") 455 | } 456 | 457 | } 458 | 459 | func TestInvalid(t *testing.T) { 460 | 461 | invalidStart := `{{"Name": "Ed", "Text": "Go fmt."},"s":"valid","s2":in"valid"}` 462 | 463 | br := bufio.NewReader(bytes.NewReader([]byte(invalidStart))) 464 | p := NewJSONParser(br, "s2") 465 | 466 | for _, json := range allResult(p) { 467 | 468 | if json.Err == nil { 469 | t.Fatal("Invalid error expected") 470 | } 471 | 472 | } 473 | 474 | invalidStart2 := `{{"Name": "Ed", "Text": "Go fmt."},"s":in"valid","s2":"valid"}` // invalid in non loop property 475 | 476 | br = bufio.NewReader(bytes.NewReader([]byte(invalidStart2))) 477 | p = NewJSONParser(br, "s2") 478 | 479 | for _, json := range allResult(p) { 480 | 481 | if json.Err == nil { 482 | t.Fatal("Invalid error expected") 483 | } 484 | 485 | } 486 | 487 | invalidEnd := `{"list":[{"Name": "Ed" , "Text": "Go fmt."} , {"Name": "Sam" , "Text": "Go fm"t who?"}]}` 488 | 489 | br = bufio.NewReader(bytes.NewReader([]byte(invalidEnd))) 490 | p = NewJSONParser(br, "list") 491 | index := 0 492 | for _, json := range allResult(p) { 493 | 494 | if index == 1 && json.Err == nil { 495 | t.Fatal("Invalid error expected") 496 | } 497 | index++ 498 | } 499 | 500 | } 501 | 502 | func Benchmark1(b *testing.B) { 503 | 504 | for n := 0; n < b.N; n++ { 505 | p := getparser("a").SkipProps([]string{"a11"}) 506 | for json := range p.Stream() { 507 | nothing(json) 508 | } 509 | } 510 | } 511 | 512 | func Benchmark2(b *testing.B) { 513 | 514 | for n := 0; n < b.N; n++ { 515 | p := getparser("a").SkipProps([]string{"a11"}) 516 | for _, json := range p.Parse() { 517 | nothing(json) 518 | } 519 | } 520 | } 521 | 522 | func nothing(j *JSON) { 523 | 524 | } 525 | -------------------------------------------------------------------------------- /jsparser.go: -------------------------------------------------------------------------------- 1 | package jsparser 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "fmt" 7 | "unicode/utf16" 8 | ) 9 | 10 | type JsonParser struct { 11 | reader *bufio.Reader 12 | loopProp []byte 13 | resChan chan *JSON 14 | isResArr bool 15 | skipProps map[string]bool 16 | TotalReadSize uint64 17 | lastReadSize int 18 | scratch *scratch 19 | } 20 | 21 | // JSON parsed result 22 | type JSON struct { 23 | StringVal string 24 | BoolVal bool 25 | ArrayVals []interface{} 26 | ObjectVals map[string]interface{} 27 | ValueType ValueType 28 | Err error 29 | } 30 | 31 | // ValueType of JSON value 32 | type ValueType int8 33 | 34 | // JSON types 35 | const ( 36 | Invalid ValueType = iota 37 | Null 38 | String 39 | Number 40 | Boolean 41 | Array 42 | Object 43 | ) 44 | 45 | func NewJSONParser(reader *bufio.Reader, loopProp string) *JsonParser { 46 | 47 | j := &JsonParser{ 48 | reader: reader, 49 | loopProp: []byte(loopProp), 50 | resChan: make(chan *JSON, 256), 51 | skipProps: map[string]bool{}, 52 | scratch: &scratch{data: make([]byte, 2048), dataRes: make([]*JSON, 2048)}, 53 | } 54 | return j 55 | } 56 | 57 | func (j *JsonParser) SkipProps(skipProps []string) *JsonParser { 58 | 59 | if len(skipProps) > 0 { 60 | for _, s := range skipProps { 61 | j.skipProps[s] = true 62 | } 63 | } 64 | return j 65 | 66 | } 67 | 68 | func (j *JsonParser) Stream() chan *JSON { 69 | 70 | go j.parse() 71 | 72 | return j.resChan 73 | 74 | } 75 | 76 | func (j *JsonParser) Parse() []*JSON { 77 | 78 | j.isResArr = true 79 | j.parse() 80 | return j.scratch.allRes() 81 | 82 | } 83 | 84 | func (j *JsonParser) parse() { 85 | 86 | defer close(j.resChan) 87 | 88 | var b byte 89 | var err error 90 | 91 | 92 | if len(j.loopProp)==0{ // expecting top level json is an Array 93 | for { 94 | b, err = j.readByte() 95 | 96 | if err != nil { 97 | return 98 | } 99 | 100 | if j.isWS(b) { 101 | continue 102 | } 103 | 104 | if b == '[' { 105 | 106 | j.loopArray() 107 | return 108 | 109 | } 110 | 111 | j.sendErrorStr("Check your json. When loop property is empty top level json must be an Array") 112 | return 113 | 114 | } 115 | }else{ 116 | 117 | for { 118 | b, err = j.readByte() 119 | 120 | if err != nil { 121 | return 122 | } 123 | 124 | if j.isWS(b) { 125 | continue 126 | } 127 | 128 | if b == '"' { // begining of possible json property 129 | 130 | isprop, err := j.getPropName() 131 | 132 | if err != nil { 133 | j.sendError() 134 | return 135 | } 136 | 137 | if isprop { 138 | 139 | b, err = j.skipWS() 140 | if err != nil { 141 | j.sendError() 142 | return 143 | } 144 | 145 | valType, typeErr := j.getValueType(b) 146 | 147 | if typeErr != nil { 148 | j.sendError() 149 | return 150 | } 151 | 152 | if bytes.Equal(j.loopProp, j.scratch.bytes()) { 153 | 154 | switch valType { 155 | case String: 156 | 157 | err = j.string() 158 | 159 | if err != nil { 160 | j.sendError() 161 | return 162 | } 163 | j.sendRes(&JSON{StringVal: j.scratch.string(), ValueType: String}) 164 | 165 | case Array: 166 | 167 | success := j.loopArray() 168 | if !success { 169 | return 170 | } 171 | 172 | case Object: 173 | 174 | res := &JSON{ObjectVals: map[string]interface{}{}, ValueType: Object} 175 | j.getObjectTree(res) 176 | j.sendRes(res) 177 | if res.Err != nil { 178 | return 179 | } 180 | 181 | case Boolean: 182 | 183 | b, err := j.boolean() 184 | if err != nil { 185 | j.sendError() 186 | return 187 | } 188 | j.sendRes(&JSON{BoolVal: b, ValueType: Boolean}) 189 | 190 | case Number: 191 | 192 | err = j.number(b) 193 | 194 | if err != nil { 195 | j.sendError() 196 | return 197 | } 198 | j.sendRes(&JSON{StringVal: j.scratch.string(), ValueType: Number}) 199 | 200 | case Null: 201 | 202 | err := j.null() 203 | 204 | if err != nil { 205 | j.sendError() 206 | return 207 | } 208 | j.sendRes(&JSON{ValueType: Null}) 209 | 210 | } 211 | 212 | } else { 213 | 214 | if valType == String { // if valtype is string just skip it otherwise continue looking loopProp. 215 | err = j.skipString() 216 | if err != nil { 217 | j.sendError() 218 | return 219 | } 220 | } 221 | 222 | } 223 | } 224 | } 225 | } 226 | 227 | } 228 | 229 | 230 | 231 | } 232 | 233 | func (j *JsonParser) sendRes(res *JSON) { 234 | if j.isResArr { 235 | j.scratch.addRes(res) 236 | } else { 237 | j.resChan <- res 238 | } 239 | } 240 | 241 | func (j *JsonParser) loopArray() bool { 242 | 243 | var b byte 244 | var err error 245 | 246 | for { 247 | 248 | b, err = j.skipWS() 249 | 250 | if err != nil { 251 | j.sendError() 252 | return false 253 | } 254 | 255 | if b == ']' { 256 | return true 257 | } 258 | 259 | if b == ',' { 260 | continue 261 | } 262 | 263 | valType, err := j.getValueType(b) 264 | 265 | if err != nil { 266 | j.sendError() 267 | return false 268 | } 269 | 270 | switch valType { 271 | case String: 272 | 273 | err = j.string() 274 | 275 | if err != nil { 276 | j.sendRes(&JSON{Err: err, ValueType: Invalid}) 277 | return false 278 | } 279 | j.sendRes(&JSON{StringVal: j.scratch.string(), ValueType: String}) 280 | case Array: 281 | 282 | res := &JSON{ObjectVals: map[string]interface{}{}, ValueType: Array} 283 | j.getArrayTree(res) 284 | j.sendRes(res) 285 | 286 | case Object: 287 | 288 | res := &JSON{ObjectVals: map[string]interface{}{}, ValueType: Object} 289 | j.getObjectTree(res) 290 | j.sendRes(res) 291 | 292 | case Boolean: 293 | 294 | b, err := j.boolean() 295 | if err != nil { 296 | j.sendError() 297 | return false 298 | } 299 | j.sendRes(&JSON{BoolVal: b, ValueType: Boolean}) 300 | 301 | case Number: 302 | 303 | err = j.number(b) 304 | if err != nil { 305 | return false 306 | } 307 | j.sendRes(&JSON{StringVal: j.scratch.string(), ValueType: Number}) 308 | 309 | case Null: 310 | 311 | err := j.null() 312 | 313 | if err != nil { 314 | return false 315 | } 316 | j.sendRes(&JSON{ValueType: Null}) 317 | 318 | } 319 | 320 | } 321 | 322 | } 323 | 324 | func (j *JsonParser) getObjectTree(res *JSON) { 325 | 326 | if res.Err != nil { 327 | return 328 | } 329 | 330 | var b byte 331 | var err error 332 | for { 333 | 334 | b, err = j.readByte() 335 | 336 | if err != nil { 337 | res.Err = err 338 | return 339 | } 340 | 341 | if j.isWS(b) { 342 | continue 343 | } 344 | 345 | if b == '"' { // begining of json property 346 | 347 | _, err := j.getPropName() // first variable ommited because inside object there can't be string item 348 | prop := j.scratch.string() 349 | 350 | if err != nil { 351 | res.Err = err 352 | return 353 | } 354 | 355 | b, err = j.skipWS() 356 | if err != nil { 357 | res.Err = j.defaultError() 358 | return 359 | } 360 | 361 | valType, err := j.getValueType(b) 362 | 363 | if err != nil { 364 | res.Err = err 365 | return 366 | } 367 | 368 | switch valType { 369 | case String: 370 | 371 | if ok := j.skipProps[prop]; ok { 372 | err = j.skipString() 373 | 374 | if err != nil { 375 | res.Err = err 376 | return 377 | } 378 | break 379 | } 380 | 381 | err = j.string() 382 | 383 | if err != nil { 384 | res.Err = err 385 | return 386 | } 387 | 388 | res.ObjectVals[prop] = j.scratch.string() 389 | 390 | case Array: 391 | 392 | if ok := j.skipProps[prop]; ok { 393 | err = j.skipArrayOrObject('[', ']') 394 | 395 | if err != nil { 396 | res.Err = err 397 | return 398 | } 399 | break 400 | } 401 | r := &JSON{ValueType: Array} 402 | j.getArrayTree(r) 403 | if r.Err != nil { 404 | res.Err = r.Err 405 | return 406 | } 407 | res.ObjectVals[prop] = r 408 | 409 | case Object: 410 | 411 | if ok := j.skipProps[prop]; ok { 412 | err = j.skipArrayOrObject('{', '}') 413 | 414 | if err != nil { 415 | res.Err = err 416 | return 417 | } 418 | break 419 | } 420 | r := &JSON{ObjectVals: map[string]interface{}{}, ValueType: Object} 421 | j.getObjectTree(r) 422 | 423 | if r.Err != nil { 424 | res.Err = r.Err 425 | return 426 | } 427 | res.ObjectVals[prop] = r 428 | 429 | case Boolean: 430 | 431 | b, err := j.boolean() 432 | 433 | if err != nil { 434 | res.Err = err 435 | return 436 | } 437 | 438 | // rest of the skip since they are small we just don't include in the result 439 | if ok := j.skipProps[prop]; !ok { 440 | res.ObjectVals[prop] = b 441 | } 442 | 443 | case Number: 444 | 445 | err = j.number(b) 446 | 447 | if err != nil { 448 | res.Err = err 449 | return 450 | } 451 | 452 | if ok := j.skipProps[prop]; !ok { 453 | res.ObjectVals[prop] = j.scratch.string() 454 | } 455 | 456 | case Null: 457 | 458 | err = j.null() 459 | if err != nil { 460 | res.Err = err 461 | return 462 | } 463 | 464 | if ok := j.skipProps[prop]; !ok { 465 | res.ObjectVals[prop] = "" 466 | } 467 | 468 | } 469 | 470 | } else if b == ',' { 471 | 472 | continue 473 | 474 | } else if b == '}' { // completion of current object 475 | 476 | return 477 | 478 | } else { // invalid end 479 | 480 | res.Err = j.defaultError() 481 | return 482 | 483 | } 484 | 485 | } 486 | 487 | } 488 | 489 | func (j *JsonParser) getArrayTree(res *JSON) { 490 | 491 | if res.Err != nil { 492 | return 493 | } 494 | 495 | var b byte 496 | var err error 497 | 498 | for { 499 | 500 | b, err = j.readByte() 501 | 502 | if err != nil { 503 | res.Err = err 504 | return 505 | } 506 | 507 | if j.isWS(b) { 508 | continue 509 | } 510 | 511 | if b == ',' { 512 | continue 513 | } 514 | 515 | if b == ']' { // means complete of current array 516 | return 517 | } 518 | 519 | valType, err := j.getValueType(b) 520 | 521 | if err != nil { 522 | res.Err = err 523 | return 524 | } 525 | switch valType { 526 | case String: 527 | 528 | err = j.string() 529 | 530 | if err != nil { 531 | res.Err = err 532 | return 533 | } 534 | res.ArrayVals = append(res.ArrayVals, j.scratch.string()) 535 | 536 | case Array: 537 | 538 | r := &JSON{ValueType: Array} 539 | j.getArrayTree(r) 540 | if r.Err != nil { 541 | res.Err = r.Err 542 | return 543 | } 544 | res.ArrayVals = append(res.ArrayVals, r) 545 | 546 | case Object: 547 | 548 | r := &JSON{ObjectVals: map[string]interface{}{}, ValueType: Object} 549 | j.getObjectTree(r) 550 | if r.Err != nil { 551 | res.Err = r.Err 552 | return 553 | } 554 | res.ArrayVals = append(res.ArrayVals, r) 555 | 556 | case Boolean: 557 | 558 | b, err := j.boolean() 559 | if err != nil { 560 | res.Err = err 561 | return 562 | } 563 | 564 | res.ArrayVals = append(res.ArrayVals, b) 565 | 566 | case Number: 567 | 568 | err = j.number(b) 569 | if err != nil { 570 | res.Err = err 571 | return 572 | } 573 | res.ArrayVals = append(res.ArrayVals, j.scratch.string()) 574 | 575 | case Null: 576 | 577 | err = j.null() 578 | 579 | if err != nil { 580 | res.Err = err 581 | return 582 | } 583 | 584 | res.ArrayVals = append(res.ArrayVals, "") 585 | 586 | } 587 | 588 | } 589 | 590 | } 591 | 592 | func (j *JsonParser) number(first byte) error { 593 | 594 | var c byte 595 | var err error 596 | j.scratch.reset() 597 | j.scratch.add(first) 598 | 599 | for { 600 | 601 | c, err = j.readByte() 602 | 603 | if err != nil { 604 | return j.defaultError() 605 | } 606 | 607 | if j.isWS(c) { 608 | 609 | c, err = j.skipWS() 610 | 611 | if err != nil { 612 | return j.defaultError() 613 | } 614 | 615 | if !(c == ',' || c == '}' || c == ']') { 616 | return j.defaultError() 617 | } 618 | err := j.unreadByte() 619 | if err != nil { 620 | return j.defaultError() 621 | } 622 | 623 | return nil 624 | } 625 | 626 | if c == ',' || c == '}' || c == ']' { 627 | 628 | err := j.unreadByte() 629 | if err != nil { 630 | return j.defaultError() 631 | } 632 | 633 | return nil 634 | } 635 | 636 | j.scratch.add(c) 637 | 638 | } 639 | 640 | } 641 | 642 | func (j *JsonParser) boolean() (bool, error) { 643 | 644 | var c byte 645 | var err error 646 | 647 | c, err = j.readByte() 648 | 649 | if err != nil { 650 | return false, j.defaultError() 651 | } 652 | 653 | // true 654 | if c == 'r' { 655 | c, err = j.readByte() 656 | 657 | if err != nil { 658 | return false, j.defaultError() 659 | } 660 | if c == 'u' { 661 | c, err = j.readByte() 662 | 663 | if err != nil { 664 | return false, j.defaultError() 665 | } 666 | if c == 'e' { 667 | // check last 668 | c, err = j.skipWS() 669 | if err != nil { 670 | return false, j.defaultError() 671 | } 672 | if !(c == ',' || c == '}' || c == ']') { 673 | return false, j.defaultError() 674 | } 675 | err := j.unreadByte() 676 | if err != nil { 677 | return false, j.defaultError() 678 | } 679 | 680 | return true, nil 681 | } 682 | } 683 | } 684 | 685 | // false 686 | if c == 'a' { 687 | c, err = j.readByte() 688 | 689 | if err != nil { 690 | return false, j.defaultError() 691 | } 692 | if c == 'l' { 693 | c, err = j.readByte() 694 | 695 | if err != nil { 696 | return false, j.defaultError() 697 | } 698 | if c == 's' { 699 | c, err = j.readByte() 700 | 701 | if err != nil { 702 | return false, j.defaultError() 703 | } 704 | if c == 'e' { 705 | // check last 706 | c, err = j.skipWS() 707 | if err != nil { 708 | return false, j.defaultError() 709 | } 710 | if !(c == ',' || c == '}' || c == ']') { 711 | return false, j.defaultError() 712 | } 713 | err := j.unreadByte() 714 | if err != nil { 715 | return false, j.defaultError() 716 | } 717 | 718 | return false, nil 719 | } 720 | } 721 | } 722 | } 723 | 724 | return false, j.defaultError() 725 | 726 | } 727 | 728 | func (j *JsonParser) null() error { 729 | 730 | var c byte 731 | var err error 732 | 733 | c, err = j.readByte() 734 | 735 | if err != nil { 736 | return j.defaultError() 737 | } 738 | 739 | // true 740 | if c == 'u' { 741 | c, err = j.readByte() 742 | 743 | if err != nil { 744 | return j.defaultError() 745 | } 746 | 747 | if c == 'l' { 748 | c, err = j.readByte() 749 | 750 | if err != nil { 751 | return j.defaultError() 752 | } 753 | if c == 'l' { 754 | // check last 755 | c, err = j.skipWS() 756 | if err != nil { 757 | return j.defaultError() 758 | } 759 | 760 | if !(c == ',' || c == '}' || c == ']') { 761 | return j.defaultError() 762 | } 763 | 764 | err := j.unreadByte() 765 | if err != nil { 766 | return j.defaultError() 767 | } 768 | 769 | return nil 770 | } 771 | } 772 | } 773 | 774 | return j.defaultError() 775 | } 776 | 777 | func (j *JsonParser) skipString() error { 778 | 779 | var c byte 780 | var prev byte 781 | var prevPrev byte 782 | var err error 783 | for { 784 | 785 | c, err = j.readByte() 786 | 787 | if err != nil { 788 | return j.defaultError() 789 | } 790 | 791 | if c == '"' { 792 | 793 | if !(prev == '\\' && prevPrev != '\\') { // escape check 794 | return nil 795 | } 796 | 797 | } 798 | 799 | prevPrev = prev 800 | prev = c 801 | 802 | } 803 | 804 | } 805 | 806 | func (j *JsonParser) skipArrayOrObject(start byte, end byte) error { 807 | 808 | var c byte 809 | var err error 810 | var depth = 1 811 | for { 812 | 813 | c, err = j.readByte() 814 | 815 | if err != nil { 816 | return j.defaultError() 817 | } 818 | 819 | switch c { 820 | case '"': 821 | err = j.skipString() // this is needed because string can contain [ or ] 822 | if err != nil { 823 | return err 824 | } 825 | case start: 826 | depth++ 827 | case end: 828 | depth-- 829 | if depth == 0 { 830 | return nil 831 | } 832 | 833 | } 834 | 835 | } 836 | 837 | } 838 | 839 | func (j *JsonParser) getValueType(c byte) (ValueType, error) { 840 | 841 | switch c { 842 | case '"': 843 | return String, nil 844 | case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-': 845 | return Number, nil 846 | case 'f': 847 | return Boolean, nil 848 | case 't': 849 | return Boolean, nil 850 | case 'n': 851 | return Null, nil 852 | case '[': 853 | return Array, nil 854 | case '{': 855 | return Object, nil 856 | } 857 | 858 | return Invalid, j.defaultError() 859 | 860 | } 861 | 862 | // first return type is checking if it is property or just an array item 863 | func (j *JsonParser) getPropName() (bool, error) { 864 | 865 | err := j.string() 866 | 867 | if err != nil { 868 | return false, err 869 | } 870 | 871 | b, err := j.skipWS() 872 | 873 | if err != nil { 874 | return false, err 875 | } 876 | 877 | if b == ':' { // end of property name 878 | return true, nil 879 | } 880 | 881 | err = j.unreadByte() 882 | 883 | return false, err 884 | 885 | } 886 | 887 | func (j *JsonParser) isWS(in byte) bool { 888 | 889 | if in == ' ' || in == '\n' || in == '\t' || in == '\r' { 890 | return true 891 | } 892 | 893 | return false 894 | 895 | } 896 | 897 | // skips WS and read first non WS 898 | func (j *JsonParser) skipWS() (byte, error) { 899 | 900 | var b byte 901 | var err error 902 | for { 903 | b, err = j.readByte() 904 | if err != nil { 905 | return 0, err 906 | } 907 | if b == ' ' || b == '\n' || b == '\t' || b == '\r' { 908 | continue 909 | } else { 910 | return b, nil 911 | } 912 | } 913 | 914 | } 915 | 916 | func (j *JsonParser) readByte() (byte, error) { 917 | 918 | by, err := j.reader.ReadByte() 919 | 920 | j.TotalReadSize = j.TotalReadSize + 1 921 | 922 | j.lastReadSize = 1 923 | 924 | if err != nil { 925 | return 0, err 926 | } 927 | return by, nil 928 | 929 | } 930 | 931 | func (j *JsonParser) unreadByte() error { 932 | 933 | err := j.reader.UnreadByte() 934 | if err != nil { 935 | return err 936 | } 937 | j.TotalReadSize = j.TotalReadSize - 1 938 | return nil 939 | 940 | } 941 | 942 | func (j *JsonParser) sendError() { 943 | err := fmt.Errorf("Invalid json") 944 | if j.isResArr { 945 | j.scratch.addRes(&JSON{Err: err, ValueType: Invalid}) 946 | } else { 947 | j.resChan <- &JSON{Err: err, ValueType: Invalid} 948 | } 949 | } 950 | 951 | func (j *JsonParser) sendErrorStr(s string) { 952 | err := fmt.Errorf(s) 953 | if j.isResArr { 954 | j.scratch.addRes(&JSON{Err: err, ValueType: Invalid}) 955 | } else { 956 | j.resChan <- &JSON{Err: err, ValueType: Invalid} 957 | } 958 | } 959 | 960 | func (j *JsonParser) resultError() *JSON { 961 | 962 | return &JSON{Err: j.defaultError(), ValueType: Invalid} 963 | 964 | } 965 | 966 | func (j *JsonParser) defaultError() error { 967 | err := fmt.Errorf("Invalid json") 968 | return err 969 | } 970 | 971 | // based on https://github.com/bcicen/jstream 972 | func (j *JsonParser) string() error { 973 | 974 | j.scratch.reset() 975 | 976 | var err error 977 | var c byte 978 | 979 | c, err = j.readByte() 980 | if err != nil { 981 | if err != nil { 982 | return j.defaultError() 983 | } 984 | } 985 | 986 | scan: 987 | for { 988 | switch { 989 | case c == '"': 990 | return nil 991 | case c == '\\': 992 | c, err = j.readByte() 993 | if err != nil { 994 | if err != nil { 995 | return j.defaultError() 996 | } 997 | } 998 | goto scan_esc 999 | case c < 0x20: 1000 | return j.defaultError() 1001 | // Coerce to well-formed UTF-8. 1002 | 1003 | } 1004 | j.scratch.add(c) 1005 | c, err = j.readByte() 1006 | if err != nil { 1007 | if err != nil { 1008 | return j.defaultError() 1009 | } 1010 | } 1011 | } 1012 | 1013 | scan_esc: 1014 | switch c { 1015 | case '"', '\\', '/', '\'': 1016 | j.scratch.add(c) 1017 | case 'u': 1018 | goto scan_u 1019 | case 'b': 1020 | j.scratch.add('\b') 1021 | case 'f': 1022 | j.scratch.add('\f') 1023 | case 'n': 1024 | j.scratch.add('\n') 1025 | case 'r': 1026 | j.scratch.add('\r') 1027 | case 't': 1028 | j.scratch.add('\t') 1029 | default: 1030 | //err := fmt.Errorf("error in string escape code") 1031 | return j.defaultError() 1032 | } 1033 | 1034 | c, err = j.readByte() 1035 | if err != nil { 1036 | if err != nil { 1037 | return j.defaultError() 1038 | } 1039 | } 1040 | 1041 | goto scan 1042 | 1043 | scan_u: 1044 | r := j.u4() 1045 | if r < 0 { 1046 | //err := fmt.Errorf("in unicode escape sequence") 1047 | return j.defaultError() 1048 | } 1049 | 1050 | // check for proceeding surrogate pair 1051 | c, err = j.readByte() 1052 | if err != nil { 1053 | if err != nil { 1054 | return j.defaultError() 1055 | } 1056 | } 1057 | 1058 | if !utf16.IsSurrogate(r) || c != '\\' { 1059 | j.scratch.addRune(r) 1060 | goto scan 1061 | } 1062 | 1063 | c, err = j.readByte() 1064 | if err != nil { 1065 | if err != nil { 1066 | return j.defaultError() 1067 | } 1068 | } 1069 | 1070 | if c != 'u' { 1071 | j.scratch.addRune(r) 1072 | goto scan_esc 1073 | } 1074 | 1075 | r2 := j.u4() 1076 | if r2 < 0 { 1077 | return j.defaultError() 1078 | } 1079 | 1080 | // write surrogate pair 1081 | j.scratch.addRune(utf16.DecodeRune(r, r2)) 1082 | 1083 | c, err = j.readByte() 1084 | if err != nil { 1085 | if err != nil { 1086 | return j.defaultError() 1087 | } 1088 | } 1089 | 1090 | goto scan 1091 | } 1092 | 1093 | // u4 reads four bytes following a \u escape 1094 | func (j *JsonParser) u4() rune { 1095 | // logic taken from: 1096 | // github.com/buger/jsonparser/blob/master/escape.go#L20 1097 | 1098 | var c byte 1099 | var err error 1100 | var h [4]int 1101 | for i := 0; i < 4; i++ { 1102 | 1103 | c, err = j.readByte() 1104 | if err != nil { 1105 | if err != nil { 1106 | return -1 1107 | } 1108 | } 1109 | switch { 1110 | case c >= '0' && c <= '9': 1111 | h[i] = int(c - '0') 1112 | case c >= 'A' && c <= 'F': 1113 | h[i] = int(c - 'A' + 10) 1114 | case c >= 'a' && c <= 'f': 1115 | h[i] = int(c - 'a' + 10) 1116 | default: 1117 | return -1 1118 | } 1119 | } 1120 | return rune(h[0]<<12 + h[1]<<8 + h[2]<<4 + h[3]) 1121 | } 1122 | --------------------------------------------------------------------------------