├── tests
    ├── specs
    │   ├── array
    │   │   ├── empty-array.json
    │   │   ├── trailing_comma.json
    │   │   ├── array.json
    │   │   ├── empty-array.txt
    │   │   ├── trailing_comma.txt
    │   │   └── array.txt
    │   ├── object
    │   │   ├── empty-object.json
    │   │   ├── trailing_comma.json
    │   │   ├── non-string-prop-names.json
    │   │   ├── empty-object.txt
    │   │   ├── object.json
    │   │   ├── trailing_comma.txt
    │   │   ├── non-string-prop-names.txt
    │   │   └── object.txt
    │   ├── encoding
    │   │   ├── encoding.json
    │   │   └── encoding.txt
    │   ├── string
    │   │   ├── string.json
    │   │   └── string.txt
    │   └── comments
    │   │   ├── only-comments.json
    │   │   ├── inline-comments.json
    │   │   ├── only-comments.txt
    │   │   └── inline-comments.txt
    └── test.rs
├── .rustfmt.toml
├── .gitignore
├── rust-toolchain.toml
├── .github
    ├── contributing.md
    ├── workflows
    │   ├── release.yml
    │   └── ci.yml
    └── CODE_OF_CONDUCT.md
├── README.md
├── dprint.json
├── Cargo.toml
├── LICENSE
├── src
    ├── common.rs
    ├── tokens.rs
    ├── cst
    │   └── input.rs
    ├── serde.rs
    ├── lib.rs
    ├── errors.rs
    ├── parse_to_value.rs
    ├── value.rs
    ├── string.rs
    ├── ast.rs
    ├── scanner.rs
    └── parse_to_ast.rs
└── benches
    ├── bench.rs
    └── data
        ├── tsconfig.json
        └── package.txt


/tests/specs/array/empty-array.json:
--------------------------------------------------------------------------------
1 | []
2 | 


--------------------------------------------------------------------------------
/tests/specs/object/empty-object.json:
--------------------------------------------------------------------------------
1 | {}
2 | 


--------------------------------------------------------------------------------
/tests/specs/array/trailing_comma.json:
--------------------------------------------------------------------------------
1 | [1,]
2 | 


--------------------------------------------------------------------------------
/.rustfmt.toml:
--------------------------------------------------------------------------------
1 | max_width = 120
2 | tab_spaces = 2
3 | 


--------------------------------------------------------------------------------
/tests/specs/object/trailing_comma.json:
--------------------------------------------------------------------------------
1 | { "p": 1, }
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .claude
2 | .vscode
3 | target
4 | Cargo.lock
5 | 


--------------------------------------------------------------------------------
/tests/specs/encoding/encoding.json:
--------------------------------------------------------------------------------
1 | // 3 bytes: ℝ
2 | "2: ß"
3 | 


--------------------------------------------------------------------------------
/tests/specs/array/array.json:
--------------------------------------------------------------------------------
1 | ["test", 5, { "prop": 4 }, ["test"], true, false, null]
2 | 


--------------------------------------------------------------------------------
/rust-toolchain.toml:
--------------------------------------------------------------------------------
1 | [toolchain]
2 | channel = "1.89.0"
3 | components = ["clippy", "rustfmt"]
4 | 


--------------------------------------------------------------------------------
/tests/specs/string/string.json:
--------------------------------------------------------------------------------
1 | [
2 |   "testing\" testing",
3 |   "testing\\ testing"
4 | ]
5 | 


--------------------------------------------------------------------------------
/tests/specs/comments/only-comments.json:
--------------------------------------------------------------------------------
1 | // testing
2 | /* test */ // test
3 | //test /* test */
4 | 


--------------------------------------------------------------------------------
/.github/contributing.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 | 
3 | Todo...
4 | 
5 | ## Benchmarks
6 | 
7 | See [Benchmarks](https://dprint.github.io/jsonc-parser/dev/bench/)
8 | 


--------------------------------------------------------------------------------
/tests/specs/comments/inline-comments.json:
--------------------------------------------------------------------------------
1 | /*1*/{ // 2
2 |     /*3*/"a"/*4*/: /*5*/5/*6*/, //7
3 |     "b": /*8*/[/*9*/]/*10*/,//11
4 |     "c": null
5 | /*13*/} // 14


--------------------------------------------------------------------------------
/tests/specs/object/non-string-prop-names.json:
--------------------------------------------------------------------------------
1 | {
2 |     myProp: "test",
3 |     other: "asdf",
4 |     asdf-test: "test",
5 |     oo43o : 5,
6 |     jnm44 : 3,
7 |     456 : 34
8 | }
9 | 


--------------------------------------------------------------------------------
/tests/specs/array/empty-array.txt:
--------------------------------------------------------------------------------
 1 | {
 2 |   "value": {
 3 |     "type": "array",
 4 |     "range": {
 5 |       "start": 0,
 6 |       "end": 2,
 7 |     },
 8 |     "elements": [
 9 |     ]
10 |   },
11 |   "comments": [
12 |   ]
13 | }
14 | 


--------------------------------------------------------------------------------
/tests/specs/object/empty-object.txt:
--------------------------------------------------------------------------------
 1 | {
 2 |   "value": {
 3 |     "type": "object",
 4 |     "range": {
 5 |       "start": 0,
 6 |       "end": 2,
 7 |     },
 8 |     "properties": [
 9 |     ]
10 |   },
11 |   "comments": [
12 |   ]
13 | }
14 | 


--------------------------------------------------------------------------------
/tests/specs/object/object.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "number": 5,
 3 |     "string": "str\\test",
 4 |     "object": {
 5 |         "obj": 5
 6 |     },
 7 |     "array": [],
 8 |     "true": true,
 9 |     "false": false,
10 |     "null": null
11 | }
12 | 


--------------------------------------------------------------------------------
/tests/specs/array/trailing_comma.txt:
--------------------------------------------------------------------------------
 1 | {
 2 |   "value": {
 3 |     "type": "array",
 4 |     "range": {
 5 |       "start": 0,
 6 |       "end": 4,
 7 |     },
 8 |     "elements": [
 9 |       {
10 |         "type": "number",
11 |         "range": {
12 |           "start": 1,
13 |           "end": 2,
14 |         },
15 |         "value": "1"
16 |       }
17 |     ]
18 |   },
19 |   "comments": [
20 |   ]
21 | }
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # jsonc-parser
 2 | 
 3 | [![](https://img.shields.io/crates/v/jsonc-parser.svg)](https://crates.io/crates/jsonc-parser)
 4 | [![](https://docs.rs/jsonc-parser/badge.svg)](https://docs.rs/jsonc-parser)
 5 | 
 6 | A JSON parser and manipulator for Rust that supports comments and other JSON extensions.
 7 | 
 8 | ## Documentation
 9 | 
10 | For usage examples and API documentation, see the [rustdoc documentation](https://docs.rs/jsonc-parser).
11 | 


--------------------------------------------------------------------------------
/tests/specs/string/string.txt:
--------------------------------------------------------------------------------
 1 | {
 2 |   "value": {
 3 |     "type": "array",
 4 |     "range": {
 5 |       "start": 0,
 6 |       "end": 48,
 7 |     },
 8 |     "elements": [
 9 |       {
10 |         "type": "string",
11 |         "range": {
12 |           "start": 4,
13 |           "end": 23,
14 |         },
15 |         "value": "testing" testing"
16 |       },
17 |       {
18 |         "type": "string",
19 |         "range": {
20 |           "start": 27,
21 |           "end": 46,
22 |         },
23 |         "value": "testing\\ testing"
24 |       }
25 |     ]
26 |   },
27 |   "comments": [
28 |   ]
29 | }
30 | 


--------------------------------------------------------------------------------
/dprint.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "indentWidth": 2,
 3 |   "exec": {
 4 |     "cwd": "${configDir}",
 5 |     "commands": [{
 6 |       "command": "rustfmt --edition 2024 --config imports_granularity=item",
 7 |       "exts": ["rs"]
 8 |     }]
 9 |   },
10 |   "includes": ["**/*.{md,rs}"],
11 |   "excludes": [
12 |     "**/target",
13 |     "./benches/json"
14 |   ],
15 |   "plugins": [
16 |     "https://plugins.dprint.dev/markdown-0.19.0.wasm",
17 |     "https://plugins.dprint.dev/exec-0.6.0.json@a054130d458f124f9b5c91484833828950723a5af3f8ff2bd1523bd47b83b364",
18 |     "https://plugins.dprint.dev/json-0.20.0.wasm"
19 |   ]
20 | }
21 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "jsonc-parser"
 3 | version = "0.28.0"
 4 | authors = ["David Sherret <dsherret@gmail.com>"]
 5 | edition = "2024"
 6 | license = "MIT"
 7 | description = "JSONC parser."
 8 | repository = "https://github.com/dprint/jsonc-parser"
 9 | 
10 | [package.metadata.docs.rs]
11 | all-features = true
12 | 
13 | [dependencies]
14 | indexmap = { version = "2.2.6", optional = true }
15 | serde_json = { version = "1.0", optional = true }
16 | unicode-width = { version = "0.2.0", optional = true }
17 | 
18 | [features]
19 | cst = []
20 | preserve_order = ["indexmap", "serde_json/preserve_order"]
21 | serde = ["serde_json"]
22 | error_unicode_width = ["unicode-width"]
23 | 
24 | [dev-dependencies]
25 | pretty_assertions = "1.0.0"
26 | 


--------------------------------------------------------------------------------
/tests/specs/encoding/encoding.txt:
--------------------------------------------------------------------------------
 1 | {
 2 |   "value": {
 3 |     "type": "string",
 4 |     "range": {
 5 |       "start": 16,
 6 |       "end": 23,
 7 |     },
 8 |     "value": "2: ß"
 9 |   },
10 |   "comments": [
11 |     {
12 |       "pos": 0,
13 |       "comments": [
14 |         {
15 |           "type": "line",
16 |           "range": {
17 |             "start": 0,
18 |             "end": 15,
19 |           },
20 |           "value": " 3 bytes: ℝ"
21 |         }
22 |       ]
23 |     },
24 |     {
25 |       "pos": 16,
26 |       "comments": [
27 |         {
28 |           "type": "line",
29 |           "range": {
30 |             "start": 0,
31 |             "end": 15,
32 |           },
33 |           "value": " 3 bytes: ℝ"
34 |         }
35 |       ]
36 |     }
37 |   ]
38 | }
39 | 


--------------------------------------------------------------------------------
/tests/specs/object/trailing_comma.txt:
--------------------------------------------------------------------------------
 1 | {
 2 |   "value": {
 3 |     "type": "object",
 4 |     "range": {
 5 |       "start": 0,
 6 |       "end": 11,
 7 |     },
 8 |     "properties": [
 9 |       {
10 |         "type": "objectProp",
11 |         "range": {
12 |           "start": 2,
13 |           "end": 8,
14 |         },
15 |         "name": {
16 |           "type": "string",
17 |           "range": {
18 |             "start": 2,
19 |             "end": 5,
20 |           },
21 |           "value": "p"
22 |         },
23 |         "value": {
24 |           "type": "number",
25 |           "range": {
26 |             "start": 7,
27 |             "end": 8,
28 |           },
29 |           "value": "1"
30 |         }
31 |       }
32 |     ]
33 |   },
34 |   "comments": [
35 |   ]
36 | }
37 | 


--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
 1 | name: release
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |     inputs:
 6 |       releaseKind:
 7 |         description: 'Kind of release'
 8 |         default: 'minor'
 9 |         type: choice
10 |         options:
11 |           - patch
12 |           - minor
13 |         required: true
14 | 
15 | jobs:
16 |   rust:
17 |     name: release
18 |     runs-on: ubuntu-latest
19 |     timeout-minutes: 30
20 | 
21 |     steps:
22 |       - name: Clone repository
23 |         uses: actions/checkout@v4
24 |         with:
25 |           token: ${{ secrets.GH_DPRINTBOT_PAT }}
26 | 
27 |       - uses: denoland/setup-deno@v2
28 |       - uses: dsherret/rust-toolchain-file@v1
29 | 
30 |       - name: Bump version and tag
31 |         env:
32 |           GITHUB_TOKEN: ${{ secrets.GH_DPRINTBOT_PAT }}
33 |           GH_WORKFLOW_ACTOR: ${{ github.actor }}
34 |         run: |
35 |           git config user.email "dprintbot@users.noreply.github.com"
36 |           git config user.name "dprintbot"
37 |           deno run -A https://raw.githubusercontent.com/dprint/automation/0.9.0/tasks/publish_release.ts --${{github.event.inputs.releaseKind}}
38 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2020 David Sherret
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/src/common.rs:
--------------------------------------------------------------------------------
 1 | /// Positional information about a start and end point in the text.
 2 | ///
 3 | /// Both points are byte indexes into the text, and `end` is the index after
 4 | /// the last byte that belongs to the range.
 5 | #[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
 6 | pub struct Range {
 7 |   /// Start position of the node in the text.
 8 |   pub start: usize,
 9 |   /// End position of the node in the text.
10 |   pub end: usize,
11 | }
12 | 
13 | impl Range {
14 |   /// Creates a range from `start` up to, but not including, `end`.
15 |   ///
16 |   /// The values are stored as given; nothing checks that `start <= end`.
17 |   pub fn new(start: usize, end: usize) -> Self {
18 |     Range { start, end }
19 |   }
20 | 
21 |   /// Creates a zero-width range that starts and ends at the byte index `pos`.
22 |   pub fn from_byte_index(pos: usize) -> Self {
23 |     Range::new(pos, pos)
24 |   }
25 | }
26 | 
27 | impl Ranged for Range {
28 |   fn range(&self) -> Range {
29 |     *self
30 |   }
31 | }
32 | 
33 | /// Represents an object that has a range in the text.
34 | pub trait Ranged {
35 |   /// Gets the range.
36 |   fn range(&self) -> Range;
37 | 
38 |   /// Gets the byte index of the first character in the text.
39 |   fn start(&self) -> usize {
40 |     self.range().start
41 |   }
42 | 
43 |   /// Gets the byte index after the last character in the text.
44 |   fn end(&self) -> usize {
45 |     self.range().end
46 |   }
47 | 
48 |   /// Gets the text from the provided string.
49 |   ///
50 |   /// # Panics
51 |   ///
52 |   /// Panics if `start` is greater than `end`, if the range reaches past the
53 |   /// end of `text`, or if either index is not on a `char` boundary.
54 |   fn text<'a>(&self, text: &'a str) -> &'a str {
55 |     let Range { start, end } = self.range();
56 |     &text[start..end]
57 |   }
58 | 
59 |   /// Gets the end byte index minus the start byte index of the range.
60 |   fn width(&self) -> usize {
61 |     let Range { start, end } = self.range();
62 |     end - start
63 |   }
64 | }
65 | 


--------------------------------------------------------------------------------
/src/tokens.rs:
--------------------------------------------------------------------------------
 1 | use super::common::Range;
 2 | use super::common::Ranged;
 3 | use std::borrow::Cow;
 4 | 
 5 | /// A token found while scanning.
 6 | #[derive(Debug, PartialEq, Clone)]
 7 | pub enum Token<'a> {
 8 |   OpenBrace,
 9 |   CloseBrace,
10 |   OpenBracket,
11 |   CloseBracket,
12 |   Comma,
13 |   Colon,
14 |   String(Cow<'a, str>),
15 |   Word(&'a str),
16 |   Boolean(bool),
17 |   Number(&'a str),
18 |   Null,
19 |   CommentLine(&'a str),
20 |   CommentBlock(&'a str),
21 | }
22 | 
23 | impl<'a> Token<'a> {
24 |   /// Gets the token as a string slice.
25 |   ///
26 |   /// Punctuation, boolean and null tokens give their fixed spelling (for
27 |   /// example `{` or `true`); every other token gives the text it carries.
28 |   pub fn as_str(&self) -> &str {
29 |     match self {
30 |       Token::OpenBrace => "{",
31 |       Token::CloseBrace => "}",
32 |       Token::OpenBracket => "[",
33 |       Token::CloseBracket => "]",
34 |       Token::Comma => ",",
35 |       Token::Colon => ":",
36 |       Token::String(value) => value,
37 |       Token::Word(value) => value,
38 |       Token::Boolean(true) => "true",
39 |       Token::Boolean(false) => "false",
40 |       Token::Number(value) => value,
41 |       Token::Null => "null",
42 |       Token::CommentLine(value) => value,
43 |       Token::CommentBlock(value) => value,
44 |     }
45 |   }
46 | }
47 | 
48 | /// A token with positional information.
49 | #[derive(Debug, PartialEq, Clone)]
50 | pub struct TokenAndRange<'a> {
51 |   /// Where the token is located.
52 |   pub range: Range,
53 |   /// The token itself.
54 |   pub token: Token<'a>,
55 | }
56 | 
57 | impl<'a> Ranged for TokenAndRange<'a> {
58 |   fn range(&self) -> Range {
59 |     self.range
60 |   }
61 | }
62 | 


--------------------------------------------------------------------------------
/tests/specs/comments/only-comments.txt:
--------------------------------------------------------------------------------
 1 | {
 2 |   "value": null,
 3 |   "comments": [
 4 |     {
 5 |       "pos": 0,
 6 |       "comments": [
 7 |         {
 8 |           "type": "line",
 9 |           "range": {
10 |             "start": 0,
11 |             "end": 10,
12 |           },
13 |           "value": " testing"
14 |         },
15 |         {
16 |           "type": "block",
17 |           "range": {
18 |             "start": 11,
19 |             "end": 21,
20 |           },
21 |           "value": " test "
22 |         },
23 |         {
24 |           "type": "line",
25 |           "range": {
26 |             "start": 22,
27 |             "end": 29,
28 |           },
29 |           "value": " test"
30 |         },
31 |         {
32 |           "type": "line",
33 |           "range": {
34 |             "start": 30,
35 |             "end": 47,
36 |           },
37 |           "value": "test /* test */"
38 |         }
39 |       ]
40 |     },
41 |     {
42 |       "pos": 48,
43 |       "comments": [
44 |         {
45 |           "type": "line",
46 |           "range": {
47 |             "start": 0,
48 |             "end": 10,
49 |           },
50 |           "value": " testing"
51 |         },
52 |         {
53 |           "type": "block",
54 |           "range": {
55 |             "start": 11,
56 |             "end": 21,
57 |           },
58 |           "value": " test "
59 |         },
60 |         {
61 |           "type": "line",
62 |           "range": {
63 |             "start": 22,
64 |             "end": 29,
65 |           },
66 |           "value": " test"
67 |         },
68 |         {
69 |           "type": "line",
70 |           "range": {
71 |             "start": 30,
72 |             "end": 47,
73 |           },
74 |           "value": "test /* test */"
75 |         }
76 |       ]
77 |     }
78 |   ]
79 | }
80 | 


--------------------------------------------------------------------------------
/tests/specs/array/array.txt:
--------------------------------------------------------------------------------
  1 | {
  2 |   "value": {
  3 |     "type": "array",
  4 |     "range": {
  5 |       "start": 0,
  6 |       "end": 55,
  7 |     },
  8 |     "elements": [
  9 |       {
 10 |         "type": "string",
 11 |         "range": {
 12 |           "start": 1,
 13 |           "end": 7,
 14 |         },
 15 |         "value": "test"
 16 |       },
 17 |       {
 18 |         "type": "number",
 19 |         "range": {
 20 |           "start": 9,
 21 |           "end": 10,
 22 |         },
 23 |         "value": "5"
 24 |       },
 25 |       {
 26 |         "type": "object",
 27 |         "range": {
 28 |           "start": 12,
 29 |           "end": 25,
 30 |         },
 31 |         "properties": [
 32 |           {
 33 |             "type": "objectProp",
 34 |             "range": {
 35 |               "start": 14,
 36 |               "end": 23,
 37 |             },
 38 |             "name": {
 39 |               "type": "string",
 40 |               "range": {
 41 |                 "start": 14,
 42 |                 "end": 20,
 43 |               },
 44 |               "value": "prop"
 45 |             },
 46 |             "value": {
 47 |               "type": "number",
 48 |               "range": {
 49 |                 "start": 22,
 50 |                 "end": 23,
 51 |               },
 52 |               "value": "4"
 53 |             }
 54 |           }
 55 |         ]
 56 |       },
 57 |       {
 58 |         "type": "array",
 59 |         "range": {
 60 |           "start": 27,
 61 |           "end": 35,
 62 |         },
 63 |         "elements": [
 64 |           {
 65 |             "type": "string",
 66 |             "range": {
 67 |               "start": 28,
 68 |               "end": 34,
 69 |             },
 70 |             "value": "test"
 71 |           }
 72 |         ]
 73 |       },
 74 |       {
 75 |         "type": "boolean",
 76 |         "range": {
 77 |           "start": 37,
 78 |           "end": 41,
 79 |         },
 80 |         "value": "true"
 81 |       },
 82 |       {
 83 |         "type": "boolean",
 84 |         "range": {
 85 |           "start": 43,
 86 |           "end": 48,
 87 |         },
 88 |         "value": "false"
 89 |       },
 90 |       {
 91 |         "type": "null",
 92 |         "range": {
 93 |           "start": 50,
 94 |           "end": 54,
 95 |         }
 96 |       }
 97 |     ]
 98 |   },
 99 |   "comments": [
100 |   ]
101 | }
102 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | jobs:
 6 |   build:
 7 |     name: ${{ matrix.config.kind }} ${{ matrix.config.os }}
 8 |     runs-on: ${{ matrix.config.os }}
 9 |     strategy:
10 |       matrix:
11 |         config:
12 |           - os: ubuntu-latest
13 |             kind: test_release
14 |           - os: ubuntu-latest
15 |             kind: test_debug
16 | 
17 |     env:
18 |       CARGO_INCREMENTAL: 0
19 |       RUST_BACKTRACE: full
20 | 
21 |     steps:
22 |     - uses: actions/checkout@v4
23 |     - uses: dsherret/rust-toolchain-file@v1
24 |     - uses: Swatinem/rust-cache@v2
25 |       with:
26 |         save-if: ${{ github.ref == 'refs/heads/main' }}
27 | 
28 |     - name: Test debug
29 |       if: matrix.config.kind == 'test_debug'
30 |       run: |
31 |         cargo test --features serde
32 |         cargo test --features preserve_order
33 |         cargo test --all-features
34 |     - name: Test release
35 |       if: matrix.config.kind == 'test_release'
36 |       run: cargo test --release --all-features
37 | 
38 |       # CARGO PUBLISH
39 |     - name: Cargo login
40 |       if: matrix.config.kind == 'test_release' && startsWith(github.ref, 'refs/tags/')
41 |       run: cargo login ${{ secrets.CRATES_TOKEN }}
42 | 
43 |     - name: Cargo publish
44 |       if: matrix.config.kind == 'test_release' && startsWith(github.ref, 'refs/tags/')
45 |       run: cargo publish
46 | 
47 |   benchmark:
48 |     name: Benchmarks
49 |     runs-on: ubuntu-latest
50 |     steps:
51 |     - uses: actions/checkout@v4
52 |     - name: Install latest nightly
53 |       uses: actions-rs/toolchain@v1
54 |       with:
55 |         toolchain: nightly
56 |         override: true
57 |     - name: Cache cargo
58 |       uses: actions/cache@v4
59 |       with:
60 |         path: |
61 |           ~/.cargo/registry
62 |           ~/.cargo/git
63 |           target
64 |         key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
65 |     # Run the benchmark and store its output in a file
66 |     - name: Run benchmark
67 |       run: cargo +nightly bench --features serde | tee output.txt
68 |     # Run `github-action-benchmark` action
69 |     - name: Store benchmark result
70 |       uses: rhysd/github-action-benchmark@v1
71 |       with:
72 |         tool: 'cargo'
73 |         output-file-path: output.txt
74 |         fail-on-alert: true
75 |         github-token: ${{ secrets.GITHUB_TOKEN }}
76 |         auto-push: ${{ github.ref == 'refs/heads/main' }}
77 | 


--------------------------------------------------------------------------------
/benches/bench.rs:
--------------------------------------------------------------------------------
 1 | #![feature(test)]
 2 | 
 3 | extern crate test;
 4 | 
 5 | use jsonc_parser::parse_to_ast;
 6 | use jsonc_parser::parse_to_value;
 7 | use std::fs::read_to_string;
 8 | use test::Bencher;
 9 | 
10 | #[bench]
11 | fn citm_catalog_json_large_ast(b: &mut Bencher) {
12 |   bench_ast(b, &get_citm_catalog_json_large());
13 | }
14 | 
15 | #[bench]
16 | fn citm_catalog_json_large_value(b: &mut Bencher) {
17 |   bench_value(b, &get_citm_catalog_json_large());
18 | }
19 | 
20 | #[bench]
21 | #[cfg(feature = "serde")]
22 | fn citm_catalog_json_large_serde(b: &mut Bencher) {
23 |   bench_serde(b, &get_citm_catalog_json_large());
24 | }
25 | 
26 | #[bench]
27 | fn tsconfig_json_ast(b: &mut Bencher) {
28 |   bench_ast(b, &get_tsconfig_json());
29 | }
30 | 
31 | #[bench]
32 | fn tsconfig_json_value(b: &mut Bencher) {
33 |   bench_value(b, &get_tsconfig_json());
34 | }
35 | 
36 | #[bench]
37 | fn package_json_ast(b: &mut Bencher) {
38 |   bench_ast(b, &get_package_json());
39 | }
40 | 
41 | #[bench]
42 | fn package_json_value(b: &mut Bencher) {
43 |   bench_value(b, &get_package_json());
44 | }
45 | 
46 | // bench helpers
47 | 
48 | /// Measures `parse_to_ast` on `json_text` using default values for both option arguments.
49 | fn bench_ast(b: &mut Bencher, json_text: &str) {
50 |   b.iter(|| parse_to_ast(json_text, &Default::default(), &Default::default()).unwrap());
51 | }
52 | 
53 | /// Measures `parse_to_value` on `json_text` using default options.
54 | fn bench_value(b: &mut Bencher, json_text: &str) {
55 |   b.iter(|| parse_to_value(json_text, &Default::default()).unwrap());
56 | }
57 | 
58 | /// Measures `serde_json` parsing `json_text` into a `serde_json::Value`.
59 | #[cfg(feature = "serde")]
60 | fn bench_serde(b: &mut Bencher, json_text: &str) {
61 |   b.iter(|| serde_json::from_str::<serde_json::Value>(json_text).unwrap());
62 | }
63 | 
64 | // data
65 | 
66 | /// Gets the citm catalog document repeated six times inside one JSON array.
67 | fn get_citm_catalog_json_large() -> String {
68 |   create_json_array_of_object(&get_citm_catalog_json(), 6)
69 | }
70 | 
71 | fn get_citm_catalog_json() -> String {
72 |   // from https://github.com/serde-rs/json-benchmark/blob/master/data/citm_catalog.json
73 |   read_data_file("benches/data/citm_catalog.json")
74 | }
75 | 
76 | fn get_tsconfig_json() -> String {
77 |   read_data_file("benches/data/tsconfig.json")
78 | }
79 | 
80 | fn get_package_json() -> String {
81 |   read_data_file("benches/data/package.txt")
82 | }
83 | 
84 | /// Reads a benchmark data file, naming the path in the panic message when it cannot be read.
85 | fn read_data_file(path: &str) -> String {
86 |   read_to_string(path).unwrap_or_else(|err| panic!("failed to read {path}: {err}"))
87 | }
88 | 
89 | /// Builds the text of a JSON array that contains `text` repeated `length` times.
90 | fn create_json_array_of_object(text: &str, length: usize) -> String {
91 |   format!("[{}]", vec![text; length].join(","))
92 | }
93 | 


--------------------------------------------------------------------------------
/tests/specs/object/non-string-prop-names.txt:
--------------------------------------------------------------------------------
  1 | {
  2 |   "value": {
  3 |     "type": "object",
  4 |     "range": {
  5 |       "start": 0,
  6 |       "end": 108,
  7 |     },
  8 |     "properties": [
  9 |       {
 10 |         "type": "objectProp",
 11 |         "range": {
 12 |           "start": 6,
 13 |           "end": 20,
 14 |         },
 15 |         "name": {
 16 |           "type": "word",
 17 |           "range": {
 18 |             "start": 6,
 19 |             "end": 12,
 20 |           },
 21 |           "value": "myProp"
 22 |         },
 23 |         "value": {
 24 |           "type": "string",
 25 |           "range": {
 26 |             "start": 14,
 27 |             "end": 20,
 28 |           },
 29 |           "value": "test"
 30 |         }
 31 |       },
 32 |       {
 33 |         "type": "objectProp",
 34 |         "range": {
 35 |           "start": 26,
 36 |           "end": 39,
 37 |         },
 38 |         "name": {
 39 |           "type": "word",
 40 |           "range": {
 41 |             "start": 26,
 42 |             "end": 31,
 43 |           },
 44 |           "value": "other"
 45 |         },
 46 |         "value": {
 47 |           "type": "string",
 48 |           "range": {
 49 |             "start": 33,
 50 |             "end": 39,
 51 |           },
 52 |           "value": "asdf"
 53 |         }
 54 |       },
 55 |       {
 56 |         "type": "objectProp",
 57 |         "range": {
 58 |           "start": 45,
 59 |           "end": 62,
 60 |         },
 61 |         "name": {
 62 |           "type": "word",
 63 |           "range": {
 64 |             "start": 45,
 65 |             "end": 54,
 66 |           },
 67 |           "value": "asdf-test"
 68 |         },
 69 |         "value": {
 70 |           "type": "string",
 71 |           "range": {
 72 |             "start": 56,
 73 |             "end": 62,
 74 |           },
 75 |           "value": "test"
 76 |         }
 77 |       },
 78 |       {
 79 |         "type": "objectProp",
 80 |         "range": {
 81 |           "start": 68,
 82 |           "end": 77,
 83 |         },
 84 |         "name": {
 85 |           "type": "word",
 86 |           "range": {
 87 |             "start": 68,
 88 |             "end": 73,
 89 |           },
 90 |           "value": "oo43o"
 91 |         },
 92 |         "value": {
 93 |           "type": "number",
 94 |           "range": {
 95 |             "start": 76,
 96 |             "end": 77,
 97 |           },
 98 |           "value": "5"
 99 |         }
100 |       },
101 |       {
102 |         "type": "objectProp",
103 |         "range": {
104 |           "start": 83,
105 |           "end": 92,
106 |         },
107 |         "name": {
108 |           "type": "word",
109 |           "range": {
110 |             "start": 83,
111 |             "end": 88,
112 |           },
113 |           "value": "jnm44"
114 |         },
115 |         "value": {
116 |           "type": "number",
117 |           "range": {
118 |             "start": 91,
119 |             "end": 92,
120 |           },
121 |           "value": "3"
122 |         }
123 |       },
124 |       {
125 |         "type": "objectProp",
126 |         "range": {
127 |           "start": 98,
128 |           "end": 106,
129 |         },
130 |         "name": {
131 |           "type": "word",
132 |           "range": {
133 |             "start": 98,
134 |             "end": 101,
135 |           },
136 |           "value": "456"
137 |         },
138 |         "value": {
139 |           "type": "number",
140 |           "range": {
141 |             "start": 104,
142 |             "end": 106,
143 |           },
144 |           "value": "34"
145 |         }
146 |       }
147 |     ]
148 |   },
149 |   "comments": [
150 |   ]
151 | }
152 | 


--------------------------------------------------------------------------------
/.github/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Contributor Covenant Code of Conduct
 2 | 
 3 | ## Our Pledge
 4 | 
 5 | In the interest of fostering an open and welcoming environment, we as
 6 | contributors and maintainers pledge to making participation in our project and
 7 | our community a harassment-free experience for everyone, regardless of age, body
 8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
 9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 | 
12 | ## Our Standards
13 | 
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 | 
17 | - Using welcoming and inclusive language
18 | - Being respectful of differing viewpoints and experiences
19 | - Gracefully accepting constructive criticism
20 | - Focusing on what is best for the community
21 | - Showing empathy towards other community members
22 | 
23 | Examples of unacceptable behavior by participants include:
24 | 
25 | - The use of sexualized language or imagery and unwelcome sexual attention or
26 |   advances
27 | - Trolling, insulting/derogatory comments, and personal or political attacks
28 | - Public or private harassment
29 | - Publishing others' private information, such as a physical or electronic
30 |   address, without explicit permission
31 | - Other conduct which could reasonably be considered inappropriate in a
32 |   professional setting
33 | 
34 | ## Our Responsibilities
35 | 
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 | 
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 | 
46 | ## Scope
47 | 
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 | 
55 | ## Enforcement
56 | 
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team on twitter via direct message at
59 | https://twitter.com/DavidSherret (DMs open). All complaints will be reviewed
60 | and investigated and will result in a response that is deemed necessary and
61 | appropriate to the circumstances. The project team is obligated to maintain
62 | confidentiality with regard to the reporter of an incident. Further details
63 | of specific enforcement policies may be posted separately.
64 | 
65 | Project maintainers who do not follow or enforce the Code of Conduct in good
66 | faith may face temporary or permanent repercussions as determined by other
67 | members of the project's leadership.
68 | 
69 | ## Attribution
70 | 
71 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
72 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
73 | 
74 | [homepage]: https://www.contributor-covenant.org
75 | 
76 | For answers to common questions about this code of conduct, see
77 | https://www.contributor-covenant.org/faq
78 | 


--------------------------------------------------------------------------------
/src/cst/input.rs:
--------------------------------------------------------------------------------
  1 | /// API user provided value for inserts and replaces.
  2 | #[derive(Debug, Clone)]
  3 | pub enum CstInputValue {
  4 |   Null,
  5 |   Bool(bool),
  6 |   Number(String),
  7 |   String(String),
  8 |   Array(Vec<CstInputValue>),
  9 |   Object(Vec<(String, CstInputValue)>),
 10 | }
 11 | 
 12 | impl CstInputValue {
 13 |   pub(crate) fn force_multiline(&self) -> bool {
 14 |     match self {
 15 |       CstInputValue::Null | CstInputValue::Bool(_) | CstInputValue::Number(_) | CstInputValue::String(_) => false,
 16 |       CstInputValue::Array(v) => v.iter().any(|v| v.is_object_or_array_with_elements()),
 17 |       CstInputValue::Object(v) => !v.is_empty(),
 18 |     }
 19 |   }
 20 | 
 21 |   fn is_object_or_array_with_elements(&self) -> bool {
 22 |     match self {
 23 |       CstInputValue::Null | CstInputValue::Bool(_) | CstInputValue::Number(_) | CstInputValue::String(_) => false,
 24 |       CstInputValue::Array(v) => !v.is_empty(),
 25 |       CstInputValue::Object(v) => !v.is_empty(),
 26 |     }
 27 |   }
 28 | }
 29 | 
 30 | impl From<bool> for CstInputValue {
 31 |   fn from(b: bool) -> Self {
 32 |     CstInputValue::Bool(b)
 33 |   }
 34 | }
 35 | 
 36 | impl From<&str> for CstInputValue {
 37 |   fn from(s: &str) -> Self {
 38 |     CstInputValue::String(s.to_string())
 39 |   }
 40 | }
 41 | 
 42 | impl From<String> for CstInputValue {
 43 |   fn from(s: String) -> Self {
 44 |     CstInputValue::String(s)
 45 |   }
 46 | }
 47 | 
 48 | impl From<f64> for CstInputValue {
 49 |   fn from(n: f64) -> Self {
 50 |     CstInputValue::Number(n.to_string())
 51 |   }
 52 | }
 53 | 
 54 | impl From<usize> for CstInputValue {
 55 |   fn from(n: usize) -> Self {
 56 |     CstInputValue::Number(n.to_string())
 57 |   }
 58 | }
 59 | 
 60 | impl From<isize> for CstInputValue {
 61 |   fn from(n: isize) -> Self {
 62 |     CstInputValue::Number(n.to_string())
 63 |   }
 64 | }
 65 | 
 66 | impl From<u64> for CstInputValue {
 67 |   fn from(n: u64) -> Self {
 68 |     CstInputValue::Number(n.to_string())
 69 |   }
 70 | }
 71 | 
 72 | impl From<i64> for CstInputValue {
 73 |   fn from(n: i64) -> Self {
 74 |     CstInputValue::Number(n.to_string())
 75 |   }
 76 | }
 77 | 
 78 | impl From<u32> for CstInputValue {
 79 |   fn from(n: u32) -> Self {
 80 |     CstInputValue::Number(n.to_string())
 81 |   }
 82 | }
 83 | 
 84 | impl From<i32> for CstInputValue {
 85 |   fn from(n: i32) -> Self {
 86 |     CstInputValue::Number(n.to_string())
 87 |   }
 88 | }
 89 | 
 90 | impl<T> From<Vec<T>> for CstInputValue
 91 | where
 92 |   T: Into<CstInputValue>,
 93 | {
 94 |   fn from(vec: Vec<T>) -> Self {
 95 |     CstInputValue::Array(vec.into_iter().map(Into::into).collect())
 96 |   }
 97 | }
 98 | 
 99 | impl From<Vec<(String, CstInputValue)>> for CstInputValue {
100 |   fn from(obj: Vec<(String, CstInputValue)>) -> Self {
101 |     CstInputValue::Object(obj)
102 |   }
103 | }
104 | 
/// Builds a `CstInputValue` from JSON-like syntax.
///
/// - `null` produces `Null`.
/// - `[a, b, ...]` produces an `Array`; each element is expanded recursively.
/// - `{ key: value, ... }` produces an `Object`; a key may be a bare identifier
///   (used verbatim as the property name) or any expression with a
///   `to_string()` method, such as a string literal.
/// - Anything else is passed to `CstInputValue::from`.
///
/// A trailing comma is accepted in arrays and objects. Each element and each
/// property value must be a single token tree, so wrap multi-token expressions
/// in parentheses.
#[macro_export]
macro_rules! json {
  (null) => {
    $crate::cst::CstInputValue::Null
  };

  // Recursive calls go through `$crate::` so they resolve to this macro no
  // matter what (if anything) is named `json!` at the call site.
  ([ $($elems:tt),* $(,)? ]) => {
    $crate::cst::CstInputValue::Array(vec![
      $($crate::json!($elems)),*
    ])
  };

  ({ $($key:tt : $value:tt),* $(,)? }) => {
    $crate::cst::CstInputValue::Object(vec![
      $(
         ($crate::json!(private_quote_property $key).to_string(), $crate::json!($value))
      ),*
    ])
  };

  ($other:expr) => {
    $crate::cst::CstInputValue::from($other)
  };

  // hack to not have another public macro for quoting object key properties
  (private_quote_property $key:ident) => {
    stringify!($key)
  };

  (private_quote_property $key:expr) => {
    $key
  };
}
138 | 


--------------------------------------------------------------------------------
/src/serde.rs:
--------------------------------------------------------------------------------
  1 | use super::CollectOptions;
  2 | use super::ParseOptions;
  3 | use super::errors::ParseError;
  4 | use super::parse_to_ast;
  5 | 
  6 | /// Parses a string containing JSONC to a `serde_json::Value.
  7 | ///
  8 | /// Requires the "serde" cargo feature:
  9 | ///
 10 | /// ```toml
 11 | /// jsonc-parser = { version = "...", features = ["serde"] }
 12 | /// ```
 13 | ///
 14 | /// # Example
 15 | ///
 16 | /// ```rs
 17 | /// use jsonc_parser::parse_to_serde_value;
 18 | ///
 19 | /// let json_value = parse_to_serde_value(r#"{ "test": 5 } // test"#, &Default::default()).unwrap();
 20 | /// ```
 21 | pub fn parse_to_serde_value(text: &str, parse_options: &ParseOptions) -> Result<Option<serde_json::Value>, ParseError> {
 22 |   let value = parse_to_ast(
 23 |     text,
 24 |     &CollectOptions {
 25 |       comments: crate::CommentCollectionStrategy::Off,
 26 |       tokens: false,
 27 |     },
 28 |     parse_options,
 29 |   )?
 30 |   .value;
 31 |   Ok(value.map(|v| v.into()))
 32 | }
 33 | 
 34 | #[cfg(test)]
 35 | mod tests {
 36 |   use pretty_assertions::assert_eq;
 37 |   use serde_json::Value as SerdeValue;
 38 |   use std::str::FromStr;
 39 | 
 40 |   use super::*;
 41 | 
 42 |   #[test]
 43 |   fn it_should_error_when_has_error() {
 44 |     assert_has_error(
 45 |       "[][]",
 46 |       "Text cannot contain more than one JSON value on line 1 column 3",
 47 |     );
 48 |   }
 49 | 
 50 |   fn assert_has_error(text: &str, message: &str) {
 51 |     let result = parse_to_serde_value(text, &Default::default());
 52 |     match result {
 53 |       Ok(_) => panic!("Expected error, but did not find one."),
 54 |       Err(err) => assert_eq!(err.to_string(), message),
 55 |     }
 56 |   }
 57 | 
 58 |   #[test]
 59 |   fn it_should_parse_to_serde_value() {
 60 |     let result = parse_to_serde_value(
 61 |       r#"{ "a": { "a1": 5 }, "b": [0.3e+025], "c": "c1", "d": true, "e": false, "f": null }"#,
 62 |       &Default::default(),
 63 |     )
 64 |     .unwrap();
 65 | 
 66 |     let mut expected_value = serde_json::map::Map::new();
 67 |     expected_value.insert("a".to_string(), {
 68 |       let mut inner_obj = serde_json::map::Map::new();
 69 |       inner_obj.insert(
 70 |         "a1".to_string(),
 71 |         SerdeValue::Number(serde_json::Number::from_str("5").unwrap()),
 72 |       );
 73 |       SerdeValue::Object(inner_obj)
 74 |     });
 75 |     expected_value.insert("b".to_string(), {
 76 |       let mut inner_array = Vec::new();
 77 |       inner_array.push(SerdeValue::Number(serde_json::Number::from_str("0.3e+025").unwrap()));
 78 |       SerdeValue::Array(inner_array)
 79 |     });
 80 |     expected_value.insert("c".to_string(), SerdeValue::String("c1".to_string()));
 81 |     expected_value.insert("d".to_string(), SerdeValue::Bool(true));
 82 |     expected_value.insert("e".to_string(), SerdeValue::Bool(false));
 83 |     expected_value.insert("f".to_string(), SerdeValue::Null);
 84 | 
 85 |     assert_eq!(result, Some(SerdeValue::Object(expected_value)));
 86 |   }
 87 | 
 88 |   #[test]
 89 |   fn it_should_parse_hexadecimal_numbers_to_decimal() {
 90 |     let result = parse_to_serde_value(
 91 |       r#"{
 92 |         "hex1": 0x7DF,
 93 |         "hex2": 0xFF,
 94 |         "hex3": 0x10
 95 |       }"#,
 96 |       &Default::default(),
 97 |     )
 98 |     .unwrap();
 99 | 
100 |     let mut expected_value = serde_json::map::Map::new();
101 |     expected_value.insert("hex1".to_string(), SerdeValue::Number(serde_json::Number::from(2015)));
102 |     expected_value.insert("hex2".to_string(), SerdeValue::Number(serde_json::Number::from(255)));
103 |     expected_value.insert("hex3".to_string(), SerdeValue::Number(serde_json::Number::from(16)));
104 | 
105 |     assert_eq!(result, Some(SerdeValue::Object(expected_value)));
106 |   }
107 | 
108 |   #[test]
109 |   fn it_should_parse_unary_plus_numbers() {
110 |     let result = parse_to_serde_value(
111 |       r#"{
112 |         "pos1": +42,
113 |         "pos2": +0.5,
114 |         "pos3": +1e10
115 |       }"#,
116 |       &Default::default(),
117 |     )
118 |     .unwrap();
119 | 
120 |     let mut expected_value = serde_json::map::Map::new();
121 |     expected_value.insert("pos1".to_string(), SerdeValue::Number(serde_json::Number::from(42)));
122 |     expected_value.insert(
123 |       "pos2".to_string(),
124 |       SerdeValue::Number(serde_json::Number::from_str("0.5").unwrap()),
125 |     );
126 |     expected_value.insert(
127 |       "pos3".to_string(),
128 |       SerdeValue::Number(serde_json::Number::from_str("1e10").unwrap()),
129 |     );
130 | 
131 |     assert_eq!(result, Some(SerdeValue::Object(expected_value)));
132 |   }
133 | }
134 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
  1 | //! # jsonc-parser
  2 | //!
  3 | //! A JSON parser and manipulator that supports comments and other JSON extensions.
  4 | //!
  5 | //! ## Parsing
  6 | //!
  7 | //! To a simple `JsonValue`:
  8 | //!
  9 | //! ```
 10 | //! use jsonc_parser::parse_to_value;
 11 | //!
 12 | //! # fn main() -> Result<(), Box<dyn std::error::Error>> {
 13 | //! let json_value = parse_to_value(r#"{ "test": 5 } // test"#, &Default::default())?;
 14 | //! // check the json_value here
 15 | //! # Ok(())
 16 | //! # }
 17 | //! ```
 18 | //!
 19 | //! Or an AST:
 20 | //!
 21 | //! ```
 22 | //! use jsonc_parser::parse_to_ast;
 23 | //! use jsonc_parser::CollectOptions;
 24 | //! use jsonc_parser::CommentCollectionStrategy;
 25 | //!
 26 | //! # fn main() -> Result<(), Box<dyn std::error::Error>> {
 27 | //! let parse_result = parse_to_ast(r#"{ "test": 5 } // test"#, &CollectOptions {
 28 | //!     comments: CommentCollectionStrategy::Separate, // include comments in result
 29 | //!     tokens: true, // include tokens in result
 30 | //! }, &Default::default())?;
 31 | //! // ...inspect parse_result for value, tokens, and comments here...
 32 | //! # Ok(())
 33 | //! # }
 34 | //! ```
 35 | //!
 36 | //! ## Manipulation (CST)
 37 | //!
 38 | //! When the `cst` cargo feature is enabled, parsing to a CST provides a first-class manipulation API:
 39 | //!
 40 | //! ```
 41 | //! # #[cfg(feature = "cst")]
 42 | //! # {
 43 | //! use jsonc_parser::cst::CstRootNode;
 44 | //! use jsonc_parser::ParseOptions;
 45 | //! use jsonc_parser::json;
 46 | //!
 47 | //! let json_text = r#"{
 48 | //!   // comment
 49 | //!   "data": 123
 50 | //! }"#;
 51 | //!
 52 | //! let root = CstRootNode::parse(json_text, &ParseOptions::default()).unwrap();
 53 | //! let root_obj = root.object_value_or_set();
 54 | //!
 55 | //! root_obj.get("data").unwrap().set_value(json!({
 56 | //!   "nested": true
 57 | //! }));
 58 | //! root_obj.append("new_key", json!([456, 789, false]));
 59 | //!
 60 | //! assert_eq!(root.to_string(), r#"{
 61 | //!   // comment
 62 | //!   "data": {
 63 | //!     "nested": true
 64 | //!   },
 65 | //!   "new_key": [456, 789, false]
 66 | //! }"#);
 67 | //! # }
 68 | //! ```
 69 | //!
 70 | //! ## Serde
 71 | //!
 72 | //! If you enable the `"serde"` feature as follows:
 73 | //!
 74 | //! ```toml
 75 | //! # in Cargo.toml
 76 | //! jsonc-parser = { version = "...", features = ["serde"] }
 77 | //! ```
 78 | //!
 79 | //! Then you can use the `parse_to_serde_value` function to get a `serde_json::Value`:
 80 | //!
 81 | //! ```
 82 | //! # #[cfg(feature = "serde")]
 83 | //! # {
 84 | //! use jsonc_parser::parse_to_serde_value;
 85 | //!
 86 | //! # fn parse_example() -> Result<(), Box<dyn std::error::Error>> {
 87 | //! let json_value = parse_to_serde_value(r#"{ "test": 5 } // test"#, &Default::default())?;
 88 | //! # Ok(())
 89 | //! # }
 90 | //! # }
 91 | //! ```
 92 | //!
 93 | //! Alternatively, use `parse_to_ast` then call `.into()` (ex. `let value: serde_json::Value = ast.into();`).
 94 | //!
 95 | //! ## Parse Strictly as JSON
 96 | //!
 97 | //! Provide `ParseOptions` and set all the options to false:
 98 | //!
 99 | //! ```
100 | //! use jsonc_parser::parse_to_value;
101 | //! use jsonc_parser::ParseOptions;
102 | //!
103 | //! # fn main() -> Result<(), Box<dyn std::error::Error>> {
104 | //! # let text = "{}";
105 | //! let json_value = parse_to_value(text, &ParseOptions {
106 | //!   allow_comments: false,
107 | //!   allow_loose_object_property_names: false,
108 | //!   allow_trailing_commas: false,
109 | //!   allow_single_quoted_strings: false,
110 | //!   allow_hexadecimal_numbers: false,
111 | //!   allow_unary_plus_numbers: false,
112 | //! })?;
113 | //! # Ok(())
114 | //! # }
115 | //! ```
116 | //!
117 | //! ## Error column number with unicode-width
118 | //!
119 | //! To get more accurate display column numbers in error messages, enable the `error_unicode_width` cargo feature,
120 | //! which will pull in and use the [unicode-width](https://crates.io/crates/unicode-width) dependency internally.
121 | //! Otherwise it will use the character count, which is less accurate but will probably be good enough
122 | //! in most cases.
123 | 
124 | #![deny(clippy::print_stderr)]
125 | #![deny(clippy::print_stdout)]
126 | #![allow(clippy::uninlined_format_args)]
127 | 
128 | pub mod ast;
129 | pub mod common;
130 | #[cfg(feature = "cst")]
131 | pub mod cst;
132 | pub mod errors;
133 | mod parse_to_ast;
134 | mod parse_to_value;
135 | mod scanner;
136 | #[cfg(feature = "serde")]
137 | mod serde;
138 | mod string;
139 | pub mod tokens;
140 | mod value;
141 | 
142 | pub use parse_to_ast::*;
143 | pub use parse_to_value::*;
144 | pub use scanner::*;
145 | pub use string::ParseStringErrorKind;
146 | pub use value::*;
147 | 
148 | #[cfg(feature = "serde")]
149 | pub use serde::*;
150 | 


--------------------------------------------------------------------------------
/tests/specs/object/object.txt:
--------------------------------------------------------------------------------
  1 | {
  2 |   "value": {
  3 |     "type": "object",
  4 |     "range": {
  5 |       "start": 0,
  6 |       "end": 159,
  7 |     },
  8 |     "properties": [
  9 |       {
 10 |         "type": "objectProp",
 11 |         "range": {
 12 |           "start": 6,
 13 |           "end": 17,
 14 |         },
 15 |         "name": {
 16 |           "type": "string",
 17 |           "range": {
 18 |             "start": 6,
 19 |             "end": 14,
 20 |           },
 21 |           "value": "number"
 22 |         },
 23 |         "value": {
 24 |           "type": "number",
 25 |           "range": {
 26 |             "start": 16,
 27 |             "end": 17,
 28 |           },
 29 |           "value": "5"
 30 |         }
 31 |       },
 32 |       {
 33 |         "type": "objectProp",
 34 |         "range": {
 35 |           "start": 23,
 36 |           "end": 44,
 37 |         },
 38 |         "name": {
 39 |           "type": "string",
 40 |           "range": {
 41 |             "start": 23,
 42 |             "end": 31,
 43 |           },
 44 |           "value": "string"
 45 |         },
 46 |         "value": {
 47 |           "type": "string",
 48 |           "range": {
 49 |             "start": 33,
 50 |             "end": 44,
 51 |           },
 52 |           "value": "str\\test"
 53 |         }
 54 |       },
 55 |       {
 56 |         "type": "objectProp",
 57 |         "range": {
 58 |           "start": 50,
 59 |           "end": 84,
 60 |         },
 61 |         "name": {
 62 |           "type": "string",
 63 |           "range": {
 64 |             "start": 50,
 65 |             "end": 58,
 66 |           },
 67 |           "value": "object"
 68 |         },
 69 |         "value": {
 70 |           "type": "object",
 71 |           "range": {
 72 |             "start": 60,
 73 |             "end": 84,
 74 |           },
 75 |           "properties": [
 76 |             {
 77 |               "type": "objectProp",
 78 |               "range": {
 79 |                 "start": 70,
 80 |                 "end": 78,
 81 |               },
 82 |               "name": {
 83 |                 "type": "string",
 84 |                 "range": {
 85 |                   "start": 70,
 86 |                   "end": 75,
 87 |                 },
 88 |                 "value": "obj"
 89 |               },
 90 |               "value": {
 91 |                 "type": "number",
 92 |                 "range": {
 93 |                   "start": 77,
 94 |                   "end": 78,
 95 |                 },
 96 |                 "value": "5"
 97 |               }
 98 |             }
 99 |           ]
100 |         }
101 |       },
102 |       {
103 |         "type": "objectProp",
104 |         "range": {
105 |           "start": 90,
106 |           "end": 101,
107 |         },
108 |         "name": {
109 |           "type": "string",
110 |           "range": {
111 |             "start": 90,
112 |             "end": 97,
113 |           },
114 |           "value": "array"
115 |         },
116 |         "value": {
117 |           "type": "array",
118 |           "range": {
119 |             "start": 99,
120 |             "end": 101,
121 |           },
122 |           "elements": [
123 |           ]
124 |         }
125 |       },
126 |       {
127 |         "type": "objectProp",
128 |         "range": {
129 |           "start": 107,
130 |           "end": 119,
131 |         },
132 |         "name": {
133 |           "type": "string",
134 |           "range": {
135 |             "start": 107,
136 |             "end": 113,
137 |           },
138 |           "value": "true"
139 |         },
140 |         "value": {
141 |           "type": "boolean",
142 |           "range": {
143 |             "start": 115,
144 |             "end": 119,
145 |           },
146 |           "value": "true"
147 |         }
148 |       },
149 |       {
150 |         "type": "objectProp",
151 |         "range": {
152 |           "start": 125,
153 |           "end": 139,
154 |         },
155 |         "name": {
156 |           "type": "string",
157 |           "range": {
158 |             "start": 125,
159 |             "end": 132,
160 |           },
161 |           "value": "false"
162 |         },
163 |         "value": {
164 |           "type": "boolean",
165 |           "range": {
166 |             "start": 134,
167 |             "end": 139,
168 |           },
169 |           "value": "false"
170 |         }
171 |       },
172 |       {
173 |         "type": "objectProp",
174 |         "range": {
175 |           "start": 145,
176 |           "end": 157,
177 |         },
178 |         "name": {
179 |           "type": "string",
180 |           "range": {
181 |             "start": 145,
182 |             "end": 151,
183 |           },
184 |           "value": "null"
185 |         },
186 |         "value": {
187 |           "type": "null",
188 |           "range": {
189 |             "start": 153,
190 |             "end": 157,
191 |           }
192 |         }
193 |       }
194 |     ]
195 |   },
196 |   "comments": [
197 |   ]
198 | }
199 | 


--------------------------------------------------------------------------------
/src/errors.rs:
--------------------------------------------------------------------------------
  1 | use std::fmt;
  2 | 
  3 | use crate::ParseStringErrorKind;
  4 | 
  5 | use super::common::Range;
  6 | 
  7 | #[derive(Debug, Clone, PartialEq, Eq, Hash)]
  8 | pub enum ParseErrorKind {
  9 |   CommentsNotAllowed,
 10 |   ExpectedColonAfterObjectKey,
 11 |   ExpectedObjectValue,
 12 |   ExpectedDigit,
 13 |   ExpectedDigitFollowingNegativeSign,
 14 |   ExpectedPlusMinusOrDigitInNumberLiteral,
 15 |   ExpectedStringObjectProperty,
 16 |   HexadecimalNumbersNotAllowed,
 17 |   MultipleRootJsonValues,
 18 |   SingleQuotedStringsNotAllowed,
 19 |   String(ParseStringErrorKind),
 20 |   TrailingCommasNotAllowed,
 21 |   UnaryPlusNumbersNotAllowed,
 22 |   UnexpectedCloseBrace,
 23 |   UnexpectedCloseBracket,
 24 |   UnexpectedColon,
 25 |   UnexpectedComma,
 26 |   UnexpectedToken,
 27 |   UnexpectedTokenInObject,
 28 |   UnexpectedWord,
 29 |   UnterminatedArray,
 30 |   UnterminatedCommentBlock,
 31 |   UnterminatedObject,
 32 | }
 33 | 
 34 | impl std::fmt::Display for ParseErrorKind {
 35 |   fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 36 |     use ParseErrorKind::*;
 37 |     match self {
 38 |       CommentsNotAllowed => {
 39 |         write!(f, "Comments are not allowed")
 40 |       }
 41 |       ExpectedColonAfterObjectKey => {
 42 |         write!(f, "Expected colon after the string or word in object property")
 43 |       }
 44 |       ExpectedDigit => {
 45 |         write!(f, "Expected digit")
 46 |       }
 47 |       ExpectedDigitFollowingNegativeSign => {
 48 |         write!(f, "Expected digit following negative sign")
 49 |       }
 50 |       ExpectedPlusMinusOrDigitInNumberLiteral => {
 51 |         write!(f, "Expected plus, minus, or digit in number literal")
 52 |       }
 53 |       ExpectedObjectValue => {
 54 |         write!(f, "Expected value after colon in object property")
 55 |       }
 56 |       ExpectedStringObjectProperty => {
 57 |         write!(f, "Expected string for object property")
 58 |       }
 59 |       HexadecimalNumbersNotAllowed => {
 60 |         write!(f, "Hexadecimal numbers are not allowed")
 61 |       }
 62 |       MultipleRootJsonValues => {
 63 |         write!(f, "Text cannot contain more than one JSON value")
 64 |       }
 65 |       SingleQuotedStringsNotAllowed => {
 66 |         write!(f, "Single-quoted strings are not allowed")
 67 |       }
 68 |       String(kind) => kind.fmt(f),
 69 |       TrailingCommasNotAllowed => {
 70 |         write!(f, "Trailing commas are not allowed")
 71 |       }
 72 |       UnaryPlusNumbersNotAllowed => {
 73 |         write!(f, "Unary plus on numbers is not allowed")
 74 |       }
 75 |       UnexpectedCloseBrace => {
 76 |         write!(f, "Unexpected close brace")
 77 |       }
 78 |       UnexpectedCloseBracket => {
 79 |         write!(f, "Unexpected close bracket")
 80 |       }
 81 |       UnexpectedColon => {
 82 |         write!(f, "Unexpected colon")
 83 |       }
 84 |       UnexpectedComma => {
 85 |         write!(f, "Unexpected comma")
 86 |       }
 87 |       UnexpectedWord => {
 88 |         write!(f, "Unexpected word")
 89 |       }
 90 |       UnexpectedToken => {
 91 |         write!(f, "Unexpected token")
 92 |       }
 93 |       UnexpectedTokenInObject => {
 94 |         write!(f, "Unexpected token in object")
 95 |       }
 96 |       UnterminatedArray => {
 97 |         write!(f, "Unterminated array")
 98 |       }
 99 |       UnterminatedCommentBlock => {
100 |         write!(f, "Unterminated comment block")
101 |       }
102 |       UnterminatedObject => {
103 |         write!(f, "Unterminated object")
104 |       }
105 |     }
106 |   }
107 | }
108 | 
109 | #[derive(Debug, Clone, PartialEq)]
110 | struct ParseErrorInner {
111 |   range: Range,
112 |   line_display: usize,
113 |   column_display: usize,
114 |   kind: ParseErrorKind,
115 | }
116 | 
117 | /// Error that could occur while parsing or tokenizing.
118 | #[derive(Debug, Clone, PartialEq)]
119 | pub struct ParseError(Box<ParseErrorInner>);
120 | 
121 | impl std::error::Error for ParseError {}
122 | 
123 | impl ParseError {
124 |   pub(crate) fn new(range: Range, kind: ParseErrorKind, file_text: &str) -> ParseError {
125 |     let (line_display, column_display) = get_line_and_column_display(range, file_text);
126 |     ParseError(Box::new(ParseErrorInner {
127 |       range,
128 |       line_display,
129 |       column_display,
130 |       kind,
131 |     }))
132 |   }
133 | 
134 |   /// Start and end position of the error.
135 |   pub fn range(&self) -> Range {
136 |     self.0.range
137 |   }
138 | 
139 |   /// 1-indexed line number the error occurred on.
140 |   pub fn line_display(&self) -> usize {
141 |     self.0.line_display
142 |   }
143 | 
144 |   /// 1-indexed column number the error occurred on.
145 |   ///
146 |   /// Note: Use the `error_unicode_width` feature to get the correct column
147 |   /// number for Unicode characters on the line, otherwise this is just the
148 |   /// number of characters by default.
149 |   pub fn column_display(&self) -> usize {
150 |     self.0.column_display
151 |   }
152 | 
153 |   /// Error message.
154 |   pub fn kind(&self) -> &ParseErrorKind {
155 |     &self.0.kind
156 |   }
157 | }
158 | 
159 | impl fmt::Display for ParseError {
160 |   fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
161 |     let inner = &*self.0;
162 |     write!(
163 |       f,
164 |       "{} on line {} column {}",
165 |       inner.kind, inner.line_display, inner.column_display
166 |     )
167 |   }
168 | }
169 | 
170 | fn get_line_and_column_display(range: Range, file_text: &str) -> (usize, usize) {
171 |   let mut line_index = 0;
172 |   let mut column_index = 0;
173 |   for c in file_text[..range.start].chars() {
174 |     if c == '\n' {
175 |       line_index += 1;
176 |       column_index = 0;
177 |     } else {
178 |       #[cfg(feature = "error_unicode_width")]
179 |       {
180 |         if let Some(width) = unicode_width::UnicodeWidthChar::width_cjk(c) {
181 |           column_index += width;
182 |         }
183 |       }
184 |       #[cfg(not(feature = "error_unicode_width"))]
185 |       {
186 |         column_index += 1;
187 |       }
188 |     }
189 |   }
190 |   (line_index + 1, column_index + 1)
191 | }
192 | 


--------------------------------------------------------------------------------
/benches/data/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     /* Visit https://aka.ms/tsconfig.json to read more about this file */
 4 | 
 5 |     /* Basic Options */
 6 |     // "incremental": true,                   /* Enable incremental compilation */
 7 |     "target": "es5",                          /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019', 'ES2020', or 'ESNEXT'. */
 8 |     "module": "commonjs",                     /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', 'es2020', or 'ESNext'. */
 9 |     // "lib": [],                             /* Specify library files to be included in the compilation. */
10 |     // "allowJs": true,                       /* Allow javascript files to be compiled. */
11 |     // "checkJs": true,                       /* Report errors in .js files. */
12 |     // "jsx": "preserve",                     /* Specify JSX code generation: 'preserve', 'react-native', or 'react'. */
13 |     // "declaration": true,                   /* Generates corresponding '.d.ts' file. */
14 |     // "declarationMap": true,                /* Generates a sourcemap for each corresponding '.d.ts' file. */
15 |     // "sourceMap": true,                     /* Generates corresponding '.map' file. */
16 |     // "outFile": "./",                       /* Concatenate and emit output to single file. */
17 |     // "outDir": "./",                        /* Redirect output structure to the directory. */
18 |     // "rootDir": "./",                       /* Specify the root directory of input files. Use to control the output directory structure with --outDir. */
19 |     // "composite": true,                     /* Enable project compilation */
20 |     // "tsBuildInfoFile": "./",               /* Specify file to store incremental compilation information */
21 |     // "removeComments": true,                /* Do not emit comments to output. */
22 |     // "noEmit": true,                        /* Do not emit outputs. */
23 |     // "importHelpers": true,                 /* Import emit helpers from 'tslib'. */
24 |     // "downlevelIteration": true,            /* Provide full support for iterables in 'for-of', spread, and destructuring when targeting 'ES5' or 'ES3'. */
25 |     // "isolatedModules": true,               /* Transpile each file as a separate module (similar to 'ts.transpileModule'). */
26 | 
27 |     /* Strict Type-Checking Options */
28 |     "strict": true,                           /* Enable all strict type-checking options. */
29 |     // "noImplicitAny": true,                 /* Raise error on expressions and declarations with an implied 'any' type. */
30 |     // "strictNullChecks": true,              /* Enable strict null checks. */
31 |     // "strictFunctionTypes": true,           /* Enable strict checking of function types. */
32 |     // "strictBindCallApply": true,           /* Enable strict 'bind', 'call', and 'apply' methods on functions. */
33 |     // "strictPropertyInitialization": true,  /* Enable strict checking of property initialization in classes. */
34 |     // "noImplicitThis": true,                /* Raise error on 'this' expressions with an implied 'any' type. */
35 |     // "alwaysStrict": true,                  /* Parse in strict mode and emit "use strict" for each source file. */
36 | 
37 |     /* Additional Checks */
38 |     // "noUnusedLocals": true,                /* Report errors on unused locals. */
39 |     // "noUnusedParameters": true,            /* Report errors on unused parameters. */
40 |     // "noImplicitReturns": true,             /* Report error when not all code paths in function return a value. */
41 |     // "noFallthroughCasesInSwitch": true,    /* Report errors for fallthrough cases in switch statement. */
42 | 
43 |     /* Module Resolution Options */
44 |     // "moduleResolution": "node",            /* Specify module resolution strategy: 'node' (Node.js) or 'classic' (TypeScript pre-1.6). */
45 |     // "baseUrl": "./",                       /* Base directory to resolve non-absolute module names. */
46 |     // "paths": {},                           /* A series of entries which re-map imports to lookup locations relative to the 'baseUrl'. */
47 |     // "rootDirs": [],                        /* List of root folders whose combined content represents the structure of the project at runtime. */
48 |     // "typeRoots": [],                       /* List of folders to include type definitions from. */
49 |     // "types": [],                           /* Type declaration files to be included in compilation. */
50 |     // "allowSyntheticDefaultImports": true,  /* Allow default imports from modules with no default export. This does not affect code emit, just typechecking. */
51 |     "esModuleInterop": true,                  /* Enables emit interoperability between CommonJS and ES Modules via creation of namespace objects for all imports. Implies 'allowSyntheticDefaultImports'. */
52 |     // "preserveSymlinks": true,              /* Do not resolve the real path of symlinks. */
53 |     // "allowUmdGlobalAccess": true,          /* Allow accessing UMD globals from modules. */
54 | 
55 |     /* Source Map Options */
56 |     // "sourceRoot": "",                      /* Specify the location where debugger should locate TypeScript files instead of source locations. */
57 |     // "mapRoot": "",                         /* Specify the location where debugger should locate map files instead of generated locations. */
58 |     // "inlineSourceMap": true,               /* Emit a single file with source maps instead of having a separate file. */
59 |     // "inlineSources": true,                 /* Emit the source alongside the sourcemaps within a single file; requires '--inlineSourceMap' or '--sourceMap' to be set. */
60 | 
61 |     /* Experimental Options */
62 |     // "experimentalDecorators": true,        /* Enables experimental support for ES7 decorators. */
63 |     // "emitDecoratorMetadata": true,         /* Enables experimental support for emitting type metadata for decorators. */
64 | 
65 |     /* Advanced Options */
66 |     "skipLibCheck": true,                     /* Skip type checking of declaration files. */
67 |     "forceConsistentCasingInFileNames": true  /* Disallow inconsistently-cased references to the same file. */
68 |   }
69 | }
70 | 


--------------------------------------------------------------------------------
/src/parse_to_value.rs:
--------------------------------------------------------------------------------
  1 | use super::CollectOptions;
  2 | use super::ParseOptions;
  3 | use super::ast;
  4 | use super::errors::ParseError;
  5 | use super::parse_to_ast;
  6 | use super::value::*;
  7 | use crate::value::Map;
  8 | 
  9 | /// Parses a string containing JSONC to a `JsonValue`.
 10 | ///
 11 | /// Returns `None` when the provided string is empty or whitespace.
 12 | ///
 13 | /// # Example
 14 | ///
 15 | /// ```
 16 | /// use jsonc_parser::parse_to_value;
 17 | ///
 18 | /// let json_value = parse_to_value(r#"{ "test": 5 } // test"#, &Default::default()).expect("Should parse.");
 19 | /// ```
 20 | pub fn parse_to_value<'a>(text: &'a str, options: &ParseOptions) -> Result<Option<JsonValue<'a>>, ParseError> {
 21 |   let value = parse_to_ast(
 22 |     text,
 23 |     &CollectOptions {
 24 |       comments: crate::CommentCollectionStrategy::Off,
 25 |       tokens: false,
 26 |     },
 27 |     options,
 28 |   )?
 29 |   .value;
 30 |   Ok(value.map(handle_value))
 31 | }
 32 | 
 33 | fn handle_value(value: ast::Value) -> JsonValue {
 34 |   match value {
 35 |     ast::Value::StringLit(lit) => JsonValue::String(lit.value),
 36 |     ast::Value::NumberLit(lit) => JsonValue::Number(lit.value),
 37 |     ast::Value::BooleanLit(lit) => JsonValue::Boolean(lit.value),
 38 |     ast::Value::Object(obj) => JsonValue::Object(handle_object(obj)),
 39 |     ast::Value::Array(arr) => JsonValue::Array(handle_array(arr)),
 40 |     ast::Value::NullKeyword(_) => JsonValue::Null,
 41 |   }
 42 | }
 43 | 
 44 | fn handle_array(arr: ast::Array) -> JsonArray {
 45 |   let elements = arr.elements.into_iter().map(handle_value).collect();
 46 | 
 47 |   JsonArray::new(elements)
 48 | }
 49 | 
 50 | fn handle_object(obj: ast::Object) -> JsonObject {
 51 |   let mut props = Map::with_capacity(obj.properties.len());
 52 |   for prop in obj.properties.into_iter() {
 53 |     let prop_name = prop.name.into_string();
 54 |     let prop_value = handle_value(prop.value);
 55 |     props.insert(prop_name, prop_value);
 56 |   }
 57 |   JsonObject::new(props)
 58 | }
 59 | 
 60 | #[cfg(test)]
 61 | mod tests {
 62 |   use crate::errors::ParseErrorKind;
 63 | 
 64 |   use super::*;
 65 |   use std::borrow::Cow;
 66 | 
 67 |   #[test]
 68 |   fn it_should_parse_object() {
 69 |     let value = parse_to_value(
 70 |       r#"{
 71 |     "a": null,
 72 |     "b": [null, "text"],
 73 |     "c": true,
 74 |     d: 25.55
 75 | }"#,
 76 |       &Default::default(),
 77 |     )
 78 |     .unwrap()
 79 |     .unwrap();
 80 | 
 81 |     let mut object_map = Map::new();
 82 |     object_map.insert(String::from("a"), JsonValue::Null);
 83 |     object_map.insert(
 84 |       String::from("b"),
 85 |       JsonValue::Array(vec![JsonValue::Null, JsonValue::String(Cow::Borrowed("text"))].into()),
 86 |     );
 87 |     object_map.insert(String::from("c"), JsonValue::Boolean(true));
 88 |     object_map.insert(String::from("d"), JsonValue::Number("25.55"));
 89 |     assert_eq!(value, JsonValue::Object(object_map.into()));
 90 |   }
 91 | 
 92 |   #[test]
 93 |   fn it_should_parse_boolean_false() {
 94 |     let value = parse_to_value("false", &Default::default()).unwrap().unwrap();
 95 |     assert_eq!(value, JsonValue::Boolean(false));
 96 |     let value = parse_to_value("true", &Default::default()).unwrap().unwrap();
 97 |     assert_eq!(value, JsonValue::Boolean(true));
 98 |   }
 99 | 
100 |   #[test]
101 |   fn it_should_parse_boolean_true() {
102 |     let value = parse_to_value("true", &Default::default()).unwrap().unwrap();
103 |     assert_eq!(value, JsonValue::Boolean(true));
104 |   }
105 | 
106 |   #[test]
107 |   fn it_should_parse_number() {
108 |     let value = parse_to_value("50", &Default::default()).unwrap().unwrap();
109 |     assert_eq!(value, JsonValue::Number("50"));
110 |   }
111 | 
112 |   #[test]
113 |   fn it_should_parse_string() {
114 |     let value = parse_to_value(r#""test""#, &Default::default()).unwrap().unwrap();
115 |     assert_eq!(value, JsonValue::String(Cow::Borrowed("test")));
116 |   }
117 | 
118 |   #[test]
119 |   fn it_should_parse_string_with_quotes() {
120 |     let value = parse_to_value(r#""echo \"test\"""#, &Default::default())
121 |       .unwrap()
122 |       .unwrap();
123 |     assert_eq!(value, JsonValue::String(Cow::Borrowed(r#"echo "test""#)));
124 |   }
125 | 
126 |   #[test]
127 |   fn it_should_parse_array() {
128 |     let value = parse_to_value(r#"[false, true]"#, &Default::default())
129 |       .unwrap()
130 |       .unwrap();
131 |     assert_eq!(
132 |       value,
133 |       JsonValue::Array(vec![JsonValue::Boolean(false), JsonValue::Boolean(true)].into())
134 |     );
135 |   }
136 | 
137 |   #[test]
138 |   fn it_should_parse_null() {
139 |     let value = parse_to_value("null", &Default::default()).unwrap().unwrap();
140 |     assert_eq!(value, JsonValue::Null);
141 |   }
142 | 
143 |   #[test]
144 |   fn it_should_parse_empty() {
145 |     let value = parse_to_value("", &Default::default()).unwrap();
146 |     assert!(value.is_none());
147 |   }
148 | 
149 |   #[test]
150 |   fn error_unexpected_token() {
151 |     let err = parse_to_value("{\n  \"a\":\u{200b}5 }", &Default::default())
152 |       .err()
153 |       .unwrap();
154 |     assert_eq!(err.range().start, 8);
155 |     assert_eq!(err.range().end, 11);
156 |     assert_eq!(err.kind().clone(), ParseErrorKind::UnexpectedToken);
157 |   }
158 | 
159 |   #[test]
160 |   fn it_should_parse_surrogate_pair() {
161 |     // RFC 8259 § 7: non-BMP character 𝄞 (U+1D11E) should be escaped as surrogate pair \uD834\uDD1E
162 |     let src = r#""\uD834\uDD1E""#;
163 |     let v = parse_to_value(src, &Default::default()).unwrap().unwrap();
164 |     if let JsonValue::String(s) = v {
165 |       assert_eq!("\u{1D11E}", s.as_ref());
166 |     } else {
167 |       panic!("Expected string value, got {:?}", v);
168 |     }
169 |   }
170 | 
171 |   #[test]
172 |   fn it_should_parse_multiple_surrogate_pairs() {
173 |     let src = r#""\uD834\uDD1E\uD834\uDD1E""#;
174 |     let v = parse_to_value(src, &Default::default()).unwrap().unwrap();
175 |     if let JsonValue::String(s) = v {
176 |       assert_eq!("\u{1D11E}\u{1D11E}", s.as_ref());
177 |     } else {
178 |       panic!("Expected string value, got {:?}", v);
179 |     }
180 |   }
181 | 
182 |   #[test]
183 |   fn it_should_parse_mixed_escapes_with_surrogate_pairs() {
184 |     // "A𝄞B" where 𝄞 is encoded as surrogate pair
185 |     let src = r#""\u0041\uD834\uDD1E\u0042""#;
186 |     let v = parse_to_value(src, &Default::default()).unwrap().unwrap();
187 |     if let JsonValue::String(s) = v {
188 |       assert_eq!("A\u{1D11E}B", s.as_ref());
189 |     } else {
190 |       panic!("Expected string value, got {:?}", v);
191 |     }
192 |   }
193 | 
194 |   #[test]
195 |   fn it_should_error_on_unpaired_high_surrogate_with_text() {
196 |     let src = r#""\uD834x""#;
197 |     let err = parse_to_value(src, &Default::default()).err().unwrap();
198 |     assert!(err.to_string().contains("unpaired high surrogate"));
199 |   }
200 | 
201 |   #[test]
202 |   fn it_should_error_on_unpaired_high_surrogate_at_eof() {
203 |     let src = r#""\uD834""#;
204 |     let err = parse_to_value(src, &Default::default()).err().unwrap();
205 |     assert!(err.to_string().contains("unpaired high surrogate"));
206 |   }
207 | 
208 |   #[test]
209 |   fn it_should_error_on_high_surrogate_followed_by_non_low_surrogate() {
210 |     let src = r#""\uD834\u0041""#;
211 |     let err = parse_to_value(src, &Default::default()).err().unwrap();
212 |     assert!(err.to_string().contains("not followed by low surrogate"));
213 |   }
214 | 
215 |   #[test]
216 |   fn it_should_error_on_unpaired_low_surrogate() {
217 |     // This test verifies existing behavior is maintained
218 |     let src = r#""\uDC00""#;
219 |     let err = parse_to_value(src, &Default::default()).err().unwrap();
220 |     assert!(err.to_string().contains("unpaired low surrogate"));
221 |   }
222 | }
223 | 


--------------------------------------------------------------------------------
/tests/specs/comments/inline-comments.txt:
--------------------------------------------------------------------------------
  1 | {
  2 |   "value": {
  3 |     "type": "object",
  4 |     "range": {
  5 |       "start": 5,
  6 |       "end": 102,
  7 |     },
  8 |     "properties": [
  9 |       {
 10 |         "type": "objectProp",
 11 |         "range": {
 12 |           "start": 21,
 13 |           "end": 37,
 14 |         },
 15 |         "name": {
 16 |           "type": "string",
 17 |           "range": {
 18 |             "start": 21,
 19 |             "end": 24,
 20 |           },
 21 |           "value": "a"
 22 |         },
 23 |         "value": {
 24 |           "type": "number",
 25 |           "range": {
 26 |             "start": 36,
 27 |             "end": 37,
 28 |           },
 29 |           "value": "5"
 30 |         }
 31 |       },
 32 |       {
 33 |         "type": "objectProp",
 34 |         "range": {
 35 |           "start": 52,
 36 |           "end": 69,
 37 |         },
 38 |         "name": {
 39 |           "type": "string",
 40 |           "range": {
 41 |             "start": 52,
 42 |             "end": 55,
 43 |           },
 44 |           "value": "b"
 45 |         },
 46 |         "value": {
 47 |           "type": "array",
 48 |           "range": {
 49 |             "start": 62,
 50 |             "end": 69,
 51 |           },
 52 |           "elements": [
 53 |           ]
 54 |         }
 55 |       },
 56 |       {
 57 |         "type": "objectProp",
 58 |         "range": {
 59 |           "start": 85,
 60 |           "end": 94,
 61 |         },
 62 |         "name": {
 63 |           "type": "string",
 64 |           "range": {
 65 |             "start": 85,
 66 |             "end": 88,
 67 |           },
 68 |           "value": "c"
 69 |         },
 70 |         "value": {
 71 |           "type": "null",
 72 |           "range": {
 73 |             "start": 90,
 74 |             "end": 94,
 75 |           }
 76 |         }
 77 |       }
 78 |     ]
 79 |   },
 80 |   "comments": [
 81 |     {
 82 |       "pos": 0,
 83 |       "comments": [
 84 |         {
 85 |           "type": "block",
 86 |           "range": {
 87 |             "start": 0,
 88 |             "end": 5,
 89 |           },
 90 |           "value": "1"
 91 |         }
 92 |       ]
 93 |     },
 94 |     {
 95 |       "pos": 5,
 96 |       "comments": [
 97 |         {
 98 |           "type": "block",
 99 |           "range": {
100 |             "start": 0,
101 |             "end": 5,
102 |           },
103 |           "value": "1"
104 |         }
105 |       ]
106 |     },
107 |     {
108 |       "pos": 6,
109 |       "comments": [
110 |         {
111 |           "type": "line",
112 |           "range": {
113 |             "start": 7,
114 |             "end": 11,
115 |           },
116 |           "value": " 2"
117 |         },
118 |         {
119 |           "type": "block",
120 |           "range": {
121 |             "start": 16,
122 |             "end": 21,
123 |           },
124 |           "value": "3"
125 |         }
126 |       ]
127 |     },
128 |     {
129 |       "pos": 21,
130 |       "comments": [
131 |         {
132 |           "type": "line",
133 |           "range": {
134 |             "start": 7,
135 |             "end": 11,
136 |           },
137 |           "value": " 2"
138 |         },
139 |         {
140 |           "type": "block",
141 |           "range": {
142 |             "start": 16,
143 |             "end": 21,
144 |           },
145 |           "value": "3"
146 |         }
147 |       ]
148 |     },
149 |     {
150 |       "pos": 24,
151 |       "comments": [
152 |         {
153 |           "type": "block",
154 |           "range": {
155 |             "start": 24,
156 |             "end": 29,
157 |           },
158 |           "value": "4"
159 |         }
160 |       ]
161 |     },
162 |     {
163 |       "pos": 29,
164 |       "comments": [
165 |         {
166 |           "type": "block",
167 |           "range": {
168 |             "start": 24,
169 |             "end": 29,
170 |           },
171 |           "value": "4"
172 |         }
173 |       ]
174 |     },
175 |     {
176 |       "pos": 30,
177 |       "comments": [
178 |         {
179 |           "type": "block",
180 |           "range": {
181 |             "start": 31,
182 |             "end": 36,
183 |           },
184 |           "value": "5"
185 |         }
186 |       ]
187 |     },
188 |     {
189 |       "pos": 36,
190 |       "comments": [
191 |         {
192 |           "type": "block",
193 |           "range": {
194 |             "start": 31,
195 |             "end": 36,
196 |           },
197 |           "value": "5"
198 |         }
199 |       ]
200 |     },
201 |     {
202 |       "pos": 37,
203 |       "comments": [
204 |         {
205 |           "type": "block",
206 |           "range": {
207 |             "start": 37,
208 |             "end": 42,
209 |           },
210 |           "value": "6"
211 |         }
212 |       ]
213 |     },
214 |     {
215 |       "pos": 42,
216 |       "comments": [
217 |         {
218 |           "type": "block",
219 |           "range": {
220 |             "start": 37,
221 |             "end": 42,
222 |           },
223 |           "value": "6"
224 |         }
225 |       ]
226 |     },
227 |     {
228 |       "pos": 43,
229 |       "comments": [
230 |         {
231 |           "type": "line",
232 |           "range": {
233 |             "start": 44,
234 |             "end": 47,
235 |           },
236 |           "value": "7"
237 |         }
238 |       ]
239 |     },
240 |     {
241 |       "pos": 52,
242 |       "comments": [
243 |         {
244 |           "type": "line",
245 |           "range": {
246 |             "start": 44,
247 |             "end": 47,
248 |           },
249 |           "value": "7"
250 |         }
251 |       ]
252 |     },
253 |     {
254 |       "pos": 56,
255 |       "comments": [
256 |         {
257 |           "type": "block",
258 |           "range": {
259 |             "start": 57,
260 |             "end": 62,
261 |           },
262 |           "value": "8"
263 |         }
264 |       ]
265 |     },
266 |     {
267 |       "pos": 62,
268 |       "comments": [
269 |         {
270 |           "type": "block",
271 |           "range": {
272 |             "start": 57,
273 |             "end": 62,
274 |           },
275 |           "value": "8"
276 |         }
277 |       ]
278 |     },
279 |     {
280 |       "pos": 63,
281 |       "comments": [
282 |         {
283 |           "type": "block",
284 |           "range": {
285 |             "start": 63,
286 |             "end": 68,
287 |           },
288 |           "value": "9"
289 |         }
290 |       ]
291 |     },
292 |     {
293 |       "pos": 68,
294 |       "comments": [
295 |         {
296 |           "type": "block",
297 |           "range": {
298 |             "start": 63,
299 |             "end": 68,
300 |           },
301 |           "value": "9"
302 |         }
303 |       ]
304 |     },
305 |     {
306 |       "pos": 69,
307 |       "comments": [
308 |         {
309 |           "type": "block",
310 |           "range": {
311 |             "start": 69,
312 |             "end": 75,
313 |           },
314 |           "value": "10"
315 |         }
316 |       ]
317 |     },
318 |     {
319 |       "pos": 75,
320 |       "comments": [
321 |         {
322 |           "type": "block",
323 |           "range": {
324 |             "start": 69,
325 |             "end": 75,
326 |           },
327 |           "value": "10"
328 |         }
329 |       ]
330 |     },
331 |     {
332 |       "pos": 76,
333 |       "comments": [
334 |         {
335 |           "type": "line",
336 |           "range": {
337 |             "start": 76,
338 |             "end": 80,
339 |           },
340 |           "value": "11"
341 |         }
342 |       ]
343 |     },
344 |     {
345 |       "pos": 85,
346 |       "comments": [
347 |         {
348 |           "type": "line",
349 |           "range": {
350 |             "start": 76,
351 |             "end": 80,
352 |           },
353 |           "value": "11"
354 |         }
355 |       ]
356 |     },
357 |     {
358 |       "pos": 94,
359 |       "comments": [
360 |         {
361 |           "type": "block",
362 |           "range": {
363 |             "start": 95,
364 |             "end": 101,
365 |           },
366 |           "value": "13"
367 |         }
368 |       ]
369 |     },
370 |     {
371 |       "pos": 101,
372 |       "comments": [
373 |         {
374 |           "type": "block",
375 |           "range": {
376 |             "start": 95,
377 |             "end": 101,
378 |           },
379 |           "value": "13"
380 |         }
381 |       ]
382 |     },
383 |     {
384 |       "pos": 102,
385 |       "comments": [
386 |         {
387 |           "type": "line",
388 |           "range": {
389 |             "start": 103,
390 |             "end": 108,
391 |           },
392 |           "value": " 14"
393 |         }
394 |       ]
395 |     },
396 |     {
397 |       "pos": 108,
398 |       "comments": [
399 |         {
400 |           "type": "line",
401 |           "range": {
402 |             "start": 103,
403 |             "end": 108,
404 |           },
405 |           "value": " 14"
406 |         }
407 |       ]
408 |     }
409 |   ]
410 | }
411 | 


--------------------------------------------------------------------------------
/src/value.rs:
--------------------------------------------------------------------------------
  1 | use core::slice::Iter;
  2 | use std::borrow::Cow;
  3 | 
/// A JSON value.
///
/// String and number payloads carry the lifetime `'a`, so they may borrow
/// their text instead of owning it.
#[derive(Clone, PartialEq, Debug)]
pub enum JsonValue<'a> {
  /// A string, held as either borrowed or owned text.
  String(Cow<'a, str>),
  /// A number, stored as a string slice rather than a parsed numeric type.
  Number(&'a str),
  /// A boolean.
  Boolean(bool),
  /// An object of named properties.
  Object(JsonObject<'a>),
  /// An array of values.
  Array(JsonArray<'a>),
  /// The `null` value.
  Null,
}
 14 | 
/// Map type used for object properties: a `HashMap` when the
/// `preserve_order` feature is disabled.
#[cfg(not(feature = "preserve_order"))]
pub type Map<K, V> = std::collections::HashMap<K, V>;
/// Map type used for object properties: an `IndexMap` when the
/// `preserve_order` feature is enabled.
#[cfg(feature = "preserve_order")]
pub type Map<K, V> = indexmap::IndexMap<K, V>;
 19 | 
/// A JSON object.
///
/// Newtype wrapper around a `Map` from property name to `JsonValue`.
#[derive(Clone, PartialEq, Debug)]
pub struct JsonObject<'a>(Map<String, JsonValue<'a>>);
 23 | 
 24 | impl<'a> IntoIterator for JsonObject<'a> {
 25 |   type Item = (String, JsonValue<'a>);
 26 |   #[cfg(not(feature = "preserve_order"))]
 27 |   type IntoIter = std::collections::hash_map::IntoIter<String, JsonValue<'a>>;
 28 |   #[cfg(feature = "preserve_order")]
 29 |   type IntoIter = indexmap::map::IntoIter<String, JsonValue<'a>>;
 30 | 
 31 |   fn into_iter(self) -> Self::IntoIter {
 32 |     self.0.into_iter()
 33 |   }
 34 | }
 35 | 
 36 | impl<'a> From<Map<String, JsonValue<'a>>> for JsonObject<'a> {
 37 |   fn from(properties: Map<String, JsonValue>) -> JsonObject {
 38 |     JsonObject::new(properties)
 39 |   }
 40 | }
 41 | 
/// Removes `key` from `map`, returning the stored key together with its value,
/// or `None` when the key is absent.
///
/// Variant compiled when `preserve_order` is disabled (`HashMap`-backed `Map`).
#[cfg(not(feature = "preserve_order"))]
#[inline(always)]
fn remove_entry<'a>(map: &mut Map<String, JsonValue<'a>>, key: &str) -> Option<(String, JsonValue<'a>)> {
  map.remove_entry(key)
}
 47 | 
/// Removes `key` from `map`, returning the stored key together with its value,
/// or `None` when the key is absent.
///
/// Variant compiled when `preserve_order` is enabled (`IndexMap`-backed `Map`);
/// it delegates to `shift_remove_entry`, the removal that indexmap documents
/// as keeping the relative order of the remaining entries.
#[cfg(feature = "preserve_order")]
#[inline(always)]
fn remove_entry<'a>(map: &mut Map<String, JsonValue<'a>>, key: &str) -> Option<(String, JsonValue<'a>)> {
  map.shift_remove_entry(key)
}
 53 | 
 54 | macro_rules! generate_take {
 55 |   ($self:ident, $name:ident, $value_type:ident) => {
 56 |     match remove_entry(&mut $self.0, $name) {
 57 |       Some((_, JsonValue::$value_type(value))) => Some(value),
 58 |       Some((key, value)) => {
 59 |         // add it back
 60 |         $self.0.insert(key, value);
 61 |         None
 62 |       }
 63 |       _ => None,
 64 |     }
 65 |   };
 66 | }
 67 | 
// Expands to an expression that looks up `$name` in `$self.0` and yields
// `Some` with a reference to the payload when the stored value is the
// `$value_type` variant. A missing key or any other variant yields `None`.
macro_rules! generate_get {
  ($self:ident, $name:ident, $value_type:ident) => {
    match $self.0.get($name) {
      Some(JsonValue::$value_type(value)) => Some(value),
      _ => None,
    }
  };
}
 76 | 
 77 | impl<'a> JsonObject<'a> {
 78 |   /// Creates a new JsonObject.
 79 |   pub fn new(inner: Map<String, JsonValue<'a>>) -> JsonObject<'a> {
 80 |     JsonObject(inner)
 81 |   }
 82 | 
 83 |   /// Creates a new JsonObject with the specified capacity.
 84 |   pub fn with_capacity(capacity: usize) -> JsonObject<'a> {
 85 |     JsonObject(Map::with_capacity(capacity))
 86 |   }
 87 | 
 88 |   /// Drops the object returning the inner map.
 89 |   pub fn take_inner(self) -> Map<String, JsonValue<'a>> {
 90 |     self.0
 91 |   }
 92 | 
 93 |   /// Gets the number of properties.
 94 |   pub fn len(&self) -> usize {
 95 |     self.0.len()
 96 |   }
 97 | 
 98 |   /// Gets if there are no properties.
 99 |   pub fn is_empty(&self) -> bool {
100 |     self.0.is_empty()
101 |   }
102 | 
103 |   /// Gets a value in the object by its name.
104 |   pub fn get(&self, name: &str) -> Option<&JsonValue<'a>> {
105 |     self.0.get(name)
106 |   }
107 | 
108 |   /// Gets a string property value from the object by name.
109 |   /// Returns `None` when not a string or it doesn't exist.
110 |   pub fn get_string(&self, name: &str) -> Option<&Cow<'a, str>> {
111 |     generate_get!(self, name, String)
112 |   }
113 | 
114 |   /// Gets a number property value from the object by name.
115 |   /// Returns `None` when not a number or it doesn't exist.
116 |   pub fn get_number(&self, name: &str) -> Option<&'a str> {
117 |     generate_get!(self, name, Number)
118 |   }
119 | 
120 |   /// Gets a boolean property value from the object by name.
121 |   /// Returns `None` when not a boolean or it doesn't exist.
122 |   pub fn get_boolean(&self, name: &str) -> Option<bool> {
123 |     let result = generate_get!(self, name, Boolean);
124 |     result.cloned()
125 |   }
126 | 
127 |   /// Gets an object property value from the object by name.
128 |   /// Returns `None` when not an object or it doesn't exist.
129 |   pub fn get_object(&self, name: &str) -> Option<&JsonObject<'a>> {
130 |     generate_get!(self, name, Object)
131 |   }
132 | 
133 |   /// Gets an array property value from the object by name.
134 |   /// Returns `None` when not an array or it doesn't exist.
135 |   pub fn get_array(&self, name: &str) -> Option<&JsonArray<'a>> {
136 |     generate_get!(self, name, Array)
137 |   }
138 | 
139 |   /// Takes a value from the object by name.
140 |   /// Returns `None` when it doesn't exist.
141 |   pub fn take(&mut self, name: &str) -> Option<JsonValue<'a>> {
142 |     remove_entry(&mut self.0, name).map(|(_, value)| value)
143 |   }
144 | 
145 |   /// Takes a string property value from the object by name.
146 |   /// Returns `None` when not a string or it doesn't exist.
147 |   pub fn take_string(&mut self, name: &str) -> Option<Cow<'a, str>> {
148 |     generate_take!(self, name, String)
149 |   }
150 | 
151 |   /// Takes a number property value from the object by name.
152 |   /// Returns `None` when not a number or it doesn't exist.
153 |   pub fn take_number(&mut self, name: &str) -> Option<&'a str> {
154 |     generate_take!(self, name, Number)
155 |   }
156 | 
157 |   /// Takes a boolean property value from the object by name.
158 |   /// Returns `None` when not a boolean or it doesn't exist.
159 |   pub fn take_boolean(&mut self, name: &str) -> Option<bool> {
160 |     generate_take!(self, name, Boolean)
161 |   }
162 | 
163 |   /// Takes an object property value from the object by name.
164 |   /// Returns `None` when not an object or it doesn't exist.
165 |   pub fn take_object(&mut self, name: &str) -> Option<JsonObject<'a>> {
166 |     generate_take!(self, name, Object)
167 |   }
168 | 
169 |   /// Takes an array property value from the object by name.
170 |   /// Returns `None` when not an array or it doesn't exist.
171 |   pub fn take_array(&mut self, name: &str) -> Option<JsonArray<'a>> {
172 |     generate_take!(self, name, Array)
173 |   }
174 | }
175 | 
/// A JSON array.
///
/// Newtype wrapper around a `Vec` of `JsonValue` elements.
#[derive(Clone, PartialEq, Debug)]
pub struct JsonArray<'a>(Vec<JsonValue<'a>>);
179 | 
180 | impl<'a> IntoIterator for JsonArray<'a> {
181 |   type Item = JsonValue<'a>;
182 |   type IntoIter = std::vec::IntoIter<Self::Item>;
183 | 
184 |   fn into_iter(self) -> Self::IntoIter {
185 |     self.0.into_iter()
186 |   }
187 | }
188 | 
189 | impl<'a> From<Vec<JsonValue<'a>>> for JsonArray<'a> {
190 |   fn from(elements: Vec<JsonValue<'a>>) -> JsonArray<'a> {
191 |     JsonArray::new(elements)
192 |   }
193 | }
194 | 
195 | impl<'a> JsonArray<'a> {
196 |   /// Creates a new JsonArray.
197 |   pub fn new(inner: Vec<JsonValue<'a>>) -> JsonArray<'a> {
198 |     JsonArray(inner)
199 |   }
200 | 
201 |   /// Drops the object returning the inner vector.
202 |   pub fn take_inner(self) -> Vec<JsonValue<'a>> {
203 |     self.0
204 |   }
205 | 
206 |   /// Iterates over the array elements.
207 |   pub fn iter(&self) -> Iter<'_, JsonValue<'a>> {
208 |     self.0.iter()
209 |   }
210 | 
211 |   /// Gets a value from the array by index.
212 |   pub fn get(&self, index: usize) -> Option<&JsonValue<'a>> {
213 |     self.0.get(index)
214 |   }
215 | 
216 |   /// Gets the number of elements.
217 |   pub fn len(&self) -> usize {
218 |     self.0.len()
219 |   }
220 | 
221 |   /// Gets if the array is empty.
222 |   pub fn is_empty(&self) -> bool {
223 |     self.0.is_empty()
224 |   }
225 | }
226 | 
#[cfg(test)]
mod test {
  use super::*;

  /// Builds a borrowed string value for the assertions below.
  fn text(value: &'static str) -> JsonValue<'static> {
    JsonValue::String(Cow::Borrowed(value))
  }

  #[test]
  fn it_should_take() {
    let entries = vec![
      (String::from("prop"), text("asdf")),
      (String::from("other"), text("text")),
    ];
    let mut obj = JsonObject::new(entries.into_iter().collect());

    // Unknown name: nothing is removed.
    assert_eq!(obj.len(), 2);
    assert_eq!(obj.take_string("asdf"), None);
    assert_eq!(obj.len(), 2);
    // Known name, wrong type: nothing is removed.
    assert_eq!(obj.take_number("prop"), None);
    assert_eq!(obj.len(), 2);
    // Known name, matching type: the property is removed.
    assert_eq!(obj.take_string("prop"), Some(Cow::Borrowed("asdf")));
    assert_eq!(obj.len(), 1);
    // Untyped take behaves the same for missing and present names.
    assert_eq!(obj.take("something"), None);
    assert_eq!(obj.len(), 1);
    assert_eq!(obj.take("other"), Some(text("text")));
    assert_eq!(obj.len(), 0);
  }

  #[test]
  fn it_should_get() {
    let entries = vec![(String::from("prop"), text("asdf"))];
    let obj = JsonObject::new(entries.into_iter().collect());

    assert_eq!(obj.len(), 1);
    assert_eq!(obj.get_string("asdf"), None);
    assert_eq!(obj.get_string("prop"), Some(&Cow::Borrowed("asdf")));
    assert_eq!(obj.get("prop"), Some(&text("asdf")));
    assert_eq!(obj.get("asdf"), None);
    // Lookups never remove anything.
    assert_eq!(obj.len(), 1);
  }
}
265 | 


--------------------------------------------------------------------------------
/tests/test.rs:
--------------------------------------------------------------------------------
  1 | extern crate jsonc_parser;
  2 | 
  3 | use jsonc_parser::ast::*;
  4 | use jsonc_parser::common::*;
  5 | use jsonc_parser::*;
  6 | use pretty_assertions::assert_eq;
  7 | use std::fs::{self};
  8 | use std::path::Path;
  9 | use std::path::PathBuf;
 10 | use std::rc::Rc;
 11 | 
 12 | #[test]
 13 | fn test_specs() {
 14 |   for json_path in get_json_file_paths_in_dir(Path::new("./tests/specs")) {
 15 |     let text_file_path = json_path.with_extension("txt");
 16 |     let json_file_text = fs::read_to_string(&json_path).unwrap().replace("\r\n", "\n");
 17 |     let result = parse_to_ast(
 18 |       &json_file_text,
 19 |       &CollectOptions {
 20 |         comments: CommentCollectionStrategy::Separate,
 21 |         tokens: true,
 22 |       },
 23 |       &Default::default(),
 24 |     )
 25 |     .expect("Expected no error.");
 26 |     let result_text = parse_result_to_test_str(&result);
 27 |     let expected_text = fs::read_to_string(&text_file_path).unwrap().replace("\r\n", "\n");
 28 |     // fs::write(&text_file_path, result_text.clone()).unwrap();
 29 |     assert_eq!(result_text.trim(), expected_text.trim());
 30 |   }
 31 | }
 32 | 
 33 | #[cfg(feature = "cst")]
 34 | #[test]
 35 | fn test_cst() {
 36 |   for json_path in get_json_file_paths_in_dir(Path::new("./tests/specs")) {
 37 |     let json_file_text = fs::read_to_string(&json_path).unwrap().replace("\r\n", "\n");
 38 | 
 39 |     eprintln!("Parsing: {:?}", json_path);
 40 |     let value = jsonc_parser::cst::CstRootNode::parse(&json_file_text, &ParseOptions::default()).unwrap();
 41 |     let cst_string = value.to_string();
 42 |     assert_eq!(cst_string, json_file_text);
 43 |   }
 44 | }
 45 | 
 46 | fn get_json_file_paths_in_dir(path: &Path) -> Vec<PathBuf> {
 47 |   return read_dir_recursively(path);
 48 | 
 49 |   fn read_dir_recursively(dir_path: &Path) -> Vec<PathBuf> {
 50 |     let mut result = Vec::new();
 51 | 
 52 |     for entry in dir_path.read_dir().expect("read dir failed") {
 53 |       if let Ok(entry) = entry {
 54 |         let entry_path = entry.path();
 55 |         if entry_path.is_file() {
 56 |           if let Some(ext) = entry_path.extension() {
 57 |             if ext == "json" {
 58 |               result.push(entry_path);
 59 |             }
 60 |           }
 61 |         } else {
 62 |           result.extend(read_dir_recursively(&entry_path));
 63 |         }
 64 |       }
 65 |     }
 66 | 
 67 |     result
 68 |   }
 69 | }
 70 | 
 71 | // todo: move elsewhere and improve
 72 | 
 73 | fn parse_result_to_test_str(parse_result: &ParseResult) -> String {
 74 |   let mut text = String::new();
 75 |   text.push_str("{\n");
 76 |   text.push_str(&format!(
 77 |     "  \"value\": {},\n",
 78 |     match &parse_result.value {
 79 |       Some(value) => value_to_test_str(value).replace("\n", "\n  "),
 80 |       None => String::from("null"),
 81 |     }
 82 |   ));
 83 |   text.push_str("  \"comments\": [");
 84 |   let comments = parse_result.comments.as_ref().expect("Expected comments.");
 85 |   let collection_count = comments.len();
 86 |   let mut comments = comments.iter().collect::<Vec<_>>();
 87 |   comments.sort_by(|a, b| a.0.cmp(b.0));
 88 |   for (i, comment_collection) in comments.into_iter().enumerate() {
 89 |     text.push_str("\n    ");
 90 |     text.push_str(&comments_to_test_str(comment_collection).replace("\n", "\n    "));
 91 |     if i + 1 < collection_count {
 92 |       text.push(',');
 93 |     }
 94 |   }
 95 |   text.push_str("\n  ]\n");
 96 |   text.push_str("}\n");
 97 |   text
 98 | }
 99 | 
100 | fn value_to_test_str(value: &Value) -> String {
101 |   match value {
102 |     Value::StringLit(lit) => string_lit_to_test_str(lit),
103 |     Value::NumberLit(lit) => number_lit_to_test_str(lit),
104 |     Value::BooleanLit(lit) => boolean_lit_to_test_str(lit),
105 |     Value::Object(obj) => object_to_test_str(obj),
106 |     Value::Array(arr) => array_to_test_str(arr),
107 |     Value::NullKeyword(keyword) => null_keyword_to_test_str(keyword),
108 |   }
109 | }
110 | 
111 | fn range_to_test_str(range: Range) -> String {
112 |   let mut text = String::new();
113 |   text.push_str("\"range\": {\n");
114 |   text.push_str(&format!("  \"start\": {},\n", range.start));
115 |   text.push_str(&format!("  \"end\": {},\n", range.end));
116 |   text.push('}');
117 |   text
118 | }
119 | 
120 | fn string_lit_to_test_str(lit: &StringLit) -> String {
121 |   lit_to_test_str("string", &lit.value, lit.range)
122 | }
123 | 
124 | fn word_lit_to_test_str(lit: &WordLit) -> String {
125 |   lit_to_test_str("word", lit.value, lit.range)
126 | }
127 | 
128 | fn number_lit_to_test_str(lit: &NumberLit) -> String {
129 |   lit_to_test_str("number", lit.value, lit.range)
130 | }
131 | 
132 | fn boolean_lit_to_test_str(lit: &BooleanLit) -> String {
133 |   lit_to_test_str("boolean", &lit.value.to_string(), lit.range)
134 | }
135 | 
136 | fn lit_to_test_str(lit_type: &str, value: &str, range: Range) -> String {
137 |   let mut text = String::new();
138 |   text.push_str("{\n");
139 |   text.push_str(&format!("  \"type\": \"{}\",\n", lit_type));
140 |   text.push_str(&format!("  {},\n", range_to_test_str(range).replace("\n", "\n  ")));
141 |   text.push_str(&format!("  \"value\": \"{}\"\n", escape_json_str(value)));
142 |   text.push('}');
143 |   text
144 | }
145 | 
146 | fn object_to_test_str(obj: &Object) -> String {
147 |   let mut text = String::new();
148 |   text.push_str("{\n");
149 |   text.push_str("  \"type\": \"object\",\n");
150 |   text.push_str(&format!("  {},\n", range_to_test_str(obj.range).replace("\n", "\n  ")));
151 |   text.push_str("  \"properties\": [");
152 |   let prop_count = obj.properties.len();
153 |   for (i, prop) in obj.properties.iter().enumerate() {
154 |     text.push_str("\n    ");
155 |     text.push_str(&object_prop_to_test_str(prop).replace("\n", "\n    "));
156 |     if i + 1 < prop_count {
157 |       text.push(',');
158 |     }
159 |   }
160 |   text.push_str("\n  ]\n");
161 |   text.push('}');
162 |   text
163 | }
164 | 
165 | fn object_prop_to_test_str(obj_prop: &ObjectProp) -> String {
166 |   let mut text = String::new();
167 |   text.push_str("{\n");
168 |   text.push_str("  \"type\": \"objectProp\",\n");
169 |   text.push_str(&format!(
170 |     "  {},\n",
171 |     range_to_test_str(obj_prop.range).replace("\n", "\n  ")
172 |   ));
173 |   text.push_str(&format!(
174 |     "  \"name\": {},\n",
175 |     object_prop_name_to_test_str(&obj_prop.name).replace("\n", "\n  ")
176 |   ));
177 |   text.push_str(&format!(
178 |     "  \"value\": {}\n",
179 |     value_to_test_str(&obj_prop.value).replace("\n", "\n  ")
180 |   ));
181 |   text.push('}');
182 |   text
183 | }
184 | 
185 | fn object_prop_name_to_test_str(obj_prop_name: &ObjectPropName) -> String {
186 |   match obj_prop_name {
187 |     ObjectPropName::String(lit) => string_lit_to_test_str(lit),
188 |     ObjectPropName::Word(word) => word_lit_to_test_str(word),
189 |   }
190 | }
191 | 
192 | fn array_to_test_str(arr: &Array) -> String {
193 |   let mut text = String::new();
194 |   text.push_str("{\n");
195 |   text.push_str("  \"type\": \"array\",\n");
196 |   text.push_str(&format!("  {},\n", range_to_test_str(arr.range).replace("\n", "\n  ")));
197 |   text.push_str("  \"elements\": [");
198 |   let elements_count = arr.elements.len();
199 |   for (i, element) in arr.elements.iter().enumerate() {
200 |     text.push_str("\n    ");
201 |     text.push_str(&value_to_test_str(element).replace("\n", "\n    "));
202 |     if i + 1 < elements_count {
203 |       text.push(',');
204 |     }
205 |   }
206 |   text.push_str("\n  ]\n");
207 |   text.push('}');
208 |   text
209 | }
210 | 
211 | fn null_keyword_to_test_str(null_keyword: &NullKeyword) -> String {
212 |   let mut text = String::new();
213 |   text.push_str("{\n");
214 |   text.push_str("  \"type\": \"null\",\n");
215 |   text.push_str(&format!(
216 |     "  {}\n",
217 |     range_to_test_str(null_keyword.range).replace("\n", "\n  ")
218 |   ));
219 |   text.push('}');
220 |   text
221 | }
222 | 
223 | fn comments_to_test_str(comments: (&usize, &Rc<Vec<Comment>>)) -> String {
224 |   let mut text = String::new();
225 |   text.push_str("{\n");
226 |   text.push_str(&format!("  \"pos\": {},\n", comments.0));
227 |   text.push_str("  \"comments\": [");
228 |   let comments_count = comments.1.len();
229 |   for (i, comment) in comments.1.iter().enumerate() {
230 |     text.push_str("\n    ");
231 |     text.push_str(&comment_to_test_str(comment).replace("\n", "\n    "));
232 |     if i + 1 < comments_count {
233 |       text.push(',');
234 |     }
235 |   }
236 |   text.push_str("\n  ]\n");
237 |   text.push('}');
238 |   text
239 | }
240 | 
241 | fn comment_to_test_str(comment: &Comment) -> String {
242 |   match comment {
243 |     Comment::Line(line) => comment_line_to_test_str(line),
244 |     Comment::Block(block) => comment_block_to_test_str(block),
245 |   }
246 | }
247 | 
248 | fn comment_line_to_test_str(line: &CommentLine) -> String {
249 |   lit_to_test_str("line", line.text, line.range)
250 | }
251 | 
252 | fn comment_block_to_test_str(block: &CommentBlock) -> String {
253 |   lit_to_test_str("block", block.text, block.range)
254 | }
255 | 
/// Escapes backslashes and the control characters backspace, form feed,
/// carriage return, tab and newline so the text fits on one line of output.
///
/// Quotes are left untouched; the expected spec output relies on that.
fn escape_json_str(text: &str) -> String {
  let mut escaped = String::with_capacity(text.len());
  for c in text.chars() {
    match c {
      '\\' => escaped.push_str("\\\\"),
      '\u{08}' => escaped.push_str("\\b"),
      '\u{0C}' => escaped.push_str("\\f"),
      '\r' => escaped.push_str("\\r"),
      '\t' => escaped.push_str("\\t"),
      '\n' => escaped.push_str("\\n"),
      other => escaped.push(other),
    }
  }
  escaped
}
265 | 


--------------------------------------------------------------------------------
/benches/data/package.txt:
--------------------------------------------------------------------------------
  1 | {
  2 |   "name": "code-oss-dev",
  3 |   "version": "1.56.0",
  4 |   "distro": "bf595b1f6780f5ba3c8cc511b0820871466079d3",
  5 |   "author": {
  6 |     "name": "Microsoft Corporation"
  7 |   },
  8 |   "license": "MIT",
  9 |   "main": "./out/main",
 10 |   "private": true,
 11 |   "scripts": {
 12 |     "test": "mocha",
 13 |     "test-browser": "node test/unit/browser/index.js",
 14 |     "preinstall": "node build/npm/preinstall.js",
 15 |     "postinstall": "node build/npm/postinstall.js",
 16 |     "compile": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js compile",
 17 |     "watch": "npm-run-all -lp watch-client watch-extensions watch-extension-media",
 18 |     "watchd": "deemon yarn watch",
 19 |     "watch-webd": "deemon yarn watch-web",
 20 |     "kill-watchd": "deemon --kill yarn watch",
 21 |     "kill-watch-webd": "deemon --kill yarn watch-web",
 22 |     "restart-watchd": "deemon --restart yarn watch",
 23 |     "restart-watch-webd": "deemon --restart yarn watch-web",
 24 |     "watch-client": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js watch-client",
 25 |     "watch-clientd": "deemon yarn watch-client",
 26 |     "kill-watch-clientd": "deemon --kill yarn watch-client",
 27 |     "watch-extensions": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js watch-extensions",
 28 |     "watch-extension-media": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js watch-extension-media",
 29 |     "watch-extensionsd": "deemon yarn watch-extensions",
 30 |     "kill-watch-extensionsd": "deemon --kill yarn watch-extensions",
 31 |     "mocha": "mocha test/unit/node/all.js --delay",
 32 |     "precommit": "node build/hygiene.js",
 33 |     "gulp": "node --max_old_space_size=8192 ./node_modules/gulp/bin/gulp.js",
 34 |     "electron": "node build/lib/electron",
 35 |     "7z": "7z",
 36 |     "update-grammars": "node build/npm/update-all-grammars.js",
 37 |     "update-localization-extension": "node build/npm/update-localization-extension.js",
 38 |     "smoketest": "cd test/smoke && yarn compile && node test/index.js",
 39 |     "smoketest-no-compile": "cd test/smoke && node test/index.js",
 40 |     "download-builtin-extensions": "node build/lib/builtInExtensions.js",
 41 |     "download-builtin-extensions-cg": "node build/lib/builtInExtensionsCG.js",
 42 |     "monaco-compile-check": "tsc -p src/tsconfig.monaco.json --noEmit",
 43 |     "tsec-compile-check": "node node_modules/tsec/bin/tsec -p src/tsconfig.tsec.json",
 44 |     "valid-layers-check": "node build/lib/layersChecker.js",
 45 |     "strict-function-types-watch": "tsc --watch -p src/tsconfig.json --noEmit --strictFunctionTypes",
 46 |     "update-distro": "node build/npm/update-distro.js",
 47 |     "web": "node resources/web/code-web.js",
 48 |     "compile-web": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js compile-web",
 49 |     "watch-web": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js watch-web",
 50 |     "eslint": "node build/eslint",
 51 |     "electron-rebuild": "electron-rebuild --arch=arm64 --force --version=11.4.1",
 52 |     "playwright-install": "node build/azure-pipelines/common/installPlaywright.js",
 53 |     "compile-build": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js compile-build",
 54 |     "compile-extensions-build": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js compile-extensions-build",
 55 |     "minify-vscode": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js minify-vscode",
 56 |     "minify-vscode-reh": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js minify-vscode-reh",
 57 |     "minify-vscode-reh-web": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js minify-vscode-reh-web",
 58 |     "hygiene": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js hygiene",
 59 |     "core-ci": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js core-ci",
 60 |     "extensions-ci": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js extensions-ci"
 61 |   },
 62 |   "dependencies": {
 63 |     "applicationinsights": "1.0.8",
 64 |     "chokidar": "3.5.1",
 65 |     "graceful-fs": "4.2.3",
 66 |     "http-proxy-agent": "^2.1.0",
 67 |     "https-proxy-agent": "^2.2.3",
 68 |     "iconv-lite-umd": "0.6.8",
 69 |     "jschardet": "2.3.0",
 70 |     "keytar": "7.2.0",
 71 |     "minimist": "^1.2.5",
 72 |     "native-is-elevated": "0.4.3",
 73 |     "native-keymap": "2.2.1",
 74 |     "native-watchdog": "1.3.0",
 75 |     "node-pty": "0.10.0-beta19",
 76 |     "nsfw": "2.1.2",
 77 |     "spdlog": "^0.11.1",
 78 |     "sudo-prompt": "9.2.1",
 79 |     "tas-client-umd": "0.1.4",
 80 |     "v8-inspect-profiler": "^0.0.20",
 81 |     "vscode-oniguruma": "1.3.1",
 82 |     "vscode-proxy-agent": "^0.8.2",
 83 |     "vscode-regexpp": "^3.1.0",
 84 |     "vscode-ripgrep": "^1.11.1",
 85 |     "vscode-sqlite3": "4.0.10",
 86 |     "vscode-textmate": "5.2.0",
 87 |     "xterm": "4.12.0-beta.15",
 88 |     "xterm-addon-search": "0.9.0-beta.1",
 89 |     "xterm-addon-unicode11": "0.3.0-beta.4",
 90 |     "xterm-addon-webgl": "0.11.0-beta.4",
 91 |     "yauzl": "^2.9.2",
 92 |     "yazl": "^2.4.3"
 93 |   },
 94 |   "devDependencies": {
 95 |     "7zip": "0.0.6",
 96 |     "@types/applicationinsights": "0.20.0",
 97 |     "@types/chokidar": "2.1.3",
 98 |     "@types/cookie": "^0.3.3",
 99 |     "@types/copy-webpack-plugin": "^6.0.3",
100 |     "@types/cssnano": "^4.0.0",
101 |     "@types/debug": "4.1.5",
102 |     "@types/graceful-fs": "4.1.2",
103 |     "@types/gulp-postcss": "^8.0.0",
104 |     "@types/http-proxy-agent": "^2.0.1",
105 |     "@types/keytar": "^4.4.0",
106 |     "@types/minimist": "^1.2.1",
107 |     "@types/mocha": "^8.2.0",
108 |     "@types/node": "^12.19.9",
109 |     "@types/sinon": "^1.16.36",
110 |     "@types/trusted-types": "^1.0.6",
111 |     "@types/vscode-windows-registry": "^1.0.0",
112 |     "@types/webpack": "^4.41.25",
113 |     "@types/windows-foreground-love": "^0.3.0",
114 |     "@types/windows-mutex": "^0.4.0",
115 |     "@types/windows-process-tree": "^0.2.0",
116 |     "@types/winreg": "^1.2.30",
117 |     "@types/yauzl": "^2.9.1",
118 |     "@types/yazl": "^2.4.2",
119 |     "@typescript-eslint/eslint-plugin": "3.2.0",
120 |     "@typescript-eslint/parser": "^3.3.0",
121 |     "ansi-colors": "^3.2.3",
122 |     "asar": "^3.0.3",
123 |     "chromium-pickle-js": "^0.2.0",
124 |     "copy-webpack-plugin": "^6.0.3",
125 |     "cson-parser": "^1.3.3",
126 |     "css-loader": "^3.2.0",
127 |     "cssnano": "^4.1.10",
128 |     "debounce": "^1.0.0",
129 |     "deemon": "^1.4.0",
130 |     "electron": "11.4.1",
131 |     "electron-rebuild": "2.0.3",
132 |     "eslint": "6.8.0",
133 |     "eslint-plugin-jsdoc": "^19.1.0",
134 |     "event-stream": "3.3.4",
135 |     "fancy-log": "^1.3.3",
136 |     "fast-plist": "0.1.2",
137 |     "file-loader": "^4.2.0",
138 |     "glob": "^5.0.13",
139 |     "gulp": "^4.0.0",
140 |     "gulp-atom-electron": "^1.30.1",
141 |     "gulp-azure-storage": "^0.11.1",
142 |     "gulp-bom": "^3.0.0",
143 |     "gulp-buffer": "0.0.2",
144 |     "gulp-concat": "^2.6.1",
145 |     "gulp-eslint": "^5.0.0",
146 |     "gulp-filter": "^5.1.0",
147 |     "gulp-flatmap": "^1.0.2",
148 |     "gulp-gunzip": "^1.0.0",
149 |     "gulp-gzip": "^1.4.2",
150 |     "gulp-json-editor": "^2.5.0",
151 |     "gulp-plumber": "^1.2.0",
152 |     "gulp-postcss": "^9.0.0",
153 |     "gulp-remote-retry-src": "^0.6.0",
154 |     "gulp-rename": "^1.2.0",
155 |     "gulp-replace": "^0.5.4",
156 |     "gulp-shell": "^0.6.5",
157 |     "gulp-sourcemaps": "^3.0.0",
158 |     "gulp-tsb": "4.0.6",
159 |     "gulp-untar": "^0.0.7",
160 |     "gulp-vinyl-zip": "^2.1.2",
161 |     "husky": "^0.13.1",
162 |     "innosetup": "6.0.5",
163 |     "is": "^3.1.0",
164 |     "istanbul-lib-coverage": "^3.0.0",
165 |     "istanbul-lib-instrument": "^4.0.0",
166 |     "istanbul-lib-report": "^3.0.0",
167 |     "istanbul-lib-source-maps": "^4.0.0",
168 |     "istanbul-reports": "^3.0.0",
169 |     "jsdom-no-contextify": "^3.1.0",
170 |     "lazy.js": "^0.4.2",
171 |     "merge-options": "^1.0.1",
172 |     "mime": "^1.4.1",
173 |     "minimatch": "^3.0.4",
174 |     "minimist": "^1.2.5",
175 |     "mkdirp": "^1.0.4",
176 |     "mocha": "^8.2.1",
177 |     "mocha-junit-reporter": "^2.0.0",
178 |     "mocha-multi-reporters": "^1.5.1",
179 |     "npm-run-all": "^4.1.5",
180 |     "opn": "^6.0.0",
181 |     "optimist": "0.3.5",
182 |     "p-all": "^1.0.0",
183 |     "playwright": "1.8.0",
184 |     "pump": "^1.0.1",
185 |     "queue": "3.0.6",
186 |     "rcedit": "^1.1.0",
187 |     "request": "^2.85.0",
188 |     "rimraf": "^2.2.8",
189 |     "sinon": "^1.17.2",
190 |     "source-map": "0.6.1",
191 |     "source-map-support": "^0.3.2",
192 |     "style-loader": "^1.0.0",
193 |     "ts-loader": "^6.2.1",
194 |     "tsec": "0.1.4",
195 |     "typescript": "^4.3.0-dev.20210330",
196 |     "typescript-formatter": "7.1.0",
197 |     "underscore": "^1.8.2",
198 |     "vinyl": "^2.0.0",
199 |     "vinyl-fs": "^3.0.0",
200 |     "vscode-debugprotocol": "1.46.0",
201 |     "vscode-nls-dev": "^3.3.1",
202 |     "vscode-telemetry-extractor": "^1.7.0",
203 |     "webpack": "^4.43.0",
204 |     "webpack-cli": "^3.3.12",
205 |     "webpack-stream": "^5.2.1",
206 |     "xml2js": "^0.4.17",
207 |     "yaserver": "^0.2.0"
208 |   },
209 |   "repository": {
210 |     "type": "git",
211 |     "url": "https://github.com/microsoft/vscode.git"
212 |   },
213 |   "bugs": {
214 |     "url": "https://github.com/microsoft/vscode/issues"
215 |   },
216 |   "optionalDependencies": {
217 |     "vscode-windows-ca-certs": "^0.3.0",
218 |     "vscode-windows-registry": "1.0.3",
219 |     "windows-foreground-love": "0.2.0",
220 |     "windows-mutex": "0.3.0",
221 |     "windows-process-tree": "0.2.4"
222 |   },
223 |   "resolutions": {
224 |     "elliptic": "^6.5.3",
225 |     "nwmatcher": "^1.4.4"
226 |   }
227 | }


--------------------------------------------------------------------------------
/src/string.rs:
--------------------------------------------------------------------------------
  1 | use std::borrow::Cow;
  2 | 
  3 | pub struct ParseStringError {
  4 |   pub byte_index: usize,
  5 |   pub kind: ParseStringErrorKind,
  6 | }
  7 | 
/// The reason a string literal failed to parse.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ParseStringErrorKind {
  /// An escaped double quote (`\"`) was found in a single quoted string.
  InvalidEscapeInSingleQuoteString,
  /// An escaped single quote (`\'`) was found in a double quoted string.
  InvalidEscapeInDoubleQuoteString,
  /// A `\u` escape was not followed by four hexadecimal digits.
  ExpectedFourHexDigits,
  /// A `\u` escape does not describe a valid character. The payload holds the
  /// hex digits, optionally followed by an explanation such as an unpaired surrogate.
  InvalidUnicodeEscapeSequence(String),
  /// A backslash was followed by a character that is not a supported escape.
  InvalidEscape,
  /// The input ended before the closing quote was found.
  UnterminatedStringLiteral,
}
 17 | 
// Marks the kind as a standard error type; the trait's default methods are used as-is.
impl std::error::Error for ParseStringErrorKind {}
 19 | 
 20 | impl std::fmt::Display for ParseStringErrorKind {
 21 |   fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 22 |     match self {
 23 |       ParseStringErrorKind::InvalidEscapeInSingleQuoteString => {
 24 |         write!(f, "Invalid escape in single quote string")
 25 |       }
 26 |       ParseStringErrorKind::InvalidEscapeInDoubleQuoteString => {
 27 |         write!(f, "Invalid escape in double quote string")
 28 |       }
 29 |       ParseStringErrorKind::ExpectedFourHexDigits => {
 30 |         write!(f, "Expected four hex digits")
 31 |       }
 32 |       ParseStringErrorKind::InvalidUnicodeEscapeSequence(value) => {
 33 |         write!(
 34 |           f,
 35 |           "Invalid unicode escape sequence. '{}' is not a valid UTF8 character",
 36 |           value
 37 |         )
 38 |       }
 39 |       ParseStringErrorKind::InvalidEscape => {
 40 |         write!(f, "Invalid escape")
 41 |       }
 42 |       ParseStringErrorKind::UnterminatedStringLiteral => {
 43 |         write!(f, "Unterminated string literal")
 44 |       }
 45 |     }
 46 |   }
 47 | }
 48 | 
/// Cursor over source text that the string parser reads characters from.
pub trait CharProvider<'a> {
  /// Returns the character at the current position, or `None` once the input is exhausted.
  fn current_char(&mut self) -> Option<char>;
  /// Returns the byte offset of the current character within [`CharProvider::text`].
  fn byte_index(&self) -> usize;
  /// Advances to the next character and returns it, or `None` at the end of the input.
  fn move_next_char(&mut self) -> Option<char>;
  /// Returns the text being read; the parser slices it using byte offsets.
  fn text(&self) -> &'a str;
}
 55 | 
 56 | #[cfg(feature = "cst")]
 57 | pub fn parse_string(text: &str) -> Result<Cow<'_, str>, ParseStringError> {
 58 |   struct StringCharProvider<'a> {
 59 |     text: &'a str,
 60 |     byte_index: usize,
 61 |     current_char: Option<char>,
 62 |     chars: std::str::Chars<'a>,
 63 |   }
 64 | 
 65 |   impl<'a> CharProvider<'a> for StringCharProvider<'a> {
 66 |     fn current_char(&mut self) -> Option<char> {
 67 |       self.current_char
 68 |     }
 69 | 
 70 |     fn byte_index(&self) -> usize {
 71 |       self.byte_index
 72 |     }
 73 | 
 74 |     fn move_next_char(&mut self) -> Option<char> {
 75 |       if let Some(current_char) = self.current_char {
 76 |         self.byte_index += current_char.len_utf8();
 77 |       }
 78 |       self.current_char = self.chars.next();
 79 |       self.current_char
 80 |     }
 81 | 
 82 |     fn text(&self) -> &'a str {
 83 |       self.text
 84 |     }
 85 |   }
 86 | 
 87 |   let mut chars = text.chars();
 88 |   let mut provider = StringCharProvider {
 89 |     text,
 90 |     byte_index: 0,
 91 |     current_char: chars.next(),
 92 |     chars,
 93 |   };
 94 | 
 95 |   parse_string_with_char_provider(&mut provider)
 96 | }
 97 | 
 98 | pub fn parse_string_with_char_provider<'a, T: CharProvider<'a>>(
 99 |   chars: &mut T,
100 | ) -> Result<Cow<'a, str>, ParseStringError> {
101 |   debug_assert!(
102 |     chars.current_char() == Some('\'') || chars.current_char() == Some('"'),
103 |     "Expected \", was {:?}",
104 |     chars.current_char()
105 |   );
106 |   let is_double_quote = chars.current_char() == Some('"');
107 |   let mut last_start_byte_index = chars.byte_index() + 1;
108 |   let mut text: Option<String> = None;
109 |   let mut last_was_backslash = false;
110 |   let mut found_end_string = false;
111 |   let token_start = chars.byte_index();
112 | 
113 |   while let Some(current_char) = chars.move_next_char() {
114 |     if last_was_backslash {
115 |       let escape_start = chars.byte_index() - 1; // -1 for backslash
116 |       match current_char {
117 |         '"' | '\'' | '\\' | '/' | 'b' | 'f' | 'u' | 'r' | 'n' | 't' => {
118 |           if current_char == '"' {
119 |             if !is_double_quote {
120 |               return Err(ParseStringError {
121 |                 byte_index: escape_start,
122 |                 kind: ParseStringErrorKind::InvalidEscapeInSingleQuoteString,
123 |               });
124 |             }
125 |           } else if current_char == '\'' && is_double_quote {
126 |             return Err(ParseStringError {
127 |               byte_index: escape_start,
128 |               kind: ParseStringErrorKind::InvalidEscapeInDoubleQuoteString,
129 |             });
130 |           }
131 | 
132 |           let previous_text = &chars.text()[last_start_byte_index..escape_start];
133 |           if text.is_none() {
134 |             text = Some(String::new());
135 |           }
136 |           let text = text.as_mut().unwrap();
137 |           text.push_str(previous_text);
138 |           if current_char == 'u' {
139 |             let hex_char = parse_hex_char(chars).map_err(|kind| ParseStringError {
140 |               byte_index: escape_start,
141 |               kind,
142 |             })?;
143 |             text.push(hex_char);
144 |             last_start_byte_index = chars.byte_index() + chars.current_char().map(|c| c.len_utf8()).unwrap_or(0);
145 |           } else {
146 |             text.push(match current_char {
147 |               'b' => '\u{08}',
148 |               'f' => '\u{0C}',
149 |               't' => '\t',
150 |               'r' => '\r',
151 |               'n' => '\n',
152 |               _ => current_char,
153 |             });
154 |             last_start_byte_index = chars.byte_index() + current_char.len_utf8();
155 |           }
156 |         }
157 |         _ => {
158 |           return Err(ParseStringError {
159 |             byte_index: escape_start,
160 |             kind: ParseStringErrorKind::InvalidEscape,
161 |           });
162 |         }
163 |       }
164 |       last_was_backslash = false;
165 |     } else if is_double_quote && current_char == '"' || !is_double_quote && current_char == '\'' {
166 |       found_end_string = true;
167 |       break;
168 |     } else {
169 |       last_was_backslash = current_char == '\\';
170 |     }
171 |   }
172 | 
173 |   if found_end_string {
174 |     chars.move_next_char();
175 |     let final_segment = &chars.text()[last_start_byte_index..chars.byte_index() - 1];
176 |     Ok(match text {
177 |       Some(mut text) => {
178 |         text.push_str(final_segment);
179 |         Cow::Owned(text)
180 |       }
181 |       None => Cow::Borrowed(final_segment),
182 |     })
183 |   } else {
184 |     Err(ParseStringError {
185 |       byte_index: token_start,
186 |       kind: ParseStringErrorKind::UnterminatedStringLiteral,
187 |     })
188 |   }
189 | }
190 | 
191 | fn parse_hex_char<'a, T: CharProvider<'a>>(chars: &mut T) -> Result<char, ParseStringErrorKind> {
192 |   let mut hex_text = String::new();
193 |   // expect four hex values
194 |   for _ in 0..4 {
195 |     let current_char = chars.move_next_char();
196 |     if !is_hex(current_char) {
197 |       return Err(ParseStringErrorKind::ExpectedFourHexDigits);
198 |     }
199 |     if let Some(current_char) = current_char {
200 |       hex_text.push(current_char);
201 |     }
202 |   }
203 | 
204 |   let hex_value = match u32::from_str_radix(&hex_text, 16) {
205 |     Ok(v) => v,
206 |     Err(_) => {
207 |       return Err(ParseStringErrorKind::InvalidUnicodeEscapeSequence(hex_text));
208 |     }
209 |   };
210 | 
211 |   // Check if this is a high surrogate (0xD800-0xDBFF)
212 |   let hex_char = if (0xD800..=0xDBFF).contains(&hex_value) {
213 |     // High surrogate - must be followed by low surrogate
214 |     // Peek ahead for \uXXXX pattern
215 |     let next_char = chars.move_next_char();
216 |     if next_char != Some('\\') {
217 |       return Err(ParseStringErrorKind::InvalidUnicodeEscapeSequence(format!(
218 |         "{} (unpaired high surrogate)",
219 |         hex_text
220 |       )));
221 |     }
222 | 
223 |     let next_char = chars.move_next_char();
224 |     if next_char != Some('u') {
225 |       return Err(ParseStringErrorKind::InvalidUnicodeEscapeSequence(format!(
226 |         "{} (unpaired high surrogate)",
227 |         hex_text
228 |       )));
229 |     }
230 | 
231 |     // Parse the second \uXXXX
232 |     let mut hex_text2 = String::new();
233 |     for _ in 0..4 {
234 |       let current_char = chars.move_next_char();
235 |       if !is_hex(current_char) {
236 |         return Err(ParseStringErrorKind::ExpectedFourHexDigits);
237 |       }
238 |       if let Some(current_char) = current_char {
239 |         hex_text2.push(current_char);
240 |       }
241 |     }
242 | 
243 |     let hex_value2 = match u32::from_str_radix(&hex_text2, 16) {
244 |       Ok(v) => v,
245 |       Err(_) => {
246 |         return Err(ParseStringErrorKind::InvalidUnicodeEscapeSequence(hex_text2));
247 |       }
248 |     };
249 | 
250 |     // Verify it's a low surrogate (0xDC00-0xDFFF)
251 |     if !(0xDC00..=0xDFFF).contains(&hex_value2) {
252 |       return Err(ParseStringErrorKind::InvalidUnicodeEscapeSequence(format!(
253 |         "{} (high surrogate not followed by low surrogate)",
254 |         hex_text
255 |       )));
256 |     }
257 | 
258 |     // Combine surrogate pair using RFC 8259 formula
259 |     let code_point = ((hex_value - 0xD800) * 0x400) + (hex_value2 - 0xDC00) + 0x10000;
260 | 
261 |     match std::char::from_u32(code_point) {
262 |       Some(c) => c,
263 |       None => {
264 |         return Err(ParseStringErrorKind::InvalidUnicodeEscapeSequence(format!(
265 |           "{}\\u{} (invalid surrogate pair)",
266 |           hex_text, hex_text2
267 |         )));
268 |       }
269 |     }
270 |   } else if (0xDC00..=0xDFFF).contains(&hex_value) {
271 |     // Low surrogate without high surrogate
272 |     return Err(ParseStringErrorKind::InvalidUnicodeEscapeSequence(format!(
273 |       "{} (unpaired low surrogate)",
274 |       hex_text
275 |     )));
276 |   } else {
277 |     // Normal unicode escape
278 |     match std::char::from_u32(hex_value) {
279 |       Some(hex_char) => hex_char,
280 |       None => {
281 |         return Err(ParseStringErrorKind::InvalidUnicodeEscapeSequence(hex_text));
282 |       }
283 |     }
284 |   };
285 |   Ok(hex_char)
286 | }
287 | 
288 | fn is_hex(c: Option<char>) -> bool {
289 |   let Some(c) = c else {
290 |     return false;
291 |   };
292 |   is_digit(c) || ('a'..='f').contains(&c) || ('A'..='F').contains(&c)
293 | }
294 | 
/// Returns whether `c` is one of the ASCII digits `0` through `9`.
fn is_digit(c: char) -> bool {
  char::is_ascii_digit(&c)
}
298 | 


--------------------------------------------------------------------------------
/src/ast.rs:
--------------------------------------------------------------------------------
  1 | use super::common::Range;
  2 | use super::common::Ranged;
  3 | use std::borrow::Cow;
  4 | 
  5 | /// JSON value.
/// JSON value.
#[derive(Debug, PartialEq, Clone)]
pub enum Value<'a> {
  /// A string literal.
  StringLit(StringLit<'a>),
  /// A number literal.
  NumberLit(NumberLit<'a>),
  /// A boolean literal.
  BooleanLit(BooleanLit),
  /// An object and its properties.
  Object(Object<'a>),
  /// An array and its elements.
  Array(Array<'a>),
  /// The `null` keyword.
  NullKeyword(NullKeyword),
}
 15 | 
 16 | impl<'a> Value<'a> {
 17 |   pub fn as_string_lit(&self) -> Option<&StringLit<'a>> {
 18 |     match self {
 19 |       Value::StringLit(node) => Some(node),
 20 |       _ => None,
 21 |     }
 22 |   }
 23 | 
 24 |   pub fn as_number_lit(&self) -> Option<&NumberLit<'a>> {
 25 |     match self {
 26 |       Value::NumberLit(node) => Some(node),
 27 |       _ => None,
 28 |     }
 29 |   }
 30 | 
 31 |   pub fn as_boolean_lit(&self) -> Option<&BooleanLit> {
 32 |     match self {
 33 |       Value::BooleanLit(node) => Some(node),
 34 |       _ => None,
 35 |     }
 36 |   }
 37 | 
 38 |   pub fn as_object(&self) -> Option<&Object<'a>> {
 39 |     match self {
 40 |       Value::Object(node) => Some(node),
 41 |       _ => None,
 42 |     }
 43 |   }
 44 | 
 45 |   pub fn as_array(&self) -> Option<&Array<'a>> {
 46 |     match self {
 47 |       Value::Array(node) => Some(node),
 48 |       _ => None,
 49 |     }
 50 |   }
 51 | 
 52 |   pub fn as_null_keyword(&self) -> Option<&NullKeyword> {
 53 |     match self {
 54 |       Value::NullKeyword(node) => Some(node),
 55 |       _ => None,
 56 |     }
 57 |   }
 58 | }
 59 | 
 60 | #[cfg(feature = "serde")]
 61 | impl<'a> From<Value<'a>> for serde_json::Value {
 62 |   fn from(value: Value<'a>) -> Self {
 63 |     use std::str::FromStr;
 64 |     match value {
 65 |       Value::Array(arr) => {
 66 |         let vec = arr.elements.into_iter().map(|v| v.into()).collect();
 67 |         serde_json::Value::Array(vec)
 68 |       }
 69 |       Value::BooleanLit(b) => serde_json::Value::Bool(b.value),
 70 |       Value::NullKeyword(_) => serde_json::Value::Null,
 71 |       Value::NumberLit(num) => {
 72 |         // check if this is a hexadecimal literal (0x or 0X prefix)
 73 |         let num_str = num.value.trim_start_matches(['-', '+']);
 74 |         if num_str.len() > 2 && (num_str.starts_with("0x") || num_str.starts_with("0X")) {
 75 |           // Parse hexadecimal and convert to decimal
 76 |           let hex_part = &num_str[2..];
 77 |           match i64::from_str_radix(hex_part, 16) {
 78 |             Ok(decimal_value) => {
 79 |               let final_value = if num.value.starts_with('-') {
 80 |                 -decimal_value
 81 |               } else {
 82 |                 decimal_value
 83 |               };
 84 |               serde_json::Value::Number(serde_json::Number::from(final_value))
 85 |             }
 86 |             Err(_) => serde_json::Value::String(num.value.to_string()),
 87 |           }
 88 |         } else {
 89 |           // standard decimal number
 90 |           let num_for_parsing = num.value.trim_start_matches('+');
 91 |           match serde_json::Number::from_str(num_for_parsing) {
 92 |             Ok(number) => serde_json::Value::Number(number),
 93 |             Err(_) => serde_json::Value::String(num.value.to_string()),
 94 |           }
 95 |         }
 96 |       }
 97 |       Value::Object(obj) => {
 98 |         let mut map = serde_json::map::Map::new();
 99 |         for prop in obj.properties {
100 |           map.insert(prop.name.into_string(), prop.value.into());
101 |         }
102 |         serde_json::Value::Object(map)
103 |       }
104 |       Value::StringLit(s) => serde_json::Value::String(s.value.into_owned()),
105 |     }
106 |   }
107 | }
108 | 
109 | /// Node that can appear in the AST.
110 | #[derive(Debug, PartialEq, Clone, Copy)]
111 | pub enum Node<'a, 'b> {
112 |   StringLit(&'b StringLit<'a>),
113 |   NumberLit(&'b NumberLit<'a>),
114 |   BooleanLit(&'b BooleanLit),
115 |   Object(&'b Object<'a>),
116 |   ObjectProp(&'b ObjectProp<'a>),
117 |   Array(&'b Array<'a>),
118 |   NullKeyword(&'b NullKeyword),
119 |   WordLit(&'b WordLit<'a>),
120 | }
121 | 
122 | impl<'a, 'b> Node<'a, 'b> {
123 |   /// Gets the node kind.
124 |   pub fn kind(&self) -> NodeKind {
125 |     match self {
126 |       Node::StringLit(_) => NodeKind::StringLit,
127 |       Node::NumberLit(_) => NodeKind::NumberLit,
128 |       Node::BooleanLit(_) => NodeKind::BooleanLit,
129 |       Node::Object(_) => NodeKind::Object,
130 |       Node::ObjectProp(_) => NodeKind::ObjectProp,
131 |       Node::Array(_) => NodeKind::Array,
132 |       Node::NullKeyword(_) => NodeKind::NullKeyword,
133 |       Node::WordLit(_) => NodeKind::WordLit,
134 |     }
135 |   }
136 | 
137 |   pub fn as_string_lit(&self) -> Option<&'b StringLit<'a>> {
138 |     match self {
139 |       Node::StringLit(node) => Some(node),
140 |       _ => None,
141 |     }
142 |   }
143 | 
144 |   pub fn as_number_lit(&self) -> Option<&'b NumberLit<'a>> {
145 |     match self {
146 |       Node::NumberLit(node) => Some(node),
147 |       _ => None,
148 |     }
149 |   }
150 | 
151 |   pub fn as_boolean_lit(&self) -> Option<&'b BooleanLit> {
152 |     match self {
153 |       Node::BooleanLit(node) => Some(node),
154 |       _ => None,
155 |     }
156 |   }
157 | 
158 |   pub fn as_object(&self) -> Option<&'b Object<'a>> {
159 |     match self {
160 |       Node::Object(node) => Some(node),
161 |       _ => None,
162 |     }
163 |   }
164 | 
165 |   pub fn as_object_prop(&self) -> Option<&'b ObjectProp<'a>> {
166 |     match self {
167 |       Node::ObjectProp(node) => Some(node),
168 |       _ => None,
169 |     }
170 |   }
171 | 
172 |   pub fn as_array(&self) -> Option<&'b Array<'a>> {
173 |     match self {
174 |       Node::Array(node) => Some(node),
175 |       _ => None,
176 |     }
177 |   }
178 | 
179 |   pub fn as_null_keyword(&self) -> Option<&'b NullKeyword> {
180 |     match self {
181 |       Node::NullKeyword(node) => Some(node),
182 |       _ => None,
183 |     }
184 |   }
185 | 
186 |   pub fn as_word_lit(&self) -> Option<&'b WordLit<'a>> {
187 |     match self {
188 |       Node::WordLit(node) => Some(node),
189 |       _ => None,
190 |     }
191 |   }
192 | }
193 | 
/// The kind of an AST node, without any of the node's data.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum NodeKind {
  /// A string literal.
  StringLit,
  /// A number literal.
  NumberLit,
  /// A boolean literal.
  BooleanLit,
  /// An object.
  Object,
  /// A property of an object.
  ObjectProp,
  /// An array.
  Array,
  /// The `null` keyword.
  NullKeyword,
  /// A string that's not in quotes.
  WordLit,
}
206 | 
207 | /// Node surrounded in double quotes (ex. `"my string"`).
208 | #[derive(Debug, PartialEq, Clone)]
209 | pub struct StringLit<'a> {
210 |   pub range: Range,
211 |   pub value: Cow<'a, str>,
212 | }
213 | 
214 | /// A string that's not in quotes.
215 | /// Usually the appearance of this would be a parsing error.
216 | #[derive(Debug, PartialEq, Clone)]
217 | pub struct WordLit<'a> {
218 |   pub range: Range,
219 |   pub value: &'a str,
220 | }
221 | 
222 | /// Represents a number (ex. `123`, `99.99`, `-1.2e+2`).
223 | #[derive(Debug, PartialEq, Clone)]
224 | pub struct NumberLit<'a> {
225 |   pub range: Range,
226 |   pub value: &'a str,
227 | }
228 | 
229 | /// Represents a boolean (ex. `true` or `false`).
230 | #[derive(Debug, PartialEq, Clone)]
231 | pub struct BooleanLit {
232 |   pub range: Range,
233 |   pub value: bool,
234 | }
235 | 
236 | /// Represents the null keyword (ex. `null`).
237 | #[derive(Debug, PartialEq, Clone)]
238 | pub struct NullKeyword {
239 |   pub range: Range,
240 | }
241 | 
242 | /// Represents an object that may contain properties (ex. `{}`, `{ "prop": 4 }`).
243 | #[derive(Debug, PartialEq, Clone)]
244 | pub struct Object<'a> {
245 |   pub range: Range,
246 |   pub properties: Vec<ObjectProp<'a>>,
247 | }
248 | 
/// Expands to an expression that removes the first property named `$name`
/// from `$self.properties` and evaluates to `Some` of its inner value, but
/// only when that property's value is a `Value::$value_type`.
///
/// Evaluates to `None` (leaving the properties untouched) when no property
/// has that name or when the first one with that name holds another kind of
/// value.
macro_rules! generate_take {
  ($self:ident, $name:ident, $value_type:ident) => {
    $self
      .properties
      .iter()
      .position(|prop| prop.name.as_str() == $name)
      // check the kind before removing so a mismatch doesn't modify the object
      .filter(|&index| matches!($self.properties[index].value, Value::$value_type(_)))
      .and_then(|index| match $self.properties.remove(index).value {
        Value::$value_type(node) => Some(node),
        _ => None,
      })
  };
}
267 | 
/// Expands to an expression that looks up the first property named `$name`
/// in `$self.properties` and evaluates to `Some` reference to its inner
/// value when that value is a `Value::$value_type`, otherwise `None`.
macro_rules! generate_get {
  ($self:ident, $name:ident, $value_type:ident) => {
    $self
      .properties
      .iter()
      .find(|prop| prop.name.as_str() == $name)
      .and_then(|prop| match &prop.value {
        Value::$value_type(node) => Some(node),
        _ => None,
      })
  };
}
285 | 
286 | impl<'a> Object<'a> {
287 |   /// Gets a property value in the object by its name.
288 |   pub fn get(&self, name: &str) -> Option<&ObjectProp<'a>> {
289 |     self.properties.iter().find(|p| p.name.as_str() == name)
290 |   }
291 | 
292 |   /// Gets a string property value from the object by name.
293 |   /// Returns `None` when not a string or it doesn't exist.
294 |   pub fn get_string(&self, name: &str) -> Option<&StringLit<'a>> {
295 |     generate_get!(self, name, StringLit)
296 |   }
297 | 
298 |   /// Gets a number property value from the object by name.
299 |   /// Returns `None` when not a number or it doesn't exist.
300 |   pub fn get_number(&self, name: &str) -> Option<&NumberLit<'a>> {
301 |     generate_get!(self, name, NumberLit)
302 |   }
303 | 
304 |   /// Gets a boolean property value from the object by name.
305 |   /// Returns `None` when not a boolean or it doesn't exist.
306 |   pub fn get_boolean(&self, name: &str) -> Option<&BooleanLit> {
307 |     generate_get!(self, name, BooleanLit)
308 |   }
309 | 
310 |   /// Gets an object property value from the object by name.
311 |   /// Returns `None` when not an object or it doesn't exist.
312 |   pub fn get_object(&self, name: &str) -> Option<&Object<'a>> {
313 |     generate_get!(self, name, Object)
314 |   }
315 | 
316 |   /// Gets an array property value from the object by name.
317 |   /// Returns `None` when not an array or it doesn't exist.
318 |   pub fn get_array(&self, name: &str) -> Option<&Array<'a>> {
319 |     generate_get!(self, name, Array)
320 |   }
321 | 
322 |   /// Takes a value from the object by name.
323 |   /// Returns `None` when it doesn't exist.
324 |   pub fn take(&mut self, name: &str) -> Option<ObjectProp<'a>> {
325 |     if let Some(pos) = self.properties.iter().position(|p| p.name.as_str() == name) {
326 |       Some(self.properties.remove(pos))
327 |     } else {
328 |       None
329 |     }
330 |   }
331 | 
332 |   /// Takes a string property value from the object by name.
333 |   /// Returns `None` when not a string or it doesn't exist.
334 |   pub fn take_string(&mut self, name: &str) -> Option<StringLit<'a>> {
335 |     generate_take!(self, name, StringLit)
336 |   }
337 | 
338 |   /// Takes a number property value from the object by name.
339 |   /// Returns `None` when not a number or it doesn't exist.
340 |   pub fn take_number(&mut self, name: &str) -> Option<NumberLit<'a>> {
341 |     generate_take!(self, name, NumberLit)
342 |   }
343 | 
344 |   /// Takes a boolean property value from the object by name.
345 |   /// Returns `None` when not a boolean or it doesn't exist.
346 |   pub fn take_boolean(&mut self, name: &str) -> Option<BooleanLit> {
347 |     generate_take!(self, name, BooleanLit)
348 |   }
349 | 
350 |   /// Takes an object property value from the object by name.
351 |   /// Returns `None` when not an object or it doesn't exist.
352 |   pub fn take_object(&mut self, name: &str) -> Option<Object<'a>> {
353 |     generate_take!(self, name, Object)
354 |   }
355 | 
356 |   /// Takes an array property value from the object by name.
357 |   /// Returns `None` when not an array or it doesn't exist.
358 |   pub fn take_array(&mut self, name: &str) -> Option<Array<'a>> {
359 |     generate_take!(self, name, Array)
360 |   }
361 | }
362 | 
363 | /// Represents an object property (ex. `"prop": []`).
364 | #[derive(Debug, PartialEq, Clone)]
365 | pub struct ObjectProp<'a> {
366 |   pub range: Range,
367 |   pub name: ObjectPropName<'a>,
368 |   pub value: Value<'a>,
369 | }
370 | 
371 | /// Represents an object property name that may or may not be in quotes.
372 | #[derive(Debug, PartialEq, Clone)]
373 | pub enum ObjectPropName<'a> {
374 |   String(StringLit<'a>),
375 |   Word(WordLit<'a>),
376 | }
377 | 
378 | impl<'a> ObjectPropName<'a> {
379 |   /// Converts the object property name into a string.
380 |   pub fn into_string(self) -> String {
381 |     match self {
382 |       ObjectPropName::String(lit) => lit.value.into_owned(),
383 |       ObjectPropName::Word(lit) => lit.value.to_string(),
384 |     }
385 |   }
386 | 
387 |   /// Gets the object property name as a string reference.
388 |   pub fn as_str(&'a self) -> &'a str {
389 |     match self {
390 |       ObjectPropName::String(lit) => lit.value.as_ref(),
391 |       ObjectPropName::Word(lit) => lit.value,
392 |     }
393 |   }
394 | }
395 | 
396 | /// Represents an array that may contain elements (ex. `[]`, `[5, 6]`).
397 | #[derive(Debug, PartialEq, Clone)]
398 | pub struct Array<'a> {
399 |   pub range: Range,
400 |   pub elements: Vec<Value<'a>>,
401 | }
402 | 
/// The kind of a JSONC comment.
#[derive(Clone, Debug, PartialEq)]
pub enum CommentKind {
  /// A comment line (ex. `// my comment`).
  Line,
  /// A comment block (ex. `/* my comment */`).
  Block,
}
409 | 
410 | /// JSONC comment.
411 | #[derive(Debug, PartialEq, Clone)]
412 | pub enum Comment<'a> {
413 |   Line(CommentLine<'a>),
414 |   Block(CommentBlock<'a>),
415 | }
416 | 
417 | impl<'a> Comment<'a> {
418 |   /// Gets the text of the comment.
419 |   pub fn text(&self) -> &'a str {
420 |     match self {
421 |       Comment::Line(line) => line.text,
422 |       Comment::Block(line) => line.text,
423 |     }
424 |   }
425 | 
426 |   /// Gets the comment kind.
427 |   pub fn kind(&self) -> CommentKind {
428 |     match self {
429 |       Comment::Line(_) => CommentKind::Line,
430 |       Comment::Block(_) => CommentKind::Block,
431 |     }
432 |   }
433 | }
434 | 
435 | impl<'a> Ranged for Comment<'a> {
436 |   fn range(&self) -> Range {
437 |     match self {
438 |       Comment::Line(line) => line.range(),
439 |       Comment::Block(line) => line.range(),
440 |     }
441 |   }
442 | }
443 | 
444 | /// Represents a comment line (ex. `// my comment`).
445 | #[derive(Debug, PartialEq, Clone)]
446 | pub struct CommentLine<'a> {
447 |   pub range: Range,
448 |   pub text: &'a str,
449 | }
450 | 
451 | /// Represents a comment block (ex. `/* my comment */`).
452 | #[derive(Debug, PartialEq, Clone)]
453 | pub struct CommentBlock<'a> {
454 |   pub range: Range,
455 |   pub text: &'a str,
456 | }
457 | 
458 | // Object Property Name
459 | 
460 | impl<'a, 'b> From<&'b ObjectPropName<'a>> for Node<'a, 'b> {
461 |   fn from(object_prop_name: &'b ObjectPropName<'a>) -> Node<'a, 'b> {
462 |     match object_prop_name {
463 |       ObjectPropName::String(lit) => lit.into(),
464 |       ObjectPropName::Word(lit) => lit.into(),
465 |     }
466 |   }
467 | }
468 | 
469 | impl<'a> Ranged for ObjectPropName<'a> {
470 |   fn range(&self) -> Range {
471 |     match self {
472 |       ObjectPropName::String(lit) => lit.range(),
473 |       ObjectPropName::Word(lit) => lit.range(),
474 |     }
475 |   }
476 | }
477 | 
478 | // Implement Traits
479 | 
480 | macro_rules! impl_ranged {
481 |   ($($node_name:ident),*) => {
482 |     $(
483 |       impl Ranged for $node_name {
484 |         fn range(&self) -> Range {
485 |             self.range
486 |         }
487 |       }
488 |     )*
489 |   };
490 | }
491 | 
492 | impl_ranged![BooleanLit, NullKeyword];
493 | 
494 | macro_rules! impl_ranged_lifetime {
495 |   ($($node_name:ident),*) => {
496 |     $(
497 |       impl<'a> Ranged for $node_name<'a> {
498 |         fn range(&self) -> Range {
499 |             self.range
500 |         }
501 |       }
502 |     )*
503 |   };
504 | }
505 | 
506 | impl_ranged_lifetime![
507 |   WordLit,
508 |   Object,
509 |   ObjectProp,
510 |   Array,
511 |   CommentLine,
512 |   CommentBlock,
513 |   NumberLit,
514 |   StringLit
515 | ];
516 | 
517 | impl<'a> Ranged for Value<'a> {
518 |   fn range(&self) -> Range {
519 |     match self {
520 |       Value::Array(node) => node.range(),
521 |       Value::BooleanLit(node) => node.range(),
522 |       Value::NullKeyword(node) => node.range(),
523 |       Value::NumberLit(node) => node.range(),
524 |       Value::Object(node) => node.range(),
525 |       Value::StringLit(node) => node.range(),
526 |     }
527 |   }
528 | }
529 | 
530 | impl<'a, 'b> Ranged for Node<'a, 'b> {
531 |   fn range(&self) -> Range {
532 |     match self {
533 |       Node::StringLit(node) => node.range(),
534 |       Node::NumberLit(node) => node.range(),
535 |       Node::BooleanLit(node) => node.range(),
536 |       Node::NullKeyword(node) => node.range(),
537 |       Node::WordLit(node) => node.range(),
538 |       Node::Array(node) => node.range(),
539 |       Node::Object(node) => node.range(),
540 |       Node::ObjectProp(node) => node.range(),
541 |     }
542 |   }
543 | }
544 | 
545 | macro_rules! generate_node {
546 |     ($($node_name:ident),*) => {
547 |         $(
548 |         impl<'a, 'b> From<&'b $node_name> for Node<'a, 'b> {
549 |             fn from(node: &'b $node_name) -> Node<'a, 'b> {
550 |                 Node::$node_name(node)
551 |             }
552 |         }
553 |         )*
554 |     };
555 | }
556 | 
557 | generate_node![BooleanLit, NullKeyword];
558 | 
559 | macro_rules! generate_node_lifetime {
560 |     ($($node_name:ident),*) => {
561 | 
562 |         $(
563 |         impl<'a, 'b> From<&'b $node_name<'a>> for Node<'a, 'b> {
564 |             fn from(node: &'b $node_name<'a>) -> Node<'a, 'b> {
565 |                 Node::$node_name(node)
566 |             }
567 |         }
568 |         )*
569 |     };
570 | }
571 | 
572 | generate_node_lifetime![WordLit, Object, ObjectProp, Array, NumberLit, StringLit];
573 | 
574 | impl<'a, 'b> From<&'b Value<'a>> for Node<'a, 'b> {
575 |   fn from(value: &'b Value<'a>) -> Node<'a, 'b> {
576 |     match value {
577 |       Value::Array(node) => Node::Array(node),
578 |       Value::BooleanLit(node) => Node::BooleanLit(node),
579 |       Value::NullKeyword(node) => Node::NullKeyword(node),
580 |       Value::NumberLit(node) => Node::NumberLit(node),
581 |       Value::Object(node) => Node::Object(node),
582 |       Value::StringLit(node) => Node::StringLit(node),
583 |     }
584 |   }
585 | }
586 | 
#[cfg(test)]
mod test {
  use super::*;
  use crate::ParseOptions;
  use crate::parse_to_ast;

  /// Parses the text with the default options and returns the root value.
  fn parse_root(text: &str) -> Value<'_> {
    let ast = parse_to_ast(text, &Default::default(), &ParseOptions::default()).unwrap();
    ast.value.unwrap()
  }

  /// Parses the text with the default options, expecting the root to be an object.
  fn parse_root_object(text: &str) -> Object<'_> {
    match parse_root(text) {
      Value::Object(obj) => obj,
      _ => unreachable!(),
    }
  }

  #[test]
  fn it_should_take() {
    let mut obj = parse_root_object("{'prop': 'asdf', 'other': 'text'}");

    // nothing is removed when the name or the kind doesn't match
    assert_eq!(obj.properties.len(), 2);
    assert_eq!(obj.take_string("asdf"), None);
    assert_eq!(obj.properties.len(), 2);
    assert_eq!(obj.take_number("prop"), None);
    assert_eq!(obj.properties.len(), 2);

    assert!(obj.take_string("prop").is_some());
    assert_eq!(obj.properties.len(), 1);

    assert_eq!(obj.take("something"), None);
    assert_eq!(obj.properties.len(), 1);

    assert!(obj.take("other").is_some());
    assert_eq!(obj.properties.len(), 0);
  }

  #[test]
  fn it_should_get() {
    let obj = parse_root_object("{'prop': 'asdf'}");

    assert_eq!(obj.properties.len(), 1);
    assert_eq!(obj.get_string("asdf"), None);
    assert!(obj.get_string("prop").is_some());
    assert_eq!(obj.get("asdf"), None);
    assert_eq!(obj.properties.len(), 1);
  }

  #[cfg(feature = "serde")]
  #[test]
  fn it_should_coerce_to_serde_value() {
    let serde_value: serde_json::Value = parse_root(r#"{"prop":[true,1,null,"str"]}"#).into();

    let expected = serde_json::json!({
      "prop": [
        true,
        1,
        null,
        "str"
      ]
    });
    assert_eq!(serde_value, expected);
  }

  #[cfg(feature = "serde")]
  #[test]
  fn handle_weird_data() {
    let serde_value: serde_json::Value = parse_root(r#"{eyyyyyyy:6yy:6000e000615yyyk:6}"#).into();

    // this output is fine because the input is bad
    let expected = serde_json::json!({
      "eyyyyyyy": 6,
      "yy": "6000e000615",
      "yyyk": 6
    });
    assert_eq!(serde_value, expected);
  }
}
682 | 


--------------------------------------------------------------------------------
/src/scanner.rs:
--------------------------------------------------------------------------------
  1 | use crate::string::CharProvider;
  2 | 
  3 | use super::common::Range;
  4 | use super::errors::*;
  5 | use super::tokens::Token;
  6 | use std::str::Chars;
  7 | 
  8 | /// Converts text into a stream of tokens.
  9 | pub struct Scanner<'a> {
 10 |   byte_index: usize,
 11 |   token_start: usize,
 12 |   char_iter: Chars<'a>,
 13 |   // todo(dsherret): why isn't this a VecDeque?
 14 |   char_buffer: Vec<char>,
 15 |   current_token: Option<Token<'a>>,
 16 |   file_text: &'a str,
 17 |   allow_single_quoted_strings: bool,
 18 |   allow_hexadecimal_numbers: bool,
 19 |   allow_unary_plus_numbers: bool,
 20 | }
 21 | 
 22 | const CHAR_BUFFER_MAX_SIZE: usize = 6;
 23 | 
 24 | /// Options for the scanner.
 25 | #[derive(Debug)]
 26 | pub struct ScannerOptions {
 27 |   /// Allow single-quoted strings (defaults to `true`).
 28 |   pub allow_single_quoted_strings: bool,
 29 |   /// Allow hexadecimal numbers like 0xFF (defaults to `true`).
 30 |   pub allow_hexadecimal_numbers: bool,
 31 |   /// Allow unary plus sign on numbers like +42 (defaults to `true`).
 32 |   pub allow_unary_plus_numbers: bool,
 33 | }
 34 | 
 35 | impl Default for ScannerOptions {
 36 |   fn default() -> Self {
 37 |     Self {
 38 |       allow_single_quoted_strings: true,
 39 |       allow_hexadecimal_numbers: true,
 40 |       allow_unary_plus_numbers: true,
 41 |     }
 42 |   }
 43 | }
 44 | 
 45 | impl<'a> Scanner<'a> {
 46 |   /// Creates a new scanner with specific options.
 47 |   pub fn new(file_text: &'a str, options: &ScannerOptions) -> Scanner<'a> {
 48 |     let mut char_iter = file_text.chars();
 49 |     let mut char_buffer = Vec::with_capacity(CHAR_BUFFER_MAX_SIZE);
 50 |     let current_char = char_iter.next();
 51 |     if let Some(current_char) = current_char {
 52 |       char_buffer.push(current_char);
 53 |     }
 54 | 
 55 |     Scanner {
 56 |       byte_index: 0,
 57 |       token_start: 0,
 58 |       char_iter,
 59 |       char_buffer,
 60 |       current_token: None,
 61 |       file_text,
 62 |       allow_single_quoted_strings: options.allow_single_quoted_strings,
 63 |       allow_hexadecimal_numbers: options.allow_hexadecimal_numbers,
 64 |       allow_unary_plus_numbers: options.allow_unary_plus_numbers,
 65 |     }
 66 |   }
 67 | 
 68 |   pub fn file_text(&self) -> &str {
 69 |     self.file_text
 70 |   }
 71 | 
 72 |   /// Moves to and returns the next token.
 73 |   pub fn scan(&mut self) -> Result<Option<Token<'a>>, ParseError> {
 74 |     self.skip_whitespace();
 75 |     self.token_start = self.byte_index;
 76 |     if let Some(current_char) = self.current_char() {
 77 |       let token_result = match current_char {
 78 |         '{' => {
 79 |           self.move_next_char();
 80 |           Ok(Token::OpenBrace)
 81 |         }
 82 |         '}' => {
 83 |           self.move_next_char();
 84 |           Ok(Token::CloseBrace)
 85 |         }
 86 |         '[' => {
 87 |           self.move_next_char();
 88 |           Ok(Token::OpenBracket)
 89 |         }
 90 |         ']' => {
 91 |           self.move_next_char();
 92 |           Ok(Token::CloseBracket)
 93 |         }
 94 |         ',' => {
 95 |           self.move_next_char();
 96 |           Ok(Token::Comma)
 97 |         }
 98 |         ':' => {
 99 |           self.move_next_char();
100 |           Ok(Token::Colon)
101 |         }
102 |         '\'' => {
103 |           if self.allow_single_quoted_strings {
104 |             self.parse_string()
105 |           } else {
106 |             Err(self.create_error_for_current_token(ParseErrorKind::SingleQuotedStringsNotAllowed))
107 |           }
108 |         }
109 |         '"' => self.parse_string(),
110 |         '/' => match self.peek_char() {
111 |           Some('/') => Ok(self.parse_comment_line()),
112 |           Some('*') => self.parse_comment_block(),
113 |           _ => Err(self.create_error_for_current_token(ParseErrorKind::UnexpectedToken)),
114 |         },
115 |         _ => {
116 |           if current_char == '-' || current_char == '+' || self.is_digit() {
117 |             self.parse_number()
118 |           } else if self.try_move_word("true") {
119 |             Ok(Token::Boolean(true))
120 |           } else if self.try_move_word("false") {
121 |             Ok(Token::Boolean(false))
122 |           } else if self.try_move_word("null") {
123 |             Ok(Token::Null)
124 |           } else {
125 |             self.parse_word()
126 |           }
127 |         }
128 |       };
129 |       match token_result {
130 |         Ok(token) => {
131 |           self.current_token = Some(token.clone());
132 |           Ok(Some(token))
133 |         }
134 |         Err(err) => Err(err),
135 |       }
136 |     } else {
137 |       self.current_token = None;
138 |       Ok(None)
139 |     }
140 |   }
141 | 
142 |   /// Gets the start position of the token.
143 |   pub fn token_start(&self) -> usize {
144 |     self.token_start
145 |   }
146 | 
147 |   /// Gets the end position of the token.
148 |   pub fn token_end(&self) -> usize {
149 |     self.byte_index
150 |   }
151 | 
152 |   /// Gets the current token.
153 |   pub fn token(&self) -> Option<Token<'a>> {
154 |     self.current_token.as_ref().map(|x| x.to_owned())
155 |   }
156 | 
157 |   pub(super) fn create_error_for_current_token(&self, kind: ParseErrorKind) -> ParseError {
158 |     self.create_error_for_start(self.token_start, kind)
159 |   }
160 | 
161 |   pub(super) fn create_error_for_current_char(&self, kind: ParseErrorKind) -> ParseError {
162 |     self.create_error_for_start(self.byte_index, kind)
163 |   }
164 | 
165 |   pub(super) fn create_error_for_start(&self, start: usize, kind: ParseErrorKind) -> ParseError {
166 |     let range = Range {
167 |       start,
168 |       end: if let Some(c) = self.file_text[self.byte_index..].chars().next() {
169 |         self.byte_index + c.len_utf8()
170 |       } else {
171 |         self.file_text.len()
172 |       },
173 |     };
174 |     self.create_error_for_range(range, kind)
175 |   }
176 | 
177 |   pub(super) fn create_error_for_range(&self, range: Range, kind: ParseErrorKind) -> ParseError {
178 |     ParseError::new(range, kind, self.file_text)
179 |   }
180 | 
181 |   fn parse_string(&mut self) -> Result<Token<'a>, ParseError> {
182 |     crate::string::parse_string_with_char_provider(self)
183 |       .map(Token::String)
184 |       // todo(dsherret): don't convert the error kind to a string here
185 |       .map_err(|err| self.create_error_for_start(err.byte_index, ParseErrorKind::String(err.kind)))
186 |   }
187 | 
188 |   fn parse_number(&mut self) -> Result<Token<'a>, ParseError> {
189 |     let start_byte_index = self.byte_index;
190 | 
191 |     // handle unary plus and unary minus
192 |     if self.is_positive_sign() {
193 |       if !self.allow_unary_plus_numbers {
194 |         return Err(self.create_error_for_current_token(ParseErrorKind::UnaryPlusNumbersNotAllowed));
195 |       }
196 |       self.move_next_char();
197 |     } else if self.is_negative_sign() {
198 |       self.move_next_char();
199 |     }
200 | 
201 |     if self.is_zero() {
202 |       self.move_next_char();
203 | 
204 |       // check for hexadecimal literal (0x or 0X)
205 |       if matches!(self.current_char(), Some('x') | Some('X')) {
206 |         if !self.allow_hexadecimal_numbers {
207 |           return Err(self.create_error_for_current_token(ParseErrorKind::HexadecimalNumbersNotAllowed));
208 |         }
209 | 
210 |         self.move_next_char();
211 | 
212 |         // must have at least one hex digit
213 |         if !self.is_hex_digit() {
214 |           return Err(self.create_error_for_current_char(ParseErrorKind::ExpectedDigit));
215 |         }
216 | 
217 |         while self.is_hex_digit() {
218 |           self.move_next_char();
219 |         }
220 | 
221 |         let end_byte_index = self.byte_index;
222 |         return Ok(Token::Number(&self.file_text[start_byte_index..end_byte_index]));
223 |       }
224 |     } else if self.is_one_nine() {
225 |       self.move_next_char();
226 |       while self.is_digit() {
227 |         self.move_next_char();
228 |       }
229 |     } else {
230 |       return Err(self.create_error_for_current_char(ParseErrorKind::ExpectedDigitFollowingNegativeSign));
231 |     }
232 | 
233 |     if self.is_decimal_point() {
234 |       self.move_next_char();
235 | 
236 |       if !self.is_digit() {
237 |         return Err(self.create_error_for_current_char(ParseErrorKind::ExpectedDigit));
238 |       }
239 | 
240 |       while self.is_digit() {
241 |         self.move_next_char();
242 |       }
243 |     }
244 | 
245 |     match self.current_char() {
246 |       Some('e') | Some('E') => {
247 |         match self.move_next_char() {
248 |           Some('-') | Some('+') => {
249 |             self.move_next_char();
250 |             if !self.is_digit() {
251 |               return Err(self.create_error_for_current_char(ParseErrorKind::ExpectedDigit));
252 |             }
253 |           }
254 |           _ => {
255 |             if !self.is_digit() {
256 |               return Err(self.create_error_for_current_char(ParseErrorKind::ExpectedPlusMinusOrDigitInNumberLiteral));
257 |             }
258 |           }
259 |         }
260 | 
261 |         while self.is_digit() {
262 |           self.move_next_char();
263 |         }
264 |       }
265 |       _ => {}
266 |     }
267 | 
268 |     let end_byte_index = self.byte_index;
269 |     Ok(Token::Number(&self.file_text[start_byte_index..end_byte_index]))
270 |   }
271 | 
272 |   fn parse_comment_line(&mut self) -> Token<'a> {
273 |     self.assert_then_move_char('/');
274 |     #[cfg(debug_assertions)]
275 |     self.assert_char('/');
276 | 
277 |     let start_byte_index = self.byte_index + 1;
278 |     while self.move_next_char().is_some() {
279 |       if self.is_new_line() {
280 |         break;
281 |       }
282 |     }
283 | 
284 |     Token::CommentLine(&self.file_text[start_byte_index..self.byte_index])
285 |   }
286 | 
287 |   fn parse_comment_block(&mut self) -> Result<Token<'a>, ParseError> {
288 |     self.assert_then_move_char('/');
289 |     #[cfg(debug_assertions)]
290 |     self.assert_char('*');
291 |     let mut found_end = false;
292 | 
293 |     let start_byte_index = self.byte_index + 1;
294 |     while let Some(current_char) = self.move_next_char() {
295 |       if current_char == '*' && self.peek_char() == Some('/') {
296 |         found_end = true;
297 |         break;
298 |       }
299 |     }
300 | 
301 |     if found_end {
302 |       let end_byte_index = self.byte_index;
303 |       self.assert_then_move_char('*');
304 |       self.assert_then_move_char('/');
305 |       Ok(Token::CommentBlock(&self.file_text[start_byte_index..end_byte_index]))
306 |     } else {
307 |       Err(self.create_error_for_current_token(ParseErrorKind::UnterminatedCommentBlock))
308 |     }
309 |   }
310 | 
311 |   fn skip_whitespace(&mut self) {
312 |     while let Some(current_char) = self.current_char() {
313 |       if current_char.is_whitespace() {
314 |         self.move_next_char();
315 |       } else {
316 |         break;
317 |       }
318 |     }
319 |   }
320 | 
321 |   fn try_move_word(&mut self, text: &str) -> bool {
322 |     let mut char_index = 0;
323 |     for c in text.chars() {
324 |       if let Some(current_char) = self.peek_char_offset(char_index) {
325 |         if current_char != c {
326 |           return false;
327 |         }
328 | 
329 |         char_index += 1;
330 |       } else {
331 |         return false;
332 |       }
333 |     }
334 | 
335 |     if let Some(next_char) = self.peek_char_offset(char_index)
336 |       && next_char.is_alphanumeric()
337 |     {
338 |       return false;
339 |     }
340 | 
341 |     for _ in 0..char_index {
342 |       self.move_next_char();
343 |     }
344 | 
345 |     true
346 |   }
347 | 
348 |   fn parse_word(&mut self) -> Result<Token<'a>, ParseError> {
349 |     let start_byte_index = self.byte_index;
350 | 
351 |     while let Some(current_char) = self.current_char() {
352 |       // check for word terminators
353 |       if current_char.is_whitespace() || current_char == ':' {
354 |         break;
355 |       }
356 |       // validate that the character is allowed in a word literal
357 |       if !current_char.is_alphanumeric() && current_char != '-' && current_char != '_' {
358 |         return Err(self.create_error_for_current_token(ParseErrorKind::UnexpectedToken));
359 |       }
360 | 
361 |       self.move_next_char();
362 |     }
363 | 
364 |     let end_byte_index = self.byte_index;
365 | 
366 |     if end_byte_index - start_byte_index == 0 {
367 |       return Err(self.create_error_for_current_token(ParseErrorKind::UnexpectedToken));
368 |     }
369 | 
370 |     Ok(Token::Word(&self.file_text[start_byte_index..end_byte_index]))
371 |   }
372 | 
373 |   fn assert_then_move_char(&mut self, _character: char) {
374 |     #[cfg(debug_assertions)]
375 |     self.assert_char(_character);
376 | 
377 |     self.move_next_char();
378 |   }
379 | 
380 |   #[cfg(debug_assertions)]
381 |   fn assert_char(&mut self, character: char) {
382 |     let current_char = self.current_char();
383 |     debug_assert!(
384 |       current_char == Some(character),
385 |       "Expected {:?}, was {:?}",
386 |       character,
387 |       current_char
388 |     );
389 |   }
390 | 
391 |   fn move_next_char(&mut self) -> Option<char> {
392 |     if let Some(&current_char) = self.char_buffer.first() {
393 |       // shift the entire array to the left then pop the last item
394 |       for i in 1..self.char_buffer.len() {
395 |         self.char_buffer[i - 1] = self.char_buffer[i];
396 |       }
397 |       self.char_buffer.pop();
398 | 
399 |       if self.char_buffer.is_empty()
400 |         && let Some(new_char) = self.char_iter.next()
401 |       {
402 |         self.char_buffer.push(new_char);
403 |       }
404 | 
405 |       self.byte_index += current_char.len_utf8();
406 |     }
407 | 
408 |     self.current_char()
409 |   }
410 | 
411 |   fn peek_char(&mut self) -> Option<char> {
412 |     self.peek_char_offset(1)
413 |   }
414 | 
415 |   fn peek_char_offset(&mut self, offset: usize) -> Option<char> {
416 |     // fill the char buffer
417 |     for _ in self.char_buffer.len()..offset + 1 {
418 |       if let Some(next_char) = self.char_iter.next() {
419 |         self.char_buffer.push(next_char);
420 |       } else {
421 |         // end of string
422 |         return None;
423 |       }
424 |     }
425 | 
426 |     // should not exceed this
427 |     debug_assert!(self.char_buffer.len() <= CHAR_BUFFER_MAX_SIZE);
428 | 
429 |     self.char_buffer.get(offset).copied()
430 |   }
431 | 
432 |   fn current_char(&self) -> Option<char> {
433 |     self.char_buffer.first().copied()
434 |   }
435 | 
436 |   fn is_new_line(&mut self) -> bool {
437 |     match self.current_char() {
438 |       Some('\n') => true,
439 |       Some('\r') => self.peek_char() == Some('\n'),
440 |       _ => false,
441 |     }
442 |   }
443 | 
444 |   fn is_digit(&self) -> bool {
445 |     self.is_one_nine() || self.is_zero()
446 |   }
447 | 
448 |   fn is_hex_digit(&self) -> bool {
449 |     match self.current_char() {
450 |       Some(current_char) => current_char.is_ascii_hexdigit(),
451 |       _ => false,
452 |     }
453 |   }
454 | 
455 |   fn is_zero(&self) -> bool {
456 |     self.current_char() == Some('0')
457 |   }
458 | 
459 |   fn is_one_nine(&self) -> bool {
460 |     match self.current_char() {
461 |       Some(current_char) => ('1'..='9').contains(&current_char),
462 |       _ => false,
463 |     }
464 |   }
465 | 
466 |   fn is_negative_sign(&self) -> bool {
467 |     self.current_char() == Some('-')
468 |   }
469 | 
470 |   fn is_positive_sign(&self) -> bool {
471 |     self.current_char() == Some('+')
472 |   }
473 | 
474 |   fn is_decimal_point(&self) -> bool {
475 |     self.current_char() == Some('.')
476 |   }
477 | }
478 | 
479 | impl<'a> CharProvider<'a> for Scanner<'a> {
480 |   fn current_char(&mut self) -> Option<char> {
481 |     Scanner::current_char(self)
482 |   }
483 | 
484 |   fn move_next_char(&mut self) -> Option<char> {
485 |     Scanner::move_next_char(self)
486 |   }
487 | 
488 |   fn byte_index(&self) -> usize {
489 |     self.byte_index
490 |   }
491 | 
492 |   fn text(&self) -> &'a str {
493 |     self.file_text
494 |   }
495 | }
496 | 
// Unit tests for the scanner: each test feeds a small input through
// `Scanner::scan` and checks either the produced tokens or the first error.
#[cfg(test)]
mod tests {
  use std::borrow::Cow;

  use super::super::tokens::Token;
  use super::*;
  use pretty_assertions::assert_eq;

  #[test]
  fn it_tokenizes_string() {
    assert_has_tokens(
      r#""t\"est", "\t\r\n\n\u0020 test\n other","#,
      vec![
        Token::String(Cow::Borrowed(r#"t"est"#)),
        Token::Comma,
        Token::String(Cow::Borrowed("\t\r\n\n  test\n other")),
        Token::Comma,
      ],
    );
  }

  #[test]
  fn it_errors_escaping_single_quote_in_double_quote() {
    assert_has_error(
      r#""t\'est""#,
      "Invalid escape in double quote string on line 1 column 3",
    );
  }

  #[test]
  fn it_tokenizes_single_quote_string() {
    assert_has_tokens(
      r#"'t\'est','a',"#,
      vec![
        Token::String(Cow::Borrowed(r#"t'est"#)),
        Token::Comma,
        Token::String(Cow::Borrowed("a")),
        Token::Comma,
      ],
    );
  }

  #[test]
  fn it_errors_escaping_double_quote_in_single_quote() {
    assert_has_error(
      r#"'t\"est'"#,
      "Invalid escape in single quote string on line 1 column 3",
    );
  }

  #[test]
  fn it_errors_for_word_starting_with_invalid_token() {
    assert_has_error(r#"{ &test }"#, "Unexpected token on line 1 column 3");
  }

  #[test]
  fn it_tokenizes_numbers() {
    assert_has_tokens(
      "0, 0.123, -198, 0e-345, 0.3e+025, 1e1,",
      vec![
        Token::Number("0"),
        Token::Comma,
        Token::Number("0.123"),
        Token::Comma,
        Token::Number("-198"),
        Token::Comma,
        Token::Number("0e-345"),
        Token::Comma,
        Token::Number("0.3e+025"),
        Token::Comma,
        Token::Number("1e1"),
        Token::Comma,
      ],
    );
  }

  #[test]
  fn it_tokenizes_hexadecimal_numbers() {
    assert_has_tokens(
      "0x7DF, 0xFF, 0x123ABC, 0xabc, 0X1F",
      vec![
        Token::Number("0x7DF"),
        Token::Comma,
        Token::Number("0xFF"),
        Token::Comma,
        Token::Number("0x123ABC"),
        Token::Comma,
        Token::Number("0xabc"),
        Token::Comma,
        Token::Number("0X1F"),
      ],
    );
  }

  #[test]
  fn it_tokenizes_unary_plus_numbers() {
    assert_has_tokens(
      "+42, +0.5, +1e10, +0xFF",
      vec![
        Token::Number("+42"),
        Token::Comma,
        Token::Number("+0.5"),
        Token::Comma,
        Token::Number("+1e10"),
        Token::Comma,
        Token::Number("+0xFF"),
      ],
    );
  }

  #[test]
  fn it_errors_invalid_exponent() {
    assert_has_error(
      r#"1ea"#,
      "Expected plus, minus, or digit in number literal on line 1 column 3",
    );
    assert_has_error(r#"1e-a"#, "Expected digit on line 1 column 4");
  }

  #[test]
  fn it_tokenizes_simple_tokens() {
    assert_has_tokens(
      "{}[],:true,false,null,",
      vec![
        Token::OpenBrace,
        Token::CloseBrace,
        Token::OpenBracket,
        Token::CloseBracket,
        Token::Comma,
        Token::Colon,
        Token::Boolean(true),
        Token::Comma,
        Token::Boolean(false),
        Token::Comma,
        Token::Null,
        Token::Comma,
      ],
    );
  }

  #[test]
  fn it_tokenizes_comment_line() {
    assert_has_tokens(
      "//test\n//t\r\n// test\n,",
      vec![
        Token::CommentLine("test"),
        Token::CommentLine("t"),
        Token::CommentLine(" test"),
        Token::Comma,
      ],
    );
  }

  #[test]
  fn it_tokenizes_comment_blocks() {
    assert_has_tokens(
      "/*test\n *//* test*/,",
      vec![
        Token::CommentBlock("test\n "),
        Token::CommentBlock(" test"),
        Token::Comma,
      ],
    );
  }

  #[test]
  fn it_errors_on_invalid_utf8_char_for_issue_6() {
    assert_has_error(
      "\"\\uDF06\"",
      "Invalid unicode escape sequence. 'DF06 (unpaired low surrogate)' is not a valid UTF8 character on line 1 column 2",
    );
  }

  /// Scans `text` to the end and asserts that exactly `tokens` were produced.
  ///
  /// Panics if the scanner reports an error. `#[track_caller]` makes a failure
  /// point at the calling test rather than at this helper.
  #[track_caller]
  fn assert_has_tokens(text: &str, tokens: Vec<Token>) {
    let mut scanner = Scanner::new(text, &Default::default());
    let mut scanned_tokens = Vec::new();

    loop {
      match scanner.scan() {
        Ok(Some(token)) => scanned_tokens.push(token),
        Ok(None) => break,
        Err(err) => panic!("Error parsing: {:?}", err),
      }
    }

    assert_eq!(scanned_tokens, tokens);
  }

  /// Scans `text` until the first error and asserts that its display text
  /// equals `message`.
  ///
  /// Panics with an explicit message when the whole input scans without an
  /// error, instead of comparing an empty string against `message`.
  #[track_caller]
  fn assert_has_error(text: &str, message: &str) {
    let mut scanner = Scanner::new(text, &Default::default());

    loop {
      match scanner.scan() {
        Ok(Some(_)) => {}
        Ok(None) => panic!("Expected error, but did not find one."),
        Err(err) => {
          assert_eq!(err.to_string(), message);
          return;
        }
      }
    }
  }
}
703 | 


--------------------------------------------------------------------------------
/src/parse_to_ast.rs:
--------------------------------------------------------------------------------
  1 | use std::borrow::Cow;
  2 | use std::collections::HashMap;
  3 | use std::rc::Rc;
  4 | 
  5 | use super::ast::*;
  6 | use super::common::Range;
  7 | use super::errors::*;
  8 | use super::scanner::Scanner;
  9 | use super::scanner::ScannerOptions;
 10 | use super::tokens::Token;
 11 | use super::tokens::TokenAndRange;
 12 | 
/// Map from a byte position to the collection of comments found there.
///
/// Each collection of comments is stored under two keys: the end of the
/// previous token (or the start of the file) and the start of the next token
/// (or the end of the file). Both keys share the same collection through an
/// [`Rc`].
pub type CommentMap<'a> = HashMap<usize, Rc<Vec<Comment<'a>>>>;
 17 | 
 18 | /// Strategy for handling comments during parsing.
 19 | ///
 20 | /// This enum determines how comments in the JSON/JSONC input are collected
 21 | /// and represented in the resulting abstract syntax tree (AST).
 22 | #[derive(Default, Debug, PartialEq, Clone)]
 23 | pub enum CommentCollectionStrategy {
 24 |   /// Comments are not collected and are effectively ignored during parsing.
 25 |   #[default]
 26 |   Off,
 27 |   /// Comments are collected and stored separately from the main AST structure.
 28 |   ///
 29 |   /// When this strategy is used, comments are placed in a [`CommentMap`] where
 30 |   /// the key is the previous token end or start of file, or the next token start
 31 |   /// or end of file.
 32 |   Separate,
 33 |   /// Comments are collected and treated as tokens within the AST.
 34 |   ///
 35 |   /// When this strategy is used, comments appear alongside other tokens in the
 36 |   /// token stream when `tokens: true` is set in [`CollectOptions`].
 37 |   AsTokens,
 38 | }
 39 | 
 40 | /// Options for collecting comments and tokens.
 41 | #[derive(Default, Clone)]
 42 | pub struct CollectOptions {
 43 |   /// Include comments in the result.
 44 |   pub comments: CommentCollectionStrategy,
 45 |   /// Include tokens in the result.
 46 |   pub tokens: bool,
 47 | }
 48 | 
/// Options for parsing.
///
/// Each flag turns one syntax extension on or off. The `Default`
/// implementation enables all of them.
#[derive(Clone)]
pub struct ParseOptions {
  /// Allow comments (defaults to `true`).
  pub allow_comments: bool,
  /// Allow words and numbers as object property names (defaults to `true`).
  pub allow_loose_object_property_names: bool,
  /// Allow trailing commas on object literal and array literal values (defaults to `true`).
  pub allow_trailing_commas: bool,
  /// Allow single-quoted strings (defaults to `true`).
  pub allow_single_quoted_strings: bool,
  /// Allow hexadecimal numbers like 0xFF (defaults to `true`).
  pub allow_hexadecimal_numbers: bool,
  /// Allow unary plus sign on numbers like +42 (defaults to `true`).
  pub allow_unary_plus_numbers: bool,
}
 65 | 
 66 | impl Default for ParseOptions {
 67 |   fn default() -> Self {
 68 |     Self {
 69 |       allow_comments: true,
 70 |       allow_loose_object_property_names: true,
 71 |       allow_trailing_commas: true,
 72 |       allow_single_quoted_strings: true,
 73 |       allow_hexadecimal_numbers: true,
 74 |       allow_unary_plus_numbers: true,
 75 |     }
 76 |   }
 77 | }
 78 | 
/// Result of parsing the text.
pub struct ParseResult<'a> {
  /// Collection of comments in the text.
  ///
  /// Provide `comments: CommentCollectionStrategy::Separate` in the
  /// `CollectOptions` for this to have a value; it is `None` for the other
  /// strategies.
  ///
  /// Remarks: The key is the start and end position of the tokens.
  pub comments: Option<CommentMap<'a>>,
  /// The JSON value the text contained, or `None` when the text held no value.
  pub value: Option<Value<'a>>,
  /// Collection of tokens.
  ///
  /// Provide `tokens: true` in the `CollectOptions` for this to have a value.
  /// Comments are only included when the comment strategy is
  /// `CommentCollectionStrategy::AsTokens`.
  pub tokens: Option<Vec<TokenAndRange<'a>>>,
}
 94 | 
/// Mutable parser state shared by the parsing functions in this module.
struct Context<'a> {
  /// Source of tokens.
  scanner: Scanner<'a>,
  /// Collected comments; `Some` only for `CommentCollectionStrategy::Separate`.
  comments: Option<CommentMap<'a>>,
  /// Comments seen since the last non-comment token, not yet stored in `comments`.
  current_comments: Option<Vec<Comment<'a>>>,
  /// End position of the previously scanned token (`0` before the first scan).
  last_token_end: usize,
  /// Ranges of the nodes currently being parsed, innermost last.
  range_stack: Vec<Range>,
  /// Collected tokens; `Some` only when token collection was requested.
  tokens: Option<Vec<TokenAndRange<'a>>>,
  /// When `true`, comment tokens are captured into `tokens` rather than `comments`.
  collect_comments_as_tokens: bool,
  /// Whether comments are permitted in the input.
  allow_comments: bool,
  /// Whether a comma directly before a closing `}` or `]` is permitted.
  allow_trailing_commas: bool,
  /// Whether words and numbers are permitted as object property names.
  allow_loose_object_property_names: bool,
}
107 | 
108 | impl<'a> Context<'a> {
109 |   pub fn scan(&mut self) -> Result<Option<Token<'a>>, ParseError> {
110 |     let previous_end = self.last_token_end;
111 |     let token = self.scan_handling_comments()?;
112 |     self.last_token_end = self.scanner.token_end();
113 | 
114 |     // store the comment for the previous token end, and current token start
115 |     if let Some(comments) = self.comments.as_mut()
116 |       && let Some(current_comments) = self.current_comments.take()
117 |     {
118 |       let current_comments = Rc::new(current_comments);
119 |       comments.insert(previous_end, current_comments.clone());
120 |       comments.insert(self.scanner.token_start(), current_comments);
121 |     }
122 | 
123 |     if let Some(token) = &token
124 |       && self.tokens.is_some()
125 |     {
126 |       self.capture_token(token.clone());
127 |     }
128 | 
129 |     Ok(token)
130 |   }
131 | 
132 |   pub fn token(&self) -> Option<Token<'a>> {
133 |     self.scanner.token()
134 |   }
135 | 
136 |   pub fn start_range(&mut self) {
137 |     self.range_stack.push(Range {
138 |       start: self.scanner.token_start(),
139 |       end: 0,
140 |     });
141 |   }
142 | 
143 |   pub fn end_range(&mut self) -> Range {
144 |     let mut range = self
145 |       .range_stack
146 |       .pop()
147 |       .expect("Range was popped from the stack, but the stack was empty.");
148 |     range.end = self.scanner.token_end();
149 |     range
150 |   }
151 | 
152 |   pub fn create_range_from_last_token(&self) -> Range {
153 |     Range {
154 |       start: self.scanner.token_start(),
155 |       end: self.scanner.token_end(),
156 |     }
157 |   }
158 | 
159 |   pub fn create_error(&self, kind: ParseErrorKind) -> ParseError {
160 |     self.scanner.create_error_for_current_token(kind)
161 |   }
162 | 
163 |   pub fn create_error_for_current_range(&mut self, kind: ParseErrorKind) -> ParseError {
164 |     let range = self.end_range();
165 |     self.create_error_for_range(range, kind)
166 |   }
167 | 
168 |   pub fn create_error_for_range(&self, range: Range, kind: ParseErrorKind) -> ParseError {
169 |     self.scanner.create_error_for_range(range, kind)
170 |   }
171 | 
172 |   fn scan_handling_comments(&mut self) -> Result<Option<Token<'a>>, ParseError> {
173 |     loop {
174 |       let token = self.scanner.scan()?;
175 |       match token {
176 |         Some(token @ Token::CommentLine(_) | token @ Token::CommentBlock(_)) if self.collect_comments_as_tokens => {
177 |           self.capture_token(token);
178 |         }
179 |         Some(Token::CommentLine(text)) => {
180 |           self.handle_comment(Comment::Line(CommentLine {
181 |             range: self.create_range_from_last_token(),
182 |             text,
183 |           }))?;
184 |         }
185 |         Some(Token::CommentBlock(text)) => {
186 |           self.handle_comment(Comment::Block(CommentBlock {
187 |             range: self.create_range_from_last_token(),
188 |             text,
189 |           }))?;
190 |         }
191 |         _ => return Ok(token),
192 |       }
193 |     }
194 |   }
195 | 
196 |   fn capture_token(&mut self, token: Token<'a>) {
197 |     let range = self.create_range_from_last_token();
198 |     if let Some(tokens) = self.tokens.as_mut() {
199 |       tokens.push(TokenAndRange {
200 |         token: token.clone(),
201 |         range,
202 |       });
203 |     }
204 |   }
205 | 
206 |   fn handle_comment(&mut self, comment: Comment<'a>) -> Result<(), ParseError> {
207 |     if !self.allow_comments {
208 |       return Err(self.create_error(ParseErrorKind::CommentsNotAllowed));
209 |     }
210 | 
211 |     if self.comments.is_some() {
212 |       if let Some(comments) = self.current_comments.as_mut() {
213 |         comments.push(comment);
214 |       } else {
215 |         self.current_comments = Some(vec![comment]);
216 |       }
217 |     }
218 | 
219 |     Ok(())
220 |   }
221 | }
222 | 
223 | /// Parses a string containing JSONC to an AST with comments and tokens.
224 | ///
225 | /// # Example
226 | ///
227 | /// ```
228 | /// use jsonc_parser::CollectOptions;
229 | /// use jsonc_parser::CommentCollectionStrategy;
230 | /// use jsonc_parser::parse_to_ast;
231 | /// use jsonc_parser::ParseOptions;
232 | ///
233 | /// let parse_result = parse_to_ast(r#"{ "test": 5 } // test"#, &CollectOptions {
234 | ///     comments: CommentCollectionStrategy::Separate, // include comments in result
235 | ///     tokens: true, // include tokens in result
236 | /// }, &Default::default()).expect("Should parse.");
237 | /// // ...inspect parse_result for value, tokens, and comments here...
238 | /// ```
239 | pub fn parse_to_ast<'a>(
240 |   text: &'a str,
241 |   collect_options: &CollectOptions,
242 |   parse_options: &ParseOptions,
243 | ) -> Result<ParseResult<'a>, ParseError> {
244 |   let mut context = Context {
245 |     scanner: Scanner::new(
246 |       text,
247 |       &ScannerOptions {
248 |         allow_single_quoted_strings: parse_options.allow_single_quoted_strings,
249 |         allow_hexadecimal_numbers: parse_options.allow_hexadecimal_numbers,
250 |         allow_unary_plus_numbers: parse_options.allow_unary_plus_numbers,
251 |       },
252 |     ),
253 |     comments: match collect_options.comments {
254 |       CommentCollectionStrategy::Separate => Some(Default::default()),
255 |       CommentCollectionStrategy::Off | CommentCollectionStrategy::AsTokens => None,
256 |     },
257 |     current_comments: None,
258 |     last_token_end: 0,
259 |     range_stack: Vec::new(),
260 |     tokens: if collect_options.tokens { Some(Vec::new()) } else { None },
261 |     collect_comments_as_tokens: collect_options.comments == CommentCollectionStrategy::AsTokens,
262 |     allow_comments: parse_options.allow_comments,
263 |     allow_trailing_commas: parse_options.allow_trailing_commas,
264 |     allow_loose_object_property_names: parse_options.allow_loose_object_property_names,
265 |   };
266 |   context.scan()?;
267 |   let value = parse_value(&mut context)?;
268 | 
269 |   if context.scan()?.is_some() {
270 |     return Err(context.create_error(ParseErrorKind::MultipleRootJsonValues));
271 |   }
272 | 
273 |   debug_assert!(context.range_stack.is_empty());
274 | 
275 |   Ok(ParseResult {
276 |     comments: context.comments,
277 |     tokens: context.tokens,
278 |     value,
279 |   })
280 | }
281 | 
282 | fn parse_value<'a>(context: &mut Context<'a>) -> Result<Option<Value<'a>>, ParseError> {
283 |   match context.token() {
284 |     None => Ok(None),
285 |     Some(token) => match token {
286 |       Token::OpenBrace => Ok(Some(Value::Object(parse_object(context)?))),
287 |       Token::OpenBracket => Ok(Some(Value::Array(parse_array(context)?))),
288 |       Token::String(value) => Ok(Some(Value::StringLit(create_string_lit(context, value)))),
289 |       Token::Boolean(value) => Ok(Some(Value::BooleanLit(create_boolean_lit(context, value)))),
290 |       Token::Number(value) => Ok(Some(Value::NumberLit(create_number_lit(context, value)))),
291 |       Token::Null => Ok(Some(Value::NullKeyword(create_null_keyword(context)))),
292 |       Token::CloseBracket => Err(context.create_error(ParseErrorKind::UnexpectedCloseBracket)),
293 |       Token::CloseBrace => Err(context.create_error(ParseErrorKind::UnexpectedCloseBrace)),
294 |       Token::Comma => Err(context.create_error(ParseErrorKind::UnexpectedComma)),
295 |       Token::Colon => Err(context.create_error(ParseErrorKind::UnexpectedColon)),
296 |       Token::Word(_) => Err(context.create_error(ParseErrorKind::UnexpectedWord)),
297 |       Token::CommentLine(_) => unreachable!(),
298 |       Token::CommentBlock(_) => unreachable!(),
299 |     },
300 |   }
301 | }
302 | 
303 | fn parse_object<'a>(context: &mut Context<'a>) -> Result<Object<'a>, ParseError> {
304 |   debug_assert!(context.token() == Some(Token::OpenBrace));
305 |   let mut properties = Vec::new();
306 | 
307 |   context.start_range();
308 |   context.scan()?;
309 | 
310 |   loop {
311 |     match context.token() {
312 |       Some(Token::CloseBrace) => break,
313 |       Some(Token::String(prop_name)) => {
314 |         properties.push(parse_object_property(context, PropName::String(prop_name))?);
315 |       }
316 |       Some(Token::Word(prop_name)) | Some(Token::Number(prop_name)) => {
317 |         properties.push(parse_object_property(context, PropName::Word(prop_name))?);
318 |       }
319 |       None => return Err(context.create_error_for_current_range(ParseErrorKind::UnterminatedObject)),
320 |       _ => return Err(context.create_error(ParseErrorKind::UnexpectedTokenInObject)),
321 |     }
322 | 
323 |     // skip the comma
324 |     if let Some(Token::Comma) = context.scan()? {
325 |       let comma_range = context.create_range_from_last_token();
326 |       if let Some(Token::CloseBrace) = context.scan()?
327 |         && !context.allow_trailing_commas
328 |       {
329 |         return Err(context.create_error_for_range(comma_range, ParseErrorKind::TrailingCommasNotAllowed));
330 |       }
331 |     }
332 |   }
333 | 
334 |   Ok(Object {
335 |     range: context.end_range(),
336 |     properties,
337 |   })
338 | }
339 | 
/// Raw object property name as taken from the token stream, before it is
/// turned into an AST node.
enum PropName<'a> {
  /// Name that came from a string token.
  String(Cow<'a, str>),
  /// Name that came from a word or number token.
  Word(&'a str),
}
344 | 
345 | fn parse_object_property<'a>(context: &mut Context<'a>, prop_name: PropName<'a>) -> Result<ObjectProp<'a>, ParseError> {
346 |   context.start_range();
347 | 
348 |   let name = match prop_name {
349 |     PropName::String(prop_name) => ObjectPropName::String(create_string_lit(context, prop_name)),
350 |     PropName::Word(prop_name) => {
351 |       if context.allow_loose_object_property_names {
352 |         ObjectPropName::Word(create_word(context, prop_name))
353 |       } else {
354 |         return Err(context.create_error(ParseErrorKind::ExpectedStringObjectProperty));
355 |       }
356 |     }
357 |   };
358 | 
359 |   match context.scan()? {
360 |     Some(Token::Colon) => {}
361 |     _ => return Err(context.create_error(ParseErrorKind::ExpectedColonAfterObjectKey)),
362 |   }
363 | 
364 |   context.scan()?;
365 |   let value = parse_value(context)?;
366 | 
367 |   match value {
368 |     Some(value) => Ok(ObjectProp {
369 |       range: context.end_range(),
370 |       name,
371 |       value,
372 |     }),
373 |     None => Err(context.create_error(ParseErrorKind::ExpectedObjectValue)),
374 |   }
375 | }
376 | 
377 | fn parse_array<'a>(context: &mut Context<'a>) -> Result<Array<'a>, ParseError> {
378 |   debug_assert!(context.token() == Some(Token::OpenBracket));
379 |   let mut elements = Vec::new();
380 | 
381 |   context.start_range();
382 |   context.scan()?;
383 | 
384 |   loop {
385 |     match context.token() {
386 |       Some(Token::CloseBracket) => break,
387 |       None => return Err(context.create_error_for_current_range(ParseErrorKind::UnterminatedArray)),
388 |       _ => match parse_value(context)? {
389 |         Some(value) => elements.push(value),
390 |         None => return Err(context.create_error_for_current_range(ParseErrorKind::UnterminatedArray)),
391 |       },
392 |     }
393 | 
394 |     // skip the comma
395 |     if let Some(Token::Comma) = context.scan()? {
396 |       let comma_range = context.create_range_from_last_token();
397 |       if let Some(Token::CloseBracket) = context.scan()?
398 |         && !context.allow_trailing_commas
399 |       {
400 |         return Err(context.create_error_for_range(comma_range, ParseErrorKind::TrailingCommasNotAllowed));
401 |       }
402 |     }
403 |   }
404 | 
405 |   Ok(Array {
406 |     range: context.end_range(),
407 |     elements,
408 |   })
409 | }
410 | 
411 | // factory functions
412 | 
413 | fn create_string_lit<'a>(context: &Context<'a>, value: Cow<'a, str>) -> StringLit<'a> {
414 |   StringLit {
415 |     range: context.create_range_from_last_token(),
416 |     value,
417 |   }
418 | }
419 | 
420 | fn create_word<'a>(context: &Context<'a>, value: &'a str) -> WordLit<'a> {
421 |   WordLit {
422 |     range: context.create_range_from_last_token(),
423 |     value,
424 |   }
425 | }
426 | 
427 | fn create_boolean_lit(context: &Context, value: bool) -> BooleanLit {
428 |   BooleanLit {
429 |     range: context.create_range_from_last_token(),
430 |     value,
431 |   }
432 | }
433 | 
434 | fn create_number_lit<'a>(context: &Context<'a>, value: &'a str) -> NumberLit<'a> {
435 |   NumberLit {
436 |     range: context.create_range_from_last_token(),
437 |     value,
438 |   }
439 | }
440 | 
441 | fn create_null_keyword(context: &Context) -> NullKeyword {
442 |   NullKeyword {
443 |     range: context.create_range_from_last_token(),
444 |   }
445 | }
446 | 
447 | #[cfg(test)]
448 | mod tests {
449 |   use super::*;
450 |   use pretty_assertions::assert_eq;
451 | 
452 |   #[test]
453 |   fn it_should_error_when_has_multiple_values() {
454 |     assert_has_error(
455 |       "[][]",
456 |       "Text cannot contain more than one JSON value on line 1 column 3",
457 |     );
458 |   }
459 | 
460 |   #[test]
461 |   fn it_should_error_when_object_is_not_terminated() {
462 |     assert_has_error("{", "Unterminated object on line 1 column 1");
463 |   }
464 | 
465 |   #[test]
466 |   fn it_should_error_when_object_has_unexpected_token() {
467 |     assert_has_error("{ [] }", "Unexpected token in object on line 1 column 3");
468 |   }
469 | 
470 |   #[test]
471 |   fn it_should_error_when_object_has_two_non_string_tokens() {
472 |     assert_has_error(
473 |       "{ asdf asdf: 5 }",
474 |       "Expected colon after the string or word in object property on line 1 column 8",
475 |     );
476 |   }
477 | 
478 |   #[test]
479 |   fn it_should_error_when_array_is_not_terminated() {
480 |     assert_has_error("[", "Unterminated array on line 1 column 1");
481 |   }
482 | 
483 |   #[test]
484 |   fn it_should_error_when_array_has_unexpected_token() {
485 |     assert_has_error("[:]", "Unexpected colon on line 1 column 2");
486 |   }
487 | 
488 |   #[test]
489 |   fn it_should_error_when_comment_block_not_closed() {
490 |     assert_has_error("/* test", "Unterminated comment block on line 1 column 1");
491 |   }
492 | 
493 |   #[test]
494 |   fn it_should_error_when_string_lit_not_closed() {
495 |     assert_has_error("\" test", "Unterminated string literal on line 1 column 1");
496 |   }
497 | 
498 |   fn assert_has_error(text: &str, message: &str) {
499 |     let result = parse_to_ast(text, &Default::default(), &Default::default());
500 |     match result {
501 |       Ok(_) => panic!("Expected error, but did not find one."),
502 |       Err(err) => assert_eq!(err.to_string(), message),
503 |     }
504 |   }
505 | 
506 |   #[test]
507 |   fn strict_should_error_object_trailing_comma() {
508 |     assert_has_strict_error(
509 |       r#"{ "test": 5, }"#,
510 |       "Trailing commas are not allowed on line 1 column 12",
511 |     );
512 |   }
513 | 
514 |   #[test]
515 |   fn strict_should_error_array_trailing_comma() {
516 |     assert_has_strict_error(r#"[ "test", ]"#, "Trailing commas are not allowed on line 1 column 9");
517 |   }
518 | 
519 |   #[test]
520 |   fn strict_should_error_comment_line() {
521 |     assert_has_strict_error(r#"[ "test" ] // 1"#, "Comments are not allowed on line 1 column 12");
522 |   }
523 | 
524 |   #[test]
525 |   fn strict_should_error_comment_block() {
526 |     assert_has_strict_error(r#"[ "test" /* 1 */]"#, "Comments are not allowed on line 1 column 10");
527 |   }
528 | 
529 |   #[test]
530 |   fn strict_should_error_word_property() {
531 |     assert_has_strict_error(
532 |       r#"{ word: 5 }"#,
533 |       "Expected string for object property on line 1 column 3",
534 |     );
535 |   }
536 | 
537 |   #[test]
538 |   fn strict_should_error_single_quoted_string() {
539 |     assert_has_strict_error(
540 |       r#"{ "key": 'value' }"#,
541 |       "Single-quoted strings are not allowed on line 1 column 10",
542 |     );
543 |   }
544 | 
545 |   #[test]
546 |   fn strict_should_error_hexadecimal_number() {
547 |     assert_has_strict_error(
548 |       r#"{ "key": 0xFF }"#,
549 |       "Hexadecimal numbers are not allowed on line 1 column 10",
550 |     );
551 |   }
552 | 
553 |   #[test]
554 |   fn strict_should_error_unary_plus_number() {
555 |     assert_has_strict_error(
556 |       r#"{ "key": +42 }"#,
557 |       "Unary plus on numbers is not allowed on line 1 column 10",
558 |     );
559 |   }
560 | 
561 |   /// Parses `text` with every `allow_*` parse option switched off and asserts
562 |   /// that parsing fails with an error whose display text equals `message`.
563 |   /// Panics when parsing succeeds.
564 |   #[track_caller]
565 |   fn assert_has_strict_error(text: &str, message: &str) {
566 |     let strict_options = ParseOptions {
567 |       allow_comments: false,
568 |       allow_loose_object_property_names: false,
569 |       allow_trailing_commas: false,
570 |       allow_single_quoted_strings: false,
571 |       allow_hexadecimal_numbers: false,
572 |       allow_unary_plus_numbers: false,
573 |     };
574 |     if let Err(err) = parse_to_ast(text, &Default::default(), &strict_options) {
575 |       assert_eq!(err.to_string(), message);
576 |     } else {
577 |       panic!("Expected error, but did not find one.");
578 |     }
579 |   }
580 | 
581 |   #[test]
582 |   fn it_should_not_include_tokens_by_default() {
583 |     // default collect options: the parse result must carry no token list
584 |     assert!(parse_to_ast("{}", &Default::default(), &Default::default()).unwrap().tokens.is_none());
585 |   }
586 | 
587 |   #[test]
588 |   fn it_should_include_tokens_when_specified() {
589 |     // opt in to token collection and leave every other collect option at its default
590 |     let collect_options = CollectOptions {
591 |       tokens: true,
592 |       ..Default::default()
593 |     };
594 |     let parse_options = Default::default();
595 |     let parsed = parse_to_ast("{}", &collect_options, &parse_options).unwrap();
596 | 
597 |     // the token list has to be present and, for `{}`, hold two entries
598 |     let collected = parsed.tokens.unwrap();
599 |     assert_eq!(collected.len(), 2);
600 |   }
601 | 
602 |   #[test]
603 |   fn it_should_not_include_comments_by_default() {
604 |     // default collect options: the parse result must carry no comment collection
605 |     assert!(parse_to_ast("{}", &Default::default(), &Default::default()).unwrap().comments.is_none());
606 |   }
607 | 
608 |   #[test]
609 |   fn it_should_include_comments_when_specified() {
610 |     // request comments via the `Separate` strategy; other collect options stay default
611 |     let collect_options = CollectOptions {
612 |       comments: CommentCollectionStrategy::Separate,
613 |       ..Default::default()
614 |     };
615 |     let parse_options = Default::default();
616 |     let parsed = parse_to_ast("{} // 2", &collect_options, &parse_options).unwrap();
617 | 
618 |     // two entries for a single comment: it is recorded for both positions
619 |     let collected = parsed.comments.unwrap();
620 |     assert_eq!(collected.len(), 2);
621 |   }
622 | 
623 |   #[cfg(not(feature = "error_unicode_width"))] // feature off: the error after the emoji string is expected at column 9
624 |   #[test]
625 |   fn error_correct_line_column_unicode_width() {
626 |     assert_has_strict_error(r#"["🧑‍🦰", ["#, "Unterminated array on line 1 column 9");
627 |   }
628 | 
629 |   #[cfg(feature = "error_unicode_width")] // feature on: the same input is expected to report column 10 instead of 9
630 |   #[test]
631 |   fn error_correct_line_column_unicode_width() {
632 |     assert_has_strict_error(r#"["🧑‍🦰", ["#, "Unterminated array on line 1 column 10");
633 |   }
634 | 
635 |   #[test]
636 |   fn it_should_parse_unquoted_keys_with_hex_and_trailing_comma() {
637 |     let text = r#"{
638 |       CP_CanFuncReqId: 0x7DF,  // 2015
639 |   }"#;
640 |     // AST view: a single property that keeps its bare name and the literal hex text
641 |     {
642 |       let parsed = parse_to_ast(text, &Default::default(), &Default::default()).unwrap();
643 |       let root = parsed.value.unwrap();
644 |       let obj = root.as_object().unwrap();
645 |       assert_eq!(obj.properties.len(), 1);
646 | 
647 |       let prop = &obj.properties[0];
648 |       assert_eq!(prop.name.as_str(), "CP_CanFuncReqId");
649 |       assert_eq!(prop.value.as_number_lit().unwrap().value, "0x7DF");
650 |     }
651 | 
652 |     // serde view: only compiled when the `serde` feature is enabled
653 |     #[cfg(feature = "serde")]
654 |     {
655 |       // hexadecimal numbers are converted to decimal in serde output
656 |       let expected = serde_json::json!({
657 |         "CP_CanFuncReqId": 2015
658 |       });
659 |       let actual = crate::parse_to_serde_value(text, &Default::default()).unwrap().unwrap();
660 |       assert_eq!(actual, expected);
661 |     }
662 |   }
663 | 
664 |   #[test]
665 |   fn it_should_parse_unary_plus_numbers() {
666 |     // with default options a leading `+` is accepted and kept in the literal text
667 |     let parsed = parse_to_ast(r#"{ "test": +42 }"#, &Default::default(), &Default::default()).unwrap();
668 |     let root = parsed.value.unwrap();
669 |     let obj = root.as_object().unwrap();
670 |     assert_eq!(obj.properties.len(), 1);
671 | 
672 |     let prop = &obj.properties[0];
673 |     assert_eq!(prop.name.as_str(), "test");
674 |     assert_eq!(prop.value.as_number_lit().unwrap().value, "+42");
675 |   }
676 | }
677 | 


--------------------------------------------------------------------------------