├── tests ├── specs │ ├── array │ │ ├── empty-array.json │ │ ├── trailing_comma.json │ │ ├── array.json │ │ ├── empty-array.txt │ │ ├── trailing_comma.txt │ │ └── array.txt │ ├── object │ │ ├── empty-object.json │ │ ├── trailing_comma.json │ │ ├── non-string-prop-names.json │ │ ├── empty-object.txt │ │ ├── object.json │ │ ├── trailing_comma.txt │ │ ├── non-string-prop-names.txt │ │ └── object.txt │ ├── encoding │ │ ├── encoding.json │ │ └── encoding.txt │ ├── string │ │ ├── string.json │ │ └── string.txt │ └── comments │ │ ├── only-comments.json │ │ ├── inline-comments.json │ │ ├── only-comments.txt │ │ └── inline-comments.txt └── test.rs ├── .rustfmt.toml ├── .gitignore ├── rust-toolchain.toml ├── .github ├── contributing.md ├── workflows │ ├── release.yml │ └── ci.yml └── CODE_OF_CONDUCT.md ├── README.md ├── dprint.json ├── Cargo.toml ├── LICENSE ├── src ├── common.rs ├── tokens.rs ├── cst │ └── input.rs ├── serde.rs ├── lib.rs ├── errors.rs ├── parse_to_value.rs ├── value.rs ├── string.rs ├── ast.rs ├── scanner.rs └── parse_to_ast.rs └── benches ├── bench.rs └── data ├── tsconfig.json └── package.txt /tests/specs/array/empty-array.json: -------------------------------------------------------------------------------- 1 | [] 2 | -------------------------------------------------------------------------------- /tests/specs/object/empty-object.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /tests/specs/array/trailing_comma.json: -------------------------------------------------------------------------------- 1 | [1,] 2 | -------------------------------------------------------------------------------- /.rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 120 2 | tab_spaces = 2 3 | -------------------------------------------------------------------------------- 
/tests/specs/object/trailing_comma.json: -------------------------------------------------------------------------------- 1 | { "p": 1, } 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .claude 2 | .vscode 3 | target 4 | Cargo.lock 5 | -------------------------------------------------------------------------------- /tests/specs/encoding/encoding.json: -------------------------------------------------------------------------------- 1 | // 3 bytes: ℝ 2 | "2: ß" 3 | -------------------------------------------------------------------------------- /tests/specs/array/array.json: -------------------------------------------------------------------------------- 1 | ["test", 5, { "prop": 4 }, ["test"], true, false, null] 2 | -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "1.89.0" 3 | components = ["clippy", "rustfmt"] 4 | -------------------------------------------------------------------------------- /tests/specs/string/string.json: -------------------------------------------------------------------------------- 1 | [ 2 | "testing\" testing", 3 | "testing\\ testing" 4 | ] 5 | -------------------------------------------------------------------------------- /tests/specs/comments/only-comments.json: -------------------------------------------------------------------------------- 1 | // testing 2 | /* test */ // test 3 | //test /* test */ 4 | -------------------------------------------------------------------------------- /.github/contributing.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Todo... 
4 | 5 | ## Benchmarks 6 | 7 | See [Benchmarks](https://dprint.github.io/jsonc-parser/dev/bench/) 8 | -------------------------------------------------------------------------------- /tests/specs/comments/inline-comments.json: -------------------------------------------------------------------------------- 1 | /*1*/{ // 2 2 | /*3*/"a"/*4*/: /*5*/5/*6*/, //7 3 | "b": /*8*/[/*9*/]/*10*/,//11 4 | "c": null 5 | /*13*/} // 14 -------------------------------------------------------------------------------- /tests/specs/object/non-string-prop-names.json: -------------------------------------------------------------------------------- 1 | { 2 | myProp: "test", 3 | other: "asdf", 4 | asdf-test: "test", 5 | oo43o : 5, 6 | jnm44 : 3, 7 | 456 : 34 8 | } 9 | -------------------------------------------------------------------------------- /tests/specs/array/empty-array.txt: -------------------------------------------------------------------------------- 1 | { 2 | "value": { 3 | "type": "array", 4 | "range": { 5 | "start": 0, 6 | "end": 2, 7 | }, 8 | "elements": [ 9 | ] 10 | }, 11 | "comments": [ 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /tests/specs/object/empty-object.txt: -------------------------------------------------------------------------------- 1 | { 2 | "value": { 3 | "type": "object", 4 | "range": { 5 | "start": 0, 6 | "end": 2, 7 | }, 8 | "properties": [ 9 | ] 10 | }, 11 | "comments": [ 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /tests/specs/object/object.json: -------------------------------------------------------------------------------- 1 | { 2 | "number": 5, 3 | "string": "str\\test", 4 | "object": { 5 | "obj": 5 6 | }, 7 | "array": [], 8 | "true": true, 9 | "false": false, 10 | "null": null 11 | } 12 | -------------------------------------------------------------------------------- /tests/specs/array/trailing_comma.txt: 
-------------------------------------------------------------------------------- 1 | { 2 | "value": { 3 | "type": "array", 4 | "range": { 5 | "start": 0, 6 | "end": 4, 7 | }, 8 | "elements": [ 9 | { 10 | "type": "number", 11 | "range": { 12 | "start": 1, 13 | "end": 2, 14 | }, 15 | "value": "1" 16 | } 17 | ] 18 | }, 19 | "comments": [ 20 | ] 21 | } 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # jsonc-parser 2 | 3 | [![](https://img.shields.io/crates/v/jsonc-parser.svg)](https://crates.io/crates/jsonc-parser) 4 | [![](https://docs.rs/jsonc-parser/badge.svg)](https://docs.rs/jsonc-parser) 5 | 6 | A JSON parser and manipulator for Rust that supports comments and other JSON extensions. 7 | 8 | ## Documentation 9 | 10 | For usage examples and API documentation, see the [rustdoc documentation](https://docs.rs/jsonc-parser). 11 | -------------------------------------------------------------------------------- /tests/specs/string/string.txt: -------------------------------------------------------------------------------- 1 | { 2 | "value": { 3 | "type": "array", 4 | "range": { 5 | "start": 0, 6 | "end": 48, 7 | }, 8 | "elements": [ 9 | { 10 | "type": "string", 11 | "range": { 12 | "start": 4, 13 | "end": 23, 14 | }, 15 | "value": "testing" testing" 16 | }, 17 | { 18 | "type": "string", 19 | "range": { 20 | "start": 27, 21 | "end": 46, 22 | }, 23 | "value": "testing\\ testing" 24 | } 25 | ] 26 | }, 27 | "comments": [ 28 | ] 29 | } 30 | -------------------------------------------------------------------------------- /dprint.json: -------------------------------------------------------------------------------- 1 | { 2 | "indentWidth": 2, 3 | "exec": { 4 | "cwd": "${configDir}", 5 | "commands": [{ 6 | "command": "rustfmt --edition 2024 --config imports_granularity=item", 7 | "exts": ["rs"] 8 | }] 9 | }, 10 | "includes": ["**/*.{md,rs}"], 11 
| "excludes": [ 12 | "**/target", 13 | "./benches/json" 14 | ], 15 | "plugins": [ 16 | "https://plugins.dprint.dev/markdown-0.19.0.wasm", 17 | "https://plugins.dprint.dev/exec-0.6.0.json@a054130d458f124f9b5c91484833828950723a5af3f8ff2bd1523bd47b83b364", 18 | "https://plugins.dprint.dev/json-0.20.0.wasm" 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "jsonc-parser" 3 | version = "0.28.0" 4 | authors = ["David Sherret "] 5 | edition = "2024" 6 | license = "MIT" 7 | description = "JSONC parser." 8 | repository = "https://github.com/dprint/jsonc-parser" 9 | 10 | [package.metadata.docs.rs] 11 | all-features = true 12 | 13 | [dependencies] 14 | indexmap = { version = "2.2.6", optional = true } 15 | serde_json = { version = "1.0", optional = true } 16 | unicode-width = { version = "0.2.0", optional = true } 17 | 18 | [features] 19 | cst = [] 20 | preserve_order = ["indexmap", "serde_json/preserve_order"] 21 | serde = ["serde_json"] 22 | error_unicode_width = ["unicode-width"] 23 | 24 | [dev-dependencies] 25 | pretty_assertions = "1.0.0" 26 | -------------------------------------------------------------------------------- /tests/specs/encoding/encoding.txt: -------------------------------------------------------------------------------- 1 | { 2 | "value": { 3 | "type": "string", 4 | "range": { 5 | "start": 16, 6 | "end": 23, 7 | }, 8 | "value": "2: ß" 9 | }, 10 | "comments": [ 11 | { 12 | "pos": 0, 13 | "comments": [ 14 | { 15 | "type": "line", 16 | "range": { 17 | "start": 0, 18 | "end": 15, 19 | }, 20 | "value": " 3 bytes: ℝ" 21 | } 22 | ] 23 | }, 24 | { 25 | "pos": 16, 26 | "comments": [ 27 | { 28 | "type": "line", 29 | "range": { 30 | "start": 0, 31 | "end": 15, 32 | }, 33 | "value": " 3 bytes: ℝ" 34 | } 35 | ] 36 | } 37 | ] 38 | } 39 | 
-------------------------------------------------------------------------------- /tests/specs/object/trailing_comma.txt: -------------------------------------------------------------------------------- 1 | { 2 | "value": { 3 | "type": "object", 4 | "range": { 5 | "start": 0, 6 | "end": 11, 7 | }, 8 | "properties": [ 9 | { 10 | "type": "objectProp", 11 | "range": { 12 | "start": 2, 13 | "end": 8, 14 | }, 15 | "name": { 16 | "type": "string", 17 | "range": { 18 | "start": 2, 19 | "end": 5, 20 | }, 21 | "value": "p" 22 | }, 23 | "value": { 24 | "type": "number", 25 | "range": { 26 | "start": 7, 27 | "end": 8, 28 | }, 29 | "value": "1" 30 | } 31 | } 32 | ] 33 | }, 34 | "comments": [ 35 | ] 36 | } 37 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | releaseKind: 7 | description: 'Kind of release' 8 | default: 'minor' 9 | type: choice 10 | options: 11 | - patch 12 | - minor 13 | required: true 14 | 15 | jobs: 16 | rust: 17 | name: release 18 | runs-on: ubuntu-latest 19 | timeout-minutes: 30 20 | 21 | steps: 22 | - name: Clone repository 23 | uses: actions/checkout@v4 24 | with: 25 | token: ${{ secrets.GH_DPRINTBOT_PAT }} 26 | 27 | - uses: denoland/setup-deno@v2 28 | - uses: dsherret/rust-toolchain-file@v1 29 | 30 | - name: Bump version and tag 31 | env: 32 | GITHUB_TOKEN: ${{ secrets.GH_DPRINTBOT_PAT }} 33 | GH_WORKFLOW_ACTOR: ${{ github.actor }} 34 | run: | 35 | git config user.email "dprintbot@users.noreply.github.com" 36 | git config user.name "dprintbot" 37 | deno run -A https://raw.githubusercontent.com/dprint/automation/0.9.0/tasks/publish_release.ts --${{github.event.inputs.releaseKind}} 38 | -------------------------------------------------------------------------------- /LICENSE: 
/// A pair of byte positions marking where something begins and ends in the text.
#[derive(Debug, PartialEq, Clone, Copy)]
pub struct Range {
  /// Start position of the node in the text.
  pub start: usize,
  /// End position of the node in the text.
  pub end: usize,
}

impl Range {
  /// Creates a range from the given `start` and `end` positions, stored as provided.
  pub fn new(start: usize, end: usize) -> Self {
    Self { start, end }
  }

  /// Creates a zero-width range whose start and end are both `pos`.
  pub fn from_byte_index(pos: usize) -> Self {
    Self { start: pos, end: pos }
  }
}

impl Ranged for Range {
  fn range(&self) -> Range {
    // `Range` is `Copy`, so hand back a copy of ourselves.
    Range {
      start: self.start,
      end: self.end,
    }
  }
}

/// Implemented by anything that occupies a range in the text.
pub trait Ranged {
  /// Gets the range.
  fn range(&self) -> Range;

  /// Gets the byte index of the first character in the text.
  fn start(&self) -> usize {
    let Range { start, .. } = self.range();
    start
  }

  /// Gets the byte index after the last character in the text.
  fn end(&self) -> usize {
    let Range { end, .. } = self.range();
    end
  }

  /// Gets the slice of `text` covered by this range.
  ///
  /// # Panics
  ///
  /// Panics, like any `str` slice, when the range is out of bounds for
  /// `text` or does not fall on character boundaries.
  fn text<'a>(&self, text: &'a str) -> &'a str {
    let Range { start, end } = self.range();
    &text[start..end]
  }

  /// Gets the end byte index minus the start byte index of the range.
  fn width(&self) -> usize {
    let Range { start, end } = self.range();
    end - start
  }
}
#[derive(Debug, PartialEq, Clone)]
pub enum Token<'a> {
  OpenBrace,
  CloseBrace,
  OpenBracket,
  CloseBracket,
  Comma,
  Colon,
  String(Cow<'a, str>),
  Word(&'a str),
  Boolean(bool),
  Number(&'a str),
  Null,
  CommentLine(&'a str),
  CommentBlock(&'a str),
}

impl<'a> Token<'a> {
  /// Returns the token as text.
  ///
  /// Punctuation, booleans and `null` map to their fixed spelling; the
  /// string, word, number and comment variants return the text they carry.
  pub fn as_str(&self) -> &str {
    match self {
      // fixed punctuation
      Token::OpenBrace => "{",
      Token::CloseBrace => "}",
      Token::OpenBracket => "[",
      Token::CloseBracket => "]",
      Token::Comma => ",",
      Token::Colon => ":",
      // keywords
      Token::Boolean(true) => "true",
      Token::Boolean(false) => "false",
      Token::Null => "null",
      // variants that carry their own text
      Token::String(text) => text,
      Token::Word(text) | Token::Number(text) | Token::CommentLine(text) | Token::CommentBlock(text) => text,
    }
  }
}
| { 45 | "type": "line", 46 | "range": { 47 | "start": 0, 48 | "end": 10, 49 | }, 50 | "value": " testing" 51 | }, 52 | { 53 | "type": "block", 54 | "range": { 55 | "start": 11, 56 | "end": 21, 57 | }, 58 | "value": " test " 59 | }, 60 | { 61 | "type": "line", 62 | "range": { 63 | "start": 22, 64 | "end": 29, 65 | }, 66 | "value": " test" 67 | }, 68 | { 69 | "type": "line", 70 | "range": { 71 | "start": 30, 72 | "end": 47, 73 | }, 74 | "value": "test /* test */" 75 | } 76 | ] 77 | } 78 | ] 79 | } 80 | -------------------------------------------------------------------------------- /tests/specs/array/array.txt: -------------------------------------------------------------------------------- 1 | { 2 | "value": { 3 | "type": "array", 4 | "range": { 5 | "start": 0, 6 | "end": 55, 7 | }, 8 | "elements": [ 9 | { 10 | "type": "string", 11 | "range": { 12 | "start": 1, 13 | "end": 7, 14 | }, 15 | "value": "test" 16 | }, 17 | { 18 | "type": "number", 19 | "range": { 20 | "start": 9, 21 | "end": 10, 22 | }, 23 | "value": "5" 24 | }, 25 | { 26 | "type": "object", 27 | "range": { 28 | "start": 12, 29 | "end": 25, 30 | }, 31 | "properties": [ 32 | { 33 | "type": "objectProp", 34 | "range": { 35 | "start": 14, 36 | "end": 23, 37 | }, 38 | "name": { 39 | "type": "string", 40 | "range": { 41 | "start": 14, 42 | "end": 20, 43 | }, 44 | "value": "prop" 45 | }, 46 | "value": { 47 | "type": "number", 48 | "range": { 49 | "start": 22, 50 | "end": 23, 51 | }, 52 | "value": "4" 53 | } 54 | } 55 | ] 56 | }, 57 | { 58 | "type": "array", 59 | "range": { 60 | "start": 27, 61 | "end": 35, 62 | }, 63 | "elements": [ 64 | { 65 | "type": "string", 66 | "range": { 67 | "start": 28, 68 | "end": 34, 69 | }, 70 | "value": "test" 71 | } 72 | ] 73 | }, 74 | { 75 | "type": "boolean", 76 | "range": { 77 | "start": 37, 78 | "end": 41, 79 | }, 80 | "value": "true" 81 | }, 82 | { 83 | "type": "boolean", 84 | "range": { 85 | "start": 43, 86 | "end": 48, 87 | }, 88 | "value": "false" 89 | }, 90 | { 91 | 
"type": "null", 92 | "range": { 93 | "start": 50, 94 | "end": 54, 95 | } 96 | } 97 | ] 98 | }, 99 | "comments": [ 100 | ] 101 | } 102 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | name: ${{ matrix.config.kind }} ${{ matrix.config.os }} 8 | runs-on: ${{ matrix.config.os }} 9 | strategy: 10 | matrix: 11 | config: 12 | - os: ubuntu-latest 13 | kind: test_release 14 | - os: ubuntu-latest 15 | kind: test_debug 16 | 17 | env: 18 | CARGO_INCREMENTAL: 0 19 | RUST_BACKTRACE: full 20 | 21 | steps: 22 | - uses: actions/checkout@v4 23 | - uses: dsherret/rust-toolchain-file@v1 24 | - uses: Swatinem/rust-cache@v2 25 | with: 26 | save-if: ${{ github.ref == 'refs/heads/main' }} 27 | 28 | - name: Test debug 29 | if: matrix.config.kind == 'test_debug' 30 | run: | 31 | cargo test --features serde 32 | cargo test --features preserve_order 33 | cargo test --all-features 34 | - name: Test release 35 | if: matrix.config.kind == 'test_release' 36 | run: cargo test --release --all-features 37 | 38 | # CARGO PUBLISH 39 | - name: Cargo login 40 | if: matrix.config.kind == 'test_release' && startsWith(github.ref, 'refs/tags/') 41 | run: cargo login ${{ secrets.CRATES_TOKEN }} 42 | 43 | - name: Cargo publish 44 | if: matrix.config.kind == 'test_release' && startsWith(github.ref, 'refs/tags/') 45 | run: cargo publish 46 | 47 | benchmark: 48 | name: Benchmarks 49 | runs-on: ubuntu-latest 50 | steps: 51 | - uses: actions/checkout@v4 52 | - name: Install latest nightly 53 | uses: actions-rs/toolchain@v1 54 | with: 55 | toolchain: nightly 56 | override: true 57 | - name: Cache cargo 58 | uses: actions/cache@v4 59 | with: 60 | path: | 61 | ~/.cargo/registry 62 | ~/.cargo/git 63 | target 64 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} 65 | # Run benchmark and stores 
/// Benchmarks `parse_to_ast` on the enlarged citm_catalog input.
///
/// The input text is built once, outside `b.iter`, so only parsing is timed.
#[bench]
fn citm_catalog_json_large_ast(b: &mut Bencher) {
  bench_ast(b, &get_citm_catalog_json_large());
}

/// Benchmarks `parse_to_value` on the enlarged citm_catalog input.
#[bench]
fn citm_catalog_json_large_value(b: &mut Bencher) {
  bench_value(b, &get_citm_catalog_json_large());
}

/// Benchmarks `serde_json` on the enlarged citm_catalog input for comparison.
///
/// Only compiled when the "serde" feature is enabled.
#[bench]
#[cfg(feature = "serde")]
fn citm_catalog_json_large_serde(b: &mut Bencher) {
  bench_serde(b, &get_citm_catalog_json_large());
}

/// Benchmarks `parse_to_ast` on the tsconfig.json data file.
#[bench]
fn tsconfig_json_ast(b: &mut Bencher) {
  bench_ast(b, &get_tsconfig_json());
}

/// Benchmarks `parse_to_value` on the tsconfig.json data file.
#[bench]
fn tsconfig_json_value(b: &mut Bencher) {
  bench_value(b, &get_tsconfig_json());
}

/// Benchmarks `parse_to_ast` on the package data file.
#[bench]
fn package_json_ast(b: &mut Bencher) {
  bench_ast(b, &get_package_json());
}

/// Benchmarks `parse_to_value` on the package data file.
#[bench]
fn package_json_value(b: &mut Bencher) {
  bench_value(b, &get_package_json());
}

// bench helpers

/// Repeatedly parses `json_text` to an AST with default collect and parse
/// options, panicking if parsing fails.
fn bench_ast(b: &mut Bencher, json_text: &str) {
  b.iter(|| parse_to_ast(json_text, &Default::default(), &Default::default()).unwrap());
}
/// Builds the text of a JSON array holding `length` copies of `text`.
///
/// `text` is inserted verbatim (it is not validated or escaped) and the
/// copies are separated by a single comma, so `("{}", 3)` yields
/// `[{},{},{}]` and a `length` of zero yields `[]`.
fn create_json_array_of_object(text: &str, length: usize) -> String {
  // The final size is known up front: every copy of `text`, one comma
  // between neighbours and the two brackets. Reserving it avoids regrowth.
  let capacity = text.len() * length + length.saturating_sub(1) + 2;
  let mut result = String::with_capacity(capacity);
  result.push('[');
  for i in 0..length {
    if i > 0 {
      result.push(',');
    }
    result.push_str(text);
  }
  result.push(']');
  result
}
"string", 48 | "range": { 49 | "start": 33, 50 | "end": 39, 51 | }, 52 | "value": "asdf" 53 | } 54 | }, 55 | { 56 | "type": "objectProp", 57 | "range": { 58 | "start": 45, 59 | "end": 62, 60 | }, 61 | "name": { 62 | "type": "word", 63 | "range": { 64 | "start": 45, 65 | "end": 54, 66 | }, 67 | "value": "asdf-test" 68 | }, 69 | "value": { 70 | "type": "string", 71 | "range": { 72 | "start": 56, 73 | "end": 62, 74 | }, 75 | "value": "test" 76 | } 77 | }, 78 | { 79 | "type": "objectProp", 80 | "range": { 81 | "start": 68, 82 | "end": 77, 83 | }, 84 | "name": { 85 | "type": "word", 86 | "range": { 87 | "start": 68, 88 | "end": 73, 89 | }, 90 | "value": "oo43o" 91 | }, 92 | "value": { 93 | "type": "number", 94 | "range": { 95 | "start": 76, 96 | "end": 77, 97 | }, 98 | "value": "5" 99 | } 100 | }, 101 | { 102 | "type": "objectProp", 103 | "range": { 104 | "start": 83, 105 | "end": 92, 106 | }, 107 | "name": { 108 | "type": "word", 109 | "range": { 110 | "start": 83, 111 | "end": 88, 112 | }, 113 | "value": "jnm44" 114 | }, 115 | "value": { 116 | "type": "number", 117 | "range": { 118 | "start": 91, 119 | "end": 92, 120 | }, 121 | "value": "3" 122 | } 123 | }, 124 | { 125 | "type": "objectProp", 126 | "range": { 127 | "start": 98, 128 | "end": 106, 129 | }, 130 | "name": { 131 | "type": "word", 132 | "range": { 133 | "start": 98, 134 | "end": 101, 135 | }, 136 | "value": "456" 137 | }, 138 | "value": { 139 | "type": "number", 140 | "range": { 141 | "start": 104, 142 | "end": 106, 143 | }, 144 | "value": "34" 145 | } 146 | } 147 | ] 148 | }, 149 | "comments": [ 150 | ] 151 | } 152 | -------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making 
participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | - Using welcoming and inclusive language 18 | - Being respectful of differing viewpoints and experiences 19 | - Gracefully accepting constructive criticism 20 | - Focusing on what is best for the community 21 | - Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | - The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | - Trolling, insulting/derogatory comments, and personal or political attacks 28 | - Public or private harassment 29 | - Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | - Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 
45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team on twitter via direct message at 59 | https://twitter.com/DavidSherret (DMs open). All complaints will be reviewed 60 | and investigated and will result in a response that is deemed necessary and 61 | appropriate to the circumstances. The project team is obligated to maintain 62 | confidentiality with regard to the reporter of an incident. Further details 63 | of specific enforcement policies may be posted separately. 64 | 65 | Project maintainers who do not follow or enforce the Code of Conduct in good 66 | faith may face temporary or permanent repercussions as determined by other 67 | members of the project's leadership. 68 | 69 | ## Attribution 70 | 71 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 72 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 73 | 74 | [homepage]: https://www.contributor-covenant.org 75 | 76 | For answers to common questions about this code of conduct, see 77 | https://www.contributor-covenant.org/faq 78 | -------------------------------------------------------------------------------- /src/cst/input.rs: -------------------------------------------------------------------------------- 1 | /// API user provided value for inserts and replaces. 
#[derive(Debug, Clone)]
pub enum CstInputValue {
  /// The `null` literal.
  Null,
  /// A boolean.
  Bool(bool),
  /// A number, kept as its textual form.
  Number(String),
  /// A string.
  String(String),
  /// An array of values.
  Array(Vec<CstInputValue>),
  /// An object as a list of `(property name, value)` pairs.
  Object(Vec<(String, CstInputValue)>),
}

impl CstInputValue {
  /// Returns `true` for a non-empty object, or for an array with at least
  /// one element that is itself a non-empty object or array.
  ///
  /// Scalars, empty containers and arrays of scalars return `false`.
  pub(crate) fn force_multiline(&self) -> bool {
    match self {
      CstInputValue::Null | CstInputValue::Bool(_) | CstInputValue::Number(_) | CstInputValue::String(_) => false,
      CstInputValue::Array(v) => v.iter().any(|v| v.is_object_or_array_with_elements()),
      CstInputValue::Object(v) => !v.is_empty(),
    }
  }

  /// Returns `true` only for an array or object that has at least one entry.
  fn is_object_or_array_with_elements(&self) -> bool {
    match self {
      CstInputValue::Null | CstInputValue::Bool(_) | CstInputValue::Number(_) | CstInputValue::String(_) => false,
      CstInputValue::Array(v) => !v.is_empty(),
      CstInputValue::Object(v) => !v.is_empty(),
    }
  }
}

impl From<bool> for CstInputValue {
  fn from(b: bool) -> Self {
    CstInputValue::Bool(b)
  }
}

impl From<&str> for CstInputValue {
  fn from(s: &str) -> Self {
    CstInputValue::String(s.to_string())
  }
}

impl From<String> for CstInputValue {
  fn from(s: String) -> Self {
    CstInputValue::String(s)
  }
}

impl From<f64> for CstInputValue {
  fn from(n: f64) -> Self {
    // NOTE(review): non-finite floats format as "NaN"/"inf", which are not
    // JSON numbers; confirm callers never pass them.
    CstInputValue::Number(n.to_string())
  }
}

impl From<usize> for CstInputValue {
  fn from(n: usize) -> Self {
    CstInputValue::Number(n.to_string())
  }
}

impl From<isize> for CstInputValue {
  fn from(n: isize) -> Self {
    CstInputValue::Number(n.to_string())
  }
}

impl From<u64> for CstInputValue {
  fn from(n: u64) -> Self {
    CstInputValue::Number(n.to_string())
  }
}

impl From<i64> for CstInputValue {
  fn from(n: i64) -> Self {
    CstInputValue::Number(n.to_string())
  }
}

impl From<u32> for CstInputValue {
  fn from(n: u32) -> Self {
    CstInputValue::Number(n.to_string())
  }
}
| 84 | impl From for CstInputValue { 85 | fn from(n: i32) -> Self { 86 | CstInputValue::Number(n.to_string()) 87 | } 88 | } 89 | 90 | impl From> for CstInputValue 91 | where 92 | T: Into, 93 | { 94 | fn from(vec: Vec) -> Self { 95 | CstInputValue::Array(vec.into_iter().map(Into::into).collect()) 96 | } 97 | } 98 | 99 | impl From> for CstInputValue { 100 | fn from(obj: Vec<(String, CstInputValue)>) -> Self { 101 | CstInputValue::Object(obj) 102 | } 103 | } 104 | 105 | #[macro_export] 106 | macro_rules! json { 107 | (null) => { 108 | $crate::cst::CstInputValue::Null 109 | }; 110 | 111 | ([ $($elems:tt),* $(,)? ]) => { 112 | $crate::cst::CstInputValue::Array(vec![ 113 | $(json!($elems)),* 114 | ]) 115 | }; 116 | 117 | ({ $($key:tt : $value:tt),* $(,)? }) => { 118 | $crate::cst::CstInputValue::Object(vec![ 119 | $( 120 | ($crate::json!(private_quote_property $key).to_string(), json!($value)) 121 | ),* 122 | ]) 123 | }; 124 | 125 | ($other:expr) => { 126 | $crate::cst::CstInputValue::from($other) 127 | }; 128 | 129 | // hack to not have another public macro for quoting object key properties 130 | (private_quote_property $key:ident) => { 131 | stringify!($key) 132 | }; 133 | 134 | (private_quote_property $key:expr) => { 135 | $key 136 | }; 137 | } 138 | -------------------------------------------------------------------------------- /src/serde.rs: -------------------------------------------------------------------------------- 1 | use super::CollectOptions; 2 | use super::ParseOptions; 3 | use super::errors::ParseError; 4 | use super::parse_to_ast; 5 | 6 | /// Parses a string containing JSONC to a `serde_json::Value. 
7 | /// 8 | /// Requires the "serde" cargo feature: 9 | /// 10 | /// ```toml 11 | /// jsonc-parser = { version = "...", features = ["serde"] } 12 | /// ``` 13 | /// 14 | /// # Example 15 | /// 16 | /// ```rs 17 | /// use jsonc_parser::parse_to_serde_value; 18 | /// 19 | /// let json_value = parse_to_serde_value(r#"{ "test": 5 } // test"#, &Default::default()).unwrap(); 20 | /// ``` 21 | pub fn parse_to_serde_value(text: &str, parse_options: &ParseOptions) -> Result, ParseError> { 22 | let value = parse_to_ast( 23 | text, 24 | &CollectOptions { 25 | comments: crate::CommentCollectionStrategy::Off, 26 | tokens: false, 27 | }, 28 | parse_options, 29 | )? 30 | .value; 31 | Ok(value.map(|v| v.into())) 32 | } 33 | 34 | #[cfg(test)] 35 | mod tests { 36 | use pretty_assertions::assert_eq; 37 | use serde_json::Value as SerdeValue; 38 | use std::str::FromStr; 39 | 40 | use super::*; 41 | 42 | #[test] 43 | fn it_should_error_when_has_error() { 44 | assert_has_error( 45 | "[][]", 46 | "Text cannot contain more than one JSON value on line 1 column 3", 47 | ); 48 | } 49 | 50 | fn assert_has_error(text: &str, message: &str) { 51 | let result = parse_to_serde_value(text, &Default::default()); 52 | match result { 53 | Ok(_) => panic!("Expected error, but did not find one."), 54 | Err(err) => assert_eq!(err.to_string(), message), 55 | } 56 | } 57 | 58 | #[test] 59 | fn it_should_parse_to_serde_value() { 60 | let result = parse_to_serde_value( 61 | r#"{ "a": { "a1": 5 }, "b": [0.3e+025], "c": "c1", "d": true, "e": false, "f": null }"#, 62 | &Default::default(), 63 | ) 64 | .unwrap(); 65 | 66 | let mut expected_value = serde_json::map::Map::new(); 67 | expected_value.insert("a".to_string(), { 68 | let mut inner_obj = serde_json::map::Map::new(); 69 | inner_obj.insert( 70 | "a1".to_string(), 71 | SerdeValue::Number(serde_json::Number::from_str("5").unwrap()), 72 | ); 73 | SerdeValue::Object(inner_obj) 74 | }); 75 | expected_value.insert("b".to_string(), { 76 | let mut inner_array = 
Vec::new(); 77 | inner_array.push(SerdeValue::Number(serde_json::Number::from_str("0.3e+025").unwrap())); 78 | SerdeValue::Array(inner_array) 79 | }); 80 | expected_value.insert("c".to_string(), SerdeValue::String("c1".to_string())); 81 | expected_value.insert("d".to_string(), SerdeValue::Bool(true)); 82 | expected_value.insert("e".to_string(), SerdeValue::Bool(false)); 83 | expected_value.insert("f".to_string(), SerdeValue::Null); 84 | 85 | assert_eq!(result, Some(SerdeValue::Object(expected_value))); 86 | } 87 | 88 | #[test] 89 | fn it_should_parse_hexadecimal_numbers_to_decimal() { 90 | let result = parse_to_serde_value( 91 | r#"{ 92 | "hex1": 0x7DF, 93 | "hex2": 0xFF, 94 | "hex3": 0x10 95 | }"#, 96 | &Default::default(), 97 | ) 98 | .unwrap(); 99 | 100 | let mut expected_value = serde_json::map::Map::new(); 101 | expected_value.insert("hex1".to_string(), SerdeValue::Number(serde_json::Number::from(2015))); 102 | expected_value.insert("hex2".to_string(), SerdeValue::Number(serde_json::Number::from(255))); 103 | expected_value.insert("hex3".to_string(), SerdeValue::Number(serde_json::Number::from(16))); 104 | 105 | assert_eq!(result, Some(SerdeValue::Object(expected_value))); 106 | } 107 | 108 | #[test] 109 | fn it_should_parse_unary_plus_numbers() { 110 | let result = parse_to_serde_value( 111 | r#"{ 112 | "pos1": +42, 113 | "pos2": +0.5, 114 | "pos3": +1e10 115 | }"#, 116 | &Default::default(), 117 | ) 118 | .unwrap(); 119 | 120 | let mut expected_value = serde_json::map::Map::new(); 121 | expected_value.insert("pos1".to_string(), SerdeValue::Number(serde_json::Number::from(42))); 122 | expected_value.insert( 123 | "pos2".to_string(), 124 | SerdeValue::Number(serde_json::Number::from_str("0.5").unwrap()), 125 | ); 126 | expected_value.insert( 127 | "pos3".to_string(), 128 | SerdeValue::Number(serde_json::Number::from_str("1e10").unwrap()), 129 | ); 130 | 131 | assert_eq!(result, Some(SerdeValue::Object(expected_value))); 132 | } 133 | } 134 | 
-------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # jsonc-parser 2 | //! 3 | //! A JSON parser and manipulator that supports comments and other JSON extensions. 4 | //! 5 | //! ## Parsing 6 | //! 7 | //! To a simple `JsonValue`: 8 | //! 9 | //! ``` 10 | //! use jsonc_parser::parse_to_value; 11 | //! 12 | //! # fn main() -> Result<(), Box<dyn std::error::Error>> { 13 | //! let json_value = parse_to_value(r#"{ "test": 5 } // test"#, &Default::default())?; 14 | //! // check the json_value here 15 | //! # Ok(()) 16 | //! # } 17 | //! ``` 18 | //! 19 | //! Or an AST: 20 | //! 21 | //! ``` 22 | //! use jsonc_parser::parse_to_ast; 23 | //! use jsonc_parser::CollectOptions; 24 | //! use jsonc_parser::CommentCollectionStrategy; 25 | //! 26 | //! # fn main() -> Result<(), Box<dyn std::error::Error>> { 27 | //! let parse_result = parse_to_ast(r#"{ "test": 5 } // test"#, &CollectOptions { 28 | //! comments: CommentCollectionStrategy::Separate, // include comments in result 29 | //! tokens: true, // include tokens in result 30 | //! }, &Default::default())?; 31 | //! // ...inspect parse_result for value, tokens, and comments here... 32 | //! # Ok(()) 33 | //! # } 34 | //! ``` 35 | //! 36 | //! ## Manipulation (CST) 37 | //! 38 | //! When enabling the `cst` cargo feature, parsing to a CST provides a first class manipulation API: 39 | //! 40 | //! ``` 41 | //! # #[cfg(feature = "cst")] 42 | //! # { 43 | //! use jsonc_parser::cst::CstRootNode; 44 | //! use jsonc_parser::ParseOptions; 45 | //! use jsonc_parser::json; 46 | //! 47 | //! let json_text = r#"{ 48 | //! // comment 49 | //! "data": 123 50 | //! }"#; 51 | //! 52 | //! let root = CstRootNode::parse(json_text, &ParseOptions::default()).unwrap(); 53 | //! let root_obj = root.object_value_or_set(); 54 | //! 55 | //! root_obj.get("data").unwrap().set_value(json!({ 56 | //! "nested": true 57 | //! })); 58 | //!
root_obj.append("new_key", json!([456, 789, false])); 59 | //! 60 | //! assert_eq!(root.to_string(), r#"{ 61 | //! // comment 62 | //! "data": { 63 | //! "nested": true 64 | //! }, 65 | //! "new_key": [456, 789, false] 66 | //! }"#); 67 | //! # } 68 | //! ``` 69 | //! 70 | //! ## Serde 71 | //! 72 | //! If you enable the `"serde"` feature as follows: 73 | //! 74 | //! ```toml 75 | //! # in Cargo.toml 76 | //! jsonc-parser = { version = "...", features = ["serde"] } 77 | //! ``` 78 | //! 79 | //! Then you can use the `parse_to_serde_value` function to get a `serde_json::Value`: 80 | //! 81 | //! ``` 82 | //! # #[cfg(feature = "serde")] 83 | //! # { 84 | //! use jsonc_parser::parse_to_serde_value; 85 | //! 86 | //! # fn parse_example() -> Result<(), Box<dyn std::error::Error>> { 87 | //! let json_value = parse_to_serde_value(r#"{ "test": 5 } // test"#, &Default::default())?; 88 | //! # Ok(()) 89 | //! # } 90 | //! # } 91 | //! ``` 92 | //! 93 | //! Alternatively, use `parse_to_ast` then call `.into()` (ex. `let value: serde_json::Value = ast.into();`). 94 | //! 95 | //! ## Parse Strictly as JSON 96 | //! 97 | //! Provide `ParseOptions` and set all the options to false: 98 | //! 99 | //! ``` 100 | //! use jsonc_parser::parse_to_value; 101 | //! use jsonc_parser::ParseOptions; 102 | //! 103 | //! # fn main() -> Result<(), Box<dyn std::error::Error>> { 104 | //! # let text = "{}"; 105 | //! let json_value = parse_to_value(text, &ParseOptions { 106 | //! allow_comments: false, 107 | //! allow_loose_object_property_names: false, 108 | //! allow_trailing_commas: false, 109 | //! allow_single_quoted_strings: false, 110 | //! allow_hexadecimal_numbers: false, 111 | //! allow_unary_plus_numbers: false, 112 | //! })?; 113 | //! # Ok(()) 114 | //! # } 115 | //! ``` 116 | //! 117 | //! ## Error column number with unicode-width 118 | //! 119 | //! To get more accurate display column numbers in error messages, enable the `error_unicode_width` cargo feature, 120 | //!
which will pull in and use the [unicode-width](https://crates.io/crates/unicode-width) dependency internally. 121 | //! Otherwise it will use the character count, which isn't as accurate of a number, but will probably be good enough 122 | //! in most cases. 123 | 124 | #![deny(clippy::print_stderr)] 125 | #![deny(clippy::print_stdout)] 126 | #![allow(clippy::uninlined_format_args)] 127 | 128 | pub mod ast; 129 | pub mod common; 130 | #[cfg(feature = "cst")] 131 | pub mod cst; 132 | pub mod errors; 133 | mod parse_to_ast; 134 | mod parse_to_value; 135 | mod scanner; 136 | #[cfg(feature = "serde")] 137 | mod serde; 138 | mod string; 139 | pub mod tokens; 140 | mod value; 141 | 142 | pub use parse_to_ast::*; 143 | pub use parse_to_value::*; 144 | pub use scanner::*; 145 | pub use string::ParseStringErrorKind; 146 | pub use value::*; 147 | 148 | #[cfg(feature = "serde")] 149 | pub use serde::*; 150 | -------------------------------------------------------------------------------- /tests/specs/object/object.txt: -------------------------------------------------------------------------------- 1 | { 2 | "value": { 3 | "type": "object", 4 | "range": { 5 | "start": 0, 6 | "end": 159, 7 | }, 8 | "properties": [ 9 | { 10 | "type": "objectProp", 11 | "range": { 12 | "start": 6, 13 | "end": 17, 14 | }, 15 | "name": { 16 | "type": "string", 17 | "range": { 18 | "start": 6, 19 | "end": 14, 20 | }, 21 | "value": "number" 22 | }, 23 | "value": { 24 | "type": "number", 25 | "range": { 26 | "start": 16, 27 | "end": 17, 28 | }, 29 | "value": "5" 30 | } 31 | }, 32 | { 33 | "type": "objectProp", 34 | "range": { 35 | "start": 23, 36 | "end": 44, 37 | }, 38 | "name": { 39 | "type": "string", 40 | "range": { 41 | "start": 23, 42 | "end": 31, 43 | }, 44 | "value": "string" 45 | }, 46 | "value": { 47 | "type": "string", 48 | "range": { 49 | "start": 33, 50 | "end": 44, 51 | }, 52 | "value": "str\\test" 53 | } 54 | }, 55 | { 56 | "type": "objectProp", 57 | "range": { 58 | "start": 50, 59 | 
"end": 84, 60 | }, 61 | "name": { 62 | "type": "string", 63 | "range": { 64 | "start": 50, 65 | "end": 58, 66 | }, 67 | "value": "object" 68 | }, 69 | "value": { 70 | "type": "object", 71 | "range": { 72 | "start": 60, 73 | "end": 84, 74 | }, 75 | "properties": [ 76 | { 77 | "type": "objectProp", 78 | "range": { 79 | "start": 70, 80 | "end": 78, 81 | }, 82 | "name": { 83 | "type": "string", 84 | "range": { 85 | "start": 70, 86 | "end": 75, 87 | }, 88 | "value": "obj" 89 | }, 90 | "value": { 91 | "type": "number", 92 | "range": { 93 | "start": 77, 94 | "end": 78, 95 | }, 96 | "value": "5" 97 | } 98 | } 99 | ] 100 | } 101 | }, 102 | { 103 | "type": "objectProp", 104 | "range": { 105 | "start": 90, 106 | "end": 101, 107 | }, 108 | "name": { 109 | "type": "string", 110 | "range": { 111 | "start": 90, 112 | "end": 97, 113 | }, 114 | "value": "array" 115 | }, 116 | "value": { 117 | "type": "array", 118 | "range": { 119 | "start": 99, 120 | "end": 101, 121 | }, 122 | "elements": [ 123 | ] 124 | } 125 | }, 126 | { 127 | "type": "objectProp", 128 | "range": { 129 | "start": 107, 130 | "end": 119, 131 | }, 132 | "name": { 133 | "type": "string", 134 | "range": { 135 | "start": 107, 136 | "end": 113, 137 | }, 138 | "value": "true" 139 | }, 140 | "value": { 141 | "type": "boolean", 142 | "range": { 143 | "start": 115, 144 | "end": 119, 145 | }, 146 | "value": "true" 147 | } 148 | }, 149 | { 150 | "type": "objectProp", 151 | "range": { 152 | "start": 125, 153 | "end": 139, 154 | }, 155 | "name": { 156 | "type": "string", 157 | "range": { 158 | "start": 125, 159 | "end": 132, 160 | }, 161 | "value": "false" 162 | }, 163 | "value": { 164 | "type": "boolean", 165 | "range": { 166 | "start": 134, 167 | "end": 139, 168 | }, 169 | "value": "false" 170 | } 171 | }, 172 | { 173 | "type": "objectProp", 174 | "range": { 175 | "start": 145, 176 | "end": 157, 177 | }, 178 | "name": { 179 | "type": "string", 180 | "range": { 181 | "start": 145, 182 | "end": 151, 183 | }, 184 | "value": 
"null" 185 | }, 186 | "value": { 187 | "type": "null", 188 | "range": { 189 | "start": 153, 190 | "end": 157, 191 | } 192 | } 193 | } 194 | ] 195 | }, 196 | "comments": [ 197 | ] 198 | } 199 | -------------------------------------------------------------------------------- /src/errors.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | 3 | use crate::ParseStringErrorKind; 4 | 5 | use super::common::Range; 6 | 7 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] 8 | pub enum ParseErrorKind { 9 | CommentsNotAllowed, 10 | ExpectedColonAfterObjectKey, 11 | ExpectedObjectValue, 12 | ExpectedDigit, 13 | ExpectedDigitFollowingNegativeSign, 14 | ExpectedPlusMinusOrDigitInNumberLiteral, 15 | ExpectedStringObjectProperty, 16 | HexadecimalNumbersNotAllowed, 17 | MultipleRootJsonValues, 18 | SingleQuotedStringsNotAllowed, 19 | String(ParseStringErrorKind), 20 | TrailingCommasNotAllowed, 21 | UnaryPlusNumbersNotAllowed, 22 | UnexpectedCloseBrace, 23 | UnexpectedCloseBracket, 24 | UnexpectedColon, 25 | UnexpectedComma, 26 | UnexpectedToken, 27 | UnexpectedTokenInObject, 28 | UnexpectedWord, 29 | UnterminatedArray, 30 | UnterminatedCommentBlock, 31 | UnterminatedObject, 32 | } 33 | 34 | impl std::fmt::Display for ParseErrorKind { 35 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 36 | use ParseErrorKind::*; 37 | match self { 38 | CommentsNotAllowed => { 39 | write!(f, "Comments are not allowed") 40 | } 41 | ExpectedColonAfterObjectKey => { 42 | write!(f, "Expected colon after the string or word in object property") 43 | } 44 | ExpectedDigit => { 45 | write!(f, "Expected digit") 46 | } 47 | ExpectedDigitFollowingNegativeSign => { 48 | write!(f, "Expected digit following negative sign") 49 | } 50 | ExpectedPlusMinusOrDigitInNumberLiteral => { 51 | write!(f, "Expected plus, minus, or digit in number literal") 52 | } 53 | ExpectedObjectValue => { 54 | write!(f, "Expected value after colon in object property") 
55 | } 56 | ExpectedStringObjectProperty => { 57 | write!(f, "Expected string for object property") 58 | } 59 | HexadecimalNumbersNotAllowed => { 60 | write!(f, "Hexadecimal numbers are not allowed") 61 | } 62 | MultipleRootJsonValues => { 63 | write!(f, "Text cannot contain more than one JSON value") 64 | } 65 | SingleQuotedStringsNotAllowed => { 66 | write!(f, "Single-quoted strings are not allowed") 67 | } 68 | String(kind) => kind.fmt(f), 69 | TrailingCommasNotAllowed => { 70 | write!(f, "Trailing commas are not allowed") 71 | } 72 | UnaryPlusNumbersNotAllowed => { 73 | write!(f, "Unary plus on numbers is not allowed") 74 | } 75 | UnexpectedCloseBrace => { 76 | write!(f, "Unexpected close brace") 77 | } 78 | UnexpectedCloseBracket => { 79 | write!(f, "Unexpected close bracket") 80 | } 81 | UnexpectedColon => { 82 | write!(f, "Unexpected colon") 83 | } 84 | UnexpectedComma => { 85 | write!(f, "Unexpected comma") 86 | } 87 | UnexpectedWord => { 88 | write!(f, "Unexpected word") 89 | } 90 | UnexpectedToken => { 91 | write!(f, "Unexpected token") 92 | } 93 | UnexpectedTokenInObject => { 94 | write!(f, "Unexpected token in object") 95 | } 96 | UnterminatedArray => { 97 | write!(f, "Unterminated array") 98 | } 99 | UnterminatedCommentBlock => { 100 | write!(f, "Unterminated comment block") 101 | } 102 | UnterminatedObject => { 103 | write!(f, "Unterminated object") 104 | } 105 | } 106 | } 107 | } 108 | 109 | #[derive(Debug, Clone, PartialEq)] 110 | struct ParseErrorInner { 111 | range: Range, 112 | line_display: usize, 113 | column_display: usize, 114 | kind: ParseErrorKind, 115 | } 116 | 117 | /// Error that could occur while parsing or tokenizing. 
118 | #[derive(Debug, Clone, PartialEq)] 119 | pub struct ParseError(Box); 120 | 121 | impl std::error::Error for ParseError {} 122 | 123 | impl ParseError { 124 | pub(crate) fn new(range: Range, kind: ParseErrorKind, file_text: &str) -> ParseError { 125 | let (line_display, column_display) = get_line_and_column_display(range, file_text); 126 | ParseError(Box::new(ParseErrorInner { 127 | range, 128 | line_display, 129 | column_display, 130 | kind, 131 | })) 132 | } 133 | 134 | /// Start and end position of the error. 135 | pub fn range(&self) -> Range { 136 | self.0.range 137 | } 138 | 139 | /// 1-indexed line number the error occurred on. 140 | pub fn line_display(&self) -> usize { 141 | self.0.line_display 142 | } 143 | 144 | /// 1-indexed column number the error occurred on. 145 | /// 146 | /// Note: Use the `error_unicode_width` feature to get the correct column 147 | /// number for Unicode characters on the line, otherwise this is just the 148 | /// number of characters by default. 149 | pub fn column_display(&self) -> usize { 150 | self.0.column_display 151 | } 152 | 153 | /// Error message. 
154 | pub fn kind(&self) -> &ParseErrorKind { 155 | &self.0.kind 156 | } 157 | } 158 | 159 | impl fmt::Display for ParseError { 160 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 161 | let inner = &*self.0; 162 | write!( 163 | f, 164 | "{} on line {} column {}", 165 | inner.kind, inner.line_display, inner.column_display 166 | ) 167 | } 168 | } 169 | 170 | fn get_line_and_column_display(range: Range, file_text: &str) -> (usize, usize) { 171 | let mut line_index = 0; 172 | let mut column_index = 0; 173 | for c in file_text[..range.start].chars() { 174 | if c == '\n' { 175 | line_index += 1; 176 | column_index = 0; 177 | } else { 178 | #[cfg(feature = "error_unicode_width")] 179 | { 180 | if let Some(width) = unicode_width::UnicodeWidthChar::width_cjk(c) { 181 | column_index += width; 182 | } 183 | } 184 | #[cfg(not(feature = "error_unicode_width"))] 185 | { 186 | column_index += 1; 187 | } 188 | } 189 | } 190 | (line_index + 1, column_index + 1) 191 | } 192 | -------------------------------------------------------------------------------- /benches/data/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | /* Visit https://aka.ms/tsconfig.json to read more about this file */ 4 | 5 | /* Basic Options */ 6 | // "incremental": true, /* Enable incremental compilation */ 7 | "target": "es5", /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019', 'ES2020', or 'ESNEXT'. */ 8 | "module": "commonjs", /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', 'es2020', or 'ESNext'. */ 9 | // "lib": [], /* Specify library files to be included in the compilation. */ 10 | // "allowJs": true, /* Allow javascript files to be compiled. */ 11 | // "checkJs": true, /* Report errors in .js files. */ 12 | // "jsx": "preserve", /* Specify JSX code generation: 'preserve', 'react-native', or 'react'. 
*/ 13 | // "declaration": true, /* Generates corresponding '.d.ts' file. */ 14 | // "declarationMap": true, /* Generates a sourcemap for each corresponding '.d.ts' file. */ 15 | // "sourceMap": true, /* Generates corresponding '.map' file. */ 16 | // "outFile": "./", /* Concatenate and emit output to single file. */ 17 | // "outDir": "./", /* Redirect output structure to the directory. */ 18 | // "rootDir": "./", /* Specify the root directory of input files. Use to control the output directory structure with --outDir. */ 19 | // "composite": true, /* Enable project compilation */ 20 | // "tsBuildInfoFile": "./", /* Specify file to store incremental compilation information */ 21 | // "removeComments": true, /* Do not emit comments to output. */ 22 | // "noEmit": true, /* Do not emit outputs. */ 23 | // "importHelpers": true, /* Import emit helpers from 'tslib'. */ 24 | // "downlevelIteration": true, /* Provide full support for iterables in 'for-of', spread, and destructuring when targeting 'ES5' or 'ES3'. */ 25 | // "isolatedModules": true, /* Transpile each file as a separate module (similar to 'ts.transpileModule'). */ 26 | 27 | /* Strict Type-Checking Options */ 28 | "strict": true, /* Enable all strict type-checking options. */ 29 | // "noImplicitAny": true, /* Raise error on expressions and declarations with an implied 'any' type. */ 30 | // "strictNullChecks": true, /* Enable strict null checks. */ 31 | // "strictFunctionTypes": true, /* Enable strict checking of function types. */ 32 | // "strictBindCallApply": true, /* Enable strict 'bind', 'call', and 'apply' methods on functions. */ 33 | // "strictPropertyInitialization": true, /* Enable strict checking of property initialization in classes. */ 34 | // "noImplicitThis": true, /* Raise error on 'this' expressions with an implied 'any' type. */ 35 | // "alwaysStrict": true, /* Parse in strict mode and emit "use strict" for each source file. 
*/ 36 | 37 | /* Additional Checks */ 38 | // "noUnusedLocals": true, /* Report errors on unused locals. */ 39 | // "noUnusedParameters": true, /* Report errors on unused parameters. */ 40 | // "noImplicitReturns": true, /* Report error when not all code paths in function return a value. */ 41 | // "noFallthroughCasesInSwitch": true, /* Report errors for fallthrough cases in switch statement. */ 42 | 43 | /* Module Resolution Options */ 44 | // "moduleResolution": "node", /* Specify module resolution strategy: 'node' (Node.js) or 'classic' (TypeScript pre-1.6). */ 45 | // "baseUrl": "./", /* Base directory to resolve non-absolute module names. */ 46 | // "paths": {}, /* A series of entries which re-map imports to lookup locations relative to the 'baseUrl'. */ 47 | // "rootDirs": [], /* List of root folders whose combined content represents the structure of the project at runtime. */ 48 | // "typeRoots": [], /* List of folders to include type definitions from. */ 49 | // "types": [], /* Type declaration files to be included in compilation. */ 50 | // "allowSyntheticDefaultImports": true, /* Allow default imports from modules with no default export. This does not affect code emit, just typechecking. */ 51 | "esModuleInterop": true, /* Enables emit interoperability between CommonJS and ES Modules via creation of namespace objects for all imports. Implies 'allowSyntheticDefaultImports'. */ 52 | // "preserveSymlinks": true, /* Do not resolve the real path of symlinks. */ 53 | // "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */ 54 | 55 | /* Source Map Options */ 56 | // "sourceRoot": "", /* Specify the location where debugger should locate TypeScript files instead of source locations. */ 57 | // "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */ 58 | // "inlineSourceMap": true, /* Emit a single file with source maps instead of having a separate file. 
*/ 59 | // "inlineSources": true, /* Emit the source alongside the sourcemaps within a single file; requires '--inlineSourceMap' or '--sourceMap' to be set. */ 60 | 61 | /* Experimental Options */ 62 | // "experimentalDecorators": true, /* Enables experimental support for ES7 decorators. */ 63 | // "emitDecoratorMetadata": true, /* Enables experimental support for emitting type metadata for decorators. */ 64 | 65 | /* Advanced Options */ 66 | "skipLibCheck": true, /* Skip type checking of declaration files. */ 67 | "forceConsistentCasingInFileNames": true /* Disallow inconsistently-cased references to the same file. */ 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/parse_to_value.rs: -------------------------------------------------------------------------------- 1 | use super::CollectOptions; 2 | use super::ParseOptions; 3 | use super::ast; 4 | use super::errors::ParseError; 5 | use super::parse_to_ast; 6 | use super::value::*; 7 | use crate::value::Map; 8 | 9 | /// Parses a string containing JSONC to a `JsonValue`. 10 | /// 11 | /// Returns `None` when the provided string is empty or whitespace. 12 | /// 13 | /// # Example 14 | /// 15 | /// ``` 16 | /// use jsonc_parser::parse_to_value; 17 | /// 18 | /// let json_value = parse_to_value(r#"{ "test": 5 } // test"#, &Default::default()).expect("Should parse."); 19 | /// ``` 20 | pub fn parse_to_value<'a>(text: &'a str, options: &ParseOptions) -> Result>, ParseError> { 21 | let value = parse_to_ast( 22 | text, 23 | &CollectOptions { 24 | comments: crate::CommentCollectionStrategy::Off, 25 | tokens: false, 26 | }, 27 | options, 28 | )? 
29 | .value; 30 | Ok(value.map(handle_value)) 31 | } 32 | 33 | fn handle_value(value: ast::Value) -> JsonValue { 34 | match value { 35 | ast::Value::StringLit(lit) => JsonValue::String(lit.value), 36 | ast::Value::NumberLit(lit) => JsonValue::Number(lit.value), 37 | ast::Value::BooleanLit(lit) => JsonValue::Boolean(lit.value), 38 | ast::Value::Object(obj) => JsonValue::Object(handle_object(obj)), 39 | ast::Value::Array(arr) => JsonValue::Array(handle_array(arr)), 40 | ast::Value::NullKeyword(_) => JsonValue::Null, 41 | } 42 | } 43 | 44 | fn handle_array(arr: ast::Array) -> JsonArray { 45 | let elements = arr.elements.into_iter().map(handle_value).collect(); 46 | 47 | JsonArray::new(elements) 48 | } 49 | 50 | fn handle_object(obj: ast::Object) -> JsonObject { 51 | let mut props = Map::with_capacity(obj.properties.len()); 52 | for prop in obj.properties.into_iter() { 53 | let prop_name = prop.name.into_string(); 54 | let prop_value = handle_value(prop.value); 55 | props.insert(prop_name, prop_value); 56 | } 57 | JsonObject::new(props) 58 | } 59 | 60 | #[cfg(test)] 61 | mod tests { 62 | use crate::errors::ParseErrorKind; 63 | 64 | use super::*; 65 | use std::borrow::Cow; 66 | 67 | #[test] 68 | fn it_should_parse_object() { 69 | let value = parse_to_value( 70 | r#"{ 71 | "a": null, 72 | "b": [null, "text"], 73 | "c": true, 74 | d: 25.55 75 | }"#, 76 | &Default::default(), 77 | ) 78 | .unwrap() 79 | .unwrap(); 80 | 81 | let mut object_map = Map::new(); 82 | object_map.insert(String::from("a"), JsonValue::Null); 83 | object_map.insert( 84 | String::from("b"), 85 | JsonValue::Array(vec![JsonValue::Null, JsonValue::String(Cow::Borrowed("text"))].into()), 86 | ); 87 | object_map.insert(String::from("c"), JsonValue::Boolean(true)); 88 | object_map.insert(String::from("d"), JsonValue::Number("25.55")); 89 | assert_eq!(value, JsonValue::Object(object_map.into())); 90 | } 91 | 92 | #[test] 93 | fn it_should_parse_boolean_false() { 94 | let value = parse_to_value("false", 
&Default::default()).unwrap().unwrap(); 95 | assert_eq!(value, JsonValue::Boolean(false)); 96 | let value = parse_to_value("true", &Default::default()).unwrap().unwrap(); 97 | assert_eq!(value, JsonValue::Boolean(true)); 98 | } 99 | 100 | #[test] 101 | fn it_should_parse_boolean_true() { 102 | let value = parse_to_value("true", &Default::default()).unwrap().unwrap(); 103 | assert_eq!(value, JsonValue::Boolean(true)); 104 | } 105 | 106 | #[test] 107 | fn it_should_parse_number() { 108 | let value = parse_to_value("50", &Default::default()).unwrap().unwrap(); 109 | assert_eq!(value, JsonValue::Number("50")); 110 | } 111 | 112 | #[test] 113 | fn it_should_parse_string() { 114 | let value = parse_to_value(r#""test""#, &Default::default()).unwrap().unwrap(); 115 | assert_eq!(value, JsonValue::String(Cow::Borrowed("test"))); 116 | } 117 | 118 | #[test] 119 | fn it_should_parse_string_with_quotes() { 120 | let value = parse_to_value(r#""echo \"test\"""#, &Default::default()) 121 | .unwrap() 122 | .unwrap(); 123 | assert_eq!(value, JsonValue::String(Cow::Borrowed(r#"echo "test""#))); 124 | } 125 | 126 | #[test] 127 | fn it_should_parse_array() { 128 | let value = parse_to_value(r#"[false, true]"#, &Default::default()) 129 | .unwrap() 130 | .unwrap(); 131 | assert_eq!( 132 | value, 133 | JsonValue::Array(vec![JsonValue::Boolean(false), JsonValue::Boolean(true)].into()) 134 | ); 135 | } 136 | 137 | #[test] 138 | fn it_should_parse_null() { 139 | let value = parse_to_value("null", &Default::default()).unwrap().unwrap(); 140 | assert_eq!(value, JsonValue::Null); 141 | } 142 | 143 | #[test] 144 | fn it_should_parse_empty() { 145 | let value = parse_to_value("", &Default::default()).unwrap(); 146 | assert!(value.is_none()); 147 | } 148 | 149 | #[test] 150 | fn error_unexpected_token() { 151 | let err = parse_to_value("{\n \"a\":\u{200b}5 }", &Default::default()) 152 | .err() 153 | .unwrap(); 154 | assert_eq!(err.range().start, 8); 155 | assert_eq!(err.range().end, 11); 156 | 
assert_eq!(err.kind().clone(), ParseErrorKind::UnexpectedToken); 157 | } 158 | 159 | #[test] 160 | fn it_should_parse_surrogate_pair() { 161 | // RFC 8259 § 7: non-BMP character 𝄞 (U+1D11E) should be escaped as surrogate pair \uD834\uDD1E 162 | let src = r#""\uD834\uDD1E""#; 163 | let v = parse_to_value(src, &Default::default()).unwrap().unwrap(); 164 | if let JsonValue::String(s) = v { 165 | assert_eq!("\u{1D11E}", s.as_ref()); 166 | } else { 167 | panic!("Expected string value, got {:?}", v); 168 | } 169 | } 170 | 171 | #[test] 172 | fn it_should_parse_multiple_surrogate_pairs() { 173 | let src = r#""\uD834\uDD1E\uD834\uDD1E""#; 174 | let v = parse_to_value(src, &Default::default()).unwrap().unwrap(); 175 | if let JsonValue::String(s) = v { 176 | assert_eq!("\u{1D11E}\u{1D11E}", s.as_ref()); 177 | } else { 178 | panic!("Expected string value, got {:?}", v); 179 | } 180 | } 181 | 182 | #[test] 183 | fn it_should_parse_mixed_escapes_with_surrogate_pairs() { 184 | // "A𝄞B" where 𝄞 is encoded as surrogate pair 185 | let src = r#""\u0041\uD834\uDD1E\u0042""#; 186 | let v = parse_to_value(src, &Default::default()).unwrap().unwrap(); 187 | if let JsonValue::String(s) = v { 188 | assert_eq!("A\u{1D11E}B", s.as_ref()); 189 | } else { 190 | panic!("Expected string value, got {:?}", v); 191 | } 192 | } 193 | 194 | #[test] 195 | fn it_should_error_on_unpaired_high_surrogate_with_text() { 196 | let src = r#""\uD834x""#; 197 | let err = parse_to_value(src, &Default::default()).err().unwrap(); 198 | assert!(err.to_string().contains("unpaired high surrogate")); 199 | } 200 | 201 | #[test] 202 | fn it_should_error_on_unpaired_high_surrogate_at_eof() { 203 | let src = r#""\uD834""#; 204 | let err = parse_to_value(src, &Default::default()).err().unwrap(); 205 | assert!(err.to_string().contains("unpaired high surrogate")); 206 | } 207 | 208 | #[test] 209 | fn it_should_error_on_high_surrogate_followed_by_non_low_surrogate() { 210 | let src = r#""\uD834\u0041""#; 211 | let err = 
parse_to_value(src, &Default::default()).err().unwrap(); 212 | assert!(err.to_string().contains("not followed by low surrogate")); 213 | } 214 | 215 | #[test] 216 | fn it_should_error_on_unpaired_low_surrogate() { 217 | // This test verifies existing behavior is maintained 218 | let src = r#""\uDC00""#; 219 | let err = parse_to_value(src, &Default::default()).err().unwrap(); 220 | assert!(err.to_string().contains("unpaired low surrogate")); 221 | } 222 | } 223 | -------------------------------------------------------------------------------- /tests/specs/comments/inline-comments.txt: -------------------------------------------------------------------------------- 1 | { 2 | "value": { 3 | "type": "object", 4 | "range": { 5 | "start": 5, 6 | "end": 102, 7 | }, 8 | "properties": [ 9 | { 10 | "type": "objectProp", 11 | "range": { 12 | "start": 21, 13 | "end": 37, 14 | }, 15 | "name": { 16 | "type": "string", 17 | "range": { 18 | "start": 21, 19 | "end": 24, 20 | }, 21 | "value": "a" 22 | }, 23 | "value": { 24 | "type": "number", 25 | "range": { 26 | "start": 36, 27 | "end": 37, 28 | }, 29 | "value": "5" 30 | } 31 | }, 32 | { 33 | "type": "objectProp", 34 | "range": { 35 | "start": 52, 36 | "end": 69, 37 | }, 38 | "name": { 39 | "type": "string", 40 | "range": { 41 | "start": 52, 42 | "end": 55, 43 | }, 44 | "value": "b" 45 | }, 46 | "value": { 47 | "type": "array", 48 | "range": { 49 | "start": 62, 50 | "end": 69, 51 | }, 52 | "elements": [ 53 | ] 54 | } 55 | }, 56 | { 57 | "type": "objectProp", 58 | "range": { 59 | "start": 85, 60 | "end": 94, 61 | }, 62 | "name": { 63 | "type": "string", 64 | "range": { 65 | "start": 85, 66 | "end": 88, 67 | }, 68 | "value": "c" 69 | }, 70 | "value": { 71 | "type": "null", 72 | "range": { 73 | "start": 90, 74 | "end": 94, 75 | } 76 | } 77 | } 78 | ] 79 | }, 80 | "comments": [ 81 | { 82 | "pos": 0, 83 | "comments": [ 84 | { 85 | "type": "block", 86 | "range": { 87 | "start": 0, 88 | "end": 5, 89 | }, 90 | "value": "1" 91 | } 92 | ] 93 
| }, 94 | { 95 | "pos": 5, 96 | "comments": [ 97 | { 98 | "type": "block", 99 | "range": { 100 | "start": 0, 101 | "end": 5, 102 | }, 103 | "value": "1" 104 | } 105 | ] 106 | }, 107 | { 108 | "pos": 6, 109 | "comments": [ 110 | { 111 | "type": "line", 112 | "range": { 113 | "start": 7, 114 | "end": 11, 115 | }, 116 | "value": " 2" 117 | }, 118 | { 119 | "type": "block", 120 | "range": { 121 | "start": 16, 122 | "end": 21, 123 | }, 124 | "value": "3" 125 | } 126 | ] 127 | }, 128 | { 129 | "pos": 21, 130 | "comments": [ 131 | { 132 | "type": "line", 133 | "range": { 134 | "start": 7, 135 | "end": 11, 136 | }, 137 | "value": " 2" 138 | }, 139 | { 140 | "type": "block", 141 | "range": { 142 | "start": 16, 143 | "end": 21, 144 | }, 145 | "value": "3" 146 | } 147 | ] 148 | }, 149 | { 150 | "pos": 24, 151 | "comments": [ 152 | { 153 | "type": "block", 154 | "range": { 155 | "start": 24, 156 | "end": 29, 157 | }, 158 | "value": "4" 159 | } 160 | ] 161 | }, 162 | { 163 | "pos": 29, 164 | "comments": [ 165 | { 166 | "type": "block", 167 | "range": { 168 | "start": 24, 169 | "end": 29, 170 | }, 171 | "value": "4" 172 | } 173 | ] 174 | }, 175 | { 176 | "pos": 30, 177 | "comments": [ 178 | { 179 | "type": "block", 180 | "range": { 181 | "start": 31, 182 | "end": 36, 183 | }, 184 | "value": "5" 185 | } 186 | ] 187 | }, 188 | { 189 | "pos": 36, 190 | "comments": [ 191 | { 192 | "type": "block", 193 | "range": { 194 | "start": 31, 195 | "end": 36, 196 | }, 197 | "value": "5" 198 | } 199 | ] 200 | }, 201 | { 202 | "pos": 37, 203 | "comments": [ 204 | { 205 | "type": "block", 206 | "range": { 207 | "start": 37, 208 | "end": 42, 209 | }, 210 | "value": "6" 211 | } 212 | ] 213 | }, 214 | { 215 | "pos": 42, 216 | "comments": [ 217 | { 218 | "type": "block", 219 | "range": { 220 | "start": 37, 221 | "end": 42, 222 | }, 223 | "value": "6" 224 | } 225 | ] 226 | }, 227 | { 228 | "pos": 43, 229 | "comments": [ 230 | { 231 | "type": "line", 232 | "range": { 233 | "start": 44, 234 | "end": 
47, 235 | }, 236 | "value": "7" 237 | } 238 | ] 239 | }, 240 | { 241 | "pos": 52, 242 | "comments": [ 243 | { 244 | "type": "line", 245 | "range": { 246 | "start": 44, 247 | "end": 47, 248 | }, 249 | "value": "7" 250 | } 251 | ] 252 | }, 253 | { 254 | "pos": 56, 255 | "comments": [ 256 | { 257 | "type": "block", 258 | "range": { 259 | "start": 57, 260 | "end": 62, 261 | }, 262 | "value": "8" 263 | } 264 | ] 265 | }, 266 | { 267 | "pos": 62, 268 | "comments": [ 269 | { 270 | "type": "block", 271 | "range": { 272 | "start": 57, 273 | "end": 62, 274 | }, 275 | "value": "8" 276 | } 277 | ] 278 | }, 279 | { 280 | "pos": 63, 281 | "comments": [ 282 | { 283 | "type": "block", 284 | "range": { 285 | "start": 63, 286 | "end": 68, 287 | }, 288 | "value": "9" 289 | } 290 | ] 291 | }, 292 | { 293 | "pos": 68, 294 | "comments": [ 295 | { 296 | "type": "block", 297 | "range": { 298 | "start": 63, 299 | "end": 68, 300 | }, 301 | "value": "9" 302 | } 303 | ] 304 | }, 305 | { 306 | "pos": 69, 307 | "comments": [ 308 | { 309 | "type": "block", 310 | "range": { 311 | "start": 69, 312 | "end": 75, 313 | }, 314 | "value": "10" 315 | } 316 | ] 317 | }, 318 | { 319 | "pos": 75, 320 | "comments": [ 321 | { 322 | "type": "block", 323 | "range": { 324 | "start": 69, 325 | "end": 75, 326 | }, 327 | "value": "10" 328 | } 329 | ] 330 | }, 331 | { 332 | "pos": 76, 333 | "comments": [ 334 | { 335 | "type": "line", 336 | "range": { 337 | "start": 76, 338 | "end": 80, 339 | }, 340 | "value": "11" 341 | } 342 | ] 343 | }, 344 | { 345 | "pos": 85, 346 | "comments": [ 347 | { 348 | "type": "line", 349 | "range": { 350 | "start": 76, 351 | "end": 80, 352 | }, 353 | "value": "11" 354 | } 355 | ] 356 | }, 357 | { 358 | "pos": 94, 359 | "comments": [ 360 | { 361 | "type": "block", 362 | "range": { 363 | "start": 95, 364 | "end": 101, 365 | }, 366 | "value": "13" 367 | } 368 | ] 369 | }, 370 | { 371 | "pos": 101, 372 | "comments": [ 373 | { 374 | "type": "block", 375 | "range": { 376 | "start": 95, 377 | 
"end": 101, 378 | }, 379 | "value": "13" 380 | } 381 | ] 382 | }, 383 | { 384 | "pos": 102, 385 | "comments": [ 386 | { 387 | "type": "line", 388 | "range": { 389 | "start": 103, 390 | "end": 108, 391 | }, 392 | "value": " 14" 393 | } 394 | ] 395 | }, 396 | { 397 | "pos": 108, 398 | "comments": [ 399 | { 400 | "type": "line", 401 | "range": { 402 | "start": 103, 403 | "end": 108, 404 | }, 405 | "value": " 14" 406 | } 407 | ] 408 | } 409 | ] 410 | } 411 | -------------------------------------------------------------------------------- /src/value.rs: -------------------------------------------------------------------------------- 1 | use core::slice::Iter; 2 | use std::borrow::Cow; 3 | 4 | /// A JSON value. 5 | #[derive(Clone, PartialEq, Debug)] 6 | pub enum JsonValue<'a> { 7 | String(Cow<'a, str>), 8 | Number(&'a str), 9 | Boolean(bool), 10 | Object(JsonObject<'a>), 11 | Array(JsonArray<'a>), 12 | Null, 13 | } 14 | 15 | #[cfg(not(feature = "preserve_order"))] 16 | pub type Map = std::collections::HashMap; 17 | #[cfg(feature = "preserve_order")] 18 | pub type Map = indexmap::IndexMap; 19 | 20 | /// A JSON object. 
#[derive(Clone, PartialEq, Debug)]
pub struct JsonObject<'a>(Map<String, JsonValue<'a>>);

// Consuming iteration yields `(name, value)` pairs straight from the inner map,
// so the iterator type follows whichever map the `preserve_order` feature selects.
impl<'a> IntoIterator for JsonObject<'a> {
  type Item = (String, JsonValue<'a>);
  #[cfg(not(feature = "preserve_order"))]
  type IntoIter = std::collections::hash_map::IntoIter<String, JsonValue<'a>>;
  #[cfg(feature = "preserve_order")]
  type IntoIter = indexmap::map::IntoIter<String, JsonValue<'a>>;

  fn into_iter(self) -> Self::IntoIter {
    self.0.into_iter()
  }
}

// Wraps an already-built property map; equivalent to `JsonObject::new`.
impl<'a> From<Map<String, JsonValue<'a>>> for JsonObject<'a> {
  fn from(properties: Map<String, JsonValue<'a>>) -> JsonObject<'a> {
    JsonObject::new(properties)
  }
}

// Removes `key` from the map and returns the stored `(name, value)` pair, or
// `None` when the key is absent. Two variants exist because the two map types
// name the operation differently.
#[cfg(not(feature = "preserve_order"))]
#[inline(always)]
fn remove_entry<'a>(map: &mut Map<String, JsonValue<'a>>, key: &str) -> Option<(String, JsonValue<'a>)> {
  map.remove_entry(key)
}

// `shift_remove_entry` is used (rather than a swap-remove) so the remaining
// properties keep their relative order.
#[cfg(feature = "preserve_order")]
#[inline(always)]
fn remove_entry<'a>(map: &mut Map<String, JsonValue<'a>>, key: &str) -> Option<(String, JsonValue<'a>)> {
  map.shift_remove_entry(key)
}

// Expands to an expression that removes property `$name` from `$self` and
// yields its payload, but only when the stored value is the
// `JsonValue::$value_type` variant. Evaluates to `None`, leaving the map
// untouched, when the key is missing or holds a different variant.
macro_rules! generate_take {
  ($self:ident, $name:ident, $value_type:ident) => {
    // Check the variant before removing. Removing first and re-inserting on a
    // mismatch would move the entry to the end of an order-preserving map.
    if matches!($self.0.get($name), Some(JsonValue::$value_type(_))) {
      match remove_entry(&mut $self.0, $name) {
        Some((_, JsonValue::$value_type(value))) => Some(value),
        // Not reachable after the check above; kept so the match is total.
        _ => None,
      }
    } else {
      None
    }
  };
}

// Expands to an expression that borrows the payload of property `$name` when
// it is the `JsonValue::$value_type` variant, and `None` otherwise.
macro_rules! generate_get {
  ($self:ident, $name:ident, $value_type:ident) => {
    match $self.0.get($name) {
      Some(JsonValue::$value_type(value)) => Some(value),
      _ => None,
    }
  };
}

impl<'a> JsonObject<'a> {
  /// Creates a new JsonObject.
  pub fn new(inner: Map<String, JsonValue<'a>>) -> JsonObject<'a> {
    JsonObject(inner)
  }

  /// Creates a new JsonObject with the specified capacity.
  pub fn with_capacity(capacity: usize) -> JsonObject<'a> {
    JsonObject(Map::with_capacity(capacity))
  }

  /// Drops the object returning the inner map.
89 | pub fn take_inner(self) -> Map> { 90 | self.0 91 | } 92 | 93 | /// Gets the number of properties. 94 | pub fn len(&self) -> usize { 95 | self.0.len() 96 | } 97 | 98 | /// Gets if there are no properties. 99 | pub fn is_empty(&self) -> bool { 100 | self.0.is_empty() 101 | } 102 | 103 | /// Gets a value in the object by its name. 104 | pub fn get(&self, name: &str) -> Option<&JsonValue<'a>> { 105 | self.0.get(name) 106 | } 107 | 108 | /// Gets a string property value from the object by name. 109 | /// Returns `None` when not a string or it doesn't exist. 110 | pub fn get_string(&self, name: &str) -> Option<&Cow<'a, str>> { 111 | generate_get!(self, name, String) 112 | } 113 | 114 | /// Gets a number property value from the object by name. 115 | /// Returns `None` when not a number or it doesn't exist. 116 | pub fn get_number(&self, name: &str) -> Option<&'a str> { 117 | generate_get!(self, name, Number) 118 | } 119 | 120 | /// Gets a boolean property value from the object by name. 121 | /// Returns `None` when not a boolean or it doesn't exist. 122 | pub fn get_boolean(&self, name: &str) -> Option { 123 | let result = generate_get!(self, name, Boolean); 124 | result.cloned() 125 | } 126 | 127 | /// Gets an object property value from the object by name. 128 | /// Returns `None` when not an object or it doesn't exist. 129 | pub fn get_object(&self, name: &str) -> Option<&JsonObject<'a>> { 130 | generate_get!(self, name, Object) 131 | } 132 | 133 | /// Gets an array property value from the object by name. 134 | /// Returns `None` when not an array or it doesn't exist. 135 | pub fn get_array(&self, name: &str) -> Option<&JsonArray<'a>> { 136 | generate_get!(self, name, Array) 137 | } 138 | 139 | /// Takes a value from the object by name. 140 | /// Returns `None` when it doesn't exist. 
141 | pub fn take(&mut self, name: &str) -> Option> { 142 | remove_entry(&mut self.0, name).map(|(_, value)| value) 143 | } 144 | 145 | /// Takes a string property value from the object by name. 146 | /// Returns `None` when not a string or it doesn't exist. 147 | pub fn take_string(&mut self, name: &str) -> Option> { 148 | generate_take!(self, name, String) 149 | } 150 | 151 | /// Takes a number property value from the object by name. 152 | /// Returns `None` when not a number or it doesn't exist. 153 | pub fn take_number(&mut self, name: &str) -> Option<&'a str> { 154 | generate_take!(self, name, Number) 155 | } 156 | 157 | /// Takes a boolean property value from the object by name. 158 | /// Returns `None` when not a boolean or it doesn't exist. 159 | pub fn take_boolean(&mut self, name: &str) -> Option { 160 | generate_take!(self, name, Boolean) 161 | } 162 | 163 | /// Takes an object property value from the object by name. 164 | /// Returns `None` when not an object or it doesn't exist. 165 | pub fn take_object(&mut self, name: &str) -> Option> { 166 | generate_take!(self, name, Object) 167 | } 168 | 169 | /// Takes an array property value from the object by name. 170 | /// Returns `None` when not an array or it doesn't exist. 171 | pub fn take_array(&mut self, name: &str) -> Option> { 172 | generate_take!(self, name, Array) 173 | } 174 | } 175 | 176 | /// A JSON array. 177 | #[derive(Clone, PartialEq, Debug)] 178 | pub struct JsonArray<'a>(Vec>); 179 | 180 | impl<'a> IntoIterator for JsonArray<'a> { 181 | type Item = JsonValue<'a>; 182 | type IntoIter = std::vec::IntoIter; 183 | 184 | fn into_iter(self) -> Self::IntoIter { 185 | self.0.into_iter() 186 | } 187 | } 188 | 189 | impl<'a> From>> for JsonArray<'a> { 190 | fn from(elements: Vec>) -> JsonArray<'a> { 191 | JsonArray::new(elements) 192 | } 193 | } 194 | 195 | impl<'a> JsonArray<'a> { 196 | /// Creates a new JsonArray. 
197 | pub fn new(inner: Vec>) -> JsonArray<'a> { 198 | JsonArray(inner) 199 | } 200 | 201 | /// Drops the object returning the inner vector. 202 | pub fn take_inner(self) -> Vec> { 203 | self.0 204 | } 205 | 206 | /// Iterates over the array elements. 207 | pub fn iter(&self) -> Iter<'_, JsonValue<'a>> { 208 | self.0.iter() 209 | } 210 | 211 | /// Gets a value from the array by index. 212 | pub fn get(&self, index: usize) -> Option<&JsonValue<'a>> { 213 | self.0.get(index) 214 | } 215 | 216 | /// Gets the number of elements. 217 | pub fn len(&self) -> usize { 218 | self.0.len() 219 | } 220 | 221 | /// Gets if the array is empty. 222 | pub fn is_empty(&self) -> bool { 223 | self.0.is_empty() 224 | } 225 | } 226 | 227 | #[cfg(test)] 228 | mod test { 229 | use super::*; 230 | 231 | #[test] 232 | fn it_should_take() { 233 | let mut inner = Map::new(); 234 | inner.insert(String::from("prop"), JsonValue::String(Cow::Borrowed("asdf"))); 235 | inner.insert(String::from("other"), JsonValue::String(Cow::Borrowed("text"))); 236 | let mut obj = JsonObject::new(inner); 237 | 238 | assert_eq!(obj.len(), 2); 239 | assert_eq!(obj.take_string("asdf"), None); 240 | assert_eq!(obj.len(), 2); 241 | assert_eq!(obj.take_number("prop"), None); 242 | assert_eq!(obj.len(), 2); 243 | assert_eq!(obj.take_string("prop"), Some(Cow::Borrowed("asdf"))); 244 | assert_eq!(obj.len(), 1); 245 | assert_eq!(obj.take("something"), None); 246 | assert_eq!(obj.len(), 1); 247 | assert_eq!(obj.take("other"), Some(JsonValue::String(Cow::Borrowed("text")))); 248 | assert_eq!(obj.len(), 0); 249 | } 250 | 251 | #[test] 252 | fn it_should_get() { 253 | let mut inner = Map::new(); 254 | inner.insert(String::from("prop"), JsonValue::String(Cow::Borrowed("asdf"))); 255 | let obj = JsonObject::new(inner); 256 | 257 | assert_eq!(obj.len(), 1); 258 | assert_eq!(obj.get_string("asdf"), None); 259 | assert_eq!(obj.get_string("prop"), Some(&Cow::Borrowed("asdf"))); 260 | assert_eq!(obj.get("prop"), 
Some(&JsonValue::String(Cow::Borrowed("asdf")))); 261 | assert_eq!(obj.get("asdf"), None); 262 | assert_eq!(obj.len(), 1); 263 | } 264 | } 265 | -------------------------------------------------------------------------------- /tests/test.rs: -------------------------------------------------------------------------------- 1 | extern crate jsonc_parser; 2 | 3 | use jsonc_parser::ast::*; 4 | use jsonc_parser::common::*; 5 | use jsonc_parser::*; 6 | use pretty_assertions::assert_eq; 7 | use std::fs::{self}; 8 | use std::path::Path; 9 | use std::path::PathBuf; 10 | use std::rc::Rc; 11 | 12 | #[test] 13 | fn test_specs() { 14 | for json_path in get_json_file_paths_in_dir(Path::new("./tests/specs")) { 15 | let text_file_path = json_path.with_extension("txt"); 16 | let json_file_text = fs::read_to_string(&json_path).unwrap().replace("\r\n", "\n"); 17 | let result = parse_to_ast( 18 | &json_file_text, 19 | &CollectOptions { 20 | comments: CommentCollectionStrategy::Separate, 21 | tokens: true, 22 | }, 23 | &Default::default(), 24 | ) 25 | .expect("Expected no error."); 26 | let result_text = parse_result_to_test_str(&result); 27 | let expected_text = fs::read_to_string(&text_file_path).unwrap().replace("\r\n", "\n"); 28 | // fs::write(&text_file_path, result_text.clone()).unwrap(); 29 | assert_eq!(result_text.trim(), expected_text.trim()); 30 | } 31 | } 32 | 33 | #[cfg(feature = "cst")] 34 | #[test] 35 | fn test_cst() { 36 | for json_path in get_json_file_paths_in_dir(Path::new("./tests/specs")) { 37 | let json_file_text = fs::read_to_string(&json_path).unwrap().replace("\r\n", "\n"); 38 | 39 | eprintln!("Parsing: {:?}", json_path); 40 | let value = jsonc_parser::cst::CstRootNode::parse(&json_file_text, &ParseOptions::default()).unwrap(); 41 | let cst_string = value.to_string(); 42 | assert_eq!(cst_string, json_file_text); 43 | } 44 | } 45 | 46 | fn get_json_file_paths_in_dir(path: &Path) -> Vec { 47 | return read_dir_recursively(path); 48 | 49 | fn 
read_dir_recursively(dir_path: &Path) -> Vec { 50 | let mut result = Vec::new(); 51 | 52 | for entry in dir_path.read_dir().expect("read dir failed") { 53 | if let Ok(entry) = entry { 54 | let entry_path = entry.path(); 55 | if entry_path.is_file() { 56 | if let Some(ext) = entry_path.extension() { 57 | if ext == "json" { 58 | result.push(entry_path); 59 | } 60 | } 61 | } else { 62 | result.extend(read_dir_recursively(&entry_path)); 63 | } 64 | } 65 | } 66 | 67 | result 68 | } 69 | } 70 | 71 | // todo: move elsewhere and improve 72 | 73 | fn parse_result_to_test_str(parse_result: &ParseResult) -> String { 74 | let mut text = String::new(); 75 | text.push_str("{\n"); 76 | text.push_str(&format!( 77 | " \"value\": {},\n", 78 | match &parse_result.value { 79 | Some(value) => value_to_test_str(value).replace("\n", "\n "), 80 | None => String::from("null"), 81 | } 82 | )); 83 | text.push_str(" \"comments\": ["); 84 | let comments = parse_result.comments.as_ref().expect("Expected comments."); 85 | let collection_count = comments.len(); 86 | let mut comments = comments.iter().collect::>(); 87 | comments.sort_by(|a, b| a.0.cmp(b.0)); 88 | for (i, comment_collection) in comments.into_iter().enumerate() { 89 | text.push_str("\n "); 90 | text.push_str(&comments_to_test_str(comment_collection).replace("\n", "\n ")); 91 | if i + 1 < collection_count { 92 | text.push(','); 93 | } 94 | } 95 | text.push_str("\n ]\n"); 96 | text.push_str("}\n"); 97 | text 98 | } 99 | 100 | fn value_to_test_str(value: &Value) -> String { 101 | match value { 102 | Value::StringLit(lit) => string_lit_to_test_str(lit), 103 | Value::NumberLit(lit) => number_lit_to_test_str(lit), 104 | Value::BooleanLit(lit) => boolean_lit_to_test_str(lit), 105 | Value::Object(obj) => object_to_test_str(obj), 106 | Value::Array(arr) => array_to_test_str(arr), 107 | Value::NullKeyword(keyword) => null_keyword_to_test_str(keyword), 108 | } 109 | } 110 | 111 | fn range_to_test_str(range: Range) -> String { 112 | let mut 
text = String::new(); 113 | text.push_str("\"range\": {\n"); 114 | text.push_str(&format!(" \"start\": {},\n", range.start)); 115 | text.push_str(&format!(" \"end\": {},\n", range.end)); 116 | text.push('}'); 117 | text 118 | } 119 | 120 | fn string_lit_to_test_str(lit: &StringLit) -> String { 121 | lit_to_test_str("string", &lit.value, lit.range) 122 | } 123 | 124 | fn word_lit_to_test_str(lit: &WordLit) -> String { 125 | lit_to_test_str("word", lit.value, lit.range) 126 | } 127 | 128 | fn number_lit_to_test_str(lit: &NumberLit) -> String { 129 | lit_to_test_str("number", lit.value, lit.range) 130 | } 131 | 132 | fn boolean_lit_to_test_str(lit: &BooleanLit) -> String { 133 | lit_to_test_str("boolean", &lit.value.to_string(), lit.range) 134 | } 135 | 136 | fn lit_to_test_str(lit_type: &str, value: &str, range: Range) -> String { 137 | let mut text = String::new(); 138 | text.push_str("{\n"); 139 | text.push_str(&format!(" \"type\": \"{}\",\n", lit_type)); 140 | text.push_str(&format!(" {},\n", range_to_test_str(range).replace("\n", "\n "))); 141 | text.push_str(&format!(" \"value\": \"{}\"\n", escape_json_str(value))); 142 | text.push('}'); 143 | text 144 | } 145 | 146 | fn object_to_test_str(obj: &Object) -> String { 147 | let mut text = String::new(); 148 | text.push_str("{\n"); 149 | text.push_str(" \"type\": \"object\",\n"); 150 | text.push_str(&format!(" {},\n", range_to_test_str(obj.range).replace("\n", "\n "))); 151 | text.push_str(" \"properties\": ["); 152 | let prop_count = obj.properties.len(); 153 | for (i, prop) in obj.properties.iter().enumerate() { 154 | text.push_str("\n "); 155 | text.push_str(&object_prop_to_test_str(prop).replace("\n", "\n ")); 156 | if i + 1 < prop_count { 157 | text.push(','); 158 | } 159 | } 160 | text.push_str("\n ]\n"); 161 | text.push('}'); 162 | text 163 | } 164 | 165 | fn object_prop_to_test_str(obj_prop: &ObjectProp) -> String { 166 | let mut text = String::new(); 167 | text.push_str("{\n"); 168 | text.push_str(" 
\"type\": \"objectProp\",\n"); 169 | text.push_str(&format!( 170 | " {},\n", 171 | range_to_test_str(obj_prop.range).replace("\n", "\n ") 172 | )); 173 | text.push_str(&format!( 174 | " \"name\": {},\n", 175 | object_prop_name_to_test_str(&obj_prop.name).replace("\n", "\n ") 176 | )); 177 | text.push_str(&format!( 178 | " \"value\": {}\n", 179 | value_to_test_str(&obj_prop.value).replace("\n", "\n ") 180 | )); 181 | text.push('}'); 182 | text 183 | } 184 | 185 | fn object_prop_name_to_test_str(obj_prop_name: &ObjectPropName) -> String { 186 | match obj_prop_name { 187 | ObjectPropName::String(lit) => string_lit_to_test_str(lit), 188 | ObjectPropName::Word(word) => word_lit_to_test_str(word), 189 | } 190 | } 191 | 192 | fn array_to_test_str(arr: &Array) -> String { 193 | let mut text = String::new(); 194 | text.push_str("{\n"); 195 | text.push_str(" \"type\": \"array\",\n"); 196 | text.push_str(&format!(" {},\n", range_to_test_str(arr.range).replace("\n", "\n "))); 197 | text.push_str(" \"elements\": ["); 198 | let elements_count = arr.elements.len(); 199 | for (i, element) in arr.elements.iter().enumerate() { 200 | text.push_str("\n "); 201 | text.push_str(&value_to_test_str(element).replace("\n", "\n ")); 202 | if i + 1 < elements_count { 203 | text.push(','); 204 | } 205 | } 206 | text.push_str("\n ]\n"); 207 | text.push('}'); 208 | text 209 | } 210 | 211 | fn null_keyword_to_test_str(null_keyword: &NullKeyword) -> String { 212 | let mut text = String::new(); 213 | text.push_str("{\n"); 214 | text.push_str(" \"type\": \"null\",\n"); 215 | text.push_str(&format!( 216 | " {}\n", 217 | range_to_test_str(null_keyword.range).replace("\n", "\n ") 218 | )); 219 | text.push('}'); 220 | text 221 | } 222 | 223 | fn comments_to_test_str(comments: (&usize, &Rc>)) -> String { 224 | let mut text = String::new(); 225 | text.push_str("{\n"); 226 | text.push_str(&format!(" \"pos\": {},\n", comments.0)); 227 | text.push_str(" \"comments\": ["); 228 | let comments_count = 
comments.1.len(); 229 | for (i, comment) in comments.1.iter().enumerate() { 230 | text.push_str("\n "); 231 | text.push_str(&comment_to_test_str(comment).replace("\n", "\n ")); 232 | if i + 1 < comments_count { 233 | text.push(','); 234 | } 235 | } 236 | text.push_str("\n ]\n"); 237 | text.push('}'); 238 | text 239 | } 240 | 241 | fn comment_to_test_str(comment: &Comment) -> String { 242 | match comment { 243 | Comment::Line(line) => comment_line_to_test_str(line), 244 | Comment::Block(block) => comment_block_to_test_str(block), 245 | } 246 | } 247 | 248 | fn comment_line_to_test_str(line: &CommentLine) -> String { 249 | lit_to_test_str("line", line.text, line.range) 250 | } 251 | 252 | fn comment_block_to_test_str(block: &CommentBlock) -> String { 253 | lit_to_test_str("block", block.text, block.range) 254 | } 255 | 256 | fn escape_json_str(text: &str) -> String { 257 | text 258 | .replace("\\", "\\\\") 259 | .replace("\x08", "\\b") 260 | .replace("\x0C", "\\f") 261 | .replace("\r", "\\r") 262 | .replace("\t", "\\t") 263 | .replace("\n", "\\n") 264 | } 265 | -------------------------------------------------------------------------------- /benches/data/package.txt: -------------------------------------------------------------------------------- 1 | { 2 | "name": "code-oss-dev", 3 | "version": "1.56.0", 4 | "distro": "bf595b1f6780f5ba3c8cc511b0820871466079d3", 5 | "author": { 6 | "name": "Microsoft Corporation" 7 | }, 8 | "license": "MIT", 9 | "main": "./out/main", 10 | "private": true, 11 | "scripts": { 12 | "test": "mocha", 13 | "test-browser": "node test/unit/browser/index.js", 14 | "preinstall": "node build/npm/preinstall.js", 15 | "postinstall": "node build/npm/postinstall.js", 16 | "compile": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js compile", 17 | "watch": "npm-run-all -lp watch-client watch-extensions watch-extension-media", 18 | "watchd": "deemon yarn watch", 19 | "watch-webd": "deemon yarn watch-web", 20 | "kill-watchd": "deemon 
--kill yarn watch", 21 | "kill-watch-webd": "deemon --kill yarn watch-web", 22 | "restart-watchd": "deemon --restart yarn watch", 23 | "restart-watch-webd": "deemon --restart yarn watch-web", 24 | "watch-client": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js watch-client", 25 | "watch-clientd": "deemon yarn watch-client", 26 | "kill-watch-clientd": "deemon --kill yarn watch-client", 27 | "watch-extensions": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js watch-extensions", 28 | "watch-extension-media": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js watch-extension-media", 29 | "watch-extensionsd": "deemon yarn watch-extensions", 30 | "kill-watch-extensionsd": "deemon --kill yarn watch-extensions", 31 | "mocha": "mocha test/unit/node/all.js --delay", 32 | "precommit": "node build/hygiene.js", 33 | "gulp": "node --max_old_space_size=8192 ./node_modules/gulp/bin/gulp.js", 34 | "electron": "node build/lib/electron", 35 | "7z": "7z", 36 | "update-grammars": "node build/npm/update-all-grammars.js", 37 | "update-localization-extension": "node build/npm/update-localization-extension.js", 38 | "smoketest": "cd test/smoke && yarn compile && node test/index.js", 39 | "smoketest-no-compile": "cd test/smoke && node test/index.js", 40 | "download-builtin-extensions": "node build/lib/builtInExtensions.js", 41 | "download-builtin-extensions-cg": "node build/lib/builtInExtensionsCG.js", 42 | "monaco-compile-check": "tsc -p src/tsconfig.monaco.json --noEmit", 43 | "tsec-compile-check": "node node_modules/tsec/bin/tsec -p src/tsconfig.tsec.json", 44 | "valid-layers-check": "node build/lib/layersChecker.js", 45 | "strict-function-types-watch": "tsc --watch -p src/tsconfig.json --noEmit --strictFunctionTypes", 46 | "update-distro": "node build/npm/update-distro.js", 47 | "web": "node resources/web/code-web.js", 48 | "compile-web": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js compile-web", 49 | "watch-web": "node 
--max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js watch-web", 50 | "eslint": "node build/eslint", 51 | "electron-rebuild": "electron-rebuild --arch=arm64 --force --version=11.4.1", 52 | "playwright-install": "node build/azure-pipelines/common/installPlaywright.js", 53 | "compile-build": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js compile-build", 54 | "compile-extensions-build": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js compile-extensions-build", 55 | "minify-vscode": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js minify-vscode", 56 | "minify-vscode-reh": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js minify-vscode-reh", 57 | "minify-vscode-reh-web": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js minify-vscode-reh-web", 58 | "hygiene": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js hygiene", 59 | "core-ci": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js core-ci", 60 | "extensions-ci": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js extensions-ci" 61 | }, 62 | "dependencies": { 63 | "applicationinsights": "1.0.8", 64 | "chokidar": "3.5.1", 65 | "graceful-fs": "4.2.3", 66 | "http-proxy-agent": "^2.1.0", 67 | "https-proxy-agent": "^2.2.3", 68 | "iconv-lite-umd": "0.6.8", 69 | "jschardet": "2.3.0", 70 | "keytar": "7.2.0", 71 | "minimist": "^1.2.5", 72 | "native-is-elevated": "0.4.3", 73 | "native-keymap": "2.2.1", 74 | "native-watchdog": "1.3.0", 75 | "node-pty": "0.10.0-beta19", 76 | "nsfw": "2.1.2", 77 | "spdlog": "^0.11.1", 78 | "sudo-prompt": "9.2.1", 79 | "tas-client-umd": "0.1.4", 80 | "v8-inspect-profiler": "^0.0.20", 81 | "vscode-oniguruma": "1.3.1", 82 | "vscode-proxy-agent": "^0.8.2", 83 | "vscode-regexpp": "^3.1.0", 84 | "vscode-ripgrep": "^1.11.1", 85 | "vscode-sqlite3": "4.0.10", 86 | "vscode-textmate": "5.2.0", 87 | "xterm": "4.12.0-beta.15", 88 | "xterm-addon-search": "0.9.0-beta.1", 89 | 
"xterm-addon-unicode11": "0.3.0-beta.4", 90 | "xterm-addon-webgl": "0.11.0-beta.4", 91 | "yauzl": "^2.9.2", 92 | "yazl": "^2.4.3" 93 | }, 94 | "devDependencies": { 95 | "7zip": "0.0.6", 96 | "@types/applicationinsights": "0.20.0", 97 | "@types/chokidar": "2.1.3", 98 | "@types/cookie": "^0.3.3", 99 | "@types/copy-webpack-plugin": "^6.0.3", 100 | "@types/cssnano": "^4.0.0", 101 | "@types/debug": "4.1.5", 102 | "@types/graceful-fs": "4.1.2", 103 | "@types/gulp-postcss": "^8.0.0", 104 | "@types/http-proxy-agent": "^2.0.1", 105 | "@types/keytar": "^4.4.0", 106 | "@types/minimist": "^1.2.1", 107 | "@types/mocha": "^8.2.0", 108 | "@types/node": "^12.19.9", 109 | "@types/sinon": "^1.16.36", 110 | "@types/trusted-types": "^1.0.6", 111 | "@types/vscode-windows-registry": "^1.0.0", 112 | "@types/webpack": "^4.41.25", 113 | "@types/windows-foreground-love": "^0.3.0", 114 | "@types/windows-mutex": "^0.4.0", 115 | "@types/windows-process-tree": "^0.2.0", 116 | "@types/winreg": "^1.2.30", 117 | "@types/yauzl": "^2.9.1", 118 | "@types/yazl": "^2.4.2", 119 | "@typescript-eslint/eslint-plugin": "3.2.0", 120 | "@typescript-eslint/parser": "^3.3.0", 121 | "ansi-colors": "^3.2.3", 122 | "asar": "^3.0.3", 123 | "chromium-pickle-js": "^0.2.0", 124 | "copy-webpack-plugin": "^6.0.3", 125 | "cson-parser": "^1.3.3", 126 | "css-loader": "^3.2.0", 127 | "cssnano": "^4.1.10", 128 | "debounce": "^1.0.0", 129 | "deemon": "^1.4.0", 130 | "electron": "11.4.1", 131 | "electron-rebuild": "2.0.3", 132 | "eslint": "6.8.0", 133 | "eslint-plugin-jsdoc": "^19.1.0", 134 | "event-stream": "3.3.4", 135 | "fancy-log": "^1.3.3", 136 | "fast-plist": "0.1.2", 137 | "file-loader": "^4.2.0", 138 | "glob": "^5.0.13", 139 | "gulp": "^4.0.0", 140 | "gulp-atom-electron": "^1.30.1", 141 | "gulp-azure-storage": "^0.11.1", 142 | "gulp-bom": "^3.0.0", 143 | "gulp-buffer": "0.0.2", 144 | "gulp-concat": "^2.6.1", 145 | "gulp-eslint": "^5.0.0", 146 | "gulp-filter": "^5.1.0", 147 | "gulp-flatmap": "^1.0.2", 148 | 
"gulp-gunzip": "^1.0.0", 149 | "gulp-gzip": "^1.4.2", 150 | "gulp-json-editor": "^2.5.0", 151 | "gulp-plumber": "^1.2.0", 152 | "gulp-postcss": "^9.0.0", 153 | "gulp-remote-retry-src": "^0.6.0", 154 | "gulp-rename": "^1.2.0", 155 | "gulp-replace": "^0.5.4", 156 | "gulp-shell": "^0.6.5", 157 | "gulp-sourcemaps": "^3.0.0", 158 | "gulp-tsb": "4.0.6", 159 | "gulp-untar": "^0.0.7", 160 | "gulp-vinyl-zip": "^2.1.2", 161 | "husky": "^0.13.1", 162 | "innosetup": "6.0.5", 163 | "is": "^3.1.0", 164 | "istanbul-lib-coverage": "^3.0.0", 165 | "istanbul-lib-instrument": "^4.0.0", 166 | "istanbul-lib-report": "^3.0.0", 167 | "istanbul-lib-source-maps": "^4.0.0", 168 | "istanbul-reports": "^3.0.0", 169 | "jsdom-no-contextify": "^3.1.0", 170 | "lazy.js": "^0.4.2", 171 | "merge-options": "^1.0.1", 172 | "mime": "^1.4.1", 173 | "minimatch": "^3.0.4", 174 | "minimist": "^1.2.5", 175 | "mkdirp": "^1.0.4", 176 | "mocha": "^8.2.1", 177 | "mocha-junit-reporter": "^2.0.0", 178 | "mocha-multi-reporters": "^1.5.1", 179 | "npm-run-all": "^4.1.5", 180 | "opn": "^6.0.0", 181 | "optimist": "0.3.5", 182 | "p-all": "^1.0.0", 183 | "playwright": "1.8.0", 184 | "pump": "^1.0.1", 185 | "queue": "3.0.6", 186 | "rcedit": "^1.1.0", 187 | "request": "^2.85.0", 188 | "rimraf": "^2.2.8", 189 | "sinon": "^1.17.2", 190 | "source-map": "0.6.1", 191 | "source-map-support": "^0.3.2", 192 | "style-loader": "^1.0.0", 193 | "ts-loader": "^6.2.1", 194 | "tsec": "0.1.4", 195 | "typescript": "^4.3.0-dev.20210330", 196 | "typescript-formatter": "7.1.0", 197 | "underscore": "^1.8.2", 198 | "vinyl": "^2.0.0", 199 | "vinyl-fs": "^3.0.0", 200 | "vscode-debugprotocol": "1.46.0", 201 | "vscode-nls-dev": "^3.3.1", 202 | "vscode-telemetry-extractor": "^1.7.0", 203 | "webpack": "^4.43.0", 204 | "webpack-cli": "^3.3.12", 205 | "webpack-stream": "^5.2.1", 206 | "xml2js": "^0.4.17", 207 | "yaserver": "^0.2.0" 208 | }, 209 | "repository": { 210 | "type": "git", 211 | "url": "https://github.com/microsoft/vscode.git" 212 | }, 213 | 
"bugs": { 214 | "url": "https://github.com/microsoft/vscode/issues" 215 | }, 216 | "optionalDependencies": { 217 | "vscode-windows-ca-certs": "^0.3.0", 218 | "vscode-windows-registry": "1.0.3", 219 | "windows-foreground-love": "0.2.0", 220 | "windows-mutex": "0.3.0", 221 | "windows-process-tree": "0.2.4" 222 | }, 223 | "resolutions": { 224 | "elliptic": "^6.5.3", 225 | "nwmatcher": "^1.4.4" 226 | } 227 | } -------------------------------------------------------------------------------- /src/string.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Cow; 2 | 3 | pub struct ParseStringError { 4 | pub byte_index: usize, 5 | pub kind: ParseStringErrorKind, 6 | } 7 | 8 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] 9 | pub enum ParseStringErrorKind { 10 | InvalidEscapeInSingleQuoteString, 11 | InvalidEscapeInDoubleQuoteString, 12 | ExpectedFourHexDigits, 13 | InvalidUnicodeEscapeSequence(String), 14 | InvalidEscape, 15 | UnterminatedStringLiteral, 16 | } 17 | 18 | impl std::error::Error for ParseStringErrorKind {} 19 | 20 | impl std::fmt::Display for ParseStringErrorKind { 21 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 22 | match self { 23 | ParseStringErrorKind::InvalidEscapeInSingleQuoteString => { 24 | write!(f, "Invalid escape in single quote string") 25 | } 26 | ParseStringErrorKind::InvalidEscapeInDoubleQuoteString => { 27 | write!(f, "Invalid escape in double quote string") 28 | } 29 | ParseStringErrorKind::ExpectedFourHexDigits => { 30 | write!(f, "Expected four hex digits") 31 | } 32 | ParseStringErrorKind::InvalidUnicodeEscapeSequence(value) => { 33 | write!( 34 | f, 35 | "Invalid unicode escape sequence. 
/// Parses a quoted string literal held in `text`, resolving escape sequences.
///
/// `text` is walked with a small in-memory [`CharProvider`] that tracks the
/// byte offset of the current character, and the actual parsing is delegated
/// to [`parse_string_with_char_provider`].
///
/// The returned value borrows from `text` when no escape had to be rewritten
/// and is owned otherwise.
///
/// NOTE(review): the delegate `debug_assert!`s that the first character is a
/// quote (`'` or `"`), so callers are expected to pass text that starts with
/// the opening quote.
///
/// # Errors
///
/// Returns a [`ParseStringError`] carrying the byte index and the kind of
/// failure reported by [`parse_string_with_char_provider`].
#[cfg(feature = "cst")]
pub fn parse_string(text: &str) -> Result<Cow<'_, str>, ParseStringError> {
  /// Character cursor over a plain `&str`.
  struct StringCharProvider<'a> {
    text: &'a str,
    /// Byte offset of `current_char` within `text`.
    byte_index: usize,
    current_char: Option<char>,
    chars: std::str::Chars<'a>,
  }

  impl<'a> CharProvider<'a> for StringCharProvider<'a> {
    fn current_char(&mut self) -> Option<char> {
      self.current_char
    }

    fn byte_index(&self) -> usize {
      self.byte_index
    }

    fn move_next_char(&mut self) -> Option<char> {
      // advance past the character being left behind (if any) so that
      // `byte_index` always points at the start of `current_char`
      if let Some(current_char) = self.current_char {
        self.byte_index += current_char.len_utf8();
      }
      self.current_char = self.chars.next();
      self.current_char
    }

    fn text(&self) -> &'a str {
      self.text
    }
  }

  let mut chars = text.chars();
  let mut provider = StringCharProvider {
    text,
    byte_index: 0,
    current_char: chars.next(),
    chars,
  };

  parse_string_with_char_provider(&mut provider)
}
found_end_string = false; 111 | let token_start = chars.byte_index(); 112 | 113 | while let Some(current_char) = chars.move_next_char() { 114 | if last_was_backslash { 115 | let escape_start = chars.byte_index() - 1; // -1 for backslash 116 | match current_char { 117 | '"' | '\'' | '\\' | '/' | 'b' | 'f' | 'u' | 'r' | 'n' | 't' => { 118 | if current_char == '"' { 119 | if !is_double_quote { 120 | return Err(ParseStringError { 121 | byte_index: escape_start, 122 | kind: ParseStringErrorKind::InvalidEscapeInSingleQuoteString, 123 | }); 124 | } 125 | } else if current_char == '\'' && is_double_quote { 126 | return Err(ParseStringError { 127 | byte_index: escape_start, 128 | kind: ParseStringErrorKind::InvalidEscapeInDoubleQuoteString, 129 | }); 130 | } 131 | 132 | let previous_text = &chars.text()[last_start_byte_index..escape_start]; 133 | if text.is_none() { 134 | text = Some(String::new()); 135 | } 136 | let text = text.as_mut().unwrap(); 137 | text.push_str(previous_text); 138 | if current_char == 'u' { 139 | let hex_char = parse_hex_char(chars).map_err(|kind| ParseStringError { 140 | byte_index: escape_start, 141 | kind, 142 | })?; 143 | text.push(hex_char); 144 | last_start_byte_index = chars.byte_index() + chars.current_char().map(|c| c.len_utf8()).unwrap_or(0); 145 | } else { 146 | text.push(match current_char { 147 | 'b' => '\u{08}', 148 | 'f' => '\u{0C}', 149 | 't' => '\t', 150 | 'r' => '\r', 151 | 'n' => '\n', 152 | _ => current_char, 153 | }); 154 | last_start_byte_index = chars.byte_index() + current_char.len_utf8(); 155 | } 156 | } 157 | _ => { 158 | return Err(ParseStringError { 159 | byte_index: escape_start, 160 | kind: ParseStringErrorKind::InvalidEscape, 161 | }); 162 | } 163 | } 164 | last_was_backslash = false; 165 | } else if is_double_quote && current_char == '"' || !is_double_quote && current_char == '\'' { 166 | found_end_string = true; 167 | break; 168 | } else { 169 | last_was_backslash = current_char == '\\'; 170 | } 171 | } 172 | 173 | if 
found_end_string { 174 | chars.move_next_char(); 175 | let final_segment = &chars.text()[last_start_byte_index..chars.byte_index() - 1]; 176 | Ok(match text { 177 | Some(mut text) => { 178 | text.push_str(final_segment); 179 | Cow::Owned(text) 180 | } 181 | None => Cow::Borrowed(final_segment), 182 | }) 183 | } else { 184 | Err(ParseStringError { 185 | byte_index: token_start, 186 | kind: ParseStringErrorKind::UnterminatedStringLiteral, 187 | }) 188 | } 189 | } 190 | 191 | fn parse_hex_char<'a, T: CharProvider<'a>>(chars: &mut T) -> Result { 192 | let mut hex_text = String::new(); 193 | // expect four hex values 194 | for _ in 0..4 { 195 | let current_char = chars.move_next_char(); 196 | if !is_hex(current_char) { 197 | return Err(ParseStringErrorKind::ExpectedFourHexDigits); 198 | } 199 | if let Some(current_char) = current_char { 200 | hex_text.push(current_char); 201 | } 202 | } 203 | 204 | let hex_value = match u32::from_str_radix(&hex_text, 16) { 205 | Ok(v) => v, 206 | Err(_) => { 207 | return Err(ParseStringErrorKind::InvalidUnicodeEscapeSequence(hex_text)); 208 | } 209 | }; 210 | 211 | // Check if this is a high surrogate (0xD800-0xDBFF) 212 | let hex_char = if (0xD800..=0xDBFF).contains(&hex_value) { 213 | // High surrogate - must be followed by low surrogate 214 | // Peek ahead for \uXXXX pattern 215 | let next_char = chars.move_next_char(); 216 | if next_char != Some('\\') { 217 | return Err(ParseStringErrorKind::InvalidUnicodeEscapeSequence(format!( 218 | "{} (unpaired high surrogate)", 219 | hex_text 220 | ))); 221 | } 222 | 223 | let next_char = chars.move_next_char(); 224 | if next_char != Some('u') { 225 | return Err(ParseStringErrorKind::InvalidUnicodeEscapeSequence(format!( 226 | "{} (unpaired high surrogate)", 227 | hex_text 228 | ))); 229 | } 230 | 231 | // Parse the second \uXXXX 232 | let mut hex_text2 = String::new(); 233 | for _ in 0..4 { 234 | let current_char = chars.move_next_char(); 235 | if !is_hex(current_char) { 236 | return 
Err(ParseStringErrorKind::ExpectedFourHexDigits); 237 | } 238 | if let Some(current_char) = current_char { 239 | hex_text2.push(current_char); 240 | } 241 | } 242 | 243 | let hex_value2 = match u32::from_str_radix(&hex_text2, 16) { 244 | Ok(v) => v, 245 | Err(_) => { 246 | return Err(ParseStringErrorKind::InvalidUnicodeEscapeSequence(hex_text2)); 247 | } 248 | }; 249 | 250 | // Verify it's a low surrogate (0xDC00-0xDFFF) 251 | if !(0xDC00..=0xDFFF).contains(&hex_value2) { 252 | return Err(ParseStringErrorKind::InvalidUnicodeEscapeSequence(format!( 253 | "{} (high surrogate not followed by low surrogate)", 254 | hex_text 255 | ))); 256 | } 257 | 258 | // Combine surrogate pair using RFC 8259 formula 259 | let code_point = ((hex_value - 0xD800) * 0x400) + (hex_value2 - 0xDC00) + 0x10000; 260 | 261 | match std::char::from_u32(code_point) { 262 | Some(c) => c, 263 | None => { 264 | return Err(ParseStringErrorKind::InvalidUnicodeEscapeSequence(format!( 265 | "{}\\u{} (invalid surrogate pair)", 266 | hex_text, hex_text2 267 | ))); 268 | } 269 | } 270 | } else if (0xDC00..=0xDFFF).contains(&hex_value) { 271 | // Low surrogate without high surrogate 272 | return Err(ParseStringErrorKind::InvalidUnicodeEscapeSequence(format!( 273 | "{} (unpaired low surrogate)", 274 | hex_text 275 | ))); 276 | } else { 277 | // Normal unicode escape 278 | match std::char::from_u32(hex_value) { 279 | Some(hex_char) => hex_char, 280 | None => { 281 | return Err(ParseStringErrorKind::InvalidUnicodeEscapeSequence(hex_text)); 282 | } 283 | } 284 | }; 285 | Ok(hex_char) 286 | } 287 | 288 | fn is_hex(c: Option) -> bool { 289 | let Some(c) = c else { 290 | return false; 291 | }; 292 | is_digit(c) || ('a'..='f').contains(&c) || ('A'..='F').contains(&c) 293 | } 294 | 295 | fn is_digit(c: char) -> bool { 296 | c.is_ascii_digit() 297 | } 298 | -------------------------------------------------------------------------------- /src/ast.rs: 
#[cfg(feature = "serde")]
impl<'a> From<Value<'a>> for serde_json::Value {
  /// Converts an AST value into a `serde_json::Value`.
  ///
  /// Number literals that cannot be represented as a JSON number (malformed
  /// text, or a hexadecimal literal outside the `i64`/`u64` range) are kept as
  /// a JSON string holding the original literal text.
  fn from(value: Value<'a>) -> Self {
    match value {
      Value::Array(arr) => {
        let vec = arr.elements.into_iter().map(|v| v.into()).collect();
        serde_json::Value::Array(vec)
      }
      Value::BooleanLit(b) => serde_json::Value::Bool(b.value),
      Value::NullKeyword(_) => serde_json::Value::Null,
      Value::NumberLit(num) => number_text_to_serde_value(num.value),
      Value::Object(obj) => {
        let mut map = serde_json::map::Map::new();
        for prop in obj.properties {
          map.insert(prop.name.into_string(), prop.value.into());
        }
        serde_json::Value::Object(map)
      }
      Value::StringLit(s) => serde_json::Value::String(s.value.into_owned()),
    }
  }
}

/// Converts the text of a number literal into a JSON number, falling back to
/// a JSON string of the original text when it cannot be represented.
#[cfg(feature = "serde")]
fn number_text_to_serde_value(text: &str) -> serde_json::Value {
  use std::str::FromStr;

  // a hexadecimal literal is an optional sign, `0x`/`0X`, then at least one digit
  let unsigned = text.trim_start_matches(['-', '+']);
  let hex_digits = unsigned
    .strip_prefix("0x")
    .or_else(|| unsigned.strip_prefix("0X"))
    .filter(|digits| !digits.is_empty());

  let number = match hex_digits {
    Some(digits) => i128::from_str_radix(digits, 16).ok().and_then(|magnitude| {
      let signed = if text.starts_with('-') {
        // checked so an extreme hand-built literal cannot overflow
        magnitude.checked_neg()?
      } else {
        magnitude
      };
      // prefer i64 (as before); widen to u64 for values above i64::MAX
      i64::try_from(signed)
        .map(serde_json::Number::from)
        .or_else(|_| u64::try_from(signed).map(serde_json::Number::from))
        .ok()
    }),
    // standard decimal number; serde_json does not accept a leading `+`
    None => serde_json::Number::from_str(text.trim_start_matches('+')).ok(),
  };

  match number {
    Some(number) => serde_json::Value::Number(number),
    None => serde_json::Value::String(text.to_string()),
  }
}
124 | pub fn kind(&self) -> NodeKind { 125 | match self { 126 | Node::StringLit(_) => NodeKind::StringLit, 127 | Node::NumberLit(_) => NodeKind::NumberLit, 128 | Node::BooleanLit(_) => NodeKind::BooleanLit, 129 | Node::Object(_) => NodeKind::Object, 130 | Node::ObjectProp(_) => NodeKind::ObjectProp, 131 | Node::Array(_) => NodeKind::Array, 132 | Node::NullKeyword(_) => NodeKind::NullKeyword, 133 | Node::WordLit(_) => NodeKind::WordLit, 134 | } 135 | } 136 | 137 | pub fn as_string_lit(&self) -> Option<&'b StringLit<'a>> { 138 | match self { 139 | Node::StringLit(node) => Some(node), 140 | _ => None, 141 | } 142 | } 143 | 144 | pub fn as_number_lit(&self) -> Option<&'b NumberLit<'a>> { 145 | match self { 146 | Node::NumberLit(node) => Some(node), 147 | _ => None, 148 | } 149 | } 150 | 151 | pub fn as_boolean_lit(&self) -> Option<&'b BooleanLit> { 152 | match self { 153 | Node::BooleanLit(node) => Some(node), 154 | _ => None, 155 | } 156 | } 157 | 158 | pub fn as_object(&self) -> Option<&'b Object<'a>> { 159 | match self { 160 | Node::Object(node) => Some(node), 161 | _ => None, 162 | } 163 | } 164 | 165 | pub fn as_object_prop(&self) -> Option<&'b ObjectProp<'a>> { 166 | match self { 167 | Node::ObjectProp(node) => Some(node), 168 | _ => None, 169 | } 170 | } 171 | 172 | pub fn as_array(&self) -> Option<&'b Array<'a>> { 173 | match self { 174 | Node::Array(node) => Some(node), 175 | _ => None, 176 | } 177 | } 178 | 179 | pub fn as_null_keyword(&self) -> Option<&'b NullKeyword> { 180 | match self { 181 | Node::NullKeyword(node) => Some(node), 182 | _ => None, 183 | } 184 | } 185 | 186 | pub fn as_word_lit(&self) -> Option<&'b WordLit<'a>> { 187 | match self { 188 | Node::WordLit(node) => Some(node), 189 | _ => None, 190 | } 191 | } 192 | } 193 | 194 | /// Kind of AST node. 
// Expands to an expression that removes the first property called `$name`
// from `$self.properties` and yields its inner node, but only when that
// property's value is the `Value::$value_type` variant. Yields `None` (and
// leaves the properties untouched) when no property has that name or when the
// first property with that name holds a different kind of value.
macro_rules! generate_take {
  ($self:ident, $name:ident, $value_type:ident) => {{
    // check the variant before removing so a mismatch does not mutate the object
    let pos = $self
      .properties
      .iter()
      .position(|p| p.name.as_str() == $name)
      .filter(|&pos| matches!($self.properties[pos].value, Value::$value_type(_)));

    match pos.map(|pos| $self.properties.remove(pos).value) {
      Some(Value::$value_type(node)) => Some(node),
      _ => None,
    }
  }};
}
312 | pub fn get_object(&self, name: &str) -> Option<&Object<'a>> { 313 | generate_get!(self, name, Object) 314 | } 315 | 316 | /// Gets an array property value from the object by name. 317 | /// Returns `None` when not an array or it doesn't exist. 318 | pub fn get_array(&self, name: &str) -> Option<&Array<'a>> { 319 | generate_get!(self, name, Array) 320 | } 321 | 322 | /// Takes a value from the object by name. 323 | /// Returns `None` when it doesn't exist. 324 | pub fn take(&mut self, name: &str) -> Option> { 325 | if let Some(pos) = self.properties.iter().position(|p| p.name.as_str() == name) { 326 | Some(self.properties.remove(pos)) 327 | } else { 328 | None 329 | } 330 | } 331 | 332 | /// Takes a string property value from the object by name. 333 | /// Returns `None` when not a string or it doesn't exist. 334 | pub fn take_string(&mut self, name: &str) -> Option> { 335 | generate_take!(self, name, StringLit) 336 | } 337 | 338 | /// Takes a number property value from the object by name. 339 | /// Returns `None` when not a number or it doesn't exist. 340 | pub fn take_number(&mut self, name: &str) -> Option> { 341 | generate_take!(self, name, NumberLit) 342 | } 343 | 344 | /// Takes a boolean property value from the object by name. 345 | /// Returns `None` when not a boolean or it doesn't exist. 346 | pub fn take_boolean(&mut self, name: &str) -> Option { 347 | generate_take!(self, name, BooleanLit) 348 | } 349 | 350 | /// Takes an object property value from the object by name. 351 | /// Returns `None` when not an object or it doesn't exist. 352 | pub fn take_object(&mut self, name: &str) -> Option> { 353 | generate_take!(self, name, Object) 354 | } 355 | 356 | /// Takes an array property value from the object by name. 357 | /// Returns `None` when not an array or it doesn't exist. 358 | pub fn take_array(&mut self, name: &str) -> Option> { 359 | generate_take!(self, name, Array) 360 | } 361 | } 362 | 363 | /// Represents an object property (ex. 
`"prop": []`). 364 | #[derive(Debug, PartialEq, Clone)] 365 | pub struct ObjectProp<'a> { 366 | pub range: Range, 367 | pub name: ObjectPropName<'a>, 368 | pub value: Value<'a>, 369 | } 370 | 371 | /// Represents an object property name that may or may not be in quotes. 372 | #[derive(Debug, PartialEq, Clone)] 373 | pub enum ObjectPropName<'a> { 374 | String(StringLit<'a>), 375 | Word(WordLit<'a>), 376 | } 377 | 378 | impl<'a> ObjectPropName<'a> { 379 | /// Converts the object property name into a string. 380 | pub fn into_string(self) -> String { 381 | match self { 382 | ObjectPropName::String(lit) => lit.value.into_owned(), 383 | ObjectPropName::Word(lit) => lit.value.to_string(), 384 | } 385 | } 386 | 387 | /// Gets the object property name as a string reference. 388 | pub fn as_str(&'a self) -> &'a str { 389 | match self { 390 | ObjectPropName::String(lit) => lit.value.as_ref(), 391 | ObjectPropName::Word(lit) => lit.value, 392 | } 393 | } 394 | } 395 | 396 | /// Represents an array that may contain elements (ex. `[]`, `[5, 6]`). 397 | #[derive(Debug, PartialEq, Clone)] 398 | pub struct Array<'a> { 399 | pub range: Range, 400 | pub elements: Vec>, 401 | } 402 | 403 | /// Kind of JSONC comment. 404 | #[derive(Debug, PartialEq, Clone)] 405 | pub enum CommentKind { 406 | Line, 407 | Block, 408 | } 409 | 410 | /// JSONC comment. 411 | #[derive(Debug, PartialEq, Clone)] 412 | pub enum Comment<'a> { 413 | Line(CommentLine<'a>), 414 | Block(CommentBlock<'a>), 415 | } 416 | 417 | impl<'a> Comment<'a> { 418 | /// Gets the text of the comment. 419 | pub fn text(&self) -> &'a str { 420 | match self { 421 | Comment::Line(line) => line.text, 422 | Comment::Block(line) => line.text, 423 | } 424 | } 425 | 426 | /// Gets the comment kind. 
427 | pub fn kind(&self) -> CommentKind { 428 | match self { 429 | Comment::Line(_) => CommentKind::Line, 430 | Comment::Block(_) => CommentKind::Block, 431 | } 432 | } 433 | } 434 | 435 | impl<'a> Ranged for Comment<'a> { 436 | fn range(&self) -> Range { 437 | match self { 438 | Comment::Line(line) => line.range(), 439 | Comment::Block(line) => line.range(), 440 | } 441 | } 442 | } 443 | 444 | /// Represents a comment line (ex. `// my comment`). 445 | #[derive(Debug, PartialEq, Clone)] 446 | pub struct CommentLine<'a> { 447 | pub range: Range, 448 | pub text: &'a str, 449 | } 450 | 451 | /// Represents a comment block (ex. `/* my comment */`). 452 | #[derive(Debug, PartialEq, Clone)] 453 | pub struct CommentBlock<'a> { 454 | pub range: Range, 455 | pub text: &'a str, 456 | } 457 | 458 | // Object Property Name 459 | 460 | impl<'a, 'b> From<&'b ObjectPropName<'a>> for Node<'a, 'b> { 461 | fn from(object_prop_name: &'b ObjectPropName<'a>) -> Node<'a, 'b> { 462 | match object_prop_name { 463 | ObjectPropName::String(lit) => lit.into(), 464 | ObjectPropName::Word(lit) => lit.into(), 465 | } 466 | } 467 | } 468 | 469 | impl<'a> Ranged for ObjectPropName<'a> { 470 | fn range(&self) -> Range { 471 | match self { 472 | ObjectPropName::String(lit) => lit.range(), 473 | ObjectPropName::Word(lit) => lit.range(), 474 | } 475 | } 476 | } 477 | 478 | // Implement Traits 479 | 480 | macro_rules! impl_ranged { 481 | ($($node_name:ident),*) => { 482 | $( 483 | impl Ranged for $node_name { 484 | fn range(&self) -> Range { 485 | self.range 486 | } 487 | } 488 | )* 489 | }; 490 | } 491 | 492 | impl_ranged![BooleanLit, NullKeyword]; 493 | 494 | macro_rules! 
impl_ranged_lifetime { 495 | ($($node_name:ident),*) => { 496 | $( 497 | impl<'a> Ranged for $node_name<'a> { 498 | fn range(&self) -> Range { 499 | self.range 500 | } 501 | } 502 | )* 503 | }; 504 | } 505 | 506 | impl_ranged_lifetime![ 507 | WordLit, 508 | Object, 509 | ObjectProp, 510 | Array, 511 | CommentLine, 512 | CommentBlock, 513 | NumberLit, 514 | StringLit 515 | ]; 516 | 517 | impl<'a> Ranged for Value<'a> { 518 | fn range(&self) -> Range { 519 | match self { 520 | Value::Array(node) => node.range(), 521 | Value::BooleanLit(node) => node.range(), 522 | Value::NullKeyword(node) => node.range(), 523 | Value::NumberLit(node) => node.range(), 524 | Value::Object(node) => node.range(), 525 | Value::StringLit(node) => node.range(), 526 | } 527 | } 528 | } 529 | 530 | impl<'a, 'b> Ranged for Node<'a, 'b> { 531 | fn range(&self) -> Range { 532 | match self { 533 | Node::StringLit(node) => node.range(), 534 | Node::NumberLit(node) => node.range(), 535 | Node::BooleanLit(node) => node.range(), 536 | Node::NullKeyword(node) => node.range(), 537 | Node::WordLit(node) => node.range(), 538 | Node::Array(node) => node.range(), 539 | Node::Object(node) => node.range(), 540 | Node::ObjectProp(node) => node.range(), 541 | } 542 | } 543 | } 544 | 545 | macro_rules! generate_node { 546 | ($($node_name:ident),*) => { 547 | $( 548 | impl<'a, 'b> From<&'b $node_name> for Node<'a, 'b> { 549 | fn from(node: &'b $node_name) -> Node<'a, 'b> { 550 | Node::$node_name(node) 551 | } 552 | } 553 | )* 554 | }; 555 | } 556 | 557 | generate_node![BooleanLit, NullKeyword]; 558 | 559 | macro_rules! 
generate_node_lifetime { 560 | ($($node_name:ident),*) => { 561 | 562 | $( 563 | impl<'a, 'b> From<&'b $node_name<'a>> for Node<'a, 'b> { 564 | fn from(node: &'b $node_name<'a>) -> Node<'a, 'b> { 565 | Node::$node_name(node) 566 | } 567 | } 568 | )* 569 | }; 570 | } 571 | 572 | generate_node_lifetime![WordLit, Object, ObjectProp, Array, NumberLit, StringLit]; 573 | 574 | impl<'a, 'b> From<&'b Value<'a>> for Node<'a, 'b> { 575 | fn from(value: &'b Value<'a>) -> Node<'a, 'b> { 576 | match value { 577 | Value::Array(node) => Node::Array(node), 578 | Value::BooleanLit(node) => Node::BooleanLit(node), 579 | Value::NullKeyword(node) => Node::NullKeyword(node), 580 | Value::NumberLit(node) => Node::NumberLit(node), 581 | Value::Object(node) => Node::Object(node), 582 | Value::StringLit(node) => Node::StringLit(node), 583 | } 584 | } 585 | } 586 | 587 | #[cfg(test)] 588 | mod test { 589 | use super::*; 590 | use crate::ParseOptions; 591 | use crate::parse_to_ast; 592 | 593 | #[test] 594 | fn it_should_take() { 595 | let ast = parse_to_ast( 596 | "{'prop': 'asdf', 'other': 'text'}", 597 | &Default::default(), 598 | &ParseOptions::default(), 599 | ) 600 | .unwrap(); 601 | let mut obj = match ast.value { 602 | Some(Value::Object(obj)) => obj, 603 | _ => unreachable!(), 604 | }; 605 | 606 | assert_eq!(obj.properties.len(), 2); 607 | assert_eq!(obj.take_string("asdf"), None); 608 | assert_eq!(obj.properties.len(), 2); 609 | assert_eq!(obj.take_number("prop"), None); 610 | assert_eq!(obj.properties.len(), 2); 611 | assert!(obj.take_string("prop").is_some()); 612 | assert_eq!(obj.properties.len(), 1); 613 | assert_eq!(obj.take("something"), None); 614 | assert_eq!(obj.properties.len(), 1); 615 | assert!(obj.take("other").is_some()); 616 | assert_eq!(obj.properties.len(), 0); 617 | } 618 | 619 | #[test] 620 | fn it_should_get() { 621 | let ast = parse_to_ast("{'prop': 'asdf'}", &Default::default(), &ParseOptions::default()).unwrap(); 622 | let obj = match ast.value { 623 | 
Some(Value::Object(obj)) => obj, 624 | _ => unreachable!(), 625 | }; 626 | 627 | assert_eq!(obj.properties.len(), 1); 628 | assert_eq!(obj.get_string("asdf"), None); 629 | assert!(obj.get_string("prop").is_some()); 630 | assert_eq!(obj.get("asdf"), None); 631 | assert_eq!(obj.properties.len(), 1); 632 | } 633 | 634 | #[cfg(feature = "serde")] 635 | #[test] 636 | fn it_should_coerce_to_serde_value() { 637 | let ast = parse_to_ast( 638 | r#"{"prop":[true,1,null,"str"]}"#, 639 | &Default::default(), 640 | &ParseOptions::default(), 641 | ) 642 | .unwrap(); 643 | let value = ast.value.unwrap(); 644 | let serde_value: serde_json::Value = value.into(); 645 | 646 | assert_eq!( 647 | serde_value, 648 | serde_json::json!({ 649 | "prop": [ 650 | true, 651 | 1, 652 | null, 653 | "str" 654 | ] 655 | }) 656 | ); 657 | } 658 | 659 | #[cfg(feature = "serde")] 660 | #[test] 661 | fn handle_weird_data() { 662 | let ast = parse_to_ast( 663 | r#"{eyyyyyyy:6yy:6000e000615yyyk:6}"#, 664 | &Default::default(), 665 | &ParseOptions::default(), 666 | ) 667 | .unwrap(); 668 | let value = ast.value.unwrap(); 669 | let serde_value: serde_json::Value = value.into(); 670 | 671 | assert_eq!( 672 | serde_value, 673 | // this output is fine because the input is bad 674 | serde_json::json!({ 675 | "eyyyyyyy": 6, 676 | "yy": "6000e000615", 677 | "yyyk": 6 678 | }) 679 | ); 680 | } 681 | } 682 | -------------------------------------------------------------------------------- /src/scanner.rs: -------------------------------------------------------------------------------- 1 | use crate::string::CharProvider; 2 | 3 | use super::common::Range; 4 | use super::errors::*; 5 | use super::tokens::Token; 6 | use std::str::Chars; 7 | 8 | /// Converts text into a stream of tokens. 9 | pub struct Scanner<'a> { 10 | byte_index: usize, 11 | token_start: usize, 12 | char_iter: Chars<'a>, 13 | // todo(dsherret): why isn't this a VecDeque? 
14 | char_buffer: Vec, 15 | current_token: Option>, 16 | file_text: &'a str, 17 | allow_single_quoted_strings: bool, 18 | allow_hexadecimal_numbers: bool, 19 | allow_unary_plus_numbers: bool, 20 | } 21 | 22 | const CHAR_BUFFER_MAX_SIZE: usize = 6; 23 | 24 | /// Options for the scanner. 25 | #[derive(Debug)] 26 | pub struct ScannerOptions { 27 | /// Allow single-quoted strings (defaults to `true`). 28 | pub allow_single_quoted_strings: bool, 29 | /// Allow hexadecimal numbers like 0xFF (defaults to `true`). 30 | pub allow_hexadecimal_numbers: bool, 31 | /// Allow unary plus sign on numbers like +42 (defaults to `true`). 32 | pub allow_unary_plus_numbers: bool, 33 | } 34 | 35 | impl Default for ScannerOptions { 36 | fn default() -> Self { 37 | Self { 38 | allow_single_quoted_strings: true, 39 | allow_hexadecimal_numbers: true, 40 | allow_unary_plus_numbers: true, 41 | } 42 | } 43 | } 44 | 45 | impl<'a> Scanner<'a> { 46 | /// Creates a new scanner with specific options. 47 | pub fn new(file_text: &'a str, options: &ScannerOptions) -> Scanner<'a> { 48 | let mut char_iter = file_text.chars(); 49 | let mut char_buffer = Vec::with_capacity(CHAR_BUFFER_MAX_SIZE); 50 | let current_char = char_iter.next(); 51 | if let Some(current_char) = current_char { 52 | char_buffer.push(current_char); 53 | } 54 | 55 | Scanner { 56 | byte_index: 0, 57 | token_start: 0, 58 | char_iter, 59 | char_buffer, 60 | current_token: None, 61 | file_text, 62 | allow_single_quoted_strings: options.allow_single_quoted_strings, 63 | allow_hexadecimal_numbers: options.allow_hexadecimal_numbers, 64 | allow_unary_plus_numbers: options.allow_unary_plus_numbers, 65 | } 66 | } 67 | 68 | pub fn file_text(&self) -> &str { 69 | self.file_text 70 | } 71 | 72 | /// Moves to and returns the next token. 
73 | pub fn scan(&mut self) -> Result>, ParseError> { 74 | self.skip_whitespace(); 75 | self.token_start = self.byte_index; 76 | if let Some(current_char) = self.current_char() { 77 | let token_result = match current_char { 78 | '{' => { 79 | self.move_next_char(); 80 | Ok(Token::OpenBrace) 81 | } 82 | '}' => { 83 | self.move_next_char(); 84 | Ok(Token::CloseBrace) 85 | } 86 | '[' => { 87 | self.move_next_char(); 88 | Ok(Token::OpenBracket) 89 | } 90 | ']' => { 91 | self.move_next_char(); 92 | Ok(Token::CloseBracket) 93 | } 94 | ',' => { 95 | self.move_next_char(); 96 | Ok(Token::Comma) 97 | } 98 | ':' => { 99 | self.move_next_char(); 100 | Ok(Token::Colon) 101 | } 102 | '\'' => { 103 | if self.allow_single_quoted_strings { 104 | self.parse_string() 105 | } else { 106 | Err(self.create_error_for_current_token(ParseErrorKind::SingleQuotedStringsNotAllowed)) 107 | } 108 | } 109 | '"' => self.parse_string(), 110 | '/' => match self.peek_char() { 111 | Some('/') => Ok(self.parse_comment_line()), 112 | Some('*') => self.parse_comment_block(), 113 | _ => Err(self.create_error_for_current_token(ParseErrorKind::UnexpectedToken)), 114 | }, 115 | _ => { 116 | if current_char == '-' || current_char == '+' || self.is_digit() { 117 | self.parse_number() 118 | } else if self.try_move_word("true") { 119 | Ok(Token::Boolean(true)) 120 | } else if self.try_move_word("false") { 121 | Ok(Token::Boolean(false)) 122 | } else if self.try_move_word("null") { 123 | Ok(Token::Null) 124 | } else { 125 | self.parse_word() 126 | } 127 | } 128 | }; 129 | match token_result { 130 | Ok(token) => { 131 | self.current_token = Some(token.clone()); 132 | Ok(Some(token)) 133 | } 134 | Err(err) => Err(err), 135 | } 136 | } else { 137 | self.current_token = None; 138 | Ok(None) 139 | } 140 | } 141 | 142 | /// Gets the start position of the token. 143 | pub fn token_start(&self) -> usize { 144 | self.token_start 145 | } 146 | 147 | /// Gets the end position of the token. 
148 | pub fn token_end(&self) -> usize { 149 | self.byte_index 150 | } 151 | 152 | /// Gets the current token. 153 | pub fn token(&self) -> Option> { 154 | self.current_token.as_ref().map(|x| x.to_owned()) 155 | } 156 | 157 | pub(super) fn create_error_for_current_token(&self, kind: ParseErrorKind) -> ParseError { 158 | self.create_error_for_start(self.token_start, kind) 159 | } 160 | 161 | pub(super) fn create_error_for_current_char(&self, kind: ParseErrorKind) -> ParseError { 162 | self.create_error_for_start(self.byte_index, kind) 163 | } 164 | 165 | pub(super) fn create_error_for_start(&self, start: usize, kind: ParseErrorKind) -> ParseError { 166 | let range = Range { 167 | start, 168 | end: if let Some(c) = self.file_text[self.byte_index..].chars().next() { 169 | self.byte_index + c.len_utf8() 170 | } else { 171 | self.file_text.len() 172 | }, 173 | }; 174 | self.create_error_for_range(range, kind) 175 | } 176 | 177 | pub(super) fn create_error_for_range(&self, range: Range, kind: ParseErrorKind) -> ParseError { 178 | ParseError::new(range, kind, self.file_text) 179 | } 180 | 181 | fn parse_string(&mut self) -> Result, ParseError> { 182 | crate::string::parse_string_with_char_provider(self) 183 | .map(Token::String) 184 | // todo(dsherret): don't convert the error kind to a string here 185 | .map_err(|err| self.create_error_for_start(err.byte_index, ParseErrorKind::String(err.kind))) 186 | } 187 | 188 | fn parse_number(&mut self) -> Result, ParseError> { 189 | let start_byte_index = self.byte_index; 190 | 191 | // handle unary plus and unary minus 192 | if self.is_positive_sign() { 193 | if !self.allow_unary_plus_numbers { 194 | return Err(self.create_error_for_current_token(ParseErrorKind::UnaryPlusNumbersNotAllowed)); 195 | } 196 | self.move_next_char(); 197 | } else if self.is_negative_sign() { 198 | self.move_next_char(); 199 | } 200 | 201 | if self.is_zero() { 202 | self.move_next_char(); 203 | 204 | // check for hexadecimal literal (0x or 0X) 205 | if 
matches!(self.current_char(), Some('x') | Some('X')) { 206 | if !self.allow_hexadecimal_numbers { 207 | return Err(self.create_error_for_current_token(ParseErrorKind::HexadecimalNumbersNotAllowed)); 208 | } 209 | 210 | self.move_next_char(); 211 | 212 | // must have at least one hex digit 213 | if !self.is_hex_digit() { 214 | return Err(self.create_error_for_current_char(ParseErrorKind::ExpectedDigit)); 215 | } 216 | 217 | while self.is_hex_digit() { 218 | self.move_next_char(); 219 | } 220 | 221 | let end_byte_index = self.byte_index; 222 | return Ok(Token::Number(&self.file_text[start_byte_index..end_byte_index])); 223 | } 224 | } else if self.is_one_nine() { 225 | self.move_next_char(); 226 | while self.is_digit() { 227 | self.move_next_char(); 228 | } 229 | } else { 230 | return Err(self.create_error_for_current_char(ParseErrorKind::ExpectedDigitFollowingNegativeSign)); 231 | } 232 | 233 | if self.is_decimal_point() { 234 | self.move_next_char(); 235 | 236 | if !self.is_digit() { 237 | return Err(self.create_error_for_current_char(ParseErrorKind::ExpectedDigit)); 238 | } 239 | 240 | while self.is_digit() { 241 | self.move_next_char(); 242 | } 243 | } 244 | 245 | match self.current_char() { 246 | Some('e') | Some('E') => { 247 | match self.move_next_char() { 248 | Some('-') | Some('+') => { 249 | self.move_next_char(); 250 | if !self.is_digit() { 251 | return Err(self.create_error_for_current_char(ParseErrorKind::ExpectedDigit)); 252 | } 253 | } 254 | _ => { 255 | if !self.is_digit() { 256 | return Err(self.create_error_for_current_char(ParseErrorKind::ExpectedPlusMinusOrDigitInNumberLiteral)); 257 | } 258 | } 259 | } 260 | 261 | while self.is_digit() { 262 | self.move_next_char(); 263 | } 264 | } 265 | _ => {} 266 | } 267 | 268 | let end_byte_index = self.byte_index; 269 | Ok(Token::Number(&self.file_text[start_byte_index..end_byte_index])) 270 | } 271 | 272 | fn parse_comment_line(&mut self) -> Token<'a> { 273 | self.assert_then_move_char('/'); 274 | 
#[cfg(debug_assertions)] 275 | self.assert_char('/'); 276 | 277 | let start_byte_index = self.byte_index + 1; 278 | while self.move_next_char().is_some() { 279 | if self.is_new_line() { 280 | break; 281 | } 282 | } 283 | 284 | Token::CommentLine(&self.file_text[start_byte_index..self.byte_index]) 285 | } 286 | 287 | fn parse_comment_block(&mut self) -> Result, ParseError> { 288 | self.assert_then_move_char('/'); 289 | #[cfg(debug_assertions)] 290 | self.assert_char('*'); 291 | let mut found_end = false; 292 | 293 | let start_byte_index = self.byte_index + 1; 294 | while let Some(current_char) = self.move_next_char() { 295 | if current_char == '*' && self.peek_char() == Some('/') { 296 | found_end = true; 297 | break; 298 | } 299 | } 300 | 301 | if found_end { 302 | let end_byte_index = self.byte_index; 303 | self.assert_then_move_char('*'); 304 | self.assert_then_move_char('/'); 305 | Ok(Token::CommentBlock(&self.file_text[start_byte_index..end_byte_index])) 306 | } else { 307 | Err(self.create_error_for_current_token(ParseErrorKind::UnterminatedCommentBlock)) 308 | } 309 | } 310 | 311 | fn skip_whitespace(&mut self) { 312 | while let Some(current_char) = self.current_char() { 313 | if current_char.is_whitespace() { 314 | self.move_next_char(); 315 | } else { 316 | break; 317 | } 318 | } 319 | } 320 | 321 | fn try_move_word(&mut self, text: &str) -> bool { 322 | let mut char_index = 0; 323 | for c in text.chars() { 324 | if let Some(current_char) = self.peek_char_offset(char_index) { 325 | if current_char != c { 326 | return false; 327 | } 328 | 329 | char_index += 1; 330 | } else { 331 | return false; 332 | } 333 | } 334 | 335 | if let Some(next_char) = self.peek_char_offset(char_index) 336 | && next_char.is_alphanumeric() 337 | { 338 | return false; 339 | } 340 | 341 | for _ in 0..char_index { 342 | self.move_next_char(); 343 | } 344 | 345 | true 346 | } 347 | 348 | fn parse_word(&mut self) -> Result, ParseError> { 349 | let start_byte_index = self.byte_index; 
350 | 351 | while let Some(current_char) = self.current_char() { 352 | // check for word terminators 353 | if current_char.is_whitespace() || current_char == ':' { 354 | break; 355 | } 356 | // validate that the character is allowed in a word literal 357 | if !current_char.is_alphanumeric() && current_char != '-' && current_char != '_' { 358 | return Err(self.create_error_for_current_token(ParseErrorKind::UnexpectedToken)); 359 | } 360 | 361 | self.move_next_char(); 362 | } 363 | 364 | let end_byte_index = self.byte_index; 365 | 366 | if end_byte_index - start_byte_index == 0 { 367 | return Err(self.create_error_for_current_token(ParseErrorKind::UnexpectedToken)); 368 | } 369 | 370 | Ok(Token::Word(&self.file_text[start_byte_index..end_byte_index])) 371 | } 372 | 373 | fn assert_then_move_char(&mut self, _character: char) { 374 | #[cfg(debug_assertions)] 375 | self.assert_char(_character); 376 | 377 | self.move_next_char(); 378 | } 379 | 380 | #[cfg(debug_assertions)] 381 | fn assert_char(&mut self, character: char) { 382 | let current_char = self.current_char(); 383 | debug_assert!( 384 | current_char == Some(character), 385 | "Expected {:?}, was {:?}", 386 | character, 387 | current_char 388 | ); 389 | } 390 | 391 | fn move_next_char(&mut self) -> Option<char> { 392 | if let Some(&current_char) = self.char_buffer.first() { 393 | // shift the entire array to the left then pop the last item 394 | for i in 1..self.char_buffer.len() { 395 | self.char_buffer[i - 1] = self.char_buffer[i]; 396 | } 397 | self.char_buffer.pop(); 398 | 399 | if self.char_buffer.is_empty() 400 | && let Some(new_char) = self.char_iter.next() 401 | { 402 | self.char_buffer.push(new_char); 403 | } 404 | 405 | self.byte_index += current_char.len_utf8(); 406 | } 407 | 408 | self.current_char() 409 | } 410 | 411 | fn peek_char(&mut self) -> Option<char> { 412 | self.peek_char_offset(1) 413 | } 414 | 415 | fn peek_char_offset(&mut self, offset: usize) -> Option<char> { 416 | // fill the char buffer 417 | for _ in
self.char_buffer.len()..offset + 1 { 418 | if let Some(next_char) = self.char_iter.next() { 419 | self.char_buffer.push(next_char); 420 | } else { 421 | // end of string 422 | return None; 423 | } 424 | } 425 | 426 | // should not exceed this 427 | debug_assert!(self.char_buffer.len() <= CHAR_BUFFER_MAX_SIZE); 428 | 429 | self.char_buffer.get(offset).copied() 430 | } 431 | 432 | fn current_char(&self) -> Option<char> { 433 | self.char_buffer.first().copied() 434 | } 435 | 436 | fn is_new_line(&mut self) -> bool { 437 | match self.current_char() { 438 | Some('\n') => true, 439 | Some('\r') => self.peek_char() == Some('\n'), 440 | _ => false, 441 | } 442 | } 443 | 444 | fn is_digit(&self) -> bool { 445 | self.is_one_nine() || self.is_zero() 446 | } 447 | 448 | fn is_hex_digit(&self) -> bool { 449 | match self.current_char() { 450 | Some(current_char) => current_char.is_ascii_hexdigit(), 451 | _ => false, 452 | } 453 | } 454 | 455 | fn is_zero(&self) -> bool { 456 | self.current_char() == Some('0') 457 | } 458 | 459 | fn is_one_nine(&self) -> bool { 460 | match self.current_char() { 461 | Some(current_char) => ('1'..='9').contains(&current_char), 462 | _ => false, 463 | } 464 | } 465 | 466 | fn is_negative_sign(&self) -> bool { 467 | self.current_char() == Some('-') 468 | } 469 | 470 | fn is_positive_sign(&self) -> bool { 471 | self.current_char() == Some('+') 472 | } 473 | 474 | fn is_decimal_point(&self) -> bool { 475 | self.current_char() == Some('.') 476 | } 477 | } 478 | 479 | impl<'a> CharProvider<'a> for Scanner<'a> { 480 | fn current_char(&mut self) -> Option<char> { 481 | Scanner::current_char(self) 482 | } 483 | 484 | fn move_next_char(&mut self) -> Option<char> { 485 | Scanner::move_next_char(self) 486 | } 487 | 488 | fn byte_index(&self) -> usize { 489 | self.byte_index 490 | } 491 | 492 | fn text(&self) -> &'a str { 493 | self.file_text 494 | } 495 | } 496 | 497 | #[cfg(test)] 498 | mod tests { 499 | use std::borrow::Cow; 500 | 501 | use super::super::tokens::Token; 502 | use
super::*; 503 | use pretty_assertions::assert_eq; 504 | 505 | #[test] 506 | fn it_tokenizes_string() { 507 | assert_has_tokens( 508 | r#""t\"est", "\t\r\n\n\u0020 test\n other","#, 509 | vec![ 510 | Token::String(Cow::Borrowed(r#"t"est"#)), 511 | Token::Comma, 512 | Token::String(Cow::Borrowed("\t\r\n\n test\n other")), 513 | Token::Comma, 514 | ], 515 | ); 516 | } 517 | 518 | #[test] 519 | fn it_errors_escaping_single_quote_in_double_quote() { 520 | assert_has_error( 521 | r#""t\'est""#, 522 | "Invalid escape in double quote string on line 1 column 3", 523 | ); 524 | } 525 | 526 | #[test] 527 | fn it_tokenizes_single_quote_string() { 528 | assert_has_tokens( 529 | r#"'t\'est','a',"#, 530 | vec![ 531 | Token::String(Cow::Borrowed(r#"t'est"#)), 532 | Token::Comma, 533 | Token::String(Cow::Borrowed("a")), 534 | Token::Comma, 535 | ], 536 | ); 537 | } 538 | 539 | #[test] 540 | fn it_errors_escaping_double_quote_in_single_quote() { 541 | assert_has_error( 542 | r#"'t\"est'"#, 543 | "Invalid escape in single quote string on line 1 column 3", 544 | ); 545 | } 546 | 547 | #[test] 548 | fn it_errors_for_word_starting_with_invalid_token() { 549 | assert_has_error(r#"{ &test }"#, "Unexpected token on line 1 column 3"); 550 | } 551 | 552 | #[test] 553 | fn it_tokenizes_numbers() { 554 | assert_has_tokens( 555 | "0, 0.123, -198, 0e-345, 0.3e+025, 1e1,", 556 | vec![ 557 | Token::Number("0"), 558 | Token::Comma, 559 | Token::Number("0.123"), 560 | Token::Comma, 561 | Token::Number("-198"), 562 | Token::Comma, 563 | Token::Number("0e-345"), 564 | Token::Comma, 565 | Token::Number("0.3e+025"), 566 | Token::Comma, 567 | Token::Number("1e1"), 568 | Token::Comma, 569 | ], 570 | ); 571 | } 572 | 573 | #[test] 574 | fn it_tokenizes_hexadecimal_numbers() { 575 | assert_has_tokens( 576 | "0x7DF, 0xFF, 0x123ABC, 0xabc, 0X1F", 577 | vec![ 578 | Token::Number("0x7DF"), 579 | Token::Comma, 580 | Token::Number("0xFF"), 581 | Token::Comma, 582 | Token::Number("0x123ABC"), 583 | Token::Comma, 
584 | Token::Number("0xabc"), 585 | Token::Comma, 586 | Token::Number("0X1F"), 587 | ], 588 | ); 589 | } 590 | 591 | #[test] 592 | fn it_tokenizes_unary_plus_numbers() { 593 | assert_has_tokens( 594 | "+42, +0.5, +1e10, +0xFF", 595 | vec![ 596 | Token::Number("+42"), 597 | Token::Comma, 598 | Token::Number("+0.5"), 599 | Token::Comma, 600 | Token::Number("+1e10"), 601 | Token::Comma, 602 | Token::Number("+0xFF"), 603 | ], 604 | ); 605 | } 606 | 607 | #[test] 608 | fn it_errors_invalid_exponent() { 609 | assert_has_error( 610 | r#"1ea"#, 611 | "Expected plus, minus, or digit in number literal on line 1 column 3", 612 | ); 613 | assert_has_error(r#"1e-a"#, "Expected digit on line 1 column 4"); 614 | } 615 | 616 | #[test] 617 | fn it_tokenizes_simple_tokens() { 618 | assert_has_tokens( 619 | "{}[],:true,false,null,", 620 | vec![ 621 | Token::OpenBrace, 622 | Token::CloseBrace, 623 | Token::OpenBracket, 624 | Token::CloseBracket, 625 | Token::Comma, 626 | Token::Colon, 627 | Token::Boolean(true), 628 | Token::Comma, 629 | Token::Boolean(false), 630 | Token::Comma, 631 | Token::Null, 632 | Token::Comma, 633 | ], 634 | ); 635 | } 636 | 637 | #[test] 638 | fn it_tokenizes_comment_line() { 639 | assert_has_tokens( 640 | "//test\n//t\r\n// test\n,", 641 | vec![ 642 | Token::CommentLine("test"), 643 | Token::CommentLine("t"), 644 | Token::CommentLine(" test"), 645 | Token::Comma, 646 | ], 647 | ); 648 | } 649 | 650 | #[test] 651 | fn it_tokenizes_comment_blocks() { 652 | assert_has_tokens( 653 | "/*test\n *//* test*/,", 654 | vec![ 655 | Token::CommentBlock("test\n "), 656 | Token::CommentBlock(" test"), 657 | Token::Comma, 658 | ], 659 | ); 660 | } 661 | 662 | #[test] 663 | fn it_errors_on_invalid_utf8_char_for_issue_6() { 664 | assert_has_error( 665 | "\"\\uDF06\"", 666 | "Invalid unicode escape sequence. 
'DF06 (unpaired low surrogate)' is not a valid UTF8 character on line 1 column 2", 667 | ); 668 | } 669 | 670 | fn assert_has_tokens(text: &str, tokens: Vec) { 671 | let mut scanner = Scanner::new(text, &Default::default()); 672 | let mut scanned_tokens = Vec::new(); 673 | 674 | loop { 675 | match scanner.scan() { 676 | Ok(Some(token)) => scanned_tokens.push(token), 677 | Ok(None) => break, 678 | Err(err) => panic!("Error parsing: {:?}", err), 679 | } 680 | } 681 | 682 | assert_eq!(scanned_tokens, tokens); 683 | } 684 | 685 | fn assert_has_error(text: &str, message: &str) { 686 | let mut scanner = Scanner::new(text, &Default::default()); 687 | let mut error_message = String::new(); 688 | 689 | loop { 690 | match scanner.scan() { 691 | Ok(Some(_)) => {} 692 | Ok(None) => break, 693 | Err(err) => { 694 | error_message = err.to_string(); 695 | break; 696 | } 697 | } 698 | } 699 | 700 | assert_eq!(error_message, message); 701 | } 702 | } 703 | -------------------------------------------------------------------------------- /src/parse_to_ast.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Cow; 2 | use std::collections::HashMap; 3 | use std::rc::Rc; 4 | 5 | use super::ast::*; 6 | use super::common::Range; 7 | use super::errors::*; 8 | use super::scanner::Scanner; 9 | use super::scanner::ScannerOptions; 10 | use super::tokens::Token; 11 | use super::tokens::TokenAndRange; 12 | 13 | /// Map where the comments are stored in collections where 14 | /// the key is the previous token end or start of file or 15 | /// next token start or end of the file. 16 | pub type CommentMap<'a> = HashMap>>>; 17 | 18 | /// Strategy for handling comments during parsing. 19 | /// 20 | /// This enum determines how comments in the JSON/JSONC input are collected 21 | /// and represented in the resulting abstract syntax tree (AST). 
22 | #[derive(Default, Debug, PartialEq, Clone)] 23 | pub enum CommentCollectionStrategy { 24 | /// Comments are not collected and are effectively ignored during parsing. 25 | #[default] 26 | Off, 27 | /// Comments are collected and stored separately from the main AST structure. 28 | /// 29 | /// When this strategy is used, comments are placed in a [`CommentMap`] where 30 | /// the key is the previous token end or start of file, or the next token start 31 | /// or end of file. 32 | Separate, 33 | /// Comments are collected and treated as tokens within the AST. 34 | /// 35 | /// When this strategy is used, comments appear alongside other tokens in the 36 | /// token stream when `tokens: true` is set in [`CollectOptions`]. 37 | AsTokens, 38 | } 39 | 40 | /// Options for collecting comments and tokens. 41 | #[derive(Default, Clone)] 42 | pub struct CollectOptions { 43 | /// Include comments in the result. 44 | pub comments: CommentCollectionStrategy, 45 | /// Include tokens in the result. 46 | pub tokens: bool, 47 | } 48 | 49 | /// Options for parsing. 50 | #[derive(Clone)] 51 | pub struct ParseOptions { 52 | /// Allow comments (defaults to `true`). 53 | pub allow_comments: bool, 54 | /// Allow words and numbers as object property names (defaults to `true`). 55 | pub allow_loose_object_property_names: bool, 56 | /// Allow trailing commas on object literal and array literal values (defaults to `true`). 57 | pub allow_trailing_commas: bool, 58 | /// Allow single-quoted strings (defaults to `true`). 59 | pub allow_single_quoted_strings: bool, 60 | /// Allow hexadecimal numbers like 0xFF (defaults to `true`). 61 | pub allow_hexadecimal_numbers: bool, 62 | /// Allow unary plus sign on numbers like +42 (defaults to `true`). 
63 | pub allow_unary_plus_numbers: bool, 64 | } 65 | 66 | impl Default for ParseOptions { 67 | fn default() -> Self { 68 | Self { 69 | allow_comments: true, 70 | allow_loose_object_property_names: true, 71 | allow_trailing_commas: true, 72 | allow_single_quoted_strings: true, 73 | allow_hexadecimal_numbers: true, 74 | allow_unary_plus_numbers: true, 75 | } 76 | } 77 | } 78 | 79 | /// Result of parsing the text. 80 | pub struct ParseResult<'a> { 81 | /// Collection of comments in the text. 82 | /// 83 | /// Provide `comments: CommentCollectionStrategy::Separate` to the `CollectOptions` for this to have a value. 84 | /// 85 | /// Remarks: The key is the start and end position of the tokens. 86 | pub comments: Option<CommentMap<'a>>, 87 | /// The JSON value the text contained. 88 | pub value: Option<Value<'a>>, 89 | /// Collection of tokens (comments are only included when using `CommentCollectionStrategy::AsTokens`). 90 | /// 91 | /// Provide `tokens: true` to the `CollectOptions` for this to have a value. 92 | pub tokens: Option<Vec<TokenAndRange<'a>>>, 93 | } 94 | 95 | struct Context<'a> { 96 | scanner: Scanner<'a>, 97 | comments: Option<CommentMap<'a>>, 98 | current_comments: Option<Vec<Comment<'a>>>, 99 | last_token_end: usize, 100 | range_stack: Vec<Range>, 101 | tokens: Option<Vec<TokenAndRange<'a>>>, 102 | collect_comments_as_tokens: bool, 103 | allow_comments: bool, 104 | allow_trailing_commas: bool, 105 | allow_loose_object_property_names: bool, 106 | } 107 | 108 | impl<'a> Context<'a> { 109 | pub fn scan(&mut self) -> Result<Option<Token<'a>>, ParseError> { 110 | let previous_end = self.last_token_end; 111 | let token = self.scan_handling_comments()?; 112 | self.last_token_end = self.scanner.token_end(); 113 | 114 | // store the comment for the previous token end, and current token start 115 | if let Some(comments) = self.comments.as_mut() 116 | && let Some(current_comments) = self.current_comments.take() 117 | { 118 | let current_comments = Rc::new(current_comments); 119 | comments.insert(previous_end, current_comments.clone()); 120 | comments.insert(self.scanner.token_start(), current_comments); 121 | } 122 | 123 | if let Some(token) = &token 124 | &&
self.tokens.is_some() 125 | { 126 | self.capture_token(token.clone()); 127 | } 128 | 129 | Ok(token) 130 | } 131 | 132 | pub fn token(&self) -> Option> { 133 | self.scanner.token() 134 | } 135 | 136 | pub fn start_range(&mut self) { 137 | self.range_stack.push(Range { 138 | start: self.scanner.token_start(), 139 | end: 0, 140 | }); 141 | } 142 | 143 | pub fn end_range(&mut self) -> Range { 144 | let mut range = self 145 | .range_stack 146 | .pop() 147 | .expect("Range was popped from the stack, but the stack was empty."); 148 | range.end = self.scanner.token_end(); 149 | range 150 | } 151 | 152 | pub fn create_range_from_last_token(&self) -> Range { 153 | Range { 154 | start: self.scanner.token_start(), 155 | end: self.scanner.token_end(), 156 | } 157 | } 158 | 159 | pub fn create_error(&self, kind: ParseErrorKind) -> ParseError { 160 | self.scanner.create_error_for_current_token(kind) 161 | } 162 | 163 | pub fn create_error_for_current_range(&mut self, kind: ParseErrorKind) -> ParseError { 164 | let range = self.end_range(); 165 | self.create_error_for_range(range, kind) 166 | } 167 | 168 | pub fn create_error_for_range(&self, range: Range, kind: ParseErrorKind) -> ParseError { 169 | self.scanner.create_error_for_range(range, kind) 170 | } 171 | 172 | fn scan_handling_comments(&mut self) -> Result>, ParseError> { 173 | loop { 174 | let token = self.scanner.scan()?; 175 | match token { 176 | Some(token @ Token::CommentLine(_) | token @ Token::CommentBlock(_)) if self.collect_comments_as_tokens => { 177 | self.capture_token(token); 178 | } 179 | Some(Token::CommentLine(text)) => { 180 | self.handle_comment(Comment::Line(CommentLine { 181 | range: self.create_range_from_last_token(), 182 | text, 183 | }))?; 184 | } 185 | Some(Token::CommentBlock(text)) => { 186 | self.handle_comment(Comment::Block(CommentBlock { 187 | range: self.create_range_from_last_token(), 188 | text, 189 | }))?; 190 | } 191 | _ => return Ok(token), 192 | } 193 | } 194 | } 195 | 196 | fn 
capture_token(&mut self, token: Token<'a>) { 197 | let range = self.create_range_from_last_token(); 198 | if let Some(tokens) = self.tokens.as_mut() { 199 | tokens.push(TokenAndRange { 200 | token: token.clone(), 201 | range, 202 | }); 203 | } 204 | } 205 | 206 | fn handle_comment(&mut self, comment: Comment<'a>) -> Result<(), ParseError> { 207 | if !self.allow_comments { 208 | return Err(self.create_error(ParseErrorKind::CommentsNotAllowed)); 209 | } 210 | 211 | if self.comments.is_some() { 212 | if let Some(comments) = self.current_comments.as_mut() { 213 | comments.push(comment); 214 | } else { 215 | self.current_comments = Some(vec![comment]); 216 | } 217 | } 218 | 219 | Ok(()) 220 | } 221 | } 222 | 223 | /// Parses a string containing JSONC to an AST with comments and tokens. 224 | /// 225 | /// # Example 226 | /// 227 | /// ``` 228 | /// use jsonc_parser::CollectOptions; 229 | /// use jsonc_parser::CommentCollectionStrategy; 230 | /// use jsonc_parser::parse_to_ast; 231 | /// use jsonc_parser::ParseOptions; 232 | /// 233 | /// let parse_result = parse_to_ast(r#"{ "test": 5 } // test"#, &CollectOptions { 234 | /// comments: CommentCollectionStrategy::Separate, // include comments in result 235 | /// tokens: true, // include tokens in result 236 | /// }, &Default::default()).expect("Should parse."); 237 | /// // ...inspect parse_result for value, tokens, and comments here... 
238 | /// ``` 239 | pub fn parse_to_ast<'a>( 240 | text: &'a str, 241 | collect_options: &CollectOptions, 242 | parse_options: &ParseOptions, 243 | ) -> Result, ParseError> { 244 | let mut context = Context { 245 | scanner: Scanner::new( 246 | text, 247 | &ScannerOptions { 248 | allow_single_quoted_strings: parse_options.allow_single_quoted_strings, 249 | allow_hexadecimal_numbers: parse_options.allow_hexadecimal_numbers, 250 | allow_unary_plus_numbers: parse_options.allow_unary_plus_numbers, 251 | }, 252 | ), 253 | comments: match collect_options.comments { 254 | CommentCollectionStrategy::Separate => Some(Default::default()), 255 | CommentCollectionStrategy::Off | CommentCollectionStrategy::AsTokens => None, 256 | }, 257 | current_comments: None, 258 | last_token_end: 0, 259 | range_stack: Vec::new(), 260 | tokens: if collect_options.tokens { Some(Vec::new()) } else { None }, 261 | collect_comments_as_tokens: collect_options.comments == CommentCollectionStrategy::AsTokens, 262 | allow_comments: parse_options.allow_comments, 263 | allow_trailing_commas: parse_options.allow_trailing_commas, 264 | allow_loose_object_property_names: parse_options.allow_loose_object_property_names, 265 | }; 266 | context.scan()?; 267 | let value = parse_value(&mut context)?; 268 | 269 | if context.scan()?.is_some() { 270 | return Err(context.create_error(ParseErrorKind::MultipleRootJsonValues)); 271 | } 272 | 273 | debug_assert!(context.range_stack.is_empty()); 274 | 275 | Ok(ParseResult { 276 | comments: context.comments, 277 | tokens: context.tokens, 278 | value, 279 | }) 280 | } 281 | 282 | fn parse_value<'a>(context: &mut Context<'a>) -> Result>, ParseError> { 283 | match context.token() { 284 | None => Ok(None), 285 | Some(token) => match token { 286 | Token::OpenBrace => Ok(Some(Value::Object(parse_object(context)?))), 287 | Token::OpenBracket => Ok(Some(Value::Array(parse_array(context)?))), 288 | Token::String(value) => Ok(Some(Value::StringLit(create_string_lit(context, 
value)))), 289 | Token::Boolean(value) => Ok(Some(Value::BooleanLit(create_boolean_lit(context, value)))), 290 | Token::Number(value) => Ok(Some(Value::NumberLit(create_number_lit(context, value)))), 291 | Token::Null => Ok(Some(Value::NullKeyword(create_null_keyword(context)))), 292 | Token::CloseBracket => Err(context.create_error(ParseErrorKind::UnexpectedCloseBracket)), 293 | Token::CloseBrace => Err(context.create_error(ParseErrorKind::UnexpectedCloseBrace)), 294 | Token::Comma => Err(context.create_error(ParseErrorKind::UnexpectedComma)), 295 | Token::Colon => Err(context.create_error(ParseErrorKind::UnexpectedColon)), 296 | Token::Word(_) => Err(context.create_error(ParseErrorKind::UnexpectedWord)), 297 | Token::CommentLine(_) => unreachable!(), 298 | Token::CommentBlock(_) => unreachable!(), 299 | }, 300 | } 301 | } 302 | 303 | fn parse_object<'a>(context: &mut Context<'a>) -> Result, ParseError> { 304 | debug_assert!(context.token() == Some(Token::OpenBrace)); 305 | let mut properties = Vec::new(); 306 | 307 | context.start_range(); 308 | context.scan()?; 309 | 310 | loop { 311 | match context.token() { 312 | Some(Token::CloseBrace) => break, 313 | Some(Token::String(prop_name)) => { 314 | properties.push(parse_object_property(context, PropName::String(prop_name))?); 315 | } 316 | Some(Token::Word(prop_name)) | Some(Token::Number(prop_name)) => { 317 | properties.push(parse_object_property(context, PropName::Word(prop_name))?); 318 | } 319 | None => return Err(context.create_error_for_current_range(ParseErrorKind::UnterminatedObject)), 320 | _ => return Err(context.create_error(ParseErrorKind::UnexpectedTokenInObject)), 321 | } 322 | 323 | // skip the comma 324 | if let Some(Token::Comma) = context.scan()? { 325 | let comma_range = context.create_range_from_last_token(); 326 | if let Some(Token::CloseBrace) = context.scan()? 
327 | && !context.allow_trailing_commas 328 | { 329 | return Err(context.create_error_for_range(comma_range, ParseErrorKind::TrailingCommasNotAllowed)); 330 | } 331 | } 332 | } 333 | 334 | Ok(Object { 335 | range: context.end_range(), 336 | properties, 337 | }) 338 | } 339 | 340 | enum PropName<'a> { 341 | String(Cow<'a, str>), 342 | Word(&'a str), 343 | } 344 | 345 | fn parse_object_property<'a>(context: &mut Context<'a>, prop_name: PropName<'a>) -> Result, ParseError> { 346 | context.start_range(); 347 | 348 | let name = match prop_name { 349 | PropName::String(prop_name) => ObjectPropName::String(create_string_lit(context, prop_name)), 350 | PropName::Word(prop_name) => { 351 | if context.allow_loose_object_property_names { 352 | ObjectPropName::Word(create_word(context, prop_name)) 353 | } else { 354 | return Err(context.create_error(ParseErrorKind::ExpectedStringObjectProperty)); 355 | } 356 | } 357 | }; 358 | 359 | match context.scan()? { 360 | Some(Token::Colon) => {} 361 | _ => return Err(context.create_error(ParseErrorKind::ExpectedColonAfterObjectKey)), 362 | } 363 | 364 | context.scan()?; 365 | let value = parse_value(context)?; 366 | 367 | match value { 368 | Some(value) => Ok(ObjectProp { 369 | range: context.end_range(), 370 | name, 371 | value, 372 | }), 373 | None => Err(context.create_error(ParseErrorKind::ExpectedObjectValue)), 374 | } 375 | } 376 | 377 | fn parse_array<'a>(context: &mut Context<'a>) -> Result, ParseError> { 378 | debug_assert!(context.token() == Some(Token::OpenBracket)); 379 | let mut elements = Vec::new(); 380 | 381 | context.start_range(); 382 | context.scan()?; 383 | 384 | loop { 385 | match context.token() { 386 | Some(Token::CloseBracket) => break, 387 | None => return Err(context.create_error_for_current_range(ParseErrorKind::UnterminatedArray)), 388 | _ => match parse_value(context)? 
{ 389 | Some(value) => elements.push(value), 390 | None => return Err(context.create_error_for_current_range(ParseErrorKind::UnterminatedArray)), 391 | }, 392 | } 393 | 394 | // skip the comma 395 | if let Some(Token::Comma) = context.scan()? { 396 | let comma_range = context.create_range_from_last_token(); 397 | if let Some(Token::CloseBracket) = context.scan()? 398 | && !context.allow_trailing_commas 399 | { 400 | return Err(context.create_error_for_range(comma_range, ParseErrorKind::TrailingCommasNotAllowed)); 401 | } 402 | } 403 | } 404 | 405 | Ok(Array { 406 | range: context.end_range(), 407 | elements, 408 | }) 409 | } 410 | 411 | // factory functions 412 | 413 | fn create_string_lit<'a>(context: &Context<'a>, value: Cow<'a, str>) -> StringLit<'a> { 414 | StringLit { 415 | range: context.create_range_from_last_token(), 416 | value, 417 | } 418 | } 419 | 420 | fn create_word<'a>(context: &Context<'a>, value: &'a str) -> WordLit<'a> { 421 | WordLit { 422 | range: context.create_range_from_last_token(), 423 | value, 424 | } 425 | } 426 | 427 | fn create_boolean_lit(context: &Context, value: bool) -> BooleanLit { 428 | BooleanLit { 429 | range: context.create_range_from_last_token(), 430 | value, 431 | } 432 | } 433 | 434 | fn create_number_lit<'a>(context: &Context<'a>, value: &'a str) -> NumberLit<'a> { 435 | NumberLit { 436 | range: context.create_range_from_last_token(), 437 | value, 438 | } 439 | } 440 | 441 | fn create_null_keyword(context: &Context) -> NullKeyword { 442 | NullKeyword { 443 | range: context.create_range_from_last_token(), 444 | } 445 | } 446 | 447 | #[cfg(test)] 448 | mod tests { 449 | use super::*; 450 | use pretty_assertions::assert_eq; 451 | 452 | #[test] 453 | fn it_should_error_when_has_multiple_values() { 454 | assert_has_error( 455 | "[][]", 456 | "Text cannot contain more than one JSON value on line 1 column 3", 457 | ); 458 | } 459 | 460 | #[test] 461 | fn it_should_error_when_object_is_not_terminated() { 462 | 
assert_has_error("{", "Unterminated object on line 1 column 1"); 463 | } 464 | 465 | #[test] 466 | fn it_should_error_when_object_has_unexpected_token() { 467 | assert_has_error("{ [] }", "Unexpected token in object on line 1 column 3"); 468 | } 469 | 470 | #[test] 471 | fn it_should_error_when_object_has_two_non_string_tokens() { 472 | assert_has_error( 473 | "{ asdf asdf: 5 }", 474 | "Expected colon after the string or word in object property on line 1 column 8", 475 | ); 476 | } 477 | 478 | #[test] 479 | fn it_should_error_when_array_is_not_terminated() { 480 | assert_has_error("[", "Unterminated array on line 1 column 1"); 481 | } 482 | 483 | #[test] 484 | fn it_should_error_when_array_has_unexpected_token() { 485 | assert_has_error("[:]", "Unexpected colon on line 1 column 2"); 486 | } 487 | 488 | #[test] 489 | fn it_should_error_when_comment_block_not_closed() { 490 | assert_has_error("/* test", "Unterminated comment block on line 1 column 1"); 491 | } 492 | 493 | #[test] 494 | fn it_should_error_when_string_lit_not_closed() { 495 | assert_has_error("\" test", "Unterminated string literal on line 1 column 1"); 496 | } 497 | 498 | fn assert_has_error(text: &str, message: &str) { 499 | let result = parse_to_ast(text, &Default::default(), &Default::default()); 500 | match result { 501 | Ok(_) => panic!("Expected error, but did not find one."), 502 | Err(err) => assert_eq!(err.to_string(), message), 503 | } 504 | } 505 | 506 | #[test] 507 | fn strict_should_error_object_trailing_comma() { 508 | assert_has_strict_error( 509 | r#"{ "test": 5, }"#, 510 | "Trailing commas are not allowed on line 1 column 12", 511 | ); 512 | } 513 | 514 | #[test] 515 | fn strict_should_error_array_trailing_comma() { 516 | assert_has_strict_error(r#"[ "test", ]"#, "Trailing commas are not allowed on line 1 column 9"); 517 | } 518 | 519 | #[test] 520 | fn strict_should_error_comment_line() { 521 | assert_has_strict_error(r#"[ "test" ] // 1"#, "Comments are not allowed on line 1 column 
12"); 522 | } 523 | 524 | #[test] 525 | fn strict_should_error_comment_block() { 526 | assert_has_strict_error(r#"[ "test" /* 1 */]"#, "Comments are not allowed on line 1 column 10"); 527 | } 528 | 529 | #[test] 530 | fn strict_should_error_word_property() { 531 | assert_has_strict_error( 532 | r#"{ word: 5 }"#, 533 | "Expected string for object property on line 1 column 3", 534 | ); 535 | } 536 | 537 | #[test] 538 | fn strict_should_error_single_quoted_string() { 539 | assert_has_strict_error( 540 | r#"{ "key": 'value' }"#, 541 | "Single-quoted strings are not allowed on line 1 column 10", 542 | ); 543 | } 544 | 545 | #[test] 546 | fn strict_should_error_hexadecimal_number() { 547 | assert_has_strict_error( 548 | r#"{ "key": 0xFF }"#, 549 | "Hexadecimal numbers are not allowed on line 1 column 10", 550 | ); 551 | } 552 | 553 | #[test] 554 | fn strict_should_error_unary_plus_number() { 555 | assert_has_strict_error( 556 | r#"{ "key": +42 }"#, 557 | "Unary plus on numbers is not allowed on line 1 column 10", 558 | ); 559 | } 560 | 561 | #[track_caller] 562 | fn assert_has_strict_error(text: &str, message: &str) { 563 | let result = parse_to_ast( 564 | text, 565 | &Default::default(), 566 | &ParseOptions { 567 | allow_comments: false, 568 | allow_loose_object_property_names: false, 569 | allow_trailing_commas: false, 570 | allow_single_quoted_strings: false, 571 | allow_hexadecimal_numbers: false, 572 | allow_unary_plus_numbers: false, 573 | }, 574 | ); 575 | match result { 576 | Ok(_) => panic!("Expected error, but did not find one."), 577 | Err(err) => assert_eq!(err.to_string(), message), 578 | } 579 | } 580 | 581 | #[test] 582 | fn it_should_not_include_tokens_by_default() { 583 | let result = parse_to_ast("{}", &Default::default(), &Default::default()).unwrap(); 584 | assert!(result.tokens.is_none()); 585 | } 586 | 587 | #[test] 588 | fn it_should_include_tokens_when_specified() { 589 | let result = parse_to_ast( 590 | "{}", 591 | &CollectOptions { 592 | tokens: 
true, 593 | ..Default::default() 594 | }, 595 | &Default::default(), 596 | ) 597 | .unwrap(); 598 | let tokens = result.tokens.unwrap(); 599 | assert_eq!(tokens.len(), 2); 600 | } 601 | 602 | #[test] 603 | fn it_should_not_include_comments_by_default() { 604 | let result = parse_to_ast("{}", &Default::default(), &Default::default()).unwrap(); 605 | assert!(result.comments.is_none()); 606 | } 607 | 608 | #[test] 609 | fn it_should_include_comments_when_specified() { 610 | let result = parse_to_ast( 611 | "{} // 2", 612 | &CollectOptions { 613 | comments: CommentCollectionStrategy::Separate, 614 | ..Default::default() 615 | }, 616 | &Default::default(), 617 | ) 618 | .unwrap(); 619 | let comments = result.comments.unwrap(); 620 | assert_eq!(comments.len(), 2); // for both positions, but it's the same comment 621 | } 622 | 623 | #[cfg(not(feature = "error_unicode_width"))] 624 | #[test] 625 | fn error_correct_line_column_unicode_width() { 626 | assert_has_strict_error(r#"["🧑‍🦰", ["#, "Unterminated array on line 1 column 9"); 627 | } 628 | 629 | #[cfg(feature = "error_unicode_width")] 630 | #[test] 631 | fn error_correct_line_column_unicode_width() { 632 | assert_has_strict_error(r#"["🧑‍🦰", ["#, "Unterminated array on line 1 column 10"); 633 | } 634 | 635 | #[test] 636 | fn it_should_parse_unquoted_keys_with_hex_and_trailing_comma() { 637 | let text = r#"{ 638 | CP_CanFuncReqId: 0x7DF, // 2015 639 | }"#; 640 | { 641 | let parse_result = parse_to_ast(text, &Default::default(), &Default::default()).unwrap(); 642 | 643 | let value = parse_result.value.unwrap(); 644 | let obj = value.as_object().unwrap(); 645 | assert_eq!(obj.properties.len(), 1); 646 | assert_eq!(obj.properties[0].name.as_str(), "CP_CanFuncReqId"); 647 | 648 | let number_value = obj.properties[0].value.as_number_lit().unwrap(); 649 | assert_eq!(number_value.value, "0x7DF"); 650 | } 651 | #[cfg(feature = "serde")] 652 | { 653 | let value = crate::parse_to_serde_value(text, 
&Default::default()).unwrap().unwrap(); 654 | // hexadecimal numbers are converted to decimal in serde output 655 | assert_eq!( 656 | value, 657 | serde_json::json!({ 658 | "CP_CanFuncReqId": 2015 659 | }) 660 | ); 661 | } 662 | } 663 | 664 | #[test] 665 | fn it_should_parse_unary_plus_numbers() { 666 | let result = parse_to_ast(r#"{ "test": +42 }"#, &Default::default(), &Default::default()).unwrap(); 667 | 668 | let value = result.value.unwrap(); 669 | let obj = value.as_object().unwrap(); 670 | assert_eq!(obj.properties.len(), 1); 671 | assert_eq!(obj.properties[0].name.as_str(), "test"); 672 | 673 | let number_value = obj.properties[0].value.as_number_lit().unwrap(); 674 | assert_eq!(number_value.value, "+42"); 675 | } 676 | } 677 | --------------------------------------------------------------------------------