├── .gitignore ├── test-data ├── analytics │ ├── _cargo6.toml │ ├── _cargo5.toml │ ├── _cargo9.toml │ ├── _cargo8.toml │ ├── _cargo11.toml │ ├── _cargo12.toml │ ├── _cargo4.toml │ ├── _cargo7.toml │ ├── _cargo10.toml │ ├── _cargo2.toml │ ├── _cargo3.toml │ └── _cargo1.toml ├── invalid │ ├── bare-key-3.toml │ ├── bare-key-1.toml │ ├── int-0-padded.toml │ ├── int-signed-bin.toml │ ├── int-signed-hex.toml │ ├── int-signed-oct.toml │ ├── taplo-invalid-float.toml │ ├── bare-key-2.toml │ ├── comment-control-1.toml │ ├── comment-control-2.toml │ ├── comment-control-3.toml │ ├── comment-control-4.toml │ ├── key-value-pair-1.toml │ ├── string-basic-control-1.toml │ ├── string-basic-control-2.toml │ ├── string-basic-control-3.toml │ ├── string-basic-control-4.toml │ ├── string-basic-unknown-escape.toml │ ├── string-literal-control-1.toml │ ├── string-literal-control-2.toml │ ├── string-literal-control-3.toml │ ├── string-literal-control-4.toml │ ├── taplo-invalid-array.toml │ ├── no-key-name.toml │ ├── taplo-invalid-array-comma-start.toml │ ├── taplo-table-before-array.toml │ ├── inline-table-trailing-comma.toml │ ├── string-basic-multiline-control-1.toml │ ├── string-basic-multiline-control-2.toml │ ├── string-basic-multiline-control-3.toml │ ├── string-basic-multiline-control-4.toml │ ├── string-basic-multiline-unknown-escape.toml │ ├── string-literal-multiline-control-1.toml │ ├── string-literal-multiline-control-2.toml │ ├── string-literal-multiline-control-3.toml │ ├── string-literal-multiline-control-4.toml │ ├── taplo-incomplete-inline-table.toml │ ├── string-basic-out-of-range-unicode-escape-1.toml │ ├── string-basic-out-of-range-unicode-escape-2.toml │ ├── key-value-pair-2.toml │ ├── multiple-key.toml │ ├── taplo-invalid-inline-table.toml │ ├── string-basic-multiline-invalid-backslash.toml │ ├── string-basic-multiline-out-of-range-unicode-escape-1.toml │ ├── string-basic-multiline-out-of-range-unicode-escape-2.toml │ ├── string-basic-multiline-quotes.toml │ ├── array-of-tables-1.toml │ ├── table-invalid-2.toml │ ├── inline-table-imutable-2.toml │ ├── taplo-duplicate-keys.toml │ ├── taplo-inner-key-conflict.toml │ ├── inline-table-imutable-1.toml │ ├── table-1.toml │ ├── table-3.toml │ ├── string-literal-multiline-quotes.toml │ ├── table-4.toml │ ├── table-2.toml │ ├── array-of-tables-2.toml │ ├── table-invalid-1.toml │ ├── multiple-dot-key.toml │ ├── table-invalid-3.toml │ ├── table-invalid-4.toml │ └── taplo-invalid-padding.toml ├── rewrite │ ├── table.toml │ ├── value.toml │ ├── table_expected.toml │ ├── value_expected.toml │ ├── multiple_expected.toml │ ├── key.toml │ ├── key_expected.toml │ ├── multiple.toml │ ├── nothing.toml │ └── nothing_expected.toml ├── README.md └── example.toml ├── rust-toolchain.toml ├── src ├── util │ ├── syntax.rs │ ├── mod.rs │ └── escape.rs ├── parser │ ├── macros.rs │ └── mod.rs ├── lib.rs ├── formatter │ ├── macros.rs │ └── mod.rs └── syntax.rs ├── Cargo.toml ├── LICENSE ├── tests └── formatter.rs ├── README.md └── Cargo.lock /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | -------------------------------------------------------------------------------- /test-data/analytics/_cargo6.toml: -------------------------------------------------------------------------------- 1 | lib. 
= 2 | -------------------------------------------------------------------------------- /test-data/invalid/bare-key-3.toml: -------------------------------------------------------------------------------- 1 | barekey = -------------------------------------------------------------------------------- /test-data/rewrite/table.toml: -------------------------------------------------------------------------------- 1 | value = 2 2 | -------------------------------------------------------------------------------- /test-data/rewrite/value.toml: -------------------------------------------------------------------------------- 1 | value = 2 2 | -------------------------------------------------------------------------------- /test-data/analytics/_cargo5.toml: -------------------------------------------------------------------------------- 1 | lib.bench = 2 | 3 | -------------------------------------------------------------------------------- /test-data/analytics/_cargo9.toml: -------------------------------------------------------------------------------- 1 | [lib] 2 | bench = -------------------------------------------------------------------------------- /test-data/invalid/bare-key-1.toml: -------------------------------------------------------------------------------- 1 | bare!key = 123 2 | -------------------------------------------------------------------------------- /test-data/invalid/int-0-padded.toml: -------------------------------------------------------------------------------- 1 | int = 0123 2 | -------------------------------------------------------------------------------- /test-data/invalid/int-signed-bin.toml: -------------------------------------------------------------------------------- 1 | bin = +0b10 2 | -------------------------------------------------------------------------------- /test-data/invalid/int-signed-hex.toml: -------------------------------------------------------------------------------- 1 | hex = +0xab 2 | -------------------------------------------------------------------------------- /test-data/invalid/int-signed-oct.toml: -------------------------------------------------------------------------------- 1 | oct = +0o23 2 | -------------------------------------------------------------------------------- /test-data/invalid/taplo-invalid-float.toml: -------------------------------------------------------------------------------- 1 | what = 1. 
-------------------------------------------------------------------------------- /test-data/analytics/_cargo8.toml: -------------------------------------------------------------------------------- 1 | lib = 2 | 3 | b = a 4 | -------------------------------------------------------------------------------- /test-data/invalid/bare-key-2.toml: -------------------------------------------------------------------------------- 1 | barekey 2 | = 123 3 | -------------------------------------------------------------------------------- /test-data/invalid/comment-control-1.toml: -------------------------------------------------------------------------------- 1 | a = "null" # 2 | -------------------------------------------------------------------------------- /test-data/invalid/comment-control-2.toml: -------------------------------------------------------------------------------- 1 | a = "ctrl-P" #  2 | -------------------------------------------------------------------------------- /test-data/invalid/comment-control-3.toml: -------------------------------------------------------------------------------- 1 | a = "ctrl-_" #  2 | -------------------------------------------------------------------------------- /test-data/invalid/comment-control-4.toml: -------------------------------------------------------------------------------- 1 | a = "0x7f" #  2 | -------------------------------------------------------------------------------- /test-data/invalid/key-value-pair-1.toml: -------------------------------------------------------------------------------- 1 | key = # INVALID 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-control-1.toml: -------------------------------------------------------------------------------- 1 | a = "null" 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-control-2.toml: -------------------------------------------------------------------------------- 1 | a = "ctrl-P" 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-control-3.toml: -------------------------------------------------------------------------------- 1 | a = "ctrl-_" 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-control-4.toml: -------------------------------------------------------------------------------- 1 | a = "0x7f" 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-unknown-escape.toml: -------------------------------------------------------------------------------- 1 | a = "\@" 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-literal-control-1.toml: -------------------------------------------------------------------------------- 1 | a = 'null' 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-literal-control-2.toml: -------------------------------------------------------------------------------- 1 | a = 'null' 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-literal-control-3.toml: -------------------------------------------------------------------------------- 1 | a = 'null' 2 | -------------------------------------------------------------------------------- 
/test-data/invalid/string-literal-control-4.toml: -------------------------------------------------------------------------------- 1 | a = 'null' 2 | -------------------------------------------------------------------------------- /test-data/invalid/taplo-invalid-array.toml: -------------------------------------------------------------------------------- 1 | arr = ["value",,,,] -------------------------------------------------------------------------------- /test-data/analytics/_cargo11.toml: -------------------------------------------------------------------------------- 1 | schema = { enabled = false 2 | -------------------------------------------------------------------------------- /test-data/analytics/_cargo12.toml: -------------------------------------------------------------------------------- 1 | [table] 2 | table = { bool = fa } -------------------------------------------------------------------------------- /test-data/invalid/no-key-name.toml: -------------------------------------------------------------------------------- 1 | = "no key name" # INVALID 2 | -------------------------------------------------------------------------------- /test-data/analytics/_cargo4.toml: -------------------------------------------------------------------------------- 1 | lib = 2 | 3 | [package] 4 | 5 | asd = -------------------------------------------------------------------------------- /test-data/analytics/_cargo7.toml: -------------------------------------------------------------------------------- 1 | [lib] 2 | bench = 3 | 4 | stuff = { , } -------------------------------------------------------------------------------- /test-data/invalid/taplo-invalid-array-comma-start.toml: -------------------------------------------------------------------------------- 1 | arr = [,"value"] -------------------------------------------------------------------------------- /test-data/invalid/taplo-table-before-array.toml: -------------------------------------------------------------------------------- 1 | [foo.bar] 2 | [[foo]] 3 | -------------------------------------------------------------------------------- /test-data/invalid/inline-table-trailing-comma.toml: -------------------------------------------------------------------------------- 1 | abc = { abc = 123, } 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-multiline-control-1.toml: -------------------------------------------------------------------------------- 1 | a = """null""" 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-multiline-control-2.toml: -------------------------------------------------------------------------------- 1 | a = """null""" 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-multiline-control-3.toml: -------------------------------------------------------------------------------- 1 | a = """null""" 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-multiline-control-4.toml: -------------------------------------------------------------------------------- 1 | a = """null""" 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-multiline-unknown-escape.toml: -------------------------------------------------------------------------------- 1 | a = """\@""" 2 | 
-------------------------------------------------------------------------------- /test-data/invalid/string-literal-multiline-control-1.toml: -------------------------------------------------------------------------------- 1 | a = '''null''' 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-literal-multiline-control-2.toml: -------------------------------------------------------------------------------- 1 | a = '''null''' 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-literal-multiline-control-3.toml: -------------------------------------------------------------------------------- 1 | a = '''null''' 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-literal-multiline-control-4.toml: -------------------------------------------------------------------------------- 1 | a = '''null''' 2 | -------------------------------------------------------------------------------- /test-data/invalid/taplo-incomplete-inline-table.toml: -------------------------------------------------------------------------------- 1 | schema = { enabled = false -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "1.92.0" 3 | profile = "default" 4 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-out-of-range-unicode-escape-1.toml: -------------------------------------------------------------------------------- 1 | a = "\UFFFFFFFF" 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-out-of-range-unicode-escape-2.toml: -------------------------------------------------------------------------------- 1 | a = "\U00D80000" 2 | -------------------------------------------------------------------------------- /test-data/invalid/key-value-pair-2.toml: -------------------------------------------------------------------------------- 1 | first = "Tom" last = "Preston-Werner" # INVALID 2 | -------------------------------------------------------------------------------- /test-data/analytics/_cargo10.toml: -------------------------------------------------------------------------------- 1 | [features] 2 | asad = [] 3 | 4 | [lib] 5 | 6 | asd = false 7 | -------------------------------------------------------------------------------- /test-data/invalid/multiple-key.toml: -------------------------------------------------------------------------------- 1 | # DO NOT DO THIS 2 | name = "Tom" 3 | name = "Pradyun" 4 | -------------------------------------------------------------------------------- /test-data/invalid/taplo-invalid-inline-table.toml: -------------------------------------------------------------------------------- 1 | cooldowns = { 2 | aggressive = true, 3 | } -------------------------------------------------------------------------------- /test-data/rewrite/table_expected.toml: -------------------------------------------------------------------------------- 1 | [table] 2 | original_value = 2 3 | additional_value = 3 4 | -------------------------------------------------------------------------------- /test-data/rewrite/value_expected.toml: -------------------------------------------------------------------------------- 1 | value = { original_value = 2, 
additional_value = 3 } 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-multiline-invalid-backslash.toml: -------------------------------------------------------------------------------- 1 | a = """ 2 | foo \ \n 3 | bar""" 4 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-multiline-out-of-range-unicode-escape-1.toml: -------------------------------------------------------------------------------- 1 | a = """\UFFFFFFFF""" 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-multiline-out-of-range-unicode-escape-2.toml: -------------------------------------------------------------------------------- 1 | a = """\U00D80000""" 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-multiline-quotes.toml: -------------------------------------------------------------------------------- 1 | str5 = """Here are three quotation marks: """.""" 2 | -------------------------------------------------------------------------------- /test-data/invalid/array-of-tables-1.toml: -------------------------------------------------------------------------------- 1 | # INVALID TOML DOC 2 | fruit = [] 3 | 4 | [[fruit]] # Not allowed 5 | -------------------------------------------------------------------------------- /test-data/invalid/table-invalid-2.toml: -------------------------------------------------------------------------------- 1 | # INVALID TOML DOC 2 | fruit = [] 3 | 4 | [[fruit]] # Not allowed 5 | -------------------------------------------------------------------------------- /test-data/invalid/inline-table-imutable-2.toml: -------------------------------------------------------------------------------- 1 | [product] 2 | type.name = "Nail" 3 | type = { edible = false }# INVALID 4 | -------------------------------------------------------------------------------- /test-data/invalid/taplo-duplicate-keys.toml: -------------------------------------------------------------------------------- 1 | # THIS WILL NOT WORK 2 | spelling = "favorite" 3 | "spelling" = "favourite" 4 | -------------------------------------------------------------------------------- /test-data/invalid/taplo-inner-key-conflict.toml: -------------------------------------------------------------------------------- 1 | package.something.else = 2 2 | 3 | [package] 4 | something.other = 2 5 | -------------------------------------------------------------------------------- /test-data/invalid/inline-table-imutable-1.toml: -------------------------------------------------------------------------------- 1 | [product] 2 | type = { name = "Nail" } 3 | type.edible = false # INVALID 4 | -------------------------------------------------------------------------------- /test-data/invalid/table-1.toml: -------------------------------------------------------------------------------- 1 | # DO NOT DO THIS 2 | 3 | [fruit] 4 | apple = "red" 5 | 6 | [fruit] 7 | orange = "orange" 8 | -------------------------------------------------------------------------------- /test-data/invalid/table-3.toml: -------------------------------------------------------------------------------- 1 | [fruit] 2 | apple.color = "red" 3 | apple.taste.sweet = true 4 | 5 | [fruit.apple] # INVALID 6 | -------------------------------------------------------------------------------- /test-data/invalid/string-literal-multiline-quotes.toml: 
-------------------------------------------------------------------------------- 1 | apos15 = '''Here are fifteen apostrophes: '''''''''''''''''' # INVALID 2 | -------------------------------------------------------------------------------- /test-data/invalid/table-4.toml: -------------------------------------------------------------------------------- 1 | [fruit] 2 | apple.color = "red" 3 | apple.taste.sweet = true 4 | 5 | [fruit.apple.taste] # INVALID 6 | -------------------------------------------------------------------------------- /test-data/invalid/table-2.toml: -------------------------------------------------------------------------------- 1 | # DO NOT DO THIS EITHER 2 | 3 | [fruit] 4 | apple = "red" 5 | 6 | [fruit.apple] 7 | texture = "smooth" 8 | -------------------------------------------------------------------------------- /test-data/rewrite/multiple_expected.toml: -------------------------------------------------------------------------------- 1 | 2 | 3 | [table] 4 | rewritten = 2 5 | 6 | [[table.arr]] 7 | rewrite_my_value = 0 8 | 9 | [[table.arr]] 10 | rewrite_my_value = 1 11 | -------------------------------------------------------------------------------- /test-data/rewrite/key.toml: -------------------------------------------------------------------------------- 1 | rewrite_me = 2 2 | 3 | [table] 4 | rewrite_me = 2 5 | 6 | [table.subtable.rewrite_me] 7 | val = 2 8 | 9 | [[arr.rewrite_me]] 10 | [[arr.rewrite_me]] 11 | rewrite_me = "rewrite_me" 12 | [[arr.rewrite_me]] 13 | -------------------------------------------------------------------------------- /test-data/rewrite/key_expected.toml: -------------------------------------------------------------------------------- 1 | rewritten = 2 2 | 3 | [table] 4 | rewritten = 2 5 | 6 | [table.subtable.rewritten] 7 | val = 2 8 | 9 | [[arr.rewritten]] 10 | [[arr.rewritten]] 11 | rewritten = "rewrite_me" 12 | [[arr.rewritten]] 13 | -------------------------------------------------------------------------------- /test-data/invalid/array-of-tables-2.toml: -------------------------------------------------------------------------------- 1 | # INVALID TOML DOC 2 | [[fruit]] 3 | name = "apple" 4 | 5 | [[fruit.variety]] 6 | name = "red delicious" 7 | 8 | # This table conflicts with the previous table 9 | [fruit.variety] 10 | name = "granny smith" 11 | -------------------------------------------------------------------------------- /test-data/rewrite/multiple.toml: -------------------------------------------------------------------------------- 1 | [remove_this] 2 | 3 | [remove_this.subtable] 4 | value = "should be removed" 5 | 6 | [table] 7 | rewrite_me = 2 8 | 9 | [[table.arr]] 10 | rewrite_my_value = 3 11 | 12 | [[table.arr]] 13 | rewrite_my_value = 3 14 | -------------------------------------------------------------------------------- /test-data/analytics/_cargo2.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | exclude = ["util/schema-gen", "util/test-gen"] 3 | members = ["taplo-ide", "taplo", "lsp-async-stub"] 4 | 5 | [profile.release] 6 | codegen-units = 1 7 | lto = true 8 | opt-level = 3 9 | 10 | [profile.bench] 11 | lto = true 12 | -------------------------------------------------------------------------------- /test-data/invalid/table-invalid-1.toml: -------------------------------------------------------------------------------- 1 | [fruit.physical] # subtable, but to which parent element should it belong? 
2 | color = "red" 3 | shape = "round" 4 | 5 | [[fruit]] # parser must throw an error upon discovering that "fruit" is 6 | # an array rather than a table 7 | name = "apple" 8 | -------------------------------------------------------------------------------- /test-data/invalid/multiple-dot-key.toml: -------------------------------------------------------------------------------- 1 | # THE FOLLOWING IS INVALID 2 | 3 | # This defines the value of fruit.apple to be an integer. 4 | fruit.apple = 1 5 | 6 | # But then this treats fruit.apple like it's a table. 7 | # You can't turn an integer into a table. 8 | fruit.apple.smooth = true 9 | -------------------------------------------------------------------------------- /test-data/analytics/_cargo3.toml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | [workspace] 5 | exclude = ["util/schema-gen", "util/test-gen"] 6 | members = ["taplo-ide", "taplo", "lsp-async-stub"] 7 | 8 | [profile.release] 9 | codegen-units = 1 10 | lto = true 11 | opt-level = 3 12 | 13 | [profile.bench] 14 | lto = true 15 | 16 | # a comment 17 | -------------------------------------------------------------------------------- /test-data/invalid/table-invalid-3.toml: -------------------------------------------------------------------------------- 1 | # INVALID TOML DOC 2 | [[fruit]] 3 | name = "apple" 4 | 5 | [[fruit.variety]] 6 | name = "red delicious" 7 | 8 | # INVALID: This table conflicts with the previous array of tables 9 | [fruit.variety] 10 | name = "granny smith" 11 | 12 | [fruit.physical] 13 | color = "red" 14 | shape = "round" 15 | -------------------------------------------------------------------------------- /test-data/invalid/table-invalid-4.toml: -------------------------------------------------------------------------------- 1 | # INVALID TOML DOC 2 | [[fruit]] 3 | name = "apple" 4 | 5 | [[fruit.variety]] 6 | name = "red delicious" 7 | 8 | [fruit.physical] 9 | color = "red" 10 | shape = "round" 11 | 12 | # INVALID: This array of tables conflicts with the previous table 13 | [[fruit.physical]] 14 | color = "green" 15 | -------------------------------------------------------------------------------- /src/util/syntax.rs: -------------------------------------------------------------------------------- 1 | use rowan::{GreenNodeBuilder, NodeOrToken}; 2 | 3 | use crate::syntax::SyntaxNode; 4 | 5 | pub fn add_all(node: SyntaxNode, builder: &mut GreenNodeBuilder) { 6 | builder.start_node(node.kind().into()); 7 | 8 | for c in node.children_with_tokens() { 9 | match c { 10 | NodeOrToken::Node(n) => add_all(n, builder), 11 | NodeOrToken::Token(t) => builder.token(t.kind().into(), t.text()), 12 | } 13 | } 14 | 15 | builder.finish_node() 16 | } 17 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "oxc-toml" 3 | description = "A TOML formatter library" 4 | version = "0.14.1" 5 | categories = ["parser-implementations", "parsing"] 6 | keywords = ["toml", "formatter"] 7 | readme = "README.md" 8 | authors = ["Boshen"] 9 | edition = "2024" 10 | license = "MIT" 11 | homepage = "https://github.com/oxc-project/oxc-toml" 12 | repository = "https://github.com/oxc-project/oxc-toml" 13 | 14 | [dependencies] 15 | rustc-hash = "2.1.0" 16 | itertools = "0.14.0" 17 | logos = "0.16.0" 18 | rowan = "0.16.1" 19 | -------------------------------------------------------------------------------- 
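The manifest above gives the crate's public coordinates (`oxc-toml` 0.14.1) and its four dependencies, and the API surface shown later in `src/lib.rs` and `tests/formatter.rs` consists of `formatter::format` plus a defaultable `Options` struct. As a rough illustration of how a downstream crate would consume this, the sketch below is an assumption-laden example rather than a file from this repository: the dependency declaration, the `main` function, and the sample input are invented for demonstration (the generated `Options::update_from_str` seen in `src/formatter/macros.rs` would allow string-keyed overrides, but no concrete option names appear in this dump, so none are used).

```rust
// Hypothetical consumer crate; the dependency line mirrors the manifest above.
//
// [dependencies]
// oxc-toml = "0.14.1"

use oxc_toml::formatter::{format, Options};

fn main() {
    // Unevenly spaced input, the same fixture used in tests/formatter.rs.
    let source = "value=1\n[table]\nstring='some string'";

    // Format with default options; the tests assert this input normalizes
    // to "value = 1" and "string = 'some string'".
    let formatted = format(source, Options::default());

    assert!(formatted.contains("value = 1"));
    println!("{formatted}");
}
```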
/src/parser/macros.rs: -------------------------------------------------------------------------------- 1 | macro_rules! with_node { 2 | ($builder:expr, $kind:ident, $($content:tt)*) => { 3 | { 4 | $builder.start_node($kind.into()); 5 | let res = $($content)*; 6 | $builder.finish_node(); 7 | res 8 | } 9 | }; 10 | } 11 | 12 | macro_rules! whitelisted { 13 | ($self:expr, $kind:ident, $($content:tt)*) => { 14 | { 15 | $self.whitelist_token($kind); 16 | let res = $($content)*; 17 | $self.blacklist_token($kind); 18 | res 19 | } 20 | }; 21 | } 22 | -------------------------------------------------------------------------------- /test-data/invalid/taplo-invalid-padding.toml: -------------------------------------------------------------------------------- 1 | [int] 2 | padded_middle = 1__2 3 | padded_start = _1_2 4 | padded_end = 1_2_ 5 | 6 | padded_plus = +_2 7 | padded_minus = -_2 8 | 9 | [int.bin] 10 | padded_middle = 0b1__0 11 | padded_start = 0b_1_0 12 | padded_end = 0b1_0_ 13 | 14 | [int.hex] 15 | padded_middle = 0x1__0 16 | padded_start = 0x_1_0 17 | padded_end = 0x1_0_ 18 | 19 | [int.oct] 20 | padded_middle = 0o1__0 21 | padded_start = 0o_1_0 22 | padded_end = 0o1_0_ 23 | 24 | [float] 25 | padded_middle = 1__2.0 26 | padded_start = _1_2.0 27 | padded_end = 1_2_.0 28 | 29 | padded_plus = +_2.0 30 | padded_minus = -_2.0 -------------------------------------------------------------------------------- /test-data/README.md: -------------------------------------------------------------------------------- 1 | ## Source of most of the invalid/valid tests: 2 | 3 | These are the spec tests for TOML used by @iarna/toml. 4 | 5 | The errors folder contains TOML files that should cause a parser to report an error. 6 | 7 | The values folder contains TOML files and paired YAML or JSON files. The 8 | YAML files should parse to a structure that's deeply equal to the TOML 9 | structure. The JSON files match the patterns found in [BurntSushi 0.4 TOML 10 | tests](https://github.com/BurntSushi/toml-test#json-encoding). 11 | 12 | We introduce the following new types to match TOML 0.5.0: 13 | 14 | * _datetime-local_ - A datetime without a timezone. Floating. 15 | * _date_ - A date without any time component 16 | * _time_ - A time without any date component 17 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::single_match)] 2 | //! # About 3 | //! 4 | //! The main purpose of this library is to format TOML documents while preserving 5 | //! the original layout, comments, and whitespace where appropriate. 6 | //! 7 | //! It uses [Rowan](::rowan) for the syntax tree, and every character is preserved from the input, 8 | //! including all comments and white space. 9 | //! 10 | //! # Usage 11 | //! 12 | //! A TOML document can be formatted directly using the [formatter::format] function: 13 | //! 14 | //! ``` 15 | //! use oxc_toml::formatter::{format, Options}; 16 | //! 17 | //! const SOURCE: &str = "value=1\n[table]\nstring='some string'"; 18 | //! 19 | //! let formatted = format(SOURCE, Options::default()); 20 | //! 
//! ``` 21 | 22 | pub mod formatter; 23 | pub mod parser; 24 | pub mod syntax; 25 | pub mod util; 26 | 27 | pub use rowan; 28 | 29 | pub type HashMap<K, V> = rustc_hash::FxHashMap<K, V>; 30 | pub type HashSet<T> = rustc_hash::FxHashSet<T>; 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Boshen (oxc-toml - Formatter-only fork) 4 | Copyright (c) 2020 Ferenc Tamás (Original Taplo project) 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /tests/formatter.rs: -------------------------------------------------------------------------------- 1 | use oxc_toml::formatter::{format, Options}; 2 | 3 | #[test] 4 | fn test_basic_formatting() { 5 | const SOURCE: &str = "value=1\n[table]\nstring='some string'"; 6 | let formatted = format(SOURCE, Options::default()); 7 | 8 | // Should add spaces around = 9 | assert!(formatted.contains("value = 1")); 10 | assert!(formatted.contains("string = 'some string'")); 11 | } 12 | 13 | #[test] 14 | fn test_complex_toml() { 15 | const SOURCE: &str = r#" 16 | [package] 17 | name="test" 18 | version="1.0.0" 19 | 20 | [dependencies] 21 | foo="1.0" 22 | bar = "2.0" 23 | "#; 24 | let formatted = format(SOURCE, Options::default()); 25 | 26 | // Should normalize spacing 27 | assert!(formatted.contains("name = \"test\"")); 28 | assert!(formatted.contains("version = \"1.0.0\"")); 29 | assert!(formatted.contains("foo = \"1.0\"")); 30 | assert!(formatted.contains("bar = \"2.0\"")); 31 | } 32 | 33 | #[test] 34 | fn test_formatter_preserves_comments() { 35 | const SOURCE: &str = "# Comment\nvalue=1"; 36 | let formatted = format(SOURCE, Options::default()); 37 | 38 | assert!(formatted.contains("# Comment")); 39 | assert!(formatted.contains("value = 1")); 40 | } 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # oxc-toml 2 | 3 | A TOML v1.0.0 formatter library. 4 | 5 | This library provides TOML formatting capabilities while preserving comments, whitespace, and the original document structure where appropriate.
6 | 7 | ## Features 8 | 9 | - Format TOML documents according to configurable style options 10 | - Preserve comments and meaningful whitespace 11 | - Handle syntax errors gracefully 12 | - Fault-tolerant parsing using [Rowan](https://github.com/rust-analyzer/rowan) syntax trees 13 | 14 | ## Usage 15 | 16 | ```rust 17 | use oxc_toml::formatter::{format, Options}; 18 | 19 | const SOURCE: &str = "value=1\n[table]\nstring='some string'"; 20 | 21 | let formatted = format(SOURCE, Options::default()); 22 | ``` 23 | 24 | ## Attribution 25 | 26 | This project is a formatter-only fork of the excellent [Taplo](https://github.com/tamasfe/taplo) project, originally created by [Ferenc Tamás](https://github.com/tamasfe). 27 | 28 | ### What Changed 29 | 30 | This fork strips away all non-formatter components from Taplo, including: 31 | - CLI tool 32 | - Language Server Protocol (LSP) implementation 33 | - WebAssembly bindings 34 | - DOM (Document Object Model) for TOML manipulation 35 | - JavaScript/TypeScript packages 36 | - Editor integrations 37 | 38 | The result is a focused, lightweight library that does one thing well: format TOML documents. 39 | 40 | ### Original Taplo Project 41 | 42 | Taplo is a comprehensive TOML toolkit that provides: 43 | - TOML v1.0.0 parser 44 | - Formatter (the foundation of this fork) 45 | - Language server for IDE integration 46 | - CLI tool for formatting and validation 47 | - WebAssembly bindings for browser/Node.js 48 | - Schema validation 49 | 50 | If you need these features, please use the original [Taplo project](https://github.com/tamasfe/taplo). 51 | 52 | ### License 53 | 54 | This project maintains the original MIT License from Taplo. See [LICENSE](LICENSE) for details. 55 | -------------------------------------------------------------------------------- /test-data/analytics/_cargo1.toml: -------------------------------------------------------------------------------- 1 | [some.package] 2 | authors = ["tamasf97 "] 3 | edition = "2018" 4 | name = "taplo-ide" 5 | version = "0.1.0" 6 | 7 | [lib] 8 | crate-type = ["cdylib", "rlib"] 9 | 10 | [features] 11 | default = ["console_error_panic_hook"] 12 | 13 | [dependencies] 14 | async-trait = "0.1.30" 15 | futures = "0.3.5" 16 | indexmap = "1.4.0" 17 | js-sys = "0.3.39" 18 | lsp-async-stub = { path = "../lsp-async-stub" } 19 | lsp-types = { version = "0.74.1", features = ["proposed"] } 20 | once_cell = "1.3.1" 21 | regex = "1.3" 22 | # reqwest = "0.10.6" 23 | rowan = "0.10.0" 24 | schemars = "0.8.0-alpha-4" 25 | serde = { version = "1.0", features = ["derive"] } 26 | serde_json = "1.0" 27 | serde_yaml = "0.8" 28 | taplo = { path = "../taplo", features = ["serde", "verify"] } 29 | verify = { version = "0.3", features = ["schemars"] } 30 | wasm-bindgen = { version = "^0.2", features = ["serde-serialize"] } 31 | wasm-bindgen-futures = "0.4.12" 32 | 33 | # The `console_error_panic_hook` crate provides better debugging of panics by 34 | # logging them with `console.error`. This is great for development, but requires 35 | # all the `std::fmt` and `std::panicking` infrastructure, so isn't great for 36 | # code size when deploying. 37 | console_error_panic_hook = { version = "0.1.1", optional = true } 38 | 39 | # `wee_alloc` is a tiny allocator for wasm that is only ~1K in code size 40 | # compared to the default allocator's ~10K. It is slower than the default 41 | # allocator, however. 42 | # 43 | # Unfortunately, `wee_alloc` requires nightly Rust when targeting wasm for now. 
44 | wee_alloc = { version = "0.4.2", optional = true } 45 | 46 | [dev-dependencies] 47 | wasm-bindgen-test = "0.2" 48 | stuff. 49 | [] 50 | 51 | asd.bsd 52 | 53 | [lib.] 54 | [some.lib.] 55 | 56 | [[test]] 57 | 58 | [[test]] 59 | 60 | thing. -------------------------------------------------------------------------------- /src/formatter/macros.rs: -------------------------------------------------------------------------------- 1 | macro_rules! create_options { 2 | ( 3 | $(#[$attr:meta])* 4 | pub struct Options { 5 | $( 6 | $(#[$field_attr:meta])* 7 | pub $name:ident: $ty:ty, 8 | )+ 9 | } 10 | ) => { 11 | $(#[$attr])* 12 | pub struct Options { 13 | $( 14 | $(#[$field_attr])* 15 | pub $name: $ty, 16 | )+ 17 | } 18 | 19 | impl Options { 20 | pub fn update(&mut self, incomplete: OptionsIncomplete) { 21 | $( 22 | if let Some(v) = incomplete.$name { 23 | self.$name = v; 24 | } 25 | )+ 26 | } 27 | 28 | pub fn update_from_str<S: AsRef<str>, I: Iterator<Item = (S, S)>>( 29 | &mut self, 30 | values: I, 31 | ) -> Result<(), OptionParseError> { 32 | for (key, val) in values { 33 | 34 | $( 35 | if key.as_ref() == stringify!($name) { 36 | self.$name = 37 | val.as_ref() 38 | .parse() 39 | .map_err(|error| OptionParseError::InvalidValue { 40 | key: key.as_ref().into(), 41 | error: Box::new(error), 42 | })?; 43 | 44 | continue; 45 | } 46 | )+ 47 | 48 | return Err(OptionParseError::InvalidOption(key.as_ref().into())); 49 | } 50 | 51 | Ok(()) 52 | } 53 | } 54 | 55 | $(#[$attr])* 56 | #[derive(Default)] 57 | pub struct OptionsIncomplete { 58 | $( 59 | $(#[$field_attr])* 60 | pub $name: Option<$ty>, 61 | )+ 62 | } 63 | 64 | impl OptionsIncomplete { 65 | pub fn from_options(opts: Options) -> Self { 66 | let mut o = Self::default(); 67 | 68 | $( 69 | o.$name = Some(opts.$name); 70 | )+ 71 | 72 | o 73 | } 74 | } 75 | }; 76 | } 77 | -------------------------------------------------------------------------------- /src/util/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::syntax::{SyntaxElement, SyntaxKind, SyntaxNode}; 2 | use rowan::TextRange; 3 | use rowan::TextSize; 4 | 5 | mod escape; 6 | pub mod syntax; 7 | 8 | pub use escape::check_escape; 9 | pub use escape::{escape, unescape}; 10 | 11 | pub(crate) mod allowed_chars { 12 | pub(crate) fn comment(s: &str) -> Result<(), Vec<usize>> { 13 | let mut err_indices = Vec::new(); 14 | 15 | for (i, c) in s.chars().enumerate() { 16 | if c != '\t' && c.is_control() { 17 | err_indices.push(i); 18 | } 19 | } 20 | 21 | if err_indices.is_empty() { 22 | Ok(()) 23 | } else { 24 | Err(err_indices) 25 | } 26 | } 27 | 28 | pub(crate) fn string(s: &str) -> Result<(), Vec<usize>> { 29 | let mut err_indices = Vec::new(); 30 | 31 | for (i, c) in s.chars().enumerate() { 32 | if c != '\t' 33 | && (('\u{0000}'..='\u{0008}').contains(&c) 34 | || ('\u{000A}'..='\u{001F}').contains(&c) 35 | || c == '\u{007F}') 36 | { 37 | err_indices.push(i); 38 | } 39 | } 40 | 41 | if err_indices.is_empty() { 42 | Ok(()) 43 | } else { 44 | Err(err_indices) 45 | } 46 | } 47 | 48 | pub(crate) fn multi_line_string(s: &str) -> Result<(), Vec<usize>> { 49 | let mut err_indices = Vec::new(); 50 | 51 | for (i, c) in s.chars().enumerate() { 52 | if c != '\t' 53 | && c != '\n' 54 | && c != '\r' 55 | && (('\u{0000}'..='\u{0008}').contains(&c) 56 | || ('\u{000A}'..='\u{001F}').contains(&c) 57 | || c == '\u{007F}') 58 | { 59 | err_indices.push(i); 60 | } 61 | } 62 | 63 | if err_indices.is_empty() { 64 | Ok(()) 65 | } else { 66 | Err(err_indices) 67 | } 68 | } 69 | 70 | pub(crate) fn string_literal(s: &str) ->
Result<(), Vec<usize>> { 71 | let mut err_indices = Vec::new(); 72 | 73 | for (i, c) in s.chars().enumerate() { 74 | if c != '\t' && c.is_control() { 75 | err_indices.push(i); 76 | } 77 | } 78 | 79 | if err_indices.is_empty() { 80 | Ok(()) 81 | } else { 82 | Err(err_indices) 83 | } 84 | } 85 | 86 | pub(crate) fn multi_line_string_literal(s: &str) -> Result<(), Vec<usize>> { 87 | let mut err_indices = Vec::new(); 88 | 89 | for (i, c) in s.chars().enumerate() { 90 | if c != '\t' && c != '\n' && c != '\r' && c.is_control() { 91 | err_indices.push(i); 92 | } 93 | } 94 | 95 | if err_indices.is_empty() { 96 | Ok(()) 97 | } else { 98 | Err(err_indices) 99 | } 100 | } 101 | } 102 | 103 | pub trait StrExt { 104 | fn strip_quotes(self) -> Self; 105 | } 106 | 107 | impl StrExt for &str { 108 | fn strip_quotes(self) -> Self { 109 | if self.starts_with('\"') || self.starts_with('\'') { 110 | &self[1..self.len() - 1] 111 | } else { 112 | self 113 | } 114 | } 115 | } 116 | 117 | /// Utility extension methods for Syntax Nodes. 118 | pub trait SyntaxExt { 119 | /// Return a syntax node that contains the given offset. 120 | fn find_node(&self, offset: TextSize, inclusive: bool) -> Option<SyntaxNode>; 121 | 122 | /// Find the deepest node that contains the given offset. 123 | fn find_node_deep(&self, offset: TextSize, inclusive: bool) -> Option<SyntaxNode> { 124 | let mut node = self.find_node(offset, inclusive); 125 | while let Some(n) = &node { 126 | let new_node = n.find_node(offset, inclusive); 127 | if new_node.is_some() { 128 | node = new_node; 129 | } else { 130 | break; 131 | } 132 | } 133 | 134 | node 135 | } 136 | 137 | /// Find a node or token by its kind. 138 | fn find(&self, kind: SyntaxKind) -> Option<SyntaxElement>; 139 | } 140 | 141 | impl SyntaxExt for SyntaxNode { 142 | fn find_node(&self, offset: TextSize, inclusive: bool) -> Option<SyntaxNode> { 143 | for d in self.descendants().skip(1) { 144 | let range = d.text_range(); 145 | 146 | if (inclusive && range.contains_inclusive(offset)) || range.contains(offset) { 147 | return Some(d); 148 | } 149 | } 150 | 151 | None 152 | } 153 | 154 | fn find(&self, kind: SyntaxKind) -> Option<SyntaxElement> { 155 | self.descendants_with_tokens().find(|d| d.kind() == kind) 156 | } 157 | } 158 | 159 | pub fn join_ranges<I: IntoIterator<Item = TextRange>>(ranges: I) -> TextRange { 160 | ranges 161 | .into_iter() 162 | .fold(None, |ranges, range| match ranges { 163 | Some(r) => Some(range.cover(r)), 164 | None => Some(range), 165 | }) 166 | .unwrap() 167 | } 168 | 169 | pub fn try_join_ranges<I: IntoIterator<Item = TextRange>>(ranges: I) -> Option<TextRange> { 170 | ranges.into_iter().fold(None, |ranges, range| match ranges { 171 | Some(r) => Some(range.cover(r)), 172 | None => Some(range), 173 | }) 174 | } 175 | 176 | pub fn overlaps(range: TextRange, other: TextRange) -> bool { 177 | range.contains_range(other) 178 | || other.contains_range(range) 179 | || range.contains(other.start()) 180 | || range.contains(other.end()) 181 | || other.contains(range.start()) 182 | || other.contains(range.end()) 183 | } 184 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing.
3 | version = 4 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "1.1.4" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "beef" 16 | version = "0.5.2" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" 19 | 20 | [[package]] 21 | name = "countme" 22 | version = "3.0.1" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "7704b5fdd17b18ae31c4c1da5a2e0305a2bf17b5249300a9ee9ed7b72114c636" 25 | 26 | [[package]] 27 | name = "either" 28 | version = "1.15.0" 29 | source = "registry+https://github.com/rust-lang/crates.io-index" 30 | checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" 31 | 32 | [[package]] 33 | name = "fnv" 34 | version = "1.0.7" 35 | source = "registry+https://github.com/rust-lang/crates.io-index" 36 | checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" 37 | 38 | [[package]] 39 | name = "hashbrown" 40 | version = "0.14.5" 41 | source = "registry+https://github.com/rust-lang/crates.io-index" 42 | checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" 43 | 44 | [[package]] 45 | name = "itertools" 46 | version = "0.14.0" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" 49 | dependencies = [ 50 | "either", 51 | ] 52 | 53 | [[package]] 54 | name = "logos" 55 | version = "0.16.0" 56 | source = "registry+https://github.com/rust-lang/crates.io-index" 57 | checksum = "a790d11254054e5dc83902dba85d253ff06ceb0cfafb12be8773435cb9dfb4f4" 58 | dependencies = [ 59 | "logos-derive", 60 | ] 61 | 62 | [[package]] 63 | name = "logos-codegen" 64 | version = "0.16.0" 65 | source = "registry+https://github.com/rust-lang/crates.io-index" 66 | checksum = "f60337c43a38313b58871f8d5d76872b8e17aa9d51fad494b5e76092c0ce05f5" 67 | dependencies = [ 68 | "beef", 69 | "fnv", 70 | "proc-macro2", 71 | "quote", 72 | "regex-automata", 73 | "regex-syntax", 74 | "rustc_version", 75 | "syn", 76 | ] 77 | 78 | [[package]] 79 | name = "logos-derive" 80 | version = "0.16.0" 81 | source = "registry+https://github.com/rust-lang/crates.io-index" 82 | checksum = "d151b2ae667f69e10b8738f5cac0c746faa22b2e15ea7e83b55476afec3767dc" 83 | dependencies = [ 84 | "logos-codegen", 85 | ] 86 | 87 | [[package]] 88 | name = "memchr" 89 | version = "2.7.6" 90 | source = "registry+https://github.com/rust-lang/crates.io-index" 91 | checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" 92 | 93 | [[package]] 94 | name = "oxc-toml" 95 | version = "0.14.1" 96 | dependencies = [ 97 | "itertools", 98 | "logos", 99 | "rowan", 100 | "rustc-hash 2.1.1", 101 | ] 102 | 103 | [[package]] 104 | name = "proc-macro2" 105 | version = "1.0.103" 106 | source = "registry+https://github.com/rust-lang/crates.io-index" 107 | checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" 108 | dependencies = [ 109 | "unicode-ident", 110 | ] 111 | 112 | [[package]] 113 | name = "quote" 114 | version = "1.0.42" 115 | source = "registry+https://github.com/rust-lang/crates.io-index" 116 | checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" 117 | dependencies = [ 118 | "proc-macro2", 119 | ] 120 | 121 | 
[[package]] 122 | name = "regex-automata" 123 | version = "0.4.13" 124 | source = "registry+https://github.com/rust-lang/crates.io-index" 125 | checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" 126 | dependencies = [ 127 | "aho-corasick", 128 | "memchr", 129 | "regex-syntax", 130 | ] 131 | 132 | [[package]] 133 | name = "regex-syntax" 134 | version = "0.8.8" 135 | source = "registry+https://github.com/rust-lang/crates.io-index" 136 | checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" 137 | 138 | [[package]] 139 | name = "rowan" 140 | version = "0.16.1" 141 | source = "registry+https://github.com/rust-lang/crates.io-index" 142 | checksum = "417a3a9f582e349834051b8a10c8d71ca88da4211e4093528e36b9845f6b5f21" 143 | dependencies = [ 144 | "countme", 145 | "hashbrown", 146 | "rustc-hash 1.1.0", 147 | "text-size", 148 | ] 149 | 150 | [[package]] 151 | name = "rustc-hash" 152 | version = "1.1.0" 153 | source = "registry+https://github.com/rust-lang/crates.io-index" 154 | checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" 155 | 156 | [[package]] 157 | name = "rustc-hash" 158 | version = "2.1.1" 159 | source = "registry+https://github.com/rust-lang/crates.io-index" 160 | checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" 161 | 162 | [[package]] 163 | name = "rustc_version" 164 | version = "0.4.1" 165 | source = "registry+https://github.com/rust-lang/crates.io-index" 166 | checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" 167 | dependencies = [ 168 | "semver", 169 | ] 170 | 171 | [[package]] 172 | name = "semver" 173 | version = "1.0.27" 174 | source = "registry+https://github.com/rust-lang/crates.io-index" 175 | checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" 176 | 177 | [[package]] 178 | name = "syn" 179 | version = "2.0.111" 180 | source = "registry+https://github.com/rust-lang/crates.io-index" 181 | checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" 182 | dependencies = [ 183 | "proc-macro2", 184 | "quote", 185 | "unicode-ident", 186 | ] 187 | 188 | [[package]] 189 | name = "text-size" 190 | version = "1.1.1" 191 | source = "registry+https://github.com/rust-lang/crates.io-index" 192 | checksum = "f18aa187839b2bdb1ad2fa35ead8c4c2976b64e4363c386d45ac0f7ee85c9233" 193 | 194 | [[package]] 195 | name = "unicode-ident" 196 | version = "1.0.22" 197 | source = "registry+https://github.com/rust-lang/crates.io-index" 198 | checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" 199 | -------------------------------------------------------------------------------- /src/util/escape.rs: -------------------------------------------------------------------------------- 1 | use logos::{Lexer, Logos}; 2 | 3 | /// Escaping based on: 4 | /// 5 | /// \b - backspace (U+0008) 6 | /// \t - tab (U+0009) 7 | /// \n - linefeed (U+000A) 8 | /// \f - form feed (U+000C) 9 | /// \r - carriage return (U+000D) 10 | /// \" - quote (U+0022) 11 | /// \\ - backslash (U+005C) 12 | /// \uXXXX - unicode (U+XXXX) 13 | /// \UXXXXXXXX - unicode (U+XXXXXXXX) 14 | #[derive(Logos, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] 15 | pub enum Escape { 16 | #[token(r#"\b"#, priority = 5)] 17 | Backspace, 18 | 19 | #[token(r#"\t"#, priority = 5)] 20 | Tab, 21 | 22 | #[regex(r#"(\\\s*\n)|(\\\s*\r\n)"#, priority = 5)] 23 | Newline, 24 | 25 | #[token(r#"\n"#, priority = 5)] 26 | LineFeed, 27 | 28 | #[token(r#"\f"#, priority = 
5)] 29 | FormFeed, 30 | 31 | #[token(r#"\r"#, priority = 5)] 32 | CarriageReturn, 33 | 34 | #[token(r#"\""#, priority = 5)] 35 | Quote, 36 | 37 | #[token(r#"\\"#, priority = 5)] 38 | Backslash, 39 | 40 | // Same thing repeated 4 times, but the {n} repetition syntax is not supported by Logos 41 | #[regex(r#"\\u[0-9A-Fa-f_][0-9A-Fa-f_][0-9A-Fa-f_][0-9A-Fa-f_]"#, priority = 5)] 42 | Unicode, 43 | 44 | // Same thing repeated 8 times, but the {n} repetition syntax is not supported by Logos 45 | #[regex(r#"\\U[0-9A-Fa-f_][0-9A-Fa-f_][0-9A-Fa-f_][0-9A-Fa-f_][0-9A-Fa-f_][0-9A-Fa-f_][0-9A-Fa-f_][0-9A-Fa-f_]"#, priority = 5)] 46 | UnicodeLarge, 47 | 48 | #[regex(r#"\\."#, priority = 4)] 49 | Unknown, 50 | 51 | UnEscaped, 52 | } 53 | use Escape::*; 54 | 55 | /// Escape values in a given string. 56 | pub fn escape(s: &str) -> String { 57 | let mut escaped = String::with_capacity(s.len()); 58 | 59 | for c in s.chars() { 60 | match c { 61 | '\u{0008}' => escaped.push_str(r#"\b"#), 62 | '\u{0009}' => escaped.push_str(r#"\t"#), 63 | '\u{000A}' => escaped.push_str(r#"\n"#), 64 | '\u{000C}' => escaped.push_str(r#"\f"#), 65 | '\u{000D}' => escaped.push_str(r#"\r"#), 66 | '\u{0022}' => escaped.push_str(r#"\""#), 67 | '\u{005C}' => escaped.push_str(r#"\\"#), 68 | _ => { 69 | escaped.push(c); 70 | } 71 | } 72 | } 73 | 74 | escaped 75 | } 76 | 77 | /// Unescape all supported sequences found in [Escape](Escape). 78 | /// 79 | /// If it fails, the index of failure is returned. 80 | pub fn unescape(s: &str) -> Result<String, usize> { 81 | let mut new_s = String::with_capacity(s.len()); 82 | let mut lexer: Lexer<Escape> = Lexer::new(s); 83 | 84 | while let Some(t) = lexer.next() { 85 | let t = t.unwrap_or(UnEscaped); 86 | match t { 87 | Backspace => new_s += "\u{0008}", 88 | Tab => new_s += "\u{0009}", 89 | LineFeed => new_s += "\u{000A}", 90 | FormFeed => new_s += "\u{000C}", 91 | CarriageReturn => new_s += "\u{000D}", 92 | Quote => new_s += "\u{0022}", 93 | Backslash => new_s += "\u{005C}", 94 | Newline => {} 95 | Unicode => { 96 | new_s += &std::char::from_u32( 97 | u32::from_str_radix(&lexer.slice()[2..], 16).map_err(|_| lexer.span().start)?, 98 | ) 99 | .ok_or(lexer.span().start)? 100 | .to_string(); 101 | } 102 | UnicodeLarge => { 103 | new_s += &std::char::from_u32( 104 | u32::from_str_radix(&lexer.slice()[2..], 16).map_err(|_| lexer.span().start)?, 105 | ) 106 | .ok_or(lexer.span().start)? 107 | .to_string(); 108 | } 109 | Unknown => return Err(lexer.span().end), 110 | UnEscaped => { 111 | new_s += lexer.slice(); 112 | } 113 | } 114 | } 115 | 116 | Ok(new_s + lexer.remainder()) 117 | } 118 | 119 | /// Same as unescape, but doesn't create a new 120 | /// unescaped string, and returns all invalid escape indices.
121 | pub fn check_escape(s: &str) -> Result<(), Vec<usize>> { 122 | let mut lexer: Lexer<Escape> = Lexer::new(s); 123 | let mut invalid = Vec::new(); 124 | 125 | while let Some(t) = lexer.next() { 126 | let t = t.unwrap_or(UnEscaped); 127 | match t { 128 | Backspace => {} 129 | Tab => {} 130 | LineFeed => {} 131 | FormFeed => {} 132 | CarriageReturn => {} 133 | Quote => {} 134 | Backslash => {} 135 | Newline => {} 136 | Unicode => { 137 | let char_val = match u32::from_str_radix(&lexer.slice()[2..], 16) { 138 | Ok(v) => v, 139 | Err(_) => { 140 | invalid.push(lexer.span().start); 141 | continue; 142 | } 143 | }; 144 | 145 | match std::char::from_u32(char_val) { 146 | None => { 147 | invalid.push(lexer.span().start); 148 | } 149 | Some(_) => {} 150 | }; 151 | } 152 | UnicodeLarge => { 153 | let char_val = match u32::from_str_radix(&lexer.slice()[2..], 16) { 154 | Ok(v) => v, 155 | Err(_) => { 156 | invalid.push(lexer.span().start); 157 | continue; 158 | } 159 | }; 160 | 161 | match std::char::from_u32(char_val) { 162 | None => { 163 | invalid.push(lexer.span().start); 164 | } 165 | Some(_) => {} 166 | }; 167 | } 168 | Unknown => invalid.push(lexer.span().start), 169 | UnEscaped => {} 170 | } 171 | } 172 | 173 | if invalid.is_empty() { 174 | Ok(()) 175 | } else { 176 | Err(invalid) 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /test-data/example.toml: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ## Comment 3 | 4 | # Speak your mind with the hash symbol. They go from the symbol to the end of 5 | # the line. 6 | 7 | ################################################################################ 8 | ## Table 9 | 10 | # Tables (also known as hash tables or dictionaries) are collections of 11 | # key/value pairs. They appear in square brackets on a line by themselves. 12 | 13 | root_value = 2 14 | 15 | [table] 16 | 17 | key = "value" # Yeah, you can do this. 18 | 19 | # Nested tables are denoted by table names with dots in them. Name your tables 20 | # whatever crap you please, just don't use #, ., [ or ]. 21 | 22 | [table.subtable] 23 | 24 | key = "another value" 25 | 26 | # You don't need to specify all the super-tables if you don't want to. TOML 27 | # knows how to do it for you. 28 | 29 | # [x] you 30 | # [x.y] don't 31 | # [x.y.z] need these 32 | [x.y.z.w] # for this to work 33 | 34 | ################################################################################ 35 | ## Inline Table 36 | 37 | # Inline tables provide a more compact syntax for expressing tables. They are 38 | # especially useful for grouped data that can otherwise quickly become verbose. 39 | # Inline tables are enclosed in curly braces `{` and `}`. No newlines are 40 | # allowed between the curly braces unless they are valid within a value. 41 | 42 | [table.inline] 43 | 44 | name = { first = "Tom", last = "Preston-Werner" } 45 | point = { x = 1, y = 2 } 46 | 47 | ################################################################################ 48 | ## String 49 | 50 | # There are four ways to express strings: basic, multi-line basic, literal, and 51 | # multi-line literal. All strings must contain only valid UTF-8 characters. 52 | 53 | [string.basic] 54 | 55 | basic = "I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF."
56 | 57 | [string.multiline] 58 | 59 | # The following strings are byte-for-byte equivalent: 60 | key1 = "One\nTwo" 61 | key2 = """One\nTwo""" 62 | key3 = """ 63 | One 64 | Two""" 65 | 66 | [string.multiline.continued] 67 | 68 | # The following strings are byte-for-byte equivalent: 69 | key1 = "The quick brown fox jumps over the lazy dog." 70 | 71 | key2 = """ 72 | The quick brown \ 73 | 74 | 75 | fox jumps over \ 76 | the lazy dog.""" 77 | 78 | key3 = """\ 79 | The quick brown \ 80 | fox jumps over \ 81 | the lazy dog.\ 82 | """ 83 | 84 | [string.literal] 85 | 86 | # What you see is what you get. 87 | quoted = 'Tom "Dubs" Preston-Werner' 88 | regex = '<\i\c*\s*>' 89 | winpath = 'C:\Users\nodejs\templates' 90 | winpath2 = '\\ServerX\admin$\system32\' 91 | 92 | [string.literal.multiline] 93 | 94 | lines = ''' 95 | The first newline is 96 | trimmed in raw strings. 97 | All other whitespace 98 | is preserved. 99 | ''' 100 | regex2 = '''I [dw]on't need \d{2} apples''' 101 | 102 | ################################################################################ 103 | ## Integer 104 | 105 | # Integers are whole numbers. Positive numbers may be prefixed with a plus sign. 106 | # Negative numbers are prefixed with a minus sign. 107 | 108 | [integer] 109 | 110 | key1 = +99 111 | key2 = 42 112 | key3 = 0 113 | key4 = -17 114 | 115 | [integer.underscores] 116 | 117 | # For large numbers, you may use underscores to enhance readability. Each 118 | # underscore must be surrounded by at least one digit. 119 | key1 = 1_000 120 | key2 = 5_349_221 121 | key3 = 1_2_3_4_5 # valid but inadvisable 122 | 123 | ################################################################################ 124 | ## Float 125 | 126 | # A float consists of an integer part (which may be prefixed with a plus or 127 | # minus sign) followed by a fractional part and/or an exponent part. 128 | 129 | [float.fractional] 130 | 131 | key1 = +1.0 132 | key2 = 3.1415 133 | key3 = -0.01 134 | 135 | [float.exponent] 136 | 137 | key1 = 5e+22 138 | key2 = 1e6 139 | key3 = -2E-2 140 | 141 | [float.both] 142 | 143 | key = 6.626e-34 144 | 145 | [float.underscores] 146 | 147 | # This file is used for benches and toml-rs doesn't yet support these: 148 | # key1 = 9_224_617.445_991_228_313 149 | # key2 = 1e1_000 150 | 151 | ################################################################################ 152 | ## Boolean 153 | 154 | # Booleans are just the tokens you're used to. Always lowercase. 155 | 156 | [boolean] 157 | 158 | False = false 159 | True = true 160 | 161 | ################################################################################ 162 | ## Datetime 163 | 164 | # Datetimes are RFC 3339 dates. 165 | 166 | [datetime] 167 | 168 | key1 = 1979-05-27T07:32:00Z 169 | key2 = 1979-05-27T00:32:00-07:00 170 | key3 = 1979-05-27T00:32:00.999999-07:00 171 | 172 | ################################################################################ 173 | ## Array 174 | 175 | # Arrays are square brackets with other primitives inside. Whitespace is 176 | # ignored. Elements are separated by commas. Data types may not be mixed. 177 | 178 | [array] 179 | 180 | key1 = [1, 2, 3] 181 | key2 = ["red", "yellow", "green"] 182 | key3 = [[1, 2], [3, 4, 5]] 183 | key4 = [[1, 2], ["a", "b", "c"]] # this is ok 184 | 185 | # Arrays can also be multiline. So in addition to ignoring whitespace, arrays 186 | # also ignore newlines between the brackets. Terminating commas are ok before 187 | # the closing bracket. 
188 | 189 | key5 = [1, 2, 3] 190 | key6 = [ 191 | 1, 192 | 2, # this is ok 193 | ] 194 | 195 | ################################################################################ 196 | ## Array of Tables 197 | 198 | # These can be expressed by using a table name in double brackets. Each table 199 | # with the same double bracketed name will be an element in the array. The 200 | # tables are inserted in the order encountered. 201 | 202 | [[products]] 203 | 204 | name = "Hammer" 205 | sku = 738594937 206 | 207 | [[products]] 208 | 209 | [[products]] 210 | 211 | color = "gray" 212 | name = "Nail" 213 | sku = 284758393 214 | 215 | # You can create nested arrays of tables as well. 216 | 217 | [[fruit]] 218 | name = "apple" 219 | 220 | [fruit.physical] 221 | color = "red" 222 | shape = "round" 223 | 224 | [[fruit.variety]] 225 | name = "red delicious" 226 | 227 | [[fruit.variety]] 228 | name = "granny smith" 229 | 230 | [[fruit]] 231 | name = "banana" 232 | 233 | [[fruit.variety]] 234 | name = "plantain" 235 | -------------------------------------------------------------------------------- /test-data/rewrite/nothing.toml: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ## Comment 3 | 4 | # Speak your mind with the hash symbol. They go from the symbol to the end of 5 | # the line. 6 | 7 | ################################################################################ 8 | ## Table 9 | 10 | # Tables (also known as hash tables or dictionaries) are collections of 11 | # key/value pairs. They appear in square brackets on a line by themselves. 12 | 13 | root_value = 2 14 | 15 | [table] 16 | 17 | key = "value" # Yeah, you can do this. 18 | 19 | # Nested tables are denoted by table names with dots in them. Name your tables 20 | # whatever crap you please, just don't use #, ., [ or ]. 21 | 22 | [table.subtable] 23 | 24 | key = "another value" 25 | 26 | # You don't need to specify all the super-tables if you don't want to. TOML 27 | # knows how to do it for you. 28 | 29 | # [x] you 30 | # [x.y] don't 31 | # [x.y.z] need these 32 | [x.y.z.w] # for this to work 33 | 34 | ################################################################################ 35 | ## Inline Table 36 | 37 | # Inline tables provide a more compact syntax for expressing tables. They are 38 | # especially useful for grouped data that can otherwise quickly become verbose. 39 | # Inline tables are enclosed in curly braces `{` and `}`. No newlines are 40 | # allowed between the curly braces unless they are valid within a value. 41 | 42 | [table.inline] 43 | 44 | name = { first = "Tom", last = "Preston-Werner" } 45 | point = { x = 1, y = 2 } 46 | 47 | ################################################################################ 48 | ## String 49 | 50 | # There are four ways to express strings: basic, multi-line basic, literal, and 51 | # multi-line literal. All strings must contain only valid UTF-8 characters. 52 | 53 | [string.basic] 54 | 55 | basic = "I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF." 56 | 57 | [string.multiline] 58 | 59 | # The following strings are byte-for-byte equivalent: 60 | key1 = "One\nTwo" 61 | key2 = """One\nTwo""" 62 | key3 = """ 63 | One 64 | Two""" 65 | 66 | [string.multiline.continued] 67 | 68 | # The following strings are byte-for-byte equivalent: 69 | key1 = "The quick brown fox jumps over the lazy dog." 
70 | 71 | key2 = """ 72 | The quick brown \ 73 | 74 | 75 | fox jumps over \ 76 | the lazy dog.""" 77 | 78 | key3 = """\ 79 | The quick brown \ 80 | fox jumps over \ 81 | the lazy dog.\ 82 | """ 83 | 84 | [string.literal] 85 | 86 | # What you see is what you get. 87 | quoted = 'Tom "Dubs" Preston-Werner' 88 | regex = '<\i\c*\s*>' 89 | winpath = 'C:\Users\nodejs\templates' 90 | winpath2 = '\\ServerX\admin$\system32\' 91 | 92 | [string.literal.multiline] 93 | 94 | lines = ''' 95 | The first newline is 96 | trimmed in raw strings. 97 | All other whitespace 98 | is preserved. 99 | ''' 100 | regex2 = '''I [dw]on't need \d{2} apples''' 101 | 102 | ################################################################################ 103 | ## Integer 104 | 105 | # Integers are whole numbers. Positive numbers may be prefixed with a plus sign. 106 | # Negative numbers are prefixed with a minus sign. 107 | 108 | [integer] 109 | 110 | key1 = +99 111 | key2 = 42 112 | key3 = 0 113 | key4 = -17 114 | 115 | [integer.underscores] 116 | 117 | # For large numbers, you may use underscores to enhance readability. Each 118 | # underscore must be surrounded by at least one digit. 119 | key1 = 1_000 120 | key2 = 5_349_221 121 | key3 = 1_2_3_4_5 # valid but inadvisable 122 | 123 | ################################################################################ 124 | ## Float 125 | 126 | # A float consists of an integer part (which may be prefixed with a plus or 127 | # minus sign) followed by a fractional part and/or an exponent part. 128 | 129 | [float.fractional] 130 | 131 | key1 = +1.0 132 | key2 = 3.1415 133 | key3 = -0.01 134 | 135 | [float.exponent] 136 | 137 | key1 = 5e+22 138 | key2 = 1e6 139 | key3 = -2E-2 140 | 141 | [float.both] 142 | 143 | key = 6.626e-34 144 | 145 | [float.underscores] 146 | 147 | # This file is used for benches and toml-rs doesn't yet support these: 148 | # key1 = 9_224_617.445_991_228_313 149 | # key2 = 1e1_000 150 | 151 | ################################################################################ 152 | ## Boolean 153 | 154 | # Booleans are just the tokens you're used to. Always lowercase. 155 | 156 | [boolean] 157 | 158 | False = false 159 | True = true 160 | 161 | ################################################################################ 162 | ## Datetime 163 | 164 | # Datetimes are RFC 3339 dates. 165 | 166 | [datetime] 167 | 168 | key1 = 1979-05-27T07:32:00Z 169 | key2 = 1979-05-27T00:32:00-07:00 170 | key3 = 1979-05-27T00:32:00.999999-07:00 171 | 172 | ################################################################################ 173 | ## Array 174 | 175 | # Arrays are square brackets with other primitives inside. Whitespace is 176 | # ignored. Elements are separated by commas. Data types may not be mixed. 177 | 178 | [array] 179 | 180 | key1 = [1, 2, 3] 181 | key2 = ["red", "yellow", "green"] 182 | key3 = [[1, 2], [3, 4, 5]] 183 | key4 = [[1, 2], ["a", "b", "c"]] # this is ok 184 | 185 | # Arrays can also be multiline. So in addition to ignoring whitespace, arrays 186 | # also ignore newlines between the brackets. Terminating commas are ok before 187 | # the closing bracket. 188 | 189 | key5 = [1, 2, 3] 190 | key6 = [ 191 | 1, 192 | 2, # this is ok 193 | ] 194 | 195 | ################################################################################ 196 | ## Array of Tables 197 | 198 | # These can be expressed by using a table name in double brackets. Each table 199 | # with the same double bracketed name will be an element in the array. 
The 200 | # tables are inserted in the order encountered. 201 | 202 | [[products]] 203 | 204 | name = "Hammer" 205 | sku = 738594937 206 | 207 | [[products]] 208 | 209 | [[products]] 210 | 211 | color = "gray" 212 | name = "Nail" 213 | sku = 284758393 214 | 215 | # You can create nested arrays of tables as well. 216 | 217 | [[fruit]] 218 | name = "apple" 219 | 220 | [fruit.physical] 221 | color = "red" 222 | shape = "round" 223 | 224 | [[fruit.variety]] 225 | name = "red delicious" 226 | 227 | [[fruit.variety]] 228 | name = "granny smith" 229 | 230 | [[fruit]] 231 | name = "banana" 232 | 233 | [[fruit.variety]] 234 | name = "plantain" 235 | -------------------------------------------------------------------------------- /test-data/rewrite/nothing_expected.toml: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ## Comment 3 | 4 | # Speak your mind with the hash symbol. They go from the symbol to the end of 5 | # the line. 6 | 7 | ################################################################################ 8 | ## Table 9 | 10 | # Tables (also known as hash tables or dictionaries) are collections of 11 | # key/value pairs. They appear in square brackets on a line by themselves. 12 | 13 | root_value = 2 14 | 15 | [table] 16 | 17 | key = "value" # Yeah, you can do this. 18 | 19 | # Nested tables are denoted by table names with dots in them. Name your tables 20 | # whatever crap you please, just don't use #, ., [ or ]. 21 | 22 | [table.subtable] 23 | 24 | key = "another value" 25 | 26 | # You don't need to specify all the super-tables if you don't want to. TOML 27 | # knows how to do it for you. 28 | 29 | # [x] you 30 | # [x.y] don't 31 | # [x.y.z] need these 32 | [x.y.z.w] # for this to work 33 | 34 | ################################################################################ 35 | ## Inline Table 36 | 37 | # Inline tables provide a more compact syntax for expressing tables. They are 38 | # especially useful for grouped data that can otherwise quickly become verbose. 39 | # Inline tables are enclosed in curly braces `{` and `}`. No newlines are 40 | # allowed between the curly braces unless they are valid within a value. 41 | 42 | [table.inline] 43 | 44 | name = { first = "Tom", last = "Preston-Werner" } 45 | point = { x = 1, y = 2 } 46 | 47 | ################################################################################ 48 | ## String 49 | 50 | # There are four ways to express strings: basic, multi-line basic, literal, and 51 | # multi-line literal. All strings must contain only valid UTF-8 characters. 52 | 53 | [string.basic] 54 | 55 | basic = "I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF." 56 | 57 | [string.multiline] 58 | 59 | # The following strings are byte-for-byte equivalent: 60 | key1 = "One\nTwo" 61 | key2 = """One\nTwo""" 62 | key3 = """ 63 | One 64 | Two""" 65 | 66 | [string.multiline.continued] 67 | 68 | # The following strings are byte-for-byte equivalent: 69 | key1 = "The quick brown fox jumps over the lazy dog." 70 | 71 | key2 = """ 72 | The quick brown \ 73 | 74 | 75 | fox jumps over \ 76 | the lazy dog.""" 77 | 78 | key3 = """\ 79 | The quick brown \ 80 | fox jumps over \ 81 | the lazy dog.\ 82 | """ 83 | 84 | [string.literal] 85 | 86 | # What you see is what you get. 
87 | quoted = 'Tom "Dubs" Preston-Werner' 88 | regex = '<\i\c*\s*>' 89 | winpath = 'C:\Users\nodejs\templates' 90 | winpath2 = '\\ServerX\admin$\system32\' 91 | 92 | [string.literal.multiline] 93 | 94 | lines = ''' 95 | The first newline is 96 | trimmed in raw strings. 97 | All other whitespace 98 | is preserved. 99 | ''' 100 | regex2 = '''I [dw]on't need \d{2} apples''' 101 | 102 | ################################################################################ 103 | ## Integer 104 | 105 | # Integers are whole numbers. Positive numbers may be prefixed with a plus sign. 106 | # Negative numbers are prefixed with a minus sign. 107 | 108 | [integer] 109 | 110 | key1 = +99 111 | key2 = 42 112 | key3 = 0 113 | key4 = -17 114 | 115 | [integer.underscores] 116 | 117 | # For large numbers, you may use underscores to enhance readability. Each 118 | # underscore must be surrounded by at least one digit. 119 | key1 = 1_000 120 | key2 = 5_349_221 121 | key3 = 1_2_3_4_5 # valid but inadvisable 122 | 123 | ################################################################################ 124 | ## Float 125 | 126 | # A float consists of an integer part (which may be prefixed with a plus or 127 | # minus sign) followed by a fractional part and/or an exponent part. 128 | 129 | [float.fractional] 130 | 131 | key1 = +1.0 132 | key2 = 3.1415 133 | key3 = -0.01 134 | 135 | [float.exponent] 136 | 137 | key1 = 5e+22 138 | key2 = 1e6 139 | key3 = -2E-2 140 | 141 | [float.both] 142 | 143 | key = 6.626e-34 144 | 145 | [float.underscores] 146 | 147 | # This file is used for benches and toml-rs doesn't yet support these: 148 | # key1 = 9_224_617.445_991_228_313 149 | # key2 = 1e1_000 150 | 151 | ################################################################################ 152 | ## Boolean 153 | 154 | # Booleans are just the tokens you're used to. Always lowercase. 155 | 156 | [boolean] 157 | 158 | False = false 159 | True = true 160 | 161 | ################################################################################ 162 | ## Datetime 163 | 164 | # Datetimes are RFC 3339 dates. 165 | 166 | [datetime] 167 | 168 | key1 = 1979-05-27T07:32:00Z 169 | key2 = 1979-05-27T00:32:00-07:00 170 | key3 = 1979-05-27T00:32:00.999999-07:00 171 | 172 | ################################################################################ 173 | ## Array 174 | 175 | # Arrays are square brackets with other primitives inside. Whitespace is 176 | # ignored. Elements are separated by commas. Data types may not be mixed. 177 | 178 | [array] 179 | 180 | key1 = [1, 2, 3] 181 | key2 = ["red", "yellow", "green"] 182 | key3 = [[1, 2], [3, 4, 5]] 183 | key4 = [[1, 2], ["a", "b", "c"]] # this is ok 184 | 185 | # Arrays can also be multiline. So in addition to ignoring whitespace, arrays 186 | # also ignore newlines between the brackets. Terminating commas are ok before 187 | # the closing bracket. 188 | 189 | key5 = [1, 2, 3] 190 | key6 = [ 191 | 1, 192 | 2, # this is ok 193 | ] 194 | 195 | ################################################################################ 196 | ## Array of Tables 197 | 198 | # These can be expressed by using a table name in double brackets. Each table 199 | # with the same double bracketed name will be an element in the array. The 200 | # tables are inserted in the order encountered. 
201 | 202 | [[products]] 203 | 204 | name = "Hammer" 205 | sku = 738594937 206 | 207 | [[products]] 208 | 209 | [[products]] 210 | 211 | color = "gray" 212 | name = "Nail" 213 | sku = 284758393 214 | 215 | # You can create nested arrays of tables as well. 216 | 217 | [[fruit]] 218 | name = "apple" 219 | 220 | [fruit.physical] 221 | color = "red" 222 | shape = "round" 223 | 224 | [[fruit.variety]] 225 | name = "red delicious" 226 | 227 | [[fruit.variety]] 228 | name = "granny smith" 229 | 230 | [[fruit]] 231 | name = "banana" 232 | 233 | [[fruit.variety]] 234 | name = "plantain" 235 | -------------------------------------------------------------------------------- /src/syntax.rs: -------------------------------------------------------------------------------- 1 | //! Declaration of the syntax tokens and lexer implementation. 2 | 3 | #![allow(non_camel_case_types)] 4 | 5 | use logos::{Lexer, Logos}; 6 | 7 | /// Enum containing all the tokens in a syntax tree. 8 | #[derive(Logos, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] 9 | #[repr(u16)] 10 | pub enum SyntaxKind { 11 | #[regex(r"([ \t])+")] 12 | WHITESPACE = 0, 13 | 14 | #[regex(r"(\n|\r\n)+")] 15 | NEWLINE, 16 | 17 | #[regex(r"#[^\n\r]*", allow_greedy = true)] 18 | COMMENT, 19 | 20 | #[regex(r"[A-Za-z0-9_-]+", priority = 2)] 21 | IDENT, 22 | 23 | /// Not part of the regular TOML syntax, only used to allow 24 | /// glob patterns in keys. 25 | #[regex(r"[*?A-Za-z0-9_-]+", priority = 1)] 26 | IDENT_WITH_GLOB, 27 | 28 | #[token(".")] 29 | PERIOD, 30 | 31 | #[token(",")] 32 | COMMA, 33 | 34 | #[token("=")] 35 | EQ, 36 | 37 | #[regex(r#"""#, lex_string)] 38 | STRING, 39 | 40 | #[regex(r#"""""#, lex_multi_line_string)] 41 | MULTI_LINE_STRING, 42 | 43 | #[regex(r#"'"#, lex_string_literal)] 44 | STRING_LITERAL, 45 | 46 | #[regex(r#"'''"#, lex_multi_line_string_literal)] 47 | MULTI_LINE_STRING_LITERAL, 48 | 49 | #[regex(r"[+-]?[0-9_]+", priority = 4)] 50 | INTEGER, 51 | 52 | #[regex(r"0x[0-9A-Fa-f_]+")] 53 | INTEGER_HEX, 54 | 55 | #[regex(r"0o[0-7_]+")] 56 | INTEGER_OCT, 57 | 58 | #[regex(r"0b(0|1|_)+")] 59 | INTEGER_BIN, 60 | 61 | #[regex(r"[-+]?([0-9_]+(\.[0-9_]+)?([eE][+-]?[0-9_]+)?|nan|inf)", priority = 3)] 62 | FLOAT, 63 | 64 | #[regex(r"true|false")] 65 | BOOL, 66 | 67 | #[regex(r#"(?:[1-9]\d\d\d-(?:(?:0[1-9]|1[0-2])-(?:0[1-9]|1\d|2[0-8])|(?:0[13-9]|1[0-2])-(?:29|30)|(?:0[13578]|1[02])-31)|(?:[1-9]\d(?:0[48]|[2468][048]|[13579][26])|(?:[2468][048]|[13579][26])00)-02-29)(?:T|t| )(?:[01]\d|2[0-3]):[0-5]\d:[0-5]\d(?:(?:\.|,)\d+)?(?:[Zz]|[+-][01]\d:[0-5]\d)"#)] 68 | DATE_TIME_OFFSET, 69 | 70 | #[regex(r#"(?:[1-9]\d\d\d-(?:(?:0[1-9]|1[0-2])-(?:0[1-9]|1\d|2[0-8])|(?:0[13-9]|1[0-2])-(?:29|30)|(?:0[13578]|1[02])-31)|(?:[1-9]\d(?:0[48]|[2468][048]|[13579][26])|(?:[2468][048]|[13579][26])00)-02-29)(?:T|t| )(?:[01]\d|2[0-3]):[0-5]\d:[0-5]\d(?:(?:\.|,)\d+)?"#)] 71 | DATE_TIME_LOCAL, 72 | 73 | #[regex(r#"(?:[1-9]\d\d\d-(?:(?:0[1-9]|1[0-2])-(?:0[1-9]|1\d|2[0-8])|(?:0[13-9]|1[0-2])-(?:29|30)|(?:0[13578]|1[02])-31)|(?:[1-9]\d(?:0[48]|[2468][048]|[13579][26])|(?:[2468][048]|[13579][26])00)-02-29)"#)] 74 | DATE, 75 | 76 | #[regex(r#"(?:[01]\d|2[0-3]):[0-5]\d:[0-5]\d(?:(?:\.|,)\d+)?"#)] 77 | TIME, 78 | 79 | #[token("[")] 80 | BRACKET_START, 81 | 82 | #[token("]")] 83 | BRACKET_END, 84 | 85 | #[token("{")] 86 | BRACE_START, 87 | 88 | #[token("}")] 89 | BRACE_END, 90 | 91 | ERROR, 92 | 93 | // composite types 94 | KEY, // e.g.: parent.child 95 | VALUE, // e.g.: "2" 96 | TABLE_HEADER, // e.g.: [table] 97 | TABLE_ARRAY_HEADER, // e.g.: [[table]] 98 | 
ENTRY, // e.g.: key = "value"
99 | ARRAY, // e.g.: [ 1, 2 ]
100 | INLINE_TABLE, // e.g.: { key = "value" }
101 |
102 | ROOT, // root node
103 | }
104 |
105 | impl From<SyntaxKind> for rowan::SyntaxKind {
106 | fn from(kind: SyntaxKind) -> Self {
107 | Self(kind as u16)
108 | }
109 | }
110 |
111 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
112 | pub enum Lang {}
113 | impl rowan::Language for Lang {
114 | type Kind = SyntaxKind;
115 | fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
116 | assert!(raw.0 <= SyntaxKind::ROOT as u16);
117 | unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
118 | }
119 | fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
120 | kind.into()
121 | }
122 | }
123 |
124 | pub type SyntaxNode = rowan::SyntaxNode<Lang>;
125 | pub type SyntaxToken = rowan::SyntaxToken<Lang>;
126 | pub type SyntaxElement = rowan::NodeOrToken<SyntaxNode, SyntaxToken>;
127 |
128 | fn lex_string(lex: &mut Lexer<SyntaxKind>) -> bool {
129 | let remainder: &str = lex.remainder();
130 | let mut escaped = false;
131 |
132 | let mut total_len = 0;
133 |
134 | for c in remainder.chars() {
135 | total_len += c.len_utf8();
136 |
137 | if c == '\\' {
138 | escaped = !escaped;
139 | continue;
140 | }
141 |
142 | if c == '"' && !escaped {
143 | lex.bump(total_len);
144 | return true;
145 | }
146 |
147 | escaped = false;
148 | }
149 | false
150 | }
151 |
152 | fn lex_multi_line_string(lex: &mut Lexer<SyntaxKind>) -> bool {
153 | let remainder: &str = lex.remainder();
154 |
155 | let mut total_len = 0;
156 | let mut quote_count = 0;
157 |
158 | let mut escaped = false;
159 |
160 | // As the string can contain ",
161 | // we can end up with more than 3 "-s at
162 | // the end, in that case we need to include all
163 | // in the string.
164 | let mut quotes_found = false;
165 |
166 | for c in remainder.chars() {
167 | if quotes_found {
168 | if c != '"' {
169 | if quote_count >= 6 {
170 | return false;
171 | }
172 |
173 | lex.bump(total_len);
174 | return true;
175 | } else {
176 | quote_count += 1;
177 | total_len += c.len_utf8();
178 | continue;
179 | }
180 | }
181 | total_len += c.len_utf8();
182 |
183 | if c == '\\' {
184 | escaped = true;
185 | continue;
186 | }
187 |
188 | if c == '"' && !escaped {
189 | quote_count += 1;
190 | } else {
191 | quote_count = 0;
192 | }
193 |
194 | if quote_count == 3 {
195 | quotes_found = true;
196 | }
197 |
198 | escaped = false;
199 | }
200 |
201 | // End of input
202 | if quotes_found {
203 | if quote_count >= 6 {
204 | return false;
205 | }
206 |
207 | lex.bump(total_len);
208 | true
209 | } else {
210 | false
211 | }
212 | }
213 |
214 | fn lex_string_literal(lex: &mut Lexer<SyntaxKind>) -> bool {
215 | let remainder: &str = lex.remainder();
216 | let mut total_len = 0;
217 |
218 | for c in remainder.chars() {
219 | total_len += c.len_utf8();
220 |
221 | if c == '\'' {
222 | lex.bump(total_len);
223 | return true;
224 | }
225 | }
226 | false
227 | }
228 |
229 | fn lex_multi_line_string_literal(lex: &mut Lexer<SyntaxKind>) -> bool {
230 | let remainder: &str = lex.remainder();
231 |
232 | let mut total_len = 0;
233 | let mut quote_count = 0;
234 |
235 | // As the string can contain ',
236 | // we can end up with more than 3 '-s at
237 | // the end, in that case we need to include all
238 | // in the string.
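// For example, in `''''a''''` the extra quote on each side belongs to the
// string body, so the whole `''''a''''` is lexed as one token and the body
// between the delimiters is `'a'`. (Editor's note: illustrative example
// inferred from the loop below.)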
239 | let mut quotes_found = false;
240 |
241 | for c in remainder.chars() {
242 | if quotes_found {
243 | if c != '\'' {
244 | lex.bump(total_len);
245 | return true;
246 | } else {
247 | if quote_count > 4 {
248 | return false;
249 | }
250 |
251 | quote_count += 1;
252 | total_len += c.len_utf8();
253 | continue;
254 | }
255 | }
256 | total_len += c.len_utf8();
257 |
258 | if c == '\'' {
259 | quote_count += 1;
260 | } else {
261 | quote_count = 0;
262 | }
263 |
264 | if quote_count == 3 {
265 | quotes_found = true;
266 | }
267 | }
268 |
269 | // End of input
270 | if quotes_found {
271 | lex.bump(total_len);
272 | true
273 | } else {
274 | false
275 | }
276 | }
277 |
--------------------------------------------------------------------------------
/src/parser/mod.rs:
--------------------------------------------------------------------------------
1 | //! TOML document to syntax tree parsing.
2 |
3 | use crate::{
4 | syntax::{SyntaxKind, SyntaxKind::*, SyntaxNode},
5 | util::{allowed_chars, check_escape},
6 | };
7 | use logos::{Lexer, Logos};
8 | use rowan::{GreenNode, GreenNodeBuilder, TextRange, TextSize};
9 | use std::convert::TryInto;
10 |
11 | #[macro_use]
12 | mod macros;
13 |
14 | /// A syntax error that can occur during parsing.
15 | #[derive(Debug, Clone, Eq, PartialEq, Hash)]
16 | pub struct Error {
17 | /// The span of the error.
18 | pub range: TextRange,
19 |
20 | /// Human-friendly error message.
21 | pub message: String,
22 | }
23 |
24 | impl core::fmt::Display for Error {
25 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
26 | write!(f, "{} ({:?})", &self.message, &self.range)
27 | }
28 | }
29 | impl std::error::Error for Error {}
30 |
31 | /// Parse a TOML document into a [Rowan green tree](rowan::GreenNode).
32 | ///
33 | /// The parsing will not stop at unexpected or invalid tokens.
34 | /// Instead errors will be collected with their character offsets and lengths,
35 | /// and the invalid token(s) will have the `ERROR` kind in the final tree.
36 | ///
37 | /// The parser will also validate comment and string contents, looking for
38 | /// invalid escape sequences and invalid characters.
39 | /// These will also be reported as syntax errors.
40 | ///
41 | /// This does not check for semantic errors such as duplicate keys.
42 | pub fn parse(source: &str) -> Parse {
43 | Parser::new(source).parse()
44 | }
45 |
46 | /// A hand-written parser that uses the Logos lexer
47 | /// to tokenize the source, then constructs
48 | /// a Rowan green tree from them.
49 | pub(crate) struct Parser<'p> {
50 | skip_whitespace: bool,
51 | // Allow glob patterns as keys and using [] instead of dots.
52 | key_pattern_syntax: bool,
53 | current_token: Option<SyntaxKind>,
54 |
55 | // These tokens are not consumed on errors.
56 | //
57 | // The syntax error is still reported,
58 | // but the surrounding context can still
59 | // be parsed.
60 | // FIXME(bit_flags):
61 | // This is VERY wrong, as the members of the
62 | // enums are not proper bit flags.
63 | //
64 | // However this incorrect behavior marks fewer tokens
65 | // as errors, making the parser more fault-tolerant.
66 | // Instead of fixing this it would probably be better to
67 | // remove the ERROR token altogether, or reserve it for
68 | // special cases.
69 | error_whitelist: u16,
70 |
71 | lexer: Lexer<'p, SyntaxKind>,
72 | builder: GreenNodeBuilder<'p>,
73 | errors: Vec<Error>,
74 | }
75 |
76 | impl Parser<'_> {
77 | /// Required for patch syntax
78 | /// and key matches.
79 | ///
80 | /// It allows a part of glob syntax in identifiers as well.
81 | #[allow(dead_code)]
82 | pub(crate) fn parse_key_only(mut self) -> Parse {
83 | self.key_pattern_syntax = true;
84 | let _ = with_node!(self.builder, KEY, self.parse_key());
85 |
86 | Parse {
87 | green_node: self.builder.finish(),
88 | errors: self.errors,
89 | }
90 | }
91 | }
92 |
93 | /// This is just a convenience type during parsing.
94 | /// It allows using "?", making the code cleaner.
95 | type ParserResult<T> = Result<T, ()>;
96 |
97 | // FIXME(recursion)
98 | // Deeply nested structures cause stack overflow;
99 | // this probably has to be rewritten into a state machine
100 | // that contains minimal function calls.
101 | impl<'p> Parser<'p> {
102 | pub(crate) fn new(source: &'p str) -> Self {
103 | Parser {
104 | current_token: None,
105 | skip_whitespace: true,
106 | key_pattern_syntax: false,
107 | error_whitelist: 0,
108 | lexer: SyntaxKind::lexer(source),
109 | builder: Default::default(),
110 | errors: Default::default(),
111 | }
112 | }
113 |
114 | fn parse(mut self) -> Parse {
115 | let _ = with_node!(self.builder, ROOT, self.parse_root());
116 |
117 | Parse {
118 | green_node: self.builder.finish(),
119 | errors: self.errors,
120 | }
121 | }
122 |
123 | fn error(&mut self, message: &str) -> ParserResult<()> {
124 | let span = self.lexer.span();
125 |
126 | let err = Error {
127 | range: TextRange::new(
128 | TextSize::from(span.start as u32),
129 | TextSize::from(span.end as u32),
130 | ),
131 | message: message.into(),
132 | };
133 |
134 | let same_error = self
135 | .errors
136 | .last()
137 | .map(|e| e.range == err.range)
138 | .unwrap_or(false);
139 |
140 | if !same_error {
141 | self.add_error(&Error {
142 | range: TextRange::new(
143 | TextSize::from(span.start as u32),
144 | TextSize::from(span.end as u32),
145 | ),
146 | message: message.into(),
147 | });
148 | if let Some(t) = self.current_token
149 | && !self.whitelisted(t) {
150 | self.token_as(ERROR).ok();
151 | }
152 | } else {
153 | self.token_as(ERROR).ok();
154 | }
155 |
156 | Err(())
157 | }
158 |
159 | // report error without consuming the current token
160 | fn report_error(&mut self, message: &str) -> ParserResult<()> {
161 | let span = self.lexer.span();
162 | self.add_error(&Error {
163 | range: TextRange::new(
164 | TextSize::from(span.start as u32),
165 | TextSize::from(span.end as u32),
166 | ),
167 | message: message.into(),
168 | });
169 | Err(())
170 | }
171 |
172 | fn add_error(&mut self, e: &Error) {
173 | if let Some(last_err) = self.errors.last_mut()
174 | && last_err == e {
175 | return;
176 | }
177 |
178 | self.errors.push(e.clone());
179 | }
180 |
181 | #[inline]
182 | fn whitelist_token(&mut self, token: SyntaxKind) {
183 | self.error_whitelist |= token as u16;
184 | }
185 |
186 | #[inline]
187 | fn blacklist_token(&mut self, token: SyntaxKind) {
188 | self.error_whitelist &= !(token as u16);
189 | }
190 |
191 | #[inline]
192 | fn whitelisted(&self, token: SyntaxKind) -> bool {
193 | self.error_whitelist & token as u16 != 0
194 | }
195 |
196 | fn insert_token(&mut self, kind: SyntaxKind, s: &str) {
197 | self.builder.token(kind.into(), s)
198 | }
199 |
200 | fn must_token_or(&mut self, kind: SyntaxKind, message: &str) -> ParserResult<()> {
201 | match self.get_token() {
202 | Ok(t) => {
203 | if kind == t {
204 | self.token()
205 | } else {
206 | self.error(message)
207 | }
208 | }
209 | Err(_) => {
210 | self.add_error(&Error {
211 | range: TextRange::new(
212 | self.lexer.span().start.try_into().unwrap(),
213 |
self.lexer.span().end.try_into().unwrap(), 214 | ), 215 | message: "unexpected EOF".into(), 216 | }); 217 | Err(()) 218 | } 219 | } 220 | } 221 | 222 | // This is the same as `token` but won't consume trailing whitespace. 223 | fn add_token(&mut self) -> ParserResult<()> { 224 | match self.get_token() { 225 | Err(_) => Err(()), 226 | Ok(token) => { 227 | self.builder.token(token.into(), self.lexer.slice()); 228 | self.current_token = None; 229 | Ok(()) 230 | } 231 | } 232 | } 233 | 234 | fn token(&mut self) -> ParserResult<()> { 235 | match self.get_token() { 236 | Err(_) => Err(()), 237 | Ok(token) => self.token_as(token), 238 | } 239 | } 240 | 241 | /// This function implicitly calls `step`, 242 | /// it was definitely not a good design decision 243 | /// but changing this behaviour involves a 244 | /// different syntax tree and breakages down the line. 245 | fn token_as(&mut self, kind: SyntaxKind) -> ParserResult<()> { 246 | self.token_as_no_step(kind)?; 247 | self.step(); 248 | Ok(()) 249 | } 250 | 251 | fn token_as_no_step(&mut self, kind: SyntaxKind) -> ParserResult<()> { 252 | match self.get_token() { 253 | Err(_) => return Err(()), 254 | Ok(_) => { 255 | self.builder.token(kind.into(), self.lexer.slice()); 256 | } 257 | } 258 | 259 | Ok(()) 260 | } 261 | 262 | fn step(&mut self) { 263 | self.current_token = None; 264 | while let Some(token) = self.lexer.next() { 265 | let token = token.unwrap_or(ERROR); 266 | match token { 267 | COMMENT => { 268 | match allowed_chars::comment(self.lexer.slice()) { 269 | Ok(_) => {} 270 | Err(err_indices) => { 271 | for e in err_indices { 272 | self.add_error(&Error { 273 | range: TextRange::new( 274 | (self.lexer.span().start + e).try_into().unwrap(), 275 | (self.lexer.span().start + e).try_into().unwrap(), 276 | ), 277 | message: "invalid character in comment".into(), 278 | }); 279 | } 280 | } 281 | }; 282 | 283 | self.insert_token(token, self.lexer.slice()); 284 | } 285 | WHITESPACE => { 286 | if self.skip_whitespace { 287 | self.insert_token(token, self.lexer.slice()); 288 | } else { 289 | self.current_token = Some(token); 290 | break; 291 | } 292 | } 293 | ERROR => { 294 | self.insert_token(token, self.lexer.slice()); 295 | let span = self.lexer.span(); 296 | self.add_error(&Error { 297 | range: TextRange::new( 298 | span.start.try_into().unwrap(), 299 | span.end.try_into().unwrap(), 300 | ), 301 | message: "unexpected token".into(), 302 | }) 303 | } 304 | _ => { 305 | self.current_token = Some(token); 306 | break; 307 | } 308 | } 309 | } 310 | } 311 | 312 | fn get_token(&mut self) -> ParserResult { 313 | if self.current_token.is_none() { 314 | self.step(); 315 | } 316 | 317 | self.current_token.ok_or(()) 318 | } 319 | 320 | fn parse_root(&mut self) -> ParserResult<()> { 321 | // Ensure we have newlines between entries 322 | let mut not_newline = false; 323 | 324 | // We want to make sure that an entry spans the 325 | // entire line, so we start/close its node manually. 
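// For example, in `key = 1 # comment`, the trailing whitespace and COMMENT
// tokens are emitted while the ENTRY node is still open, so they end up
// inside it. (Editor's note: illustrative, based on the loop below.)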
326 | let mut entry_started = false; 327 | 328 | while let Ok(token) = self.get_token() { 329 | match token { 330 | BRACKET_START => { 331 | if entry_started { 332 | self.builder.finish_node(); 333 | entry_started = false; 334 | } 335 | 336 | if not_newline { 337 | let _ = self.error("expected new line"); 338 | continue; 339 | } 340 | 341 | not_newline = true; 342 | 343 | if self.lexer.remainder().starts_with('[') { 344 | let _ = whitelisted!( 345 | self, 346 | NEWLINE, 347 | with_node!( 348 | self.builder, 349 | TABLE_ARRAY_HEADER, 350 | self.parse_table_array_header() 351 | ) 352 | ); 353 | } else { 354 | let _ = whitelisted!( 355 | self, 356 | NEWLINE, 357 | with_node!(self.builder, TABLE_HEADER, self.parse_table_header()) 358 | ); 359 | } 360 | } 361 | NEWLINE => { 362 | not_newline = false; 363 | if entry_started { 364 | self.builder.finish_node(); 365 | entry_started = false; 366 | } 367 | let _ = self.token(); 368 | } 369 | _ => { 370 | if not_newline { 371 | let _ = self.error("expected new line"); 372 | continue; 373 | } 374 | if entry_started { 375 | self.builder.finish_node(); 376 | } 377 | not_newline = true; 378 | self.builder.start_node(ENTRY.into()); 379 | entry_started = true; 380 | let _ = whitelisted!(self, NEWLINE, self.parse_entry()); 381 | } 382 | } 383 | } 384 | if entry_started { 385 | self.builder.finish_node(); 386 | } 387 | 388 | Ok(()) 389 | } 390 | 391 | fn parse_table_header(&mut self) -> ParserResult<()> { 392 | self.must_token_or(BRACKET_START, r#"expected "[""#)?; 393 | let _ = with_node!(self.builder, KEY, self.parse_key()); 394 | self.must_token_or(BRACKET_END, r#"expected "]""#)?; 395 | 396 | Ok(()) 397 | } 398 | 399 | fn parse_table_array_header(&mut self) -> ParserResult<()> { 400 | self.skip_whitespace = false; 401 | self.must_token_or(BRACKET_START, r#"expected "[[""#)?; 402 | self.must_token_or(BRACKET_START, r#"expected "[[""#)?; 403 | self.skip_whitespace = true; 404 | let _ = with_node!(self.builder, KEY, self.parse_key()); 405 | self.skip_whitespace = false; 406 | let _ = self.must_token_or(BRACKET_END, r#"expected "]]""#); 407 | 408 | // Hack in order to avoid calling `step` after 409 | // the second closing bracket. 
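// (Editor's note: the point of the hack appears to be restoring
// `skip_whitespace` first, so the whitespace after the final `]` is
// handled by the later `step` call rather than being consumed early.)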
410 | let token = self.get_token()?; 411 | match token { 412 | BRACKET_END => { 413 | self.token_as_no_step(token)?; 414 | } 415 | _ => { 416 | self.error(r#"expected "]]"#)?; 417 | } 418 | } 419 | self.skip_whitespace = true; 420 | 421 | self.step(); 422 | 423 | Ok(()) 424 | } 425 | 426 | fn parse_entry(&mut self) -> ParserResult<()> { 427 | with_node!(self.builder, KEY, self.parse_key())?; 428 | self.must_token_or(EQ, r#"expected "=""#)?; 429 | with_node!(self.builder, VALUE, self.parse_value())?; 430 | 431 | Ok(()) 432 | } 433 | 434 | fn parse_key(&mut self) -> ParserResult<()> { 435 | if self.parse_ident().is_err() { 436 | return self.report_error("expected identifier"); 437 | } 438 | 439 | let mut after_period = false; 440 | loop { 441 | let t = match self.get_token() { 442 | Ok(token) => token, 443 | Err(_) => { 444 | if !after_period { 445 | return Ok(()); 446 | } 447 | return self.error("unexpected end of input"); 448 | } 449 | }; 450 | 451 | match t { 452 | PERIOD => { 453 | if after_period { 454 | return self.error(r#"unexpected ".""#); 455 | } else { 456 | self.token()?; 457 | after_period = true; 458 | } 459 | } 460 | BRACKET_START if self.key_pattern_syntax => { 461 | self.step(); 462 | 463 | match self.parse_ident() { 464 | Ok(_) => {} 465 | Err(_) => return self.error("expected identifier"), 466 | } 467 | 468 | let token = self.get_token()?; 469 | 470 | if !matches!(token, BRACKET_END) { 471 | self.error(r#"expected "]""#)?; 472 | } 473 | self.step(); 474 | after_period = false; 475 | } 476 | _ => { 477 | if after_period { 478 | match self.parse_ident() { 479 | Ok(_) => {} 480 | Err(_) => return self.report_error("expected identifier"), 481 | } 482 | after_period = false; 483 | } else if self.key_pattern_syntax { 484 | return self.error("unexpected identifier"); 485 | } else { 486 | break; 487 | } 488 | } 489 | }; 490 | } 491 | 492 | Ok(()) 493 | } 494 | 495 | fn parse_ident(&mut self) -> ParserResult<()> { 496 | let t = self.get_token()?; 497 | match t { 498 | IDENT => self.token(), 499 | IDENT_WITH_GLOB => { 500 | if self.key_pattern_syntax { 501 | self.token_as(IDENT) 502 | } else { 503 | self.error("expected identifier") 504 | } 505 | } 506 | INTEGER_HEX | INTEGER_BIN | INTEGER_OCT => self.token_as(IDENT), 507 | INTEGER => { 508 | if self.lexer.slice().starts_with('+') { 509 | Err(()) 510 | } else { 511 | self.token_as(IDENT) 512 | } 513 | } 514 | STRING_LITERAL => { 515 | match allowed_chars::string_literal(self.lexer.slice()) { 516 | Ok(_) => {} 517 | Err(err_indices) => { 518 | for e in err_indices { 519 | self.add_error(&Error { 520 | range: TextRange::new( 521 | (self.lexer.span().start + e).try_into().unwrap(), 522 | (self.lexer.span().start + e).try_into().unwrap(), 523 | ), 524 | message: "invalid control character in string literal".into(), 525 | }); 526 | } 527 | } 528 | }; 529 | 530 | self.token_as(IDENT) 531 | } 532 | STRING => { 533 | match allowed_chars::string(self.lexer.slice()) { 534 | Ok(_) => {} 535 | Err(err_indices) => { 536 | for e in err_indices { 537 | self.add_error(&Error { 538 | range: TextRange::new( 539 | (self.lexer.span().start + e).try_into().unwrap(), 540 | (self.lexer.span().start + e).try_into().unwrap(), 541 | ), 542 | message: "invalid character in string".into(), 543 | }); 544 | } 545 | } 546 | }; 547 | 548 | match check_escape(self.lexer.slice()) { 549 | Ok(_) => self.token_as(IDENT), 550 | Err(err_indices) => { 551 | for e in err_indices { 552 | self.add_error(&Error { 553 | range: TextRange::new( 554 | (self.lexer.span().start + 
e).try_into().unwrap(), 555 | (self.lexer.span().start + e).try_into().unwrap(), 556 | ), 557 | message: "invalid escape sequence".into(), 558 | }); 559 | } 560 | 561 | // We proceed normally even if 562 | // the string contains invalid escapes. 563 | // It shouldn't affect the rest of the parsing. 564 | self.token_as(IDENT) 565 | } 566 | } 567 | } 568 | FLOAT => { 569 | if self.lexer.slice().starts_with('0') { 570 | self.error("zero-padded numbers are not allowed") 571 | } else if self.lexer.slice().starts_with('+') { 572 | Err(()) 573 | } else { 574 | for (i, s) in self.lexer.slice().split('.').enumerate() { 575 | if i != 0 { 576 | self.insert_token(PERIOD, "."); 577 | } 578 | 579 | self.insert_token(IDENT, s); 580 | } 581 | self.step(); 582 | Ok(()) 583 | } 584 | } 585 | BOOL => self.token_as(IDENT), 586 | DATE => self.token_as(IDENT), 587 | _ => self.error("expected identifier"), 588 | } 589 | } 590 | 591 | fn parse_value(&mut self) -> ParserResult<()> { 592 | let t = match self.get_token() { 593 | Ok(t) => t, 594 | Err(_) => return self.error("expected value"), 595 | }; 596 | 597 | match t { 598 | BOOL | DATE_TIME_OFFSET | DATE_TIME_LOCAL | DATE | TIME => self.token(), 599 | INTEGER => { 600 | // This is probably a logos bug or a priority issue, 601 | // for some reason "1979-05-27" gets lexed as INTEGER. 602 | if !self.lexer.slice().starts_with('-') && self.lexer.slice().contains('-') { 603 | return self.token_as(DATE); 604 | } 605 | 606 | // FIXME: probably another logos bug. 607 | if self.lexer.slice().contains(':') { 608 | return self.token_as(TIME); 609 | } 610 | 611 | // This could've been done more elegantly probably. 612 | if (self.lexer.slice().starts_with('0') && self.lexer.slice() != "0") 613 | || (self.lexer.slice().starts_with("+0") && self.lexer.slice() != "+0") 614 | || (self.lexer.slice().starts_with("-0") && self.lexer.slice() != "-0") 615 | { 616 | self.error("zero-padded integers are not allowed") 617 | } else if !check_underscores(self.lexer.slice(), 10) { 618 | self.error("invalid underscores") 619 | } else { 620 | self.token() 621 | } 622 | } 623 | INTEGER_BIN => { 624 | if !check_underscores(self.lexer.slice(), 2) { 625 | self.error("invalid underscores") 626 | } else { 627 | self.token() 628 | } 629 | } 630 | INTEGER_HEX => { 631 | if !check_underscores(self.lexer.slice(), 16) { 632 | self.error("invalid underscores") 633 | } else { 634 | self.token() 635 | } 636 | } 637 | INTEGER_OCT => { 638 | if !check_underscores(self.lexer.slice(), 8) { 639 | self.error("invalid underscores") 640 | } else { 641 | self.token() 642 | } 643 | } 644 | FLOAT => { 645 | // FIXME: probably another logos bug. 
646 | if self.lexer.slice().contains(':') { 647 | return self.token_as(TIME); 648 | } 649 | 650 | let int_slice = if self.lexer.slice().contains('.') { 651 | self.lexer.slice().split('.').next().unwrap() 652 | } else { 653 | self.lexer.slice().split('e').next().unwrap() 654 | }; 655 | 656 | if (int_slice.starts_with('0') && int_slice != "0") 657 | || (int_slice.starts_with("+0") && int_slice != "+0") 658 | || (int_slice.starts_with("-0") && int_slice != "-0") 659 | { 660 | self.error("zero-padded numbers are not allowed") 661 | } else if !check_underscores(self.lexer.slice(), 10) { 662 | self.error("invalid underscores") 663 | } else { 664 | self.token() 665 | } 666 | } 667 | STRING_LITERAL => { 668 | match allowed_chars::string_literal(self.lexer.slice()) { 669 | Ok(_) => {} 670 | Err(err_indices) => { 671 | for e in err_indices { 672 | self.add_error(&Error { 673 | range: TextRange::new( 674 | (self.lexer.span().start + e).try_into().unwrap(), 675 | (self.lexer.span().start + e).try_into().unwrap(), 676 | ), 677 | message: "invalid control character in string literal".into(), 678 | }); 679 | } 680 | } 681 | }; 682 | self.token() 683 | } 684 | MULTI_LINE_STRING_LITERAL => { 685 | match allowed_chars::multi_line_string_literal(self.lexer.slice()) { 686 | Ok(_) => {} 687 | Err(err_indices) => { 688 | for e in err_indices { 689 | self.add_error(&Error { 690 | range: TextRange::new( 691 | (self.lexer.span().start + e).try_into().unwrap(), 692 | (self.lexer.span().start + e).try_into().unwrap(), 693 | ), 694 | message: "invalid character in string".into(), 695 | }); 696 | } 697 | } 698 | }; 699 | self.token() 700 | } 701 | STRING => { 702 | match allowed_chars::string(self.lexer.slice()) { 703 | Ok(_) => {} 704 | Err(err_indices) => { 705 | for e in err_indices { 706 | self.add_error(&Error { 707 | range: TextRange::new( 708 | (self.lexer.span().start + e).try_into().unwrap(), 709 | (self.lexer.span().start + e).try_into().unwrap(), 710 | ), 711 | message: "invalid character in string".into(), 712 | }); 713 | } 714 | } 715 | }; 716 | 717 | match check_escape(self.lexer.slice()) { 718 | Ok(_) => self.token(), 719 | Err(err_indices) => { 720 | for e in err_indices { 721 | self.add_error(&Error { 722 | range: TextRange::new( 723 | (self.lexer.span().start + e).try_into().unwrap(), 724 | (self.lexer.span().start + e).try_into().unwrap(), 725 | ), 726 | message: "invalid escape sequence".into(), 727 | }); 728 | } 729 | 730 | // We proceed normally even if 731 | // the string contains invalid escapes. 732 | // It shouldn't affect the rest of the parsing. 733 | self.token() 734 | } 735 | } 736 | } 737 | MULTI_LINE_STRING => { 738 | match allowed_chars::multi_line_string(self.lexer.slice()) { 739 | Ok(_) => {} 740 | Err(err_indices) => { 741 | for e in err_indices { 742 | self.add_error(&Error { 743 | range: TextRange::new( 744 | (self.lexer.span().start + e).try_into().unwrap(), 745 | (self.lexer.span().start + e).try_into().unwrap(), 746 | ), 747 | message: "invalid character in string".into(), 748 | }); 749 | } 750 | } 751 | }; 752 | 753 | match check_escape(self.lexer.slice()) { 754 | Ok(_) => self.token(), 755 | Err(err_indices) => { 756 | for e in err_indices { 757 | self.add_error(&Error { 758 | range: TextRange::new( 759 | (self.lexer.span().start + e).try_into().unwrap(), 760 | (self.lexer.span().start + e).try_into().unwrap(), 761 | ), 762 | message: "invalid escape sequence".into(), 763 | }); 764 | } 765 | 766 | // We proceed normally even if 767 | // the string contains invalid escapes. 
768 | // It shouldn't affect the rest of the parsing. 769 | self.token() 770 | } 771 | } 772 | } 773 | BRACKET_START => { 774 | with_node!(self.builder, ARRAY, self.parse_array()) 775 | } 776 | BRACE_START => { 777 | with_node!(self.builder, INLINE_TABLE, self.parse_inline_table()) 778 | } 779 | IDENT | BRACE_END => { 780 | // FIXME(bit_flags): This branch is just a workaround. 781 | self.report_error("expected value").ok(); 782 | Ok(()) 783 | } 784 | _ => self.error("expected value"), 785 | } 786 | } 787 | 788 | fn parse_inline_table(&mut self) -> ParserResult<()> { 789 | self.must_token_or(BRACE_START, r#"expected "{""#)?; 790 | 791 | let mut first = true; 792 | let mut comma_last = false; 793 | let mut was_newline = false; 794 | 795 | loop { 796 | let t = match self.get_token() { 797 | Ok(t) => t, 798 | Err(_) => return self.report_error(r#"expected "}""#), 799 | }; 800 | 801 | match t { 802 | BRACE_END => { 803 | if comma_last { 804 | // it is still reported as a syntax error, 805 | // but we can still analyze it as if it was a valid 806 | // table. 807 | let _ = self.report_error("expected value, trailing comma is not allowed"); 808 | } 809 | break self.add_token()?; 810 | } 811 | NEWLINE => { 812 | // To avoid infinite loop in case 813 | // new lines are whitelisted. 814 | if was_newline { 815 | break; 816 | } 817 | 818 | let _ = self.error("newline is not allowed in an inline table"); 819 | was_newline = true; 820 | } 821 | COMMA => { 822 | if comma_last { 823 | let _ = self.report_error(r#"unexpected ",""#); 824 | } 825 | 826 | if first { 827 | let _ = self.error(r#"unexpected ",""#); 828 | } else { 829 | self.token()?; 830 | } 831 | comma_last = true; 832 | was_newline = false; 833 | } 834 | _ => { 835 | was_newline = false; 836 | if !comma_last && !first { 837 | let _ = self.error(r#"expected ",""#); 838 | } 839 | let _ = whitelisted!( 840 | self, 841 | COMMA, 842 | with_node!(self.builder, ENTRY, self.parse_entry()) 843 | ); 844 | comma_last = false; 845 | } 846 | } 847 | 848 | first = false; 849 | } 850 | Ok(()) 851 | } 852 | 853 | fn parse_array(&mut self) -> ParserResult<()> { 854 | self.must_token_or(BRACKET_START, r#"expected "[""#)?; 855 | 856 | let mut first = true; 857 | let mut comma_last = false; 858 | loop { 859 | let t = match self.get_token() { 860 | Ok(t) => t, 861 | Err(_) => { 862 | let _ = self.report_error("unexpected EOF"); 863 | return Err(()); 864 | } 865 | }; 866 | 867 | match t { 868 | BRACKET_END => break self.add_token()?, 869 | NEWLINE => { 870 | self.token()?; 871 | continue; // as if it wasn't there, so it doesn't count as a first token 872 | } 873 | COMMA => { 874 | if first || comma_last { 875 | let _ = self.error(r#"unexpected ",""#); 876 | } 877 | self.token()?; 878 | comma_last = true; 879 | } 880 | _ => { 881 | if !comma_last && !first { 882 | let _ = self.error(r#"expected ",""#); 883 | } 884 | let _ = whitelisted!( 885 | self, 886 | COMMA, 887 | with_node!(self.builder, VALUE, self.parse_value()) 888 | ); 889 | comma_last = false; 890 | } 891 | } 892 | 893 | first = false; 894 | } 895 | Ok(()) 896 | } 897 | } 898 | 899 | fn check_underscores(s: &str, radix: u32) -> bool { 900 | if s.starts_with('_') || s.ends_with('_') { 901 | return false; 902 | } 903 | 904 | let mut last_char = 0 as char; 905 | 906 | for c in s.chars() { 907 | if c == '_' && !last_char.is_digit(radix) { 908 | return false; 909 | } 910 | if !c.is_digit(radix) && last_char == '_' { 911 | return false; 912 | } 913 | last_char = c; 914 | } 915 | 916 | true 917 | } 918 | 919 | 
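// Editor's note: a minimal usage sketch of the public `parse` API above.
// It is not part of the original source and the inputs are illustrative only.

#[cfg(test)]
mod parse_sketch {
    use super::parse;

    #[test]
    fn collects_errors_without_stopping() {
        // A valid document parses with no errors.
        let ok = parse("key = 1\n[table]\nother = \"value\"\n");
        assert!(ok.errors.is_empty());
        let root = ok.into_syntax();
        assert_eq!(root.kind(), crate::syntax::SyntaxKind::ROOT);

        // An invalid document (missing value) still yields a full tree
        // plus the collected errors.
        let bad = parse("key = \n");
        assert!(!bad.errors.is_empty());
        assert!(bad.into_syntax().text().to_string().contains("key"));
    }
}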
/// The final results of a parsing. 920 | /// It contains the green tree, and 921 | /// the errors that occurred during parsing. 922 | #[derive(Debug, Clone)] 923 | pub struct Parse { 924 | pub green_node: GreenNode, 925 | pub errors: Vec, 926 | } 927 | 928 | impl Parse { 929 | /// Turn the parse into a syntax node. 930 | pub fn into_syntax(self) -> SyntaxNode { 931 | SyntaxNode::new_root(self.green_node) 932 | } 933 | } 934 | -------------------------------------------------------------------------------- /src/formatter/mod.rs: -------------------------------------------------------------------------------- 1 | //! This module is used to format TOML. 2 | //! 3 | //! The formatting can be done on documents that might 4 | //! contain invalid syntax. In that case the invalid part is skipped. 5 | 6 | use crate::{ 7 | syntax::{SyntaxElement, SyntaxKind::*, SyntaxNode, SyntaxToken}, 8 | util::overlaps, 9 | }; 10 | use itertools::Itertools; 11 | use std::cell::OnceCell; 12 | use rowan::{GreenNode, NodeOrToken, TextRange}; 13 | use std::{ 14 | cmp, 15 | collections::VecDeque, 16 | ops::Range, 17 | rc::Rc, 18 | }; 19 | 20 | #[macro_use] 21 | mod macros; 22 | 23 | /// Simplified Keys struct for tracking table paths (used for indentation) 24 | #[derive(Debug, Clone, PartialEq, Eq)] 25 | struct Keys { 26 | keys: Vec, 27 | } 28 | 29 | impl Keys { 30 | fn from_syntax(syntax: SyntaxElement) -> Self { 31 | let mut keys = Vec::new(); 32 | if let Some(node) = syntax.as_node() { 33 | for child in node.children_with_tokens() { 34 | if child.kind() == IDENT { 35 | keys.push(child.to_string()); 36 | } 37 | } 38 | } 39 | Self { keys } 40 | } 41 | 42 | /// Check if current key contains (is nested under) another key 43 | fn contains(&self, other: &Keys) -> bool { 44 | if other.keys.len() > self.keys.len() { 45 | return false; 46 | } 47 | self.keys.iter().zip(&other.keys).all(|(a, b)| a == b) 48 | } 49 | } 50 | 51 | create_options!( 52 | /// All the formatting options. 53 | #[derive(Debug, Clone, Eq, PartialEq)] 54 | pub struct Options { 55 | /// Align entries vertically. 56 | /// 57 | /// Entries that have table headers, comments, 58 | /// or blank lines between them are not aligned. 59 | pub align_entries: bool, 60 | 61 | /// Align consecutive comments after entries and items vertically. 62 | /// 63 | /// This applies to comments that are after entries or array items. 64 | pub align_comments: bool, 65 | 66 | /// If `align_comments` is true, apply the alignment in cases where 67 | /// there's only one comment. 68 | pub align_single_comments: bool, 69 | 70 | /// Put trailing commas for multiline 71 | /// arrays. 72 | pub array_trailing_comma: bool, 73 | 74 | /// Automatically expand arrays to multiple lines once they 75 | /// exceed the configured `column_width`. 76 | pub array_auto_expand: bool, 77 | 78 | /// Expand values (e.g.) inside inline tables 79 | /// where possible. 80 | pub inline_table_expand: bool, 81 | 82 | /// Automatically collapse arrays if they 83 | /// fit in one line. 84 | /// 85 | /// The array won't be collapsed if it 86 | /// contains a comment. 87 | pub array_auto_collapse: bool, 88 | 89 | /// Omit whitespace padding inside single-line arrays. 90 | pub compact_arrays: bool, 91 | 92 | /// Omit whitespace padding inside inline tables. 93 | pub compact_inline_tables: bool, 94 | 95 | /// Omit whitespace around `=`. 96 | pub compact_entries: bool, 97 | 98 | /// Target maximum column width after which 99 | /// arrays are expanded into new lines. 
100 | /// 101 | /// This is best-effort and might not be accurate. 102 | pub column_width: usize, 103 | 104 | /// Indent subtables if they come in order. 105 | pub indent_tables: bool, 106 | 107 | /// Indent entries under tables. 108 | pub indent_entries: bool, 109 | 110 | /// Indentation to use, should be tabs or spaces 111 | /// but technically could be anything. 112 | pub indent_string: String, 113 | 114 | /// Add trailing newline to the source. 115 | pub trailing_newline: bool, 116 | 117 | /// Alphabetically reorder keys that are not separated by blank lines. 118 | pub reorder_keys: bool, 119 | 120 | /// Alphabetically reorder array values that are not separated by blank lines. 121 | pub reorder_arrays: bool, 122 | 123 | /// Alphabetically reorder inline table values. 124 | pub reorder_inline_tables: bool, 125 | 126 | /// The maximum amount of consecutive blank lines allowed. 127 | pub allowed_blank_lines: usize, 128 | 129 | /// Use CRLF line endings 130 | pub crlf: bool, 131 | } 132 | ); 133 | 134 | #[derive(Debug)] 135 | pub enum OptionParseError { 136 | InvalidOption(String), 137 | InvalidValue { 138 | key: String, 139 | error: Box, 140 | }, 141 | } 142 | 143 | impl core::fmt::Display for OptionParseError { 144 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 145 | write!( 146 | f, 147 | "invalid formatting option: {}", 148 | match self { 149 | OptionParseError::InvalidOption(k) => { 150 | format!(r#"invalid option "{}""#, k) 151 | } 152 | OptionParseError::InvalidValue { key, error } => { 153 | format!(r#"invalid value for option "{}": {}"#, key, error) 154 | } 155 | } 156 | ) 157 | } 158 | } 159 | 160 | impl std::error::Error for OptionParseError {} 161 | 162 | impl Default for Options { 163 | fn default() -> Self { 164 | Options { 165 | align_entries: false, 166 | align_comments: true, 167 | align_single_comments: true, 168 | array_trailing_comma: true, 169 | array_auto_expand: true, 170 | array_auto_collapse: true, 171 | compact_arrays: true, 172 | compact_inline_tables: false, 173 | compact_entries: false, 174 | column_width: 80, 175 | indent_tables: false, 176 | indent_entries: false, 177 | inline_table_expand: true, 178 | trailing_newline: true, 179 | allowed_blank_lines: 2, 180 | indent_string: " ".into(), 181 | reorder_keys: false, 182 | reorder_arrays: false, 183 | reorder_inline_tables: false, 184 | crlf: false, 185 | } 186 | } 187 | } 188 | 189 | impl Options { 190 | fn newline(&self) -> &'static str { 191 | if self.crlf { 192 | "\r\n" 193 | } else { 194 | "\n" 195 | } 196 | } 197 | 198 | fn newlines(&self, count: usize) -> impl Iterator { 199 | std::iter::repeat_n(self.newline(), usize::min(count, self.allowed_blank_lines + 1)) 200 | } 201 | 202 | fn should_align_comments(&self, comment_count: usize) -> bool { 203 | (comment_count != 1 || self.align_single_comments) && self.align_comments 204 | } 205 | } 206 | 207 | #[derive(Debug, Clone)] 208 | struct Context { 209 | indent_level: usize, 210 | force_multiline: bool, 211 | errors: Rc<[TextRange]>, 212 | } 213 | 214 | impl Default for Context { 215 | fn default() -> Self { 216 | Self { 217 | indent_level: Default::default(), 218 | force_multiline: Default::default(), 219 | errors: Rc::from([]), 220 | } 221 | } 222 | } 223 | 224 | impl Context { 225 | /// Update options based on the text range. 226 | /// This is a no-op now that scoped options have been removed. 
227 | fn update_options(&self, _opts: &mut Options, _range: TextRange) {
228 | // No-op: scoped options removed
229 | }
230 |
231 | fn error_at(&self, range: TextRange) -> bool {
232 | for error_range in self.errors.iter().copied() {
233 | if overlaps(range, error_range) {
234 | return true;
235 | }
236 | }
237 |
238 | false
239 | }
240 |
241 | fn indent<'o>(&self, opts: &'o Options) -> impl Iterator<Item = &'o str> {
242 | std::iter::repeat_n(opts.indent_string.as_ref(), self.indent_level)
243 | }
244 | }
245 |
246 | /// Formats a parsed TOML green tree.
247 | pub fn format_green(green: GreenNode, options: Options) -> String {
248 | format_syntax(SyntaxNode::new_root(green), options)
249 | }
250 |
251 | /// Parses then formats a TOML document, skipping ranges that contain syntax errors.
252 | pub fn format(src: &str, options: Options) -> String {
253 | let p = crate::parser::parse(src);
254 |
255 | let ctx = Context {
256 | errors: p.errors.iter().map(|err| err.range).collect(),
257 | ..Context::default()
258 | };
259 |
260 | format_impl(p.into_syntax(), options, ctx)
261 | }
262 |
263 | /// Formats a parsed TOML syntax tree.
264 | pub fn format_syntax(node: SyntaxNode, options: Options) -> String {
265 | let mut s = format_impl(node, options.clone(), Context::default());
266 |
267 | s = s.trim_end().into();
268 |
269 | if options.trailing_newline {
270 | s += options.newline();
271 | }
272 |
273 | s
274 | }
275 |
276 | fn format_impl(node: SyntaxNode, options: Options, context: Context) -> String {
277 | assert!(node.kind() == ROOT);
278 | let mut formatted = format_root(node, &options, &context);
279 |
280 | if formatted.ends_with("\r\n") {
281 | formatted.truncate(formatted.len() - 2);
282 | } else if formatted.ends_with('\n') {
283 | formatted.truncate(formatted.len() - 1);
284 | }
285 |
286 | if options.trailing_newline {
287 | formatted += options.newline();
288 | }
289 |
290 | formatted
291 | }
292 |
293 | struct FormattedEntry {
294 | syntax: SyntaxElement,
295 | key: String,
296 | /// This field is used to cache the "cleaned" version of the key and should only
297 | /// be accessed through the `cleaned_key` helper method.
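///
/// For example (editor's illustration): a key written as `foo."bar"` is
/// cached as `["foo", "bar"]`, so quoting does not affect ordering.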
298 | cleaned_key: OnceCell>, 299 | value: String, 300 | comment: Option, 301 | } 302 | 303 | impl FormattedEntry { 304 | fn cleaned_key(&self) -> &Vec { 305 | self.cleaned_key.get_or_init(|| { 306 | self.key 307 | .replace(['\'', '"'], "") 308 | .split('.') 309 | .map(ToOwned::to_owned) 310 | .collect() 311 | }) 312 | } 313 | } 314 | 315 | impl PartialEq for FormattedEntry { 316 | fn eq(&self, other: &Self) -> bool { 317 | self.cleaned_key().eq(other.cleaned_key()) 318 | } 319 | } 320 | 321 | impl Eq for FormattedEntry {} 322 | 323 | impl PartialOrd for FormattedEntry { 324 | fn partial_cmp(&self, other: &Self) -> Option { 325 | Some(self.cmp(other)) 326 | } 327 | } 328 | 329 | impl Ord for FormattedEntry { 330 | fn cmp(&self, other: &Self) -> cmp::Ordering { 331 | self.cleaned_key().cmp(other.cleaned_key()) 332 | } 333 | } 334 | 335 | impl FormattedItem for FormattedEntry { 336 | fn write_to(&self, formatted: &mut String, options: &Options) { 337 | *formatted += &self.key; 338 | if options.compact_entries { 339 | *formatted += "="; 340 | } else { 341 | *formatted += " = "; 342 | } 343 | *formatted += &self.value; 344 | } 345 | 346 | fn trailing_comment(&self) -> Option { 347 | self.comment.clone() 348 | } 349 | 350 | fn syntax(&self) -> SyntaxElement { 351 | self.syntax.clone() 352 | } 353 | } 354 | 355 | fn format_root(node: SyntaxNode, options: &Options, context: &Context) -> String { 356 | assert!(node.kind() == ROOT); 357 | let mut formatted = String::new(); 358 | 359 | let mut entry_group: Vec = Vec::new(); 360 | 361 | // We defer printing the entries so that we can align them vertically. 362 | // Whenever an entry is added to the group, we skip its trailing newline, 363 | // otherwise the inserted new line would end up before the actual entries. 364 | let mut skip_newlines = 0; 365 | 366 | // We defer printing comments as well because we need to know 367 | // what comes after them for correct indentation. 368 | let mut comment_group: Vec = Vec::new(); 369 | 370 | let mut context = context.clone(); 371 | 372 | // Table key for determining indents 373 | let mut table_key_indent_history: Vec<(Keys, usize)> = Vec::new(); 374 | 375 | fn add_comments( 376 | comments: &mut Vec, 377 | formatted: &mut String, 378 | context: &Context, 379 | options: &Options, 380 | ) -> bool { 381 | let were_comments = !comments.is_empty(); 382 | 383 | for (idx, comment) in comments.drain(0..).enumerate() { 384 | if idx != 0 { 385 | *formatted += options.newline(); 386 | } 387 | formatted.extend(context.indent(options)); 388 | *formatted += &comment; 389 | } 390 | 391 | were_comments 392 | } 393 | 394 | let mut dangling_newline_count = 0; 395 | let mut scoped_options = options.clone(); 396 | 397 | for c in node.children_with_tokens() { 398 | if context.error_at(c.text_range()) { 399 | formatted += &c.to_string(); 400 | continue; 401 | } 402 | 403 | let c_range = c.text_range(); 404 | 405 | match c { 406 | NodeOrToken::Node(node) => match node.kind() { 407 | TABLE_ARRAY_HEADER | TABLE_HEADER => { 408 | if add_entries(&mut entry_group, &mut formatted, &scoped_options, &context) { 409 | formatted += scoped_options.newline(); 410 | skip_newlines = 0; 411 | } 412 | 413 | scoped_options = options.clone(); 414 | context.update_options(&mut scoped_options, c_range); 415 | 416 | // We treat everything as indented other than table headers from now on. 
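                    // Entry bodies start at indent level 1; the header itself is written one
                    // level shallower via `header_context` below, so only the entries gain indent.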
417 | if scoped_options.indent_entries && context.indent_level == 0 { 418 | context.indent_level = 1; 419 | } 420 | 421 | if let Some(key) = node.first_child().map(Into::into).map(Keys::from_syntax) { 422 | if scoped_options.indent_tables { 423 | context.indent_level = table_indent_level( 424 | &table_key_indent_history, 425 | &key, 426 | if scoped_options.indent_entries { 1 } else { 0 }, 427 | ); 428 | } 429 | table_key_indent_history.push((key.clone(), context.indent_level)); 430 | } 431 | 432 | let mut header_context = context.clone(); 433 | 434 | if scoped_options.indent_entries { 435 | header_context.indent_level = header_context.indent_level.saturating_sub(1); 436 | } 437 | 438 | if add_comments( 439 | &mut comment_group, 440 | &mut formatted, 441 | &header_context, 442 | &scoped_options, 443 | ) { 444 | formatted += scoped_options.newline(); 445 | skip_newlines = 0; 446 | } 447 | 448 | let header = format_table_header(node, &scoped_options, &header_context); 449 | let comment = header.trailing_comment(); 450 | 451 | if scoped_options.indent_tables { 452 | formatted.extend(header_context.indent(&scoped_options)); 453 | } 454 | 455 | header.write_to(&mut formatted, &scoped_options); 456 | if let Some(c) = comment { 457 | formatted += " "; 458 | formatted += &c; 459 | } 460 | } 461 | ENTRY => { 462 | scoped_options = options.clone(); 463 | context.update_options(&mut scoped_options, c_range); 464 | 465 | if add_comments( 466 | &mut comment_group, 467 | &mut formatted, 468 | &context, 469 | &scoped_options, 470 | ) { 471 | formatted += scoped_options.newline(); 472 | skip_newlines = 0; 473 | } 474 | 475 | entry_group.push(format_entry(node, &scoped_options, &context)); 476 | skip_newlines += 1; 477 | } 478 | _ => unreachable!(), 479 | }, 480 | NodeOrToken::Token(token) => match token.kind() { 481 | NEWLINE => { 482 | let mut newline_count = token.text().newline_count(); 483 | 484 | match dangling_newlines(token.clone()) { 485 | Some(dnl) => { 486 | dangling_newline_count += dnl; 487 | continue; 488 | } 489 | None => { 490 | newline_count += dangling_newline_count; 491 | dangling_newline_count = 0; 492 | } 493 | } 494 | 495 | if newline_count > 1 { 496 | add_comments( 497 | &mut comment_group, 498 | &mut formatted, 499 | &context, 500 | &scoped_options, 501 | ); 502 | add_entries(&mut entry_group, &mut formatted, &scoped_options, &context); 503 | skip_newlines = 0; 504 | } 505 | 506 | formatted.extend( 507 | scoped_options.newlines(newline_count.saturating_sub(skip_newlines)), 508 | ); 509 | } 510 | COMMENT => { 511 | if add_entries(&mut entry_group, &mut formatted, &scoped_options, &context) { 512 | formatted += scoped_options.newline(); 513 | skip_newlines = 0; 514 | } 515 | comment_group.push(token.text().to_string()); 516 | skip_newlines += 1; 517 | } 518 | WHITESPACE => {} 519 | _ => formatted += token.text(), 520 | }, 521 | } 522 | } 523 | 524 | add_comments( 525 | &mut comment_group, 526 | &mut formatted, 527 | &context, 528 | &scoped_options, 529 | ); 530 | add_entries(&mut entry_group, &mut formatted, &scoped_options, &context); 531 | 532 | formatted 533 | } 534 | 535 | /// Determine the indentation level using the indentation history. 536 | /// 537 | /// The latest key that is a strict prefix is used and indented. If none is found, the default 538 | /// indentation is used. 
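/// For example, with a history of `[("a", 0), ("a.b", 1)]`, the key `a.b.c` is
/// indented to level 2 (one deeper than its closest strict prefix `a.b`), while an
/// unrelated key such as `d` falls back to `default_indent`.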
539 | fn table_indent_level( 540 | history: &[(Keys, usize)], 541 | current_key: &Keys, 542 | default_indent: usize, 543 | ) -> usize { 544 | history 545 | .iter() 546 | .rev() 547 | .find_map(|(previous_key, indent)| { 548 | (current_key.contains(previous_key) && current_key != previous_key) 549 | .then_some(*indent + 1) 550 | }) 551 | .unwrap_or(default_indent) 552 | } 553 | 554 | /// Add entries to the formatted string. 555 | fn add_entries( 556 | entry_group: &mut Vec, 557 | formatted: &mut String, 558 | options: &Options, 559 | context: &Context, 560 | ) -> bool { 561 | let were_entries = !entry_group.is_empty(); 562 | 563 | if options.reorder_keys { 564 | entry_group.sort(); 565 | } 566 | 567 | let indent_chars_count = context.indent_level * options.indent_string.chars().count(); 568 | 569 | // We check for too long lines, and try to expand them if possible. 570 | // We don't take vertical alignment into account for simplicity. 571 | if options.array_auto_expand { 572 | for entry in entry_group.iter_mut() { 573 | let comment_chars_count = entry 574 | .comment 575 | .as_ref() 576 | .map( 577 | |c| c.chars().count() + 1, // account for the separator ' ' as well 578 | ) 579 | .unwrap_or(0); 580 | 581 | let line_count = entry.value.split('\n').count(); 582 | 583 | // check each line of the value 584 | // for the first line we include the actual indent, key, and the eq parts as well 585 | for (idx, line) in entry.value.split('\n').enumerate() { 586 | let mut chars_count = line.chars().count(); 587 | if idx == 0 { 588 | chars_count += indent_chars_count; 589 | chars_count += entry.key.chars().count(); 590 | chars_count += if options.compact_entries { 1 } else { 3 }; // " = " 591 | } 592 | 593 | // Include comment in the last line. 594 | if idx == line_count - 1 { 595 | chars_count += comment_chars_count; 596 | } 597 | 598 | if chars_count > options.column_width { 599 | let mut context = context.clone(); 600 | context.force_multiline = true; 601 | 602 | // too long, reformat the value of the entry 603 | let value = format_value( 604 | entry 605 | .syntax 606 | .as_node() 607 | .unwrap() 608 | .children() 609 | .find(|n| n.kind() == VALUE) 610 | .unwrap(), 611 | options, 612 | &context, 613 | ); 614 | 615 | entry.value.clear(); 616 | 617 | if let Some(c) = value.trailing_comment() { 618 | debug_assert!( 619 | entry.comment.is_none() || entry.comment.clone().unwrap() == c 620 | ); 621 | entry.comment = Some(c); 622 | } 623 | 624 | value.write_to(&mut entry.value, options); 625 | break; 626 | } 627 | } 628 | } 629 | } 630 | 631 | let mut comment_count = 0; 632 | // Transform the entries into generic rows that can be aligned. 633 | let rows = entry_group 634 | .drain(0..) 
635 | .map(|e| { 636 | let mut row = Vec::with_capacity(5); 637 | 638 | row.push(context.indent(options).collect::()); 639 | row.push(e.key); 640 | row.push("=".to_string()); 641 | row.push(e.value); 642 | if let Some(c) = e.comment { 643 | row.push(c); 644 | comment_count += 1; 645 | } 646 | 647 | row 648 | }) 649 | .collect::>(); 650 | 651 | let align_comments = options.should_align_comments(comment_count); 652 | *formatted += &format_rows( 653 | if !options.align_entries && !align_comments { 654 | 0..0 655 | } else if !options.align_entries && align_comments { 656 | 3..usize::MAX 657 | } else if options.align_entries && !align_comments { 658 | 0..3 659 | } else { 660 | 0..usize::MAX 661 | }, 662 | if options.compact_entries { 663 | 3..usize::MAX 664 | } else { 665 | 1..usize::MAX 666 | }, 667 | &rows, 668 | options.newline(), 669 | " ", 670 | ); 671 | 672 | were_entries 673 | } 674 | 675 | fn format_entry(node: SyntaxNode, options: &Options, context: &Context) -> FormattedEntry { 676 | let mut key = String::new(); 677 | let mut value = String::new(); 678 | let mut comment = None; 679 | 680 | for c in node.children_with_tokens() { 681 | match c { 682 | NodeOrToken::Node(n) => match n.kind() { 683 | KEY => { 684 | format_key(n, &mut key, options, context); 685 | } 686 | VALUE => { 687 | let val = format_value(n, options, context); 688 | let c = val.trailing_comment(); 689 | 690 | if c.is_some() { 691 | debug_assert!(comment.is_none()); 692 | comment = c; 693 | } 694 | 695 | val.write_to(&mut value, options); 696 | } 697 | _ => unreachable!(), 698 | }, 699 | NodeOrToken::Token(t) => { 700 | if let COMMENT = t.kind() { 701 | debug_assert!(comment.is_none()); 702 | comment = Some(t.text().into()) 703 | } 704 | } 705 | } 706 | } 707 | 708 | FormattedEntry { 709 | syntax: node.into(), 710 | key, 711 | cleaned_key: OnceCell::new(), 712 | value, 713 | comment, 714 | } 715 | } 716 | 717 | fn format_key(node: SyntaxNode, formatted: &mut String, _options: &Options, _context: &Context) { 718 | // Idents and periods without whitespace 719 | for c in node.children_with_tokens() { 720 | match c { 721 | NodeOrToken::Node(_) => {} 722 | NodeOrToken::Token(t) => match t.kind() { 723 | WHITESPACE | NEWLINE => {} 724 | _ => { 725 | *formatted += t.text(); 726 | } 727 | }, 728 | } 729 | } 730 | } 731 | 732 | fn format_value(node: SyntaxNode, options: &Options, context: &Context) -> impl FormattedItem { 733 | let mut value = String::new(); 734 | let mut comment = None; 735 | for c in node.children_with_tokens() { 736 | match c { 737 | NodeOrToken::Node(n) => match n.kind() { 738 | ARRAY => { 739 | let formatted = format_array(n, options, context); 740 | 741 | let c = formatted.trailing_comment(); 742 | 743 | if let Some(c) = c { 744 | debug_assert!(comment.is_none()); 745 | comment = Some(c) 746 | } 747 | 748 | debug_assert!(value.is_empty()); 749 | formatted.write_to(&mut value, options); 750 | } 751 | INLINE_TABLE => { 752 | let formatted = format_inline_table(n, options, context); 753 | 754 | let c = formatted.trailing_comment(); 755 | 756 | if let Some(c) = c { 757 | debug_assert!(comment.is_none()); 758 | comment = Some(c) 759 | } 760 | 761 | debug_assert!(value.is_empty()); 762 | 763 | formatted.write_to(&mut value, options); 764 | } 765 | _ => unreachable!(), 766 | }, 767 | NodeOrToken::Token(t) => match t.kind() { 768 | NEWLINE | WHITESPACE => {} 769 | COMMENT => { 770 | debug_assert!(comment.is_none()); 771 | comment = Some(t.text().into()); 772 | } 773 | _ => { 774 | value = t.text().into(); 775 | } 
776 | }, 777 | } 778 | } 779 | 780 | (node.into(), value, comment) 781 | } 782 | 783 | fn format_inline_table( 784 | node: SyntaxNode, 785 | options: &Options, 786 | context: &Context, 787 | ) -> impl FormattedItem { 788 | let mut formatted = String::new(); 789 | let mut comment = None; 790 | 791 | let mut context = context.clone(); 792 | if context.force_multiline { 793 | context.force_multiline = options.inline_table_expand; 794 | } 795 | let context = &context; 796 | 797 | let child_count = node.children().count(); 798 | 799 | if node.children().count() == 0 { 800 | formatted = "{}".into(); 801 | } 802 | 803 | let mut sorted_children = if options.reorder_inline_tables { 804 | Some( 805 | node.children() 806 | .sorted_unstable_by(|x, y| x.to_string().cmp(&y.to_string())) 807 | .collect::>(), 808 | ) 809 | } else { 810 | None 811 | }; 812 | 813 | let mut node_index = 0; 814 | for c in node.children_with_tokens() { 815 | match c { 816 | NodeOrToken::Node(n) => { 817 | if node_index != 0 { 818 | formatted += ", "; 819 | } 820 | 821 | let child = if options.reorder_inline_tables { 822 | sorted_children 823 | .as_mut() 824 | .and_then(|children| children.pop_front()) 825 | .unwrap_or(n) 826 | } else { 827 | n 828 | }; 829 | 830 | let entry = format_entry(child, options, context); 831 | debug_assert!(entry.comment.is_none()); 832 | entry.write_to(&mut formatted, options); 833 | 834 | node_index += 1; 835 | } 836 | NodeOrToken::Token(t) => match t.kind() { 837 | BRACE_START => { 838 | if child_count == 0 { 839 | // We're only interested in trailing comments. 840 | continue; 841 | } 842 | 843 | formatted += "{"; 844 | if !options.compact_inline_tables { 845 | formatted += " "; 846 | } 847 | } 848 | BRACE_END => { 849 | if child_count == 0 { 850 | // We're only interested in trailing comments. 851 | continue; 852 | } 853 | 854 | if !options.compact_inline_tables { 855 | formatted += " "; 856 | } 857 | formatted += "}"; 858 | } 859 | WHITESPACE | COMMA => {} 860 | COMMENT => { 861 | debug_assert!(comment.is_none()); 862 | comment = Some(t.text().into()); 863 | } 864 | _ => formatted += t.text(), 865 | }, 866 | } 867 | } 868 | 869 | (node.into(), formatted, comment) 870 | } 871 | // Check whether the array spans multiple lines in its current form. 872 | fn is_array_multiline(node: &SyntaxNode) -> bool { 873 | node.descendants_with_tokens().any(|n| n.kind() == NEWLINE) 874 | } 875 | 876 | fn can_collapse_array(node: &SyntaxNode) -> bool { 877 | !node.descendants_with_tokens().any(|n| n.kind() == COMMENT) 878 | } 879 | 880 | fn format_array(node: SyntaxNode, options: &Options, context: &Context) -> impl FormattedItem { 881 | let mut multiline = is_array_multiline(&node) || context.force_multiline; 882 | 883 | let mut formatted = String::new(); 884 | 885 | // We always try to collapse it if possible. 886 | if can_collapse_array(&node) && options.array_auto_collapse && !context.force_multiline { 887 | multiline = false; 888 | } 889 | 890 | // We use the same strategy as for entries, refer to [`format_root`]. 891 | let mut skip_newlines = 0; 892 | 893 | // Formatted value, optional trailing comment 894 | // The value must not include the comma at the end. 
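    // Commas are tracked per position in `commas_group` and re-attached in `add_values`
    // after any `reorder_arrays` sort, so only the final element can end up without one.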
895 | let mut value_group: Vec<(String, Option)> = Vec::new(); 896 | let mut commas_group: Vec = Vec::new(); 897 | 898 | let add_values = |value_group: &mut Vec<(String, Option)>, 899 | commas_group: &mut Vec, 900 | formatted: &mut String, 901 | context: &Context| 902 | -> bool { 903 | let were_values = !value_group.is_empty(); 904 | 905 | if options.reorder_arrays { 906 | value_group.sort_unstable_by(|x, y| x.0.cmp(&y.0)); 907 | } 908 | 909 | for (has_comma, p) in commas_group.drain(0..).zip(value_group.iter_mut()) { 910 | if has_comma { 911 | p.0 += "," 912 | }; 913 | } 914 | 915 | if !multiline { 916 | for (idx, (val, comment)) in value_group.drain(0..).enumerate() { 917 | debug_assert!(comment.is_none()); 918 | if idx != 0 { 919 | *formatted += " " 920 | } 921 | 922 | *formatted += &val; 923 | } 924 | 925 | return were_values; 926 | } 927 | 928 | let mut comment_count = 0; 929 | let rows = value_group 930 | .drain(0..) 931 | .map(|(value, comment)| { 932 | let mut row = Vec::with_capacity(5); 933 | 934 | row.push(context.indent(options).collect::()); 935 | row.push(value); 936 | if let Some(c) = comment { 937 | row.push(c); 938 | comment_count += 1; 939 | } 940 | 941 | row 942 | }) 943 | .collect::>(); 944 | 945 | let align_comments = options.should_align_comments(comment_count); 946 | *formatted += &format_rows( 947 | if align_comments { 0..usize::MAX } else { 0..0 }, 948 | 1..usize::MAX, 949 | &rows, 950 | options.newline(), 951 | " ", 952 | ); 953 | 954 | were_values 955 | }; 956 | 957 | let node_count = node.children().count(); 958 | 959 | let mut inner_context = context.clone(); 960 | 961 | if multiline { 962 | inner_context.indent_level += 1; 963 | } 964 | 965 | let mut dangling_newline_count = 0; 966 | 967 | let mut node_index = 0; 968 | for c in node.children_with_tokens() { 969 | match c { 970 | NodeOrToken::Node(n) => match n.kind() { 971 | VALUE => { 972 | if multiline && formatted.ends_with('[') { 973 | formatted += options.newline(); 974 | } 975 | 976 | let val = format_value(n, options, &inner_context); 977 | let mut val_string = String::new(); 978 | 979 | val.write_to(&mut val_string, options); 980 | 981 | let has_comma = 982 | node_index < node_count - 1 || (multiline && options.array_trailing_comma); 983 | commas_group.push(has_comma); 984 | 985 | value_group.push((val_string, val.trailing_comment())); 986 | skip_newlines += 1; 987 | 988 | node_index += 1; 989 | } 990 | _ => { 991 | if cfg!(debug_assertions) { 992 | unreachable!() 993 | } 994 | } 995 | }, 996 | NodeOrToken::Token(t) => match t.kind() { 997 | BRACKET_START => { 998 | formatted += "["; 999 | if !options.compact_arrays && !multiline { 1000 | formatted += " "; 1001 | } 1002 | } 1003 | BRACKET_END => { 1004 | add_values( 1005 | &mut value_group, 1006 | &mut commas_group, 1007 | &mut formatted, 1008 | &inner_context, 1009 | ); 1010 | 1011 | if multiline { 1012 | if !formatted.ends_with('\n') { 1013 | formatted += options.newline(); 1014 | } 1015 | 1016 | formatted.extend(context.indent(options)); 1017 | } else if !options.compact_arrays { 1018 | formatted += " "; 1019 | } 1020 | formatted += "]"; 1021 | } 1022 | NEWLINE => { 1023 | if !multiline { 1024 | continue; 1025 | } 1026 | 1027 | let mut newline_count = t.text().newline_count(); 1028 | 1029 | match dangling_newlines(t.clone()) { 1030 | Some(dnl) => { 1031 | dangling_newline_count += dnl; 1032 | continue; 1033 | } 1034 | None => { 1035 | newline_count += dangling_newline_count; 1036 | dangling_newline_count = 0; 1037 | } 1038 | } 1039 | 1040 | if 
newline_count > 1 { 1041 | add_values( 1042 | &mut value_group, 1043 | &mut commas_group, 1044 | &mut formatted, 1045 | &inner_context, 1046 | ); 1047 | skip_newlines = 0; 1048 | } 1049 | 1050 | formatted.extend(options.newlines(newline_count.saturating_sub(skip_newlines))); 1051 | } 1052 | COMMENT => { 1053 | let newline_before = t 1054 | .siblings_with_tokens(rowan::Direction::Prev) 1055 | .skip(1) 1056 | .find(|s| s.kind() != WHITESPACE) 1057 | .map(|s| s.kind() == NEWLINE) 1058 | .unwrap_or(false); 1059 | 1060 | if !newline_before && !value_group.is_empty() { 1061 | // It's actually trailing comment, so we add it to the last value. 1062 | value_group.last_mut().unwrap().1 = Some(t.text().to_string()); 1063 | continue; 1064 | } 1065 | 1066 | if add_values( 1067 | &mut value_group, 1068 | &mut commas_group, 1069 | &mut formatted, 1070 | &inner_context, 1071 | ) { 1072 | formatted += options.newline(); 1073 | skip_newlines = 0; 1074 | } 1075 | 1076 | if formatted.ends_with('[') { 1077 | formatted += " "; 1078 | formatted += t.text(); 1079 | } else { 1080 | formatted.extend(inner_context.indent(options)); 1081 | formatted += t.text(); 1082 | } 1083 | } 1084 | _ => {} 1085 | }, 1086 | } 1087 | } 1088 | 1089 | if formatted.is_empty() { 1090 | formatted = "[]".into(); 1091 | } 1092 | 1093 | (node.into(), formatted, None) 1094 | } 1095 | 1096 | fn format_table_header( 1097 | node: SyntaxNode, 1098 | options: &Options, 1099 | context: &Context, 1100 | ) -> impl FormattedItem { 1101 | let mut formatted = String::new(); 1102 | let mut comment = None; 1103 | 1104 | for c in node.children_with_tokens() { 1105 | match c { 1106 | NodeOrToken::Node(n) => { 1107 | format_key(n, &mut formatted, options, context); 1108 | } 1109 | NodeOrToken::Token(t) => match t.kind() { 1110 | BRACKET_START | BRACKET_END => formatted += t.text(), 1111 | WHITESPACE | NEWLINE => {} 1112 | COMMENT => { 1113 | debug_assert!(comment.is_none()); 1114 | comment = Some(t.text().to_string()); 1115 | } 1116 | _ => formatted += t.text(), 1117 | }, 1118 | } 1119 | } 1120 | 1121 | (node.into(), formatted, comment) 1122 | } 1123 | 1124 | // Simply a tuple of the formatted item and an optional trailing comment. 1125 | impl> FormattedItem for (SyntaxElement, T, Option) { 1126 | fn write_to(&self, formatted: &mut String, _options: &Options) { 1127 | *formatted += self.1.as_ref() 1128 | } 1129 | 1130 | fn trailing_comment(&self) -> Option { 1131 | self.2.as_ref().map(|s| s.as_ref().to_string()) 1132 | } 1133 | 1134 | fn syntax(&self) -> SyntaxElement { 1135 | self.0.clone() 1136 | } 1137 | } 1138 | 1139 | trait FormattedItem { 1140 | #[allow(dead_code)] 1141 | fn syntax(&self) -> SyntaxElement; 1142 | #[allow(clippy::ptr_arg)] 1143 | fn write_to(&self, formatted: &mut String, options: &Options); 1144 | fn trailing_comment(&self) -> Option; 1145 | } 1146 | 1147 | trait NewlineCount { 1148 | fn newline_count(&self) -> usize; 1149 | } 1150 | 1151 | impl NewlineCount for &str { 1152 | fn newline_count(&self) -> usize { 1153 | self.chars().filter(|c| c == &'\n').count() 1154 | } 1155 | } 1156 | 1157 | // FIXME(docs) 1158 | fn format_rows( 1159 | align_range: Range, 1160 | separator_range: Range, 1161 | rows: &[R], 1162 | newline: &str, 1163 | separator: &str, 1164 | ) -> String 1165 | where 1166 | R: AsRef<[S]>, 1167 | S: AsRef, 1168 | { 1169 | let mut out = String::new(); 1170 | 1171 | // We currently don't support vertical alignment of complex data. 
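    // A cell that already spans multiple lines would throw the column widths off,
    // so any '\n' anywhere in the rows disables padding for the whole group.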
1172 |     let can_align = rows
1173 |         .iter()
1174 |         .flat_map(|r| r.as_ref().iter())
1175 |         .all(|s| !s.as_ref().contains('\n'));
1176 |
1177 |     let diff_widths = |range: Range<usize>, row: &R| -> usize {
1178 |         let mut max_width = 0_usize;
1179 |
1180 |         for row in rows {
1181 |             let row_len = row.as_ref().len();
1182 |
1183 |             let range =
1184 |                 cmp::min(range.start, row_len.saturating_sub(1))..cmp::min(range.end, row_len);
1185 |
1186 |             max_width = cmp::max(
1187 |                 max_width,
1188 |                 row.as_ref()[range]
1189 |                     .iter()
1190 |                     .map(|s| s.as_ref().chars().count())
1191 |                     .sum(),
1192 |             );
1193 |         }
1194 |
1195 |         let row_width = row.as_ref()[range]
1196 |             .iter()
1197 |             .map(|s| s.as_ref().chars().count())
1198 |             .sum::<usize>();
1199 |
1200 |         max_width - row_width
1201 |     };
1202 |
1203 |     for (row_idx, row) in rows.iter().enumerate() {
1204 |         if row_idx != 0 {
1205 |             out += newline;
1206 |         }
1207 |
1208 |         let mut last_align_idx = 0_usize;
1209 |
1210 |         for (item_idx, item) in row.as_ref().iter().enumerate() {
1211 |             if item_idx > separator_range.start
1212 |                 && item_idx <= separator_range.end.saturating_add(1)
1213 |                 && item_idx < row.as_ref().len()
1214 |             {
1215 |                 out += separator;
1216 |             }
1217 |
1218 |             out += item.as_ref();
1219 |
1220 |             if can_align
1221 |                 && align_range.start <= item_idx
1222 |                 && align_range.end > item_idx
1223 |                 && item_idx < row.as_ref().len() - 1
1224 |             {
1225 |                 let diff = diff_widths(last_align_idx..item_idx + 1, row);
1226 |                 out.extend(std::iter::repeat_n(" ", diff));
1227 |                 last_align_idx = item_idx + 1;
1228 |             }
1229 |         }
1230 |     }
1231 |
1232 |     out
1233 | }
1234 |
1235 | /// Special handling of blank lines.
1236 | ///
1237 | /// The parser was designed so that newline (LF) characters and other whitespace
1238 | /// (" " and "\t") end up in separate tokens.
1239 | ///
1240 | /// Generally we count blank lines by counting the LF characters in a single token,
1241 | /// but if any of the consecutive blank lines contain whitespace, the run is split
1242 | /// across several tokens and that count becomes unreliable.
1243 | ///
1244 | /// So when a newline token is followed by whitespace and then more newlines, we
1245 | /// return its newline count here so that the caller can add these "dangling"
1246 | /// counts together.
1247 | fn dangling_newlines(t: SyntaxToken) -> Option<usize> {
1248 |     let newline_count = t.text().newline_count();
1249 |
1250 |     if let Some(nt) = t.next_sibling_or_token()
1251 |         && let Some(nnt) = nt.next_sibling_or_token()
1252 |         && nt.kind() == WHITESPACE && nnt.kind() == NEWLINE {
1253 |         return Some(newline_count);
1254 |     }
1255 |
1256 |     None
1257 | }
--------------------------------------------------------------------------------
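A minimal usage sketch of the public entry points defined above (`format` and `Options`). The `crate::formatter` path and the sample input are the editor's assumptions based on the source layout (`src/formatter/mod.rs`); they are not taken from the listing itself.

// Editor's sketch, not part of the source tree: drives the formatter defined above.
// Assumes this module is reachable as `crate::formatter` (per `src/formatter/mod.rs`).
use crate::formatter::{format, Options};

fn demo() -> String {
    let src = "title   =  'TOML example'\n[table]\nvalue=1\n";

    let options = Options {
        align_entries: true, // pad keys so the `=` signs line up within a group
        column_width: 60,    // entries longer than this get their arrays expanded
        ..Options::default()
    };

    // Ranges containing syntax errors are copied through verbatim rather than formatted.
    format(src, options)
}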