├── .gitignore ├── test-data ├── analytics │ ├── _cargo6.toml │ ├── _cargo5.toml │ ├── _cargo9.toml │ ├── _cargo8.toml │ ├── _cargo11.toml │ ├── _cargo12.toml │ ├── _cargo4.toml │ ├── _cargo7.toml │ ├── _cargo10.toml │ ├── _cargo2.toml │ ├── _cargo3.toml │ └── _cargo1.toml ├── invalid │ ├── bare-key-3.toml │ ├── bare-key-1.toml │ ├── int-0-padded.toml │ ├── int-signed-bin.toml │ ├── int-signed-hex.toml │ ├── int-signed-oct.toml │ ├── taplo-invalid-float.toml │ ├── bare-key-2.toml │ ├── comment-control-1.toml │ ├── comment-control-2.toml │ ├── comment-control-3.toml │ ├── comment-control-4.toml │ ├── key-value-pair-1.toml │ ├── string-basic-control-1.toml │ ├── string-basic-control-2.toml │ ├── string-basic-control-3.toml │ ├── string-basic-control-4.toml │ ├── string-basic-unknown-escape.toml │ ├── string-literal-control-1.toml │ ├── string-literal-control-2.toml │ ├── string-literal-control-3.toml │ ├── string-literal-control-4.toml │ ├── taplo-invalid-array.toml │ ├── no-key-name.toml │ ├── taplo-invalid-array-comma-start.toml │ ├── taplo-table-before-array.toml │ ├── inline-table-trailing-comma.toml │ ├── string-basic-multiline-control-1.toml │ ├── string-basic-multiline-control-2.toml │ ├── string-basic-multiline-control-3.toml │ ├── string-basic-multiline-control-4.toml │ ├── string-basic-multiline-unknown-escape.toml │ ├── string-literal-multiline-control-1.toml │ ├── string-literal-multiline-control-2.toml │ ├── string-literal-multiline-control-3.toml │ ├── string-literal-multiline-control-4.toml │ ├── taplo-incomplete-inline-table.toml │ ├── string-basic-out-of-range-unicode-escape-1.toml │ ├── string-basic-out-of-range-unicode-escape-2.toml │ ├── key-value-pair-2.toml │ ├── multiple-key.toml │ ├── taplo-invalid-inline-table.toml │ ├── string-basic-multiline-invalid-backslash.toml │ ├── string-basic-multiline-out-of-range-unicode-escape-1.toml │ ├── string-basic-multiline-out-of-range-unicode-escape-2.toml │ ├── string-basic-multiline-quotes.toml │ ├── array-of-tables-1.toml │ ├── table-invalid-2.toml │ ├── inline-table-imutable-2.toml │ ├── taplo-duplicate-keys.toml │ ├── taplo-inner-key-conflict.toml │ ├── inline-table-imutable-1.toml │ ├── table-1.toml │ ├── table-3.toml │ ├── string-literal-multiline-quotes.toml │ ├── table-4.toml │ ├── table-2.toml │ ├── array-of-tables-2.toml │ ├── table-invalid-1.toml │ ├── multiple-dot-key.toml │ ├── table-invalid-3.toml │ ├── table-invalid-4.toml │ └── taplo-invalid-padding.toml ├── rewrite │ ├── table.toml │ ├── value.toml │ ├── table_expected.toml │ ├── value_expected.toml │ ├── multiple_expected.toml │ ├── key.toml │ ├── key_expected.toml │ ├── multiple.toml │ ├── nothing.toml │ └── nothing_expected.toml ├── README.md └── example.toml ├── rust-toolchain.toml ├── src ├── util │ ├── syntax.rs │ ├── mod.rs │ └── escape.rs ├── parser │ ├── macros.rs │ └── mod.rs ├── lib.rs ├── formatter │ ├── macros.rs │ └── mod.rs └── syntax.rs ├── Cargo.toml ├── LICENSE ├── tests └── formatter.rs ├── README.md └── Cargo.lock /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | -------------------------------------------------------------------------------- /test-data/analytics/_cargo6.toml: -------------------------------------------------------------------------------- 1 | lib. 
= 2 | -------------------------------------------------------------------------------- /test-data/invalid/bare-key-3.toml: -------------------------------------------------------------------------------- 1 | barekey = -------------------------------------------------------------------------------- /test-data/rewrite/table.toml: -------------------------------------------------------------------------------- 1 | value = 2 2 | -------------------------------------------------------------------------------- /test-data/rewrite/value.toml: -------------------------------------------------------------------------------- 1 | value = 2 2 | -------------------------------------------------------------------------------- /test-data/analytics/_cargo5.toml: -------------------------------------------------------------------------------- 1 | lib.bench = 2 | 3 | -------------------------------------------------------------------------------- /test-data/analytics/_cargo9.toml: -------------------------------------------------------------------------------- 1 | [lib] 2 | bench = -------------------------------------------------------------------------------- /test-data/invalid/bare-key-1.toml: -------------------------------------------------------------------------------- 1 | bare!key = 123 2 | -------------------------------------------------------------------------------- /test-data/invalid/int-0-padded.toml: -------------------------------------------------------------------------------- 1 | int = 0123 2 | -------------------------------------------------------------------------------- /test-data/invalid/int-signed-bin.toml: -------------------------------------------------------------------------------- 1 | bin = +0b10 2 | -------------------------------------------------------------------------------- /test-data/invalid/int-signed-hex.toml: -------------------------------------------------------------------------------- 1 | hex = +0xab 2 | -------------------------------------------------------------------------------- /test-data/invalid/int-signed-oct.toml: -------------------------------------------------------------------------------- 1 | oct = +0o23 2 | -------------------------------------------------------------------------------- /test-data/invalid/taplo-invalid-float.toml: -------------------------------------------------------------------------------- 1 | what = 1. 
-------------------------------------------------------------------------------- /test-data/analytics/_cargo8.toml: -------------------------------------------------------------------------------- 1 | lib = 2 | 3 | b = a 4 | -------------------------------------------------------------------------------- /test-data/invalid/bare-key-2.toml: -------------------------------------------------------------------------------- 1 | barekey 2 | = 123 3 | -------------------------------------------------------------------------------- /test-data/invalid/comment-control-1.toml: -------------------------------------------------------------------------------- 1 | a = "null" # 2 | -------------------------------------------------------------------------------- /test-data/invalid/comment-control-2.toml: -------------------------------------------------------------------------------- 1 | a = "ctrl-P" #  2 | -------------------------------------------------------------------------------- /test-data/invalid/comment-control-3.toml: -------------------------------------------------------------------------------- 1 | a = "ctrl-_" #  2 | -------------------------------------------------------------------------------- /test-data/invalid/comment-control-4.toml: -------------------------------------------------------------------------------- 1 | a = "0x7f" #  2 | -------------------------------------------------------------------------------- /test-data/invalid/key-value-pair-1.toml: -------------------------------------------------------------------------------- 1 | key = # INVALID 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-control-1.toml: -------------------------------------------------------------------------------- 1 | a = "null" 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-control-2.toml: -------------------------------------------------------------------------------- 1 | a = "ctrl-P" 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-control-3.toml: -------------------------------------------------------------------------------- 1 | a = "ctrl-_" 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-control-4.toml: -------------------------------------------------------------------------------- 1 | a = "0x7f" 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-unknown-escape.toml: -------------------------------------------------------------------------------- 1 | a = "\@" 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-literal-control-1.toml: -------------------------------------------------------------------------------- 1 | a = 'null' 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-literal-control-2.toml: -------------------------------------------------------------------------------- 1 | a = 'null' 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-literal-control-3.toml: -------------------------------------------------------------------------------- 1 | a = 'null' 2 | -------------------------------------------------------------------------------- 
/test-data/invalid/string-literal-control-4.toml: -------------------------------------------------------------------------------- 1 | a = 'null' 2 | -------------------------------------------------------------------------------- /test-data/invalid/taplo-invalid-array.toml: -------------------------------------------------------------------------------- 1 | arr = ["value",,,,] -------------------------------------------------------------------------------- /test-data/analytics/_cargo11.toml: -------------------------------------------------------------------------------- 1 | schema = { enabled = false 2 | -------------------------------------------------------------------------------- /test-data/analytics/_cargo12.toml: -------------------------------------------------------------------------------- 1 | [table] 2 | table = { bool = fa } -------------------------------------------------------------------------------- /test-data/invalid/no-key-name.toml: -------------------------------------------------------------------------------- 1 | = "no key name" # INVALID 2 | -------------------------------------------------------------------------------- /test-data/analytics/_cargo4.toml: -------------------------------------------------------------------------------- 1 | lib = 2 | 3 | [package] 4 | 5 | asd = -------------------------------------------------------------------------------- /test-data/analytics/_cargo7.toml: -------------------------------------------------------------------------------- 1 | [lib] 2 | bench = 3 | 4 | stuff = { , } -------------------------------------------------------------------------------- /test-data/invalid/taplo-invalid-array-comma-start.toml: -------------------------------------------------------------------------------- 1 | arr = [,"value"] -------------------------------------------------------------------------------- /test-data/invalid/taplo-table-before-array.toml: -------------------------------------------------------------------------------- 1 | [foo.bar] 2 | [[foo]] 3 | -------------------------------------------------------------------------------- /test-data/invalid/inline-table-trailing-comma.toml: -------------------------------------------------------------------------------- 1 | abc = { abc = 123, } 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-multiline-control-1.toml: -------------------------------------------------------------------------------- 1 | a = """null""" 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-multiline-control-2.toml: -------------------------------------------------------------------------------- 1 | a = """null""" 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-multiline-control-3.toml: -------------------------------------------------------------------------------- 1 | a = """null""" 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-multiline-control-4.toml: -------------------------------------------------------------------------------- 1 | a = """null""" 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-multiline-unknown-escape.toml: -------------------------------------------------------------------------------- 1 | a = """\@""" 2 | 
-------------------------------------------------------------------------------- /test-data/invalid/string-literal-multiline-control-1.toml: -------------------------------------------------------------------------------- 1 | a = '''null''' 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-literal-multiline-control-2.toml: -------------------------------------------------------------------------------- 1 | a = '''null''' 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-literal-multiline-control-3.toml: -------------------------------------------------------------------------------- 1 | a = '''null''' 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-literal-multiline-control-4.toml: -------------------------------------------------------------------------------- 1 | a = '''null''' 2 | -------------------------------------------------------------------------------- /test-data/invalid/taplo-incomplete-inline-table.toml: -------------------------------------------------------------------------------- 1 | schema = { enabled = false -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "1.92.0" 3 | profile = "default" 4 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-out-of-range-unicode-escape-1.toml: -------------------------------------------------------------------------------- 1 | a = "\UFFFFFFFF" 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-out-of-range-unicode-escape-2.toml: -------------------------------------------------------------------------------- 1 | a = "\U00D80000" 2 | -------------------------------------------------------------------------------- /test-data/invalid/key-value-pair-2.toml: -------------------------------------------------------------------------------- 1 | first = "Tom" last = "Preston-Werner" # INVALID 2 | -------------------------------------------------------------------------------- /test-data/analytics/_cargo10.toml: -------------------------------------------------------------------------------- 1 | [features] 2 | asad = [] 3 | 4 | [lib] 5 | 6 | asd = false 7 | -------------------------------------------------------------------------------- /test-data/invalid/multiple-key.toml: -------------------------------------------------------------------------------- 1 | # DO NOT DO THIS 2 | name = "Tom" 3 | name = "Pradyun" 4 | -------------------------------------------------------------------------------- /test-data/invalid/taplo-invalid-inline-table.toml: -------------------------------------------------------------------------------- 1 | cooldowns = { 2 | aggressive = true, 3 | } -------------------------------------------------------------------------------- /test-data/rewrite/table_expected.toml: -------------------------------------------------------------------------------- 1 | [table] 2 | original_value = 2 3 | additional_value = 3 4 | -------------------------------------------------------------------------------- /test-data/rewrite/value_expected.toml: -------------------------------------------------------------------------------- 1 | value = { original_value = 2, 
additional_value = 3 } 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-multiline-invalid-backslash.toml: -------------------------------------------------------------------------------- 1 | a = """ 2 | foo \ \n 3 | bar""" 4 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-multiline-out-of-range-unicode-escape-1.toml: -------------------------------------------------------------------------------- 1 | a = """\UFFFFFFFF""" 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-multiline-out-of-range-unicode-escape-2.toml: -------------------------------------------------------------------------------- 1 | a = """\U00D80000""" 2 | -------------------------------------------------------------------------------- /test-data/invalid/string-basic-multiline-quotes.toml: -------------------------------------------------------------------------------- 1 | str5 = """Here are three quotation marks: """.""" 2 | -------------------------------------------------------------------------------- /test-data/invalid/array-of-tables-1.toml: -------------------------------------------------------------------------------- 1 | # INVALID TOML DOC 2 | fruit = [] 3 | 4 | [[fruit]] # Not allowed 5 | -------------------------------------------------------------------------------- /test-data/invalid/table-invalid-2.toml: -------------------------------------------------------------------------------- 1 | # INVALID TOML DOC 2 | fruit = [] 3 | 4 | [[fruit]] # Not allowed 5 | -------------------------------------------------------------------------------- /test-data/invalid/inline-table-imutable-2.toml: -------------------------------------------------------------------------------- 1 | [product] 2 | type.name = "Nail" 3 | type = { edible = false }# INVALID 4 | -------------------------------------------------------------------------------- /test-data/invalid/taplo-duplicate-keys.toml: -------------------------------------------------------------------------------- 1 | # THIS WILL NOT WORK 2 | spelling = "favorite" 3 | "spelling" = "favourite" 4 | -------------------------------------------------------------------------------- /test-data/invalid/taplo-inner-key-conflict.toml: -------------------------------------------------------------------------------- 1 | package.something.else = 2 2 | 3 | [package] 4 | something.other = 2 5 | -------------------------------------------------------------------------------- /test-data/invalid/inline-table-imutable-1.toml: -------------------------------------------------------------------------------- 1 | [product] 2 | type = { name = "Nail" } 3 | type.edible = false # INVALID 4 | -------------------------------------------------------------------------------- /test-data/invalid/table-1.toml: -------------------------------------------------------------------------------- 1 | # DO NOT DO THIS 2 | 3 | [fruit] 4 | apple = "red" 5 | 6 | [fruit] 7 | orange = "orange" 8 | -------------------------------------------------------------------------------- /test-data/invalid/table-3.toml: -------------------------------------------------------------------------------- 1 | [fruit] 2 | apple.color = "red" 3 | apple.taste.sweet = true 4 | 5 | [fruit.apple] # INVALID 6 | -------------------------------------------------------------------------------- /test-data/invalid/string-literal-multiline-quotes.toml: 
-------------------------------------------------------------------------------- 1 | apos15 = '''Here are fifteen apostrophes: '''''''''''''''''' # INVALID 2 | -------------------------------------------------------------------------------- /test-data/invalid/table-4.toml: -------------------------------------------------------------------------------- 1 | [fruit] 2 | apple.color = "red" 3 | apple.taste.sweet = true 4 | 5 | [fruit.apple.taste] # INVALID 6 | -------------------------------------------------------------------------------- /test-data/invalid/table-2.toml: -------------------------------------------------------------------------------- 1 | # DO NOT DO THIS EITHER 2 | 3 | [fruit] 4 | apple = "red" 5 | 6 | [fruit.apple] 7 | texture = "smooth" 8 | -------------------------------------------------------------------------------- /test-data/rewrite/multiple_expected.toml: -------------------------------------------------------------------------------- 1 | 2 | 3 | [table] 4 | rewritten = 2 5 | 6 | [[table.arr]] 7 | rewrite_my_value = 0 8 | 9 | [[table.arr]] 10 | rewrite_my_value = 1 11 | -------------------------------------------------------------------------------- /test-data/rewrite/key.toml: -------------------------------------------------------------------------------- 1 | rewrite_me = 2 2 | 3 | [table] 4 | rewrite_me = 2 5 | 6 | [table.subtable.rewrite_me] 7 | val = 2 8 | 9 | [[arr.rewrite_me]] 10 | [[arr.rewrite_me]] 11 | rewrite_me = "rewrite_me" 12 | [[arr.rewrite_me]] 13 | -------------------------------------------------------------------------------- /test-data/rewrite/key_expected.toml: -------------------------------------------------------------------------------- 1 | rewritten = 2 2 | 3 | [table] 4 | rewritten = 2 5 | 6 | [table.subtable.rewritten] 7 | val = 2 8 | 9 | [[arr.rewritten]] 10 | [[arr.rewritten]] 11 | rewritten = "rewrite_me" 12 | [[arr.rewritten]] 13 | -------------------------------------------------------------------------------- /test-data/invalid/array-of-tables-2.toml: -------------------------------------------------------------------------------- 1 | # INVALID TOML DOC 2 | [[fruit]] 3 | name = "apple" 4 | 5 | [[fruit.variety]] 6 | name = "red delicious" 7 | 8 | # This table conflicts with the previous table 9 | [fruit.variety] 10 | name = "granny smith" 11 | -------------------------------------------------------------------------------- /test-data/rewrite/multiple.toml: -------------------------------------------------------------------------------- 1 | [remove_this] 2 | 3 | [remove_this.subtable] 4 | value = "should be removed" 5 | 6 | [table] 7 | rewrite_me = 2 8 | 9 | [[table.arr]] 10 | rewrite_my_value = 3 11 | 12 | [[table.arr]] 13 | rewrite_my_value = 3 14 | -------------------------------------------------------------------------------- /test-data/analytics/_cargo2.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | exclude = ["util/schema-gen", "util/test-gen"] 3 | members = ["taplo-ide", "taplo", "lsp-async-stub"] 4 | 5 | [profile.release] 6 | codegen-units = 1 7 | lto = true 8 | opt-level = 3 9 | 10 | [profile.bench] 11 | lto = true 12 | -------------------------------------------------------------------------------- /test-data/invalid/table-invalid-1.toml: -------------------------------------------------------------------------------- 1 | [fruit.physical] # subtable, but to which parent element should it belong? 
2 | color = "red" 3 | shape = "round" 4 | 5 | [[fruit]] # parser must throw an error upon discovering that "fruit" is 6 | # an array rather than a table 7 | name = "apple" 8 | -------------------------------------------------------------------------------- /test-data/invalid/multiple-dot-key.toml: -------------------------------------------------------------------------------- 1 | # THE FOLLOWING IS INVALID 2 | 3 | # This defines the value of fruit.apple to be an integer. 4 | fruit.apple = 1 5 | 6 | # But then this treats fruit.apple like it's a table. 7 | # You can't turn an integer into a table. 8 | fruit.apple.smooth = true 9 | -------------------------------------------------------------------------------- /test-data/analytics/_cargo3.toml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | [workspace] 5 | exclude = ["util/schema-gen", "util/test-gen"] 6 | members = ["taplo-ide", "taplo", "lsp-async-stub"] 7 | 8 | [profile.release] 9 | codegen-units = 1 10 | lto = true 11 | opt-level = 3 12 | 13 | [profile.bench] 14 | lto = true 15 | 16 | # a comment 17 | -------------------------------------------------------------------------------- /test-data/invalid/table-invalid-3.toml: -------------------------------------------------------------------------------- 1 | # INVALID TOML DOC 2 | [[fruit]] 3 | name = "apple" 4 | 5 | [[fruit.variety]] 6 | name = "red delicious" 7 | 8 | # INVALID: This table conflicts with the previous array of tables 9 | [fruit.variety] 10 | name = "granny smith" 11 | 12 | [fruit.physical] 13 | color = "red" 14 | shape = "round" 15 | -------------------------------------------------------------------------------- /test-data/invalid/table-invalid-4.toml: -------------------------------------------------------------------------------- 1 | # INVALID TOML DOC 2 | [[fruit]] 3 | name = "apple" 4 | 5 | [[fruit.variety]] 6 | name = "red delicious" 7 | 8 | [fruit.physical] 9 | color = "red" 10 | shape = "round" 11 | 12 | # INVALID: This array of tables conflicts with the previous table 13 | [[fruit.physical]] 14 | color = "green" 15 | -------------------------------------------------------------------------------- /src/util/syntax.rs: -------------------------------------------------------------------------------- 1 | use rowan::{GreenNodeBuilder, NodeOrToken}; 2 | 3 | use crate::syntax::SyntaxNode; 4 | 5 | pub fn add_all(node: SyntaxNode, builder: &mut GreenNodeBuilder) { 6 | builder.start_node(node.kind().into()); 7 | 8 | for c in node.children_with_tokens() { 9 | match c { 10 | NodeOrToken::Node(n) => add_all(n, builder), 11 | NodeOrToken::Token(t) => builder.token(t.kind().into(), t.text()), 12 | } 13 | } 14 | 15 | builder.finish_node() 16 | } 17 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "oxc-toml" 3 | description = "A TOML formatter library" 4 | version = "0.14.1" 5 | categories = ["parser-implementations", "parsing"] 6 | keywords = ["toml", "formatter"] 7 | readme = "README.md" 8 | authors = ["Boshen"] 9 | edition = "2024" 10 | license = "MIT" 11 | homepage = "https://github.com/oxc-project/oxc-toml" 12 | repository = "https://github.com/oxc-project/oxc-toml" 13 | 14 | [dependencies] 15 | rustc-hash = "2.1.0" 16 | itertools = "0.14.0" 17 | logos = "0.16.0" 18 | rowan = "0.16.1" 19 | -------------------------------------------------------------------------------- 
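The manifest above gives the crate's public coordinates (`oxc-toml` 0.14.1) and its four dependencies, and the API surface shown later in `src/lib.rs` and `tests/formatter.rs` consists of `formatter::format` plus a defaultable `Options` struct. As a rough illustration of how a downstream crate would consume this, the sketch below is an assumption-laden example rather than a file from this repository: the dependency declaration, the `main` function, and the sample input are invented for demonstration (the generated `Options::update_from_str` seen in `src/formatter/macros.rs` would allow string-keyed overrides, but no concrete option names appear in this dump, so none are used).

```rust
// Hypothetical consumer crate; the dependency line mirrors the manifest above.
//
// [dependencies]
// oxc-toml = "0.14.1"

use oxc_toml::formatter::{format, Options};

fn main() {
    // Unevenly spaced input, the same fixture used in tests/formatter.rs.
    let source = "value=1\n[table]\nstring='some string'";

    // Format with default options; the tests assert this input normalizes
    // to "value = 1" and "string = 'some string'".
    let formatted = format(source, Options::default());

    assert!(formatted.contains("value = 1"));
    println!("{formatted}");
}
```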
/src/parser/macros.rs: -------------------------------------------------------------------------------- 1 | macro_rules! with_node { 2 | ($builder:expr, $kind:ident, $($content:tt)*) => { 3 | { 4 | $builder.start_node($kind.into()); 5 | let res = $($content)*; 6 | $builder.finish_node(); 7 | res 8 | } 9 | }; 10 | } 11 | 12 | macro_rules! whitelisted { 13 | ($self:expr, $kind:ident, $($content:tt)*) => { 14 | { 15 | $self.whitelist_token($kind); 16 | let res = $($content)*; 17 | $self.blacklist_token($kind); 18 | res 19 | } 20 | }; 21 | } 22 | -------------------------------------------------------------------------------- /test-data/invalid/taplo-invalid-padding.toml: -------------------------------------------------------------------------------- 1 | [int] 2 | padded_middle = 1__2 3 | padded_start = _1_2 4 | padded_end = 1_2_ 5 | 6 | padded_plus = +_2 7 | padded_minus = -_2 8 | 9 | [int.bin] 10 | padded_middle = 0b1__0 11 | padded_start = 0b_1_0 12 | padded_end = 0b1_0_ 13 | 14 | [int.hex] 15 | padded_middle = 0x1__0 16 | padded_start = 0x_1_0 17 | padded_end = 0x1_0_ 18 | 19 | [int.oct] 20 | padded_middle = 0o1__0 21 | padded_start = 0o_1_0 22 | padded_end = 0o1_0_ 23 | 24 | [float] 25 | padded_middle = 1__2.0 26 | padded_start = _1_2.0 27 | padded_end = 1_2_.0 28 | 29 | padded_plus = +_2.0 30 | padded_minus = -_2.0 -------------------------------------------------------------------------------- /test-data/README.md: -------------------------------------------------------------------------------- 1 | ## Source of most of the invalid/valid tests: 2 | 3 | These are the spec tests for TOML used by @iarna/toml. 4 | 5 | The errors folder contains TOML files that should cause a parser to report an error. 6 | 7 | The values folder contains TOML files and paired YAML or JSON files. The 8 | YAML files should parse to a structure that's deeply equal to the TOML 9 | structure. The JSON files match the patterns found in [BurntSushi 0.4 TOML 10 | tests](https://github.com/BurntSushi/toml-test#json-encoding). 11 | 12 | We introduce the following new types to match TOML 0.5.0: 13 | 14 | * _datetime-local_ - A datetime without a timezone. Floating. 15 | * _date_ - A date without any time component 16 | * _time_ - A time without any date component 17 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::single_match)] 2 | //! # About 3 | //! 4 | //! The main purpose of this library is to format TOML documents while preserving 5 | //! the original layout, comments, and whitespace where appropriate. 6 | //! 7 | //! It uses [Rowan](::rowan) for the syntax tree, and every character is preserved from the input, 8 | //! including all comments and white space. 9 | //! 10 | //! # Usage 11 | //! 12 | //! A TOML document can be formatted directly using the [formatter::format] function: 13 | //! 14 | //! ``` 15 | //! use oxc_toml::formatter::{format, Options}; 16 | //! 17 | //! const SOURCE: &str = "value=1\n[table]\nstring='some string'"; 18 | //! 19 | //! let formatted = format(SOURCE, Options::default()); 20 | //! 
//! ``` 21 | 22 | pub mod formatter; 23 | pub mod parser; 24 | pub mod syntax; 25 | pub mod util; 26 | 27 | pub use rowan; 28 | 29 | pub type HashMap<K, V> = rustc_hash::FxHashMap<K, V>; 30 | pub type HashSet<T> = rustc_hash::FxHashSet<T>; 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Boshen (oxc-toml - Formatter-only fork) 4 | Copyright (c) 2020 Ferenc Tamás (Original Taplo project) 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /tests/formatter.rs: -------------------------------------------------------------------------------- 1 | use oxc_toml::formatter::{format, Options}; 2 | 3 | #[test] 4 | fn test_basic_formatting() { 5 | const SOURCE: &str = "value=1\n[table]\nstring='some string'"; 6 | let formatted = format(SOURCE, Options::default()); 7 | 8 | // Should add spaces around = 9 | assert!(formatted.contains("value = 1")); 10 | assert!(formatted.contains("string = 'some string'")); 11 | } 12 | 13 | #[test] 14 | fn test_complex_toml() { 15 | const SOURCE: &str = r#" 16 | [package] 17 | name="test" 18 | version="1.0.0" 19 | 20 | [dependencies] 21 | foo="1.0" 22 | bar = "2.0" 23 | "#; 24 | let formatted = format(SOURCE, Options::default()); 25 | 26 | // Should normalize spacing 27 | assert!(formatted.contains("name = \"test\"")); 28 | assert!(formatted.contains("version = \"1.0.0\"")); 29 | assert!(formatted.contains("foo = \"1.0\"")); 30 | assert!(formatted.contains("bar = \"2.0\"")); 31 | } 32 | 33 | #[test] 34 | fn test_formatter_preserves_comments() { 35 | const SOURCE: &str = "# Comment\nvalue=1"; 36 | let formatted = format(SOURCE, Options::default()); 37 | 38 | assert!(formatted.contains("# Comment")); 39 | assert!(formatted.contains("value = 1")); 40 | } 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # oxc-toml 2 | 3 | A TOML v1.0.0 formatter library. 4 | 5 | This library provides TOML formatting capabilities while preserving comments, whitespace, and the original document structure where appropriate.
6 | 7 | ## Features 8 | 9 | - Format TOML documents according to configurable style options 10 | - Preserve comments and meaningful whitespace 11 | - Handle syntax errors gracefully 12 | - Fault-tolerant parsing using [Rowan](https://github.com/rust-analyzer/rowan) syntax trees 13 | 14 | ## Usage 15 | 16 | ```rust 17 | use oxc_toml::formatter::{format, Options}; 18 | 19 | const SOURCE: &str = "value=1\n[table]\nstring='some string'"; 20 | 21 | let formatted = format(SOURCE, Options::default()); 22 | ``` 23 | 24 | ## Attribution 25 | 26 | This project is a formatter-only fork of the excellent [Taplo](https://github.com/tamasfe/taplo) project, originally created by [Ferenc Tamás](https://github.com/tamasfe). 27 | 28 | ### What Changed 29 | 30 | This fork strips away all non-formatter components from Taplo, including: 31 | - CLI tool 32 | - Language Server Protocol (LSP) implementation 33 | - WebAssembly bindings 34 | - DOM (Document Object Model) for TOML manipulation 35 | - JavaScript/TypeScript packages 36 | - Editor integrations 37 | 38 | The result is a focused, lightweight library that does one thing well: format TOML documents. 39 | 40 | ### Original Taplo Project 41 | 42 | Taplo is a comprehensive TOML toolkit that provides: 43 | - TOML v1.0.0 parser 44 | - Formatter (the foundation of this fork) 45 | - Language server for IDE integration 46 | - CLI tool for formatting and validation 47 | - WebAssembly bindings for browser/Node.js 48 | - Schema validation 49 | 50 | If you need these features, please use the original [Taplo project](https://github.com/tamasfe/taplo). 51 | 52 | ### License 53 | 54 | This project maintains the original MIT License from Taplo. See [LICENSE](LICENSE) for details. 55 | -------------------------------------------------------------------------------- /test-data/analytics/_cargo1.toml: -------------------------------------------------------------------------------- 1 | [some.package] 2 | authors = ["tamasf97 "] 3 | edition = "2018" 4 | name = "taplo-ide" 5 | version = "0.1.0" 6 | 7 | [lib] 8 | crate-type = ["cdylib", "rlib"] 9 | 10 | [features] 11 | default = ["console_error_panic_hook"] 12 | 13 | [dependencies] 14 | async-trait = "0.1.30" 15 | futures = "0.3.5" 16 | indexmap = "1.4.0" 17 | js-sys = "0.3.39" 18 | lsp-async-stub = { path = "../lsp-async-stub" } 19 | lsp-types = { version = "0.74.1", features = ["proposed"] } 20 | once_cell = "1.3.1" 21 | regex = "1.3" 22 | # reqwest = "0.10.6" 23 | rowan = "0.10.0" 24 | schemars = "0.8.0-alpha-4" 25 | serde = { version = "1.0", features = ["derive"] } 26 | serde_json = "1.0" 27 | serde_yaml = "0.8" 28 | taplo = { path = "../taplo", features = ["serde", "verify"] } 29 | verify = { version = "0.3", features = ["schemars"] } 30 | wasm-bindgen = { version = "^0.2", features = ["serde-serialize"] } 31 | wasm-bindgen-futures = "0.4.12" 32 | 33 | # The `console_error_panic_hook` crate provides better debugging of panics by 34 | # logging them with `console.error`. This is great for development, but requires 35 | # all the `std::fmt` and `std::panicking` infrastructure, so isn't great for 36 | # code size when deploying. 37 | console_error_panic_hook = { version = "0.1.1", optional = true } 38 | 39 | # `wee_alloc` is a tiny allocator for wasm that is only ~1K in code size 40 | # compared to the default allocator's ~10K. It is slower than the default 41 | # allocator, however. 42 | # 43 | # Unfortunately, `wee_alloc` requires nightly Rust when targeting wasm for now. 
44 | wee_alloc = { version = "0.4.2", optional = true } 45 | 46 | [dev-dependencies] 47 | wasm-bindgen-test = "0.2" 48 | stuff. 49 | [] 50 | 51 | asd.bsd 52 | 53 | [lib.] 54 | [some.lib.] 55 | 56 | [[test]] 57 | 58 | [[test]] 59 | 60 | thing. -------------------------------------------------------------------------------- /src/formatter/macros.rs: -------------------------------------------------------------------------------- 1 | macro_rules! create_options { 2 | ( 3 | $(#[$attr:meta])* 4 | pub struct Options { 5 | $( 6 | $(#[$field_attr:meta])* 7 | pub $name:ident: $ty:ty, 8 | )+ 9 | } 10 | ) => { 11 | $(#[$attr])* 12 | pub struct Options { 13 | $( 14 | $(#[$field_attr])* 15 | pub $name: $ty, 16 | )+ 17 | } 18 | 19 | impl Options { 20 | pub fn update(&mut self, incomplete: OptionsIncomplete) { 21 | $( 22 | if let Some(v) = incomplete.$name { 23 | self.$name = v; 24 | } 25 | )+ 26 | } 27 | 28 | pub fn update_from_str<S: AsRef<str>, I: Iterator<Item = (S, S)>>( 29 | &mut self, 30 | values: I, 31 | ) -> Result<(), OptionParseError> { 32 | for (key, val) in values { 33 | 34 | $( 35 | if key.as_ref() == stringify!($name) { 36 | self.$name = 37 | val.as_ref() 38 | .parse() 39 | .map_err(|error| OptionParseError::InvalidValue { 40 | key: key.as_ref().into(), 41 | error: Box::new(error), 42 | })?; 43 | 44 | continue; 45 | } 46 | )+ 47 | 48 | return Err(OptionParseError::InvalidOption(key.as_ref().into())); 49 | } 50 | 51 | Ok(()) 52 | } 53 | } 54 | 55 | $(#[$attr])* 56 | #[derive(Default)] 57 | pub struct OptionsIncomplete { 58 | $( 59 | $(#[$field_attr])* 60 | pub $name: Option<$ty>, 61 | )+ 62 | } 63 | 64 | impl OptionsIncomplete { 65 | pub fn from_options(opts: Options) -> Self { 66 | let mut o = Self::default(); 67 | 68 | $( 69 | o.$name = Some(opts.$name); 70 | )+ 71 | 72 | o 73 | } 74 | } 75 | }; 76 | } 77 | -------------------------------------------------------------------------------- /src/util/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::syntax::{SyntaxElement, SyntaxKind, SyntaxNode}; 2 | use rowan::TextRange; 3 | use rowan::TextSize; 4 | 5 | mod escape; 6 | pub mod syntax; 7 | 8 | pub use escape::check_escape; 9 | pub use escape::{escape, unescape}; 10 | 11 | pub(crate) mod allowed_chars { 12 | pub(crate) fn comment(s: &str) -> Result<(), Vec<usize>> { 13 | let mut err_indices = Vec::new(); 14 | 15 | for (i, c) in s.chars().enumerate() { 16 | if c != '\t' && c.is_control() { 17 | err_indices.push(i); 18 | } 19 | } 20 | 21 | if err_indices.is_empty() { 22 | Ok(()) 23 | } else { 24 | Err(err_indices) 25 | } 26 | } 27 | 28 | pub(crate) fn string(s: &str) -> Result<(), Vec<usize>> { 29 | let mut err_indices = Vec::new(); 30 | 31 | for (i, c) in s.chars().enumerate() { 32 | if c != '\t' 33 | && (('\u{0000}'..='\u{0008}').contains(&c) 34 | || ('\u{000A}'..='\u{001F}').contains(&c) 35 | || c == '\u{007F}') 36 | { 37 | err_indices.push(i); 38 | } 39 | } 40 | 41 | if err_indices.is_empty() { 42 | Ok(()) 43 | } else { 44 | Err(err_indices) 45 | } 46 | } 47 | 48 | pub(crate) fn multi_line_string(s: &str) -> Result<(), Vec<usize>> { 49 | let mut err_indices = Vec::new(); 50 | 51 | for (i, c) in s.chars().enumerate() { 52 | if c != '\t' 53 | && c != '\n' 54 | && c != '\r' 55 | && (('\u{0000}'..='\u{0008}').contains(&c) 56 | || ('\u{000A}'..='\u{001F}').contains(&c) 57 | || c == '\u{007F}') 58 | { 59 | err_indices.push(i); 60 | } 61 | } 62 | 63 | if err_indices.is_empty() { 64 | Ok(()) 65 | } else { 66 | Err(err_indices) 67 | } 68 | } 69 | 70 | pub(crate) fn string_literal(s: &str) ->
Result<(), Vec<usize>> { 71 | let mut err_indices = Vec::new(); 72 | 73 | for (i, c) in s.chars().enumerate() { 74 | if c != '\t' && c.is_control() { 75 | err_indices.push(i); 76 | } 77 | } 78 | 79 | if err_indices.is_empty() { 80 | Ok(()) 81 | } else { 82 | Err(err_indices) 83 | } 84 | } 85 | 86 | pub(crate) fn multi_line_string_literal(s: &str) -> Result<(), Vec<usize>> { 87 | let mut err_indices = Vec::new(); 88 | 89 | for (i, c) in s.chars().enumerate() { 90 | if c != '\t' && c != '\n' && c != '\r' && c.is_control() { 91 | err_indices.push(i); 92 | } 93 | } 94 | 95 | if err_indices.is_empty() { 96 | Ok(()) 97 | } else { 98 | Err(err_indices) 99 | } 100 | } 101 | } 102 | 103 | pub trait StrExt { 104 | fn strip_quotes(self) -> Self; 105 | } 106 | 107 | impl StrExt for &str { 108 | fn strip_quotes(self) -> Self { 109 | if self.starts_with('\"') || self.starts_with('\'') { 110 | &self[1..self.len() - 1] 111 | } else { 112 | self 113 | } 114 | } 115 | } 116 | 117 | /// Utility extension methods for Syntax Nodes. 118 | pub trait SyntaxExt { 119 | /// Return a syntax node that contains the given offset. 120 | fn find_node(&self, offset: TextSize, inclusive: bool) -> Option<SyntaxNode>; 121 | 122 | /// Find the deepest node that contains the given offset. 123 | fn find_node_deep(&self, offset: TextSize, inclusive: bool) -> Option<SyntaxNode> { 124 | let mut node = self.find_node(offset, inclusive); 125 | while let Some(n) = &node { 126 | let new_node = n.find_node(offset, inclusive); 127 | if new_node.is_some() { 128 | node = new_node; 129 | } else { 130 | break; 131 | } 132 | } 133 | 134 | node 135 | } 136 | 137 | /// Find a node or token by its kind. 138 | fn find(&self, kind: SyntaxKind) -> Option<SyntaxElement>; 139 | } 140 | 141 | impl SyntaxExt for SyntaxNode { 142 | fn find_node(&self, offset: TextSize, inclusive: bool) -> Option<SyntaxNode> { 143 | for d in self.descendants().skip(1) { 144 | let range = d.text_range(); 145 | 146 | if (inclusive && range.contains_inclusive(offset)) || range.contains(offset) { 147 | return Some(d); 148 | } 149 | } 150 | 151 | None 152 | } 153 | 154 | fn find(&self, kind: SyntaxKind) -> Option<SyntaxElement> { 155 | self.descendants_with_tokens().find(|d| d.kind() == kind) 156 | } 157 | } 158 | 159 | pub fn join_ranges<I: IntoIterator<Item = TextRange>>(ranges: I) -> TextRange { 160 | ranges 161 | .into_iter() 162 | .fold(None, |ranges, range| match ranges { 163 | Some(r) => Some(range.cover(r)), 164 | None => Some(range), 165 | }) 166 | .unwrap() 167 | } 168 | 169 | pub fn try_join_ranges<I: IntoIterator<Item = TextRange>>(ranges: I) -> Option<TextRange> { 170 | ranges.into_iter().fold(None, |ranges, range| match ranges { 171 | Some(r) => Some(range.cover(r)), 172 | None => Some(range), 173 | }) 174 | } 175 | 176 | pub fn overlaps(range: TextRange, other: TextRange) -> bool { 177 | range.contains_range(other) 178 | || other.contains_range(range) 179 | || range.contains(other.start()) 180 | || range.contains(other.end()) 181 | || other.contains(range.start()) 182 | || other.contains(range.end()) 183 | } 184 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing.
3 | version = 4 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "1.1.4" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "beef" 16 | version = "0.5.2" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" 19 | 20 | [[package]] 21 | name = "countme" 22 | version = "3.0.1" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "7704b5fdd17b18ae31c4c1da5a2e0305a2bf17b5249300a9ee9ed7b72114c636" 25 | 26 | [[package]] 27 | name = "either" 28 | version = "1.15.0" 29 | source = "registry+https://github.com/rust-lang/crates.io-index" 30 | checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" 31 | 32 | [[package]] 33 | name = "fnv" 34 | version = "1.0.7" 35 | source = "registry+https://github.com/rust-lang/crates.io-index" 36 | checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" 37 | 38 | [[package]] 39 | name = "hashbrown" 40 | version = "0.14.5" 41 | source = "registry+https://github.com/rust-lang/crates.io-index" 42 | checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" 43 | 44 | [[package]] 45 | name = "itertools" 46 | version = "0.14.0" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" 49 | dependencies = [ 50 | "either", 51 | ] 52 | 53 | [[package]] 54 | name = "logos" 55 | version = "0.16.0" 56 | source = "registry+https://github.com/rust-lang/crates.io-index" 57 | checksum = "a790d11254054e5dc83902dba85d253ff06ceb0cfafb12be8773435cb9dfb4f4" 58 | dependencies = [ 59 | "logos-derive", 60 | ] 61 | 62 | [[package]] 63 | name = "logos-codegen" 64 | version = "0.16.0" 65 | source = "registry+https://github.com/rust-lang/crates.io-index" 66 | checksum = "f60337c43a38313b58871f8d5d76872b8e17aa9d51fad494b5e76092c0ce05f5" 67 | dependencies = [ 68 | "beef", 69 | "fnv", 70 | "proc-macro2", 71 | "quote", 72 | "regex-automata", 73 | "regex-syntax", 74 | "rustc_version", 75 | "syn", 76 | ] 77 | 78 | [[package]] 79 | name = "logos-derive" 80 | version = "0.16.0" 81 | source = "registry+https://github.com/rust-lang/crates.io-index" 82 | checksum = "d151b2ae667f69e10b8738f5cac0c746faa22b2e15ea7e83b55476afec3767dc" 83 | dependencies = [ 84 | "logos-codegen", 85 | ] 86 | 87 | [[package]] 88 | name = "memchr" 89 | version = "2.7.6" 90 | source = "registry+https://github.com/rust-lang/crates.io-index" 91 | checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" 92 | 93 | [[package]] 94 | name = "oxc-toml" 95 | version = "0.14.1" 96 | dependencies = [ 97 | "itertools", 98 | "logos", 99 | "rowan", 100 | "rustc-hash 2.1.1", 101 | ] 102 | 103 | [[package]] 104 | name = "proc-macro2" 105 | version = "1.0.103" 106 | source = "registry+https://github.com/rust-lang/crates.io-index" 107 | checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" 108 | dependencies = [ 109 | "unicode-ident", 110 | ] 111 | 112 | [[package]] 113 | name = "quote" 114 | version = "1.0.42" 115 | source = "registry+https://github.com/rust-lang/crates.io-index" 116 | checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" 117 | dependencies = [ 118 | "proc-macro2", 119 | ] 120 | 121 | 
[[package]] 122 | name = "regex-automata" 123 | version = "0.4.13" 124 | source = "registry+https://github.com/rust-lang/crates.io-index" 125 | checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" 126 | dependencies = [ 127 | "aho-corasick", 128 | "memchr", 129 | "regex-syntax", 130 | ] 131 | 132 | [[package]] 133 | name = "regex-syntax" 134 | version = "0.8.8" 135 | source = "registry+https://github.com/rust-lang/crates.io-index" 136 | checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" 137 | 138 | [[package]] 139 | name = "rowan" 140 | version = "0.16.1" 141 | source = "registry+https://github.com/rust-lang/crates.io-index" 142 | checksum = "417a3a9f582e349834051b8a10c8d71ca88da4211e4093528e36b9845f6b5f21" 143 | dependencies = [ 144 | "countme", 145 | "hashbrown", 146 | "rustc-hash 1.1.0", 147 | "text-size", 148 | ] 149 | 150 | [[package]] 151 | name = "rustc-hash" 152 | version = "1.1.0" 153 | source = "registry+https://github.com/rust-lang/crates.io-index" 154 | checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" 155 | 156 | [[package]] 157 | name = "rustc-hash" 158 | version = "2.1.1" 159 | source = "registry+https://github.com/rust-lang/crates.io-index" 160 | checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" 161 | 162 | [[package]] 163 | name = "rustc_version" 164 | version = "0.4.1" 165 | source = "registry+https://github.com/rust-lang/crates.io-index" 166 | checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" 167 | dependencies = [ 168 | "semver", 169 | ] 170 | 171 | [[package]] 172 | name = "semver" 173 | version = "1.0.27" 174 | source = "registry+https://github.com/rust-lang/crates.io-index" 175 | checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" 176 | 177 | [[package]] 178 | name = "syn" 179 | version = "2.0.111" 180 | source = "registry+https://github.com/rust-lang/crates.io-index" 181 | checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" 182 | dependencies = [ 183 | "proc-macro2", 184 | "quote", 185 | "unicode-ident", 186 | ] 187 | 188 | [[package]] 189 | name = "text-size" 190 | version = "1.1.1" 191 | source = "registry+https://github.com/rust-lang/crates.io-index" 192 | checksum = "f18aa187839b2bdb1ad2fa35ead8c4c2976b64e4363c386d45ac0f7ee85c9233" 193 | 194 | [[package]] 195 | name = "unicode-ident" 196 | version = "1.0.22" 197 | source = "registry+https://github.com/rust-lang/crates.io-index" 198 | checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" 199 | -------------------------------------------------------------------------------- /src/util/escape.rs: -------------------------------------------------------------------------------- 1 | use logos::{Lexer, Logos}; 2 | 3 | /// Escaping based on: 4 | /// 5 | /// \b - backspace (U+0008) 6 | /// \t - tab (U+0009) 7 | /// \n - linefeed (U+000A) 8 | /// \f - form feed (U+000C) 9 | /// \r - carriage return (U+000D) 10 | /// \" - quote (U+0022) 11 | /// \\ - backslash (U+005C) 12 | /// \uXXXX - unicode (U+XXXX) 13 | /// \UXXXXXXXX - unicode (U+XXXXXXXX) 14 | #[derive(Logos, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] 15 | pub enum Escape { 16 | #[token(r#"\b"#, priority = 5)] 17 | Backspace, 18 | 19 | #[token(r#"\t"#, priority = 5)] 20 | Tab, 21 | 22 | #[regex(r#"(\\\s*\n)|(\\\s*\r\n)"#, priority = 5)] 23 | Newline, 24 | 25 | #[token(r#"\n"#, priority = 5)] 26 | LineFeed, 27 | 28 | #[token(r#"\f"#, priority = 
5)] 29 | FormFeed, 30 | 31 | #[token(r#"\r"#, priority = 5)] 32 | CarriageReturn, 33 | 34 | #[token(r#"\""#, priority = 5)] 35 | Quote, 36 | 37 | #[token(r#"\\"#, priority = 5)] 38 | Backslash, 39 | 40 | // Same thing repeated 4 times, but the {n} repetition syntax is not supported by Logos 41 | #[regex(r#"\\u[0-9A-Fa-f_][0-9A-Fa-f_][0-9A-Fa-f_][0-9A-Fa-f_]"#, priority = 5)] 42 | Unicode, 43 | 44 | // Same thing repeated 8 times, but the {n} repetition syntax is not supported by Logos 45 | #[regex(r#"\\U[0-9A-Fa-f_][0-9A-Fa-f_][0-9A-Fa-f_][0-9A-Fa-f_][0-9A-Fa-f_][0-9A-Fa-f_][0-9A-Fa-f_][0-9A-Fa-f_]"#, priority = 5)] 46 | UnicodeLarge, 47 | 48 | #[regex(r#"\\."#, priority = 4)] 49 | Unknown, 50 | 51 | UnEscaped, 52 | } 53 | use Escape::*; 54 | 55 | /// Escape values in a given string. 56 | pub fn escape(s: &str) -> String { 57 | let mut escaped = String::with_capacity(s.len()); 58 | 59 | for c in s.chars() { 60 | match c { 61 | '\u{0008}' => escaped.push_str(r#"\b"#), 62 | '\u{0009}' => escaped.push_str(r#"\t"#), 63 | '\u{000A}' => escaped.push_str(r#"\n"#), 64 | '\u{000C}' => escaped.push_str(r#"\f"#), 65 | '\u{000D}' => escaped.push_str(r#"\r"#), 66 | '\u{0022}' => escaped.push_str(r#"\""#), 67 | '\u{005C}' => escaped.push_str(r#"\\"#), 68 | _ => { 69 | escaped.push(c); 70 | } 71 | } 72 | } 73 | 74 | escaped 75 | } 76 | 77 | /// Unescape all supported sequences found in [Escape](Escape). 78 | /// 79 | /// If it fails, the index of failure is returned. 80 | pub fn unescape(s: &str) -> Result<String, usize> { 81 | let mut new_s = String::with_capacity(s.len()); 82 | let mut lexer: Lexer<Escape> = Lexer::new(s); 83 | 84 | while let Some(t) = lexer.next() { 85 | let t = t.unwrap_or(UnEscaped); 86 | match t { 87 | Backspace => new_s += "\u{0008}", 88 | Tab => new_s += "\u{0009}", 89 | LineFeed => new_s += "\u{000A}", 90 | FormFeed => new_s += "\u{000C}", 91 | CarriageReturn => new_s += "\u{000D}", 92 | Quote => new_s += "\u{0022}", 93 | Backslash => new_s += "\u{005C}", 94 | Newline => {} 95 | Unicode => { 96 | new_s += &std::char::from_u32( 97 | u32::from_str_radix(&lexer.slice()[2..], 16).map_err(|_| lexer.span().start)?, 98 | ) 99 | .ok_or(lexer.span().start)? 100 | .to_string(); 101 | } 102 | UnicodeLarge => { 103 | new_s += &std::char::from_u32( 104 | u32::from_str_radix(&lexer.slice()[2..], 16).map_err(|_| lexer.span().start)?, 105 | ) 106 | .ok_or(lexer.span().start)? 107 | .to_string(); 108 | } 109 | Unknown => return Err(lexer.span().end), 110 | UnEscaped => { 111 | new_s += lexer.slice(); 112 | } 113 | } 114 | } 115 | 116 | Ok(new_s + lexer.remainder()) 117 | } 118 | 119 | /// Same as unescape, but doesn't create a new 120 | /// unescaped string, and returns all invalid escape indices.
121 | pub fn check_escape(s: &str) -> Result<(), Vec<usize>> { 122 | let mut lexer: Lexer<Escape> = Lexer::new(s); 123 | let mut invalid = Vec::new(); 124 | 125 | while let Some(t) = lexer.next() { 126 | let t = t.unwrap_or(UnEscaped); 127 | match t { 128 | Backspace => {} 129 | Tab => {} 130 | LineFeed => {} 131 | FormFeed => {} 132 | CarriageReturn => {} 133 | Quote => {} 134 | Backslash => {} 135 | Newline => {} 136 | Unicode => { 137 | let char_val = match u32::from_str_radix(&lexer.slice()[2..], 16) { 138 | Ok(v) => v, 139 | Err(_) => { 140 | invalid.push(lexer.span().start); 141 | continue; 142 | } 143 | }; 144 | 145 | match std::char::from_u32(char_val) { 146 | None => { 147 | invalid.push(lexer.span().start); 148 | } 149 | Some(_) => {} 150 | }; 151 | } 152 | UnicodeLarge => { 153 | let char_val = match u32::from_str_radix(&lexer.slice()[2..], 16) { 154 | Ok(v) => v, 155 | Err(_) => { 156 | invalid.push(lexer.span().start); 157 | continue; 158 | } 159 | }; 160 | 161 | match std::char::from_u32(char_val) { 162 | None => { 163 | invalid.push(lexer.span().start); 164 | } 165 | Some(_) => {} 166 | }; 167 | } 168 | Unknown => invalid.push(lexer.span().start), 169 | UnEscaped => {} 170 | } 171 | } 172 | 173 | if invalid.is_empty() { 174 | Ok(()) 175 | } else { 176 | Err(invalid) 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /test-data/example.toml: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ## Comment 3 | 4 | # Speak your mind with the hash symbol. They go from the symbol to the end of 5 | # the line. 6 | 7 | ################################################################################ 8 | ## Table 9 | 10 | # Tables (also known as hash tables or dictionaries) are collections of 11 | # key/value pairs. They appear in square brackets on a line by themselves. 12 | 13 | root_value = 2 14 | 15 | [table] 16 | 17 | key = "value" # Yeah, you can do this. 18 | 19 | # Nested tables are denoted by table names with dots in them. Name your tables 20 | # whatever crap you please, just don't use #, ., [ or ]. 21 | 22 | [table.subtable] 23 | 24 | key = "another value" 25 | 26 | # You don't need to specify all the super-tables if you don't want to. TOML 27 | # knows how to do it for you. 28 | 29 | # [x] you 30 | # [x.y] don't 31 | # [x.y.z] need these 32 | [x.y.z.w] # for this to work 33 | 34 | ################################################################################ 35 | ## Inline Table 36 | 37 | # Inline tables provide a more compact syntax for expressing tables. They are 38 | # especially useful for grouped data that can otherwise quickly become verbose. 39 | # Inline tables are enclosed in curly braces `{` and `}`. No newlines are 40 | # allowed between the curly braces unless they are valid within a value. 41 | 42 | [table.inline] 43 | 44 | name = { first = "Tom", last = "Preston-Werner" } 45 | point = { x = 1, y = 2 } 46 | 47 | ################################################################################ 48 | ## String 49 | 50 | # There are four ways to express strings: basic, multi-line basic, literal, and 51 | # multi-line literal. All strings must contain only valid UTF-8 characters. 52 | 53 | [string.basic] 54 | 55 | basic = "I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF."
56 | 57 | [string.multiline] 58 | 59 | # The following strings are byte-for-byte equivalent: 60 | key1 = "One\nTwo" 61 | key2 = """One\nTwo""" 62 | key3 = """ 63 | One 64 | Two""" 65 | 66 | [string.multiline.continued] 67 | 68 | # The following strings are byte-for-byte equivalent: 69 | key1 = "The quick brown fox jumps over the lazy dog." 70 | 71 | key2 = """ 72 | The quick brown \ 73 | 74 | 75 | fox jumps over \ 76 | the lazy dog.""" 77 | 78 | key3 = """\ 79 | The quick brown \ 80 | fox jumps over \ 81 | the lazy dog.\ 82 | """ 83 | 84 | [string.literal] 85 | 86 | # What you see is what you get. 87 | quoted = 'Tom "Dubs" Preston-Werner' 88 | regex = '<\i\c*\s*>' 89 | winpath = 'C:\Users\nodejs\templates' 90 | winpath2 = '\\ServerX\admin$\system32\' 91 | 92 | [string.literal.multiline] 93 | 94 | lines = ''' 95 | The first newline is 96 | trimmed in raw strings. 97 | All other whitespace 98 | is preserved. 99 | ''' 100 | regex2 = '''I [dw]on't need \d{2} apples''' 101 | 102 | ################################################################################ 103 | ## Integer 104 | 105 | # Integers are whole numbers. Positive numbers may be prefixed with a plus sign. 106 | # Negative numbers are prefixed with a minus sign. 107 | 108 | [integer] 109 | 110 | key1 = +99 111 | key2 = 42 112 | key3 = 0 113 | key4 = -17 114 | 115 | [integer.underscores] 116 | 117 | # For large numbers, you may use underscores to enhance readability. Each 118 | # underscore must be surrounded by at least one digit. 119 | key1 = 1_000 120 | key2 = 5_349_221 121 | key3 = 1_2_3_4_5 # valid but inadvisable 122 | 123 | ################################################################################ 124 | ## Float 125 | 126 | # A float consists of an integer part (which may be prefixed with a plus or 127 | # minus sign) followed by a fractional part and/or an exponent part. 128 | 129 | [float.fractional] 130 | 131 | key1 = +1.0 132 | key2 = 3.1415 133 | key3 = -0.01 134 | 135 | [float.exponent] 136 | 137 | key1 = 5e+22 138 | key2 = 1e6 139 | key3 = -2E-2 140 | 141 | [float.both] 142 | 143 | key = 6.626e-34 144 | 145 | [float.underscores] 146 | 147 | # This file is used for benches and toml-rs doesn't yet support these: 148 | # key1 = 9_224_617.445_991_228_313 149 | # key2 = 1e1_000 150 | 151 | ################################################################################ 152 | ## Boolean 153 | 154 | # Booleans are just the tokens you're used to. Always lowercase. 155 | 156 | [boolean] 157 | 158 | False = false 159 | True = true 160 | 161 | ################################################################################ 162 | ## Datetime 163 | 164 | # Datetimes are RFC 3339 dates. 165 | 166 | [datetime] 167 | 168 | key1 = 1979-05-27T07:32:00Z 169 | key2 = 1979-05-27T00:32:00-07:00 170 | key3 = 1979-05-27T00:32:00.999999-07:00 171 | 172 | ################################################################################ 173 | ## Array 174 | 175 | # Arrays are square brackets with other primitives inside. Whitespace is 176 | # ignored. Elements are separated by commas. Data types may not be mixed. 177 | 178 | [array] 179 | 180 | key1 = [1, 2, 3] 181 | key2 = ["red", "yellow", "green"] 182 | key3 = [[1, 2], [3, 4, 5]] 183 | key4 = [[1, 2], ["a", "b", "c"]] # this is ok 184 | 185 | # Arrays can also be multiline. So in addition to ignoring whitespace, arrays 186 | # also ignore newlines between the brackets. Terminating commas are ok before 187 | # the closing bracket. 
188 | 189 | key5 = [1, 2, 3] 190 | key6 = [ 191 | 1, 192 | 2, # this is ok 193 | ] 194 | 195 | ################################################################################ 196 | ## Array of Tables 197 | 198 | # These can be expressed by using a table name in double brackets. Each table 199 | # with the same double bracketed name will be an element in the array. The 200 | # tables are inserted in the order encountered. 201 | 202 | [[products]] 203 | 204 | name = "Hammer" 205 | sku = 738594937 206 | 207 | [[products]] 208 | 209 | [[products]] 210 | 211 | color = "gray" 212 | name = "Nail" 213 | sku = 284758393 214 | 215 | # You can create nested arrays of tables as well. 216 | 217 | [[fruit]] 218 | name = "apple" 219 | 220 | [fruit.physical] 221 | color = "red" 222 | shape = "round" 223 | 224 | [[fruit.variety]] 225 | name = "red delicious" 226 | 227 | [[fruit.variety]] 228 | name = "granny smith" 229 | 230 | [[fruit]] 231 | name = "banana" 232 | 233 | [[fruit.variety]] 234 | name = "plantain" 235 | -------------------------------------------------------------------------------- /test-data/rewrite/nothing.toml: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ## Comment 3 | 4 | # Speak your mind with the hash symbol. They go from the symbol to the end of 5 | # the line. 6 | 7 | ################################################################################ 8 | ## Table 9 | 10 | # Tables (also known as hash tables or dictionaries) are collections of 11 | # key/value pairs. They appear in square brackets on a line by themselves. 12 | 13 | root_value = 2 14 | 15 | [table] 16 | 17 | key = "value" # Yeah, you can do this. 18 | 19 | # Nested tables are denoted by table names with dots in them. Name your tables 20 | # whatever crap you please, just don't use #, ., [ or ]. 21 | 22 | [table.subtable] 23 | 24 | key = "another value" 25 | 26 | # You don't need to specify all the super-tables if you don't want to. TOML 27 | # knows how to do it for you. 28 | 29 | # [x] you 30 | # [x.y] don't 31 | # [x.y.z] need these 32 | [x.y.z.w] # for this to work 33 | 34 | ################################################################################ 35 | ## Inline Table 36 | 37 | # Inline tables provide a more compact syntax for expressing tables. They are 38 | # especially useful for grouped data that can otherwise quickly become verbose. 39 | # Inline tables are enclosed in curly braces `{` and `}`. No newlines are 40 | # allowed between the curly braces unless they are valid within a value. 41 | 42 | [table.inline] 43 | 44 | name = { first = "Tom", last = "Preston-Werner" } 45 | point = { x = 1, y = 2 } 46 | 47 | ################################################################################ 48 | ## String 49 | 50 | # There are four ways to express strings: basic, multi-line basic, literal, and 51 | # multi-line literal. All strings must contain only valid UTF-8 characters. 52 | 53 | [string.basic] 54 | 55 | basic = "I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF." 56 | 57 | [string.multiline] 58 | 59 | # The following strings are byte-for-byte equivalent: 60 | key1 = "One\nTwo" 61 | key2 = """One\nTwo""" 62 | key3 = """ 63 | One 64 | Two""" 65 | 66 | [string.multiline.continued] 67 | 68 | # The following strings are byte-for-byte equivalent: 69 | key1 = "The quick brown fox jumps over the lazy dog." 
70 | 71 | key2 = """ 72 | The quick brown \ 73 | 74 | 75 | fox jumps over \ 76 | the lazy dog.""" 77 | 78 | key3 = """\ 79 | The quick brown \ 80 | fox jumps over \ 81 | the lazy dog.\ 82 | """ 83 | 84 | [string.literal] 85 | 86 | # What you see is what you get. 87 | quoted = 'Tom "Dubs" Preston-Werner' 88 | regex = '<\i\c*\s*>' 89 | winpath = 'C:\Users\nodejs\templates' 90 | winpath2 = '\\ServerX\admin$\system32\' 91 | 92 | [string.literal.multiline] 93 | 94 | lines = ''' 95 | The first newline is 96 | trimmed in raw strings. 97 | All other whitespace 98 | is preserved. 99 | ''' 100 | regex2 = '''I [dw]on't need \d{2} apples''' 101 | 102 | ################################################################################ 103 | ## Integer 104 | 105 | # Integers are whole numbers. Positive numbers may be prefixed with a plus sign. 106 | # Negative numbers are prefixed with a minus sign. 107 | 108 | [integer] 109 | 110 | key1 = +99 111 | key2 = 42 112 | key3 = 0 113 | key4 = -17 114 | 115 | [integer.underscores] 116 | 117 | # For large numbers, you may use underscores to enhance readability. Each 118 | # underscore must be surrounded by at least one digit. 119 | key1 = 1_000 120 | key2 = 5_349_221 121 | key3 = 1_2_3_4_5 # valid but inadvisable 122 | 123 | ################################################################################ 124 | ## Float 125 | 126 | # A float consists of an integer part (which may be prefixed with a plus or 127 | # minus sign) followed by a fractional part and/or an exponent part. 128 | 129 | [float.fractional] 130 | 131 | key1 = +1.0 132 | key2 = 3.1415 133 | key3 = -0.01 134 | 135 | [float.exponent] 136 | 137 | key1 = 5e+22 138 | key2 = 1e6 139 | key3 = -2E-2 140 | 141 | [float.both] 142 | 143 | key = 6.626e-34 144 | 145 | [float.underscores] 146 | 147 | # This file is used for benches and toml-rs doesn't yet support these: 148 | # key1 = 9_224_617.445_991_228_313 149 | # key2 = 1e1_000 150 | 151 | ################################################################################ 152 | ## Boolean 153 | 154 | # Booleans are just the tokens you're used to. Always lowercase. 155 | 156 | [boolean] 157 | 158 | False = false 159 | True = true 160 | 161 | ################################################################################ 162 | ## Datetime 163 | 164 | # Datetimes are RFC 3339 dates. 165 | 166 | [datetime] 167 | 168 | key1 = 1979-05-27T07:32:00Z 169 | key2 = 1979-05-27T00:32:00-07:00 170 | key3 = 1979-05-27T00:32:00.999999-07:00 171 | 172 | ################################################################################ 173 | ## Array 174 | 175 | # Arrays are square brackets with other primitives inside. Whitespace is 176 | # ignored. Elements are separated by commas. Data types may not be mixed. 177 | 178 | [array] 179 | 180 | key1 = [1, 2, 3] 181 | key2 = ["red", "yellow", "green"] 182 | key3 = [[1, 2], [3, 4, 5]] 183 | key4 = [[1, 2], ["a", "b", "c"]] # this is ok 184 | 185 | # Arrays can also be multiline. So in addition to ignoring whitespace, arrays 186 | # also ignore newlines between the brackets. Terminating commas are ok before 187 | # the closing bracket. 188 | 189 | key5 = [1, 2, 3] 190 | key6 = [ 191 | 1, 192 | 2, # this is ok 193 | ] 194 | 195 | ################################################################################ 196 | ## Array of Tables 197 | 198 | # These can be expressed by using a table name in double brackets. Each table 199 | # with the same double bracketed name will be an element in the array. 
The 200 | # tables are inserted in the order encountered. 201 | 202 | [[products]] 203 | 204 | name = "Hammer" 205 | sku = 738594937 206 | 207 | [[products]] 208 | 209 | [[products]] 210 | 211 | color = "gray" 212 | name = "Nail" 213 | sku = 284758393 214 | 215 | # You can create nested arrays of tables as well. 216 | 217 | [[fruit]] 218 | name = "apple" 219 | 220 | [fruit.physical] 221 | color = "red" 222 | shape = "round" 223 | 224 | [[fruit.variety]] 225 | name = "red delicious" 226 | 227 | [[fruit.variety]] 228 | name = "granny smith" 229 | 230 | [[fruit]] 231 | name = "banana" 232 | 233 | [[fruit.variety]] 234 | name = "plantain" 235 | -------------------------------------------------------------------------------- /test-data/rewrite/nothing_expected.toml: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ## Comment 3 | 4 | # Speak your mind with the hash symbol. They go from the symbol to the end of 5 | # the line. 6 | 7 | ################################################################################ 8 | ## Table 9 | 10 | # Tables (also known as hash tables or dictionaries) are collections of 11 | # key/value pairs. They appear in square brackets on a line by themselves. 12 | 13 | root_value = 2 14 | 15 | [table] 16 | 17 | key = "value" # Yeah, you can do this. 18 | 19 | # Nested tables are denoted by table names with dots in them. Name your tables 20 | # whatever crap you please, just don't use #, ., [ or ]. 21 | 22 | [table.subtable] 23 | 24 | key = "another value" 25 | 26 | # You don't need to specify all the super-tables if you don't want to. TOML 27 | # knows how to do it for you. 28 | 29 | # [x] you 30 | # [x.y] don't 31 | # [x.y.z] need these 32 | [x.y.z.w] # for this to work 33 | 34 | ################################################################################ 35 | ## Inline Table 36 | 37 | # Inline tables provide a more compact syntax for expressing tables. They are 38 | # especially useful for grouped data that can otherwise quickly become verbose. 39 | # Inline tables are enclosed in curly braces `{` and `}`. No newlines are 40 | # allowed between the curly braces unless they are valid within a value. 41 | 42 | [table.inline] 43 | 44 | name = { first = "Tom", last = "Preston-Werner" } 45 | point = { x = 1, y = 2 } 46 | 47 | ################################################################################ 48 | ## String 49 | 50 | # There are four ways to express strings: basic, multi-line basic, literal, and 51 | # multi-line literal. All strings must contain only valid UTF-8 characters. 52 | 53 | [string.basic] 54 | 55 | basic = "I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF." 56 | 57 | [string.multiline] 58 | 59 | # The following strings are byte-for-byte equivalent: 60 | key1 = "One\nTwo" 61 | key2 = """One\nTwo""" 62 | key3 = """ 63 | One 64 | Two""" 65 | 66 | [string.multiline.continued] 67 | 68 | # The following strings are byte-for-byte equivalent: 69 | key1 = "The quick brown fox jumps over the lazy dog." 70 | 71 | key2 = """ 72 | The quick brown \ 73 | 74 | 75 | fox jumps over \ 76 | the lazy dog.""" 77 | 78 | key3 = """\ 79 | The quick brown \ 80 | fox jumps over \ 81 | the lazy dog.\ 82 | """ 83 | 84 | [string.literal] 85 | 86 | # What you see is what you get. 
87 | quoted = 'Tom "Dubs" Preston-Werner' 88 | regex = '<\i\c*\s*>' 89 | winpath = 'C:\Users\nodejs\templates' 90 | winpath2 = '\\ServerX\admin$\system32\' 91 | 92 | [string.literal.multiline] 93 | 94 | lines = ''' 95 | The first newline is 96 | trimmed in raw strings. 97 | All other whitespace 98 | is preserved. 99 | ''' 100 | regex2 = '''I [dw]on't need \d{2} apples''' 101 | 102 | ################################################################################ 103 | ## Integer 104 | 105 | # Integers are whole numbers. Positive numbers may be prefixed with a plus sign. 106 | # Negative numbers are prefixed with a minus sign. 107 | 108 | [integer] 109 | 110 | key1 = +99 111 | key2 = 42 112 | key3 = 0 113 | key4 = -17 114 | 115 | [integer.underscores] 116 | 117 | # For large numbers, you may use underscores to enhance readability. Each 118 | # underscore must be surrounded by at least one digit. 119 | key1 = 1_000 120 | key2 = 5_349_221 121 | key3 = 1_2_3_4_5 # valid but inadvisable 122 | 123 | ################################################################################ 124 | ## Float 125 | 126 | # A float consists of an integer part (which may be prefixed with a plus or 127 | # minus sign) followed by a fractional part and/or an exponent part. 128 | 129 | [float.fractional] 130 | 131 | key1 = +1.0 132 | key2 = 3.1415 133 | key3 = -0.01 134 | 135 | [float.exponent] 136 | 137 | key1 = 5e+22 138 | key2 = 1e6 139 | key3 = -2E-2 140 | 141 | [float.both] 142 | 143 | key = 6.626e-34 144 | 145 | [float.underscores] 146 | 147 | # This file is used for benches and toml-rs doesn't yet support these: 148 | # key1 = 9_224_617.445_991_228_313 149 | # key2 = 1e1_000 150 | 151 | ################################################################################ 152 | ## Boolean 153 | 154 | # Booleans are just the tokens you're used to. Always lowercase. 155 | 156 | [boolean] 157 | 158 | False = false 159 | True = true 160 | 161 | ################################################################################ 162 | ## Datetime 163 | 164 | # Datetimes are RFC 3339 dates. 165 | 166 | [datetime] 167 | 168 | key1 = 1979-05-27T07:32:00Z 169 | key2 = 1979-05-27T00:32:00-07:00 170 | key3 = 1979-05-27T00:32:00.999999-07:00 171 | 172 | ################################################################################ 173 | ## Array 174 | 175 | # Arrays are square brackets with other primitives inside. Whitespace is 176 | # ignored. Elements are separated by commas. Data types may not be mixed. 177 | 178 | [array] 179 | 180 | key1 = [1, 2, 3] 181 | key2 = ["red", "yellow", "green"] 182 | key3 = [[1, 2], [3, 4, 5]] 183 | key4 = [[1, 2], ["a", "b", "c"]] # this is ok 184 | 185 | # Arrays can also be multiline. So in addition to ignoring whitespace, arrays 186 | # also ignore newlines between the brackets. Terminating commas are ok before 187 | # the closing bracket. 188 | 189 | key5 = [1, 2, 3] 190 | key6 = [ 191 | 1, 192 | 2, # this is ok 193 | ] 194 | 195 | ################################################################################ 196 | ## Array of Tables 197 | 198 | # These can be expressed by using a table name in double brackets. Each table 199 | # with the same double bracketed name will be an element in the array. The 200 | # tables are inserted in the order encountered. 
201 | 202 | [[products]] 203 | 204 | name = "Hammer" 205 | sku = 738594937 206 | 207 | [[products]] 208 | 209 | [[products]] 210 | 211 | color = "gray" 212 | name = "Nail" 213 | sku = 284758393 214 | 215 | # You can create nested arrays of tables as well. 216 | 217 | [[fruit]] 218 | name = "apple" 219 | 220 | [fruit.physical] 221 | color = "red" 222 | shape = "round" 223 | 224 | [[fruit.variety]] 225 | name = "red delicious" 226 | 227 | [[fruit.variety]] 228 | name = "granny smith" 229 | 230 | [[fruit]] 231 | name = "banana" 232 | 233 | [[fruit.variety]] 234 | name = "plantain" 235 | -------------------------------------------------------------------------------- /src/syntax.rs: -------------------------------------------------------------------------------- 1 | //! Declaration of the syntax tokens and lexer implementation. 2 | 3 | #![allow(non_camel_case_types)] 4 | 5 | use logos::{Lexer, Logos}; 6 | 7 | /// Enum containing all the tokens in a syntax tree. 8 | #[derive(Logos, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] 9 | #[repr(u16)] 10 | pub enum SyntaxKind { 11 | #[regex(r"([ \t])+")] 12 | WHITESPACE = 0, 13 | 14 | #[regex(r"(\n|\r\n)+")] 15 | NEWLINE, 16 | 17 | #[regex(r"#[^\n\r]*", allow_greedy = true)] 18 | COMMENT, 19 | 20 | #[regex(r"[A-Za-z0-9_-]+", priority = 2)] 21 | IDENT, 22 | 23 | /// Not part of the regular TOML syntax, only used to allow 24 | /// glob patterns in keys. 25 | #[regex(r"[*?A-Za-z0-9_-]+", priority = 1)] 26 | IDENT_WITH_GLOB, 27 | 28 | #[token(".")] 29 | PERIOD, 30 | 31 | #[token(",")] 32 | COMMA, 33 | 34 | #[token("=")] 35 | EQ, 36 | 37 | #[regex(r#"""#, lex_string)] 38 | STRING, 39 | 40 | #[regex(r#"""""#, lex_multi_line_string)] 41 | MULTI_LINE_STRING, 42 | 43 | #[regex(r#"'"#, lex_string_literal)] 44 | STRING_LITERAL, 45 | 46 | #[regex(r#"'''"#, lex_multi_line_string_literal)] 47 | MULTI_LINE_STRING_LITERAL, 48 | 49 | #[regex(r"[+-]?[0-9_]+", priority = 4)] 50 | INTEGER, 51 | 52 | #[regex(r"0x[0-9A-Fa-f_]+")] 53 | INTEGER_HEX, 54 | 55 | #[regex(r"0o[0-7_]+")] 56 | INTEGER_OCT, 57 | 58 | #[regex(r"0b(0|1|_)+")] 59 | INTEGER_BIN, 60 | 61 | #[regex(r"[-+]?([0-9_]+(\.[0-9_]+)?([eE][+-]?[0-9_]+)?|nan|inf)", priority = 3)] 62 | FLOAT, 63 | 64 | #[regex(r"true|false")] 65 | BOOL, 66 | 67 | #[regex(r#"(?:[1-9]\d\d\d-(?:(?:0[1-9]|1[0-2])-(?:0[1-9]|1\d|2[0-8])|(?:0[13-9]|1[0-2])-(?:29|30)|(?:0[13578]|1[02])-31)|(?:[1-9]\d(?:0[48]|[2468][048]|[13579][26])|(?:[2468][048]|[13579][26])00)-02-29)(?:T|t| )(?:[01]\d|2[0-3]):[0-5]\d:[0-5]\d(?:(?:\.|,)\d+)?(?:[Zz]|[+-][01]\d:[0-5]\d)"#)] 68 | DATE_TIME_OFFSET, 69 | 70 | #[regex(r#"(?:[1-9]\d\d\d-(?:(?:0[1-9]|1[0-2])-(?:0[1-9]|1\d|2[0-8])|(?:0[13-9]|1[0-2])-(?:29|30)|(?:0[13578]|1[02])-31)|(?:[1-9]\d(?:0[48]|[2468][048]|[13579][26])|(?:[2468][048]|[13579][26])00)-02-29)(?:T|t| )(?:[01]\d|2[0-3]):[0-5]\d:[0-5]\d(?:(?:\.|,)\d+)?"#)] 71 | DATE_TIME_LOCAL, 72 | 73 | #[regex(r#"(?:[1-9]\d\d\d-(?:(?:0[1-9]|1[0-2])-(?:0[1-9]|1\d|2[0-8])|(?:0[13-9]|1[0-2])-(?:29|30)|(?:0[13578]|1[02])-31)|(?:[1-9]\d(?:0[48]|[2468][048]|[13579][26])|(?:[2468][048]|[13579][26])00)-02-29)"#)] 74 | DATE, 75 | 76 | #[regex(r#"(?:[01]\d|2[0-3]):[0-5]\d:[0-5]\d(?:(?:\.|,)\d+)?"#)] 77 | TIME, 78 | 79 | #[token("[")] 80 | BRACKET_START, 81 | 82 | #[token("]")] 83 | BRACKET_END, 84 | 85 | #[token("{")] 86 | BRACE_START, 87 | 88 | #[token("}")] 89 | BRACE_END, 90 | 91 | ERROR, 92 | 93 | // composite types 94 | KEY, // e.g.: parent.child 95 | VALUE, // e.g.: "2" 96 | TABLE_HEADER, // e.g.: [table] 97 | TABLE_ARRAY_HEADER, // e.g.: [[table]] 98 | 
ENTRY, // e.g.: key = "value"
99 | ARRAY, // e.g.: [ 1, 2 ]
100 | INLINE_TABLE, // e.g.: { key = "value" }
101 |
102 | ROOT, // root node
103 | }
104 |
105 | impl From<SyntaxKind> for rowan::SyntaxKind {
106 | fn from(kind: SyntaxKind) -> Self {
107 | Self(kind as u16)
108 | }
109 | }
110 |
111 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
112 | pub enum Lang {}
113 | impl rowan::Language for Lang {
114 | type Kind = SyntaxKind;
115 | fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
116 | assert!(raw.0 <= SyntaxKind::ROOT as u16);
117 | unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
118 | }
119 | fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
120 | kind.into()
121 | }
122 | }
123 |
124 | pub type SyntaxNode = rowan::SyntaxNode<Lang>;
125 | pub type SyntaxToken = rowan::SyntaxToken<Lang>;
126 | pub type SyntaxElement = rowan::NodeOrToken<SyntaxNode, SyntaxToken>;
127 |
128 | fn lex_string(lex: &mut Lexer<SyntaxKind>) -> bool {
129 | let remainder: &str = lex.remainder();
130 | let mut escaped = false;
131 |
132 | let mut total_len = 0;
133 |
134 | for c in remainder.chars() {
135 | total_len += c.len_utf8();
136 |
137 | if c == '\\' {
138 | escaped = !escaped;
139 | continue;
140 | }
141 |
142 | if c == '"' && !escaped {
143 | lex.bump(total_len);
144 | return true;
145 | }
146 |
147 | escaped = false;
148 | }
149 | false
150 | }
151 |
152 | fn lex_multi_line_string(lex: &mut Lexer<SyntaxKind>) -> bool {
153 | let remainder: &str = lex.remainder();
154 |
155 | let mut total_len = 0;
156 | let mut quote_count = 0;
157 |
158 | let mut escaped = false;
159 |
160 | // As the string can contain ",
161 | // we can end up with more than 3 "-s at
162 | // the end, in that case we need to include all
163 | // in the string.
164 | let mut quotes_found = false;
165 |
166 | for c in remainder.chars() {
167 | if quotes_found {
168 | if c != '"' {
169 | if quote_count >= 6 {
170 | return false;
171 | }
172 |
173 | lex.bump(total_len);
174 | return true;
175 | } else {
176 | quote_count += 1;
177 | total_len += c.len_utf8();
178 | continue;
179 | }
180 | }
181 | total_len += c.len_utf8();
182 |
183 | if c == '\\' {
184 | escaped = true;
185 | continue;
186 | }
187 |
188 | if c == '"' && !escaped {
189 | quote_count += 1;
190 | } else {
191 | quote_count = 0;
192 | }
193 |
194 | if quote_count == 3 {
195 | quotes_found = true;
196 | }
197 |
198 | escaped = false;
199 | }
200 |
201 | // End of input
202 | if quotes_found {
203 | if quote_count >= 6 {
204 | return false;
205 | }
206 |
207 | lex.bump(total_len);
208 | true
209 | } else {
210 | false
211 | }
212 | }
213 |
214 | fn lex_string_literal(lex: &mut Lexer<SyntaxKind>) -> bool {
215 | let remainder: &str = lex.remainder();
216 | let mut total_len = 0;
217 |
218 | for c in remainder.chars() {
219 | total_len += c.len_utf8();
220 |
221 | if c == '\'' {
222 | lex.bump(total_len);
223 | return true;
224 | }
225 | }
226 | false
227 | }
228 |
229 | fn lex_multi_line_string_literal(lex: &mut Lexer<SyntaxKind>) -> bool {
230 | let remainder: &str = lex.remainder();
231 |
232 | let mut total_len = 0;
233 | let mut quote_count = 0;
234 |
235 | // As the string can contain ',
236 | // we can end up with more than 3 '-s at
237 | // the end, in that case we need to include all
238 | // in the string.
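// For example, in `''''a''''` the extra quote on each side belongs to the
// string body, so the whole `''''a''''` is lexed as one token and the body
// between the delimiters is `'a'`. (Editor's note: illustrative example
// inferred from the loop below.)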
239 | let mut quotes_found = false;
240 |
241 | for c in remainder.chars() {
242 | if quotes_found {
243 | if c != '\'' {
244 | lex.bump(total_len);
245 | return true;
246 | } else {
247 | if quote_count > 4 {
248 | return false;
249 | }
250 |
251 | quote_count += 1;
252 | total_len += c.len_utf8();
253 | continue;
254 | }
255 | }
256 | total_len += c.len_utf8();
257 |
258 | if c == '\'' {
259 | quote_count += 1;
260 | } else {
261 | quote_count = 0;
262 | }
263 |
264 | if quote_count == 3 {
265 | quotes_found = true;
266 | }
267 | }
268 |
269 | // End of input
270 | if quotes_found {
271 | lex.bump(total_len);
272 | true
273 | } else {
274 | false
275 | }
276 | }
277 |
--------------------------------------------------------------------------------
/src/parser/mod.rs:
--------------------------------------------------------------------------------
1 | //! TOML document to syntax tree parsing.
2 |
3 | use crate::{
4 | syntax::{SyntaxKind, SyntaxKind::*, SyntaxNode},
5 | util::{allowed_chars, check_escape},
6 | };
7 | use logos::{Lexer, Logos};
8 | use rowan::{GreenNode, GreenNodeBuilder, TextRange, TextSize};
9 | use std::convert::TryInto;
10 |
11 | #[macro_use]
12 | mod macros;
13 |
14 | /// A syntax error that can occur during parsing.
15 | #[derive(Debug, Clone, Eq, PartialEq, Hash)]
16 | pub struct Error {
17 | /// The span of the error.
18 | pub range: TextRange,
19 |
20 | /// Human-friendly error message.
21 | pub message: String,
22 | }
23 |
24 | impl core::fmt::Display for Error {
25 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
26 | write!(f, "{} ({:?})", &self.message, &self.range)
27 | }
28 | }
29 | impl std::error::Error for Error {}
30 |
31 | /// Parse a TOML document into a [Rowan green tree](rowan::GreenNode).
32 | ///
33 | /// The parsing will not stop at unexpected or invalid tokens.
34 | /// Instead errors will be collected with their character offsets and lengths,
35 | /// and the invalid token(s) will have the `ERROR` kind in the final tree.
36 | ///
37 | /// The parser will also validate comment and string contents, looking for
38 | /// invalid escape sequences and invalid characters.
39 | /// These will also be reported as syntax errors.
40 | ///
41 | /// This does not check for semantic errors such as duplicate keys.
42 | pub fn parse(source: &str) -> Parse {
43 | Parser::new(source).parse()
44 | }
45 |
46 | /// A hand-written parser that uses the Logos lexer
47 | /// to tokenize the source, then constructs
48 | /// a Rowan green tree from them.
49 | pub(crate) struct Parser<'p> {
50 | skip_whitespace: bool,
51 | // Allow glob patterns as keys and using [] instead of dots.
52 | key_pattern_syntax: bool,
53 | current_token: Option<SyntaxKind>,
54 |
55 | // These tokens are not consumed on errors.
56 | //
57 | // The syntax error is still reported,
58 | // but the surrounding context can still
59 | // be parsed.
60 | // FIXME(bit_flags):
61 | // This is VERY wrong, as the members of the
62 | // enums are not proper bit flags.
63 | //
64 | // However this incorrect behavior marks fewer tokens
65 | // as errors, making the parser more fault-tolerant.
66 | // Instead of fixing this it would probably be better to
67 | // remove the ERROR token altogether, or reserve it for
68 | // special cases.
69 | error_whitelist: u16,
70 |
71 | lexer: Lexer<'p, SyntaxKind>,
72 | builder: GreenNodeBuilder<'p>,
73 | errors: Vec<Error>,
74 | }
75 |
76 | impl Parser<'_> {
77 | /// Required for patch syntax
78 | /// and key matches.
79 | ///
80 | /// It allows a part of glob syntax in identifiers as well.
81 | #[allow(dead_code)]
82 | pub(crate) fn parse_key_only(mut self) -> Parse {
83 | self.key_pattern_syntax = true;
84 | let _ = with_node!(self.builder, KEY, self.parse_key());
85 |
86 | Parse {
87 | green_node: self.builder.finish(),
88 | errors: self.errors,
89 | }
90 | }
91 | }
92 |
93 | /// This is just a convenience type during parsing.
94 | /// It allows using "?", making the code cleaner.
95 | type ParserResult<T> = Result<T, ()>;
96 |
97 | // FIXME(recursion)
98 | // Deeply nested structures cause stack overflow;
99 | // this probably has to be rewritten into a state machine
100 | // that contains minimal function calls.
101 | impl<'p> Parser<'p> {
102 | pub(crate) fn new(source: &'p str) -> Self {
103 | Parser {
104 | current_token: None,
105 | skip_whitespace: true,
106 | key_pattern_syntax: false,
107 | error_whitelist: 0,
108 | lexer: SyntaxKind::lexer(source),
109 | builder: Default::default(),
110 | errors: Default::default(),
111 | }
112 | }
113 |
114 | fn parse(mut self) -> Parse {
115 | let _ = with_node!(self.builder, ROOT, self.parse_root());
116 |
117 | Parse {
118 | green_node: self.builder.finish(),
119 | errors: self.errors,
120 | }
121 | }
122 |
123 | fn error(&mut self, message: &str) -> ParserResult<()> {
124 | let span = self.lexer.span();
125 |
126 | let err = Error {
127 | range: TextRange::new(
128 | TextSize::from(span.start as u32),
129 | TextSize::from(span.end as u32),
130 | ),
131 | message: message.into(),
132 | };
133 |
134 | let same_error = self
135 | .errors
136 | .last()
137 | .map(|e| e.range == err.range)
138 | .unwrap_or(false);
139 |
140 | if !same_error {
141 | self.add_error(&Error {
142 | range: TextRange::new(
143 | TextSize::from(span.start as u32),
144 | TextSize::from(span.end as u32),
145 | ),
146 | message: message.into(),
147 | });
148 | if let Some(t) = self.current_token
149 | && !self.whitelisted(t) {
150 | self.token_as(ERROR).ok();
151 | }
152 | } else {
153 | self.token_as(ERROR).ok();
154 | }
155 |
156 | Err(())
157 | }
158 |
159 | // report error without consuming the current token
160 | fn report_error(&mut self, message: &str) -> ParserResult<()> {
161 | let span = self.lexer.span();
162 | self.add_error(&Error {
163 | range: TextRange::new(
164 | TextSize::from(span.start as u32),
165 | TextSize::from(span.end as u32),
166 | ),
167 | message: message.into(),
168 | });
169 | Err(())
170 | }
171 |
172 | fn add_error(&mut self, e: &Error) {
173 | if let Some(last_err) = self.errors.last_mut()
174 | && last_err == e {
175 | return;
176 | }
177 |
178 | self.errors.push(e.clone());
179 | }
180 |
181 | #[inline]
182 | fn whitelist_token(&mut self, token: SyntaxKind) {
183 | self.error_whitelist |= token as u16;
184 | }
185 |
186 | #[inline]
187 | fn blacklist_token(&mut self, token: SyntaxKind) {
188 | self.error_whitelist &= !(token as u16);
189 | }
190 |
191 | #[inline]
192 | fn whitelisted(&self, token: SyntaxKind) -> bool {
193 | self.error_whitelist & token as u16 != 0
194 | }
195 |
196 | fn insert_token(&mut self, kind: SyntaxKind, s: &str) {
197 | self.builder.token(kind.into(), s)
198 | }
199 |
200 | fn must_token_or(&mut self, kind: SyntaxKind, message: &str) -> ParserResult<()> {
201 | match self.get_token() {
202 | Ok(t) => {
203 | if kind == t {
204 | self.token()
205 | } else {
206 | self.error(message)
207 | }
208 | }
209 | Err(_) => {
210 | self.add_error(&Error {
211 | range: TextRange::new(
212 | self.lexer.span().start.try_into().unwrap(),
213 |
self.lexer.span().end.try_into().unwrap(), 214 | ), 215 | message: "unexpected EOF".into(), 216 | }); 217 | Err(()) 218 | } 219 | } 220 | } 221 | 222 | // This is the same as `token` but won't consume trailing whitespace. 223 | fn add_token(&mut self) -> ParserResult<()> { 224 | match self.get_token() { 225 | Err(_) => Err(()), 226 | Ok(token) => { 227 | self.builder.token(token.into(), self.lexer.slice()); 228 | self.current_token = None; 229 | Ok(()) 230 | } 231 | } 232 | } 233 | 234 | fn token(&mut self) -> ParserResult<()> { 235 | match self.get_token() { 236 | Err(_) => Err(()), 237 | Ok(token) => self.token_as(token), 238 | } 239 | } 240 | 241 | /// This function implicitly calls `step`, 242 | /// it was definitely not a good design decision 243 | /// but changing this behaviour involves a 244 | /// different syntax tree and breakages down the line. 245 | fn token_as(&mut self, kind: SyntaxKind) -> ParserResult<()> { 246 | self.token_as_no_step(kind)?; 247 | self.step(); 248 | Ok(()) 249 | } 250 | 251 | fn token_as_no_step(&mut self, kind: SyntaxKind) -> ParserResult<()> { 252 | match self.get_token() { 253 | Err(_) => return Err(()), 254 | Ok(_) => { 255 | self.builder.token(kind.into(), self.lexer.slice()); 256 | } 257 | } 258 | 259 | Ok(()) 260 | } 261 | 262 | fn step(&mut self) { 263 | self.current_token = None; 264 | while let Some(token) = self.lexer.next() { 265 | let token = token.unwrap_or(ERROR); 266 | match token { 267 | COMMENT => { 268 | match allowed_chars::comment(self.lexer.slice()) { 269 | Ok(_) => {} 270 | Err(err_indices) => { 271 | for e in err_indices { 272 | self.add_error(&Error { 273 | range: TextRange::new( 274 | (self.lexer.span().start + e).try_into().unwrap(), 275 | (self.lexer.span().start + e).try_into().unwrap(), 276 | ), 277 | message: "invalid character in comment".into(), 278 | }); 279 | } 280 | } 281 | }; 282 | 283 | self.insert_token(token, self.lexer.slice()); 284 | } 285 | WHITESPACE => { 286 | if self.skip_whitespace { 287 | self.insert_token(token, self.lexer.slice()); 288 | } else { 289 | self.current_token = Some(token); 290 | break; 291 | } 292 | } 293 | ERROR => { 294 | self.insert_token(token, self.lexer.slice()); 295 | let span = self.lexer.span(); 296 | self.add_error(&Error { 297 | range: TextRange::new( 298 | span.start.try_into().unwrap(), 299 | span.end.try_into().unwrap(), 300 | ), 301 | message: "unexpected token".into(), 302 | }) 303 | } 304 | _ => { 305 | self.current_token = Some(token); 306 | break; 307 | } 308 | } 309 | } 310 | } 311 | 312 | fn get_token(&mut self) -> ParserResult { 313 | if self.current_token.is_none() { 314 | self.step(); 315 | } 316 | 317 | self.current_token.ok_or(()) 318 | } 319 | 320 | fn parse_root(&mut self) -> ParserResult<()> { 321 | // Ensure we have newlines between entries 322 | let mut not_newline = false; 323 | 324 | // We want to make sure that an entry spans the 325 | // entire line, so we start/close its node manually. 
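// For example, in `key = 1 # comment`, the trailing whitespace and COMMENT
// tokens are emitted while the ENTRY node is still open, so they end up
// inside it. (Editor's note: illustrative, based on the loop below.)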
326 | let mut entry_started = false; 327 | 328 | while let Ok(token) = self.get_token() { 329 | match token { 330 | BRACKET_START => { 331 | if entry_started { 332 | self.builder.finish_node(); 333 | entry_started = false; 334 | } 335 | 336 | if not_newline { 337 | let _ = self.error("expected new line"); 338 | continue; 339 | } 340 | 341 | not_newline = true; 342 | 343 | if self.lexer.remainder().starts_with('[') { 344 | let _ = whitelisted!( 345 | self, 346 | NEWLINE, 347 | with_node!( 348 | self.builder, 349 | TABLE_ARRAY_HEADER, 350 | self.parse_table_array_header() 351 | ) 352 | ); 353 | } else { 354 | let _ = whitelisted!( 355 | self, 356 | NEWLINE, 357 | with_node!(self.builder, TABLE_HEADER, self.parse_table_header()) 358 | ); 359 | } 360 | } 361 | NEWLINE => { 362 | not_newline = false; 363 | if entry_started { 364 | self.builder.finish_node(); 365 | entry_started = false; 366 | } 367 | let _ = self.token(); 368 | } 369 | _ => { 370 | if not_newline { 371 | let _ = self.error("expected new line"); 372 | continue; 373 | } 374 | if entry_started { 375 | self.builder.finish_node(); 376 | } 377 | not_newline = true; 378 | self.builder.start_node(ENTRY.into()); 379 | entry_started = true; 380 | let _ = whitelisted!(self, NEWLINE, self.parse_entry()); 381 | } 382 | } 383 | } 384 | if entry_started { 385 | self.builder.finish_node(); 386 | } 387 | 388 | Ok(()) 389 | } 390 | 391 | fn parse_table_header(&mut self) -> ParserResult<()> { 392 | self.must_token_or(BRACKET_START, r#"expected "[""#)?; 393 | let _ = with_node!(self.builder, KEY, self.parse_key()); 394 | self.must_token_or(BRACKET_END, r#"expected "]""#)?; 395 | 396 | Ok(()) 397 | } 398 | 399 | fn parse_table_array_header(&mut self) -> ParserResult<()> { 400 | self.skip_whitespace = false; 401 | self.must_token_or(BRACKET_START, r#"expected "[[""#)?; 402 | self.must_token_or(BRACKET_START, r#"expected "[[""#)?; 403 | self.skip_whitespace = true; 404 | let _ = with_node!(self.builder, KEY, self.parse_key()); 405 | self.skip_whitespace = false; 406 | let _ = self.must_token_or(BRACKET_END, r#"expected "]]""#); 407 | 408 | // Hack in order to avoid calling `step` after 409 | // the second closing bracket. 
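// (Editor's note: the point of the hack appears to be restoring
// `skip_whitespace` first, so the whitespace after the final `]` is
// handled by the later `step` call rather than being consumed early.)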
410 | let token = self.get_token()?; 411 | match token { 412 | BRACKET_END => { 413 | self.token_as_no_step(token)?; 414 | } 415 | _ => { 416 | self.error(r#"expected "]]"#)?; 417 | } 418 | } 419 | self.skip_whitespace = true; 420 | 421 | self.step(); 422 | 423 | Ok(()) 424 | } 425 | 426 | fn parse_entry(&mut self) -> ParserResult<()> { 427 | with_node!(self.builder, KEY, self.parse_key())?; 428 | self.must_token_or(EQ, r#"expected "=""#)?; 429 | with_node!(self.builder, VALUE, self.parse_value())?; 430 | 431 | Ok(()) 432 | } 433 | 434 | fn parse_key(&mut self) -> ParserResult<()> { 435 | if self.parse_ident().is_err() { 436 | return self.report_error("expected identifier"); 437 | } 438 | 439 | let mut after_period = false; 440 | loop { 441 | let t = match self.get_token() { 442 | Ok(token) => token, 443 | Err(_) => { 444 | if !after_period { 445 | return Ok(()); 446 | } 447 | return self.error("unexpected end of input"); 448 | } 449 | }; 450 | 451 | match t { 452 | PERIOD => { 453 | if after_period { 454 | return self.error(r#"unexpected ".""#); 455 | } else { 456 | self.token()?; 457 | after_period = true; 458 | } 459 | } 460 | BRACKET_START if self.key_pattern_syntax => { 461 | self.step(); 462 | 463 | match self.parse_ident() { 464 | Ok(_) => {} 465 | Err(_) => return self.error("expected identifier"), 466 | } 467 | 468 | let token = self.get_token()?; 469 | 470 | if !matches!(token, BRACKET_END) { 471 | self.error(r#"expected "]""#)?; 472 | } 473 | self.step(); 474 | after_period = false; 475 | } 476 | _ => { 477 | if after_period { 478 | match self.parse_ident() { 479 | Ok(_) => {} 480 | Err(_) => return self.report_error("expected identifier"), 481 | } 482 | after_period = false; 483 | } else if self.key_pattern_syntax { 484 | return self.error("unexpected identifier"); 485 | } else { 486 | break; 487 | } 488 | } 489 | }; 490 | } 491 | 492 | Ok(()) 493 | } 494 | 495 | fn parse_ident(&mut self) -> ParserResult<()> { 496 | let t = self.get_token()?; 497 | match t { 498 | IDENT => self.token(), 499 | IDENT_WITH_GLOB => { 500 | if self.key_pattern_syntax { 501 | self.token_as(IDENT) 502 | } else { 503 | self.error("expected identifier") 504 | } 505 | } 506 | INTEGER_HEX | INTEGER_BIN | INTEGER_OCT => self.token_as(IDENT), 507 | INTEGER => { 508 | if self.lexer.slice().starts_with('+') { 509 | Err(()) 510 | } else { 511 | self.token_as(IDENT) 512 | } 513 | } 514 | STRING_LITERAL => { 515 | match allowed_chars::string_literal(self.lexer.slice()) { 516 | Ok(_) => {} 517 | Err(err_indices) => { 518 | for e in err_indices { 519 | self.add_error(&Error { 520 | range: TextRange::new( 521 | (self.lexer.span().start + e).try_into().unwrap(), 522 | (self.lexer.span().start + e).try_into().unwrap(), 523 | ), 524 | message: "invalid control character in string literal".into(), 525 | }); 526 | } 527 | } 528 | }; 529 | 530 | self.token_as(IDENT) 531 | } 532 | STRING => { 533 | match allowed_chars::string(self.lexer.slice()) { 534 | Ok(_) => {} 535 | Err(err_indices) => { 536 | for e in err_indices { 537 | self.add_error(&Error { 538 | range: TextRange::new( 539 | (self.lexer.span().start + e).try_into().unwrap(), 540 | (self.lexer.span().start + e).try_into().unwrap(), 541 | ), 542 | message: "invalid character in string".into(), 543 | }); 544 | } 545 | } 546 | }; 547 | 548 | match check_escape(self.lexer.slice()) { 549 | Ok(_) => self.token_as(IDENT), 550 | Err(err_indices) => { 551 | for e in err_indices { 552 | self.add_error(&Error { 553 | range: TextRange::new( 554 | (self.lexer.span().start + 
e).try_into().unwrap(), 555 | (self.lexer.span().start + e).try_into().unwrap(), 556 | ), 557 | message: "invalid escape sequence".into(), 558 | }); 559 | } 560 | 561 | // We proceed normally even if 562 | // the string contains invalid escapes. 563 | // It shouldn't affect the rest of the parsing. 564 | self.token_as(IDENT) 565 | } 566 | } 567 | } 568 | FLOAT => { 569 | if self.lexer.slice().starts_with('0') { 570 | self.error("zero-padded numbers are not allowed") 571 | } else if self.lexer.slice().starts_with('+') { 572 | Err(()) 573 | } else { 574 | for (i, s) in self.lexer.slice().split('.').enumerate() { 575 | if i != 0 { 576 | self.insert_token(PERIOD, "."); 577 | } 578 | 579 | self.insert_token(IDENT, s); 580 | } 581 | self.step(); 582 | Ok(()) 583 | } 584 | } 585 | BOOL => self.token_as(IDENT), 586 | DATE => self.token_as(IDENT), 587 | _ => self.error("expected identifier"), 588 | } 589 | } 590 | 591 | fn parse_value(&mut self) -> ParserResult<()> { 592 | let t = match self.get_token() { 593 | Ok(t) => t, 594 | Err(_) => return self.error("expected value"), 595 | }; 596 | 597 | match t { 598 | BOOL | DATE_TIME_OFFSET | DATE_TIME_LOCAL | DATE | TIME => self.token(), 599 | INTEGER => { 600 | // This is probably a logos bug or a priority issue, 601 | // for some reason "1979-05-27" gets lexed as INTEGER. 602 | if !self.lexer.slice().starts_with('-') && self.lexer.slice().contains('-') { 603 | return self.token_as(DATE); 604 | } 605 | 606 | // FIXME: probably another logos bug. 607 | if self.lexer.slice().contains(':') { 608 | return self.token_as(TIME); 609 | } 610 | 611 | // This could've been done more elegantly probably. 612 | if (self.lexer.slice().starts_with('0') && self.lexer.slice() != "0") 613 | || (self.lexer.slice().starts_with("+0") && self.lexer.slice() != "+0") 614 | || (self.lexer.slice().starts_with("-0") && self.lexer.slice() != "-0") 615 | { 616 | self.error("zero-padded integers are not allowed") 617 | } else if !check_underscores(self.lexer.slice(), 10) { 618 | self.error("invalid underscores") 619 | } else { 620 | self.token() 621 | } 622 | } 623 | INTEGER_BIN => { 624 | if !check_underscores(self.lexer.slice(), 2) { 625 | self.error("invalid underscores") 626 | } else { 627 | self.token() 628 | } 629 | } 630 | INTEGER_HEX => { 631 | if !check_underscores(self.lexer.slice(), 16) { 632 | self.error("invalid underscores") 633 | } else { 634 | self.token() 635 | } 636 | } 637 | INTEGER_OCT => { 638 | if !check_underscores(self.lexer.slice(), 8) { 639 | self.error("invalid underscores") 640 | } else { 641 | self.token() 642 | } 643 | } 644 | FLOAT => { 645 | // FIXME: probably another logos bug. 
646 | if self.lexer.slice().contains(':') { 647 | return self.token_as(TIME); 648 | } 649 | 650 | let int_slice = if self.lexer.slice().contains('.') { 651 | self.lexer.slice().split('.').next().unwrap() 652 | } else { 653 | self.lexer.slice().split('e').next().unwrap() 654 | }; 655 | 656 | if (int_slice.starts_with('0') && int_slice != "0") 657 | || (int_slice.starts_with("+0") && int_slice != "+0") 658 | || (int_slice.starts_with("-0") && int_slice != "-0") 659 | { 660 | self.error("zero-padded numbers are not allowed") 661 | } else if !check_underscores(self.lexer.slice(), 10) { 662 | self.error("invalid underscores") 663 | } else { 664 | self.token() 665 | } 666 | } 667 | STRING_LITERAL => { 668 | match allowed_chars::string_literal(self.lexer.slice()) { 669 | Ok(_) => {} 670 | Err(err_indices) => { 671 | for e in err_indices { 672 | self.add_error(&Error { 673 | range: TextRange::new( 674 | (self.lexer.span().start + e).try_into().unwrap(), 675 | (self.lexer.span().start + e).try_into().unwrap(), 676 | ), 677 | message: "invalid control character in string literal".into(), 678 | }); 679 | } 680 | } 681 | }; 682 | self.token() 683 | } 684 | MULTI_LINE_STRING_LITERAL => { 685 | match allowed_chars::multi_line_string_literal(self.lexer.slice()) { 686 | Ok(_) => {} 687 | Err(err_indices) => { 688 | for e in err_indices { 689 | self.add_error(&Error { 690 | range: TextRange::new( 691 | (self.lexer.span().start + e).try_into().unwrap(), 692 | (self.lexer.span().start + e).try_into().unwrap(), 693 | ), 694 | message: "invalid character in string".into(), 695 | }); 696 | } 697 | } 698 | }; 699 | self.token() 700 | } 701 | STRING => { 702 | match allowed_chars::string(self.lexer.slice()) { 703 | Ok(_) => {} 704 | Err(err_indices) => { 705 | for e in err_indices { 706 | self.add_error(&Error { 707 | range: TextRange::new( 708 | (self.lexer.span().start + e).try_into().unwrap(), 709 | (self.lexer.span().start + e).try_into().unwrap(), 710 | ), 711 | message: "invalid character in string".into(), 712 | }); 713 | } 714 | } 715 | }; 716 | 717 | match check_escape(self.lexer.slice()) { 718 | Ok(_) => self.token(), 719 | Err(err_indices) => { 720 | for e in err_indices { 721 | self.add_error(&Error { 722 | range: TextRange::new( 723 | (self.lexer.span().start + e).try_into().unwrap(), 724 | (self.lexer.span().start + e).try_into().unwrap(), 725 | ), 726 | message: "invalid escape sequence".into(), 727 | }); 728 | } 729 | 730 | // We proceed normally even if 731 | // the string contains invalid escapes. 732 | // It shouldn't affect the rest of the parsing. 733 | self.token() 734 | } 735 | } 736 | } 737 | MULTI_LINE_STRING => { 738 | match allowed_chars::multi_line_string(self.lexer.slice()) { 739 | Ok(_) => {} 740 | Err(err_indices) => { 741 | for e in err_indices { 742 | self.add_error(&Error { 743 | range: TextRange::new( 744 | (self.lexer.span().start + e).try_into().unwrap(), 745 | (self.lexer.span().start + e).try_into().unwrap(), 746 | ), 747 | message: "invalid character in string".into(), 748 | }); 749 | } 750 | } 751 | }; 752 | 753 | match check_escape(self.lexer.slice()) { 754 | Ok(_) => self.token(), 755 | Err(err_indices) => { 756 | for e in err_indices { 757 | self.add_error(&Error { 758 | range: TextRange::new( 759 | (self.lexer.span().start + e).try_into().unwrap(), 760 | (self.lexer.span().start + e).try_into().unwrap(), 761 | ), 762 | message: "invalid escape sequence".into(), 763 | }); 764 | } 765 | 766 | // We proceed normally even if 767 | // the string contains invalid escapes. 
768 | // It shouldn't affect the rest of the parsing. 769 | self.token() 770 | } 771 | } 772 | } 773 | BRACKET_START => { 774 | with_node!(self.builder, ARRAY, self.parse_array()) 775 | } 776 | BRACE_START => { 777 | with_node!(self.builder, INLINE_TABLE, self.parse_inline_table()) 778 | } 779 | IDENT | BRACE_END => { 780 | // FIXME(bit_flags): This branch is just a workaround. 781 | self.report_error("expected value").ok(); 782 | Ok(()) 783 | } 784 | _ => self.error("expected value"), 785 | } 786 | } 787 | 788 | fn parse_inline_table(&mut self) -> ParserResult<()> { 789 | self.must_token_or(BRACE_START, r#"expected "{""#)?; 790 | 791 | let mut first = true; 792 | let mut comma_last = false; 793 | let mut was_newline = false; 794 | 795 | loop { 796 | let t = match self.get_token() { 797 | Ok(t) => t, 798 | Err(_) => return self.report_error(r#"expected "}""#), 799 | }; 800 | 801 | match t { 802 | BRACE_END => { 803 | if comma_last { 804 | // it is still reported as a syntax error, 805 | // but we can still analyze it as if it was a valid 806 | // table. 807 | let _ = self.report_error("expected value, trailing comma is not allowed"); 808 | } 809 | break self.add_token()?; 810 | } 811 | NEWLINE => { 812 | // To avoid infinite loop in case 813 | // new lines are whitelisted. 814 | if was_newline { 815 | break; 816 | } 817 | 818 | let _ = self.error("newline is not allowed in an inline table"); 819 | was_newline = true; 820 | } 821 | COMMA => { 822 | if comma_last { 823 | let _ = self.report_error(r#"unexpected ",""#); 824 | } 825 | 826 | if first { 827 | let _ = self.error(r#"unexpected ",""#); 828 | } else { 829 | self.token()?; 830 | } 831 | comma_last = true; 832 | was_newline = false; 833 | } 834 | _ => { 835 | was_newline = false; 836 | if !comma_last && !first { 837 | let _ = self.error(r#"expected ",""#); 838 | } 839 | let _ = whitelisted!( 840 | self, 841 | COMMA, 842 | with_node!(self.builder, ENTRY, self.parse_entry()) 843 | ); 844 | comma_last = false; 845 | } 846 | } 847 | 848 | first = false; 849 | } 850 | Ok(()) 851 | } 852 | 853 | fn parse_array(&mut self) -> ParserResult<()> { 854 | self.must_token_or(BRACKET_START, r#"expected "[""#)?; 855 | 856 | let mut first = true; 857 | let mut comma_last = false; 858 | loop { 859 | let t = match self.get_token() { 860 | Ok(t) => t, 861 | Err(_) => { 862 | let _ = self.report_error("unexpected EOF"); 863 | return Err(()); 864 | } 865 | }; 866 | 867 | match t { 868 | BRACKET_END => break self.add_token()?, 869 | NEWLINE => { 870 | self.token()?; 871 | continue; // as if it wasn't there, so it doesn't count as a first token 872 | } 873 | COMMA => { 874 | if first || comma_last { 875 | let _ = self.error(r#"unexpected ",""#); 876 | } 877 | self.token()?; 878 | comma_last = true; 879 | } 880 | _ => { 881 | if !comma_last && !first { 882 | let _ = self.error(r#"expected ",""#); 883 | } 884 | let _ = whitelisted!( 885 | self, 886 | COMMA, 887 | with_node!(self.builder, VALUE, self.parse_value()) 888 | ); 889 | comma_last = false; 890 | } 891 | } 892 | 893 | first = false; 894 | } 895 | Ok(()) 896 | } 897 | } 898 | 899 | fn check_underscores(s: &str, radix: u32) -> bool { 900 | if s.starts_with('_') || s.ends_with('_') { 901 | return false; 902 | } 903 | 904 | let mut last_char = 0 as char; 905 | 906 | for c in s.chars() { 907 | if c == '_' && !last_char.is_digit(radix) { 908 | return false; 909 | } 910 | if !c.is_digit(radix) && last_char == '_' { 911 | return false; 912 | } 913 | last_char = c; 914 | } 915 | 916 | true 917 | } 918 | 919 | 
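// Editor's note: a minimal usage sketch of the public `parse` API above.
// It is not part of the original source and the inputs are illustrative only.

#[cfg(test)]
mod parse_sketch {
    use super::parse;

    #[test]
    fn collects_errors_without_stopping() {
        // A valid document parses with no errors.
        let ok = parse("key = 1\n[table]\nother = \"value\"\n");
        assert!(ok.errors.is_empty());
        let root = ok.into_syntax();
        assert_eq!(root.kind(), crate::syntax::SyntaxKind::ROOT);

        // An invalid document (missing value) still yields a full tree
        // plus the collected errors.
        let bad = parse("key = \n");
        assert!(!bad.errors.is_empty());
        assert!(bad.into_syntax().text().to_string().contains("key"));
    }
}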
/// The final results of a parsing. 920 | /// It contains the green tree, and 921 | /// the errors that occurred during parsing. 922 | #[derive(Debug, Clone)] 923 | pub struct Parse { 924 | pub green_node: GreenNode, 925 | pub errors: Vec, 926 | } 927 | 928 | impl Parse { 929 | /// Turn the parse into a syntax node. 930 | pub fn into_syntax(self) -> SyntaxNode { 931 | SyntaxNode::new_root(self.green_node) 932 | } 933 | } 934 | -------------------------------------------------------------------------------- /src/formatter/mod.rs: -------------------------------------------------------------------------------- 1 | //! This module is used to format TOML. 2 | //! 3 | //! The formatting can be done on documents that might 4 | //! contain invalid syntax. In that case the invalid part is skipped. 5 | 6 | use crate::{ 7 | syntax::{SyntaxElement, SyntaxKind::*, SyntaxNode, SyntaxToken}, 8 | util::overlaps, 9 | }; 10 | use itertools::Itertools; 11 | use std::cell::OnceCell; 12 | use rowan::{GreenNode, NodeOrToken, TextRange}; 13 | use std::{ 14 | cmp, 15 | collections::VecDeque, 16 | ops::Range, 17 | rc::Rc, 18 | }; 19 | 20 | #[macro_use] 21 | mod macros; 22 | 23 | /// Simplified Keys struct for tracking table paths (used for indentation) 24 | #[derive(Debug, Clone, PartialEq, Eq)] 25 | struct Keys { 26 | keys: Vec, 27 | } 28 | 29 | impl Keys { 30 | fn from_syntax(syntax: SyntaxElement) -> Self { 31 | let mut keys = Vec::new(); 32 | if let Some(node) = syntax.as_node() { 33 | for child in node.children_with_tokens() { 34 | if child.kind() == IDENT { 35 | keys.push(child.to_string()); 36 | } 37 | } 38 | } 39 | Self { keys } 40 | } 41 | 42 | /// Check if current key contains (is nested under) another key 43 | fn contains(&self, other: &Keys) -> bool { 44 | if other.keys.len() > self.keys.len() { 45 | return false; 46 | } 47 | self.keys.iter().zip(&other.keys).all(|(a, b)| a == b) 48 | } 49 | } 50 | 51 | create_options!( 52 | /// All the formatting options. 53 | #[derive(Debug, Clone, Eq, PartialEq)] 54 | pub struct Options { 55 | /// Align entries vertically. 56 | /// 57 | /// Entries that have table headers, comments, 58 | /// or blank lines between them are not aligned. 59 | pub align_entries: bool, 60 | 61 | /// Align consecutive comments after entries and items vertically. 62 | /// 63 | /// This applies to comments that are after entries or array items. 64 | pub align_comments: bool, 65 | 66 | /// If `align_comments` is true, apply the alignment in cases where 67 | /// there's only one comment. 68 | pub align_single_comments: bool, 69 | 70 | /// Put trailing commas for multiline 71 | /// arrays. 72 | pub array_trailing_comma: bool, 73 | 74 | /// Automatically expand arrays to multiple lines once they 75 | /// exceed the configured `column_width`. 76 | pub array_auto_expand: bool, 77 | 78 | /// Expand values (e.g.) inside inline tables 79 | /// where possible. 80 | pub inline_table_expand: bool, 81 | 82 | /// Automatically collapse arrays if they 83 | /// fit in one line. 84 | /// 85 | /// The array won't be collapsed if it 86 | /// contains a comment. 87 | pub array_auto_collapse: bool, 88 | 89 | /// Omit whitespace padding inside single-line arrays. 90 | pub compact_arrays: bool, 91 | 92 | /// Omit whitespace padding inside inline tables. 93 | pub compact_inline_tables: bool, 94 | 95 | /// Omit whitespace around `=`. 96 | pub compact_entries: bool, 97 | 98 | /// Target maximum column width after which 99 | /// arrays are expanded into new lines. 
100 | /// 101 | /// This is best-effort and might not be accurate. 102 | pub column_width: usize, 103 | 104 | /// Indent subtables if they come in order. 105 | pub indent_tables: bool, 106 | 107 | /// Indent entries under tables. 108 | pub indent_entries: bool, 109 | 110 | /// Indentation to use, should be tabs or spaces 111 | /// but technically could be anything. 112 | pub indent_string: String, 113 | 114 | /// Add trailing newline to the source. 115 | pub trailing_newline: bool, 116 | 117 | /// Alphabetically reorder keys that are not separated by blank lines. 118 | pub reorder_keys: bool, 119 | 120 | /// Alphabetically reorder array values that are not separated by blank lines. 121 | pub reorder_arrays: bool, 122 | 123 | /// Alphabetically reorder inline table values. 124 | pub reorder_inline_tables: bool, 125 | 126 | /// The maximum amount of consecutive blank lines allowed. 127 | pub allowed_blank_lines: usize, 128 | 129 | /// Use CRLF line endings 130 | pub crlf: bool, 131 | } 132 | ); 133 | 134 | #[derive(Debug)] 135 | pub enum OptionParseError { 136 | InvalidOption(String), 137 | InvalidValue { 138 | key: String, 139 | error: Box, 140 | }, 141 | } 142 | 143 | impl core::fmt::Display for OptionParseError { 144 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 145 | write!( 146 | f, 147 | "invalid formatting option: {}", 148 | match self { 149 | OptionParseError::InvalidOption(k) => { 150 | format!(r#"invalid option "{}""#, k) 151 | } 152 | OptionParseError::InvalidValue { key, error } => { 153 | format!(r#"invalid value for option "{}": {}"#, key, error) 154 | } 155 | } 156 | ) 157 | } 158 | } 159 | 160 | impl std::error::Error for OptionParseError {} 161 | 162 | impl Default for Options { 163 | fn default() -> Self { 164 | Options { 165 | align_entries: false, 166 | align_comments: true, 167 | align_single_comments: true, 168 | array_trailing_comma: true, 169 | array_auto_expand: true, 170 | array_auto_collapse: true, 171 | compact_arrays: true, 172 | compact_inline_tables: false, 173 | compact_entries: false, 174 | column_width: 80, 175 | indent_tables: false, 176 | indent_entries: false, 177 | inline_table_expand: true, 178 | trailing_newline: true, 179 | allowed_blank_lines: 2, 180 | indent_string: " ".into(), 181 | reorder_keys: false, 182 | reorder_arrays: false, 183 | reorder_inline_tables: false, 184 | crlf: false, 185 | } 186 | } 187 | } 188 | 189 | impl Options { 190 | fn newline(&self) -> &'static str { 191 | if self.crlf { 192 | "\r\n" 193 | } else { 194 | "\n" 195 | } 196 | } 197 | 198 | fn newlines(&self, count: usize) -> impl Iterator { 199 | std::iter::repeat_n(self.newline(), usize::min(count, self.allowed_blank_lines + 1)) 200 | } 201 | 202 | fn should_align_comments(&self, comment_count: usize) -> bool { 203 | (comment_count != 1 || self.align_single_comments) && self.align_comments 204 | } 205 | } 206 | 207 | #[derive(Debug, Clone)] 208 | struct Context { 209 | indent_level: usize, 210 | force_multiline: bool, 211 | errors: Rc<[TextRange]>, 212 | } 213 | 214 | impl Default for Context { 215 | fn default() -> Self { 216 | Self { 217 | indent_level: Default::default(), 218 | force_multiline: Default::default(), 219 | errors: Rc::from([]), 220 | } 221 | } 222 | } 223 | 224 | impl Context { 225 | /// Update options based on the text range. 226 | /// This is a no-op now that scoped options have been removed. 
227 | fn update_options(&self, _opts: &mut Options, _range: TextRange) {
228 | // No-op: scoped options removed
229 | }
230 |
231 | fn error_at(&self, range: TextRange) -> bool {
232 | for error_range in self.errors.iter().copied() {
233 | if overlaps(range, error_range) {
234 | return true;
235 | }
236 | }
237 |
238 | false
239 | }
240 |
241 | fn indent<'o>(&self, opts: &'o Options) -> impl Iterator<Item = &'o str> {
242 | std::iter::repeat_n(opts.indent_string.as_ref(), self.indent_level)
243 | }
244 | }
245 |
246 | /// Formats a parsed TOML green tree.
247 | pub fn format_green(green: GreenNode, options: Options) -> String {
248 | format_syntax(SyntaxNode::new_root(green), options)
249 | }
250 |
251 | /// Parses then formats a TOML document, skipping ranges that contain syntax errors.
252 | pub fn format(src: &str, options: Options) -> String {
253 | let p = crate::parser::parse(src);
254 |
255 | let ctx = Context {
256 | errors: p.errors.iter().map(|err| err.range).collect(),
257 | ..Context::default()
258 | };
259 |
260 | format_impl(p.into_syntax(), options, ctx)
261 | }
262 |
263 | /// Formats a parsed TOML syntax tree.
264 | pub fn format_syntax(node: SyntaxNode, options: Options) -> String {
265 | let mut s = format_impl(node, options.clone(), Context::default());
266 |
267 | s = s.trim_end().into();
268 |
269 | if options.trailing_newline {
270 | s += options.newline();
271 | }
272 |
273 | s
274 | }
275 |
276 | fn format_impl(node: SyntaxNode, options: Options, context: Context) -> String {
277 | assert!(node.kind() == ROOT);
278 | let mut formatted = format_root(node, &options, &context);
279 |
280 | if formatted.ends_with("\r\n") {
281 | formatted.truncate(formatted.len() - 2);
282 | } else if formatted.ends_with('\n') {
283 | formatted.truncate(formatted.len() - 1);
284 | }
285 |
286 | if options.trailing_newline {
287 | formatted += options.newline();
288 | }
289 |
290 | formatted
291 | }
292 |
293 | struct FormattedEntry {
294 | syntax: SyntaxElement,
295 | key: String,
296 | /// This field is used to cache the "cleaned" version of the key and should only
297 | /// be accessed through the `cleaned_key` helper method.
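///
/// For example (editor's illustration): a key written as `foo."bar"` is
/// cached as `["foo", "bar"]`, so quoting does not affect ordering.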
298 | cleaned_key: OnceCell>, 299 | value: String, 300 | comment: Option, 301 | } 302 | 303 | impl FormattedEntry { 304 | fn cleaned_key(&self) -> &Vec { 305 | self.cleaned_key.get_or_init(|| { 306 | self.key 307 | .replace(['\'', '"'], "") 308 | .split('.') 309 | .map(ToOwned::to_owned) 310 | .collect() 311 | }) 312 | } 313 | } 314 | 315 | impl PartialEq for FormattedEntry { 316 | fn eq(&self, other: &Self) -> bool { 317 | self.cleaned_key().eq(other.cleaned_key()) 318 | } 319 | } 320 | 321 | impl Eq for FormattedEntry {} 322 | 323 | impl PartialOrd for FormattedEntry { 324 | fn partial_cmp(&self, other: &Self) -> Option { 325 | Some(self.cmp(other)) 326 | } 327 | } 328 | 329 | impl Ord for FormattedEntry { 330 | fn cmp(&self, other: &Self) -> cmp::Ordering { 331 | self.cleaned_key().cmp(other.cleaned_key()) 332 | } 333 | } 334 | 335 | impl FormattedItem for FormattedEntry { 336 | fn write_to(&self, formatted: &mut String, options: &Options) { 337 | *formatted += &self.key; 338 | if options.compact_entries { 339 | *formatted += "="; 340 | } else { 341 | *formatted += " = "; 342 | } 343 | *formatted += &self.value; 344 | } 345 | 346 | fn trailing_comment(&self) -> Option { 347 | self.comment.clone() 348 | } 349 | 350 | fn syntax(&self) -> SyntaxElement { 351 | self.syntax.clone() 352 | } 353 | } 354 | 355 | fn format_root(node: SyntaxNode, options: &Options, context: &Context) -> String { 356 | assert!(node.kind() == ROOT); 357 | let mut formatted = String::new(); 358 | 359 | let mut entry_group: Vec = Vec::new(); 360 | 361 | // We defer printing the entries so that we can align them vertically. 362 | // Whenever an entry is added to the group, we skip its trailing newline, 363 | // otherwise the inserted new line would end up before the actual entries. 364 | let mut skip_newlines = 0; 365 | 366 | // We defer printing comments as well because we need to know 367 | // what comes after them for correct indentation. 368 | let mut comment_group: Vec = Vec::new(); 369 | 370 | let mut context = context.clone(); 371 | 372 | // Table key for determining indents 373 | let mut table_key_indent_history: Vec<(Keys, usize)> = Vec::new(); 374 | 375 | fn add_comments( 376 | comments: &mut Vec, 377 | formatted: &mut String, 378 | context: &Context, 379 | options: &Options, 380 | ) -> bool { 381 | let were_comments = !comments.is_empty(); 382 | 383 | for (idx, comment) in comments.drain(0..).enumerate() { 384 | if idx != 0 { 385 | *formatted += options.newline(); 386 | } 387 | formatted.extend(context.indent(options)); 388 | *formatted += &comment; 389 | } 390 | 391 | were_comments 392 | } 393 | 394 | let mut dangling_newline_count = 0; 395 | let mut scoped_options = options.clone(); 396 | 397 | for c in node.children_with_tokens() { 398 | if context.error_at(c.text_range()) { 399 | formatted += &c.to_string(); 400 | continue; 401 | } 402 | 403 | let c_range = c.text_range(); 404 | 405 | match c { 406 | NodeOrToken::Node(node) => match node.kind() { 407 | TABLE_ARRAY_HEADER | TABLE_HEADER => { 408 | if add_entries(&mut entry_group, &mut formatted, &scoped_options, &context) { 409 | formatted += scoped_options.newline(); 410 | skip_newlines = 0; 411 | } 412 | 413 | scoped_options = options.clone(); 414 | context.update_options(&mut scoped_options, c_range); 415 | 416 | // We treat everything as indented other than table headers from now on. 
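                    // Entry bodies start at indent level 1; the header itself is written one
                    // level shallower via `header_context` below, so only the entries gain indent.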
417 | if scoped_options.indent_entries && context.indent_level == 0 { 418 | context.indent_level = 1; 419 | } 420 | 421 | if let Some(key) = node.first_child().map(Into::into).map(Keys::from_syntax) { 422 | if scoped_options.indent_tables { 423 | context.indent_level = table_indent_level( 424 | &table_key_indent_history, 425 | &key, 426 | if scoped_options.indent_entries { 1 } else { 0 }, 427 | ); 428 | } 429 | table_key_indent_history.push((key.clone(), context.indent_level)); 430 | } 431 | 432 | let mut header_context = context.clone(); 433 | 434 | if scoped_options.indent_entries { 435 | header_context.indent_level = header_context.indent_level.saturating_sub(1); 436 | } 437 | 438 | if add_comments( 439 | &mut comment_group, 440 | &mut formatted, 441 | &header_context, 442 | &scoped_options, 443 | ) { 444 | formatted += scoped_options.newline(); 445 | skip_newlines = 0; 446 | } 447 | 448 | let header = format_table_header(node, &scoped_options, &header_context); 449 | let comment = header.trailing_comment(); 450 | 451 | if scoped_options.indent_tables { 452 | formatted.extend(header_context.indent(&scoped_options)); 453 | } 454 | 455 | header.write_to(&mut formatted, &scoped_options); 456 | if let Some(c) = comment { 457 | formatted += " "; 458 | formatted += &c; 459 | } 460 | } 461 | ENTRY => { 462 | scoped_options = options.clone(); 463 | context.update_options(&mut scoped_options, c_range); 464 | 465 | if add_comments( 466 | &mut comment_group, 467 | &mut formatted, 468 | &context, 469 | &scoped_options, 470 | ) { 471 | formatted += scoped_options.newline(); 472 | skip_newlines = 0; 473 | } 474 | 475 | entry_group.push(format_entry(node, &scoped_options, &context)); 476 | skip_newlines += 1; 477 | } 478 | _ => unreachable!(), 479 | }, 480 | NodeOrToken::Token(token) => match token.kind() { 481 | NEWLINE => { 482 | let mut newline_count = token.text().newline_count(); 483 | 484 | match dangling_newlines(token.clone()) { 485 | Some(dnl) => { 486 | dangling_newline_count += dnl; 487 | continue; 488 | } 489 | None => { 490 | newline_count += dangling_newline_count; 491 | dangling_newline_count = 0; 492 | } 493 | } 494 | 495 | if newline_count > 1 { 496 | add_comments( 497 | &mut comment_group, 498 | &mut formatted, 499 | &context, 500 | &scoped_options, 501 | ); 502 | add_entries(&mut entry_group, &mut formatted, &scoped_options, &context); 503 | skip_newlines = 0; 504 | } 505 | 506 | formatted.extend( 507 | scoped_options.newlines(newline_count.saturating_sub(skip_newlines)), 508 | ); 509 | } 510 | COMMENT => { 511 | if add_entries(&mut entry_group, &mut formatted, &scoped_options, &context) { 512 | formatted += scoped_options.newline(); 513 | skip_newlines = 0; 514 | } 515 | comment_group.push(token.text().to_string()); 516 | skip_newlines += 1; 517 | } 518 | WHITESPACE => {} 519 | _ => formatted += token.text(), 520 | }, 521 | } 522 | } 523 | 524 | add_comments( 525 | &mut comment_group, 526 | &mut formatted, 527 | &context, 528 | &scoped_options, 529 | ); 530 | add_entries(&mut entry_group, &mut formatted, &scoped_options, &context); 531 | 532 | formatted 533 | } 534 | 535 | /// Determine the indentation level using the indentation history. 536 | /// 537 | /// The latest key that is a strict prefix is used and indented. If none is found, the default 538 | /// indentation is used. 
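/// For example, with a history of `[("a", 0), ("a.b", 1)]`, the key `a.b.c` is
/// indented to level 2 (one deeper than its closest strict prefix `a.b`), while an
/// unrelated key such as `d` falls back to `default_indent`.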
539 | fn table_indent_level( 540 | history: &[(Keys, usize)], 541 | current_key: &Keys, 542 | default_indent: usize, 543 | ) -> usize { 544 | history 545 | .iter() 546 | .rev() 547 | .find_map(|(previous_key, indent)| { 548 | (current_key.contains(previous_key) && current_key != previous_key) 549 | .then_some(*indent + 1) 550 | }) 551 | .unwrap_or(default_indent) 552 | } 553 | 554 | /// Add entries to the formatted string. 555 | fn add_entries( 556 | entry_group: &mut Vec, 557 | formatted: &mut String, 558 | options: &Options, 559 | context: &Context, 560 | ) -> bool { 561 | let were_entries = !entry_group.is_empty(); 562 | 563 | if options.reorder_keys { 564 | entry_group.sort(); 565 | } 566 | 567 | let indent_chars_count = context.indent_level * options.indent_string.chars().count(); 568 | 569 | // We check for too long lines, and try to expand them if possible. 570 | // We don't take vertical alignment into account for simplicity. 571 | if options.array_auto_expand { 572 | for entry in entry_group.iter_mut() { 573 | let comment_chars_count = entry 574 | .comment 575 | .as_ref() 576 | .map( 577 | |c| c.chars().count() + 1, // account for the separator ' ' as well 578 | ) 579 | .unwrap_or(0); 580 | 581 | let line_count = entry.value.split('\n').count(); 582 | 583 | // check each line of the value 584 | // for the first line we include the actual indent, key, and the eq parts as well 585 | for (idx, line) in entry.value.split('\n').enumerate() { 586 | let mut chars_count = line.chars().count(); 587 | if idx == 0 { 588 | chars_count += indent_chars_count; 589 | chars_count += entry.key.chars().count(); 590 | chars_count += if options.compact_entries { 1 } else { 3 }; // " = " 591 | } 592 | 593 | // Include comment in the last line. 594 | if idx == line_count - 1 { 595 | chars_count += comment_chars_count; 596 | } 597 | 598 | if chars_count > options.column_width { 599 | let mut context = context.clone(); 600 | context.force_multiline = true; 601 | 602 | // too long, reformat the value of the entry 603 | let value = format_value( 604 | entry 605 | .syntax 606 | .as_node() 607 | .unwrap() 608 | .children() 609 | .find(|n| n.kind() == VALUE) 610 | .unwrap(), 611 | options, 612 | &context, 613 | ); 614 | 615 | entry.value.clear(); 616 | 617 | if let Some(c) = value.trailing_comment() { 618 | debug_assert!( 619 | entry.comment.is_none() || entry.comment.clone().unwrap() == c 620 | ); 621 | entry.comment = Some(c); 622 | } 623 | 624 | value.write_to(&mut entry.value, options); 625 | break; 626 | } 627 | } 628 | } 629 | } 630 | 631 | let mut comment_count = 0; 632 | // Transform the entries into generic rows that can be aligned. 633 | let rows = entry_group 634 | .drain(0..) 
635 | .map(|e| { 636 | let mut row = Vec::with_capacity(5); 637 | 638 | row.push(context.indent(options).collect::()); 639 | row.push(e.key); 640 | row.push("=".to_string()); 641 | row.push(e.value); 642 | if let Some(c) = e.comment { 643 | row.push(c); 644 | comment_count += 1; 645 | } 646 | 647 | row 648 | }) 649 | .collect::>(); 650 | 651 | let align_comments = options.should_align_comments(comment_count); 652 | *formatted += &format_rows( 653 | if !options.align_entries && !align_comments { 654 | 0..0 655 | } else if !options.align_entries && align_comments { 656 | 3..usize::MAX 657 | } else if options.align_entries && !align_comments { 658 | 0..3 659 | } else { 660 | 0..usize::MAX 661 | }, 662 | if options.compact_entries { 663 | 3..usize::MAX 664 | } else { 665 | 1..usize::MAX 666 | }, 667 | &rows, 668 | options.newline(), 669 | " ", 670 | ); 671 | 672 | were_entries 673 | } 674 | 675 | fn format_entry(node: SyntaxNode, options: &Options, context: &Context) -> FormattedEntry { 676 | let mut key = String::new(); 677 | let mut value = String::new(); 678 | let mut comment = None; 679 | 680 | for c in node.children_with_tokens() { 681 | match c { 682 | NodeOrToken::Node(n) => match n.kind() { 683 | KEY => { 684 | format_key(n, &mut key, options, context); 685 | } 686 | VALUE => { 687 | let val = format_value(n, options, context); 688 | let c = val.trailing_comment(); 689 | 690 | if c.is_some() { 691 | debug_assert!(comment.is_none()); 692 | comment = c; 693 | } 694 | 695 | val.write_to(&mut value, options); 696 | } 697 | _ => unreachable!(), 698 | }, 699 | NodeOrToken::Token(t) => { 700 | if let COMMENT = t.kind() { 701 | debug_assert!(comment.is_none()); 702 | comment = Some(t.text().into()) 703 | } 704 | } 705 | } 706 | } 707 | 708 | FormattedEntry { 709 | syntax: node.into(), 710 | key, 711 | cleaned_key: OnceCell::new(), 712 | value, 713 | comment, 714 | } 715 | } 716 | 717 | fn format_key(node: SyntaxNode, formatted: &mut String, _options: &Options, _context: &Context) { 718 | // Idents and periods without whitespace 719 | for c in node.children_with_tokens() { 720 | match c { 721 | NodeOrToken::Node(_) => {} 722 | NodeOrToken::Token(t) => match t.kind() { 723 | WHITESPACE | NEWLINE => {} 724 | _ => { 725 | *formatted += t.text(); 726 | } 727 | }, 728 | } 729 | } 730 | } 731 | 732 | fn format_value(node: SyntaxNode, options: &Options, context: &Context) -> impl FormattedItem { 733 | let mut value = String::new(); 734 | let mut comment = None; 735 | for c in node.children_with_tokens() { 736 | match c { 737 | NodeOrToken::Node(n) => match n.kind() { 738 | ARRAY => { 739 | let formatted = format_array(n, options, context); 740 | 741 | let c = formatted.trailing_comment(); 742 | 743 | if let Some(c) = c { 744 | debug_assert!(comment.is_none()); 745 | comment = Some(c) 746 | } 747 | 748 | debug_assert!(value.is_empty()); 749 | formatted.write_to(&mut value, options); 750 | } 751 | INLINE_TABLE => { 752 | let formatted = format_inline_table(n, options, context); 753 | 754 | let c = formatted.trailing_comment(); 755 | 756 | if let Some(c) = c { 757 | debug_assert!(comment.is_none()); 758 | comment = Some(c) 759 | } 760 | 761 | debug_assert!(value.is_empty()); 762 | 763 | formatted.write_to(&mut value, options); 764 | } 765 | _ => unreachable!(), 766 | }, 767 | NodeOrToken::Token(t) => match t.kind() { 768 | NEWLINE | WHITESPACE => {} 769 | COMMENT => { 770 | debug_assert!(comment.is_none()); 771 | comment = Some(t.text().into()); 772 | } 773 | _ => { 774 | value = t.text().into(); 775 | } 
776 | }, 777 | } 778 | } 779 | 780 | (node.into(), value, comment) 781 | } 782 | 783 | fn format_inline_table( 784 | node: SyntaxNode, 785 | options: &Options, 786 | context: &Context, 787 | ) -> impl FormattedItem { 788 | let mut formatted = String::new(); 789 | let mut comment = None; 790 | 791 | let mut context = context.clone(); 792 | if context.force_multiline { 793 | context.force_multiline = options.inline_table_expand; 794 | } 795 | let context = &context; 796 | 797 | let child_count = node.children().count(); 798 | 799 | if node.children().count() == 0 { 800 | formatted = "{}".into(); 801 | } 802 | 803 | let mut sorted_children = if options.reorder_inline_tables { 804 | Some( 805 | node.children() 806 | .sorted_unstable_by(|x, y| x.to_string().cmp(&y.to_string())) 807 | .collect::>(), 808 | ) 809 | } else { 810 | None 811 | }; 812 | 813 | let mut node_index = 0; 814 | for c in node.children_with_tokens() { 815 | match c { 816 | NodeOrToken::Node(n) => { 817 | if node_index != 0 { 818 | formatted += ", "; 819 | } 820 | 821 | let child = if options.reorder_inline_tables { 822 | sorted_children 823 | .as_mut() 824 | .and_then(|children| children.pop_front()) 825 | .unwrap_or(n) 826 | } else { 827 | n 828 | }; 829 | 830 | let entry = format_entry(child, options, context); 831 | debug_assert!(entry.comment.is_none()); 832 | entry.write_to(&mut formatted, options); 833 | 834 | node_index += 1; 835 | } 836 | NodeOrToken::Token(t) => match t.kind() { 837 | BRACE_START => { 838 | if child_count == 0 { 839 | // We're only interested in trailing comments. 840 | continue; 841 | } 842 | 843 | formatted += "{"; 844 | if !options.compact_inline_tables { 845 | formatted += " "; 846 | } 847 | } 848 | BRACE_END => { 849 | if child_count == 0 { 850 | // We're only interested in trailing comments. 851 | continue; 852 | } 853 | 854 | if !options.compact_inline_tables { 855 | formatted += " "; 856 | } 857 | formatted += "}"; 858 | } 859 | WHITESPACE | COMMA => {} 860 | COMMENT => { 861 | debug_assert!(comment.is_none()); 862 | comment = Some(t.text().into()); 863 | } 864 | _ => formatted += t.text(), 865 | }, 866 | } 867 | } 868 | 869 | (node.into(), formatted, comment) 870 | } 871 | // Check whether the array spans multiple lines in its current form. 872 | fn is_array_multiline(node: &SyntaxNode) -> bool { 873 | node.descendants_with_tokens().any(|n| n.kind() == NEWLINE) 874 | } 875 | 876 | fn can_collapse_array(node: &SyntaxNode) -> bool { 877 | !node.descendants_with_tokens().any(|n| n.kind() == COMMENT) 878 | } 879 | 880 | fn format_array(node: SyntaxNode, options: &Options, context: &Context) -> impl FormattedItem { 881 | let mut multiline = is_array_multiline(&node) || context.force_multiline; 882 | 883 | let mut formatted = String::new(); 884 | 885 | // We always try to collapse it if possible. 886 | if can_collapse_array(&node) && options.array_auto_collapse && !context.force_multiline { 887 | multiline = false; 888 | } 889 | 890 | // We use the same strategy as for entries, refer to [`format_root`]. 891 | let mut skip_newlines = 0; 892 | 893 | // Formatted value, optional trailing comment 894 | // The value must not include the comma at the end. 
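    // Commas are tracked per position in `commas_group` and re-attached in `add_values`
    // after any `reorder_arrays` sort, so only the final element can end up without one.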
895 | let mut value_group: Vec<(String, Option)> = Vec::new(); 896 | let mut commas_group: Vec = Vec::new(); 897 | 898 | let add_values = |value_group: &mut Vec<(String, Option)>, 899 | commas_group: &mut Vec, 900 | formatted: &mut String, 901 | context: &Context| 902 | -> bool { 903 | let were_values = !value_group.is_empty(); 904 | 905 | if options.reorder_arrays { 906 | value_group.sort_unstable_by(|x, y| x.0.cmp(&y.0)); 907 | } 908 | 909 | for (has_comma, p) in commas_group.drain(0..).zip(value_group.iter_mut()) { 910 | if has_comma { 911 | p.0 += "," 912 | }; 913 | } 914 | 915 | if !multiline { 916 | for (idx, (val, comment)) in value_group.drain(0..).enumerate() { 917 | debug_assert!(comment.is_none()); 918 | if idx != 0 { 919 | *formatted += " " 920 | } 921 | 922 | *formatted += &val; 923 | } 924 | 925 | return were_values; 926 | } 927 | 928 | let mut comment_count = 0; 929 | let rows = value_group 930 | .drain(0..) 931 | .map(|(value, comment)| { 932 | let mut row = Vec::with_capacity(5); 933 | 934 | row.push(context.indent(options).collect::()); 935 | row.push(value); 936 | if let Some(c) = comment { 937 | row.push(c); 938 | comment_count += 1; 939 | } 940 | 941 | row 942 | }) 943 | .collect::>(); 944 | 945 | let align_comments = options.should_align_comments(comment_count); 946 | *formatted += &format_rows( 947 | if align_comments { 0..usize::MAX } else { 0..0 }, 948 | 1..usize::MAX, 949 | &rows, 950 | options.newline(), 951 | " ", 952 | ); 953 | 954 | were_values 955 | }; 956 | 957 | let node_count = node.children().count(); 958 | 959 | let mut inner_context = context.clone(); 960 | 961 | if multiline { 962 | inner_context.indent_level += 1; 963 | } 964 | 965 | let mut dangling_newline_count = 0; 966 | 967 | let mut node_index = 0; 968 | for c in node.children_with_tokens() { 969 | match c { 970 | NodeOrToken::Node(n) => match n.kind() { 971 | VALUE => { 972 | if multiline && formatted.ends_with('[') { 973 | formatted += options.newline(); 974 | } 975 | 976 | let val = format_value(n, options, &inner_context); 977 | let mut val_string = String::new(); 978 | 979 | val.write_to(&mut val_string, options); 980 | 981 | let has_comma = 982 | node_index < node_count - 1 || (multiline && options.array_trailing_comma); 983 | commas_group.push(has_comma); 984 | 985 | value_group.push((val_string, val.trailing_comment())); 986 | skip_newlines += 1; 987 | 988 | node_index += 1; 989 | } 990 | _ => { 991 | if cfg!(debug_assertions) { 992 | unreachable!() 993 | } 994 | } 995 | }, 996 | NodeOrToken::Token(t) => match t.kind() { 997 | BRACKET_START => { 998 | formatted += "["; 999 | if !options.compact_arrays && !multiline { 1000 | formatted += " "; 1001 | } 1002 | } 1003 | BRACKET_END => { 1004 | add_values( 1005 | &mut value_group, 1006 | &mut commas_group, 1007 | &mut formatted, 1008 | &inner_context, 1009 | ); 1010 | 1011 | if multiline { 1012 | if !formatted.ends_with('\n') { 1013 | formatted += options.newline(); 1014 | } 1015 | 1016 | formatted.extend(context.indent(options)); 1017 | } else if !options.compact_arrays { 1018 | formatted += " "; 1019 | } 1020 | formatted += "]"; 1021 | } 1022 | NEWLINE => { 1023 | if !multiline { 1024 | continue; 1025 | } 1026 | 1027 | let mut newline_count = t.text().newline_count(); 1028 | 1029 | match dangling_newlines(t.clone()) { 1030 | Some(dnl) => { 1031 | dangling_newline_count += dnl; 1032 | continue; 1033 | } 1034 | None => { 1035 | newline_count += dangling_newline_count; 1036 | dangling_newline_count = 0; 1037 | } 1038 | } 1039 | 1040 | if 
newline_count > 1 { 1041 | add_values( 1042 | &mut value_group, 1043 | &mut commas_group, 1044 | &mut formatted, 1045 | &inner_context, 1046 | ); 1047 | skip_newlines = 0; 1048 | } 1049 | 1050 | formatted.extend(options.newlines(newline_count.saturating_sub(skip_newlines))); 1051 | } 1052 | COMMENT => { 1053 | let newline_before = t 1054 | .siblings_with_tokens(rowan::Direction::Prev) 1055 | .skip(1) 1056 | .find(|s| s.kind() != WHITESPACE) 1057 | .map(|s| s.kind() == NEWLINE) 1058 | .unwrap_or(false); 1059 | 1060 | if !newline_before && !value_group.is_empty() { 1061 | // It's actually trailing comment, so we add it to the last value. 1062 | value_group.last_mut().unwrap().1 = Some(t.text().to_string()); 1063 | continue; 1064 | } 1065 | 1066 | if add_values( 1067 | &mut value_group, 1068 | &mut commas_group, 1069 | &mut formatted, 1070 | &inner_context, 1071 | ) { 1072 | formatted += options.newline(); 1073 | skip_newlines = 0; 1074 | } 1075 | 1076 | if formatted.ends_with('[') { 1077 | formatted += " "; 1078 | formatted += t.text(); 1079 | } else { 1080 | formatted.extend(inner_context.indent(options)); 1081 | formatted += t.text(); 1082 | } 1083 | } 1084 | _ => {} 1085 | }, 1086 | } 1087 | } 1088 | 1089 | if formatted.is_empty() { 1090 | formatted = "[]".into(); 1091 | } 1092 | 1093 | (node.into(), formatted, None) 1094 | } 1095 | 1096 | fn format_table_header( 1097 | node: SyntaxNode, 1098 | options: &Options, 1099 | context: &Context, 1100 | ) -> impl FormattedItem { 1101 | let mut formatted = String::new(); 1102 | let mut comment = None; 1103 | 1104 | for c in node.children_with_tokens() { 1105 | match c { 1106 | NodeOrToken::Node(n) => { 1107 | format_key(n, &mut formatted, options, context); 1108 | } 1109 | NodeOrToken::Token(t) => match t.kind() { 1110 | BRACKET_START | BRACKET_END => formatted += t.text(), 1111 | WHITESPACE | NEWLINE => {} 1112 | COMMENT => { 1113 | debug_assert!(comment.is_none()); 1114 | comment = Some(t.text().to_string()); 1115 | } 1116 | _ => formatted += t.text(), 1117 | }, 1118 | } 1119 | } 1120 | 1121 | (node.into(), formatted, comment) 1122 | } 1123 | 1124 | // Simply a tuple of the formatted item and an optional trailing comment. 1125 | impl> FormattedItem for (SyntaxElement, T, Option) { 1126 | fn write_to(&self, formatted: &mut String, _options: &Options) { 1127 | *formatted += self.1.as_ref() 1128 | } 1129 | 1130 | fn trailing_comment(&self) -> Option { 1131 | self.2.as_ref().map(|s| s.as_ref().to_string()) 1132 | } 1133 | 1134 | fn syntax(&self) -> SyntaxElement { 1135 | self.0.clone() 1136 | } 1137 | } 1138 | 1139 | trait FormattedItem { 1140 | #[allow(dead_code)] 1141 | fn syntax(&self) -> SyntaxElement; 1142 | #[allow(clippy::ptr_arg)] 1143 | fn write_to(&self, formatted: &mut String, options: &Options); 1144 | fn trailing_comment(&self) -> Option; 1145 | } 1146 | 1147 | trait NewlineCount { 1148 | fn newline_count(&self) -> usize; 1149 | } 1150 | 1151 | impl NewlineCount for &str { 1152 | fn newline_count(&self) -> usize { 1153 | self.chars().filter(|c| c == &'\n').count() 1154 | } 1155 | } 1156 | 1157 | // FIXME(docs) 1158 | fn format_rows( 1159 | align_range: Range, 1160 | separator_range: Range, 1161 | rows: &[R], 1162 | newline: &str, 1163 | separator: &str, 1164 | ) -> String 1165 | where 1166 | R: AsRef<[S]>, 1167 | S: AsRef, 1168 | { 1169 | let mut out = String::new(); 1170 | 1171 | // We currently don't support vertical alignment of complex data. 
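    // A cell that already spans multiple lines would throw the column widths off,
    // so any '\n' anywhere in the rows disables padding for the whole group.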
1172 |     let can_align = rows
1173 |         .iter()
1174 |         .flat_map(|r| r.as_ref().iter())
1175 |         .all(|s| !s.as_ref().contains('\n'));
1176 |
1177 |     let diff_widths = |range: Range<usize>, row: &R| -> usize {
1178 |         let mut max_width = 0_usize;
1179 |
1180 |         for row in rows {
1181 |             let row_len = row.as_ref().len();
1182 |
1183 |             let range =
1184 |                 cmp::min(range.start, row_len.saturating_sub(1))..cmp::min(range.end, row_len);
1185 |
1186 |             max_width = cmp::max(
1187 |                 max_width,
1188 |                 row.as_ref()[range]
1189 |                     .iter()
1190 |                     .map(|s| s.as_ref().chars().count())
1191 |                     .sum(),
1192 |             );
1193 |         }
1194 |
1195 |         let row_width = row.as_ref()[range]
1196 |             .iter()
1197 |             .map(|s| s.as_ref().chars().count())
1198 |             .sum::<usize>();
1199 |
1200 |         max_width - row_width
1201 |     };
1202 |
1203 |     for (row_idx, row) in rows.iter().enumerate() {
1204 |         if row_idx != 0 {
1205 |             out += newline;
1206 |         }
1207 |
1208 |         let mut last_align_idx = 0_usize;
1209 |
1210 |         for (item_idx, item) in row.as_ref().iter().enumerate() {
1211 |             if item_idx > separator_range.start
1212 |                 && item_idx <= separator_range.end.saturating_add(1)
1213 |                 && item_idx < row.as_ref().len()
1214 |             {
1215 |                 out += separator;
1216 |             }
1217 |
1218 |             out += item.as_ref();
1219 |
1220 |             if can_align
1221 |                 && align_range.start <= item_idx
1222 |                 && align_range.end > item_idx
1223 |                 && item_idx < row.as_ref().len() - 1
1224 |             {
1225 |                 let diff = diff_widths(last_align_idx..item_idx + 1, row);
1226 |                 out.extend(std::iter::repeat_n(" ", diff));
1227 |                 last_align_idx = item_idx + 1;
1228 |             }
1229 |         }
1230 |     }
1231 |
1232 |     out
1233 | }
1234 |
1235 | /// Special handling of blank lines.
1236 | ///
1237 | /// The parser was designed so that newline (LF) characters and other whitespace
1238 | /// (" " and "\t") end up in separate tokens.
1239 | ///
1240 | /// Generally we count blank lines by counting the LF characters in a single token,
1241 | /// but if any of the consecutive blank lines contain whitespace, the run is split
1242 | /// across several tokens and that count becomes unreliable.
1243 | ///
1244 | /// So when a newline token is followed by whitespace and then more newlines, we
1245 | /// return its newline count here so that the caller can add these "dangling"
1246 | /// counts together.
1247 | fn dangling_newlines(t: SyntaxToken) -> Option<usize> {
1248 |     let newline_count = t.text().newline_count();
1249 |
1250 |     if let Some(nt) = t.next_sibling_or_token()
1251 |         && let Some(nnt) = nt.next_sibling_or_token()
1252 |         && nt.kind() == WHITESPACE && nnt.kind() == NEWLINE {
1253 |         return Some(newline_count);
1254 |     }
1255 |
1256 |     None
1257 | }
--------------------------------------------------------------------------------
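A minimal usage sketch of the public entry points defined above (`format` and `Options`). The `crate::formatter` path and the sample input are the editor's assumptions based on the source layout (`src/formatter/mod.rs`); they are not taken from the listing itself.

// Editor's sketch, not part of the source tree: drives the formatter defined above.
// Assumes this module is reachable as `crate::formatter` (per `src/formatter/mod.rs`).
use crate::formatter::{format, Options};

fn demo() -> String {
    let src = "title   =  'TOML example'\n[table]\nvalue=1\n";

    let options = Options {
        align_entries: true, // pad keys so the `=` signs line up within a group
        column_width: 60,    // entries longer than this get their arrays expanded
        ..Options::default()
    };

    // Ranges containing syntax errors are copied through verbatim rather than formatted.
    format(src, options)
}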