├── testdata └── escaped.kdl ├── doc.go ├── justfile ├── go.mod ├── node.go ├── prop.go ├── kdl.go ├── go.sum ├── .github └── workflows │ └── check.yml ├── kdl_test.go ├── arg.go ├── LICENSE ├── README.md ├── internal ├── token.go ├── scanner_test.go └── scanner.go ├── type.go ├── parser.go └── parser_test.go /testdata/escaped.kdl: -------------------------------------------------------------------------------- 1 | node1 " " 2 | node2 "Ê" 3 | node3 "\"" 4 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | package gokdl 2 | 3 | type Doc struct { 4 | nodes []Node 5 | } 6 | 7 | func (d Doc) Nodes() []Node { 8 | return d.nodes 9 | } 10 | -------------------------------------------------------------------------------- /justfile: -------------------------------------------------------------------------------- 1 | check: fmt test lint 2 | 3 | build: 4 | go build ./... 5 | 6 | fmt: 7 | go fmt ./... 8 | 9 | test pattern=".*": 10 | go test ./... -run={{ pattern }} 11 | 12 | lint: 13 | go run honnef.co/go/tools/cmd/staticcheck@latest ./... 14 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/lunjon/gokdl 2 | 3 | go 1.21 4 | 5 | require ( 6 | github.com/davecgh/go-spew v1.1.1 // indirect 7 | github.com/pmezard/go-difflib v1.0.0 // indirect 8 | github.com/stretchr/testify v1.8.4 9 | gopkg.in/yaml.v3 v3.0.1 // indirect 10 | ) 11 | -------------------------------------------------------------------------------- /node.go: -------------------------------------------------------------------------------- 1 | package gokdl 2 | 3 | type Node struct { 4 | // Name of the node. 5 | Name string 6 | // Children of the node. If the node doesn't 7 | // have children it is an empty list. 
8 | Children []Node 9 | // Properties of the node. 10 | Props []Prop 11 | // Arguments of the node. 12 | Args []Arg 13 | // Type annotation on the node. 14 | // It has the zero value if no type annotation 15 | // exists for this node. 16 | TypeAnnotation TypeAnnotation 17 | } 18 | -------------------------------------------------------------------------------- /prop.go: -------------------------------------------------------------------------------- 1 | package gokdl 2 | 3 | import "fmt" 4 | 5 | type Prop struct { 6 | Name string 7 | Value any 8 | // ValueTypeAnnot is the type annotation for the value of the property. 9 | // Example: age=(u8)25 10 | // In this case it would be "u8". 11 | ValueTypeAnnot TypeAnnotation 12 | // TypeAnnot is the type annotation for the property itself. 13 | // Example: (author)name="Jonathan" 14 | TypeAnnot TypeAnnotation 15 | } 16 | 17 | func (p Prop) String() string { 18 | return fmt.Sprintf("%s=%v", p.Name, p.Value) 19 | } 20 | -------------------------------------------------------------------------------- /kdl.go: -------------------------------------------------------------------------------- 1 | package gokdl 2 | 3 | import ( 4 | "io" 5 | ) 6 | 7 | // Parse the bytes into a KDL Document, 8 | // returning an error if anything was invalid. 9 | // 10 | // The bytes must be valid unicode. 11 | func Parse(r io.Reader) (Doc, error) { 12 | parser := newParser(r) 13 | return parser.parse() 14 | } 15 | 16 | // ValueType is the type name of the different 17 | // primitive KDL types. 
18 | type ValueType string 19 | 20 | const ( 21 | TypeString ValueType = "string" 22 | TypeInt ValueType = "int" 23 | TypeFloat ValueType = "float" 24 | TypeBool ValueType = "boolean" 25 | TypeNull ValueType = "null" 26 | ) 27 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 5 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= 6 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 7 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 8 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 9 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 10 | -------------------------------------------------------------------------------- /.github/workflows/check.yml: -------------------------------------------------------------------------------- 1 | name: Check 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | go-version: [ '1.20', '1.21.x' ] 15 | steps: 16 | - uses: actions/checkout@v3 17 | 18 | - name: Set up Go 19 | uses: actions/setup-go@v4 20 | with: 21 | go-version: ${{ matrix.go-version }} 22 | 23 | - name: Build 24 | run: go build -v ./... 25 | 26 | - name: Test 27 | run: go test -v ./... 28 | 29 | - name: Check format 30 | run: | 31 | go fmt ./... 
32 | git diff --exit-code 33 | 34 | - name: Lint 35 | run: go run honnef.co/go/tools/cmd/staticcheck@latest ./... 36 | -------------------------------------------------------------------------------- /kdl_test.go: -------------------------------------------------------------------------------- 1 | package gokdl_test 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | 7 | "github.com/lunjon/gokdl" 8 | ) 9 | 10 | func TestParseExample(t *testing.T) { 11 | doc := ` 12 | // Line comment 13 | 14 | /* 15 | multiline 16 | comment 17 | */ 18 | 19 | node "arg" prop=1 20 | 21 | one; two; // Ignore this 22 | 23 | nesting-testing /*ignore this as well*/ { 24 | child-1; child-?; 25 | 26 | child!THREE keyword="string" { 27 | nesting-should-work-here-as-well 28 | } 29 | } 30 | 31 | "Arbitrary name in quotes!" 32 | 33 | integer-arg -1234 34 | science-arg-a 1.78e12 35 | science-arg-b 1.78e-3 36 | science-arg-c 1.7883274 37 | 38 | // Node on multiple lines 39 | hello \ 40 | 1 2 3 \ 41 | myProp="wow" 42 | ` 43 | 44 | r := strings.NewReader(doc) 45 | _, err := gokdl.Parse(r) 46 | if err != nil { 47 | t.Fatalf("expected no error but was: %s", err) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /arg.go: -------------------------------------------------------------------------------- 1 | package gokdl 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | type Arg struct { 8 | // Value of the argument. 9 | // It is `nil` for the KDL `null` value. 10 | Value any 11 | // Type annotation on the argument. 12 | // It has the zero value if no type annotation 13 | // exists for this argument. 
14 | TypeAnnotation TypeAnnotation 15 | } 16 | 17 | func (a Arg) String() string { 18 | return fmt.Sprint(a.Value) 19 | } 20 | 21 | func newArg(value any, ta TypeAnnotation) Arg { 22 | return Arg{ 23 | Value: value, 24 | TypeAnnotation: ta, 25 | } 26 | } 27 | 28 | func newIntArg(value, typeAnnot string) (Arg, error) { 29 | val, err := parseIntValue(value, typeAnnot) 30 | return Arg{ 31 | Value: val, 32 | TypeAnnotation: TypeAnnotation(typeAnnot), 33 | }, err 34 | } 35 | 36 | func newFloatArg(value, typeAnnot string) (Arg, error) { 37 | val, err := parseFloatValue(value, typeAnnot) 38 | return Arg{ 39 | Value: val, 40 | TypeAnnotation: TypeAnnotation(typeAnnot), 41 | }, err 42 | } 43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Jonathan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | > [!NOTE] 2 | > Due to lack of time I can no longer maintain this project. 3 | > I had a great time implementing it but nowadays the family takes 4 | > most of my spare time. Check out [kdl-go](https://github.com/sblinch/kdl-go) instead! 5 | 6 | # GoKDL 7 | 8 | A parser implementation for the [KDL](https://kdl.dev/) document language in Go. 9 | 10 | ## Example 11 | 12 | The following code shows a minimal example of parsing a KDL document: 13 | 14 | ```go 15 | package main 16 | 17 | import ( 18 | "log" 19 | "strings" 20 | "github.com/lunjon/gokdl" 21 | ) 22 | 23 | func main() { 24 | kdl := ` 25 | MyNode "string arg" myint=1234 awesome=true { 26 | child-node 27 | } 28 | 29 | // A node with arbitrary name (in quotes) 30 | "Other node with much cooler name!" { Okay; } 31 | ` 32 | 33 | r := strings.NewReader(kdl) 34 | doc, err := gokdl.Parse(r) 35 | if err != nil { 36 | log.Fatal(err) 37 | } 38 | 39 | // Do something with doc ... 40 | } 41 | ``` 42 | 43 | ## API 44 | 45 | The module can be used, but the API is still very rough. 46 | I'm grateful for any feedback and suggestions regarding the API! 
// ---- /internal/token.go (package internal) ----

// EOF_RUNE is the sentinel rune used to signal that no more input is available.
var EOF_RUNE = rune(0)

// Token identifies the kind of lexical element produced by the scanner.
type Token int

const (
	EOF Token = iota
	WS
	INVALID

	// Literals
	IDENT
	NUM_INT   // Integer
	NUM_FLOAT // Float
	NUM_SCI   // Scientific notation
	BOOL      // true | false

	// Special characters
	SEMICOLON         // ;
	CBRACK_OPEN       // {
	CBRACK_CLOSE      // }
	QUOTE             // "
	EQUAL             // =
	HYPHEN            // -
	COMMENT_LINE      // //
	COMMENT_MUL_OPEN  // /*
	COMMENT_MUL_CLOSE // */
	COMMENT_SD        // /- (slash-dash)
	BACKSLASH         // \
	FORWSLASH         // /
	PAREN_OPEN        // (
	PAREN_CLOSE       // )
	GREAT             // >
	LESS              // <
	SBRACK_OPEN       // [
	SBRACK_CLOSE      // ]
	COMMA             // ,
	RAWSTR_OPEN       // r"
	RAWSTR_HASH_OPEN  // r#[...]"
	RAWSTR_HASH_CLOSE // "#[...]

	// Other characters
	CHAR  // Single character
	CHARS // Stream of characters
)

var (
	// nonIdents holds the runes that are not valid in identifiers.
	nonIdents = runeSet(`\/(){}<>;[]=,`)
	// hexRunes holds the runes accepted as hexadecimal digits.
	hexRunes = runeSet("0123456789abcdefABCDEF")
)

// runeSet builds a membership set from the runes of chars.
func runeSet(chars string) map[rune]bool {
	set := make(map[rune]bool, len(chars))
	for _, r := range chars {
		set[r] = true
	}
	return set
}

// IsInitialIdentToken reports whether t is CHAR, QUOTE or HYPHEN,
// the tokens accepted as the first token of an identifier.
func IsInitialIdentToken(t Token) bool {
	return t == CHAR || t == QUOTE || t == HYPHEN
}

// IsIdentifierToken reports whether t is NUM_INT or CHAR.
func IsIdentifierToken(t Token) bool {
	return t == NUM_INT || t == CHAR
}

// IsIdentifier reports whether r may appear in a bare identifier:
// it is neither whitespace nor one of the reserved runes in nonIdents.
func IsIdentifier(r rune) bool {
	return !nonIdents[r] && !unicode.IsSpace(r)
}

// IsAnyOf reports whether t equals at least one of ts.
// It returns false when ts is empty.
func IsAnyOf(t Token, ts ...Token) bool {
	for _, candidate := range ts {
		if t == candidate {
			return true
		}
	}
	return false
}

// ContainsNonIdent reports whether s contains a rune that is
// not valid in identifiers.
func ContainsNonIdent(s string) bool {
	for _, ch := range s {
		if nonIdents[ch] {
			return true
		}
	}
	return false
}

// ---- /type.go (package gokdl) ----

// TypeAnnotation is the name written inside a type annotation,
// for example "u8" in (u8)25.
type TypeAnnotation string

// String returns the annotation as a plain string.
func (t TypeAnnotation) String() string {
	return string(t)
}

const (
	noTypeAnnot                = ""
	I8          TypeAnnotation = "i8"
	I16         TypeAnnotation = "i16"
	I32         TypeAnnotation = "i32"
	I64         TypeAnnotation = "i64"
	U8          TypeAnnotation = "u8"
	U16         TypeAnnotation = "u16"
	U32         TypeAnnotation = "u32"
	U64         TypeAnnotation = "u64"
	F32         TypeAnnotation = "f32"
	F64         TypeAnnotation = "f64"
)

// numberTypeAnnotation maps the textual form of every numeric type
// annotation to its constant. The literal is complete, so no init
// function is needed to populate it.
var numberTypeAnnotation = map[string]TypeAnnotation{
	I8.String():  I8,
	I16.String(): I16,
	I32.String(): I32,
	I64.String(): I64,
	U8.String():  U8,
	U16.String(): U16,
	U32.String(): U32,
	U64.String(): U64,
	F32.String(): F32,
	F64.String(): F64,
}

// parseStringValue returns value unchanged unless typeAnnot is one of the
// numeric type annotations, which are invalid on a string.
func parseStringValue(value, typeAnnot string) (string, error) {
	if _, isNum := numberTypeAnnotation[typeAnnot]; isNum {
		return "", fmt.Errorf("invalid type annotation for type string: %s", typeAnnot)
	}
	return value, nil
}

// parseIntValue parses value as a base-10 integer whose width and signedness
// are selected by typeAnnot. An empty annotation means a signed 64-bit value.
//
// The result is an int64 for signed annotations and a uint64 for unsigned
// ones. An error is returned when the annotation is not an integer type or
// when value does not fit in the requested width.
func parseIntValue(value, typeAnnot string) (any, error) {
	var bitsize int
	var unsigned bool

	switch TypeAnnotation(typeAnnot) {
	case noTypeAnnot, I64:
		bitsize = 64
	case I8:
		bitsize = 8
	case I16:
		bitsize = 16
	case I32:
		bitsize = 32
	case U8:
		bitsize, unsigned = 8, true
	case U16:
		bitsize, unsigned = 16, true
	case U32:
		bitsize, unsigned = 32, true
	case U64:
		bitsize, unsigned = 64, true
	default:
		return value, fmt.Errorf("invalid type annotation for integer: %s", typeAnnot)
	}

	if unsigned {
		return strconv.ParseUint(value, 10, bitsize)
	}
	return strconv.ParseInt(value, 10, bitsize)
}

// parseFloatValue parses value as a floating point number. The annotation
// "f32" selects 32-bit precision; "f64" or no annotation selects 64-bit.
// The result is always a float64, as returned by strconv.ParseFloat.
//
// Any other annotation is rejected with an error.
func parseFloatValue(value, typeAnnot string) (any, error) {
	var bitsize int

	switch TypeAnnotation(typeAnnot) {
	case F32:
		bitsize = 32
	case noTypeAnnot, F64:
		bitsize = 64
	default:
		// The value being parsed is a float, so say so in the message.
		return value, fmt.Errorf("invalid type annotation for float: %s", typeAnnot)
	}

	return strconv.ParseFloat(value, bitsize)
}
/internal/scanner_test.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/require" 8 | ) 9 | 10 | func TestScannerScanWhitespace(t *testing.T) { 11 | tests := []struct { 12 | name string 13 | str string 14 | }{ 15 | {"empty", " "}, 16 | {"newline", "\n"}, 17 | {"multi newline", " \n\n"}, 18 | } 19 | 20 | for _, test := range tests { 21 | sc := setup(test.str) 22 | t.Run(test.name, func(t *testing.T) { 23 | token, _ := sc.Scan() 24 | require.Equal(t, WS, token) 25 | }) 26 | } 27 | } 28 | 29 | func TestScannerScanNumbers(t *testing.T) { 30 | tests := []struct { 31 | name string 32 | str string 33 | expectedToken Token 34 | expectedLit string 35 | }{ 36 | {"integer - single digit", "1", NUM_INT, "1"}, 37 | {"integer - multi digit", "12345", NUM_INT, "12345"}, 38 | {"integer - neg", "-12345", NUM_INT, "-12345"}, 39 | {"integer - prefix", "+12345", NUM_INT, "12345"}, 40 | {"integer - underscore", "10_000", NUM_INT, "10000"}, 41 | {"float - dot", "1.1", NUM_FLOAT, "1.1"}, 42 | {"float - dot multi", "1.12345", NUM_FLOAT, "1.12345"}, 43 | {"float - scientific (pos exp)", "1.123e12", NUM_SCI, "1.123e12"}, 44 | {"float - scientific (neg exp)", "1.123e-9", NUM_SCI, "1.123e-9"}, 45 | {"float - scientific neg", "-1.123e9", NUM_SCI, "-1.123e9"}, 46 | {"binary", "0b0101", NUM_INT, "5"}, 47 | {"binary - underscore", "0b01_01", NUM_INT, "5"}, 48 | {"octal", "0o010463", NUM_INT, "4403"}, 49 | {"octal - underscore", "0o0104_63", NUM_INT, "4403"}, 50 | {"hex", "0xabc123", NUM_INT, "11256099"}, 51 | {"hex - underscore", "0xabc_123", NUM_INT, "11256099"}, 52 | } 53 | 54 | for _, test := range tests { 55 | sc := setup(test.str) 56 | t.Run(test.name, func(t *testing.T) { 57 | token, lit := sc.Scan() 58 | require.Equal(t, test.expectedToken, token) 59 | require.Equal(t, test.expectedLit, lit) 60 | }) 61 | } 62 | } 63 | 64 | func TestScannerScanRawString(t 
*testing.T) { 65 | tests := []struct { 66 | name string 67 | str string 68 | expectedToken Token 69 | expectedLit string 70 | }{ 71 | {"no raw string", "r", CHAR, "r"}, 72 | {"raw string", `r"`, RAWSTR_OPEN, `r"`}, 73 | {"raw string hash 1", `r#"`, RAWSTR_HASH_OPEN, `r#"`}, 74 | {"raw string hash 2", `r##"`, RAWSTR_HASH_OPEN, `r##"`}, 75 | {"other", `r##`, CHAR, `r##`}, 76 | {"quote end 1", `"`, QUOTE, `"`}, 77 | {"quote end 2", `"#`, RAWSTR_HASH_CLOSE, `"#`}, 78 | {"quote end 3", `"##`, RAWSTR_HASH_CLOSE, `"##`}, 79 | } 80 | 81 | for _, test := range tests { 82 | sc := setup(test.str) 83 | t.Run(test.name, func(t *testing.T) { 84 | token, lit := sc.Scan() 85 | require.Equal(t, test.expectedToken, token) 86 | require.Equal(t, test.expectedLit, lit) 87 | }) 88 | } 89 | } 90 | 91 | func setup(source string) *Scanner { 92 | r := strings.NewReader(source) 93 | return NewScanner(r) 94 | } 95 | -------------------------------------------------------------------------------- /internal/scanner.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "fmt" 7 | "io" 8 | "strconv" 9 | "strings" 10 | "unicode" 11 | ) 12 | 13 | type previous struct { 14 | token Token 15 | lit string 16 | } 17 | 18 | // Scanner represents a lexical Scanner. 19 | type Scanner struct { 20 | r *bufio.Reader 21 | eof bool 22 | // State used in unread. 23 | prev *previous // Set from last when Unread was called 24 | last previous 25 | } 26 | 27 | func NewScanner(r io.Reader) *Scanner { 28 | return &Scanner{ 29 | r: bufio.NewReader(r), 30 | } 31 | } 32 | 33 | func (s *Scanner) ScanLine() { 34 | if s.eof { 35 | return 36 | } 37 | _, _ = s.r.ReadBytes('\n') 38 | } 39 | 40 | // scan returns the next token and literal value. 
41 | func (s *Scanner) Scan() (tok Token, lit string) { 42 | if s.eof { 43 | return EOF, "" 44 | } 45 | 46 | if s.prev != nil { 47 | token := s.prev.token 48 | lit := s.prev.lit 49 | s.prev = nil 50 | return token, lit 51 | } 52 | 53 | ch := s.read() 54 | 55 | if unicode.IsSpace(ch) { 56 | s.r.UnreadRune() 57 | return s.ScanWhitespace() 58 | } else if unicode.IsDigit(ch) { 59 | s.r.UnreadRune() 60 | return s.scanNumber(false) 61 | } 62 | 63 | var token Token 64 | var str string 65 | switch ch { 66 | case EOF_RUNE: 67 | s.eof = true 68 | token = EOF 69 | case '"': 70 | return s.scanQuote() 71 | case '=': 72 | token = EQUAL 73 | str = string(ch) 74 | case '-': 75 | next := s.read() 76 | s.r.UnreadRune() 77 | 78 | if unicode.IsDigit(next) { 79 | s.r.UnreadRune() 80 | return s.scanNumber(true) 81 | } 82 | 83 | token = HYPHEN 84 | str = string(ch) 85 | case '+': 86 | next := s.read() 87 | s.r.UnreadRune() 88 | 89 | if unicode.IsDigit(next) { 90 | s.r.UnreadRune() 91 | return s.scanNumber(false) 92 | } 93 | 94 | token = CHAR 95 | str = string(ch) 96 | case '*': 97 | next := s.read() 98 | if next == '/' { 99 | token = COMMENT_MUL_CLOSE 100 | str = "*/" 101 | } else { 102 | s.r.UnreadRune() 103 | token = CHAR 104 | str = string(ch) 105 | } 106 | case '/': 107 | next := s.read() 108 | switch next { 109 | case '/': 110 | token = COMMENT_LINE 111 | str = "//" 112 | case '*': 113 | token = COMMENT_MUL_OPEN 114 | str = "/*" 115 | case '-': 116 | token = COMMENT_SD 117 | str = "/-" 118 | default: 119 | s.r.UnreadRune() 120 | return CHAR, string(ch) 121 | } 122 | case ';': 123 | token = SEMICOLON 124 | str = string(ch) 125 | case '{': 126 | token = CBRACK_OPEN 127 | str = string(ch) 128 | case '}': 129 | token = CBRACK_CLOSE 130 | str = string(ch) 131 | case '[': 132 | token = SBRACK_OPEN 133 | str = string(ch) 134 | case ']': 135 | token = SBRACK_CLOSE 136 | str = string(ch) 137 | case '<': 138 | token = LESS 139 | str = string(ch) 140 | case '>': 141 | token = GREAT 142 | str = 
string(ch) 143 | case ',': 144 | token = COMMA 145 | str = string(ch) 146 | case '(': 147 | token = PAREN_OPEN 148 | str = string(ch) 149 | case ')': 150 | token = PAREN_CLOSE 151 | str = string(ch) 152 | case '\\': 153 | token = BACKSLASH 154 | str = string(ch) 155 | case 'r': 156 | return s.scanRawString() 157 | default: 158 | token = CHAR 159 | str = string(ch) 160 | } 161 | 162 | return s.setAndReturn(token, str) 163 | } 164 | 165 | func (s *Scanner) scanRawString() (Token, string) { 166 | next := s.read() 167 | switch next { 168 | case '"': 169 | return RAWSTR_OPEN, `r"` 170 | case '#': 171 | lit := s.ScanWhile(func(r rune) bool { 172 | return r == '#' 173 | }) 174 | 175 | next := s.read() 176 | if next != '"' { 177 | s.r.UnreadRune() 178 | return CHAR, fmt.Sprintf("r#%s", lit) 179 | } 180 | 181 | return RAWSTR_HASH_OPEN, fmt.Sprintf(`r#%s"`, lit) 182 | default: 183 | s.r.UnreadRune() 184 | return CHAR, "r" 185 | } 186 | } 187 | 188 | // Handles a single " as well as "##... 189 | func (s *Scanner) scanQuote() (Token, string) { 190 | next := s.read() 191 | if next != '#' { 192 | s.r.UnreadRune() 193 | return QUOTE, `"` 194 | } 195 | 196 | lit := s.ScanWhile(func(r rune) bool { 197 | return r == '#' 198 | }) 199 | return RAWSTR_HASH_CLOSE, `"#` + lit 200 | } 201 | 202 | func (s *Scanner) ScanWhile(pred func(rune) bool) string { 203 | var buf bytes.Buffer 204 | if s.prev != nil { 205 | buf.WriteString(s.prev.lit) 206 | s.prev = nil 207 | } 208 | 209 | for { 210 | ch := s.read() 211 | if ch == EOF_RUNE { 212 | break 213 | } else if !pred(ch) { 214 | s.r.UnreadRune() 215 | break 216 | } else { 217 | buf.WriteRune(ch) 218 | } 219 | } 220 | 221 | return buf.String() 222 | } 223 | 224 | // scanNumber tries to scan a number in any of the supported formats. 225 | // Use `neg` to indicate that the number was prefixed with a hyphen. 
226 | func (s *Scanner) scanNumber(neg bool) (Token, string) { 227 | start := s.ScanWhile(unicode.IsDigit) 228 | next := s.read() 229 | if neg { 230 | start = "-" + start 231 | } 232 | 233 | if next == EOF_RUNE { 234 | return s.setAndReturn(NUM_INT, start) 235 | } 236 | 237 | comp := start + string(next) 238 | 239 | if strings.HasSuffix(comp, ".") { 240 | return s.scanFloat(comp) 241 | } else if comp == "0x" { 242 | return s.scanHex() 243 | } else if comp == "0o" { 244 | return s.scanOctal() 245 | } else if comp == "0b" { 246 | return s.scanBinary() 247 | } 248 | 249 | if next != '_' { 250 | if unicode.IsSpace(next) || !unicode.IsDigit(next) { 251 | s.r.UnreadRune() 252 | return s.setAndReturn(NUM_INT, start) 253 | } 254 | } 255 | 256 | // Read as integer 257 | s.r.UnreadRune() 258 | lit := s.ScanWhile(func(r rune) bool { 259 | return unicode.IsDigit(r) || r == '_' 260 | }) 261 | 262 | return s.setAndReturn(NUM_INT, strings.ReplaceAll(start+lit, "_", "")) 263 | } 264 | 265 | func (s *Scanner) scanFloat(start string) (Token, string) { 266 | // Try scientific notation: 1.234e-42 267 | if len(strings.TrimPrefix(start, "-")) == 2 { 268 | numsAfterDot := s.ScanWhile(unicode.IsDigit) 269 | if numsAfterDot == "" { 270 | return s.setAndReturn(CHARS, start) 271 | } 272 | 273 | tokenAfterNums, sAfterNums := s.Scan() 274 | 275 | if tokenAfterNums == CHAR && sAfterNums == "e" { 276 | next, ch := s.Scan() 277 | var exp string 278 | 279 | if ch == "-" { 280 | exp = s.ScanWhile(unicode.IsDigit) 281 | exp = "-" + exp 282 | } else if next == NUM_INT { 283 | exp = ch 284 | } else { 285 | return CHARS, start + numsAfterDot + sAfterNums + ch 286 | } 287 | 288 | num := fmt.Sprintf("%s%se%s", start, numsAfterDot, exp) 289 | return s.setAndReturn(NUM_SCI, num) 290 | } else if tokenAfterNums == NUM_INT { 291 | num := start + numsAfterDot + sAfterNums 292 | return s.setAndReturn(NUM_FLOAT, num) 293 | } else if tokenAfterNums == WS || tokenAfterNums == EOF { 294 | s.Unread() 295 | return 
s.setAndReturn(NUM_FLOAT, start+numsAfterDot) 296 | } 297 | 298 | } 299 | 300 | numsAfterDot := s.ScanWhile(unicode.IsDigit) 301 | if numsAfterDot == "" { 302 | return s.setAndReturn(CHARS, start) 303 | } 304 | 305 | return s.setAndReturn(NUM_FLOAT, start+numsAfterDot) 306 | } 307 | 308 | func (s *Scanner) scanBinary() (Token, string) { 309 | // Read binary 310 | lit := s.ScanWhile(func(r rune) bool { 311 | return r == '0' || r == '1' || r == '_' 312 | }) 313 | lit = strings.ReplaceAll(lit, "_", "") 314 | 315 | n, err := strconv.ParseInt(lit, 2, 64) 316 | if err != nil { 317 | return s.setAndReturn(CHARS, "0b"+lit) 318 | } 319 | 320 | return s.setAndReturn(NUM_INT, fmt.Sprint(n)) 321 | } 322 | 323 | func (s *Scanner) scanOctal() (Token, string) { 324 | // Read binary 325 | lit := s.ScanWhile(func(r rune) bool { 326 | return ('0' <= r && r <= '7') || r == '_' 327 | }) 328 | lit = strings.ReplaceAll(lit, "_", "") 329 | 330 | n, err := strconv.ParseInt(lit, 8, 64) 331 | if err != nil { 332 | return s.setAndReturn(CHARS, "0o"+lit) 333 | } 334 | 335 | return s.setAndReturn(NUM_INT, fmt.Sprint(n)) 336 | } 337 | 338 | func (s *Scanner) scanHex() (Token, string) { 339 | // Read hexadecimal: 0xdeadbeef 340 | lit := s.ScanWhile(func(r rune) bool { 341 | return hexRunes[r] || r == '_' 342 | }) 343 | lit = strings.ReplaceAll(lit, "_", "") 344 | 345 | n, err := strconv.ParseInt(lit, 16, 64) 346 | if err != nil { 347 | return s.setAndReturn(CHARS, "0x"+lit) 348 | } 349 | 350 | return s.setAndReturn(NUM_INT, fmt.Sprint(n)) 351 | } 352 | 353 | // Scan while whitespace only. 354 | func (s *Scanner) ScanWhitespace() (Token, string) { 355 | lit := s.ScanWhile(unicode.IsSpace) 356 | return s.setAndReturn(WS, lit) 357 | } 358 | 359 | // scanLetters consumes the current rune and all contiguous ident runes. 
360 | func (s *Scanner) ScanLetters() (Token, string) { 361 | pred := func(r rune) bool { 362 | return unicode.IsLetter(r) || r == '_' 363 | } 364 | 365 | lit := s.ScanWhile(pred) 366 | return s.setAndReturn(IDENT, lit) 367 | } 368 | 369 | func (s *Scanner) ScanBareIdent() string { 370 | lit := s.ScanWhile(IsIdentifier) 371 | s.setAndReturn(IDENT, lit) 372 | return lit 373 | } 374 | 375 | // Read the next rune from the reader. 376 | // Returns `eof` if an error occurs (or io.EOF is returned). 377 | func (s *Scanner) read() rune { 378 | r, _, err := s.r.ReadRune() 379 | if err != nil { 380 | s.eof = true 381 | return EOF_RUNE 382 | } 383 | return r 384 | } 385 | 386 | func (s *Scanner) setAndReturn(t Token, lit string) (Token, string) { 387 | s.last = previous{token: t, lit: lit} 388 | return t, lit 389 | } 390 | 391 | func (s *Scanner) Unread() { 392 | s.prev = &s.last 393 | } 394 | -------------------------------------------------------------------------------- /parser.go: -------------------------------------------------------------------------------- 1 | package gokdl 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "strconv" 7 | "strings" 8 | "unicode" 9 | 10 | pkg "github.com/lunjon/gokdl/internal" 11 | ) 12 | 13 | var newlinesToQuoted = map[string]string{ 14 | "\n": "\\n", // newline 15 | "\r": "\\r", // carriage return 16 | "\r\n": "\\r\\n", // carriage return newline 17 | "\f": "\\f", // form feed 18 | "\u0085": "\\u0085", // next line 19 | "\u2028": "\\u2028", // line separator 20 | "\u2029": "\\u2029", // paragraph separator 21 | } 22 | 23 | func isNewline(lit string) bool { 24 | for nl := range newlinesToQuoted { 25 | if strings.Contains(lit, nl) { 26 | return true 27 | } 28 | } 29 | return false 30 | } 31 | 32 | type parseContext struct{} 33 | 34 | // The type responsible for parsing the documents. 35 | // The parser relies on the Scanner (internal) for 36 | // parsing. 
37 | // 38 | // Specification: https://github.com/kdl-org/kdl/blob/main/SPEC.md 39 | type parser struct { 40 | sc *pkg.Scanner 41 | } 42 | 43 | func newParser(src io.Reader) *parser { 44 | return &parser{ 45 | sc: pkg.NewScanner(src), 46 | } 47 | } 48 | 49 | func (p *parser) parse() (Doc, error) { 50 | cx := &parseContext{} 51 | nodes, err := parseScope(cx, p.sc, false) 52 | 53 | return Doc{ 54 | nodes: nodes, 55 | }, err 56 | } 57 | 58 | // Parses a root or child scope (inside a node). 59 | func parseScope(cx *parseContext, sc *pkg.Scanner, isChild bool) ([]Node, error) { 60 | nodes := []Node{} // The nodes accumulated in this scope 61 | done := false // When true, parsing of the scope (root or children) is done 62 | 63 | var typeAnnot string 64 | 65 | appendNode := func(n Node) { 66 | if typeAnnot != "" { 67 | n.TypeAnnotation = TypeAnnotation(typeAnnot) 68 | typeAnnot = "" 69 | } 70 | nodes = append(nodes, n) 71 | } 72 | 73 | for !done { 74 | token, lit := sc.Scan() 75 | if token == pkg.EOF { 76 | break 77 | } 78 | 79 | switch token { 80 | case pkg.WS: 81 | continue 82 | case pkg.SEMICOLON: 83 | continue 84 | case pkg.CBRACK_CLOSE: 85 | if isChild { 86 | done = true 87 | } else { 88 | return nil, fmt.Errorf("unexpected token: %s", lit) 89 | } 90 | case pkg.COMMENT_LINE: 91 | sc.ScanLine() 92 | case pkg.COMMENT_MUL_OPEN: 93 | if err := scanMultilineComment(cx, sc); err != nil { 94 | return nil, err 95 | } 96 | case pkg.COMMENT_SD: 97 | // Parse the following content as node and ignore the result 98 | nextToken, _ := sc.Scan() 99 | if pkg.IsInitialIdentToken(nextToken) { 100 | text := sc.ScanBareIdent() 101 | if _, err := scanNode(cx, sc, text); err != nil { 102 | return nil, fmt.Errorf("expected a node after slash-dash comment: %s", err) 103 | } 104 | } else { 105 | return nil, fmt.Errorf("expected a node after slash-dash comment") 106 | } 107 | case pkg.PAREN_OPEN: 108 | annot, err := scanTypeAnnotation(cx, sc) 109 | if err != nil { 110 | return nil, err 111 | } 
112 | typeAnnot = annot 113 | case pkg.QUOTE, pkg.RAWSTR_OPEN, pkg.RAWSTR_HASH_OPEN, pkg.RAWSTR_HASH_CLOSE: 114 | // Identifier in quotes => parse as string 115 | 116 | var err error 117 | var str string 118 | switch token { 119 | case pkg.QUOTE: 120 | str, err = scanString(cx, sc, "") 121 | case pkg.RAWSTR_HASH_CLOSE: 122 | str, err = scanString(cx, sc, "") 123 | str = lit[1:] + str 124 | case pkg.RAWSTR_OPEN: 125 | str, err = scanRawString(cx, sc, "") 126 | case pkg.RAWSTR_HASH_OPEN: 127 | str, err = scanRawStringHash(cx, sc, lit, "") 128 | } 129 | 130 | if err != nil { 131 | return nil, err 132 | } 133 | 134 | node, err := scanNode(cx, sc, str) 135 | if err != nil { 136 | return nil, err 137 | } 138 | appendNode(node) 139 | default: 140 | if pkg.IsInitialIdentToken(token) { 141 | text := sc.ScanBareIdent() 142 | node, err := scanNode(cx, sc, lit+text) 143 | if err != nil { 144 | return nil, err 145 | } 146 | appendNode(node) 147 | } else { 148 | return nil, fmt.Errorf("unexpected token: %s", lit) 149 | } 150 | } 151 | } 152 | 153 | return nodes, nil 154 | } 155 | 156 | func scanMultilineComment(cx *parseContext, sc *pkg.Scanner) error { 157 | for { 158 | token, _ := sc.Scan() 159 | if token == pkg.EOF { 160 | break 161 | } 162 | 163 | if token == pkg.COMMENT_MUL_CLOSE { 164 | return nil 165 | } 166 | } 167 | 168 | return fmt.Errorf("no closing of multiline comment") 169 | } 170 | 171 | func scanNode(cx *parseContext, sc *pkg.Scanner, name string) (Node, error) { 172 | // This function gets called immediately after an 173 | // idenfitier was read. So just check that the following 174 | // token is valid. 
175 | next, nextlit := sc.Scan() 176 | if !pkg.IsAnyOf(next, pkg.EOF, pkg.WS, pkg.SEMICOLON, pkg.CBRACK_CLOSE) { 177 | return Node{}, fmt.Errorf("unexpected token in identifier: %s", nextlit) 178 | } 179 | 180 | sc.Unread() 181 | 182 | children := []Node{} 183 | args := []Arg{} 184 | props := []Prop{} 185 | 186 | done := false 187 | skip := false // Used with slash-dash comments 188 | 189 | typeAnnotation := "" 190 | for !done { 191 | token, lit := sc.Scan() 192 | if token == pkg.EOF { 193 | break 194 | } 195 | 196 | if typeAnnotation != "" && pkg.IsAnyOf(token, pkg.BACKSLASH, pkg.SEMICOLON, pkg.CBRACK_OPEN) { 197 | return Node{}, fmt.Errorf("unexpected type annotation") 198 | } 199 | 200 | switch token { 201 | case pkg.BACKSLASH: 202 | sc.ScanWhitespace() 203 | case pkg.SEMICOLON: 204 | done = true 205 | case pkg.WS: 206 | if isNewline(lit) { 207 | done = true 208 | } 209 | case pkg.COMMENT_LINE: 210 | sc.ScanLine() 211 | done = true 212 | case pkg.COMMENT_MUL_OPEN: 213 | if err := scanMultilineComment(cx, sc); err != nil { 214 | return Node{}, err 215 | } 216 | case pkg.COMMENT_SD: 217 | // We need to continue to parse and ignore the next result. 
218 | skip = true 219 | // typeAnnotation = "" 220 | case pkg.NUM_INT: 221 | if skip { 222 | skip = false 223 | typeAnnotation = "" 224 | continue 225 | } 226 | 227 | arg, err := newIntArg(lit, typeAnnotation) 228 | if err != nil { 229 | return Node{}, err 230 | } 231 | args = append(args, arg) 232 | typeAnnotation = "" 233 | case pkg.NUM_FLOAT, pkg.NUM_SCI: 234 | if skip { 235 | skip = false 236 | typeAnnotation = "" 237 | continue 238 | } 239 | 240 | arg, err := newFloatArg(lit, typeAnnotation) 241 | if err != nil { 242 | return Node{}, err 243 | } 244 | 245 | args = append(args, arg) 246 | typeAnnotation = "" 247 | case pkg.QUOTE, pkg.RAWSTR_OPEN, pkg.RAWSTR_HASH_OPEN, pkg.RAWSTR_HASH_CLOSE: 248 | var str string 249 | var err error 250 | switch token { 251 | case pkg.QUOTE: 252 | str, err = scanString(cx, sc, typeAnnotation) 253 | case pkg.RAWSTR_HASH_CLOSE: 254 | str, err = scanString(cx, sc, typeAnnotation) 255 | str = lit[1:] + str 256 | case pkg.RAWSTR_OPEN: 257 | str, err = scanRawString(cx, sc, typeAnnotation) 258 | case pkg.RAWSTR_HASH_OPEN: 259 | str, err = scanRawStringHash(cx, sc, lit, typeAnnotation) 260 | } 261 | if err != nil { 262 | return Node{}, err 263 | } 264 | 265 | nextToken, _ := sc.Scan() 266 | if nextToken == pkg.EQUAL { 267 | prop, err := scanProp(cx, sc, str, typeAnnotation) 268 | if err != nil { 269 | return Node{}, err 270 | } 271 | 272 | if !skip { 273 | props = append(props, prop) 274 | } 275 | skip = false 276 | } else { 277 | if !skip { 278 | sc.Unread() 279 | arg := newArg(str, TypeAnnotation(typeAnnotation)) 280 | args = append(args, arg) 281 | } 282 | 283 | skip = false 284 | } 285 | 286 | typeAnnotation = "" 287 | case pkg.CBRACK_OPEN: 288 | ns, err := parseScope(cx, sc, true) 289 | if err != nil { 290 | return Node{}, err 291 | } 292 | 293 | if !skip { 294 | children = append(children, ns...) 
295 | } 296 | 297 | skip = false 298 | case pkg.CBRACK_CLOSE: 299 | done = true 300 | case pkg.PAREN_OPEN: 301 | annot, err := scanTypeAnnotation(cx, sc) 302 | if err != nil { 303 | return Node{}, err 304 | } 305 | typeAnnotation = annot 306 | default: 307 | // At this point there are multiple cases that can happen: 308 | // - The following value is a literal: null, true, false 309 | // - These should be treated as such 310 | // - It is the start of a property name 311 | // 312 | // All the literals have valid initial identifier tokens. 313 | // That is, n(ull), t(rue) and f(alse) can be the start 314 | // of an identifier and NOT the literals. 315 | // 316 | // Thus we need to check the following tokens in order 317 | // to decide what it is. 318 | 319 | { // Check literals 320 | var value any 321 | var ok bool 322 | 323 | _, next := sc.ScanLetters() 324 | next = lit + next 325 | 326 | switch next { 327 | case "null": 328 | value = nil // Default for any... 329 | ok = true 330 | case "true": 331 | value = true 332 | ok = true 333 | case "false": 334 | value = false 335 | ok = true 336 | } 337 | 338 | if ok { 339 | if typeAnnotation != "" { 340 | return Node{}, fmt.Errorf("unexpected type annotation") 341 | } 342 | 343 | if !skip { 344 | args = append(args, newArg(value, "")) 345 | skip = false 346 | } 347 | continue 348 | } else { 349 | sc.Unread() 350 | } 351 | } 352 | 353 | if pkg.IsInitialIdentToken(token) { 354 | id := sc.ScanBareIdent() 355 | next, _ := sc.Scan() 356 | if next != pkg.EQUAL { 357 | return Node{}, fmt.Errorf("unexpected identifier") 358 | } 359 | 360 | prop, err := scanProp(cx, sc, lit+id, typeAnnotation) 361 | if err != nil { 362 | return Node{}, err 363 | } 364 | 365 | if !skip { 366 | props = append(props, prop) 367 | } 368 | skip = false 369 | } else { 370 | return Node{}, fmt.Errorf("unexpected token: %s", lit) 371 | } 372 | } 373 | } 374 | 375 | return Node{ 376 | Name: name, 377 | Children: children, 378 | Props: props, 379 | Args: args, 
380 | }, nil 381 | } 382 | 383 | func scanString(cx *parseContext, sc *pkg.Scanner, typeAnnot string) (string, error) { 384 | buf := strings.Builder{} 385 | done := false 386 | for !done { 387 | token, lit := sc.Scan() 388 | if token == pkg.EOF { 389 | return "", fmt.Errorf("error reading string literal: reached EOF") 390 | } 391 | 392 | switch token { 393 | case pkg.BACKSLASH: 394 | next, nextLit := sc.Scan() 395 | if next == pkg.QUOTE { 396 | buf.WriteString(`\"`) 397 | } else { 398 | buf.WriteString(lit) 399 | buf.WriteString(nextLit) 400 | } 401 | case pkg.QUOTE: 402 | done = true 403 | case pkg.WS: 404 | // Unquoted newline characters are invalid -> replace them prior to unquoting 405 | res := lit 406 | for nl, escaped := range newlinesToQuoted { 407 | res = strings.ReplaceAll(res, nl, escaped) 408 | } 409 | buf.WriteString(res) 410 | case pkg.RAWSTR_OPEN, pkg.RAWSTR_HASH_OPEN: 411 | buf.WriteString(lit[:len(lit)-1]) 412 | done = true 413 | default: 414 | buf.WriteString(lit) 415 | } 416 | } 417 | 418 | sss, err := strconv.Unquote("\"" + buf.String() + "\"") 419 | if err != nil { 420 | return "", err 421 | } 422 | 423 | return parseStringValue(sss, typeAnnot) 424 | } 425 | 426 | func scanRawString(cx *parseContext, sc *pkg.Scanner, typeAnnot string) (string, error) { 427 | buf := strings.Builder{} 428 | done := false 429 | for !done { 430 | token, lit := sc.Scan() 431 | if token == pkg.EOF { 432 | return "", fmt.Errorf("error reading raw string literal: reached EOF") 433 | } 434 | 435 | switch token { 436 | case pkg.QUOTE: 437 | done = true 438 | default: 439 | buf.WriteString(lit) 440 | } 441 | } 442 | 443 | return parseStringValue(buf.String(), typeAnnot) 444 | } 445 | 446 | func scanRawStringHash(cx *parseContext, sc *pkg.Scanner, start, typeAnnot string) (string, error) { 447 | end := strings.TrimPrefix(start, "r") 448 | end = strings.TrimSuffix(end, `"`) 449 | end = `"` + end 450 | 451 | buf := strings.Builder{} 452 | done := false 453 | for !done { 454 | token, 
lit := sc.Scan() 455 | if token == pkg.EOF { 456 | return "", fmt.Errorf("error reading raw string literal: reached EOF") 457 | } 458 | 459 | switch token { 460 | case pkg.RAWSTR_HASH_CLOSE: 461 | if lit == end { 462 | done = true 463 | } else { 464 | return "", fmt.Errorf("invalid terminal of raw string literal: %s", lit) 465 | } 466 | default: 467 | buf.WriteString(lit) 468 | } 469 | } 470 | 471 | return parseStringValue(buf.String(), typeAnnot) 472 | } 473 | 474 | func scanProp(cx *parseContext, sc *pkg.Scanner, name, typeAnnotation string) (Prop, error) { 475 | _, _ = sc.ScanWhitespace() 476 | 477 | done := false 478 | var value any 479 | var valueTypeAnnot string 480 | 481 | for !done { 482 | token, lit := sc.Scan() 483 | if token == pkg.EOF { 484 | return Prop{}, fmt.Errorf("invalid node property: reached EOF") 485 | } 486 | 487 | switch token { 488 | case pkg.INVALID: 489 | return Prop{}, fmt.Errorf("invalid property value") 490 | case pkg.NUM_INT: 491 | n, err := parseIntValue(lit, valueTypeAnnot) 492 | if err != nil { 493 | return Prop{}, err 494 | } 495 | value = n 496 | done = true 497 | case pkg.NUM_FLOAT, pkg.NUM_SCI: 498 | n, err := parseFloatValue(lit, valueTypeAnnot) 499 | if err != nil { 500 | return Prop{}, err 501 | } 502 | value = n 503 | done = true 504 | case pkg.QUOTE: 505 | s, err := scanString(cx, sc, valueTypeAnnot) 506 | if err != nil { 507 | return Prop{}, err 508 | } 509 | value = s 510 | done = true 511 | case pkg.PAREN_OPEN: 512 | t, err := scanTypeAnnotation(cx, sc) 513 | if err != nil { 514 | return Prop{}, err 515 | } 516 | 517 | valueTypeAnnot = t 518 | default: 519 | // Not a number or string => try parse bool or null 520 | sc.Unread() 521 | t, letters := sc.ScanLetters() 522 | if t != pkg.EOF { 523 | switch letters { 524 | case "null": 525 | value = nil 526 | case "true": 527 | value = true 528 | case "false": 529 | value = false 530 | default: 531 | return Prop{}, fmt.Errorf("invalid property value") 532 | } 533 | 534 | if 
valueTypeAnnot != "" { 535 | return Prop{}, fmt.Errorf("unexpected type annotation") 536 | } 537 | 538 | done = true 539 | } else { 540 | return Prop{}, fmt.Errorf("invalid property value") 541 | } 542 | } 543 | } 544 | 545 | return Prop{ 546 | Name: name, 547 | TypeAnnot: TypeAnnotation(typeAnnotation), 548 | Value: value, 549 | ValueTypeAnnot: TypeAnnotation(valueTypeAnnot), 550 | }, nil 551 | } 552 | 553 | func scanTypeAnnotation(cx *parseContext, sc *pkg.Scanner) (string, error) { 554 | annot := sc.ScanWhile(func(r rune) bool { 555 | return unicode.In(r, unicode.Digit, unicode.Letter) 556 | }) 557 | 558 | next, _ := sc.Scan() 559 | if next != pkg.PAREN_CLOSE { 560 | return "", fmt.Errorf("unclosed type annotation") 561 | } 562 | 563 | annot = strings.TrimSpace(annot) 564 | if annot == "" { 565 | return "", fmt.Errorf("invalid type annotation: empty") 566 | } 567 | 568 | return annot, nil 569 | } 570 | -------------------------------------------------------------------------------- /parser_test.go: -------------------------------------------------------------------------------- 1 | package gokdl 2 | 3 | import ( 4 | "bytes" 5 | "os" 6 | "strings" 7 | 8 | // "os" 9 | "testing" 10 | 11 | "github.com/stretchr/testify/require" 12 | ) 13 | 14 | func TestParserLineSeparators(t *testing.T) { 15 | tests := []struct { 16 | testname string 17 | body string 18 | }{ 19 | {"newline", "A\nB"}, 20 | {"carriage return", "A\rB"}, 21 | {"carriage return newline", "A\r\nB"}, 22 | {"form feed", "A\fB"}, 23 | {"next line", "A\u0085B"}, 24 | {"line separator", "A\u2028B"}, 25 | {"paragraph separator", "A\u2029B"}, 26 | {"multiple variants", "A\u2029\n\u2028\u0085\f\r\r\nB"}, 27 | } 28 | for _, test := range tests { 29 | t.Run(test.testname, func(t *testing.T) { 30 | nodes := setupAndParse(t, test.body).Nodes() 31 | require.Len(t, nodes, 2) 32 | require.Equal(t, "A", nodes[0].Name) 33 | require.Equal(t, "B", nodes[1].Name) 34 | }) 35 | } 36 | } 37 | 38 | func 
TestParserNewlineEscaping(t *testing.T) { 39 | tests := []struct { 40 | testname string 41 | body string 42 | parsed string 43 | }{ 44 | {"newline in string value", "node \"new\nline\"", "new\nline"}, 45 | {"carriage return in string value", "node \"carriage\rreturn\"", "carriage\rreturn"}, 46 | {"carriage return newline", "node \"cr\r\nnl\"", "cr\r\nnl"}, 47 | {"form feed in string value", "node \"form\ffeed\"", "form\ffeed"}, 48 | {"next line in string value", "node \"next\u0085line\"", "next\u0085line"}, 49 | {"line separator in string value", "node \"line\u2028separator\"", "line\u2028separator"}, 50 | {"paragraph separator in string value", "node \"paragraph\u2029separator\"", "paragraph\u2029separator"}, 51 | } 52 | for _, test := range tests { 53 | t.Run(test.testname, func(t *testing.T) { 54 | nodes := setupAndParse(t, test.body).Nodes() 55 | require.Len(t, nodes, 1) 56 | require.Equal(t, test.parsed, nodes[0].Args[0].String()) 57 | }) 58 | } 59 | } 60 | 61 | func TestParserLineComment(t *testing.T) { 62 | _ = setupAndParse(t, `// First line 63 | // Second line 64 | // Thirdline`) 65 | } 66 | 67 | func TestParserMultilineComment(t *testing.T) { 68 | tests := []struct { 69 | testname string 70 | body string 71 | }{ 72 | {"single line", "/* comment */"}, 73 | {"single line - two comments", "/* comment */ /* another */"}, 74 | { 75 | "multiple lines", `/* 76 | comment 77 | another 78 | */`, 79 | }, 80 | } 81 | 82 | for _, test := range tests { 83 | t.Run(test.testname, func(t *testing.T) { 84 | _ = setupAndParse(t, test.body) 85 | }) 86 | } 87 | } 88 | 89 | func TestParserSlashdashCommentNode(t *testing.T) { 90 | doc := setupAndParse(t, `/-mynode`) 91 | nodes := doc.Nodes() 92 | require.Len(t, nodes, 0) 93 | } 94 | 95 | func TestParserSlashdashCommentArg(t *testing.T) { 96 | // Arrange & Act 97 | doc := setupAndParse(t, "Node.js /-\"arg\" 1") 98 | 99 | // Assert 100 | nodes := doc.Nodes() 101 | require.Len(t, nodes, 1) 102 | args := nodes[0].Args 103 | 
require.Len(t, args, 1) 104 | require.Equal(t, int64(1), args[0].Value) 105 | } 106 | 107 | func TestParserSlashdashCommentProp(t *testing.T) { 108 | doc := setupAndParse(t, "Node.js uncommented=true /-properly=\"arg\" 1") 109 | nodes := doc.Nodes() 110 | require.Len(t, nodes, 1) 111 | 112 | args := nodes[0].Args 113 | require.Len(t, args, 1) 114 | require.Equal(t, int64(1), args[0].Value) 115 | 116 | props := nodes[0].Props 117 | require.Len(t, props, 1) 118 | 119 | require.Equal(t, true, props[0].Value) 120 | } 121 | 122 | func TestParserSlashdashCommentChildren(t *testing.T) { 123 | doc := setupAndParse(t, `Node.js uncommented=true 1 /-{ 124 | childNode 125 | }`) 126 | nodes := doc.Nodes() 127 | require.Len(t, nodes, 1) 128 | children := nodes[0].Children 129 | require.Len(t, children, 0) 130 | } 131 | 132 | func TestParserSlashdashCommentNestedChildren(t *testing.T) { 133 | doc := setupAndParse(t, `Node.js uncommented=true 1 { 134 | /-Ignored 1 2 135 | Exists true 136 | }`) 137 | nodes := doc.Nodes() 138 | require.Len(t, nodes, 1) 139 | require.Len(t, nodes[0].Children, 1) 140 | } 141 | 142 | func TestParserValidNodeIdentifier(t *testing.T) { 143 | tests := []struct { 144 | testname string 145 | doc string 146 | expectedName string 147 | }{ 148 | {"lower case letters", "node", "node"}, 149 | {"snake case", "node_name", "node_name"}, 150 | {"end with number", "node_name123", "node_name123"}, 151 | {"arbitrary characters #1", "-this_actually::WORKS?", "-this_actually::WORKS?"}, 152 | {"quoted named", "\"Node Name?\"", "Node Name?"}, 153 | } 154 | 155 | for _, test := range tests { 156 | t.Run(test.testname, func(t *testing.T) { 157 | doc := setupAndParse(t, test.doc) 158 | require.Len(t, doc.nodes, 1) 159 | 160 | name := doc.nodes[0].Name 161 | require.Equal(t, test.expectedName, name) 162 | require.Zero(t, doc.nodes[0].TypeAnnotation) 163 | }) 164 | } 165 | } 166 | 167 | func TestParserNodeIdentifierInvalid(t *testing.T) { 168 | tests := []struct { 169 | 
testname string 170 | ident string 171 | }{ 172 | {"integer", "1"}, 173 | {"parenthesis", "a(b)c"}, 174 | {"square brackets", "a[b]c"}, 175 | {"equal", "a=c"}, 176 | {"comma", "abcD,,Y"}, 177 | } 178 | 179 | for _, test := range tests { 180 | t.Run(test.testname, func(t *testing.T) { 181 | parser := setup(test.ident) 182 | _, err := parser.parse() 183 | require.Error(t, err) 184 | }) 185 | } 186 | } 187 | 188 | func TestParserNodeArgs(t *testing.T) { 189 | // Arrange 190 | nodeName := "node" 191 | tests := []struct { 192 | testname string 193 | body string 194 | expectedArgValue any 195 | }{ 196 | {"integer", "node 1", int64(1)}, 197 | {"integer with underscore", "node 1_0_0", int64(100)}, 198 | {"float1", "node 1.234", 1.234}, 199 | {"float2", "node 1234.5678", 1234.5678}, 200 | {"string1", "node \"my@value\"", "my@value"}, 201 | {"string2", `node "TODO: $1"`, "TODO: $1"}, 202 | {"string3", `node "log.Printf(\"$1\")"`, `log.Printf("$1")`}, 203 | {"string4", `node "block{ 204 | $1 205 | }"`, `block{ 206 | $1 207 | }`}, 208 | {"rawstring1", `node r"h\e\l\l"`, `h\e\l\l`}, 209 | {"rawstringhash1", `node r#"h\e\l\l"#`, `h\e\l\l`}, 210 | {"rawstringhash2", `node r##"h\e\l\l"##`, `h\e\l\l`}, 211 | {"rawstringhash3", `node r##"he"ll"##`, `he"ll`}, 212 | {"rawstringhash4", `node r##"he#ll"##`, `he#ll`}, 213 | {"string with hash", `node "#[allow(unused)]"`, `#[allow(unused)]`}, 214 | {"null", "node null", nil}, 215 | {"true", "node true", true}, 216 | {"false", "node false", false}, 217 | {"hex - small caps", "node 0x1aaeff", int64(1748735)}, 218 | {"hex - mixed caps", "node 0x1AAeff", int64(1748735)}, 219 | } 220 | 221 | for _, test := range tests { 222 | t.Run(test.testname, func(t *testing.T) { 223 | // Act 224 | parser := setup(test.body) 225 | doc, err := parser.parse() 226 | 227 | // Assert 228 | require.NoError(t, err) 229 | 230 | nodes := doc.Nodes() 231 | require.Len(t, nodes, 1) 232 | node := nodes[0] 233 | require.Equal(t, nodeName, node.Name) 234 | 235 | 
require.Len(t, node.Args, 1) 236 | arg := node.Args[0] 237 | 238 | require.Equal(t, test.expectedArgValue, arg.Value) 239 | require.Equal(t, TypeAnnotation(""), arg.TypeAnnotation) 240 | }) 241 | } 242 | } 243 | 244 | func TestParserNodeArgsInvalid(t *testing.T) { 245 | // Arrange 246 | tests := []struct { 247 | testname string 248 | body string 249 | }{ 250 | {"integer followed by letter", "NodeName 1a"}, 251 | {"bare identifier", "NodeName nodename"}, 252 | {"unexpected slash", "NodeName /"}, 253 | {"unexpected dot", "NodeName ."}, 254 | {"unterminated string", `NodeName ".`}, 255 | {"invalid termination of raw string 1", `NodeName r".`}, 256 | {"invalid termination of raw string 2", `NodeName r##"."#`}, 257 | } 258 | 259 | for _, test := range tests { 260 | t.Run(test.testname, func(t *testing.T) { 261 | // Act 262 | parser := setup(test.body) 263 | _, err := parser.parse() 264 | 265 | // Assert 266 | require.Error(t, err) 267 | }) 268 | } 269 | } 270 | 271 | func TestParserNodeArgsTypeAnnotationsInvalid(t *testing.T) { 272 | // Arrange 273 | tests := []struct { 274 | testname string 275 | body string 276 | }{ 277 | {"type annotation for invalid literal: null", "NodeName (u8)null"}, 278 | {"type annotation for invalid literal: true", "NodeName (u8)true"}, 279 | {"type annotation for invalid literal: false", "NodeName (u8)false"}, 280 | {"u8 for type string", `NodeName (u8)"value"`}, 281 | {"uncloses paranthesis", `NodeName (string"value"`}, 282 | {"integer for type float", "NodeName (u16)12.456"}, 283 | {"float for type integer", "NodeName (f64)12"}, 284 | {"negative for unsigned integer", "NodeName (u64)-12"}, 285 | {"overflow for u8", "NodeName (u8)1024"}, 286 | } 287 | 288 | for _, test := range tests { 289 | t.Run(test.testname, func(t *testing.T) { 290 | // Act 291 | parser := setup(test.body) 292 | _, err := parser.parse() 293 | 294 | // Assert 295 | require.Error(t, err) 296 | }) 297 | } 298 | } 299 | 300 | func TestParserNodeProp(t *testing.T) { 301 | // 
Arrange 302 | nodeName := "NodeName" 303 | tests := []struct { 304 | testname string 305 | body string 306 | expectedPropName string 307 | expectedPropValue any 308 | }{ 309 | {"integer value", "NodeName myprop=1", "myprop", int64(1)}, 310 | {"float value", "NodeName myprop=1.234", "myprop", 1.234}, 311 | {"string value", "NodeName myprop=\"Hello, World!\"", "myprop", "Hello, World!"}, 312 | {"string value - quoted name", "NodeName \"hehe prop\"=\"Hello, World!\"", "hehe prop", "Hello, World!"}, 313 | {"null value", "NodeName myprop=null", "myprop", nil}, 314 | {"bool: true", "NodeName myprop=true", "myprop", true}, 315 | {"bool: false", "NodeName myprop=false", "myprop", false}, 316 | } 317 | 318 | for _, test := range tests { 319 | t.Run(test.testname, func(t *testing.T) { 320 | parser := setup(test.body) 321 | // Act 322 | doc, err := parser.parse() 323 | 324 | // Assert 325 | require.NoError(t, err) 326 | 327 | nodes := doc.Nodes() 328 | require.Len(t, nodes, 1) 329 | 330 | node := nodes[0] 331 | require.Equal(t, nodeName, node.Name) 332 | 333 | props := node.Props 334 | require.Len(t, props, 1) 335 | prop := props[0] 336 | 337 | require.Equal(t, test.expectedPropName, prop.Name) 338 | require.Equal(t, test.expectedPropValue, prop.Value) 339 | }) 340 | } 341 | } 342 | 343 | func TestParserNodePropInvalid(t *testing.T) { 344 | // Arrange 345 | tests := []struct { 346 | testname string 347 | body string 348 | }{ 349 | {"missing value", "NodeName myprop= "}, 350 | {"identifier value", "NodeName myprop=identifier"}, 351 | {"unterminated string", `NodeName myprop="opened`}, 352 | {"parenthesis", `NodeName myprop=()`}, 353 | {"misc1", `NodeName myprop=123a`}, 354 | {"misc2", `NodeName myprop=1.23--`}, 355 | } 356 | 357 | for _, test := range tests { 358 | t.Run(test.testname, func(t *testing.T) { 359 | // Act 360 | parser := setup(test.body) 361 | _, err := parser.parse() 362 | 363 | // Assert 364 | require.Error(t, err) 365 | }) 366 | } 367 | } 368 | 369 | func 
TestParserNodePropTypeAnnotation(t *testing.T) { 370 | // Arrange 371 | nodeName := "NodeName" 372 | propName := "myprop" 373 | tests := []struct { 374 | testname string 375 | body string 376 | expectedValue any 377 | expectedTypeAnnot TypeAnnotation 378 | expectedValueTypeAnnot TypeAnnotation 379 | }{ 380 | {"integer value - type annotation on arg", "NodeName myprop=(i64)1", int64(1), noTypeAnnot, I64}, 381 | {"integer value - type annotation on prop", "NodeName (author)myprop=1", int64(1), TypeAnnotation("author"), noTypeAnnot}, 382 | {"integer value - type annotation on prop and arg", "NodeName (author)myprop=(i64)1", int64(1), TypeAnnotation("author"), I64}, 383 | } 384 | 385 | for _, test := range tests { 386 | t.Run(test.testname, func(t *testing.T) { 387 | parser := setup(test.body) 388 | // Act 389 | doc, err := parser.parse() 390 | 391 | // Assert 392 | require.NoError(t, err) 393 | 394 | nodes := doc.Nodes() 395 | require.Len(t, nodes, 1) 396 | 397 | node := nodes[0] 398 | require.Equal(t, nodeName, node.Name) 399 | 400 | props := node.Props 401 | require.Len(t, props, 1) 402 | prop := props[0] 403 | 404 | require.Equal(t, propName, prop.Name) 405 | require.Equal(t, test.expectedValue, prop.Value) 406 | require.Equal(t, test.expectedTypeAnnot, prop.TypeAnnot) 407 | require.Equal(t, test.expectedValueTypeAnnot, prop.ValueTypeAnnot) 408 | }) 409 | } 410 | } 411 | 412 | func TestParserNodeTypeAnnotation(t *testing.T) { 413 | // Arrange 414 | nodeName := "NodeName" 415 | tests := []struct { 416 | testname string 417 | body string 418 | expectedTypeAnnot TypeAnnotation 419 | err bool 420 | }{ 421 | {"ok - string annotation", "(string) NodeName", TypeAnnotation("string"), false}, 422 | {"ok - arbitrary annotation", "(user)NodeName", TypeAnnotation("user"), false}, 423 | {"ok - no annotation", "NodeName", noTypeAnnot, false}, 424 | {"error - empty annotation", "() NodeName", noTypeAnnot, true}, 425 | {"error - unclosed annotation", "( NodeName", noTypeAnnot, 
true}, 426 | {"error - unexpected right par", ") NodeName", noTypeAnnot, true}, 427 | } 428 | 429 | for _, test := range tests { 430 | t.Run(test.testname, func(t *testing.T) { 431 | parser := setup(test.body) 432 | // Act 433 | doc, err := parser.parse() 434 | 435 | // Assert 436 | if test.err { 437 | require.Error(t, err) 438 | } else { 439 | require.NoError(t, err) 440 | nodes := doc.Nodes() 441 | require.Len(t, nodes, 1) 442 | 443 | node := nodes[0] 444 | require.Equal(t, nodeName, node.Name) 445 | 446 | require.Empty(t, node.Props) 447 | require.Empty(t, node.Args) 448 | 449 | require.Equal(t, test.expectedTypeAnnot, node.TypeAnnotation) 450 | } 451 | }) 452 | } 453 | } 454 | 455 | func TestParserNodeChildren(t *testing.T) { 456 | tests := []struct { 457 | testname string 458 | body string 459 | expectedNodes int 460 | }{ 461 | {"single line #1", "Parent { child1 }", 2}, 462 | {"single line #2", "Parent { child1; child2 }", 3}, 463 | {"single line #3", "Parent { child1; child2; }", 3}, 464 | {"single line #4", "Parent { child1; /-child2; }", 2}, 465 | {"single line #5", "Parent { /*child1*/ child2; }", 2}, 466 | { 467 | "nested #1", `Parent { 468 | child1; child2 469 | }`, 470 | 3, 471 | }, 472 | { 473 | "nested #2", `Parent { 474 | child1; 475 | child-? 476 | }`, 477 | 3, 478 | }, 479 | { 480 | "nested #3", `Parent { 481 | child1 {} 482 | child-? 483 | }`, 484 | 3, 485 | }, 486 | { 487 | "nested #4", `Parent { 488 | child1 { child1-A } 489 | child-? }`, 490 | 4, 491 | }, 492 | { 493 | "nested #5", `Parent { 494 | child1 { child1-A } 495 | child-? 
496 | 497 | deep-1 { 498 | deep-1-2 { 499 | /-deep-1-2-3-a 500 | deep-1-2-3-b 501 | deep-1-2-3-c 502 | } 503 | } 504 | }`, 505 | 8, 506 | }, 507 | } 508 | 509 | for _, test := range tests { 510 | t.Run(test.testname, func(t *testing.T) { 511 | doc := setupAndParse(t, test.body) 512 | actual := totalChildren(doc) 513 | require.Equal(t, test.expectedNodes, actual) 514 | }) 515 | } 516 | 517 | doc := setupAndParse(t, `Parent { child-1; child2; child-3 }`) 518 | children := doc.nodes[0].Children 519 | require.Len(t, children, 3) 520 | } 521 | 522 | func TestParserNodeChildrenSingle(t *testing.T) { 523 | doc := setupAndParse(t, `Parent { 524 | child 525 | }`) 526 | children := doc.nodes[0].Children 527 | require.Len(t, children, 1) 528 | require.Equal(t, "child", children[0].Name) 529 | } 530 | 531 | func TestParserNodeChildrenMultiple(t *testing.T) { 532 | doc := setupAndParse(t, `Parent { 533 | child-1; child2; 534 | child-3 535 | }`) 536 | children := doc.nodes[0].Children 537 | require.Len(t, children, 3) 538 | } 539 | 540 | func TestParserNodeChildrenMultipleSameRow(t *testing.T) { 541 | doc := setupAndParse(t, `Parent { child-1; child2; child-3 }`) 542 | children := doc.nodes[0].Children 543 | require.Len(t, children, 3) 544 | } 545 | 546 | func TestParserStringsEscaped(t *testing.T) { 547 | // Arrange 548 | filename := "testdata/escaped.kdl" 549 | bs, err := os.ReadFile(filename) 550 | require.NoError(t, err) 551 | parser := newParser(bytes.NewReader(bs)) 552 | 553 | // Act 554 | doc, err := parser.parse() 555 | 556 | //Assert 557 | require.NoError(t, err) 558 | nodes := doc.Nodes() 559 | require.Equal(t, "\t", nodes[0].Args[0].Value) 560 | require.Equal(t, "\u00CA", nodes[1].Args[0].Value) 561 | require.Equal(t, "Ê", nodes[1].Args[0].Value) 562 | require.Equal(t, `"`, nodes[2].Args[0].Value) 563 | } 564 | 565 | func setup(doc string) *parser { 566 | r := strings.NewReader(doc) 567 | return newParser(r) 568 | } 569 | 570 | func setupAndParse(t *testing.T, doc 
string) Doc { 571 | p := setup(doc) 572 | d, err := p.parse() 573 | if err != nil { 574 | t.Fatalf("expected no error but was: %s", err) 575 | } 576 | return d 577 | } 578 | 579 | func recNodeChildrenCount(node Node) int { 580 | if len(node.Children) == 0 { 581 | return 1 582 | } 583 | 584 | total := 1 585 | for _, ch := range node.Children { 586 | total += recNodeChildrenCount(ch) 587 | } 588 | return total 589 | } 590 | 591 | func totalChildren(doc Doc) int { 592 | total := 0 593 | for _, n := range doc.nodes { 594 | total += recNodeChildrenCount(n) 595 | } 596 | return total 597 | } 598 | --------------------------------------------------------------------------------