├── testdata
    └── escaped.kdl
├── doc.go
├── justfile
├── go.mod
├── node.go
├── prop.go
├── kdl.go
├── go.sum
├── .github
    └── workflows
    │   └── check.yml
├── kdl_test.go
├── arg.go
├── LICENSE
├── README.md
├── internal
    ├── token.go
    ├── scanner_test.go
    └── scanner.go
├── type.go
├── parser.go
└── parser_test.go


/testdata/escaped.kdl:
--------------------------------------------------------------------------------
1 | node1 "	"
2 | node2 "Ê"
3 | node3 "\""
4 | 


--------------------------------------------------------------------------------
/doc.go:
--------------------------------------------------------------------------------
 1 | package gokdl
 2 | 
 3 | type Doc struct {
 4 | 	nodes []Node
 5 | }
 6 | 
 7 | func (d Doc) Nodes() []Node {
 8 | 	return d.nodes
 9 | }
10 | 


--------------------------------------------------------------------------------
/justfile:
--------------------------------------------------------------------------------
 1 | check: fmt test lint
 2 | 
 3 | build:
 4 | 	go build ./...
 5 | 
 6 | fmt:
 7 | 	go fmt ./...
 8 | 
 9 | test pattern=".*":
10 | 	go test ./... -run={{ pattern }}
11 | 
12 | lint:
13 | 	go run honnef.co/go/tools/cmd/staticcheck@latest ./...
14 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
 1 | module github.com/lunjon/gokdl
 2 | 
 3 | go 1.21
 4 | 
 5 | require (
 6 | 	github.com/davecgh/go-spew v1.1.1 // indirect
 7 | 	github.com/pmezard/go-difflib v1.0.0 // indirect
 8 | 	github.com/stretchr/testify v1.8.4
 9 | 	gopkg.in/yaml.v3 v3.0.1 // indirect
10 | )
11 | 


--------------------------------------------------------------------------------
/node.go:
--------------------------------------------------------------------------------
 1 | package gokdl
 2 | 
 3 | type Node struct {
 4 | 	// Name of the node.
 5 | 	Name string
 6 | 	// Children of the node. If the node doesn't
 7 | 	// have children it is an empty list.
 8 | 	Children []Node
 9 | 	// Properties of the node.
10 | 	Props []Prop
11 | 	// Arguments of the node.
12 | 	Args []Arg
13 | 	// Type annotation on the node.
14 | 	// It has the zero value if no type annotation
15 | 	// exists for this node.
16 | 	TypeAnnotation TypeAnnotation
17 | }
18 | 


--------------------------------------------------------------------------------
/prop.go:
--------------------------------------------------------------------------------
 1 | package gokdl
 2 | 
 3 | import "fmt"
 4 | 
 5 | type Prop struct {
 6 | 	Name  string
 7 | 	Value any
 8 | 	// ValueTypeAnnot is the type annotation for the value of the property.
 9 | 	// Example: age=(u8)25
10 | 	// In this case it would be "u8".
11 | 	ValueTypeAnnot TypeAnnotation
12 | 	// TypeAnnot is the type annotation for the property itself.
13 | 	// Example: (author)name="Jonathan"
14 | 	TypeAnnot TypeAnnotation
15 | }
16 | 
17 | func (p Prop) String() string {
18 | 	return fmt.Sprintf("%s=%v", p.Name, p.Value)
19 | }
20 | 


--------------------------------------------------------------------------------
/kdl.go:
--------------------------------------------------------------------------------
 1 | package gokdl
 2 | 
 3 | import (
 4 | 	"io"
 5 | )
 6 | 
 7 | // Parse the bytes into a KDL Document,
 8 | // returning an error if anything was invalid.
 9 | //
10 | // The bytes must be valid unicode.
11 | func Parse(r io.Reader) (Doc, error) {
12 | 	parser := newParser(r)
13 | 	return parser.parse()
14 | }
15 | 
// ValueType names one of the primitive KDL value types.
type ValueType string

// Names of the primitive KDL value types.
const (
	TypeString = ValueType("string")
	TypeInt    = ValueType("int")
	TypeFloat  = ValueType("float")
	TypeBool   = ValueType("boolean")
	TypeNull   = ValueType("null")
)
27 | 


--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 5 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
 6 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
 7 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 8 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 9 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
10 | 


--------------------------------------------------------------------------------
/.github/workflows/check.yml:
--------------------------------------------------------------------------------
 1 | name: Check
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ "main" ]
 6 |   pull_request:
 7 |     branches: [ "main" ]
 8 | 
 9 | jobs:
10 |   build:
11 |     runs-on: ubuntu-latest
12 |     strategy:
13 |       matrix:
14 |         go-version: [ '1.20', '1.21.x' ]
15 |     steps:
16 |       - uses: actions/checkout@v3
17 | 
18 |       - name: Set up Go
19 |         uses: actions/setup-go@v4
20 |         with:
21 |           go-version: ${{ matrix.version }}
22 | 
23 |       - name: Build
24 |         run: go build -v ./...
25 | 
26 |       - name: Test
27 |         run: go test -v ./...
28 | 
29 |       - name: Check format
30 |         run: |
31 |           go fmt ./...
32 |           git diff --exit-code
33 | 
34 |       - name: Lint
35 |         run: go run honnef.co/go/tools/cmd/staticcheck@latest ./...
36 | 


--------------------------------------------------------------------------------
/kdl_test.go:
--------------------------------------------------------------------------------
 1 | package gokdl_test
 2 | 
 3 | import (
 4 | 	"strings"
 5 | 	"testing"
 6 | 
 7 | 	"github.com/lunjon/gokdl"
 8 | )
 9 | 
10 | func TestParseExample(t *testing.T) {
11 | 	doc := `
12 | // Line comment
13 | 
14 | /*
15 | multiline
16 | 	comment
17 | */
18 | 
19 | node "arg" prop=1
20 | 
21 | one; two; // Ignore this
22 | 
23 | nesting-testing /*ignore this as well*/ {
24 | 	child-1; child-?;
25 | 
26 | 	child!THREE keyword="string" {
27 | 		nesting-should-work-here-as-well
28 | 	}
29 | }
30 | 
31 | "Arbitrary name in quotes!"
32 | 
33 | integer-arg -1234
34 | science-arg-a 1.78e12
35 | science-arg-b 1.78e-3
36 | science-arg-c 1.7883274
37 | 
38 | // Node on multiple lines
39 | hello \
40 | 	1 2 3 \
41 | 	myProp="wow"
42 | `
43 | 
44 | 	r := strings.NewReader(doc)
45 | 	_, err := gokdl.Parse(r)
46 | 	if err != nil {
47 | 		t.Fatalf("expected no error but was: %s", err)
48 | 	}
49 | }
50 | 


--------------------------------------------------------------------------------
/arg.go:
--------------------------------------------------------------------------------
 1 | package gokdl
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | )
 6 | 
 7 | type Arg struct {
 8 | 	// Value of the argument.
 9 | 	// It is `nil` for the KDL `null` value.
10 | 	Value any
11 | 	// Type annotation on the argument.
12 | 	// It has the zero value if no type annotation
13 | 	// exists for this argument.
14 | 	TypeAnnotation TypeAnnotation
15 | }
16 | 
17 | func (a Arg) String() string {
18 | 	return fmt.Sprint(a.Value)
19 | }
20 | 
21 | func newArg(value any, ta TypeAnnotation) Arg {
22 | 	return Arg{
23 | 		Value:          value,
24 | 		TypeAnnotation: ta,
25 | 	}
26 | }
27 | 
28 | func newIntArg(value, typeAnnot string) (Arg, error) {
29 | 	val, err := parseIntValue(value, typeAnnot)
30 | 	return Arg{
31 | 		Value:          val,
32 | 		TypeAnnotation: TypeAnnotation(typeAnnot),
33 | 	}, err
34 | }
35 | 
36 | func newFloatArg(value, typeAnnot string) (Arg, error) {
37 | 	val, err := parseFloatValue(value, typeAnnot)
38 | 	return Arg{
39 | 		Value:          val,
40 | 		TypeAnnotation: TypeAnnotation(typeAnnot),
41 | 	}, err
42 | }
43 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Jonathan
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | > [!NOTE]
 2 | > Due to lack of time I can no longer maintain this project.
 3 | > I had a great time implementing it but nowadays the family takes
 4 | > most of my spare time. Check out [kdl-go](https://github.com/sblinch/kdl-go) instead!
 5 | 
 6 | # GoKDL
 7 | 
 8 | A parser implementation for the [KDL](https://kdl.dev/) document language in Go.
 9 | 
10 | ## Example
11 | 
12 | The following code shows a minimal example of parsing a KDL document:
13 | 
14 | ```go
15 | package main
16 | 
17 | import (
18 |     "log"
19 |     "strings"
20 |     "github.com/lunjon/gokdl"
21 | )
22 | 
23 | func main() {
24 |     kdl := `
25 | MyNode "string arg" myint=1234 awesome=true {
26 |   child-node 
27 | }      
28 | 
29 | // A node with arbitrary name (in quotes)
30 | "Other node with much cooler name!" { Okay; }
31 | `
32 | 
33 |     r := strings.NewReader(kdl)
34 |     doc, err := gokdl.Parse(r)
35 |     if err != nil {
36 |         log.Fatal(err)
37 |     }
38 | 
39 |     // Do something with doc ...
40 | }
41 | ```
42 | 
43 | ## API
44 | 
45 | Although the module can be used, the API is still very rough.
46 | I'm grateful for any feedback and suggestions regarding the API!
47 | 


--------------------------------------------------------------------------------
/internal/token.go:
--------------------------------------------------------------------------------
  1 | package internal
  2 | 
  3 | import (
  4 | 	"unicode"
  5 | )
  6 | 
  7 | var EOF_RUNE = rune(0)
  8 | 
  9 | type Token int
 10 | 
 11 | const (
 12 | 	EOF Token = iota
 13 | 	WS
 14 | 	INVALID
 15 | 
 16 | 	// Literals
 17 | 	IDENT
 18 | 	NUM_INT   // Integer
 19 | 	NUM_FLOAT // Float
 20 | 	NUM_SCI   // Scientific notation
 21 | 	BOOL      // true | false
 22 | 
 23 | 	// Special characters
 24 | 	SEMICOLON         // ;
 25 | 	CBRACK_OPEN       // {
 26 | 	CBRACK_CLOSE      // }
 27 | 	QUOTE             // "
 28 | 	EQUAL             // =
 29 | 	HYPHEN            // -
 30 | 	COMMENT_LINE      // //
 31 | 	COMMENT_MUL_OPEN  // /*
 32 | 	COMMENT_MUL_CLOSE // */
 33 | 	COMMENT_SD        // /- (slash-dash)
 34 | 	BACKSLASH         // \
 35 | 	FORWSLASH         // /
 36 | 	PAREN_OPEN        // (
 37 | 	PAREN_CLOSE       // )
 38 | 	GREAT             // >
 39 | 	LESS              // <
 40 | 	SBRACK_OPEN       // [
 41 | 	SBRACK_CLOSE      // ]
 42 | 	COMMA             // ,
 43 | 	RAWSTR_OPEN       // r"
 44 | 	RAWSTR_HASH_OPEN  // r#[...]"
 45 | 	RAWSTR_HASH_CLOSE // "#[...]
 46 | 
 47 | 	// Other characters
 48 | 	CHAR  // Single character
 49 | 	CHARS // Stream of characters
 50 | )
 51 | 
 52 | func IsInitialIdentToken(t Token) bool {
 53 | 	return t == CHAR || t == QUOTE || t == HYPHEN
 54 | }
 55 | 
 56 | func IsIdentifierToken(t Token) bool {
 57 | 	switch t {
 58 | 	case NUM_INT, CHAR:
 59 | 		return true
 60 | 	default:
 61 | 		return false
 62 | 	}
 63 | }
 64 | 
 65 | func IsIdentifier(r rune) bool {
 66 | 	return !nonIdents[r] && !unicode.IsSpace(r)
 67 | }
 68 | 
 69 | func IsAnyOf(t Token, ts ...Token) bool {
 70 | 	for _, ot := range ts {
 71 | 		if t == ot {
 72 | 			return true
 73 | 		}
 74 | 	}
 75 | 	return false
 76 | }
 77 | 
 78 | func ContainsNonIdent(s string) bool {
 79 | 	for _, ch := range s {
 80 | 		if nonIdents[ch] {
 81 | 			return true
 82 | 		}
 83 | 	}
 84 | 	return false
 85 | }
 86 | 
 87 | func init() {
 88 | 	nonIdents = map[rune]bool{}
 89 | 	for _, r := range `\/(){}<>;[]=,` {
 90 | 		nonIdents[r] = true
 91 | 	}
 92 | 
 93 | 	hexRunes = map[rune]bool{}
 94 | 	for _, r := range "0123456789abcdefABCDEF" {
 95 | 		hexRunes[r] = true
 96 | 	}
 97 | }
 98 | 
 99 | var (
100 | 	// Runes that are not valid in identifiers
101 | 	nonIdents map[rune]bool
102 | 	hexRunes  map[rune]bool
103 | )
104 | 


--------------------------------------------------------------------------------
/type.go:
--------------------------------------------------------------------------------
  1 | package gokdl
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"strconv"
  6 | )
  7 | 
  8 | type TypeAnnotation string
  9 | 
 10 | func (t TypeAnnotation) String() string {
 11 | 	return string(t)
 12 | }
 13 | 
 14 | const (
 15 | 	noTypeAnnot                = ""
 16 | 	I8          TypeAnnotation = "i8"
 17 | 	I16         TypeAnnotation = "i16"
 18 | 	I32         TypeAnnotation = "i32"
 19 | 	I64         TypeAnnotation = "i64"
 20 | 	U8          TypeAnnotation = "u8"
 21 | 	U16         TypeAnnotation = "u16"
 22 | 	U32         TypeAnnotation = "u32"
 23 | 	U64         TypeAnnotation = "u64"
 24 | 	F32         TypeAnnotation = "f32"
 25 | 	F64         TypeAnnotation = "f64"
 26 | )
 27 | 
 28 | var (
 29 | 	numberTypeAnnotation = map[string]TypeAnnotation{
 30 | 		I8.String():  I8,
 31 | 		I16.String(): I16,
 32 | 		I32.String(): I32,
 33 | 		I64.String(): I64,
 34 | 		U8.String():  U8,
 35 | 		U16.String(): U16,
 36 | 		U32.String(): U32,
 37 | 		U64.String(): U64,
 38 | 		F32.String(): F32,
 39 | 		F64.String(): F64,
 40 | 	}
 41 | )
 42 | 
 43 | func init() {
 44 | 	nums := []TypeAnnotation{
 45 | 		I8,
 46 | 		I16,
 47 | 		I32,
 48 | 		I64,
 49 | 		U8,
 50 | 		U16,
 51 | 		U32,
 52 | 		U64,
 53 | 		F32,
 54 | 		F64,
 55 | 	}
 56 | 
 57 | 	for _, n := range nums {
 58 | 		numberTypeAnnotation[n.String()] = n
 59 | 	}
 60 | }
 61 | 
 62 | func parseStringValue(value, typeAnnot string) (string, error) {
 63 | 	if _, isNum := numberTypeAnnotation[typeAnnot]; isNum {
 64 | 		return "", fmt.Errorf("invalid type annotation for type string: %s", typeAnnot)
 65 | 	}
 66 | 	return value, nil
 67 | }
 68 | 
 69 | func parseIntValue(value, typeAnnot string) (any, error) {
 70 | 	var bitsize int
 71 | 	var unsigned bool
 72 | 
 73 | 	switch TypeAnnotation(typeAnnot) {
 74 | 	case noTypeAnnot:
 75 | 		bitsize = 64
 76 | 	case I8:
 77 | 		bitsize = 8
 78 | 	case I16:
 79 | 		bitsize = 16
 80 | 	case I32:
 81 | 		bitsize = 32
 82 | 	case I64:
 83 | 		bitsize = 64
 84 | 	case U8:
 85 | 		bitsize = 8
 86 | 		unsigned = true
 87 | 	case U16:
 88 | 		bitsize = 16
 89 | 		unsigned = true
 90 | 	case U32:
 91 | 		bitsize = 32
 92 | 		unsigned = true
 93 | 	case U64:
 94 | 		bitsize = 64
 95 | 		unsigned = true
 96 | 	default:
 97 | 		return value, fmt.Errorf("invalid type annotation for integer: %s", typeAnnot)
 98 | 	}
 99 | 
100 | 	if unsigned {
101 | 		return strconv.ParseUint(value, 10, bitsize)
102 | 	} else {
103 | 		return strconv.ParseInt(value, 10, bitsize)
104 | 	}
105 | }
106 | 
107 | func parseFloatValue(value, typeAnnot string) (any, error) {
108 | 	var bitsize int
109 | 
110 | 	switch TypeAnnotation(typeAnnot) {
111 | 	case F32:
112 | 		bitsize = 32
113 | 	case noTypeAnnot, F64:
114 | 		bitsize = 64
115 | 	default:
116 | 		return value, fmt.Errorf("invalid type annotation for integer: %s", typeAnnot)
117 | 	}
118 | 
119 | 	return strconv.ParseFloat(value, bitsize)
120 | }
121 | 


--------------------------------------------------------------------------------
/internal/scanner_test.go:
--------------------------------------------------------------------------------
 1 | package internal
 2 | 
 3 | import (
 4 | 	"strings"
 5 | 	"testing"
 6 | 
 7 | 	"github.com/stretchr/testify/require"
 8 | )
 9 | 
10 | func TestScannerScanWhitespace(t *testing.T) {
11 | 	tests := []struct {
12 | 		name string
13 | 		str  string
14 | 	}{
15 | 		{"empty", " "},
16 | 		{"newline", "\n"},
17 | 		{"multi newline", " \n\n"},
18 | 	}
19 | 
20 | 	for _, test := range tests {
21 | 		sc := setup(test.str)
22 | 		t.Run(test.name, func(t *testing.T) {
23 | 			token, _ := sc.Scan()
24 | 			require.Equal(t, WS, token)
25 | 		})
26 | 	}
27 | }
28 | 
29 | func TestScannerScanNumbers(t *testing.T) {
30 | 	tests := []struct {
31 | 		name          string
32 | 		str           string
33 | 		expectedToken Token
34 | 		expectedLit   string
35 | 	}{
36 | 		{"integer - single digit", "1", NUM_INT, "1"},
37 | 		{"integer - multi digit", "12345", NUM_INT, "12345"},
38 | 		{"integer - neg", "-12345", NUM_INT, "-12345"},
39 | 		{"integer - prefix", "+12345", NUM_INT, "12345"},
40 | 		{"integer - underscore", "10_000", NUM_INT, "10000"},
41 | 		{"float - dot", "1.1", NUM_FLOAT, "1.1"},
42 | 		{"float - dot multi", "1.12345", NUM_FLOAT, "1.12345"},
43 | 		{"float - scientific (pos exp)", "1.123e12", NUM_SCI, "1.123e12"},
44 | 		{"float - scientific (neg exp)", "1.123e-9", NUM_SCI, "1.123e-9"},
45 | 		{"float - scientific neg", "-1.123e9", NUM_SCI, "-1.123e9"},
46 | 		{"binary", "0b0101", NUM_INT, "5"},
47 | 		{"binary - underscore", "0b01_01", NUM_INT, "5"},
48 | 		{"octal", "0o010463", NUM_INT, "4403"},
49 | 		{"octal - underscore", "0o0104_63", NUM_INT, "4403"},
50 | 		{"hex", "0xabc123", NUM_INT, "11256099"},
51 | 		{"hex - underscore", "0xabc_123", NUM_INT, "11256099"},
52 | 	}
53 | 
54 | 	for _, test := range tests {
55 | 		sc := setup(test.str)
56 | 		t.Run(test.name, func(t *testing.T) {
57 | 			token, lit := sc.Scan()
58 | 			require.Equal(t, test.expectedToken, token)
59 | 			require.Equal(t, test.expectedLit, lit)
60 | 		})
61 | 	}
62 | }
63 | 
64 | func TestScannerScanRawString(t *testing.T) {
65 | 	tests := []struct {
66 | 		name          string
67 | 		str           string
68 | 		expectedToken Token
69 | 		expectedLit   string
70 | 	}{
71 | 		{"no raw string", "r", CHAR, "r"},
72 | 		{"raw string", `r"`, RAWSTR_OPEN, `r"`},
73 | 		{"raw string hash 1", `r#"`, RAWSTR_HASH_OPEN, `r#"`},
74 | 		{"raw string hash 2", `r##"`, RAWSTR_HASH_OPEN, `r##"`},
75 | 		{"other", `r##`, CHAR, `r##`},
76 | 		{"quote end 1", `"`, QUOTE, `"`},
77 | 		{"quote end 2", `"#`, RAWSTR_HASH_CLOSE, `"#`},
78 | 		{"quote end 3", `"##`, RAWSTR_HASH_CLOSE, `"##`},
79 | 	}
80 | 
81 | 	for _, test := range tests {
82 | 		sc := setup(test.str)
83 | 		t.Run(test.name, func(t *testing.T) {
84 | 			token, lit := sc.Scan()
85 | 			require.Equal(t, test.expectedToken, token)
86 | 			require.Equal(t, test.expectedLit, lit)
87 | 		})
88 | 	}
89 | }
90 | 
91 | func setup(source string) *Scanner {
92 | 	r := strings.NewReader(source)
93 | 	return NewScanner(r)
94 | }
95 | 


--------------------------------------------------------------------------------
/internal/scanner.go:
--------------------------------------------------------------------------------
  1 | package internal
  2 | 
  3 | import (
  4 | 	"bufio"
  5 | 	"bytes"
  6 | 	"fmt"
  7 | 	"io"
  8 | 	"strconv"
  9 | 	"strings"
 10 | 	"unicode"
 11 | )
 12 | 
 13 | type previous struct {
 14 | 	token Token
 15 | 	lit   string
 16 | }
 17 | 
 18 | // Scanner represents a lexical Scanner.
 19 | type Scanner struct {
 20 | 	r   *bufio.Reader
 21 | 	eof bool
 22 | 	// State used in unread.
 23 | 	prev *previous // Set from last when Unread was called
 24 | 	last previous
 25 | }
 26 | 
 27 | func NewScanner(r io.Reader) *Scanner {
 28 | 	return &Scanner{
 29 | 		r: bufio.NewReader(r),
 30 | 	}
 31 | }
 32 | 
 33 | func (s *Scanner) ScanLine() {
 34 | 	if s.eof {
 35 | 		return
 36 | 	}
 37 | 	_, _ = s.r.ReadBytes('\n')
 38 | }
 39 | 
 40 | // scan returns the next token and literal value.
 41 | func (s *Scanner) Scan() (tok Token, lit string) {
 42 | 	if s.eof {
 43 | 		return EOF, ""
 44 | 	}
 45 | 
 46 | 	if s.prev != nil {
 47 | 		token := s.prev.token
 48 | 		lit := s.prev.lit
 49 | 		s.prev = nil
 50 | 		return token, lit
 51 | 	}
 52 | 
 53 | 	ch := s.read()
 54 | 
 55 | 	if unicode.IsSpace(ch) {
 56 | 		s.r.UnreadRune()
 57 | 		return s.ScanWhitespace()
 58 | 	} else if unicode.IsDigit(ch) {
 59 | 		s.r.UnreadRune()
 60 | 		return s.scanNumber(false)
 61 | 	}
 62 | 
 63 | 	var token Token
 64 | 	var str string
 65 | 	switch ch {
 66 | 	case EOF_RUNE:
 67 | 		s.eof = true
 68 | 		token = EOF
 69 | 	case '"':
 70 | 		return s.scanQuote()
 71 | 	case '=':
 72 | 		token = EQUAL
 73 | 		str = string(ch)
 74 | 	case '-':
 75 | 		next := s.read()
 76 | 		s.r.UnreadRune()
 77 | 
 78 | 		if unicode.IsDigit(next) {
 79 | 			s.r.UnreadRune()
 80 | 			return s.scanNumber(true)
 81 | 		}
 82 | 
 83 | 		token = HYPHEN
 84 | 		str = string(ch)
 85 | 	case '+':
 86 | 		next := s.read()
 87 | 		s.r.UnreadRune()
 88 | 
 89 | 		if unicode.IsDigit(next) {
 90 | 			s.r.UnreadRune()
 91 | 			return s.scanNumber(false)
 92 | 		}
 93 | 
 94 | 		token = CHAR
 95 | 		str = string(ch)
 96 | 	case '*':
 97 | 		next := s.read()
 98 | 		if next == '/' {
 99 | 			token = COMMENT_MUL_CLOSE
100 | 			str = "*/"
101 | 		} else {
102 | 			s.r.UnreadRune()
103 | 			token = CHAR
104 | 			str = string(ch)
105 | 		}
106 | 	case '/':
107 | 		next := s.read()
108 | 		switch next {
109 | 		case '/':
110 | 			token = COMMENT_LINE
111 | 			str = "//"
112 | 		case '*':
113 | 			token = COMMENT_MUL_OPEN
114 | 			str = "/*"
115 | 		case '-':
116 | 			token = COMMENT_SD
117 | 			str = "/-"
118 | 		default:
119 | 			s.r.UnreadRune()
120 | 			return CHAR, string(ch)
121 | 		}
122 | 	case ';':
123 | 		token = SEMICOLON
124 | 		str = string(ch)
125 | 	case '{':
126 | 		token = CBRACK_OPEN
127 | 		str = string(ch)
128 | 	case '}':
129 | 		token = CBRACK_CLOSE
130 | 		str = string(ch)
131 | 	case '[':
132 | 		token = SBRACK_OPEN
133 | 		str = string(ch)
134 | 	case ']':
135 | 		token = SBRACK_CLOSE
136 | 		str = string(ch)
137 | 	case '<':
138 | 		token = LESS
139 | 		str = string(ch)
140 | 	case '>':
141 | 		token = GREAT
142 | 		str = string(ch)
143 | 	case ',':
144 | 		token = COMMA
145 | 		str = string(ch)
146 | 	case '(':
147 | 		token = PAREN_OPEN
148 | 		str = string(ch)
149 | 	case ')':
150 | 		token = PAREN_CLOSE
151 | 		str = string(ch)
152 | 	case '\\':
153 | 		token = BACKSLASH
154 | 		str = string(ch)
155 | 	case 'r':
156 | 		return s.scanRawString()
157 | 	default:
158 | 		token = CHAR
159 | 		str = string(ch)
160 | 	}
161 | 
162 | 	return s.setAndReturn(token, str)
163 | }
164 | 
165 | func (s *Scanner) scanRawString() (Token, string) {
166 | 	next := s.read()
167 | 	switch next {
168 | 	case '"':
169 | 		return RAWSTR_OPEN, `r"`
170 | 	case '#':
171 | 		lit := s.ScanWhile(func(r rune) bool {
172 | 			return r == '#'
173 | 		})
174 | 
175 | 		next := s.read()
176 | 		if next != '"' {
177 | 			s.r.UnreadRune()
178 | 			return CHAR, fmt.Sprintf("r#%s", lit)
179 | 		}
180 | 
181 | 		return RAWSTR_HASH_OPEN, fmt.Sprintf(`r#%s"`, lit)
182 | 	default:
183 | 		s.r.UnreadRune()
184 | 		return CHAR, "r"
185 | 	}
186 | }
187 | 
188 | // Handles a single " as well as "##...
189 | func (s *Scanner) scanQuote() (Token, string) {
190 | 	next := s.read()
191 | 	if next != '#' {
192 | 		s.r.UnreadRune()
193 | 		return QUOTE, `"`
194 | 	}
195 | 
196 | 	lit := s.ScanWhile(func(r rune) bool {
197 | 		return r == '#'
198 | 	})
199 | 	return RAWSTR_HASH_CLOSE, `"#` + lit
200 | }
201 | 
202 | func (s *Scanner) ScanWhile(pred func(rune) bool) string {
203 | 	var buf bytes.Buffer
204 | 	if s.prev != nil {
205 | 		buf.WriteString(s.prev.lit)
206 | 		s.prev = nil
207 | 	}
208 | 
209 | 	for {
210 | 		ch := s.read()
211 | 		if ch == EOF_RUNE {
212 | 			break
213 | 		} else if !pred(ch) {
214 | 			s.r.UnreadRune()
215 | 			break
216 | 		} else {
217 | 			buf.WriteRune(ch)
218 | 		}
219 | 	}
220 | 
221 | 	return buf.String()
222 | }
223 | 
224 | // scanNumber tries to scan a number in any of the supported formats.
225 | // Use `neg` to indicate that the number was prefixed with a hyphen.
226 | func (s *Scanner) scanNumber(neg bool) (Token, string) {
227 | 	start := s.ScanWhile(unicode.IsDigit)
228 | 	next := s.read()
229 | 	if neg {
230 | 		start = "-" + start
231 | 	}
232 | 
233 | 	if next == EOF_RUNE {
234 | 		return s.setAndReturn(NUM_INT, start)
235 | 	}
236 | 
237 | 	comp := start + string(next)
238 | 
239 | 	if strings.HasSuffix(comp, ".") {
240 | 		return s.scanFloat(comp)
241 | 	} else if comp == "0x" {
242 | 		return s.scanHex()
243 | 	} else if comp == "0o" {
244 | 		return s.scanOctal()
245 | 	} else if comp == "0b" {
246 | 		return s.scanBinary()
247 | 	}
248 | 
249 | 	if next != '_' {
250 | 		if unicode.IsSpace(next) || !unicode.IsDigit(next) {
251 | 			s.r.UnreadRune()
252 | 			return s.setAndReturn(NUM_INT, start)
253 | 		}
254 | 	}
255 | 
256 | 	// Read as integer
257 | 	s.r.UnreadRune()
258 | 	lit := s.ScanWhile(func(r rune) bool {
259 | 		return unicode.IsDigit(r) || r == '_'
260 | 	})
261 | 
262 | 	return s.setAndReturn(NUM_INT, strings.ReplaceAll(start+lit, "_", ""))
263 | }
264 | 
265 | func (s *Scanner) scanFloat(start string) (Token, string) {
266 | 	// Try scientific notation: 1.234e-42
267 | 	if len(strings.TrimPrefix(start, "-")) == 2 {
268 | 		numsAfterDot := s.ScanWhile(unicode.IsDigit)
269 | 		if numsAfterDot == "" {
270 | 			return s.setAndReturn(CHARS, start)
271 | 		}
272 | 
273 | 		tokenAfterNums, sAfterNums := s.Scan()
274 | 
275 | 		if tokenAfterNums == CHAR && sAfterNums == "e" {
276 | 			next, ch := s.Scan()
277 | 			var exp string
278 | 
279 | 			if ch == "-" {
280 | 				exp = s.ScanWhile(unicode.IsDigit)
281 | 				exp = "-" + exp
282 | 			} else if next == NUM_INT {
283 | 				exp = ch
284 | 			} else {
285 | 				return CHARS, start + numsAfterDot + sAfterNums + ch
286 | 			}
287 | 
288 | 			num := fmt.Sprintf("%s%se%s", start, numsAfterDot, exp)
289 | 			return s.setAndReturn(NUM_SCI, num)
290 | 		} else if tokenAfterNums == NUM_INT {
291 | 			num := start + numsAfterDot + sAfterNums
292 | 			return s.setAndReturn(NUM_FLOAT, num)
293 | 		} else if tokenAfterNums == WS || tokenAfterNums == EOF {
294 | 			s.Unread()
295 | 			return s.setAndReturn(NUM_FLOAT, start+numsAfterDot)
296 | 		}
297 | 
298 | 	}
299 | 
300 | 	numsAfterDot := s.ScanWhile(unicode.IsDigit)
301 | 	if numsAfterDot == "" {
302 | 		return s.setAndReturn(CHARS, start)
303 | 	}
304 | 
305 | 	return s.setAndReturn(NUM_FLOAT, start+numsAfterDot)
306 | }
307 | 
308 | func (s *Scanner) scanBinary() (Token, string) {
309 | 	// Read binary
310 | 	lit := s.ScanWhile(func(r rune) bool {
311 | 		return r == '0' || r == '1' || r == '_'
312 | 	})
313 | 	lit = strings.ReplaceAll(lit, "_", "")
314 | 
315 | 	n, err := strconv.ParseInt(lit, 2, 64)
316 | 	if err != nil {
317 | 		return s.setAndReturn(CHARS, "0b"+lit)
318 | 	}
319 | 
320 | 	return s.setAndReturn(NUM_INT, fmt.Sprint(n))
321 | }
322 | 
323 | func (s *Scanner) scanOctal() (Token, string) {
324 | 	// Read binary
325 | 	lit := s.ScanWhile(func(r rune) bool {
326 | 		return ('0' <= r && r <= '7') || r == '_'
327 | 	})
328 | 	lit = strings.ReplaceAll(lit, "_", "")
329 | 
330 | 	n, err := strconv.ParseInt(lit, 8, 64)
331 | 	if err != nil {
332 | 		return s.setAndReturn(CHARS, "0o"+lit)
333 | 	}
334 | 
335 | 	return s.setAndReturn(NUM_INT, fmt.Sprint(n))
336 | }
337 | 
338 | func (s *Scanner) scanHex() (Token, string) {
339 | 	// Read hexadecimal: 0xdeadbeef
340 | 	lit := s.ScanWhile(func(r rune) bool {
341 | 		return hexRunes[r] || r == '_'
342 | 	})
343 | 	lit = strings.ReplaceAll(lit, "_", "")
344 | 
345 | 	n, err := strconv.ParseInt(lit, 16, 64)
346 | 	if err != nil {
347 | 		return s.setAndReturn(CHARS, "0x"+lit)
348 | 	}
349 | 
350 | 	return s.setAndReturn(NUM_INT, fmt.Sprint(n))
351 | }
352 | 
353 | // Scan while whitespace only.
354 | func (s *Scanner) ScanWhitespace() (Token, string) {
355 | 	lit := s.ScanWhile(unicode.IsSpace)
356 | 	return s.setAndReturn(WS, lit)
357 | }
358 | 
359 | // scanLetters consumes the current rune and all contiguous ident runes.
360 | func (s *Scanner) ScanLetters() (Token, string) {
361 | 	pred := func(r rune) bool {
362 | 		return unicode.IsLetter(r) || r == '_'
363 | 	}
364 | 
365 | 	lit := s.ScanWhile(pred)
366 | 	return s.setAndReturn(IDENT, lit)
367 | }
368 | 
369 | func (s *Scanner) ScanBareIdent() string {
370 | 	lit := s.ScanWhile(IsIdentifier)
371 | 	s.setAndReturn(IDENT, lit)
372 | 	return lit
373 | }
374 | 
375 | // Read the next rune from the reader.
376 | // Returns `eof` if an error occurs (or io.EOF is returned).
377 | func (s *Scanner) read() rune {
378 | 	r, _, err := s.r.ReadRune()
379 | 	if err != nil {
380 | 		s.eof = true
381 | 		return EOF_RUNE
382 | 	}
383 | 	return r
384 | }
385 | 
386 | func (s *Scanner) setAndReturn(t Token, lit string) (Token, string) {
387 | 	s.last = previous{token: t, lit: lit}
388 | 	return t, lit
389 | }
390 | 
391 | func (s *Scanner) Unread() {
392 | 	s.prev = &s.last
393 | }
394 | 


--------------------------------------------------------------------------------
/parser.go:
--------------------------------------------------------------------------------
  1 | package gokdl
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"io"
  6 | 	"strconv"
  7 | 	"strings"
  8 | 	"unicode"
  9 | 
 10 | 	pkg "github.com/lunjon/gokdl/internal"
 11 | )
 12 | 
 13 | var newlinesToQuoted = map[string]string{
 14 | 	"\n":     "\\n",     // newline
 15 | 	"\r":     "\\r",     // carriage return
 16 | 	"\r\n":   "\\r\\n",  // carriage return newline
 17 | 	"\f":     "\\f",     // form feed
 18 | 	"\u0085": "\\u0085", // next line
 19 | 	"\u2028": "\\u2028", // line separator
 20 | 	"\u2029": "\\u2029", // paragraph separator
 21 | }
 22 | 
 23 | func isNewline(lit string) bool {
 24 | 	for nl := range newlinesToQuoted {
 25 | 		if strings.Contains(lit, nl) {
 26 | 			return true
 27 | 		}
 28 | 	}
 29 | 	return false
 30 | }
 31 | 
 32 | type parseContext struct{}
 33 | 
 34 | // The type responsible for parsing the documents.
 35 | // The parser relies on the Scanner (internal) for
 36 | // parsing.
 37 | //
 38 | // Specification: https://github.com/kdl-org/kdl/blob/main/SPEC.md
 39 | type parser struct {
 40 | 	sc *pkg.Scanner
 41 | }
 42 | 
 43 | func newParser(src io.Reader) *parser {
 44 | 	return &parser{
 45 | 		sc: pkg.NewScanner(src),
 46 | 	}
 47 | }
 48 | 
 49 | func (p *parser) parse() (Doc, error) {
 50 | 	cx := &parseContext{}
 51 | 	nodes, err := parseScope(cx, p.sc, false)
 52 | 
 53 | 	return Doc{
 54 | 		nodes: nodes,
 55 | 	}, err
 56 | }
 57 | 
 58 | // Parses a root or child scope (inside a node).
 59 | func parseScope(cx *parseContext, sc *pkg.Scanner, isChild bool) ([]Node, error) {
 60 | 	nodes := []Node{} // The nodes accumulated in this scope
 61 | 	done := false     // When true, parsing of the scope (root or children) is done
 62 | 
 63 | 	var typeAnnot string
 64 | 
 65 | 	appendNode := func(n Node) {
 66 | 		if typeAnnot != "" {
 67 | 			n.TypeAnnotation = TypeAnnotation(typeAnnot)
 68 | 			typeAnnot = ""
 69 | 		}
 70 | 		nodes = append(nodes, n)
 71 | 	}
 72 | 
 73 | 	for !done {
 74 | 		token, lit := sc.Scan()
 75 | 		if token == pkg.EOF {
 76 | 			break
 77 | 		}
 78 | 
 79 | 		switch token {
 80 | 		case pkg.WS:
 81 | 			continue
 82 | 		case pkg.SEMICOLON:
 83 | 			continue
 84 | 		case pkg.CBRACK_CLOSE:
 85 | 			if isChild {
 86 | 				done = true
 87 | 			} else {
 88 | 				return nil, fmt.Errorf("unexpected token: %s", lit)
 89 | 			}
 90 | 		case pkg.COMMENT_LINE:
 91 | 			sc.ScanLine()
 92 | 		case pkg.COMMENT_MUL_OPEN:
 93 | 			if err := scanMultilineComment(cx, sc); err != nil {
 94 | 				return nil, err
 95 | 			}
 96 | 		case pkg.COMMENT_SD:
 97 | 			// Parse the following content as node and ignore the result
 98 | 			nextToken, _ := sc.Scan()
 99 | 			if pkg.IsInitialIdentToken(nextToken) {
100 | 				text := sc.ScanBareIdent()
101 | 				if _, err := scanNode(cx, sc, text); err != nil {
102 | 					return nil, fmt.Errorf("expected a node after slash-dash comment: %s", err)
103 | 				}
104 | 			} else {
105 | 				return nil, fmt.Errorf("expected a node after slash-dash comment")
106 | 			}
107 | 		case pkg.PAREN_OPEN:
108 | 			annot, err := scanTypeAnnotation(cx, sc)
109 | 			if err != nil {
110 | 				return nil, err
111 | 			}
112 | 			typeAnnot = annot
113 | 		case pkg.QUOTE, pkg.RAWSTR_OPEN, pkg.RAWSTR_HASH_OPEN, pkg.RAWSTR_HASH_CLOSE:
114 | 			// Identifier in quotes => parse as string
115 | 
116 | 			var err error
117 | 			var str string
118 | 			switch token {
119 | 			case pkg.QUOTE:
120 | 				str, err = scanString(cx, sc, "")
121 | 			case pkg.RAWSTR_HASH_CLOSE:
122 | 				str, err = scanString(cx, sc, "")
123 | 				str = lit[1:] + str
124 | 			case pkg.RAWSTR_OPEN:
125 | 				str, err = scanRawString(cx, sc, "")
126 | 			case pkg.RAWSTR_HASH_OPEN:
127 | 				str, err = scanRawStringHash(cx, sc, lit, "")
128 | 			}
129 | 
130 | 			if err != nil {
131 | 				return nil, err
132 | 			}
133 | 
134 | 			node, err := scanNode(cx, sc, str)
135 | 			if err != nil {
136 | 				return nil, err
137 | 			}
138 | 			appendNode(node)
139 | 		default:
140 | 			if pkg.IsInitialIdentToken(token) {
141 | 				text := sc.ScanBareIdent()
142 | 				node, err := scanNode(cx, sc, lit+text)
143 | 				if err != nil {
144 | 					return nil, err
145 | 				}
146 | 				appendNode(node)
147 | 			} else {
148 | 				return nil, fmt.Errorf("unexpected token: %s", lit)
149 | 			}
150 | 		}
151 | 	}
152 | 
153 | 	return nodes, nil
154 | }
155 | 
156 | func scanMultilineComment(cx *parseContext, sc *pkg.Scanner) error {
157 | 	for {
158 | 		token, _ := sc.Scan()
159 | 		if token == pkg.EOF {
160 | 			break
161 | 		}
162 | 
163 | 		if token == pkg.COMMENT_MUL_CLOSE {
164 | 			return nil
165 | 		}
166 | 	}
167 | 
168 | 	return fmt.Errorf("no closing of multiline comment")
169 | }
170 | 
171 | func scanNode(cx *parseContext, sc *pkg.Scanner, name string) (Node, error) {
172 | 	// This function gets called immediately after an
173 | 	// idenfitier was read. So just check that the following
174 | 	// token is valid.
175 | 	next, nextlit := sc.Scan()
176 | 	if !pkg.IsAnyOf(next, pkg.EOF, pkg.WS, pkg.SEMICOLON, pkg.CBRACK_CLOSE) {
177 | 		return Node{}, fmt.Errorf("unexpected token in identifier: %s", nextlit)
178 | 	}
179 | 
180 | 	sc.Unread()
181 | 
182 | 	children := []Node{}
183 | 	args := []Arg{}
184 | 	props := []Prop{}
185 | 
186 | 	done := false
187 | 	skip := false // Used with slash-dash comments
188 | 
189 | 	typeAnnotation := ""
190 | 	for !done {
191 | 		token, lit := sc.Scan()
192 | 		if token == pkg.EOF {
193 | 			break
194 | 		}
195 | 
196 | 		if typeAnnotation != "" && pkg.IsAnyOf(token, pkg.BACKSLASH, pkg.SEMICOLON, pkg.CBRACK_OPEN) {
197 | 			return Node{}, fmt.Errorf("unexpected type annotation")
198 | 		}
199 | 
200 | 		switch token {
201 | 		case pkg.BACKSLASH:
202 | 			sc.ScanWhitespace()
203 | 		case pkg.SEMICOLON:
204 | 			done = true
205 | 		case pkg.WS:
206 | 			if isNewline(lit) {
207 | 				done = true
208 | 			}
209 | 		case pkg.COMMENT_LINE:
210 | 			sc.ScanLine()
211 | 			done = true
212 | 		case pkg.COMMENT_MUL_OPEN:
213 | 			if err := scanMultilineComment(cx, sc); err != nil {
214 | 				return Node{}, err
215 | 			}
216 | 		case pkg.COMMENT_SD:
217 | 			// We need to continue to parse and ignore the next result.
218 | 			skip = true
219 | 			// typeAnnotation = ""
220 | 		case pkg.NUM_INT:
221 | 			if skip {
222 | 				skip = false
223 | 				typeAnnotation = ""
224 | 				continue
225 | 			}
226 | 
227 | 			arg, err := newIntArg(lit, typeAnnotation)
228 | 			if err != nil {
229 | 				return Node{}, err
230 | 			}
231 | 			args = append(args, arg)
232 | 			typeAnnotation = ""
233 | 		case pkg.NUM_FLOAT, pkg.NUM_SCI:
234 | 			if skip {
235 | 				skip = false
236 | 				typeAnnotation = ""
237 | 				continue
238 | 			}
239 | 
240 | 			arg, err := newFloatArg(lit, typeAnnotation)
241 | 			if err != nil {
242 | 				return Node{}, err
243 | 			}
244 | 
245 | 			args = append(args, arg)
246 | 			typeAnnotation = ""
247 | 		case pkg.QUOTE, pkg.RAWSTR_OPEN, pkg.RAWSTR_HASH_OPEN, pkg.RAWSTR_HASH_CLOSE:
248 | 			var str string
249 | 			var err error
250 | 			switch token {
251 | 			case pkg.QUOTE:
252 | 				str, err = scanString(cx, sc, typeAnnotation)
253 | 			case pkg.RAWSTR_HASH_CLOSE:
254 | 				str, err = scanString(cx, sc, typeAnnotation)
255 | 				str = lit[1:] + str
256 | 			case pkg.RAWSTR_OPEN:
257 | 				str, err = scanRawString(cx, sc, typeAnnotation)
258 | 			case pkg.RAWSTR_HASH_OPEN:
259 | 				str, err = scanRawStringHash(cx, sc, lit, typeAnnotation)
260 | 			}
261 | 			if err != nil {
262 | 				return Node{}, err
263 | 			}
264 | 
265 | 			nextToken, _ := sc.Scan()
266 | 			if nextToken == pkg.EQUAL {
267 | 				prop, err := scanProp(cx, sc, str, typeAnnotation)
268 | 				if err != nil {
269 | 					return Node{}, err
270 | 				}
271 | 
272 | 				if !skip {
273 | 					props = append(props, prop)
274 | 				}
275 | 				skip = false
276 | 			} else {
277 | 				if !skip {
278 | 					sc.Unread()
279 | 					arg := newArg(str, TypeAnnotation(typeAnnotation))
280 | 					args = append(args, arg)
281 | 				}
282 | 
283 | 				skip = false
284 | 			}
285 | 
286 | 			typeAnnotation = ""
287 | 		case pkg.CBRACK_OPEN:
288 | 			ns, err := parseScope(cx, sc, true)
289 | 			if err != nil {
290 | 				return Node{}, err
291 | 			}
292 | 
293 | 			if !skip {
294 | 				children = append(children, ns...)
295 | 			}
296 | 
297 | 			skip = false
298 | 		case pkg.CBRACK_CLOSE:
299 | 			done = true
300 | 		case pkg.PAREN_OPEN:
301 | 			annot, err := scanTypeAnnotation(cx, sc)
302 | 			if err != nil {
303 | 				return Node{}, err
304 | 			}
305 | 			typeAnnotation = annot
306 | 		default:
307 | 			// At this point there are multiple cases that can happen:
308 | 			// - The following value is a literal: null, true, false
309 | 			//   - These should be treated as such
310 | 			// - It is the start of a property name
311 | 			//
312 | 			// All the literals have valid initial identifier tokens.
313 | 			// That is, n(ull), t(rue) and f(alse) can be the start
314 | 			// of an identifier and NOT the literals.
315 | 			//
316 | 			// Thus we need to check the following tokens in order
317 | 			// to decide what it is.
318 | 
319 | 			{ // Check literals
320 | 				var value any
321 | 				var ok bool
322 | 
323 | 				_, next := sc.ScanLetters()
324 | 				next = lit + next
325 | 
326 | 				switch next {
327 | 				case "null":
328 | 					value = nil // Default for any...
329 | 					ok = true
330 | 				case "true":
331 | 					value = true
332 | 					ok = true
333 | 				case "false":
334 | 					value = false
335 | 					ok = true
336 | 				}
337 | 
338 | 				if ok {
339 | 					if typeAnnotation != "" {
340 | 						return Node{}, fmt.Errorf("unexpected type annotation")
341 | 					}
342 | 
343 | 					if !skip {
344 | 						args = append(args, newArg(value, ""))
345 | 						skip = false
346 | 					}
347 | 					continue
348 | 				} else {
349 | 					sc.Unread()
350 | 				}
351 | 			}
352 | 
353 | 			if pkg.IsInitialIdentToken(token) {
354 | 				id := sc.ScanBareIdent()
355 | 				next, _ := sc.Scan()
356 | 				if next != pkg.EQUAL {
357 | 					return Node{}, fmt.Errorf("unexpected identifier")
358 | 				}
359 | 
360 | 				prop, err := scanProp(cx, sc, lit+id, typeAnnotation)
361 | 				if err != nil {
362 | 					return Node{}, err
363 | 				}
364 | 
365 | 				if !skip {
366 | 					props = append(props, prop)
367 | 				}
368 | 				skip = false
369 | 			} else {
370 | 				return Node{}, fmt.Errorf("unexpected token: %s", lit)
371 | 			}
372 | 		}
373 | 	}
374 | 
375 | 	return Node{
376 | 		Name:     name,
377 | 		Children: children,
378 | 		Props:    props,
379 | 		Args:     args,
380 | 	}, nil
381 | }
382 | 
383 | func scanString(cx *parseContext, sc *pkg.Scanner, typeAnnot string) (string, error) {
384 | 	buf := strings.Builder{}
385 | 	done := false
386 | 	for !done {
387 | 		token, lit := sc.Scan()
388 | 		if token == pkg.EOF {
389 | 			return "", fmt.Errorf("error reading string literal: reached EOF")
390 | 		}
391 | 
392 | 		switch token {
393 | 		case pkg.BACKSLASH:
394 | 			next, nextLit := sc.Scan()
395 | 			if next == pkg.QUOTE {
396 | 				buf.WriteString(`\"`)
397 | 			} else {
398 | 				buf.WriteString(lit)
399 | 				buf.WriteString(nextLit)
400 | 			}
401 | 		case pkg.QUOTE:
402 | 			done = true
403 | 		case pkg.WS:
404 | 			// Unquoted newline characters are invalid -> replace prior unquoting
405 | 			res := lit
406 | 			for nl, escaped := range newlinesToQuoted {
407 | 				res = strings.ReplaceAll(res, nl, escaped)
408 | 			}
409 | 			buf.WriteString(res)
410 | 		case pkg.RAWSTR_OPEN, pkg.RAWSTR_HASH_OPEN:
411 | 			buf.WriteString(lit[:len(lit)-1])
412 | 			done = true
413 | 		default:
414 | 			buf.WriteString(lit)
415 | 		}
416 | 	}
417 | 
418 | 	sss, err := strconv.Unquote("\"" + buf.String() + "\"")
419 | 	if err != nil {
420 | 		return "", err
421 | 	}
422 | 
423 | 	return parseStringValue(sss, typeAnnot)
424 | }
425 | 
426 | func scanRawString(cx *parseContext, sc *pkg.Scanner, typeAnnot string) (string, error) {
427 | 	buf := strings.Builder{}
428 | 	done := false
429 | 	for !done {
430 | 		token, lit := sc.Scan()
431 | 		if token == pkg.EOF {
432 | 			return "", fmt.Errorf("error reading raw string literal: reached EOF")
433 | 		}
434 | 
435 | 		switch token {
436 | 		case pkg.QUOTE:
437 | 			done = true
438 | 		default:
439 | 			buf.WriteString(lit)
440 | 		}
441 | 	}
442 | 
443 | 	return parseStringValue(buf.String(), typeAnnot)
444 | }
445 | 
446 | func scanRawStringHash(cx *parseContext, sc *pkg.Scanner, start, typeAnnot string) (string, error) {
447 | 	end := strings.TrimPrefix(start, "r")
448 | 	end = strings.TrimSuffix(end, `"`)
449 | 	end = `"` + end
450 | 
451 | 	buf := strings.Builder{}
452 | 	done := false
453 | 	for !done {
454 | 		token, lit := sc.Scan()
455 | 		if token == pkg.EOF {
456 | 			return "", fmt.Errorf("error reading raw string literal: reached EOF")
457 | 		}
458 | 
459 | 		switch token {
460 | 		case pkg.RAWSTR_HASH_CLOSE:
461 | 			if lit == end {
462 | 				done = true
463 | 			} else {
464 | 				return "", fmt.Errorf("invalid terminal of raw string literal: %s", lit)
465 | 			}
466 | 		default:
467 | 			buf.WriteString(lit)
468 | 		}
469 | 	}
470 | 
471 | 	return parseStringValue(buf.String(), typeAnnot)
472 | }
473 | 
474 | func scanProp(cx *parseContext, sc *pkg.Scanner, name, typeAnnotation string) (Prop, error) {
475 | 	_, _ = sc.ScanWhitespace()
476 | 
477 | 	done := false
478 | 	var value any
479 | 	var valueTypeAnnot string
480 | 
481 | 	for !done {
482 | 		token, lit := sc.Scan()
483 | 		if token == pkg.EOF {
484 | 			return Prop{}, fmt.Errorf("invalid node property: reached EOF")
485 | 		}
486 | 
487 | 		switch token {
488 | 		case pkg.INVALID:
489 | 			return Prop{}, fmt.Errorf("invalid property value")
490 | 		case pkg.NUM_INT:
491 | 			n, err := parseIntValue(lit, valueTypeAnnot)
492 | 			if err != nil {
493 | 				return Prop{}, err
494 | 			}
495 | 			value = n
496 | 			done = true
497 | 		case pkg.NUM_FLOAT, pkg.NUM_SCI:
498 | 			n, err := parseFloatValue(lit, valueTypeAnnot)
499 | 			if err != nil {
500 | 				return Prop{}, err
501 | 			}
502 | 			value = n
503 | 			done = true
504 | 		case pkg.QUOTE:
505 | 			s, err := scanString(cx, sc, valueTypeAnnot)
506 | 			if err != nil {
507 | 				return Prop{}, err
508 | 			}
509 | 			value = s
510 | 			done = true
511 | 		case pkg.PAREN_OPEN:
512 | 			t, err := scanTypeAnnotation(cx, sc)
513 | 			if err != nil {
514 | 				return Prop{}, err
515 | 			}
516 | 
517 | 			valueTypeAnnot = t
518 | 		default:
519 | 			// Not a number or string => try parse bool or null
520 | 			sc.Unread()
521 | 			t, letters := sc.ScanLetters()
522 | 			if t != pkg.EOF {
523 | 				switch letters {
524 | 				case "null":
525 | 					value = nil
526 | 				case "true":
527 | 					value = true
528 | 				case "false":
529 | 					value = false
530 | 				default:
531 | 					return Prop{}, fmt.Errorf("invalid property value")
532 | 				}
533 | 
534 | 				if valueTypeAnnot != "" {
535 | 					return Prop{}, fmt.Errorf("unexpected type annotation")
536 | 				}
537 | 
538 | 				done = true
539 | 			} else {
540 | 				return Prop{}, fmt.Errorf("invalid property value")
541 | 			}
542 | 		}
543 | 	}
544 | 
545 | 	return Prop{
546 | 		Name:           name,
547 | 		TypeAnnot:      TypeAnnotation(typeAnnotation),
548 | 		Value:          value,
549 | 		ValueTypeAnnot: TypeAnnotation(valueTypeAnnot),
550 | 	}, nil
551 | }
552 | 
553 | func scanTypeAnnotation(cx *parseContext, sc *pkg.Scanner) (string, error) {
554 | 	annot := sc.ScanWhile(func(r rune) bool {
555 | 		return unicode.In(r, unicode.Digit, unicode.Letter)
556 | 	})
557 | 
558 | 	next, _ := sc.Scan()
559 | 	if next != pkg.PAREN_CLOSE {
560 | 		return "", fmt.Errorf("unclosed type annotation")
561 | 	}
562 | 
563 | 	annot = strings.TrimSpace(annot)
564 | 	if annot == "" {
565 | 		return "", fmt.Errorf("invalid type annotation: empty")
566 | 	}
567 | 
568 | 	return annot, nil
569 | }
570 | 


--------------------------------------------------------------------------------
/parser_test.go:
--------------------------------------------------------------------------------
  1 | package gokdl
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"os"
  6 | 	"strings"
  7 | 
  8 | 	// "os"
  9 | 	"testing"
 10 | 
 11 | 	"github.com/stretchr/testify/require"
 12 | )
 13 | 
 14 | func TestParserLineSeparators(t *testing.T) {
 15 | 	tests := []struct {
 16 | 		testname string
 17 | 		body     string
 18 | 	}{
 19 | 		{"newline", "A\nB"},
 20 | 		{"carriage return", "A\rB"},
 21 | 		{"carriage return newline", "A\r\nB"},
 22 | 		{"form feed", "A\fB"},
 23 | 		{"next line", "A\u0085B"},
 24 | 		{"line separator", "A\u2028B"},
 25 | 		{"paragraph separator", "A\u2029B"},
 26 | 		{"multiple variants", "A\u2029\n\u2028\u0085\f\r\r\nB"},
 27 | 	}
 28 | 	for _, test := range tests {
 29 | 		t.Run(test.testname, func(t *testing.T) {
 30 | 			nodes := setupAndParse(t, test.body).Nodes()
 31 | 			require.Len(t, nodes, 2)
 32 | 			require.Equal(t, "A", nodes[0].Name)
 33 | 			require.Equal(t, "B", nodes[1].Name)
 34 | 		})
 35 | 	}
 36 | }
 37 | 
 38 | func TestParserNewlineEscaping(t *testing.T) {
 39 | 	tests := []struct {
 40 | 		testname string
 41 | 		body     string
 42 | 		parsed   string
 43 | 	}{
 44 | 		{"newline in string value", "node \"new\nline\"", "new\nline"},
 45 | 		{"carriage return in string value", "node \"carriage\rreturn\"", "carriage\rreturn"},
 46 | 		{"carriage return newline", "node \"cr\r\nnl\"", "cr\r\nnl"},
 47 | 		{"form feed in string value", "node \"form\ffeed\"", "form\ffeed"},
 48 | 		{"next line in string value", "node \"next\u0085line\"", "next\u0085line"},
 49 | 		{"line separator in string value", "node \"line\u2028separator\"", "line\u2028separator"},
 50 | 		{"paragraph separator in string value", "node \"paragraph\u2029separator\"", "paragraph\u2029separator"},
 51 | 	}
 52 | 	for _, test := range tests {
 53 | 		t.Run(test.testname, func(t *testing.T) {
 54 | 			nodes := setupAndParse(t, test.body).Nodes()
 55 | 			require.Len(t, nodes, 1)
 56 | 			require.Equal(t, test.parsed, nodes[0].Args[0].String())
 57 | 		})
 58 | 	}
 59 | }
 60 | 
 61 | func TestParserLineComment(t *testing.T) {
 62 | 	_ = setupAndParse(t, `// First line
 63 | // Second line
 64 | // Thirdline`)
 65 | }
 66 | 
 67 | func TestParserMultilineComment(t *testing.T) {
 68 | 	tests := []struct {
 69 | 		testname string
 70 | 		body     string
 71 | 	}{
 72 | 		{"single line", "/* comment */"},
 73 | 		{"single line - two comments", "/* comment */ /* another */"},
 74 | 		{
 75 | 			"multiple lines", `/*
 76 | comment
 77 | another
 78 | */`,
 79 | 		},
 80 | 	}
 81 | 
 82 | 	for _, test := range tests {
 83 | 		t.Run(test.testname, func(t *testing.T) {
 84 | 			_ = setupAndParse(t, test.body)
 85 | 		})
 86 | 	}
 87 | }
 88 | 
 89 | func TestParserSlashdashCommentNode(t *testing.T) {
 90 | 	doc := setupAndParse(t, `/-mynode`)
 91 | 	nodes := doc.Nodes()
 92 | 	require.Len(t, nodes, 0)
 93 | }
 94 | 
 95 | func TestParserSlashdashCommentArg(t *testing.T) {
 96 | 	// Arrange & Act
 97 | 	doc := setupAndParse(t, "Node.js /-\"arg\" 1")
 98 | 
 99 | 	// Assert
100 | 	nodes := doc.Nodes()
101 | 	require.Len(t, nodes, 1)
102 | 	args := nodes[0].Args
103 | 	require.Len(t, args, 1)
104 | 	require.Equal(t, int64(1), args[0].Value)
105 | }
106 | 
107 | func TestParserSlashdashCommentProp(t *testing.T) {
108 | 	doc := setupAndParse(t, "Node.js uncommented=true /-properly=\"arg\" 1")
109 | 	nodes := doc.Nodes()
110 | 	require.Len(t, nodes, 1)
111 | 
112 | 	args := nodes[0].Args
113 | 	require.Len(t, args, 1)
114 | 	require.Equal(t, int64(1), args[0].Value)
115 | 
116 | 	props := nodes[0].Props
117 | 	require.Len(t, props, 1)
118 | 
119 | 	require.Equal(t, true, props[0].Value)
120 | }
121 | 
122 | func TestParserSlashdashCommentChildren(t *testing.T) {
123 | 	doc := setupAndParse(t, `Node.js uncommented=true  1 /-{
124 | 	childNode
125 | }`)
126 | 	nodes := doc.Nodes()
127 | 	require.Len(t, nodes, 1)
128 | 	children := nodes[0].Children
129 | 	require.Len(t, children, 0)
130 | }
131 | 
132 | func TestParserSlashdashCommentNestedChildren(t *testing.T) {
133 | 	doc := setupAndParse(t, `Node.js uncommented=true  1 {
134 | 	/-Ignored 1 2
135 | 	Exists true
136 | }`)
137 | 	nodes := doc.Nodes()
138 | 	require.Len(t, nodes, 1)
139 | 	require.Len(t, nodes[0].Children, 1)
140 | }
141 | 
142 | func TestParserValidNodeIdentifier(t *testing.T) {
143 | 	tests := []struct {
144 | 		testname     string
145 | 		doc          string
146 | 		expectedName string
147 | 	}{
148 | 		{"lower case letters", "node", "node"},
149 | 		{"snake case", "node_name", "node_name"},
150 | 		{"end with number", "node_name123", "node_name123"},
151 | 		{"arbitrary characters #1", "-this_actually::WORKS?", "-this_actually::WORKS?"},
152 | 		{"quoted named", "\"Node Name?\"", "Node Name?"},
153 | 	}
154 | 
155 | 	for _, test := range tests {
156 | 		t.Run(test.testname, func(t *testing.T) {
157 | 			doc := setupAndParse(t, test.doc)
158 | 			require.Len(t, doc.nodes, 1)
159 | 
160 | 			name := doc.nodes[0].Name
161 | 			require.Equal(t, test.expectedName, name)
162 | 			require.Zero(t, doc.nodes[0].TypeAnnotation)
163 | 		})
164 | 	}
165 | }
166 | 
167 | func TestParserNodeIdentifierInvalid(t *testing.T) {
168 | 	tests := []struct {
169 | 		testname string
170 | 		ident    string
171 | 	}{
172 | 		{"integer", "1"},
173 | 		{"parenthesis", "a(b)c"},
174 | 		{"square brackets", "a[b]c"},
175 | 		{"equal", "a=c"},
176 | 		{"comma", "abcD,,Y"},
177 | 	}
178 | 
179 | 	for _, test := range tests {
180 | 		t.Run(test.testname, func(t *testing.T) {
181 | 			parser := setup(test.ident)
182 | 			_, err := parser.parse()
183 | 			require.Error(t, err)
184 | 		})
185 | 	}
186 | }
187 | 
188 | func TestParserNodeArgs(t *testing.T) {
189 | 	// Arrange
190 | 	nodeName := "node"
191 | 	tests := []struct {
192 | 		testname         string
193 | 		body             string
194 | 		expectedArgValue any
195 | 	}{
196 | 		{"integer", "node 1", int64(1)},
197 | 		{"integer with underscore", "node 1_0_0", int64(100)},
198 | 		{"float1", "node 1.234", 1.234},
199 | 		{"float2", "node 1234.5678", 1234.5678},
200 | 		{"string1", "node \"my@value\"", "my@value"},
201 | 		{"string2", `node "TODO: $1"`, "TODO: $1"},
202 | 		{"string3", `node "log.Printf(\"$1\")"`, `log.Printf("$1")`},
203 | 		{"string4", `node "block{
204 | 	$1
205 | }"`, `block{
206 | 	$1
207 | }`},
208 | 		{"rawstring1", `node r"h\e\l\l"`, `h\e\l\l`},
209 | 		{"rawstringhash1", `node r#"h\e\l\l"#`, `h\e\l\l`},
210 | 		{"rawstringhash2", `node r##"h\e\l\l"##`, `h\e\l\l`},
211 | 		{"rawstringhash3", `node r##"he"ll"##`, `he"ll`},
212 | 		{"rawstringhash4", `node r##"he#ll"##`, `he#ll`},
213 | 		{"string with hash", `node "#[allow(unused)]"`, `#[allow(unused)]`},
214 | 		{"null", "node null", nil},
215 | 		{"true", "node true", true},
216 | 		{"false", "node false", false},
217 | 		{"hex - small caps", "node 0x1aaeff", int64(1748735)},
218 | 		{"hex - mixed caps", "node 0x1AAeff", int64(1748735)},
219 | 	}
220 | 
221 | 	for _, test := range tests {
222 | 		t.Run(test.testname, func(t *testing.T) {
223 | 			// Act
224 | 			parser := setup(test.body)
225 | 			doc, err := parser.parse()
226 | 
227 | 			// Assert
228 | 			require.NoError(t, err)
229 | 
230 | 			nodes := doc.Nodes()
231 | 			require.Len(t, nodes, 1)
232 | 			node := nodes[0]
233 | 			require.Equal(t, nodeName, node.Name)
234 | 
235 | 			require.Len(t, node.Args, 1)
236 | 			arg := node.Args[0]
237 | 
238 | 			require.Equal(t, test.expectedArgValue, arg.Value)
239 | 			require.Equal(t, TypeAnnotation(""), arg.TypeAnnotation)
240 | 		})
241 | 	}
242 | }
243 | 
244 | func TestParserNodeArgsInvalid(t *testing.T) {
245 | 	// Arrange
246 | 	tests := []struct {
247 | 		testname string
248 | 		body     string
249 | 	}{
250 | 		{"integer followed by letter", "NodeName 1a"},
251 | 		{"bare identifier", "NodeName nodename"},
252 | 		{"unexpected slash", "NodeName /"},
253 | 		{"unexpected dot", "NodeName ."},
254 | 		{"unterminated string", `NodeName ".`},
255 | 		{"invalid termination of raw string 1", `NodeName r".`},
256 | 		{"invalid termination of raw string 2", `NodeName r##"."#`},
257 | 	}
258 | 
259 | 	for _, test := range tests {
260 | 		t.Run(test.testname, func(t *testing.T) {
261 | 			// Act
262 | 			parser := setup(test.body)
263 | 			_, err := parser.parse()
264 | 
265 | 			// Assert
266 | 			require.Error(t, err)
267 | 		})
268 | 	}
269 | }
270 | 
271 | func TestParserNodeArgsTypeAnnotationsInvalid(t *testing.T) {
272 | 	// Arrange
273 | 	tests := []struct {
274 | 		testname string
275 | 		body     string
276 | 	}{
277 | 		{"type annotation for invalid literal: null", "NodeName (u8)null"},
278 | 		{"type annotation for invalid literal: true", "NodeName (u8)true"},
279 | 		{"type annotation for invalid literal: false", "NodeName (u8)false"},
280 | 		{"u8 for type string", `NodeName (u8)"value"`},
281 | 		{"uncloses paranthesis", `NodeName (string"value"`},
282 | 		{"integer for type float", "NodeName (u16)12.456"},
283 | 		{"float for type integer", "NodeName (f64)12"},
284 | 		{"negative for unsigned integer", "NodeName (u64)-12"},
285 | 		{"overflow for u8", "NodeName (u8)1024"},
286 | 	}
287 | 
288 | 	for _, test := range tests {
289 | 		t.Run(test.testname, func(t *testing.T) {
290 | 			// Act
291 | 			parser := setup(test.body)
292 | 			_, err := parser.parse()
293 | 
294 | 			// Assert
295 | 			require.Error(t, err)
296 | 		})
297 | 	}
298 | }
299 | 
300 | func TestParserNodeProp(t *testing.T) {
301 | 	// Arrange
302 | 	nodeName := "NodeName"
303 | 	tests := []struct {
304 | 		testname          string
305 | 		body              string
306 | 		expectedPropName  string
307 | 		expectedPropValue any
308 | 	}{
309 | 		{"integer value", "NodeName myprop=1", "myprop", int64(1)},
310 | 		{"float value", "NodeName myprop=1.234", "myprop", 1.234},
311 | 		{"string value", "NodeName myprop=\"Hello, World!\"", "myprop", "Hello, World!"},
312 | 		{"string value - quoted name", "NodeName \"hehe prop\"=\"Hello, World!\"", "hehe prop", "Hello, World!"},
313 | 		{"null value", "NodeName myprop=null", "myprop", nil},
314 | 		{"bool: true", "NodeName myprop=true", "myprop", true},
315 | 		{"bool: false", "NodeName myprop=false", "myprop", false},
316 | 	}
317 | 
318 | 	for _, test := range tests {
319 | 		t.Run(test.testname, func(t *testing.T) {
320 | 			parser := setup(test.body)
321 | 			// Act
322 | 			doc, err := parser.parse()
323 | 
324 | 			// Assert
325 | 			require.NoError(t, err)
326 | 
327 | 			nodes := doc.Nodes()
328 | 			require.Len(t, nodes, 1)
329 | 
330 | 			node := nodes[0]
331 | 			require.Equal(t, nodeName, node.Name)
332 | 
333 | 			props := node.Props
334 | 			require.Len(t, props, 1)
335 | 			prop := props[0]
336 | 
337 | 			require.Equal(t, test.expectedPropName, prop.Name)
338 | 			require.Equal(t, test.expectedPropValue, prop.Value)
339 | 		})
340 | 	}
341 | }
342 | 
343 | func TestParserNodePropInvalid(t *testing.T) {
344 | 	// Arrange
345 | 	tests := []struct {
346 | 		testname string
347 | 		body     string
348 | 	}{
349 | 		{"missing value", "NodeName myprop= "},
350 | 		{"identifier value", "NodeName myprop=identifier"},
351 | 		{"unterminated string", `NodeName myprop="opened`},
352 | 		{"parenthesis", `NodeName myprop=()`},
353 | 		{"misc1", `NodeName myprop=123a`},
354 | 		{"misc2", `NodeName myprop=1.23--`},
355 | 	}
356 | 
357 | 	for _, test := range tests {
358 | 		t.Run(test.testname, func(t *testing.T) {
359 | 			// Act
360 | 			parser := setup(test.body)
361 | 			_, err := parser.parse()
362 | 
363 | 			// Assert
364 | 			require.Error(t, err)
365 | 		})
366 | 	}
367 | }
368 | 
369 | func TestParserNodePropTypeAnnotation(t *testing.T) {
370 | 	// Arrange
371 | 	nodeName := "NodeName"
372 | 	propName := "myprop"
373 | 	tests := []struct {
374 | 		testname               string
375 | 		body                   string
376 | 		expectedValue          any
377 | 		expectedTypeAnnot      TypeAnnotation
378 | 		expectedValueTypeAnnot TypeAnnotation
379 | 	}{
380 | 		{"integer value - type annotation on arg", "NodeName myprop=(i64)1", int64(1), noTypeAnnot, I64},
381 | 		{"integer value - type annotation on prop", "NodeName (author)myprop=1", int64(1), TypeAnnotation("author"), noTypeAnnot},
382 | 		{"integer value - type annotation on prop and arg", "NodeName (author)myprop=(i64)1", int64(1), TypeAnnotation("author"), I64},
383 | 	}
384 | 
385 | 	for _, test := range tests {
386 | 		t.Run(test.testname, func(t *testing.T) {
387 | 			parser := setup(test.body)
388 | 			// Act
389 | 			doc, err := parser.parse()
390 | 
391 | 			// Assert
392 | 			require.NoError(t, err)
393 | 
394 | 			nodes := doc.Nodes()
395 | 			require.Len(t, nodes, 1)
396 | 
397 | 			node := nodes[0]
398 | 			require.Equal(t, nodeName, node.Name)
399 | 
400 | 			props := node.Props
401 | 			require.Len(t, props, 1)
402 | 			prop := props[0]
403 | 
404 | 			require.Equal(t, propName, prop.Name)
405 | 			require.Equal(t, test.expectedValue, prop.Value)
406 | 			require.Equal(t, test.expectedTypeAnnot, prop.TypeAnnot)
407 | 			require.Equal(t, test.expectedValueTypeAnnot, prop.ValueTypeAnnot)
408 | 		})
409 | 	}
410 | }
411 | 
412 | func TestParserNodeTypeAnnotation(t *testing.T) {
413 | 	// Arrange
414 | 	nodeName := "NodeName"
415 | 	tests := []struct {
416 | 		testname          string
417 | 		body              string
418 | 		expectedTypeAnnot TypeAnnotation
419 | 		err               bool
420 | 	}{
421 | 		{"ok - string annotation", "(string) NodeName", TypeAnnotation("string"), false},
422 | 		{"ok - arbitrary annotation", "(user)NodeName", TypeAnnotation("user"), false},
423 | 		{"ok - no annotation", "NodeName", noTypeAnnot, false},
424 | 		{"error - empty annotation", "() NodeName", noTypeAnnot, true},
425 | 		{"error - unclosed annotation", "( NodeName", noTypeAnnot, true},
426 | 		{"error - unexpected right par", ") NodeName", noTypeAnnot, true},
427 | 	}
428 | 
429 | 	for _, test := range tests {
430 | 		t.Run(test.testname, func(t *testing.T) {
431 | 			parser := setup(test.body)
432 | 			// Act
433 | 			doc, err := parser.parse()
434 | 
435 | 			// Assert
436 | 			if test.err {
437 | 				require.Error(t, err)
438 | 			} else {
439 | 				require.NoError(t, err)
440 | 				nodes := doc.Nodes()
441 | 				require.Len(t, nodes, 1)
442 | 
443 | 				node := nodes[0]
444 | 				require.Equal(t, nodeName, node.Name)
445 | 
446 | 				require.Empty(t, node.Props)
447 | 				require.Empty(t, node.Args)
448 | 
449 | 				require.Equal(t, test.expectedTypeAnnot, node.TypeAnnotation)
450 | 			}
451 | 		})
452 | 	}
453 | }
454 | 
455 | func TestParserNodeChildren(t *testing.T) {
456 | 	tests := []struct {
457 | 		testname      string
458 | 		body          string
459 | 		expectedNodes int
460 | 	}{
461 | 		{"single line #1", "Parent { child1 }", 2},
462 | 		{"single line #2", "Parent { child1; child2 }", 3},
463 | 		{"single line #3", "Parent { child1; child2; }", 3},
464 | 		{"single line #4", "Parent { child1; /-child2; }", 2},
465 | 		{"single line #5", "Parent { /*child1*/ child2; }", 2},
466 | 		{
467 | 			"nested #1", `Parent {
468 | 	child1; child2
469 | 		}`,
470 | 			3,
471 | 		},
472 | 		{
473 | 			"nested #2", `Parent {
474 | 	child1;
475 | 	child-?
476 | 		}`,
477 | 			3,
478 | 		},
479 | 		{
480 | 			"nested #3", `Parent {
481 | 	child1 {}
482 | 	child-?
483 | 		}`,
484 | 			3,
485 | 		},
486 | 		{
487 | 			"nested #4", `Parent {
488 | 	child1 { child1-A }
489 | 	child-? }`,
490 | 			4,
491 | 		},
492 | 		{
493 | 			"nested #5", `Parent {
494 | 	child1 { child1-A }
495 | 	child-?
496 | 
497 | 	deep-1 {
498 | 		deep-1-2 {
499 | 			/-deep-1-2-3-a
500 | 			deep-1-2-3-b
501 | 			deep-1-2-3-c
502 | 		}
503 | 	}
504 | }`,
505 | 			8,
506 | 		},
507 | 	}
508 | 
509 | 	for _, test := range tests {
510 | 		t.Run(test.testname, func(t *testing.T) {
511 | 			doc := setupAndParse(t, test.body)
512 | 			actual := totalChildren(doc)
513 | 			require.Equal(t, test.expectedNodes, actual)
514 | 		})
515 | 	}
516 | 
517 | 	doc := setupAndParse(t, `Parent { child-1; child2; child-3 }`)
518 | 	children := doc.nodes[0].Children
519 | 	require.Len(t, children, 3)
520 | }
521 | 
522 | func TestParserNodeChildrenSingle(t *testing.T) {
523 | 	doc := setupAndParse(t, `Parent {
524 | 	child
525 | }`)
526 | 	children := doc.nodes[0].Children
527 | 	require.Len(t, children, 1)
528 | 	require.Equal(t, "child", children[0].Name)
529 | }
530 | 
531 | func TestParserNodeChildrenMultiple(t *testing.T) {
532 | 	doc := setupAndParse(t, `Parent {
533 | 	child-1; child2;
534 | 	child-3
535 | }`)
536 | 	children := doc.nodes[0].Children
537 | 	require.Len(t, children, 3)
538 | }
539 | 
540 | func TestParserNodeChildrenMultipleSameRow(t *testing.T) {
541 | 	doc := setupAndParse(t, `Parent { child-1; child2; child-3 }`)
542 | 	children := doc.nodes[0].Children
543 | 	require.Len(t, children, 3)
544 | }
545 | 
546 | func TestParserStringsEscaped(t *testing.T) {
547 | 	// Arrange
548 | 	filename := "testdata/escaped.kdl"
549 | 	bs, err := os.ReadFile(filename)
550 | 	require.NoError(t, err)
551 | 	parser := newParser(bytes.NewReader(bs))
552 | 
553 | 	// Act
554 | 	doc, err := parser.parse()
555 | 
556 | 	//Assert
557 | 	require.NoError(t, err)
558 | 	nodes := doc.Nodes()
559 | 	require.Equal(t, "\t", nodes[0].Args[0].Value)
560 | 	require.Equal(t, "\u00CA", nodes[1].Args[0].Value)
561 | 	require.Equal(t, "Ê", nodes[1].Args[0].Value)
562 | 	require.Equal(t, `"`, nodes[2].Args[0].Value)
563 | }
564 | 
565 | func setup(doc string) *parser {
566 | 	r := strings.NewReader(doc)
567 | 	return newParser(r)
568 | }
569 | 
570 | func setupAndParse(t *testing.T, doc string) Doc {
571 | 	p := setup(doc)
572 | 	d, err := p.parse()
573 | 	if err != nil {
574 | 		t.Fatalf("expected no error but was: %s", err)
575 | 	}
576 | 	return d
577 | }
578 | 
579 | func recNodeChildrenCount(node Node) int {
580 | 	if len(node.Children) == 0 {
581 | 		return 1
582 | 	}
583 | 
584 | 	total := 1
585 | 	for _, ch := range node.Children {
586 | 		total += recNodeChildrenCount(ch)
587 | 	}
588 | 	return total
589 | }
590 | 
591 | func totalChildren(doc Doc) int {
592 | 	total := 0
593 | 	for _, n := range doc.nodes {
594 | 		total += recNodeChildrenCount(n)
595 | 	}
596 | 	return total
597 | }
598 | 


--------------------------------------------------------------------------------