├── .travis.yml ├── LICENSE ├── README.md ├── ast ├── ast.go └── walk.go ├── parser ├── parser.go ├── parser_test.go └── test-fixtures │ ├── array_comment.hcl │ ├── assign_colon.hcl │ ├── assign_deep.hcl │ ├── comment.hcl │ ├── comment_single.hcl │ ├── complex.hcl │ ├── complex_key.hcl │ ├── empty.hcl │ ├── list.hcl │ ├── list_comma.hcl │ ├── multiple.hcl │ ├── old.hcl │ ├── structure.hcl │ ├── structure_basic.hcl │ ├── structure_empty.hcl │ └── types.hcl ├── printer ├── nodes.go ├── printer.go ├── printer_test.go └── testdata │ ├── comment.golden │ ├── comment.input │ ├── comment_aligned.golden │ ├── comment_aligned.input │ ├── comment_standalone.golden │ ├── comment_standalone.input │ ├── complexhcl.golden │ ├── complexhcl.input │ ├── list.golden │ └── list.input ├── scanner ├── scanner.go └── scanner_test.go └── token ├── position.go ├── token.go └── token_test.go /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | go: 1.5 3 | 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Fatih Arslan 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of hcl nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 
17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This project is moved to the official HCL repo: 2 | [github/hashicorp/hcl](https://github.com/hashicorp/hcl) 3 | 4 | Checkout my blog post about the journey and move of the HCL parser: 5 | https://medium.com/@farslan/hcl-parser-family-in-pure-go-86589a5648fb 6 | -------------------------------------------------------------------------------- /ast/ast.go: -------------------------------------------------------------------------------- 1 | // Package ast declares the types used to represent syntax trees for HCL 2 | // (HashiCorp Configuration Language) 3 | package ast 4 | 5 | import "github.com/fatih/hcl/token" 6 | 7 | // Node is an element in the abstract syntax tree. 
8 | type Node interface { 9 | node() 10 | Pos() token.Pos 11 | } 12 | 13 | func (File) node() {} 14 | func (ObjectList) node() {} 15 | func (ObjectKey) node() {} 16 | func (ObjectItem) node() {} 17 | 18 | func (Comment) node() {} 19 | func (CommentGroup) node() {} 20 | func (ObjectType) node() {} 21 | func (LiteralType) node() {} 22 | func (ListType) node() {} 23 | 24 | // File represents a single HCL file 25 | type File struct { 26 | Node Node // usually a *ObjectList 27 | Comments []*CommentGroup // list of all comments in the source 28 | } 29 | 30 | func (f *File) Pos() token.Pos { 31 | return f.Node.Pos() 32 | } 33 | 34 | // ObjectList represents a list of ObjectItems. An HCL file itself is an 35 | // ObjectList. 36 | type ObjectList struct { 37 | Items []*ObjectItem 38 | } 39 | 40 | func (o *ObjectList) Add(item *ObjectItem) { 41 | o.Items = append(o.Items, item) 42 | } 43 | 44 | func (o *ObjectList) Pos() token.Pos { 45 | // always returns the uninitiliazed position 46 | return o.Items[0].Pos() 47 | } 48 | 49 | // ObjectItem represents a HCL Object Item. An item is represented with a key 50 | // (or keys). It can be an assignment or an object (both normal and nested) 51 | type ObjectItem struct { 52 | // keys is only one length long if it's of type assignment. If it's a 53 | // nested object it can be larger than one. In that case "assign" is 54 | // invalid as there is no assignments for a nested object. 55 | Keys []*ObjectKey 56 | 57 | // assign contains the position of "=", if any 58 | Assign token.Pos 59 | 60 | // val is the item itself. It can be an object,list, number, bool or a 61 | // string. If key length is larger than one, val can be only of type 62 | // Object. 
63 | Val Node 64 | 65 | LeadComment *CommentGroup // associated lead comment 66 | LineComment *CommentGroup // associated line comment 67 | } 68 | 69 | func (o *ObjectItem) Pos() token.Pos { 70 | return o.Keys[0].Pos() 71 | } 72 | 73 | // ObjectKeys are either an identifier or of type string. 74 | type ObjectKey struct { 75 | Token token.Token 76 | } 77 | 78 | func (o *ObjectKey) Pos() token.Pos { 79 | return o.Token.Pos 80 | } 81 | 82 | // LiteralType represents a literal of basic type. Valid types are: 83 | // token.NUMBER, token.FLOAT, token.BOOL and token.STRING 84 | type LiteralType struct { 85 | Token token.Token 86 | 87 | // associated line comment, only when used in a list 88 | LineComment *CommentGroup 89 | } 90 | 91 | func (l *LiteralType) Pos() token.Pos { 92 | return l.Token.Pos 93 | } 94 | 95 | // ListStatement represents a HCL List type 96 | type ListType struct { 97 | Lbrack token.Pos // position of "[" 98 | Rbrack token.Pos // position of "]" 99 | List []Node // the elements in lexical order 100 | } 101 | 102 | func (l *ListType) Pos() token.Pos { 103 | return l.Lbrack 104 | } 105 | 106 | func (l *ListType) Add(node Node) { 107 | l.List = append(l.List, node) 108 | } 109 | 110 | // ObjectType represents a HCL Object Type 111 | type ObjectType struct { 112 | Lbrace token.Pos // position of "{" 113 | Rbrace token.Pos // position of "}" 114 | List *ObjectList // the nodes in lexical order 115 | } 116 | 117 | func (o *ObjectType) Pos() token.Pos { 118 | return o.Lbrace 119 | } 120 | 121 | // Comment node represents a single //, # style or /*- style commment 122 | type Comment struct { 123 | Start token.Pos // position of / or # 124 | Text string 125 | } 126 | 127 | func (c *Comment) Pos() token.Pos { 128 | return c.Start 129 | } 130 | 131 | // CommentGroup node represents a sequence of comments with no other tokens and 132 | // no empty lines between. 
133 | type CommentGroup struct { 134 | List []*Comment // len(List) > 0 135 | } 136 | 137 | func (c *CommentGroup) Pos() token.Pos { 138 | return c.List[0].Pos() 139 | } 140 | -------------------------------------------------------------------------------- /ast/walk.go: -------------------------------------------------------------------------------- 1 | package ast 2 | 3 | import "fmt" 4 | 5 | // Walk traverses an AST in depth-first order: It starts by calling fn(node); 6 | // node must not be nil. If f returns true, Walk invokes f recursively for 7 | // each of the non-nil children of node, followed by a call of f(nil). 8 | func Walk(node Node, fn func(Node) bool) { 9 | if !fn(node) { 10 | return 11 | } 12 | 13 | switch n := node.(type) { 14 | case *File: 15 | Walk(n.Node, fn) 16 | case *ObjectList: 17 | for _, item := range n.Items { 18 | Walk(item, fn) 19 | } 20 | case *ObjectKey: 21 | // nothing to do 22 | case *ObjectItem: 23 | for _, k := range n.Keys { 24 | Walk(k, fn) 25 | } 26 | Walk(n.Val, fn) 27 | case *LiteralType: 28 | // nothing to do 29 | case *ListType: 30 | for _, l := range n.List { 31 | Walk(l, fn) 32 | } 33 | case *ObjectType: 34 | for _, l := range n.List.Items { 35 | Walk(l, fn) 36 | } 37 | default: 38 | fmt.Printf(" unknown type: %T\n", n) 39 | } 40 | 41 | fn(nil) 42 | } 43 | -------------------------------------------------------------------------------- /parser/parser.go: -------------------------------------------------------------------------------- 1 | // Package parser implements a parser for HCL (HashiCorp Configuration 2 | // Language) 3 | package parser 4 | 5 | import ( 6 | "errors" 7 | "fmt" 8 | 9 | "github.com/fatih/hcl/ast" 10 | "github.com/fatih/hcl/scanner" 11 | "github.com/fatih/hcl/token" 12 | ) 13 | 14 | type Parser struct { 15 | sc *scanner.Scanner 16 | 17 | // Last read token 18 | tok token.Token 19 | commaPrev token.Token 20 | 21 | comments []*ast.CommentGroup 22 | leadComment *ast.CommentGroup // last lead comment 23 | 
lineComment *ast.CommentGroup // last line comment 24 | 25 | enableTrace bool 26 | indent int 27 | n int // buffer size (max = 1) 28 | } 29 | 30 | func newParser(src []byte) *Parser { 31 | return &Parser{ 32 | sc: scanner.New(src), 33 | } 34 | } 35 | 36 | // Parse returns the fully parsed source and returns the abstract syntax tree. 37 | func Parse(src []byte) (*ast.File, error) { 38 | p := newParser(src) 39 | return p.Parse() 40 | } 41 | 42 | var errEofToken = errors.New("EOF token found") 43 | 44 | // Parse returns the fully parsed source and returns the abstract syntax tree. 45 | func (p *Parser) Parse() (*ast.File, error) { 46 | f := &ast.File{} 47 | var err error 48 | f.Node, err = p.objectList() 49 | if err != nil { 50 | return nil, err 51 | } 52 | 53 | f.Comments = p.comments 54 | return f, nil 55 | } 56 | 57 | func (p *Parser) objectList() (*ast.ObjectList, error) { 58 | defer un(trace(p, "ParseObjectList")) 59 | node := &ast.ObjectList{} 60 | 61 | for { 62 | n, err := p.objectItem() 63 | if err == errEofToken { 64 | break // we are finished 65 | } 66 | 67 | // we don't return a nil node, because might want to use already 68 | // collected items. 
69 | if err != nil { 70 | return node, err 71 | } 72 | 73 | node.Add(n) 74 | } 75 | return node, nil 76 | } 77 | 78 | func (p *Parser) consumeComment() (comment *ast.Comment, endline int) { 79 | endline = p.tok.Pos.Line 80 | 81 | // count the endline if it's multiline comment, ie starting with /* 82 | if len(p.tok.Text) > 1 && p.tok.Text[1] == '*' { 83 | // don't use range here - no need to decode Unicode code points 84 | for i := 0; i < len(p.tok.Text); i++ { 85 | if p.tok.Text[i] == '\n' { 86 | endline++ 87 | } 88 | } 89 | } 90 | 91 | comment = &ast.Comment{Start: p.tok.Pos, Text: p.tok.Text} 92 | p.tok = p.sc.Scan() 93 | return 94 | } 95 | 96 | func (p *Parser) consumeCommentGroup(n int) (comments *ast.CommentGroup, endline int) { 97 | var list []*ast.Comment 98 | endline = p.tok.Pos.Line 99 | 100 | for p.tok.Type == token.COMMENT && p.tok.Pos.Line <= endline+n { 101 | var comment *ast.Comment 102 | comment, endline = p.consumeComment() 103 | list = append(list, comment) 104 | } 105 | 106 | // add comment group to the comments list 107 | comments = &ast.CommentGroup{List: list} 108 | p.comments = append(p.comments, comments) 109 | 110 | return 111 | } 112 | 113 | // objectItem parses a single object item 114 | func (p *Parser) objectItem() (*ast.ObjectItem, error) { 115 | defer un(trace(p, "ParseObjectItem")) 116 | 117 | keys, err := p.objectKey() 118 | if err != nil { 119 | return nil, err 120 | } 121 | 122 | o := &ast.ObjectItem{ 123 | Keys: keys, 124 | } 125 | 126 | if p.leadComment != nil { 127 | o.LeadComment = p.leadComment 128 | p.leadComment = nil 129 | } 130 | 131 | switch p.tok.Type { 132 | case token.ASSIGN: 133 | o.Assign = p.tok.Pos 134 | o.Val, err = p.object() 135 | if err != nil { 136 | return nil, err 137 | } 138 | case token.LBRACE: 139 | o.Val, err = p.objectType() 140 | if err != nil { 141 | return nil, err 142 | } 143 | } 144 | 145 | // do a look-ahead for line comment 146 | p.scan() 147 | if o.Val.Pos().Line == keys[0].Pos().Line && 
p.lineComment != nil { 148 | o.LineComment = p.lineComment 149 | p.lineComment = nil 150 | } 151 | p.unscan() 152 | return o, nil 153 | } 154 | 155 | // objectKey parses an object key and returns a ObjectKey AST 156 | func (p *Parser) objectKey() ([]*ast.ObjectKey, error) { 157 | keyCount := 0 158 | keys := make([]*ast.ObjectKey, 0) 159 | 160 | for { 161 | tok := p.scan() 162 | switch tok.Type { 163 | case token.EOF: 164 | return nil, errEofToken 165 | case token.ASSIGN: 166 | // assignment or object only, but not nested objects. this is not 167 | // allowed: `foo bar = {}` 168 | if keyCount > 1 { 169 | return nil, fmt.Errorf("nested object expected: LBRACE got: %s", p.tok.Type) 170 | } 171 | 172 | if keyCount == 0 { 173 | return nil, errors.New("no keys found!!!") 174 | } 175 | 176 | return keys, nil 177 | case token.LBRACE: 178 | // object 179 | return keys, nil 180 | case token.IDENT, token.STRING: 181 | keyCount++ 182 | keys = append(keys, &ast.ObjectKey{Token: p.tok}) 183 | case token.ILLEGAL: 184 | fmt.Println("illegal") 185 | default: 186 | return nil, fmt.Errorf("expected: IDENT | STRING | ASSIGN | LBRACE got: %s", p.tok.Type) 187 | } 188 | } 189 | } 190 | 191 | // object parses any type of object, such as number, bool, string, object or 192 | // list. 
193 | func (p *Parser) object() (ast.Node, error) { 194 | defer un(trace(p, "ParseType")) 195 | tok := p.scan() 196 | 197 | switch tok.Type { 198 | case token.NUMBER, token.FLOAT, token.BOOL, token.STRING: 199 | return p.literalType() 200 | case token.LBRACE: 201 | return p.objectType() 202 | case token.LBRACK: 203 | return p.listType() 204 | case token.COMMENT: 205 | // implement comment 206 | case token.EOF: 207 | return nil, errEofToken 208 | } 209 | 210 | return nil, fmt.Errorf("Unknown token: %+v", tok) 211 | } 212 | 213 | // objectType parses an object type and returns a ObjectType AST 214 | func (p *Parser) objectType() (*ast.ObjectType, error) { 215 | defer un(trace(p, "ParseObjectType")) 216 | 217 | // we assume that the currently scanned token is a LBRACE 218 | o := &ast.ObjectType{ 219 | Lbrace: p.tok.Pos, 220 | } 221 | 222 | l, err := p.objectList() 223 | 224 | // if we hit RBRACE, we are good to go (means we parsed all Items), if it's 225 | // not a RBRACE, it's an syntax error and we just return it. 
226 | if err != nil && p.tok.Type != token.RBRACE { 227 | return nil, err 228 | } 229 | 230 | o.List = l 231 | o.Rbrace = p.tok.Pos // advanced via parseObjectList 232 | return o, nil 233 | } 234 | 235 | // listType parses a list type and returns a ListType AST 236 | func (p *Parser) listType() (*ast.ListType, error) { 237 | defer un(trace(p, "ParseListType")) 238 | 239 | // we assume that the currently scanned token is a LBRACK 240 | l := &ast.ListType{ 241 | Lbrack: p.tok.Pos, 242 | } 243 | 244 | for { 245 | tok := p.scan() 246 | switch tok.Type { 247 | case token.NUMBER, token.FLOAT, token.STRING: 248 | node, err := p.literalType() 249 | if err != nil { 250 | return nil, err 251 | } 252 | 253 | l.Add(node) 254 | case token.COMMA: 255 | // get next list item or we are at the end 256 | // do a look-ahead for line comment 257 | p.scan() 258 | if p.lineComment != nil { 259 | lit, ok := l.List[len(l.List)-1].(*ast.LiteralType) 260 | if ok { 261 | lit.LineComment = p.lineComment 262 | l.List[len(l.List)-1] = lit 263 | p.lineComment = nil 264 | } 265 | } 266 | p.unscan() 267 | continue 268 | case token.BOOL: 269 | // TODO(arslan) should we support? not supported by HCL yet 270 | case token.LBRACK: 271 | // TODO(arslan) should we support nested lists? Even though it's 272 | // written in README of HCL, it's not a part of the grammar 273 | // (not defined in parse.y) 274 | case token.RBRACK: 275 | // finished 276 | l.Rbrack = p.tok.Pos 277 | return l, nil 278 | default: 279 | return nil, fmt.Errorf("unexpected token while parsing list: %s", tok.Type) 280 | } 281 | 282 | } 283 | } 284 | 285 | // literalType parses a literal type and returns a LiteralType AST 286 | func (p *Parser) literalType() (*ast.LiteralType, error) { 287 | defer un(trace(p, "ParseLiteral")) 288 | 289 | return &ast.LiteralType{ 290 | Token: p.tok, 291 | }, nil 292 | } 293 | 294 | // scan returns the next token from the underlying scanner. If a token has 295 | // been unscanned then read that instead. 
In the process, it collects any 296 | // comment groups encountered, and remembers the last lead and line comments. 297 | func (p *Parser) scan() token.Token { 298 | // If we have a token on the buffer, then return it. 299 | if p.n != 0 { 300 | p.n = 0 301 | return p.tok 302 | } 303 | 304 | // Otherwise read the next token from the scanner and Save it to the buffer 305 | // in case we unscan later. 306 | prev := p.tok 307 | p.tok = p.sc.Scan() 308 | 309 | if p.tok.Type == token.COMMENT { 310 | var comment *ast.CommentGroup 311 | var endline int 312 | 313 | // fmt.Printf("p.tok.Pos.Line = %+v prev: %d endline %d \n", 314 | // p.tok.Pos.Line, prev.Pos.Line, endline) 315 | if p.tok.Pos.Line == prev.Pos.Line { 316 | // The comment is on same line as the previous token; it 317 | // cannot be a lead comment but may be a line comment. 318 | comment, endline = p.consumeCommentGroup(0) 319 | if p.tok.Pos.Line != endline { 320 | // The next token is on a different line, thus 321 | // the last comment group is a line comment. 322 | p.lineComment = comment 323 | } 324 | } 325 | 326 | // consume successor comments, if any 327 | endline = -1 328 | for p.tok.Type == token.COMMENT { 329 | comment, endline = p.consumeCommentGroup(1) 330 | } 331 | 332 | if endline+1 == p.tok.Pos.Line && p.tok.Type != token.RBRACE { 333 | switch p.tok.Type { 334 | case token.RBRACE, token.RBRACK: 335 | // Do not count for these cases 336 | default: 337 | // The next token is following on the line immediately after the 338 | // comment group, thus the last comment group is a lead comment. 339 | p.leadComment = comment 340 | } 341 | } 342 | 343 | } 344 | 345 | return p.tok 346 | } 347 | 348 | // unscan pushes the previously read token back onto the buffer. 
349 | func (p *Parser) unscan() { 350 | p.n = 1 351 | } 352 | 353 | // ---------------------------------------------------------------------------- 354 | // Parsing support 355 | 356 | func (p *Parser) printTrace(a ...interface{}) { 357 | if !p.enableTrace { 358 | return 359 | } 360 | 361 | const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . " 362 | const n = len(dots) 363 | fmt.Printf("%5d:%3d: ", p.tok.Pos.Line, p.tok.Pos.Column) 364 | 365 | i := 2 * p.indent 366 | for i > n { 367 | fmt.Print(dots) 368 | i -= n 369 | } 370 | // i <= n 371 | fmt.Print(dots[0:i]) 372 | fmt.Println(a...) 373 | } 374 | 375 | func trace(p *Parser, msg string) *Parser { 376 | p.printTrace(msg, "(") 377 | p.indent++ 378 | return p 379 | } 380 | 381 | // Usage pattern: defer un(trace(p, "...")) 382 | func un(p *Parser) { 383 | p.indent-- 384 | p.printTrace(")") 385 | } 386 | -------------------------------------------------------------------------------- /parser/parser_test.go: -------------------------------------------------------------------------------- 1 | package parser 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "path/filepath" 7 | "reflect" 8 | "runtime" 9 | "testing" 10 | 11 | "github.com/fatih/hcl/ast" 12 | "github.com/fatih/hcl/token" 13 | ) 14 | 15 | func TestType(t *testing.T) { 16 | var literals = []struct { 17 | typ token.Type 18 | src string 19 | }{ 20 | {token.STRING, `foo = "foo"`}, 21 | {token.NUMBER, `foo = 123`}, 22 | {token.FLOAT, `foo = 123.12`}, 23 | {token.FLOAT, `foo = -123.12`}, 24 | {token.BOOL, `foo = true`}, 25 | } 26 | 27 | for _, l := range literals { 28 | p := newParser([]byte(l.src)) 29 | item, err := p.objectItem() 30 | if err != nil { 31 | t.Error(err) 32 | } 33 | 34 | lit, ok := item.Val.(*ast.LiteralType) 35 | if !ok { 36 | t.Errorf("node should be of type LiteralType, got: %T", item.Val) 37 | } 38 | 39 | if lit.Token.Type != l.typ { 40 | t.Errorf("want: %s, got: %s", l.typ, lit.Token.Type) 41 | } 42 | } 43 | } 44 | 45 | 
func TestListType(t *testing.T) { 46 | var literals = []struct { 47 | src string 48 | tokens []token.Type 49 | }{ 50 | { 51 | `foo = ["123", 123]`, 52 | []token.Type{token.STRING, token.NUMBER}, 53 | }, 54 | { 55 | `foo = [123, "123",]`, 56 | []token.Type{token.NUMBER, token.STRING}, 57 | }, 58 | { 59 | `foo = []`, 60 | []token.Type{}, 61 | }, 62 | { 63 | `foo = ["123", 123]`, 64 | []token.Type{token.STRING, token.NUMBER}, 65 | }, 66 | } 67 | 68 | for _, l := range literals { 69 | p := newParser([]byte(l.src)) 70 | item, err := p.objectItem() 71 | if err != nil { 72 | t.Error(err) 73 | } 74 | 75 | list, ok := item.Val.(*ast.ListType) 76 | if !ok { 77 | t.Errorf("node should be of type LiteralType, got: %T", item.Val) 78 | } 79 | 80 | tokens := []token.Type{} 81 | for _, li := range list.List { 82 | if tp, ok := li.(*ast.LiteralType); ok { 83 | tokens = append(tokens, tp.Token.Type) 84 | } 85 | } 86 | 87 | equals(t, l.tokens, tokens) 88 | } 89 | } 90 | 91 | func TestObjectType(t *testing.T) { 92 | var literals = []struct { 93 | src string 94 | nodeType []ast.Node 95 | itemLen int 96 | }{ 97 | { 98 | `foo = {}`, 99 | nil, 100 | 0, 101 | }, 102 | { 103 | `foo = { 104 | bar = "fatih" 105 | }`, 106 | []ast.Node{&ast.LiteralType{}}, 107 | 1, 108 | }, 109 | { 110 | `foo = { 111 | bar = "fatih" 112 | baz = ["arslan"] 113 | }`, 114 | []ast.Node{ 115 | &ast.LiteralType{}, 116 | &ast.ListType{}, 117 | }, 118 | 2, 119 | }, 120 | { 121 | `foo = { 122 | bar {} 123 | }`, 124 | []ast.Node{ 125 | &ast.ObjectType{}, 126 | }, 127 | 1, 128 | }, 129 | { 130 | `foo { 131 | bar {} 132 | foo = true 133 | }`, 134 | []ast.Node{ 135 | &ast.ObjectType{}, 136 | &ast.LiteralType{}, 137 | }, 138 | 2, 139 | }, 140 | } 141 | 142 | for _, l := range literals { 143 | p := newParser([]byte(l.src)) 144 | // p.enableTrace = true 145 | item, err := p.objectItem() 146 | if err != nil { 147 | t.Error(err) 148 | } 149 | 150 | // we know that the ObjectKey name is foo for all cases, what matters 151 | // is 
the object 152 | obj, ok := item.Val.(*ast.ObjectType) 153 | if !ok { 154 | t.Errorf("node should be of type LiteralType, got: %T", item.Val) 155 | } 156 | 157 | // check if the total length of items are correct 158 | equals(t, l.itemLen, len(obj.List.Items)) 159 | 160 | // check if the types are correct 161 | for i, item := range obj.List.Items { 162 | equals(t, reflect.TypeOf(l.nodeType[i]), reflect.TypeOf(item.Val)) 163 | } 164 | } 165 | } 166 | 167 | func TestObjectKey(t *testing.T) { 168 | keys := []struct { 169 | exp []token.Type 170 | src string 171 | }{ 172 | {[]token.Type{token.IDENT}, `foo {}`}, 173 | {[]token.Type{token.IDENT}, `foo = {}`}, 174 | {[]token.Type{token.IDENT}, `foo = bar`}, 175 | {[]token.Type{token.IDENT}, `foo = 123`}, 176 | {[]token.Type{token.IDENT}, `foo = "${var.bar}`}, 177 | {[]token.Type{token.STRING}, `"foo" {}`}, 178 | {[]token.Type{token.STRING}, `"foo" = {}`}, 179 | {[]token.Type{token.STRING}, `"foo" = "${var.bar}`}, 180 | {[]token.Type{token.IDENT, token.IDENT}, `foo bar {}`}, 181 | {[]token.Type{token.IDENT, token.STRING}, `foo "bar" {}`}, 182 | {[]token.Type{token.STRING, token.IDENT}, `"foo" bar {}`}, 183 | {[]token.Type{token.IDENT, token.IDENT, token.IDENT}, `foo bar baz {}`}, 184 | } 185 | 186 | for _, k := range keys { 187 | p := newParser([]byte(k.src)) 188 | keys, err := p.objectKey() 189 | if err != nil { 190 | t.Fatal(err) 191 | } 192 | 193 | tokens := []token.Type{} 194 | for _, o := range keys { 195 | tokens = append(tokens, o.Token.Type) 196 | } 197 | 198 | equals(t, k.exp, tokens) 199 | } 200 | 201 | errKeys := []struct { 202 | src string 203 | }{ 204 | {`foo 12 {}`}, 205 | {`foo bar = {}`}, 206 | {`foo []`}, 207 | {`12 {}`}, 208 | } 209 | 210 | for _, k := range errKeys { 211 | p := newParser([]byte(k.src)) 212 | _, err := p.objectKey() 213 | if err == nil { 214 | t.Errorf("case '%s' should give an error", k.src) 215 | } 216 | } 217 | } 218 | 219 | // Official HCL tests 220 | func TestParse(t *testing.T) { 221 
| cases := []struct { 222 | Name string 223 | Err bool 224 | }{ 225 | { 226 | "assign_colon.hcl", 227 | true, 228 | }, 229 | { 230 | "comment.hcl", 231 | false, 232 | }, 233 | { 234 | "comment_single.hcl", 235 | false, 236 | }, 237 | { 238 | "empty.hcl", 239 | false, 240 | }, 241 | { 242 | "list_comma.hcl", 243 | false, 244 | }, 245 | { 246 | "multiple.hcl", 247 | false, 248 | }, 249 | { 250 | "structure.hcl", 251 | false, 252 | }, 253 | { 254 | "structure_basic.hcl", 255 | false, 256 | }, 257 | { 258 | "structure_empty.hcl", 259 | false, 260 | }, 261 | { 262 | "complex.hcl", 263 | false, 264 | }, 265 | { 266 | "assign_deep.hcl", 267 | true, 268 | }, 269 | { 270 | "types.hcl", 271 | false, 272 | }, 273 | { 274 | "array_comment.hcl", 275 | false, 276 | }, 277 | } 278 | 279 | const fixtureDir = "./test-fixtures" 280 | 281 | for _, tc := range cases { 282 | d, err := ioutil.ReadFile(filepath.Join(fixtureDir, tc.Name)) 283 | if err != nil { 284 | t.Fatalf("err: %s", err) 285 | } 286 | 287 | _, err = Parse(d) 288 | if (err != nil) != tc.Err { 289 | t.Fatalf("Input: %s\n\nError: %s", tc.Name, err) 290 | } 291 | } 292 | } 293 | 294 | // equals fails the test if exp is not equal to act. 
295 | func equals(tb testing.TB, exp, act interface{}) { 296 | if !reflect.DeepEqual(exp, act) { 297 | _, file, line, _ := runtime.Caller(1) 298 | fmt.Printf("\033[31m%s:%d:\n\n\texp: %#v\n\n\tgot: %#v\033[39m\n\n", filepath.Base(file), line, exp, act) 299 | tb.FailNow() 300 | } 301 | } 302 | -------------------------------------------------------------------------------- /parser/test-fixtures/array_comment.hcl: -------------------------------------------------------------------------------- 1 | foo = [ 2 | "1", 3 | "2", # comment 4 | ] 5 | -------------------------------------------------------------------------------- /parser/test-fixtures/assign_colon.hcl: -------------------------------------------------------------------------------- 1 | resource = [{ 2 | "foo": { 3 | "bar": {}, 4 | "baz": [1, 2, "foo"], 5 | } 6 | }] 7 | -------------------------------------------------------------------------------- /parser/test-fixtures/assign_deep.hcl: -------------------------------------------------------------------------------- 1 | resource = [{ 2 | foo = [{ 3 | bar = {} 4 | }] 5 | }] 6 | -------------------------------------------------------------------------------- /parser/test-fixtures/comment.hcl: -------------------------------------------------------------------------------- 1 | // Foo 2 | 3 | /* Bar */ 4 | 5 | /* 6 | /* 7 | Baz 8 | */ 9 | 10 | # Another 11 | 12 | # Multiple 13 | # Lines 14 | 15 | foo = "bar" 16 | -------------------------------------------------------------------------------- /parser/test-fixtures/comment_single.hcl: -------------------------------------------------------------------------------- 1 | # Hello 2 | -------------------------------------------------------------------------------- /parser/test-fixtures/complex.hcl: -------------------------------------------------------------------------------- 1 | variable "foo" { 2 | default = "bar" 3 | description = "bar" 4 | } 5 | 6 | provider "aws" { 7 | access_key = "foo" 8 | secret_key = "bar" 
9 | } 10 | 11 | provider "do" { 12 | api_key = "${var.foo}" 13 | } 14 | 15 | resource "aws_security_group" "firewall" { 16 | count = 5 17 | } 18 | 19 | resource aws_instance "web" { 20 | ami = "${var.foo}" 21 | security_groups = [ 22 | "foo", 23 | "${aws_security_group.firewall.foo}", 24 | ] 25 | network_interface = { 26 | device_index = 0 27 | description = "Main network interface" 28 | } 29 | } 30 | 31 | resource "aws_instance" "db" { 32 | security_groups = "${aws_security_group.firewall.*.id}" 33 | VPC = "foo" 34 | depends_on = ["aws_instance.web"] 35 | } 36 | 37 | output "web_ip" { 38 | value = "${aws_instance.web.private_ip}" 39 | } 40 | -------------------------------------------------------------------------------- /parser/test-fixtures/complex_key.hcl: -------------------------------------------------------------------------------- 1 | foo.bar = "baz" 2 | -------------------------------------------------------------------------------- /parser/test-fixtures/empty.hcl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fatih/hcl/8f83adfc08e6d7162ef328a06cf00ee5fb865f30/parser/test-fixtures/empty.hcl -------------------------------------------------------------------------------- /parser/test-fixtures/list.hcl: -------------------------------------------------------------------------------- 1 | foo = [1, 2, "foo"] 2 | -------------------------------------------------------------------------------- /parser/test-fixtures/list_comma.hcl: -------------------------------------------------------------------------------- 1 | foo = [1, 2, "foo",] 2 | -------------------------------------------------------------------------------- /parser/test-fixtures/multiple.hcl: -------------------------------------------------------------------------------- 1 | foo = "bar" 2 | key = 7 3 | -------------------------------------------------------------------------------- /parser/test-fixtures/old.hcl: 
-------------------------------------------------------------------------------- 1 | default = { 2 | "eu-west-1": "ami-b1cf19c6", 3 | } 4 | -------------------------------------------------------------------------------- /parser/test-fixtures/structure.hcl: -------------------------------------------------------------------------------- 1 | // This is a test structure for the lexer 2 | foo bar "baz" { 3 | key = 7 4 | foo = "bar" 5 | } 6 | -------------------------------------------------------------------------------- /parser/test-fixtures/structure_basic.hcl: -------------------------------------------------------------------------------- 1 | foo { 2 | value = 7 3 | "value" = 8 4 | "complex::value" = 9 5 | } 6 | -------------------------------------------------------------------------------- /parser/test-fixtures/structure_empty.hcl: -------------------------------------------------------------------------------- 1 | resource "foo" "bar" {} 2 | -------------------------------------------------------------------------------- /parser/test-fixtures/types.hcl: -------------------------------------------------------------------------------- 1 | foo = "bar" 2 | bar = 7 3 | baz = [1,2,3] 4 | foo = -12 5 | bar = 3.14159 6 | foo = true 7 | bar = false 8 | -------------------------------------------------------------------------------- /printer/nodes.go: -------------------------------------------------------------------------------- 1 | package printer 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "sort" 7 | 8 | "github.com/fatih/hcl/ast" 9 | "github.com/fatih/hcl/token" 10 | ) 11 | 12 | const ( 13 | blank = byte(' ') 14 | newline = byte('\n') 15 | tab = byte('\t') 16 | infinity = 1 << 30 // offset or line 17 | ) 18 | 19 | type printer struct { 20 | cfg Config 21 | prev token.Pos 22 | 23 | comments []*ast.CommentGroup // may be nil, contains all comments 24 | standaloneComments []*ast.CommentGroup // contains all standalone comments (not assigned to any node) 25 | 26 | 
	enableTrace bool // when set, printTrace emits tracing output
	indentTrace int  // current tracing indentation level
}

// ByPosition sorts comment groups by their position in the source.
type ByPosition []*ast.CommentGroup

func (b ByPosition) Len() int           { return len(b) }
func (b ByPosition) Swap(i, j int)      { b[i], b[j] = b[j], b[i] }
func (b ByPosition) Less(i, j int) bool { return b[i].Pos().Before(b[j].Pos()) }

// collectComments collects all standalone comments, i.e. comments which are
// not attached to any node as a lead or line comment. The result is stored,
// sorted by position, in p.standaloneComments.
func (p *printer) collectComments(node ast.Node) {
	// first collect all comments. This is already stored in
	// ast.File.(comments)
	ast.Walk(node, func(nn ast.Node) bool {
		switch t := nn.(type) {
		case *ast.File:
			p.comments = t.Comments
			return false
		}
		return true
	})

	// start with every comment as a standalone candidate, keyed by position
	standaloneComments := make(map[token.Pos]*ast.CommentGroup, 0)
	for _, c := range p.comments {
		standaloneComments[c.Pos()] = c
	}

	// next remove all lead and line comments from the overall comment map.
	// This will give us comments which are standalone, comments which are not
	// assigned to any kind of node.
	ast.Walk(node, func(nn ast.Node) bool {
		switch t := nn.(type) {
		case *ast.LiteralType:
			if t.LineComment != nil {
				for _, comment := range t.LineComment.List {
					if _, ok := standaloneComments[comment.Pos()]; ok {
						delete(standaloneComments, comment.Pos())
					}
				}
			}
		case *ast.ObjectItem:
			if t.LeadComment != nil {
				for _, comment := range t.LeadComment.List {
					if _, ok := standaloneComments[comment.Pos()]; ok {
						delete(standaloneComments, comment.Pos())
					}
				}
			}

			if t.LineComment != nil {
				for _, comment := range t.LineComment.List {
					if _, ok := standaloneComments[comment.Pos()]; ok {
						delete(standaloneComments, comment.Pos())
					}
				}
			}
		}

		return true
	})

	// whatever is left over is standalone; sort by position so the printer
	// can emit the comments in source order
	for _, c := range standaloneComments {
		p.standaloneComments = append(p.standaloneComments, c)
	}

	sort.Sort(ByPosition(p.standaloneComments))

}

// output creates the printable HCL output for n and returns it as a byte
// slice.
func (p *printer) output(n interface{}) []byte {
	var buf bytes.Buffer

	switch t := n.(type) {
	case *ast.File:
		// a file simply delegates to its root node
		return p.output(t.Node)
	case *ast.ObjectList:
		var index int
		var nextItem token.Pos
		var commented bool
		// NOTE(review): commented is declared but never set in this function
		// (only objectType sets its own); verify whether that is intended.
		for {
			// TODO(arslan): refactor below comment printing, we have the same in objectType
			for _, c := range p.standaloneComments {
				for _, comment := range c.List {
					// nextItem is the position the comment must precede to be
					// printed here; past the last item it is "infinity"
					if index != len(t.Items) {
						nextItem = t.Items[index].Pos()
					} else {
						nextItem = token.Pos{Offset: infinity, Line: infinity}
					}

					if comment.Pos().After(p.prev) && comment.Pos().Before(nextItem) {
						// if we hit the end add newlines so we can print the comment
						if index == len(t.Items) {
							buf.Write([]byte{newline, newline})
						}

						buf.WriteString(comment.Text)

						buf.WriteByte(newline)
						if index != len(t.Items) {
							buf.WriteByte(newline)
						}
					}
				}
			}

			if index == len(t.Items) {
				break
			}

			buf.Write(p.output(t.Items[index]))
			// separate items with a blank line, except after the last one
			if !commented && index != len(t.Items)-1 {
				buf.Write([]byte{newline, newline})
			}
			index++
		}
	case *ast.ObjectKey:
		buf.WriteString(t.Token.Text)
	case *ast.ObjectItem:
		// remember the position so standalone comments before the next item
		// can be located
		p.prev = t.Pos()
		buf.Write(p.objectItem(t))
	case *ast.LiteralType:
		buf.WriteString(t.Token.Text)
	case *ast.ListType:
		buf.Write(p.list(t))
	case *ast.ObjectType:
		buf.Write(p.objectType(t))
	default:
		fmt.Printf(" unknown type: %T\n", n)
	}

	return buf.Bytes()
}

// objectItem returns the printable HCL form of an object item. An object type
// starts with one/multiple keys and has a value. The value might be of any
// type.
func (p *printer) objectItem(o *ast.ObjectItem) []byte {
	defer un(trace(p, fmt.Sprintf("ObjectItem: %s", o.Keys[0].Token.Text)))
	var buf bytes.Buffer

	// lead comments go on their own lines directly above the item
	if o.LeadComment != nil {
		for _, comment := range o.LeadComment.List {
			buf.WriteString(comment.Text)
			buf.WriteByte(newline)
		}
	}

	for i, k := range o.Keys {
		buf.WriteString(k.Token.Text)
		buf.WriteByte(blank)

		// reach end of key; the "=" is only printed for single-key items
		// (multi-key items are object types, which take no assignment)
		if i == len(o.Keys)-1 && len(o.Keys) == 1 {
			buf.WriteString("=")
			buf.WriteByte(blank)
		}
	}

	buf.Write(p.output(o.Val))

	// only attach the line comment when the value starts on the same source
	// line as the keys (i.e. the item is a one-liner)
	if o.Val.Pos().Line == o.Keys[0].Pos().Line && o.LineComment != nil {
		buf.WriteByte(blank)
		for _, comment := range o.LineComment.List {
			buf.WriteString(comment.Text)
		}
	}

	return buf.Bytes()
}

// objectType returns the printable HCL form of an object type. An object type
// begins with a brace and ends with a brace.
func (p *printer) objectType(o *ast.ObjectType) []byte {
	defer un(trace(p, "ObjectType"))
	var buf bytes.Buffer
	buf.WriteString("{")
	buf.WriteByte(newline)

	var index int
	var nextItem token.Pos
	var commented bool
	for {
		// Print stand alone comments
		for _, c := range p.standaloneComments {
			for _, comment := range c.List {
				// if we hit the end, last item should be the brace
				if index != len(o.List.Items) {
					nextItem = o.List.Items[index].Pos()
				} else {
					nextItem = o.Rbrace
				}

				if comment.Pos().After(p.prev) && comment.Pos().Before(nextItem) {
					// add newline if it's between other printed nodes
					if index > 0 {
						commented = true
						buf.WriteByte(newline)
					}

					buf.Write(p.indent([]byte(comment.Text)))
					buf.WriteByte(newline)
					if index != len(o.List.Items) {
						buf.WriteByte(newline) // do not print on the end
					}
				}
			}
		}

		if index == len(o.List.Items) {
			p.prev = o.Rbrace
			break
		}

		// check if we have adjacent one liner items. If yes we'll going to align
		// the comments.
		var aligned []*ast.ObjectItem
		for _, item := range o.List.Items[index:] {
			// we don't group one line lists
			if len(o.List.Items) == 1 {
				break
			}

			// one means a oneliner with out any lead comment
			// two means a oneliner with lead comment
			// anything else might be something else
			cur := lines(string(p.objectItem(item)))
			if cur > 2 {
				break
			}

			curPos := item.Pos()

			nextPos := token.Pos{}
			if index != len(o.List.Items)-1 {
				nextPos = o.List.Items[index+1].Pos()
			}

			prevPos := token.Pos{}
			if index != 0 {
				prevPos = o.List.Items[index-1].Pos()
			}

			// an item belongs to the aligned group when it sits on a source
			// line adjacent to its neighbour
			if curPos.Line+1 == nextPos.Line {
				aligned = append(aligned, item)
				index++
				continue
			}

			if curPos.Line-1 == prevPos.Line {
				aligned = append(aligned, item)
				index++

				// finish if we have a new line or comment next. This happens
				// if the next item is not adjacent
				if curPos.Line+1 != nextPos.Line {
					break
				}
				continue
			}

			break
		}

		// put newlines if the items are between other non aligned items.
		// newlines are also added if there is a standalone comment already, so
		// check it too
		if !commented && index != len(aligned) {
			buf.WriteByte(newline)
		}

		if len(aligned) >= 1 {
			p.prev = aligned[len(aligned)-1].Pos()

			items := p.alignedItems(aligned)
			buf.Write(p.indent(items))
		} else {
			p.prev = o.List.Items[index].Pos()

			buf.Write(p.indent(p.objectItem(o.List.Items[index])))
			index++
		}

		buf.WriteByte(newline)
	}

	buf.WriteString("}")
	return buf.Bytes()
}

// alignedItems prints the given object items with their keys, assignment
// operators and line comments vertically aligned on common columns.
func (p *printer) alignedItems(items []*ast.ObjectItem) []byte {
	var buf bytes.Buffer

	// find the longest key and value length, needed for alignment
	var longestKeyLen int // longest key length
	var longestValLen int // longest value length
	for _, item := range items {
		key := len(item.Keys[0].Token.Text)
		val := len(p.output(item.Val))

		if key > longestKeyLen {
			longestKeyLen = key
		}

		if val > longestValLen {
			longestValLen = val
		}
	}

	for i, item := range items {
		if item.LeadComment != nil {
			for _, comment := range item.LeadComment.List {
				buf.WriteString(comment.Text)
				buf.WriteByte(newline)
			}
		}

		for i, k := range item.Keys {
			keyLen := len(k.Token.Text)
			buf.WriteString(k.Token.Text)
			// pad the key up to the longest key, plus one separating blank
			for i := 0; i < longestKeyLen-keyLen+1; i++ {
				buf.WriteByte(blank)
			}

			// reach end of key
			if i == len(item.Keys)-1 && len(item.Keys) == 1 {
				buf.WriteString("=")
				buf.WriteByte(blank)
			}
		}

		val := p.output(item.Val)
		valLen := len(val)
		buf.Write(val)

		// pad the value column so trailing line comments line up
		if item.Val.Pos().Line == item.Keys[0].Pos().Line && item.LineComment != nil {
			for i := 0; i < longestValLen-valLen+1; i++ {
				buf.WriteByte(blank)
			}

			for _, comment := range item.LineComment.List {
				buf.WriteString(comment.Text)
			}
		}

		// do not print for the last item
		if i != len(items)-1 {
			buf.WriteByte(newline)
		}
	}

	return buf.Bytes()
}

// list returns the printable HCL form of an list type.
func (p *printer) list(l *ast.ListType) []byte {
	var buf bytes.Buffer
	buf.WriteString("[")

	// find the longest literal so trailing line comments can be aligned
	var longestLine int
	for _, item := range l.List {
		// for now we assume that the list only contains literal types
		if lit, ok := item.(*ast.LiteralType); ok {
			lineLen := len(lit.Token.Text)
			if lineLen > longestLine {
				longestLine = lineLen
			}
		}
	}

	for i, item := range l.List {
		if item.Pos().Line != l.Lbrack.Line {
			// multiline list, add newline before we add each item
			buf.WriteByte(newline)
			// also indent each line
			val := p.output(item)
			curLen := len(val)
			buf.Write(p.indent(val))
			buf.WriteString(",")

			if lit, ok := item.(*ast.LiteralType); ok && lit.LineComment != nil {
				// if the next item doesn't have any comments, do not align
				buf.WriteByte(blank) // align one space
				if i != len(l.List)-1 {
					if lit, ok := l.List[i+1].(*ast.LiteralType); ok && lit.LineComment != nil {
						for i := 0; i < longestLine-curLen; i++ {
							buf.WriteByte(blank)
						}
					}
				}

				for _, comment := range lit.LineComment.List {
					buf.WriteString(comment.Text)
				}
			}

			if i == len(l.List)-1 {
				buf.WriteByte(newline)
			}
		} else {
			// single-line list: items separated by ", "
			buf.Write(p.output(item))
			if i != len(l.List)-1 {
				buf.WriteString(",")
				buf.WriteByte(blank)
			}
		}

	}

	buf.WriteString("]")
	return buf.Bytes()
}

// indent indents the lines of the given buffer for each non-empty line
func (p *printer) indent(buf []byte) []byte {
	// the prefix is either SpacesWidth blanks or a single tab
	var prefix []byte
	if p.cfg.SpacesWidth != 0 {
		for i := 0; i < p.cfg.SpacesWidth; i++ {
			prefix = append(prefix, blank)
		}
	} else {
		prefix = []byte{tab}
	}

	var res []byte
	bol := true // at beginning of a line?
	for _, c := range buf {
		if bol && c != '\n' {
			res = append(res, prefix...)
		}
		res = append(res, c)
		bol = c == '\n'
	}
	return res
}

// lines returns the number of lines in txt (1 plus the number of newlines).
func lines(txt string) int {
	endline := 1
	for i := 0; i < len(txt); i++ {
		if txt[i] == '\n' {
			endline++
		}
	}
	return endline
}

// ----------------------------------------------------------------------------
// Tracing support

// printTrace prints the given arguments at the current trace indentation.
// It is a no-op unless p.enableTrace is set.
func (p *printer) printTrace(a ...interface{}) {
	if !p.enableTrace {
		return
	}

	const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . "
	const n = len(dots)
	i := 2 * p.indentTrace
	for i > n {
		fmt.Print(dots)
		i -= n
	}
	// i <= n
	fmt.Print(dots[0:i])
	fmt.Println(a...)
}

// trace prints msg and increases the trace indentation. It returns p so the
// call can be composed with un in a single defer statement.
func trace(p *printer, msg string) *printer {
	p.printTrace(msg, "(")
	p.indentTrace++
	return p
}

// Usage pattern: defer un(trace(p, "..."))
func un(p *printer) {
	p.indentTrace--
	p.printTrace(")")
}
-------------------------------------------------------------------------------- /printer/printer.go: --------------------------------------------------------------------------------
// Package printer implements printing of AST nodes to HCL format.
package printer

import (
	"bytes"
	"io"
	"text/tabwriter"

	"github.com/fatih/hcl/ast"
	"github.com/fatih/hcl/parser"
)

// DefaultConfig is the configuration used by the package-level Fprint and
// Format helpers.
var DefaultConfig = Config{
	SpacesWidth: 2,
}

// A Config node controls the output of Fprint.
type Config struct {
	SpacesWidth int // if set, it will use spaces instead of tabs for alignment
}

// Fprint pretty-prints the given AST node to output using the configuration c.
func (c *Config) Fprint(output io.Writer, node ast.Node) error {
	p := &printer{
		cfg:                *c,
		comments:           make([]*ast.CommentGroup, 0),
		standaloneComments: make([]*ast.CommentGroup, 0),
		// enableTrace: true,
	}

	p.collectComments(node)

	if _, err := output.Write(p.output(node)); err != nil {
		return err
	}

	// flush tabwriter, if any
	var err error
	if tw, _ := output.(*tabwriter.Writer); tw != nil {
		err = tw.Flush()
	}

	return err
}

// Fprint "pretty-prints" an HCL node to output
// It calls Config.Fprint with default settings.
func Fprint(output io.Writer, node ast.Node) error {
	return DefaultConfig.Fprint(output, node)
}

// Format formats src HCL and returns the result.
func Format(src []byte) ([]byte, error) {
	node, err := parser.Parse(src)
	if err != nil {
		return nil, err
	}

	var buf bytes.Buffer
	if err := DefaultConfig.Fprint(&buf, node); err != nil {
		return nil, err
	}

	return buf.Bytes(), nil
}
-------------------------------------------------------------------------------- /printer/printer_test.go: --------------------------------------------------------------------------------
package printer

import (
	"bytes"
	"errors"
	"flag"
	"fmt"
	"io/ioutil"
	"path/filepath"
	"testing"

	"github.com/fatih/hcl/parser"
)

// update controls whether golden files are rewritten instead of compared.
var update = flag.Bool("update", false, "update golden files")

const (
	dataDir = "testdata" // directory holding input and golden files
)

// entry pairs an input file with its expected golden output file.
type entry struct {
	source, golden string
}

// Use go test -update to create/update the respective golden files.
// data lists the input files and the golden files they must match.
var data = []entry{
	{"complexhcl.input", "complexhcl.golden"},
	{"list.input", "list.golden"},
	{"comment.input", "comment.golden"},
	{"comment_aligned.input", "comment_aligned.golden"},
	{"comment_standalone.input", "comment_standalone.golden"},
}

// TestFiles formats every input file and compares it to its golden file.
func TestFiles(t *testing.T) {
	for _, e := range data {
		source := filepath.Join(dataDir, e.source)
		golden := filepath.Join(dataDir, e.golden)
		check(t, source, golden)
	}
}

// check formats the source file and compares the result against the golden
// file. When the -update flag is set the golden file is rewritten instead.
func check(t *testing.T, source, golden string) {
	src, err := ioutil.ReadFile(source)
	if err != nil {
		t.Error(err)
		return
	}

	res, err := format(src)
	if err != nil {
		t.Error(err)
		return
	}

	// update golden files if necessary
	if *update {
		if err := ioutil.WriteFile(golden, res, 0644); err != nil {
			t.Error(err)
		}
		return
	}

	// get golden
	gld, err := ioutil.ReadFile(golden)
	if err != nil {
		t.Error(err)
		return
	}

	// formatted source and golden must be the same
	if err := diff(source, golden, res, gld); err != nil {
		t.Error(err)
		return
	}
}

// diff compares a and b.
78 | func diff(aname, bname string, a, b []byte) error { 79 | var buf bytes.Buffer // holding long error message 80 | 81 | // compare lengths 82 | if len(a) != len(b) { 83 | fmt.Fprintf(&buf, "\nlength changed: len(%s) = %d, len(%s) = %d", aname, len(a), bname, len(b)) 84 | } 85 | 86 | // compare contents 87 | line := 1 88 | offs := 1 89 | for i := 0; i < len(a) && i < len(b); i++ { 90 | ch := a[i] 91 | if ch != b[i] { 92 | fmt.Fprintf(&buf, "\n%s:%d:%d: %s", aname, line, i-offs+1, lineAt(a, offs)) 93 | fmt.Fprintf(&buf, "\n%s:%d:%d: %s", bname, line, i-offs+1, lineAt(b, offs)) 94 | fmt.Fprintf(&buf, "\n\n") 95 | break 96 | } 97 | if ch == '\n' { 98 | line++ 99 | offs = i + 1 100 | } 101 | } 102 | 103 | if buf.Len() > 0 { 104 | return errors.New(buf.String()) 105 | } 106 | return nil 107 | } 108 | 109 | // format parses src, prints the corresponding AST, verifies the resulting 110 | // src is syntactically correct, and returns the resulting src or an error 111 | // if any. 112 | func format(src []byte) ([]byte, error) { 113 | // parse src 114 | node, err := parser.Parse(src) 115 | if err != nil { 116 | return nil, fmt.Errorf("parse: %s\n%s", err, src) 117 | } 118 | 119 | var buf bytes.Buffer 120 | 121 | cfg := &Config{} 122 | if err := cfg.Fprint(&buf, node); err != nil { 123 | return nil, fmt.Errorf("print: %s", err) 124 | } 125 | 126 | // make sure formatted output is syntactically correct 127 | res := buf.Bytes() 128 | 129 | if _, err := parser.Parse(src); err != nil { 130 | return nil, fmt.Errorf("parse: %s\n%s", err, src) 131 | } 132 | 133 | return res, nil 134 | } 135 | 136 | // lineAt returns the line in text starting at offset offs. 
137 | func lineAt(text []byte, offs int) []byte { 138 | i := offs 139 | for i < len(text) && text[i] != '\n' { 140 | i++ 141 | } 142 | return text[offs:i] 143 | } 144 | -------------------------------------------------------------------------------- /printer/testdata/comment.golden: -------------------------------------------------------------------------------- 1 | // A standalone comment is a comment which is not attached to any kind of node 2 | 3 | // This comes from Terraform, as a test 4 | variable "foo" { 5 | # Standalone comment should be still here 6 | 7 | default = "bar" 8 | description = "bar" # yooo 9 | } 10 | 11 | /* This is a multi line standalone 12 | comment*/ 13 | 14 | // fatih arslan 15 | /* This is a developer test 16 | account and a multine comment */ 17 | developer = ["fatih", "arslan"] // fatih arslan 18 | 19 | # One line here 20 | numbers = [1, 2] // another line here 21 | 22 | # Another comment 23 | variable = { 24 | description = "bar" # another yooo 25 | 26 | foo = { 27 | # Nested standalone 28 | 29 | bar = "fatih" 30 | } 31 | } 32 | 33 | // lead comment 34 | foo = { 35 | bar = "fatih" // line comment 2 36 | } // line comment 3 -------------------------------------------------------------------------------- /printer/testdata/comment.input: -------------------------------------------------------------------------------- 1 | // A standalone comment is a comment which is not attached to any kind of node 2 | 3 | // This comes from Terraform, as a test 4 | variable "foo" { 5 | # Standalone comment should be still here 6 | 7 | default = "bar" 8 | description = "bar" # yooo 9 | } 10 | 11 | /* This is a multi line standalone 12 | comment*/ 13 | 14 | 15 | // fatih arslan 16 | /* This is a developer test 17 | account and a multine comment */ 18 | developer = [ "fatih", "arslan"] // fatih arslan 19 | 20 | # One line here 21 | numbers = [1,2] // another line here 22 | 23 | # Another comment 24 | variable = { 25 | description = "bar" # another yooo 26 | 
foo { 27 | # Nested standalone 28 | 29 | bar = "fatih" 30 | } 31 | } 32 | 33 | // lead comment 34 | foo { 35 | bar = "fatih" // line comment 2 36 | } // line comment 3 37 | 38 | -------------------------------------------------------------------------------- /printer/testdata/comment_aligned.golden: -------------------------------------------------------------------------------- 1 | aligned = { 2 | # We have some aligned items below 3 | foo = "fatih" # yoo1 4 | default = "bar" # yoo2 5 | bar = "bar and foo" # yoo3 6 | 7 | default = { 8 | bar = "example" 9 | } 10 | 11 | #deneme arslan 12 | fatih = ["fatih"] # yoo4 13 | 14 | #fatih arslan 15 | fatiharslan = ["arslan"] // yoo5 16 | 17 | default = { 18 | bar = "example" 19 | } 20 | 21 | security_groups = [ 22 | "foo", # kenya 1 23 | "${aws_security_group.firewall.foo}", # kenya 2 24 | ] 25 | } -------------------------------------------------------------------------------- /printer/testdata/comment_aligned.input: -------------------------------------------------------------------------------- 1 | aligned { 2 | # We have some aligned items below 3 | foo = "fatih" # yoo1 4 | default = "bar" # yoo2 5 | bar = "bar and foo" # yoo3 6 | default = { 7 | bar = "example" 8 | } 9 | #deneme arslan 10 | fatih = ["fatih"] # yoo4 11 | #fatih arslan 12 | fatiharslan = ["arslan"] // yoo5 13 | default = { 14 | bar = "example" 15 | } 16 | 17 | security_groups = [ 18 | "foo", # kenya 1 19 | "${aws_security_group.firewall.foo}", # kenya 2 20 | ] 21 | } 22 | -------------------------------------------------------------------------------- /printer/testdata/comment_standalone.golden: -------------------------------------------------------------------------------- 1 | // A standalone comment 2 | 3 | aligned = { 4 | # Standalone 1 5 | 6 | a = "bar" # yoo1 7 | default = "bar" # yoo2 8 | 9 | # Standalone 2 10 | } 11 | 12 | # Standalone 3 13 | 14 | numbers = [1, 2] // another line here 15 | 16 | # Standalone 4 17 | 
-------------------------------------------------------------------------------- /printer/testdata/comment_standalone.input: -------------------------------------------------------------------------------- 1 | // A standalone comment 2 | 3 | aligned { 4 | # Standalone 1 5 | 6 | a = "bar" # yoo1 7 | default = "bar" # yoo2 8 | 9 | # Standalone 2 10 | } 11 | 12 | # Standalone 3 13 | 14 | numbers = [1,2] // another line here 15 | 16 | # Standalone 4 17 | -------------------------------------------------------------------------------- /printer/testdata/complexhcl.golden: -------------------------------------------------------------------------------- 1 | variable "foo" { 2 | default = "bar" 3 | description = "bar" 4 | } 5 | 6 | developer = ["fatih", "arslan"] 7 | 8 | provider "aws" { 9 | access_key = "foo" 10 | secret_key = "bar" 11 | } 12 | 13 | provider "do" { 14 | api_key = "${var.foo}" 15 | } 16 | 17 | resource "aws_security_group" "firewall" { 18 | count = 5 19 | } 20 | 21 | resource aws_instance "web" { 22 | ami = "${var.foo}" 23 | 24 | security_groups = [ 25 | "foo", 26 | "${aws_security_group.firewall.foo}", 27 | ] 28 | 29 | network_interface = { 30 | device_index = 0 31 | description = "Main network interface" 32 | } 33 | } 34 | 35 | resource "aws_instance" "db" { 36 | security_groups = "${aws_security_group.firewall.*.id}" 37 | VPC = "foo" 38 | 39 | depends_on = ["aws_instance.web"] 40 | } 41 | 42 | output "web_ip" { 43 | value = "${aws_instance.web.private_ip}" 44 | } -------------------------------------------------------------------------------- /printer/testdata/complexhcl.input: -------------------------------------------------------------------------------- 1 | variable "foo" { 2 | default = "bar" 3 | description = "bar" 4 | } 5 | 6 | developer = [ "fatih", "arslan"] 7 | 8 | provider "aws" { 9 | access_key ="foo" 10 | secret_key = "bar" 11 | } 12 | 13 | provider "do" { 14 | api_key = "${var.foo}" 15 | } 16 | 17 | resource "aws_security_group" "firewall" 
{ 18 | count = 5 19 | } 20 | 21 | resource aws_instance "web" { 22 | ami = "${var.foo}" 23 | security_groups = [ 24 | "foo", 25 | "${aws_security_group.firewall.foo}" 26 | ] 27 | 28 | network_interface { 29 | device_index = 0 30 | description = "Main network interface" 31 | } 32 | } 33 | 34 | resource "aws_instance" "db" { 35 | security_groups = "${aws_security_group.firewall.*.id}" 36 | VPC = "foo" 37 | 38 | depends_on = ["aws_instance.web"] 39 | } 40 | 41 | output "web_ip" { 42 | 43 | value="${aws_instance.web.private_ip}" 44 | } 45 | -------------------------------------------------------------------------------- /printer/testdata/list.golden: -------------------------------------------------------------------------------- 1 | foo = ["fatih", "arslan"] 2 | 3 | foo = ["bar", "qaz"] 4 | 5 | foo = ["zeynep", 6 | "arslan", 7 | ] 8 | 9 | foo = ["fatih", "zeynep", 10 | "arslan", 11 | ] 12 | 13 | foo = [ 14 | "vim-go", 15 | "golang", 16 | "hcl", 17 | ] 18 | 19 | foo = [] 20 | 21 | foo = [1, 2, 3, 4] 22 | 23 | foo = [ 24 | "kenya", 25 | "ethiopia", 26 | "columbia", 27 | ] -------------------------------------------------------------------------------- /printer/testdata/list.input: -------------------------------------------------------------------------------- 1 | foo = ["fatih", "arslan" ] 2 | 3 | foo = [ "bar", "qaz", ] 4 | 5 | foo = [ "zeynep", 6 | "arslan", ] 7 | 8 | foo = ["fatih", "zeynep", 9 | "arslan", ] 10 | 11 | foo = [ 12 | "vim-go", 13 | "golang", "hcl"] 14 | 15 | foo = [] 16 | 17 | foo = [1, 2,3, 4] 18 | 19 | foo = [ 20 | "kenya", "ethiopia", 21 | "columbia"] 22 | -------------------------------------------------------------------------------- /scanner/scanner.go: -------------------------------------------------------------------------------- 1 | // Package scanner implements a scanner for HCL (HashiCorp Configuration 2 | // Language) source text. 
package scanner

import (
	"bytes"
	"fmt"
	"os"
	"unicode"
	"unicode/utf8"

	"github.com/fatih/hcl/token"
)

// eof represents a marker rune for the end of the reader.
const eof = rune(0)

// Scanner defines a lexical scanner
type Scanner struct {
	buf *bytes.Buffer // Source buffer for advancing and scanning
	src []byte        // Source buffer for immutable access

	// Source Position
	srcPos  token.Pos // current position
	prevPos token.Pos // previous position, used for peek() method

	lastCharLen int // length of last character in bytes
	lastLineLen int // length of last line in characters (for correct column reporting)

	tokStart int // token text start position
	tokEnd   int // token text end position

	// Error is called for each error encountered. If no Error
	// function is set, the error is reported to os.Stderr.
	Error func(pos token.Pos, msg string)

	// ErrorCount is incremented by one for each error encountered.
	ErrorCount int

	// tokPos is the start position of most recently scanned token; set by
	// Scan. The Filename field is always left untouched by the Scanner. If
	// an error is reported (via Error) and Position is invalid, the scanner is
	// not inside a token.
	tokPos token.Pos
}

// New creates and initializes a new instance of Scanner using src as
// its source content.
func New(src []byte) *Scanner {
	// even though we accept a src, we read from a io.Reader compatible type
	// (*bytes.Buffer). So in the future we might easily change it to streaming
	// read.
	b := bytes.NewBuffer(src)
	s := &Scanner{
		buf: b,
		src: src,
	}

	// srcPosition always starts with 1
	s.srcPos.Line = 1
	return s
}

// next reads the next rune from the buffered reader. Returns the rune(0) if
// an error occurs (or io.EOF is returned).
func (s *Scanner) next() rune {
	ch, size, err := s.buf.ReadRune()
	if err != nil {
		// advance for error reporting
		s.srcPos.Column++
		s.srcPos.Offset += size
		s.lastCharLen = size
		return eof
	}

	if ch == utf8.RuneError && size == 1 {
		// invalid byte sequence: still advance so error positions are sane
		s.srcPos.Column++
		s.srcPos.Offset += size
		s.lastCharLen = size
		s.err("illegal UTF-8 encoding")
		return ch
	}

	// remember last position
	s.prevPos = s.srcPos

	s.srcPos.Column++
	s.lastCharLen = size
	s.srcPos.Offset += size

	if ch == '\n' {
		s.srcPos.Line++
		s.lastLineLen = s.srcPos.Column
		s.srcPos.Column = 0
	}

	// debug
	// fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column)
	return ch
}

// unread unreads the previous read Rune and updates the source position
func (s *Scanner) unread() {
	if err := s.buf.UnreadRune(); err != nil {
		panic(err) // this is user fault, we should catch it
	}
	s.srcPos = s.prevPos // put back last position
}

// peek returns the next rune without advancing the reader.
func (s *Scanner) peek() rune {
	peek, _, err := s.buf.ReadRune()
	if err != nil {
		return eof
	}

	s.buf.UnreadRune()
	return peek
}

// Scan scans the next token and returns the token.
122 | func (s *Scanner) Scan() token.Token { 123 | ch := s.next() 124 | 125 | // skip white space 126 | for isWhitespace(ch) { 127 | ch = s.next() 128 | } 129 | 130 | var tok token.Type 131 | 132 | // token text markings 133 | s.tokStart = s.srcPos.Offset - s.lastCharLen 134 | 135 | // token position, initial next() is moving the offset by one(size of rune 136 | // actually), though we are interested with the starting point 137 | s.tokPos.Offset = s.srcPos.Offset - s.lastCharLen 138 | if s.srcPos.Column > 0 { 139 | // common case: last character was not a '\n' 140 | s.tokPos.Line = s.srcPos.Line 141 | s.tokPos.Column = s.srcPos.Column 142 | } else { 143 | // last character was a '\n' 144 | // (we cannot be at the beginning of the source 145 | // since we have called next() at least once) 146 | s.tokPos.Line = s.srcPos.Line - 1 147 | s.tokPos.Column = s.lastLineLen 148 | } 149 | 150 | switch { 151 | case isLetter(ch): 152 | tok = token.IDENT 153 | lit := s.scanIdentifier() 154 | if lit == "true" || lit == "false" { 155 | tok = token.BOOL 156 | } 157 | case isDecimal(ch): 158 | tok = s.scanNumber(ch) 159 | default: 160 | switch ch { 161 | case eof: 162 | tok = token.EOF 163 | case '"': 164 | tok = token.STRING 165 | s.scanString() 166 | case '#', '/': 167 | tok = token.COMMENT 168 | s.scanComment(ch) 169 | case '.': 170 | tok = token.PERIOD 171 | ch = s.peek() 172 | if isDecimal(ch) { 173 | tok = token.FLOAT 174 | ch = s.scanMantissa(ch) 175 | ch = s.scanExponent(ch) 176 | } 177 | case '[': 178 | tok = token.LBRACK 179 | case ']': 180 | tok = token.RBRACK 181 | case '{': 182 | tok = token.LBRACE 183 | case '}': 184 | tok = token.RBRACE 185 | case ',': 186 | tok = token.COMMA 187 | case '=': 188 | tok = token.ASSIGN 189 | case '+': 190 | tok = token.ADD 191 | case '-': 192 | if isDecimal(s.peek()) { 193 | ch := s.next() 194 | tok = s.scanNumber(ch) 195 | } else { 196 | tok = token.SUB 197 | } 198 | default: 199 | s.err("illegal char") 200 | } 201 | } 202 | 203 | // 
finish token ending 204 | s.tokEnd = s.srcPos.Offset 205 | 206 | // create token literal 207 | var tokenText string 208 | if s.tokStart >= 0 { 209 | tokenText = string(s.src[s.tokStart:s.tokEnd]) 210 | } 211 | s.tokStart = s.tokEnd // ensure idempotency of tokenText() call 212 | 213 | return token.Token{ 214 | Type: tok, 215 | Pos: s.tokPos, 216 | Text: tokenText, 217 | } 218 | } 219 | 220 | func (s *Scanner) scanComment(ch rune) { 221 | // single line comments 222 | if ch == '#' || (ch == '/' && s.peek() != '*') { 223 | ch = s.next() 224 | for ch != '\n' && ch >= 0 { 225 | ch = s.next() 226 | } 227 | s.unread() 228 | return 229 | } 230 | 231 | // be sure we get the character after /* This allows us to find comment's 232 | // that are not erminated 233 | if ch == '/' { 234 | s.next() 235 | ch = s.next() // read character after "/*" 236 | } 237 | 238 | // look for /* - style comments 239 | for { 240 | if ch < 0 || ch == eof { 241 | s.err("comment not terminated") 242 | break 243 | } 244 | 245 | ch0 := ch 246 | ch = s.next() 247 | if ch0 == '*' && ch == '/' { 248 | break 249 | } 250 | } 251 | } 252 | 253 | // scanNumber scans a HCL number definition starting with the given rune 254 | func (s *Scanner) scanNumber(ch rune) token.Type { 255 | if ch == '0' { 256 | // check for hexadecimal, octal or float 257 | ch = s.next() 258 | if ch == 'x' || ch == 'X' { 259 | // hexadecimal 260 | ch = s.next() 261 | found := false 262 | for isHexadecimal(ch) { 263 | ch = s.next() 264 | found = true 265 | } 266 | 267 | if !found { 268 | s.err("illegal hexadecimal number") 269 | } 270 | 271 | if ch != eof { 272 | s.unread() 273 | } 274 | 275 | return token.NUMBER 276 | } 277 | 278 | // now it's either something like: 0421(octal) or 0.1231(float) 279 | illegalOctal := false 280 | for isDecimal(ch) { 281 | ch = s.next() 282 | if ch == '8' || ch == '9' { 283 | // this is just a possibility. For example 0159 is illegal, but 284 | // 0159.23 is valid. So we mark a possible illegal octal. 
If 285 | // the next character is not a period, we'll print the error. 286 | illegalOctal = true 287 | } 288 | } 289 | 290 | // literals of form 01e10 are treates as Numbers in HCL, which differs from Go. 291 | if ch == 'e' || ch == 'E' { 292 | ch = s.scanExponent(ch) 293 | return token.NUMBER 294 | } 295 | 296 | if ch == '.' { 297 | ch = s.scanFraction(ch) 298 | 299 | if ch == 'e' || ch == 'E' { 300 | ch = s.next() 301 | ch = s.scanExponent(ch) 302 | } 303 | return token.FLOAT 304 | } 305 | 306 | if illegalOctal { 307 | s.err("illegal octal number") 308 | } 309 | 310 | if ch != eof { 311 | s.unread() 312 | } 313 | return token.NUMBER 314 | } 315 | 316 | s.scanMantissa(ch) 317 | ch = s.next() // seek forward 318 | // literals of form 1e10 are treates as Numbers in HCL, which differs from Go. 319 | if ch == 'e' || ch == 'E' { 320 | ch = s.scanExponent(ch) 321 | return token.NUMBER 322 | } 323 | 324 | if ch == '.' { 325 | ch = s.scanFraction(ch) 326 | if ch == 'e' || ch == 'E' { 327 | ch = s.next() 328 | ch = s.scanExponent(ch) 329 | } 330 | return token.FLOAT 331 | } 332 | 333 | if ch != eof { 334 | s.unread() 335 | } 336 | return token.NUMBER 337 | } 338 | 339 | // scanMantissa scans the mantissa begining from the rune. It returns the next 340 | // non decimal rune. It's used to determine wheter it's a fraction or exponent. 341 | func (s *Scanner) scanMantissa(ch rune) rune { 342 | scanned := false 343 | for isDecimal(ch) { 344 | ch = s.next() 345 | scanned = true 346 | } 347 | 348 | if scanned && ch != eof { 349 | s.unread() 350 | } 351 | return ch 352 | } 353 | 354 | // scanFraction scans the fraction after the '.' rune 355 | func (s *Scanner) scanFraction(ch rune) rune { 356 | if ch == '.' { 357 | ch = s.peek() // we peek just to see if we can move forward 358 | ch = s.scanMantissa(ch) 359 | } 360 | return ch 361 | } 362 | 363 | // scanExponent scans the remaining parts of an exponent after the 'e' or 'E' 364 | // rune. 
365 | func (s *Scanner) scanExponent(ch rune) rune { 366 | if ch == 'e' || ch == 'E' { 367 | ch = s.next() 368 | if ch == '-' || ch == '+' { 369 | ch = s.next() 370 | } 371 | ch = s.scanMantissa(ch) 372 | } 373 | return ch 374 | } 375 | 376 | // scanString scans a quoted string 377 | func (s *Scanner) scanString() { 378 | for { 379 | // '"' opening already consumed 380 | // read character after quote 381 | ch := s.next() 382 | 383 | if ch == '\n' || ch < 0 || ch == eof { 384 | s.err("literal not terminated") 385 | return 386 | } 387 | 388 | if ch == '"' { 389 | break 390 | } 391 | 392 | if ch == '\\' { 393 | s.scanEscape() 394 | } 395 | } 396 | 397 | return 398 | } 399 | 400 | // scanEscape scans an escape sequence 401 | func (s *Scanner) scanEscape() rune { 402 | // http://en.cppreference.com/w/cpp/language/escape 403 | ch := s.next() // read character after '/' 404 | switch ch { 405 | case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"': 406 | // nothing to do 407 | case '0', '1', '2', '3', '4', '5', '6', '7': 408 | // octal notation 409 | ch = s.scanDigits(ch, 8, 3) 410 | case 'x': 411 | // hexademical notation 412 | ch = s.scanDigits(s.next(), 16, 2) 413 | case 'u': 414 | // universal character name 415 | ch = s.scanDigits(s.next(), 16, 4) 416 | case 'U': 417 | // universal character name 418 | ch = s.scanDigits(s.next(), 16, 8) 419 | default: 420 | s.err("illegal char escape") 421 | } 422 | return ch 423 | } 424 | 425 | // scanDigits scans a rune with the given base for n times. 
For example an 426 | // octal notation \184 would yield in scanDigits(ch, 8, 3) 427 | func (s *Scanner) scanDigits(ch rune, base, n int) rune { 428 | for n > 0 && digitVal(ch) < base { 429 | ch = s.next() 430 | n-- 431 | } 432 | if n > 0 { 433 | s.err("illegal char escape") 434 | } 435 | 436 | // we scanned all digits, put the last non digit char back 437 | s.unread() 438 | return ch 439 | } 440 | 441 | // scanIdentifier scans an identifier and returns the literal string 442 | func (s *Scanner) scanIdentifier() string { 443 | offs := s.srcPos.Offset - s.lastCharLen 444 | ch := s.next() 445 | for isLetter(ch) || isDigit(ch) { 446 | ch = s.next() 447 | } 448 | 449 | if ch != eof { 450 | s.unread() // we got identifier, put back latest char 451 | } 452 | 453 | return string(s.src[offs:s.srcPos.Offset]) 454 | } 455 | 456 | // recentPosition returns the position of the character immediately after the 457 | // character or token returned by the last call to Scan. 458 | func (s *Scanner) recentPosition() (pos token.Pos) { 459 | pos.Offset = s.srcPos.Offset - s.lastCharLen 460 | switch { 461 | case s.srcPos.Column > 0: 462 | // common case: last character was not a '\n' 463 | pos.Line = s.srcPos.Line 464 | pos.Column = s.srcPos.Column 465 | case s.lastLineLen > 0: 466 | // last character was a '\n' 467 | // (we cannot be at the beginning of the source 468 | // since we have called next() at least once) 469 | pos.Line = s.srcPos.Line - 1 470 | pos.Column = s.lastLineLen 471 | default: 472 | // at the beginning of the source 473 | pos.Line = 1 474 | pos.Column = 1 475 | } 476 | return 477 | } 478 | 479 | // err prints the error of any scanning to s.Error function. 
If the function is 480 | // not defined, by default it prints them to os.Stderr 481 | func (s *Scanner) err(msg string) { 482 | s.ErrorCount++ 483 | pos := s.recentPosition() 484 | 485 | if s.Error != nil { 486 | s.Error(pos, msg) 487 | return 488 | } 489 | 490 | fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg) 491 | } 492 | 493 | // isHexadecimal returns true if the given rune is a letter 494 | func isLetter(ch rune) bool { 495 | return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch) 496 | } 497 | 498 | // isHexadecimal returns true if the given rune is a decimal digit 499 | func isDigit(ch rune) bool { 500 | return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch) 501 | } 502 | 503 | // isHexadecimal returns true if the given rune is a decimal number 504 | func isDecimal(ch rune) bool { 505 | return '0' <= ch && ch <= '9' 506 | } 507 | 508 | // isHexadecimal returns true if the given rune is an hexadecimal number 509 | func isHexadecimal(ch rune) bool { 510 | return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F' 511 | } 512 | 513 | // isWhitespace returns true if the rune is a space, tab, newline or carriage return 514 | func isWhitespace(ch rune) bool { 515 | return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' 516 | } 517 | 518 | // digitVal returns the integer value of a given octal,decimal or hexadecimal rune 519 | func digitVal(ch rune) int { 520 | switch { 521 | case '0' <= ch && ch <= '9': 522 | return int(ch - '0') 523 | case 'a' <= ch && ch <= 'f': 524 | return int(ch - 'a' + 10) 525 | case 'A' <= ch && ch <= 'F': 526 | return int(ch - 'A' + 10) 527 | } 528 | return 16 // larger than any legal digit val 529 | } 530 | -------------------------------------------------------------------------------- /scanner/scanner_test.go: -------------------------------------------------------------------------------- 1 | package scanner 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | 
"testing" 7 | 8 | "github.com/fatih/hcl/token" 9 | ) 10 | 11 | var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" 12 | 13 | type tokenPair struct { 14 | tok token.Type 15 | text string 16 | } 17 | 18 | var tokenLists = map[string][]tokenPair{ 19 | "comment": []tokenPair{ 20 | {token.COMMENT, "//"}, 21 | {token.COMMENT, "////"}, 22 | {token.COMMENT, "// comment"}, 23 | {token.COMMENT, "// /* comment */"}, 24 | {token.COMMENT, "// // comment //"}, 25 | {token.COMMENT, "//" + f100}, 26 | {token.COMMENT, "#"}, 27 | {token.COMMENT, "##"}, 28 | {token.COMMENT, "# comment"}, 29 | {token.COMMENT, "# /* comment */"}, 30 | {token.COMMENT, "# # comment #"}, 31 | {token.COMMENT, "#" + f100}, 32 | {token.COMMENT, "/**/"}, 33 | {token.COMMENT, "/***/"}, 34 | {token.COMMENT, "/* comment */"}, 35 | {token.COMMENT, "/* // comment */"}, 36 | {token.COMMENT, "/* /* comment */"}, 37 | {token.COMMENT, "/*\n comment\n*/"}, 38 | {token.COMMENT, "/*" + f100 + "*/"}, 39 | }, 40 | "operator": []tokenPair{ 41 | {token.LBRACK, "["}, 42 | {token.LBRACE, "{"}, 43 | {token.COMMA, ","}, 44 | {token.PERIOD, "."}, 45 | {token.RBRACK, "]"}, 46 | {token.RBRACE, "}"}, 47 | {token.ASSIGN, "="}, 48 | {token.ADD, "+"}, 49 | {token.SUB, "-"}, 50 | }, 51 | "bool": []tokenPair{ 52 | {token.BOOL, "true"}, 53 | {token.BOOL, "false"}, 54 | }, 55 | "ident": []tokenPair{ 56 | {token.IDENT, "a"}, 57 | {token.IDENT, "a0"}, 58 | {token.IDENT, "foobar"}, 59 | {token.IDENT, "abc123"}, 60 | {token.IDENT, "LGTM"}, 61 | {token.IDENT, "_"}, 62 | {token.IDENT, "_abc123"}, 63 | {token.IDENT, "abc123_"}, 64 | {token.IDENT, "_abc_123_"}, 65 | {token.IDENT, "_äöü"}, 66 | {token.IDENT, "_本"}, 67 | {token.IDENT, "äöü"}, 68 | {token.IDENT, "本"}, 69 | {token.IDENT, "a۰۱۸"}, 70 | {token.IDENT, "foo६४"}, 71 | {token.IDENT, "bar9876"}, 72 | }, 73 | "string": []tokenPair{ 74 | {token.STRING, `" "`}, 75 | {token.STRING, `"a"`}, 76 | {token.STRING, `"本"`}, 77 | 
{token.STRING, `"\a"`}, 78 | {token.STRING, `"\b"`}, 79 | {token.STRING, `"\f"`}, 80 | {token.STRING, `"\n"`}, 81 | {token.STRING, `"\r"`}, 82 | {token.STRING, `"\t"`}, 83 | {token.STRING, `"\v"`}, 84 | {token.STRING, `"\""`}, 85 | {token.STRING, `"\000"`}, 86 | {token.STRING, `"\777"`}, 87 | {token.STRING, `"\x00"`}, 88 | {token.STRING, `"\xff"`}, 89 | {token.STRING, `"\u0000"`}, 90 | {token.STRING, `"\ufA16"`}, 91 | {token.STRING, `"\U00000000"`}, 92 | {token.STRING, `"\U0000ffAB"`}, 93 | {token.STRING, `"` + f100 + `"`}, 94 | }, 95 | "number": []tokenPair{ 96 | {token.NUMBER, "0"}, 97 | {token.NUMBER, "1"}, 98 | {token.NUMBER, "9"}, 99 | {token.NUMBER, "42"}, 100 | {token.NUMBER, "1234567890"}, 101 | {token.NUMBER, "00"}, 102 | {token.NUMBER, "01"}, 103 | {token.NUMBER, "07"}, 104 | {token.NUMBER, "042"}, 105 | {token.NUMBER, "01234567"}, 106 | {token.NUMBER, "0x0"}, 107 | {token.NUMBER, "0x1"}, 108 | {token.NUMBER, "0xf"}, 109 | {token.NUMBER, "0x42"}, 110 | {token.NUMBER, "0x123456789abcDEF"}, 111 | {token.NUMBER, "0x" + f100}, 112 | {token.NUMBER, "0X0"}, 113 | {token.NUMBER, "0X1"}, 114 | {token.NUMBER, "0XF"}, 115 | {token.NUMBER, "0X42"}, 116 | {token.NUMBER, "0X123456789abcDEF"}, 117 | {token.NUMBER, "0X" + f100}, 118 | {token.NUMBER, "0e0"}, 119 | {token.NUMBER, "1e0"}, 120 | {token.NUMBER, "42e0"}, 121 | {token.NUMBER, "01234567890e0"}, 122 | {token.NUMBER, "0E0"}, 123 | {token.NUMBER, "1E0"}, 124 | {token.NUMBER, "42E0"}, 125 | {token.NUMBER, "01234567890E0"}, 126 | {token.NUMBER, "0e+10"}, 127 | {token.NUMBER, "1e-10"}, 128 | {token.NUMBER, "42e+10"}, 129 | {token.NUMBER, "01234567890e-10"}, 130 | {token.NUMBER, "0E+10"}, 131 | {token.NUMBER, "1E-10"}, 132 | {token.NUMBER, "42E+10"}, 133 | {token.NUMBER, "01234567890E-10"}, 134 | {token.NUMBER, "-0"}, 135 | {token.NUMBER, "-1"}, 136 | {token.NUMBER, "-9"}, 137 | {token.NUMBER, "-42"}, 138 | {token.NUMBER, "-1234567890"}, 139 | {token.NUMBER, "-00"}, 140 | {token.NUMBER, "-01"}, 141 | {token.NUMBER, 
"-07"}, 142 | {token.NUMBER, "-042"}, 143 | {token.NUMBER, "-01234567"}, 144 | {token.NUMBER, "-0x0"}, 145 | {token.NUMBER, "-0x1"}, 146 | {token.NUMBER, "-0xf"}, 147 | {token.NUMBER, "-0x42"}, 148 | {token.NUMBER, "-0x123456789abcDEF"}, 149 | {token.NUMBER, "-0x" + f100}, 150 | {token.NUMBER, "-0X0"}, 151 | {token.NUMBER, "-0X1"}, 152 | {token.NUMBER, "-0XF"}, 153 | {token.NUMBER, "-0X42"}, 154 | {token.NUMBER, "-0X123456789abcDEF"}, 155 | {token.NUMBER, "-0X" + f100}, 156 | {token.NUMBER, "-0e0"}, 157 | {token.NUMBER, "-1e0"}, 158 | {token.NUMBER, "-42e0"}, 159 | {token.NUMBER, "-01234567890e0"}, 160 | {token.NUMBER, "-0E0"}, 161 | {token.NUMBER, "-1E0"}, 162 | {token.NUMBER, "-42E0"}, 163 | {token.NUMBER, "-01234567890E0"}, 164 | {token.NUMBER, "-0e+10"}, 165 | {token.NUMBER, "-1e-10"}, 166 | {token.NUMBER, "-42e+10"}, 167 | {token.NUMBER, "-01234567890e-10"}, 168 | {token.NUMBER, "-0E+10"}, 169 | {token.NUMBER, "-1E-10"}, 170 | {token.NUMBER, "-42E+10"}, 171 | {token.NUMBER, "-01234567890E-10"}, 172 | }, 173 | "float": []tokenPair{ 174 | {token.FLOAT, "0."}, 175 | {token.FLOAT, "1."}, 176 | {token.FLOAT, "42."}, 177 | {token.FLOAT, "01234567890."}, 178 | {token.FLOAT, ".0"}, 179 | {token.FLOAT, ".1"}, 180 | {token.FLOAT, ".42"}, 181 | {token.FLOAT, ".0123456789"}, 182 | {token.FLOAT, "0.0"}, 183 | {token.FLOAT, "1.0"}, 184 | {token.FLOAT, "42.0"}, 185 | {token.FLOAT, "01234567890.0"}, 186 | {token.FLOAT, "01.8e0"}, 187 | {token.FLOAT, "1.4e0"}, 188 | {token.FLOAT, "42.2e0"}, 189 | {token.FLOAT, "01234567890.12e0"}, 190 | {token.FLOAT, "0.E0"}, 191 | {token.FLOAT, "1.12E0"}, 192 | {token.FLOAT, "42.123E0"}, 193 | {token.FLOAT, "01234567890.213E0"}, 194 | {token.FLOAT, "0.2e+10"}, 195 | {token.FLOAT, "1.2e-10"}, 196 | {token.FLOAT, "42.54e+10"}, 197 | {token.FLOAT, "01234567890.98e-10"}, 198 | {token.FLOAT, "0.1E+10"}, 199 | {token.FLOAT, "1.1E-10"}, 200 | {token.FLOAT, "42.1E+10"}, 201 | {token.FLOAT, "01234567890.1E-10"}, 202 | {token.FLOAT, "-0.0"}, 203 | 
{token.FLOAT, "-1.0"}, 204 | {token.FLOAT, "-42.0"}, 205 | {token.FLOAT, "-01234567890.0"}, 206 | {token.FLOAT, "-01.8e0"}, 207 | {token.FLOAT, "-1.4e0"}, 208 | {token.FLOAT, "-42.2e0"}, 209 | {token.FLOAT, "-01234567890.12e0"}, 210 | {token.FLOAT, "-0.E0"}, 211 | {token.FLOAT, "-1.12E0"}, 212 | {token.FLOAT, "-42.123E0"}, 213 | {token.FLOAT, "-01234567890.213E0"}, 214 | {token.FLOAT, "-0.2e+10"}, 215 | {token.FLOAT, "-1.2e-10"}, 216 | {token.FLOAT, "-42.54e+10"}, 217 | {token.FLOAT, "-01234567890.98e-10"}, 218 | {token.FLOAT, "-0.1E+10"}, 219 | {token.FLOAT, "-1.1E-10"}, 220 | {token.FLOAT, "-42.1E+10"}, 221 | {token.FLOAT, "-01234567890.1E-10"}, 222 | }, 223 | } 224 | 225 | var orderedTokenLists = []string{ 226 | "comment", 227 | "operator", 228 | "bool", 229 | "ident", 230 | "string", 231 | "number", 232 | "float", 233 | } 234 | 235 | func TestPosition(t *testing.T) { 236 | // create artifical source code 237 | buf := new(bytes.Buffer) 238 | 239 | for _, listName := range orderedTokenLists { 240 | for _, ident := range tokenLists[listName] { 241 | fmt.Fprintf(buf, "\t\t\t\t%s\n", ident.text) 242 | } 243 | } 244 | 245 | s := New(buf.Bytes()) 246 | 247 | pos := token.Pos{"", 4, 1, 5} 248 | s.Scan() 249 | for _, listName := range orderedTokenLists { 250 | 251 | for _, k := range tokenLists[listName] { 252 | curPos := s.tokPos 253 | // fmt.Printf("[%q] s = %+v:%+v\n", k.text, curPos.Offset, curPos.Column) 254 | 255 | if curPos.Offset != pos.Offset { 256 | t.Fatalf("offset = %d, want %d for %q", curPos.Offset, pos.Offset, k.text) 257 | } 258 | if curPos.Line != pos.Line { 259 | t.Fatalf("line = %d, want %d for %q", curPos.Line, pos.Line, k.text) 260 | } 261 | if curPos.Column != pos.Column { 262 | t.Fatalf("column = %d, want %d for %q", curPos.Column, pos.Column, k.text) 263 | } 264 | pos.Offset += 4 + len(k.text) + 1 // 4 tabs + token bytes + newline 265 | pos.Line += countNewlines(k.text) + 1 // each token is on a new line 266 | s.Scan() 267 | } 268 | } 269 | // 
make sure there were no token-internal errors reported by scanner 270 | if s.ErrorCount != 0 { 271 | t.Errorf("%d errors", s.ErrorCount) 272 | } 273 | } 274 | 275 | func TestComment(t *testing.T) { 276 | testTokenList(t, tokenLists["comment"]) 277 | } 278 | 279 | func TestOperator(t *testing.T) { 280 | testTokenList(t, tokenLists["operator"]) 281 | } 282 | 283 | func TestBool(t *testing.T) { 284 | testTokenList(t, tokenLists["bool"]) 285 | } 286 | 287 | func TestIdent(t *testing.T) { 288 | testTokenList(t, tokenLists["ident"]) 289 | } 290 | 291 | func TestString(t *testing.T) { 292 | testTokenList(t, tokenLists["string"]) 293 | } 294 | 295 | func TestNumber(t *testing.T) { 296 | testTokenList(t, tokenLists["number"]) 297 | } 298 | 299 | func TestFloat(t *testing.T) { 300 | testTokenList(t, tokenLists["float"]) 301 | } 302 | 303 | func TestRealExample(t *testing.T) { 304 | complexHCL := `// This comes from Terraform, as a test 305 | variable "foo" { 306 | default = "bar" 307 | description = "bar" 308 | } 309 | 310 | provider "aws" { 311 | access_key = "foo" 312 | secret_key = "bar" 313 | } 314 | 315 | resource "aws_security_group" "firewall" { 316 | count = 5 317 | } 318 | 319 | resource aws_instance "web" { 320 | ami = "${var.foo}" 321 | security_groups = [ 322 | "foo", 323 | "${aws_security_group.firewall.foo}" 324 | ] 325 | 326 | network_interface { 327 | device_index = 0 328 | description = "Main network interface" 329 | } 330 | }` 331 | 332 | literals := []struct { 333 | tokenType token.Type 334 | literal string 335 | }{ 336 | {token.COMMENT, `// This comes from Terraform, as a test`}, 337 | {token.IDENT, `variable`}, 338 | {token.STRING, `"foo"`}, 339 | {token.LBRACE, `{`}, 340 | {token.IDENT, `default`}, 341 | {token.ASSIGN, `=`}, 342 | {token.STRING, `"bar"`}, 343 | {token.IDENT, `description`}, 344 | {token.ASSIGN, `=`}, 345 | {token.STRING, `"bar"`}, 346 | {token.RBRACE, `}`}, 347 | {token.IDENT, `provider`}, 348 | {token.STRING, `"aws"`}, 349 | 
{token.LBRACE, `{`}, 350 | {token.IDENT, `access_key`}, 351 | {token.ASSIGN, `=`}, 352 | {token.STRING, `"foo"`}, 353 | {token.IDENT, `secret_key`}, 354 | {token.ASSIGN, `=`}, 355 | {token.STRING, `"bar"`}, 356 | {token.RBRACE, `}`}, 357 | {token.IDENT, `resource`}, 358 | {token.STRING, `"aws_security_group"`}, 359 | {token.STRING, `"firewall"`}, 360 | {token.LBRACE, `{`}, 361 | {token.IDENT, `count`}, 362 | {token.ASSIGN, `=`}, 363 | {token.NUMBER, `5`}, 364 | {token.RBRACE, `}`}, 365 | {token.IDENT, `resource`}, 366 | {token.IDENT, `aws_instance`}, 367 | {token.STRING, `"web"`}, 368 | {token.LBRACE, `{`}, 369 | {token.IDENT, `ami`}, 370 | {token.ASSIGN, `=`}, 371 | {token.STRING, `"${var.foo}"`}, 372 | {token.IDENT, `security_groups`}, 373 | {token.ASSIGN, `=`}, 374 | {token.LBRACK, `[`}, 375 | {token.STRING, `"foo"`}, 376 | {token.COMMA, `,`}, 377 | {token.STRING, `"${aws_security_group.firewall.foo}"`}, 378 | {token.RBRACK, `]`}, 379 | {token.IDENT, `network_interface`}, 380 | {token.LBRACE, `{`}, 381 | {token.IDENT, `device_index`}, 382 | {token.ASSIGN, `=`}, 383 | {token.NUMBER, `0`}, 384 | {token.IDENT, `description`}, 385 | {token.ASSIGN, `=`}, 386 | {token.STRING, `"Main network interface"`}, 387 | {token.RBRACE, `}`}, 388 | {token.RBRACE, `}`}, 389 | {token.EOF, ``}, 390 | } 391 | 392 | s := New([]byte(complexHCL)) 393 | for _, l := range literals { 394 | tok := s.Scan() 395 | if l.tokenType != tok.Type { 396 | t.Errorf("got: %s want %s for %s\n", tok, l.tokenType, tok.String()) 397 | } 398 | 399 | if l.literal != tok.Text { 400 | t.Errorf("got: %s want %s\n", tok, l.literal) 401 | } 402 | } 403 | 404 | } 405 | 406 | func TestError(t *testing.T) { 407 | testError(t, "\x80", "1:1", "illegal UTF-8 encoding", token.ILLEGAL) 408 | testError(t, "\xff", "1:1", "illegal UTF-8 encoding", token.ILLEGAL) 409 | 410 | testError(t, "ab\x80", "1:3", "illegal UTF-8 encoding", token.IDENT) 411 | testError(t, "abc\xff", "1:4", "illegal UTF-8 encoding", token.IDENT) 412 | 
413 | testError(t, `"ab`+"\x80", "1:4", "illegal UTF-8 encoding", token.STRING) 414 | testError(t, `"abc`+"\xff", "1:5", "illegal UTF-8 encoding", token.STRING) 415 | 416 | testError(t, `01238`, "1:6", "illegal octal number", token.NUMBER) 417 | testError(t, `01238123`, "1:9", "illegal octal number", token.NUMBER) 418 | testError(t, `0x`, "1:3", "illegal hexadecimal number", token.NUMBER) 419 | testError(t, `0xg`, "1:3", "illegal hexadecimal number", token.NUMBER) 420 | testError(t, `'aa'`, "1:1", "illegal char", token.ILLEGAL) 421 | 422 | testError(t, `"`, "1:2", "literal not terminated", token.STRING) 423 | testError(t, `"abc`, "1:5", "literal not terminated", token.STRING) 424 | testError(t, `"abc`+"\n", "1:5", "literal not terminated", token.STRING) 425 | testError(t, `/*/`, "1:4", "comment not terminated", token.COMMENT) 426 | } 427 | 428 | func testError(t *testing.T, src, pos, msg string, tok token.Type) { 429 | s := New([]byte(src)) 430 | 431 | errorCalled := false 432 | s.Error = func(p token.Pos, m string) { 433 | if !errorCalled { 434 | if pos != p.String() { 435 | t.Errorf("pos = %q, want %q for %q", p, pos, src) 436 | } 437 | 438 | if m != msg { 439 | t.Errorf("msg = %q, want %q for %q", m, msg, src) 440 | } 441 | errorCalled = true 442 | } 443 | } 444 | 445 | tk := s.Scan() 446 | if tk.Type != tok { 447 | t.Errorf("tok = %s, want %s for %q", tk, tok, src) 448 | } 449 | if !errorCalled { 450 | t.Errorf("error handler not called for %q", src) 451 | } 452 | if s.ErrorCount == 0 { 453 | t.Errorf("count = %d, want > 0 for %q", s.ErrorCount, src) 454 | } 455 | } 456 | 457 | func testTokenList(t *testing.T, tokenList []tokenPair) { 458 | // create artifical source code 459 | buf := new(bytes.Buffer) 460 | for _, ident := range tokenList { 461 | fmt.Fprintf(buf, "%s\n", ident.text) 462 | } 463 | 464 | s := New(buf.Bytes()) 465 | for _, ident := range tokenList { 466 | tok := s.Scan() 467 | if tok.Type != ident.tok { 468 | t.Errorf("tok = %q want %q for %q\n", 
tok, ident.tok, ident.text) 469 | } 470 | 471 | if tok.Text != ident.text { 472 | t.Errorf("text = %q want %q", tok.String(), ident.text) 473 | } 474 | 475 | } 476 | } 477 | 478 | func countNewlines(s string) int { 479 | n := 0 480 | for _, ch := range s { 481 | if ch == '\n' { 482 | n++ 483 | } 484 | } 485 | return n 486 | } 487 | -------------------------------------------------------------------------------- /token/position.go: -------------------------------------------------------------------------------- 1 | package token 2 | 3 | import "fmt" 4 | 5 | // Pos describes an arbitrary source position 6 | // including the file, line, and column location. 7 | // A Position is valid if the line number is > 0. 8 | type Pos struct { 9 | Filename string // filename, if any 10 | Offset int // offset, starting at 0 11 | Line int // line number, starting at 1 12 | Column int // column number, starting at 1 (character count) 13 | } 14 | 15 | // IsValid returns true if the position is valid. 16 | func (p *Pos) IsValid() bool { return p.Line > 0 } 17 | 18 | // String returns a string in one of several forms: 19 | // 20 | // file:line:column valid position with file name 21 | // line:column valid position without file name 22 | // file invalid position with file name 23 | // - invalid position without file name 24 | func (p Pos) String() string { 25 | s := p.Filename 26 | if p.IsValid() { 27 | if s != "" { 28 | s += ":" 29 | } 30 | s += fmt.Sprintf("%d:%d", p.Line, p.Column) 31 | } 32 | if s == "" { 33 | s = "-" 34 | } 35 | return s 36 | } 37 | 38 | // Before reports whether the position p is before u. 39 | func (p Pos) Before(u Pos) bool { 40 | return u.Offset > p.Offset || u.Line > p.Line 41 | } 42 | 43 | // After reports whether the position p is after u. 
44 | func (p Pos) After(u Pos) bool { 45 | return u.Offset < p.Offset || u.Line < p.Line 46 | } 47 | -------------------------------------------------------------------------------- /token/token.go: -------------------------------------------------------------------------------- 1 | // Package token defines constants representing the lexical tokens for HCL 2 | // (HashiCorp Configuration Language) 3 | package token 4 | 5 | import ( 6 | "fmt" 7 | "strconv" 8 | ) 9 | 10 | // Token defines a single HCL token which can be obtained via the Scanner 11 | type Token struct { 12 | Type Type 13 | Pos Pos 14 | Text string 15 | } 16 | 17 | // Type is the set of lexical tokens of the HCL (HashiCorp Configuration Language) 18 | type Type int 19 | 20 | const ( 21 | // Special tokens 22 | ILLEGAL Type = iota 23 | EOF 24 | COMMENT 25 | 26 | identifier_beg 27 | IDENT // literals 28 | literal_beg 29 | NUMBER // 12345 30 | FLOAT // 123.45 31 | BOOL // true,false 32 | STRING // "abc" 33 | literal_end 34 | identifier_end 35 | 36 | operator_beg 37 | LBRACK // [ 38 | LBRACE // { 39 | COMMA // , 40 | PERIOD // . 41 | 42 | RBRACK // ] 43 | RBRACE // } 44 | 45 | ASSIGN // = 46 | ADD // + 47 | SUB // - 48 | operator_end 49 | ) 50 | 51 | var tokens = [...]string{ 52 | ILLEGAL: "ILLEGAL", 53 | 54 | EOF: "EOF", 55 | COMMENT: "COMMENT", 56 | 57 | IDENT: "IDENT", 58 | NUMBER: "NUMBER", 59 | FLOAT: "FLOAT", 60 | BOOL: "BOOL", 61 | STRING: "STRING", 62 | 63 | LBRACK: "LBRACK", 64 | LBRACE: "LBRACE", 65 | COMMA: "COMMA", 66 | PERIOD: "PERIOD", 67 | 68 | RBRACK: "RBRACK", 69 | RBRACE: "RBRACE", 70 | 71 | ASSIGN: "ASSIGN", 72 | ADD: "ADD", 73 | SUB: "SUB", 74 | } 75 | 76 | // String returns the string corresponding to the token tok. 
77 | func (t Type) String() string { 78 | s := "" 79 | if 0 <= t && t < Type(len(tokens)) { 80 | s = tokens[t] 81 | } 82 | if s == "" { 83 | s = "token(" + strconv.Itoa(int(t)) + ")" 84 | } 85 | return s 86 | } 87 | 88 | // IsIdentifier returns true for tokens corresponding to identifiers and basic 89 | // type literals; it returns false otherwise. 90 | func (t Type) IsIdentifier() bool { return identifier_beg < t && t < identifier_end } 91 | 92 | // IsLiteral returns true for tokens corresponding to basic type literals; it 93 | // returns false otherwise. 94 | func (t Type) IsLiteral() bool { return literal_beg < t && t < literal_end } 95 | 96 | // IsOperator returns true for tokens corresponding to operators and 97 | // delimiters; it returns false otherwise. 98 | func (t Type) IsOperator() bool { return operator_beg < t && t < operator_end } 99 | 100 | // String returns the token's literal text. Note that this is only 101 | // applicable for certain token types, such as token.IDENT, 102 | // token.STRING, etc.. 
103 | func (t Token) String() string { 104 | return fmt.Sprintf("%s %s %s", t.Pos.String(), t.Type.String(), t.Text) 105 | } 106 | -------------------------------------------------------------------------------- /token/token_test.go: -------------------------------------------------------------------------------- 1 | package token 2 | 3 | import "testing" 4 | 5 | func TestTypeString(t *testing.T) { 6 | var tokens = []struct { 7 | tt Type 8 | str string 9 | }{ 10 | {ILLEGAL, "ILLEGAL"}, 11 | {EOF, "EOF"}, 12 | {COMMENT, "COMMENT"}, 13 | {IDENT, "IDENT"}, 14 | {NUMBER, "NUMBER"}, 15 | {FLOAT, "FLOAT"}, 16 | {BOOL, "BOOL"}, 17 | {STRING, "STRING"}, 18 | {LBRACK, "LBRACK"}, 19 | {LBRACE, "LBRACE"}, 20 | {COMMA, "COMMA"}, 21 | {PERIOD, "PERIOD"}, 22 | {RBRACK, "RBRACK"}, 23 | {RBRACE, "RBRACE"}, 24 | {ASSIGN, "ASSIGN"}, 25 | {ADD, "ADD"}, 26 | {SUB, "SUB"}, 27 | } 28 | 29 | for _, token := range tokens { 30 | if token.tt.String() != token.str { 31 | t.Errorf("want: %q got:%q\n", token.str, token.tt) 32 | 33 | } 34 | } 35 | 36 | } 37 | --------------------------------------------------------------------------------