├── AUTHORS ├── LICENSE ├── README.md ├── byhand.go ├── decode.go ├── decode_test.go ├── errors_test.go ├── gopp.go ├── gopp.gopp ├── literals_test.go ├── parse.go ├── parse_test.go ├── parsemath_test.go ├── parseself_test.go ├── tokenize.go ├── tokenizer_test.go └── util.go /AUTHORS: -------------------------------------------------------------------------------- 1 | John Asmuth 2 | Branden J Brown 3 | Ian Remmler 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 the AUTHORS. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * The names in AUTHORS may not be used to endorse or promote 10 | products derived from this software without specific prior written 11 | permission. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 14 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 15 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 16 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 17 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 18 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 19 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | gopp 2 | ==== 3 | 4 | A GO Parser Parser. 5 | 6 | Pronounced 'gahp', rather than 'go pee pee'. 7 | 8 | gopp is a library that takes a grammar (specified in .gopp format), a document, and an object; it parses the document using the grammar and decodes the resulting tree into the provided object. 9 | 10 | .gopp is a BNF-like format for describing context-free grammars. 11 | 12 | This README does not attempt to describe the use and purpose of context-free grammars - see Google for more information about grammars and recursive descent parsing. Or try http://blog.reverberate.org/2013/07/ll-and-lr-parsing-demystified.html. 13 | 14 | Example 15 | ------- 16 | 17 | The following grammar can be used to parse simple arithmetic equations. 18 | 19 | ``` 20 | Eqn => {type=MathEqn} {field=Left} <<Expr>> '=' {field=Right} <<Expr>> '\n' 21 | Expr => {type=MathSum} {field=First} <<Term>> '+' {field=Second} <<Expr>> 22 | Expr => <Term> 23 | Term => {type=MathProduct} {field=First} <<Factor>> '*' {field=Second} <<Term>> 24 | Term => <Factor> 25 | Factor => {type=MathExprFactor} '(' {field=Expr} <<Expr>> ')' 26 | Factor => {type=MathNumberFactor} {field=Number} <number> 27 | number = /(\d+)/ 28 | ``` 29 | 30 | A grammar is made up of rules ```<<Rule>>```, inline rules ```<Rule>```, literals ```'string'```, and tags ```{tag}```. 
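Before the pieces are described in detail, here is roughly how such a grammar is put to work (a sketch based on this repository's own tests; ```mathGrammar``` is assumed to be a string holding the grammar above, and the Math* types are the ones defined later in this section):

```
df, err := gopp.NewDecoderFactory(mathGrammar, "Eqn")
if err != nil {
	panic(err) // the .gopp text itself failed to parse
}
// Register every concrete type that may be decoded into an interface field.
df.RegisterType(MathSum{})
df.RegisterType(MathProduct{})
df.RegisterType(MathExprFactor{})
df.RegisterType(MathNumberFactor{})

var eqn MathEqn
dec := df.NewDecoder(strings.NewReader("5+1=6\n"))
if err := dec.Decode(&eqn); err != nil {
	panic(err)
}
// eqn.Left is now a MathSum, eqn.Right a MathNumberFactor.
```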
31 | 32 | When parsing a document, a rule creates a new subtree as a child of the current tree, and an inline rule creates a new tree and adds its children to the current tree (the difference between ```[1,2,3,[a,b,c]]``` and ```[1,2,3,a,b,c]```). 33 | 34 | Literals are strings that must appear exactly in the document text. To have other kinds of text matched, a .gopp also defines a set of symbols using regular expressions, and they are brought into the main tree by using inline rules. 35 | 36 | Tags are elements that are put into the AST if their rule can be parsed. They do not match anything in the actual document text, but they can be used to provide information about the tree structure. For things to be decoded into objects, the "type=" and "field=" tags are used. A "type=" tag tells the decoder what type to allocate in the case that the field or slice element being decoded into is an interface without concrete type. A "field=" tag tells the decoder that, if the current object is a struct, the subtree in the next element is decoded into the field with the given name. Tags can be anything, and can be seen if the AST is accessed directly before decoding. 37 | 38 | The grammar above can be used to decode documents into objects of type MathEqn, with the following types defined. 39 | 40 | ``` 41 | type MathEqn struct { 42 | Left, Right interface{} 43 | } 44 | 45 | type MathSum struct { 46 | First, Second interface{} 47 | } 48 | 49 | type MathProduct struct { 50 | First, Second interface{} 51 | } 52 | 53 | type MathExprFactor struct { 54 | Expr interface{} 55 | } 56 | 57 | type MathNumberFactor struct { 58 | Number string 59 | } 60 | ``` 61 | 62 | So, the document "5+1=6" would get the AST 63 | 64 | ``` 65 | AST{ 66 | Tag("type=MathEqn"), 67 | Tag("field=Left"), 68 | []Node{ 69 | Tag("type=MathSum"), 70 | Tag("field=First"), 71 | []Node{ 72 | Tag("type=MathNumberFactor"), 73 | Tag("field=Number"), 74 | SymbolText{ 75 | Type: "number", 76 | Text: "5", 77 | }, 78 | }, 79 | Literal("+"), 80 | Tag("field=Second"), 81 | []Node{ 82 | Tag("type=MathNumberFactor"), 83 | Tag("field=Number"), 84 | SymbolText{ 85 | Type: "number", 86 | Text: "1", 87 | }, 88 | }, 89 | }, 90 | Literal("="), 91 | Tag("field=Right"), 92 | []Node{ 93 | Tag("type=MathNumberFactor"), 94 | Tag("field=Number"), 95 | SymbolText{ 96 | Type: "number", 97 | Text: "6", 98 | }, 99 | }, 100 | Literal("\n"), 101 | } 102 | ``` 103 | 104 | and the object 105 | 106 | ``` 107 | MathEqn{ 108 | Left: MathSum{ 109 | First: MathNumberFactor{"5"}, 110 | Second: MathNumberFactor{"1"}, 111 | }, 112 | Right: MathNumberFactor{"6"}, 113 | } 114 | ``` 115 | 116 | Clearly, the object is a more reasonable representation than the AST for actually working with in your code, which is why the decoding step exists. 117 | 118 | Grammar 119 | ------- 120 | 121 | The following .gopp grammar describes .gopp grammars, and how to decode them into gopp.Grammar objects. 122 | 123 | ``` 124 | # The first things are lex steps, which are for use by the tokenizer. 125 | # Currently the only recognized lex step is stuff to ignore. 126 | 127 | # We ignore comments, 128 | ignore: /^#.*\n/ 129 | # and whitespace that precedes something more interesting. 130 | ignore: /^(?:[ \t])+/ 131 | 132 | # After the lex steps are the rules. 133 | # The fact that Grammar is first is irrelevant. The name of the starting rule 134 | # needs to be provided in code. 
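# (In code, that starting rule is the second argument to gopp.NewDecoderFactory.)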
135 | # A Grammar is made up of lists of LexSteps, Rules, and Symbols, in that order, 136 | # and there may be zero LexSteps or Symbols. There must be at least one Rule. 137 | Grammar => {type=Grammar} '\n'* {field=LexSteps} <<LexStep>>* {field=Rules} <<Rule>>+ {field=Symbols} <<Symbol>>* 138 | 139 | # The next three rules define the major types of elements in a grammar. 140 | 141 | # A LexStep is an identifier, a literal ':', and a regexp pattern. If the name 142 | # is 'ignore', then when the lexer goes to get the next token, it will try to 143 | # trim the remaining document using the provided pattern. No other names are 144 | # used, currently. 145 | LexStep => {field=Name} <identifier> ':' {field=Pattern} <regexp> '\n'+ 146 | 147 | # A Rule is an identifier, a literal '=>', an Expr, and ends with one or more 148 | # newlines. 149 | Rule => {field=Name} <identifier> '=>' {field=Expr} <Expr> '\n'+ 150 | # A Symbol is an identifier, a literal '=', a regexp, and ends with one or more 151 | # newlines. 152 | Symbol => {field=Name} <identifier> '=' {field=Pattern} <regexp> '\n'+ 153 | 154 | # An Expr is one or more Terms. 155 | Expr => <<Term>>+ 156 | 157 | # A Term can be a Term1, 158 | Term => <Term1> 159 | # or a Term2. 160 | Term => <Term2> 161 | 162 | # A Term1 can be a Term2 followed by a literal '*', 163 | Term1 => {type=RepeatZeroTerm} {field=Term} <<Term2>> '*' 164 | # or a Term2 followed by a literal '+'. 165 | Term1 => {type=RepeatOneTerm} {field=Term} <<Term2>> '+' 166 | 167 | # A Term2 can be an Expr surrounded by '[' and ']', 168 | Term2 => {type=OptionalTerm} '[' {field=Expr} <Expr> ']' 169 | # or by '(' and ')', 170 | Term2 => {type=GroupTerm} '(' {field=Expr} <Expr> ')' 171 | # or an identifier surrounded by '<<' and '>>', 172 | Term2 => {type=RuleTerm} '<<' {field=Name} <identifier> '>>' 173 | # or by '<' and '>', 174 | Term2 => {type=InlineRuleTerm} '<' {field=Name} <identifier> '>' 175 | # or a tag, 176 | Term2 => {type=TagTerm} {field=Tag} <tag> 177 | # or a literal. 178 | Term2 => {type=LiteralTerm} {field=Literal} <literal> 179 | 180 | # And last is the symbols, which are regular expressions that can be found in 181 | # the document. Their order is important - it indicates the order in which the 182 | # tokenizer attempts to match them against the rest of the document. So, if two 183 | # symbols could be used starting at the same point in the document, the one 184 | # that is listed first will win. 185 | identifier = /([a-zA-Z][a-zA-Z0-9_]*)/ 186 | literal = /'((?:[\\']|[^'])+?)'/ 187 | tag = /\{((?:[\\']|[^'])+?)\}/ 188 | regexp = /\/((?:\\/|[^\n])+?)\// 189 | 190 | ``` 191 | 192 | 193 | The ```<<Rule>>``` and ```<Rule>``` forms indicate recursively evaluated rules and inline rules. A rule will create an AST subtree in its parent. An inline rule will expand its children into its parent, rather than creating a new subtree. In other words, if a child evaluates to [1,2,3], a parent that already held [a,b,c] becomes [a,b,c,[1,2,3]] when adding that child as a rule, but [a,b,c,1,2,3] when adding it as an inline rule. This inlining is useful for keeping trees compact and easy to work with. 194 | 195 | Anything within a '[' and ']' is optional: if it cannot be parsed, the parent rule may still successfully parse without the optional component. 196 | 197 | Anything within a '(' and ')' is grouped, and external operators (like '*' or '+', or a forthcoming '|') apply to the group as a whole. 198 | 199 | The '*' and '+' operators indicate that the rule should be applied as many times as possible, with the '+' requiring at least one successful application for the '+' to succeed. 
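To make these operators concrete, here is a small illustrative fragment (hypothetical rules, not part of this repository's grammars; it assumes Stmt, Expr, and Item rules plus matching Go types):

```
# A Block is '{', zero or more Stmts, and '}'.
Block => '{' {field=Stmts} <<Stmt>>* '}'
# A Stmt is a name, an optional '=' and value, and one or more newlines.
Stmt => {field=Name} <identifier> ['=' {field=Value} <<Expr>>] '\n'+
# Grouping lets '*' repeat a whole sequence: an item list with comma-prefixed tails.
Args => {field=Items} <<Item>> (',' {field=Items} <<Item>>)*
```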
200 | 201 | A tag inserts a gopp.Tag into the tree when evaluated, and is always evaluated successfully when reached. This element is useful for inserting information into the tree that can be looked at by a post-processor. gopp itself makes use of several tags to help it decode into objects, described in the decoding section. 202 | 203 | Decoding 204 | -------- 205 | 206 | A parsed tree is decoded into an object. 207 | 208 | If that object is a slice and the tree is also a slice, each element of the tree-slice is decoded into a new element for the object slice. 209 | 210 | If that object is a struct, a tag of the form "{field=X}" indicates that the subsequent tree element should be decoded into the object's .X field. As a special case, "{field=.}" will apply the subsequent tree element to the current object. 211 | 212 | If a field or a slice element is an interface type, the tree needs to have a tag of the form "{type=T}", indicating that the type T should be used to allocate the element for decoding. T must have been registered beforehand. 213 | -------------------------------------------------------------------------------- /byhand.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The gopp AUTHORS. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package gopp 6 | 7 | var ByHandGrammar = Grammar{ 8 | LexSteps: []LexStep{ 9 | LexStep{ 10 | Name: "ignore", 11 | Pattern: `^#.*\n`, 12 | }, 13 | LexStep{ 14 | Name: "ignore", 15 | Pattern: `^(?:[ \t])+`, 16 | }, 17 | }, 18 | Rules: []Rule{ 19 | Rule{ // Grammar => {type=Grammar} '\n'* {field=LexSteps} <<LexStep>>* {field=Rules} <<Rule>>+ {field=Symbols} <<Symbol>>* 20 | Name: "Grammar", 21 | Expr: Expr{ 22 | TagTerm{Tag: "type=Grammar"}, 23 | RepeatZeroTerm{ 24 | LiteralTerm{Literal: "\n"}, 25 | }, 26 | TagTerm{Tag: "field=LexSteps"}, 27 | RepeatZeroTerm{ 28 | RuleTerm{Name: "LexStep"}, 29 | }, 30 | TagTerm{Tag: "field=Rules"}, 31 | RepeatOneTerm{ 32 | RuleTerm{Name: "Rule"}, 33 | }, 34 | TagTerm{Tag: "field=Symbols"}, 35 | RepeatZeroTerm{ 36 | RuleTerm{Name: "Symbol"}, 37 | }, 38 | }, 39 | }, 40 | Rule{ // LexStep => {field=Name} <identifier> ':' {field=Pattern} <regexp> '\n'+ 41 | Name: "LexStep", 42 | Expr: Expr{ 43 | TagTerm{Tag: "field=Name"}, 44 | InlineRuleTerm{Name: "identifier"}, 45 | LiteralTerm{Literal: ":"}, 46 | TagTerm{Tag: "field=Pattern"}, 47 | InlineRuleTerm{Name: "regexp"}, 48 | RepeatOneTerm{ 49 | LiteralTerm{Literal: "\n"}, 50 | }, 51 | }, 52 | }, 53 | Rule{ // Rule => {field=Name} <identifier> '=>' {field=Expr} <Expr> '\n'+ 54 | Name: "Rule", 55 | Expr: Expr{ 56 | TagTerm{Tag: "field=Name"}, 57 | InlineRuleTerm{Name: "identifier"}, 58 | LiteralTerm{Literal: "=>"}, 59 | TagTerm{Tag: "field=Expr"}, 60 | InlineRuleTerm{Name: "Expr"}, 61 | RepeatOneTerm{ 62 | LiteralTerm{Literal: "\n"}, 63 | }, 64 | }, 65 | }, 66 | Rule{ // Symbol => {field=Name} <identifier> '=' {field=Pattern} <regexp> '\n'+ 67 | Name: "Symbol", 68 | Expr: Expr{ 69 | TagTerm{Tag: "field=Name"}, 70 | InlineRuleTerm{Name: "identifier"}, 71 | LiteralTerm{Literal: "="}, 72 | TagTerm{Tag: "field=Pattern"}, 73 | InlineRuleTerm{Name: "regexp"}, 74 | RepeatOneTerm{ 75 | LiteralTerm{Literal: "\n"}, 76 | }, 77 | }, 78 | }, 79 | Rule{ // Expr => <<Term>>+ 80 | Name: "Expr", 81 | Expr: Expr{ 82 | RepeatOneTerm{ 83 | RuleTerm{Name: "Term"}, 84 | }, 85 | }, 86 | }, 87 | Rule{ // Term => <Term1> 88 | Name: "Term", 89 | Expr: Expr{ 90 | InlineRuleTerm{Name: "Term1"}, 91 | }, 92 | }, 93 | Rule{ // Term => <Term2> 94 | 
Name: "Term", 95 | Expr: Expr{ 96 | InlineRuleTerm{Name: "Term2"}, 97 | }, 98 | }, 99 | Rule{ // Term => {type=RepeatZeroTerm} {field=Term} <> '*' 100 | Name: "Term1", 101 | Expr: Expr{ 102 | TagTerm{Tag: "type=RepeatZeroTerm"}, 103 | TagTerm{Tag: "field=Term"}, 104 | RuleTerm{Name: "Term2"}, 105 | LiteralTerm{Literal: "*"}, 106 | }, 107 | }, 108 | Rule{ // Term => {type=RepeatOneTerm} {field=Term} <> '+' 109 | Name: "Term1", 110 | Expr: Expr{ 111 | TagTerm{Tag: "type=RepeatOneTerm"}, 112 | TagTerm{Tag: "field=Term"}, 113 | RuleTerm{Name: "Term2"}, 114 | LiteralTerm{Literal: "+"}, 115 | }, 116 | }, 117 | Rule{ // Term => {type=OptionalTerm} '[' {field=Expr} <> ']' 118 | Name: "Term2", 119 | Expr: Expr{ 120 | TagTerm{Tag: "type=OptionalTerm"}, 121 | LiteralTerm{Literal: "["}, 122 | TagTerm{Tag: "field=Expr"}, 123 | InlineRuleTerm{Name: "Expr"}, 124 | LiteralTerm{Literal: "]"}, 125 | }, 126 | }, 127 | Rule{ // Term => {type=GroupTerm} '(' {field=Expr} <> ')' 128 | Name: "Term2", 129 | Expr: Expr{ 130 | TagTerm{Tag: "type=GroupTerm"}, 131 | LiteralTerm{Literal: "("}, 132 | TagTerm{Tag: "field=Expr"}, 133 | InlineRuleTerm{Name: "Expr"}, 134 | LiteralTerm{Literal: ")"}, 135 | }, 136 | }, 137 | Rule{ // Term => {type=RuleTerm} '<<' {field=Name} '>>' 138 | Name: "Term2", 139 | Expr: Expr{ 140 | TagTerm{Tag: "type=RuleTerm"}, 141 | LiteralTerm{Literal: "<<"}, 142 | TagTerm{Tag: "field=Name"}, 143 | InlineRuleTerm{Name: "identifier"}, 144 | LiteralTerm{Literal: ">>"}, 145 | }, 146 | }, 147 | Rule{ // Term => {type=InlineRuleTerm} '<' {field=Name} '>' 148 | Name: "Term2", 149 | Expr: Expr{ 150 | TagTerm{Tag: "type=InlineRuleTerm"}, 151 | LiteralTerm{Literal: "<"}, 152 | TagTerm{Tag: "field=Name"}, 153 | InlineRuleTerm{Name: "identifier"}, 154 | LiteralTerm{Literal: ">"}, 155 | }, 156 | }, 157 | Rule{ // Term => {type=TagTerm} {field=Tag} 158 | Name: "Term2", 159 | Expr: Expr{ 160 | TagTerm{Tag: "type=TagTerm"}, 161 | TagTerm{Tag: "field=Tag"}, 162 | InlineRuleTerm{Name: "tag"}, 163 | }, 164 | }, 165 | Rule{ // Term => {type=LiteralTerm} {field=Literal} 166 | Name: "Term2", 167 | Expr: Expr{ 168 | TagTerm{Tag: "type=LiteralTerm"}, 169 | TagTerm{Tag: "field=Literal"}, 170 | InlineRuleTerm{Name: "literal"}, 171 | }, 172 | }, 173 | }, 174 | Symbols: []Symbol{ 175 | Symbol{ 176 | Name: "identifier", 177 | Pattern: `([a-zA-Z][a-zA-Z0-9_]*)`, 178 | }, 179 | Symbol{ 180 | Name: "literal", 181 | Pattern: `'((?:[\\']|[^'])+?)'`, 182 | }, 183 | Symbol{ 184 | Name: "tag", 185 | Pattern: `\{((?:[\\']|[^'])+?)\}`, 186 | }, 187 | Symbol{ 188 | Name: "regexp", 189 | Pattern: `\/((?:\\/|[^\n])+?)\/`, 190 | }, 191 | }, 192 | } 193 | 194 | func mki(text string) SymbolText { 195 | return SymbolText{ 196 | Type: "identifier", 197 | Text: text, 198 | } 199 | } 200 | 201 | func mkr(text string) SymbolText { 202 | return SymbolText{ 203 | Type: "regexp", 204 | Text: text, 205 | } 206 | } 207 | 208 | func mkt(text string) SymbolText { 209 | return SymbolText{ 210 | Type: "tag", 211 | Text: text, 212 | } 213 | } 214 | 215 | func mkl(text string) SymbolText { 216 | return SymbolText{ 217 | Type: "literal", 218 | Text: escapeString(text), 219 | } 220 | } 221 | 222 | func mkGrammar(lexsteps, rules, symbols []Node) AST { 223 | return []Node{ 224 | Tag("type=Grammar"), 225 | []Node{ 226 | Literal("\n"), 227 | }, 228 | Tag("field=LexSteps"), 229 | lexsteps, 230 | Tag("field=Rules"), 231 | rules, 232 | Tag("field=Symbols"), 233 | symbols, 234 | } 235 | } 236 | 237 | func mkRule(name string, nodes ...Node) []Node { 238 | return 
[]Node{ 239 | Tag("field=Name"), 240 | mki(name), 241 | Literal("=>"), 242 | Tag("field=Expr"), 243 | mkExpr(nodes...), 244 | []Node{ 245 | Literal("\n"), 246 | }, 247 | } 248 | return nodes 249 | } 250 | 251 | func mkLexStep(name, pattern string) []Node { 252 | return []Node{ 253 | Tag("field=Name"), 254 | mki(name), 255 | Literal(":"), 256 | Tag("field=Pattern"), 257 | mkr(pattern), 258 | []Node{ 259 | Literal("\n"), 260 | }, 261 | } 262 | } 263 | 264 | func mkSymbol(name, pattern string) []Node { 265 | return []Node{ 266 | Tag("field=Name"), 267 | mki(name), 268 | Literal("="), 269 | Tag("field=Pattern"), 270 | mkr(pattern), 271 | []Node{ 272 | Literal("\n"), 273 | }, 274 | } 275 | } 276 | 277 | func mkExpr(nodes ...Node) []Node { 278 | return nodes 279 | } 280 | func mkRepeatZeroTerm(node Node) []Node { 281 | return []Node{ 282 | Tag("type=RepeatZeroTerm"), 283 | Tag("field=Term"), 284 | node, 285 | Literal("*"), 286 | } 287 | } 288 | 289 | func mkRepeatOneTerm(node Node) []Node { 290 | return []Node{ 291 | Tag("type=RepeatOneTerm"), 292 | Tag("field=Term"), 293 | node, 294 | Literal("+"), 295 | } 296 | } 297 | 298 | func mkOptionalTerm(node Node) []Node { 299 | return []Node{ 300 | Tag("type=OptionalTerm"), 301 | Literal("["), 302 | Tag("field=Expr"), 303 | mkExpr(node), 304 | Literal("]"), 305 | } 306 | } 307 | 308 | func mkRuleTerm(text string) []Node { 309 | return []Node{ 310 | Tag("type=RuleTerm"), 311 | Literal("<<"), 312 | Tag("field=Name"), 313 | mki(text), 314 | Literal(">>"), 315 | } 316 | } 317 | 318 | func mkInlineRuleTerm(text string) []Node { 319 | return []Node{ 320 | Tag("type=InlineRuleTerm"), 321 | Literal("<"), 322 | Tag("field=Name"), 323 | mki(text), 324 | Literal(">"), 325 | } 326 | } 327 | 328 | func mkTagTerm(text string) []Node { 329 | return []Node{ 330 | Tag("type=TagTerm"), 331 | Tag("field=Tag"), 332 | mkt(text), 333 | } 334 | } 335 | 336 | func mkLiteralTerm(text string) []Node { 337 | return []Node{ 338 | Tag("type=LiteralTerm"), 339 | Tag("field=Literal"), 340 | mkl(text), 341 | } 342 | } 343 | 344 | var ByHandGoppAST = mkGrammar( 345 | []Node{ 346 | mkLexStep("ignore", `^#.*\n`), 347 | mkLexStep("ignore", `^(?:[ \t])+`), 348 | }, 349 | []Node{ 350 | mkRule("Grammar", 351 | mkTagTerm("type=Grammar"), 352 | mkRepeatZeroTerm(mkLiteralTerm("\n")), 353 | mkTagTerm("field=LexSteps"), 354 | mkRepeatZeroTerm( 355 | mkRuleTerm("LexStep"), 356 | ), 357 | mkTagTerm("field=Rules"), 358 | mkRepeatOneTerm( 359 | mkRuleTerm("Rule"), 360 | ), 361 | mkTagTerm("field=Symbols"), 362 | mkRepeatZeroTerm( 363 | mkRuleTerm("Symbol"), 364 | ), 365 | ), 366 | mkRule("LexStep", 367 | mkTagTerm("field=Name"), 368 | mkInlineRuleTerm("identifier"), 369 | mkLiteralTerm(":"), 370 | mkTagTerm("field=Pattern"), 371 | mkInlineRuleTerm("regexp"), 372 | mkRepeatOneTerm(mkLiteralTerm("\n")), 373 | ), 374 | mkRule("Rule", 375 | mkTagTerm("field=Name"), 376 | mkInlineRuleTerm("identifier"), 377 | mkLiteralTerm("=>"), 378 | mkTagTerm("field=Expr"), 379 | mkInlineRuleTerm("Expr"), 380 | mkRepeatOneTerm(mkLiteralTerm("\n")), 381 | ), 382 | mkRule("Symbol", 383 | mkTagTerm("field=Name"), 384 | mkInlineRuleTerm("identifier"), 385 | mkLiteralTerm("="), 386 | mkTagTerm("field=Pattern"), 387 | mkInlineRuleTerm("regexp"), 388 | mkRepeatOneTerm(mkLiteralTerm("\n")), 389 | ), 390 | mkRule("Expr", 391 | mkRepeatOneTerm(mkRuleTerm("Term")), 392 | ), 393 | mkRule("Term", 394 | mkInlineRuleTerm("Term1"), 395 | ), 396 | mkRule("Term", 397 | mkInlineRuleTerm("Term2"), 398 | ), 399 | mkRule("Term1", 400 | 
mkTagTerm("type=RepeatZeroTerm"), 401 | mkTagTerm("field=Term"), 402 | mkRuleTerm("Term2"), 403 | mkLiteralTerm("*"), 404 | ), 405 | mkRule("Term1", 406 | mkTagTerm("type=RepeatOneTerm"), 407 | mkTagTerm("field=Term"), 408 | mkRuleTerm("Term2"), 409 | mkLiteralTerm("+"), 410 | ), 411 | mkRule("Term2", 412 | mkTagTerm("type=OptionalTerm"), 413 | mkLiteralTerm("["), 414 | mkTagTerm("field=Expr"), 415 | mkInlineRuleTerm("Expr"), 416 | mkLiteralTerm("]"), 417 | ), 418 | mkRule("Term2", 419 | mkTagTerm("type=GroupTerm"), 420 | mkLiteralTerm("("), 421 | mkTagTerm("field=Expr"), 422 | mkInlineRuleTerm("Expr"), 423 | mkLiteralTerm(")"), 424 | ), 425 | mkRule("Term2", 426 | mkTagTerm("type=RuleTerm"), 427 | mkLiteralTerm("<<"), 428 | mkTagTerm("field=Name"), 429 | mkInlineRuleTerm("identifier"), 430 | mkLiteralTerm(">>"), 431 | ), 432 | mkRule("Term2", 433 | mkTagTerm("type=InlineRuleTerm"), 434 | mkLiteralTerm("<"), 435 | mkTagTerm("field=Name"), 436 | mkInlineRuleTerm("identifier"), 437 | mkLiteralTerm(">"), 438 | ), 439 | mkRule("Term2", 440 | mkTagTerm("type=TagTerm"), 441 | mkTagTerm("field=Tag"), 442 | mkInlineRuleTerm("tag"), 443 | ), 444 | mkRule("Term2", 445 | mkTagTerm("type=LiteralTerm"), 446 | mkTagTerm("field=Literal"), 447 | mkInlineRuleTerm("literal"), 448 | ), 449 | }, 450 | []Node{ 451 | mkSymbol("identifier", `([a-zA-Z][a-zA-Z0-9_]*)`), 452 | mkSymbol("literal", `'((?:[\\']|[^'])+?)'`), 453 | mkSymbol("tag", `\{((?:[\\']|[^'])+?)\}`), 454 | mkSymbol("regexp", `\/((?:\\/|[^\n])+?)\/`), 455 | }, 456 | ) 457 | -------------------------------------------------------------------------------- /decode.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The gopp AUTHORS. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package gopp 6 | 7 | import ( 8 | "errors" 9 | "fmt" 10 | "github.com/skelterjohn/debugtags" 11 | "io" 12 | "io/ioutil" 13 | "reflect" 14 | "strconv" 15 | "strings" 16 | ) 17 | 18 | type DecoderFactory struct { 19 | g Grammar 20 | start string 21 | types map[string]reflect.Type 22 | } 23 | 24 | func NewDecoderFactory(gopp string, start string) (df *DecoderFactory, err error) { 25 | df = &DecoderFactory{ 26 | start: start, 27 | types: map[string]reflect.Type{}, 28 | } 29 | ast, err := Parse(ByHandGrammar, "Grammar", []byte(gopp)) 30 | if err != nil { 31 | return 32 | } 33 | sa := NewStructuredAST(ast) 34 | sa.RegisterType(RepeatZeroTerm{}) 35 | sa.RegisterType(RepeatOneTerm{}) 36 | sa.RegisterType(OptionalTerm{}) 37 | sa.RegisterType(GroupTerm{}) 38 | sa.RegisterType(RuleTerm{}) 39 | sa.RegisterType(InlineRuleTerm{}) 40 | sa.RegisterType(TagTerm{}) 41 | sa.RegisterType(LiteralTerm{}) 42 | err = sa.Decode(&df.g) 43 | if err != nil { 44 | return 45 | } 46 | return 47 | } 48 | 49 | func (df *DecoderFactory) RegisterType(x interface{}) { 50 | typ := reflect.TypeOf(x) 51 | df.types[typ.Name()] = typ 52 | } 53 | 54 | func (df *DecoderFactory) NewDecoder(r io.Reader) (d Decoder) { 55 | d = Decoder{ 56 | DecoderFactory: df, 57 | Reader: r, 58 | } 59 | return 60 | } 61 | 62 | type Decoder struct { 63 | *DecoderFactory 64 | io.Reader 65 | } 66 | 67 | func (d *Decoder) Decode(obj interface{}) (err error) { 68 | document, err := ioutil.ReadAll(d.Reader) 69 | if err != nil { 70 | return 71 | } 72 | ast, err := Parse(d.g, d.start, document) 73 | if err != nil { 74 | return 75 | } 76 | sa := NewStructuredAST(ast) 77 | sa.types = d.types 78 | err = sa.Decode(obj) 79 | if err != nil { 80 | return 81 | } 82 | return 83 | } 84 | 85 | func getTagValue(typ string, t Tag) (value string, ok bool) { 86 | prefix := typ + "=" 87 | if strings.HasPrefix(string(t), prefix) { 88 | value = string(t[len(prefix):]) 89 | ok = true 90 | } 91 | return 92 | } 93 | 94 | var _ = fmt.Println 95 | 96 | type StructuredAST struct { 97 | ast AST 98 | types map[string]reflect.Type 99 | } 100 | 101 | func NewStructuredAST(ast AST) (sa StructuredAST) { 102 | sa = StructuredAST{ 103 | ast: ast, 104 | types: map[string]reflect.Type{}, 105 | } 106 | return 107 | } 108 | 109 | func (sa StructuredAST) RegisterType(x interface{}) { 110 | t := reflect.TypeOf(x) 111 | sa.types[t.Name()] = t 112 | } 113 | 114 | func (sa StructuredAST) Decode(obj interface{}) (err error) { 115 | return sa.decode([]Node(sa.ast), reflect.ValueOf(obj)) 116 | } 117 | 118 | var dtr = debugtags.Tracer{Enabled: false} 119 | 120 | func SetDTr(enabled bool) { 121 | dtr.Enabled = enabled 122 | } 123 | 124 | func (sa StructuredAST) decode(node Node, v reflect.Value) (err error) { 125 | name := fmt.Sprintf("%T", v.Interface()) 126 | dtr.In(name, node) 127 | defer func() { 128 | dtr.Out(name, v.Interface()) 129 | }() 130 | 131 | typ := v.Type() 132 | 133 | // deref a pointer 134 | if typ.Kind() == reflect.Ptr { 135 | // but first check if it's nil and, if so, allocate 136 | if v.IsNil() { 137 | v.Set(reflect.New(typ.Elem())) 138 | } 139 | v = v.Elem() 140 | typ = typ.Elem() 141 | } 142 | 143 | switch typ.Kind() { 144 | // populate struct fields 145 | case reflect.Struct: 146 | // we've got a struct pointer - iterate through node looking for field= tags 147 | nodes, ok := node.([]Node) 148 | if !ok { 149 | err = errors.New("Need to populate struct via []Node with tags.") 150 | return 151 | } 152 | for i := range nodes { 153 | if tag, ok := nodes[i].(Tag); ok { 
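// A type= tag here is a consistency check: the concrete type was already
// chosen (by the caller, or by makePointerWithType for interface values),
// so it only has to match. A field= tag routes the node that follows it
// into the named field of the struct being populated.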
154 | if typName, isType := getTagValue("type", tag); isType { 155 | if typName != typ.Name() { 156 | err = fmt.Errorf("AST wants type %q, being decoded to type %q.", typName, typ.Name()) 157 | } 158 | } 159 | 160 | if name, isField := getTagValue("field", tag); isField { 161 | // if we have a field tag, that indicates that the next node should be decoded into the field with the given name. 162 | var fv reflect.Value 163 | fv, err = getField(v, name) 164 | if err != nil { 165 | return 166 | } 167 | 168 | if fv.Type().Kind() == reflect.Interface { 169 | //dtr.Println("field of interface") 170 | var pv reflect.Value 171 | pv, err = sa.makePointerWithType(nodes[i+1]) 172 | if err != nil { 173 | return 174 | } 175 | err = sa.decode(nodes[i+1], pv.Elem()) 176 | fv.Set(pv.Elem()) 177 | if err != nil { 178 | return 179 | } 180 | } else { 181 | if err = sa.decode(nodes[i+1], fv); err != nil { return } 182 | } 183 | } 184 | } 185 | } 186 | 187 | // map things into slices 188 | case reflect.Slice: 189 | //fmt.Printf("Going into %s is\n", typ.Elem().Name()) 190 | //printNode(node, 0) 191 | isInterfaceSlice := typ.Elem().Kind() == reflect.Interface 192 | // if isInterfaceSlice { 193 | // dtr.Println("slice of interface") 194 | // } 195 | nodes, ok := node.([]Node) 196 | if !ok { 197 | err = errors.New("Need to populate slice via []Node.") 198 | return 199 | } 200 | for _, n := range nodes { 201 | // create an addressable value to put in the slice 202 | ev := reflect.New(typ.Elem()).Elem() 203 | if isInterfaceSlice { 204 | var pv reflect.Value 205 | pv, err = sa.makePointerWithType(n) 206 | if err != nil { 207 | return 208 | } 209 | err = sa.decode(n, pv.Elem()) 210 | ev.Set(pv.Elem()) 211 | if err != nil { 212 | return 213 | } 214 | } else { 215 | // temporarily shadow ev, leaving the thing to be put back in the slice alone. 
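// Each level of pointer indirection gets a fresh allocation here, so a
// []*T element is allocated before its pointee is decoded into.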
216 | ev := ev 217 | for ev.Type().Kind() == reflect.Ptr { 218 | pv := ev 219 | newthing := reflect.New(pv.Type().Elem()) 220 | pv.Set(newthing) 221 | ev = pv.Elem() 222 | } 223 | err = sa.decode(n, ev) 224 | if err != nil { 225 | return 226 | } 227 | } 228 | // this is how append looks w/ reflect 229 | v.Set(reflect.Append(v, ev)) 230 | } 231 | 232 | // symbols, literals, and tags go into strings 233 | case reflect.String: 234 | s := "" 235 | if s, err = getString(node); err != nil { 236 | err = errors.New("Trying to store invalid type into string field.") 237 | return 238 | } 239 | v.SetString(s) 240 | 241 | // and into ints 242 | case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: 243 | s := "" 244 | if s, err = getString(node); err != nil { 245 | err = errors.New("Trying to store invalid type into integer field.") 246 | return 247 | } 248 | var x int64 249 | if x, err = strconv.ParseInt(s, 0, 64); err != nil { 250 | return 251 | } 252 | v.SetInt(x) 253 | 254 | // and also into uints 255 | case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: 256 | s := "" 257 | if s, err = getString(node); err != nil { 258 | err = errors.New("Trying to store invalid type into unsigned integer field.") 259 | return 260 | } 261 | var x uint64 262 | if x, err = strconv.ParseUint(s, 0, 64); err != nil { 263 | return 264 | } 265 | v.SetUint(x) 266 | 267 | default: 268 | err = fmt.Errorf("Unanticipated type: %s.", typ.Name()) 269 | } 270 | 271 | return 272 | } 273 | 274 | func (sa StructuredAST) makePointerWithType(node Node) (pointer reflect.Value, err error) { 275 | var ntag Tag 276 | nodes, ok := node.([]Node) 277 | if ok && len(nodes) != 0 { 278 | ntag, ok = nodes[0].(Tag) 279 | if ok { 280 | ok = strings.HasPrefix(string(ntag), "type=") 281 | } 282 | } 283 | if !ok { 284 | err = errors.New("Can only infer type from []Node with a type= tag.") 285 | return 286 | } 287 | typeName := ntag[len("type="):] 288 | typ, ok := sa.types[string(typeName)] 289 | if !ok { 290 | err = fmt.Errorf("Unregistered type: %q.", string(typeName)) 291 | return 292 | } 293 | pointer = reflect.New(typ) 294 | return 295 | } 296 | 297 | func getField(v reflect.Value, field string) (fv reflect.Value, err error) { 298 | defer func() { 299 | if recover() != nil { 300 | err = fmt.Errorf("Type %s has no field named %q.", v.Type().Name(), field) 301 | } 302 | }() 303 | if field == "." { 304 | // . means to store the next level deeper in the same value 305 | fv = v 306 | } else { 307 | fv = v.FieldByName(field) 308 | } 309 | return 310 | } 311 | 312 | func getString(node Node) (s string, err error) { 313 | switch nn := node.(type) { 314 | case SymbolText: 315 | s = nn.Text 316 | case Tag: 317 | s = string(nn) 318 | case Literal: 319 | s = string(nn) 320 | default: 321 | return "", fmt.Errorf("Expected symbol, tag, or literal, but got %T", node) 322 | } 323 | ds, derr := descapeString(s) 324 | if derr == nil { 325 | s = ds 326 | } 327 | return 328 | } 329 | -------------------------------------------------------------------------------- /decode_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The gopp AUTHORS. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package gopp_test 6 | 7 | import ( 8 | "strings" 9 | "testing" 10 | 11 | "github.com/skelterjohn/gopp" 12 | ) 13 | 14 | type Node struct { 15 | Val string 16 | Kids []*Node 17 | } 18 | 19 | func TestDecodePtrSlice(t *testing.T) { 20 | grammar := ` 21 | ignore: /^\s+/ 22 | 23 | Start => {field=Kids} <<Node>>* 24 | Node => {field=Val} <dig> 25 | 26 | dig = /(\d+)/ 27 | ` 28 | data := []string{"1", "4", "9", "42"} 29 | in := strings.Join(data, " ") 30 | 31 | df, err := gopp.NewDecoderFactory(grammar, "Start") 32 | if err != nil { 33 | t.Error(err) 34 | } 35 | dec := df.NewDecoder(strings.NewReader(in)) 36 | out := &Node{} 37 | err = dec.Decode(out) 38 | if err != nil { 39 | t.Error(err) 40 | } 41 | if len(data) != len(out.Kids) { 42 | t.Fatalf("Expected %d nodes, got %d", len(data), len(out.Kids)) 43 | } 44 | for i, s := range data { 45 | if out.Kids[i].Val != s { 46 | t.Errorf("Expected node %s, got %s", s, out.Kids[i].Val) 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /errors_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The gopp AUTHORS. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package gopp_test 6 | 7 | import ( 8 | "github.com/skelterjohn/gopp" 9 | "strings" 10 | "testing" 11 | ) 12 | 13 | type ErrorCase struct { 14 | Document string 15 | ExpectedError string 16 | } 17 | 18 | type ErrorSubject struct { 19 | Name string 20 | Gopp string 21 | Grammar gopp.Grammar 22 | Cases []ErrorCase 23 | } 24 | 25 | var ErrorSubjects = []ErrorSubject{ 26 | ErrorSubject{ 27 | Name: "LiteralConjunction", 28 | Gopp: ` 29 | Start => 'x' 'y' 'z' 30 | `, 31 | Cases: []ErrorCase{ 32 | ErrorCase{`xyz`, ``}, 33 | ErrorCase{`xzy`, `Expected "y" at 0:1.`}, 34 | ErrorCase{`x`, `Expected "y" at EOF.`}, 35 | }, 36 | }, 37 | } 38 | 39 | func TestErrors(t *testing.T) { 40 | subject: 41 | for _, s := range ErrorSubjects { 42 | df, err := gopp.NewDecoderFactory(s.Gopp, "Start") 43 | if err != nil { 44 | t.Error(err) 45 | continue subject 46 | } 47 | scase: 48 | for _, c := range s.Cases { 49 | dec := df.NewDecoder(strings.NewReader(c.Document)) 50 | err = dec.Decode(&XYZ{}) 51 | if err == nil && c.ExpectedError != "" { 52 | t.Errorf("Expected error %q, got none.", c.ExpectedError) 53 | continue scase 54 | } 55 | if err != nil && err.Error() != c.ExpectedError { 56 | t.Error(err) 57 | continue scase 58 | } 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /gopp.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The gopp AUTHORS. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package gopp 6 | 7 | import ( 8 | "fmt" 9 | "regexp" 10 | "sort" 11 | "strings" 12 | ) 13 | 14 | type Grammar struct { 15 | LexSteps []LexStep 16 | Rules []Rule 17 | Symbols []Symbol 18 | } 19 | 20 | func (g Grammar) RulesForName(name string) (rs []Rule) { 21 | for _, rule := range g.Rules { 22 | if rule.Name == name { 23 | rs = append(rs, rule) 24 | } 25 | } 26 | return 27 | } 28 | 29 | func (g Grammar) Symbol(name string) (s Symbol, ok bool) { 30 | for _, symb := range g.Symbols { 31 | if symb.Name == name { 32 | s = symb 33 | ok = true 34 | return 35 | } 36 | } 37 | return 38 | } 39 | 40 | func (g Grammar) CollectLiterals(literals map[string]bool) { 41 | for _, rule := range g.Rules { 42 | rule.CollectLiterals(literals) 43 | } 44 | return 45 | } 46 | 47 | type TypedRegexp struct { 48 | Type string 49 | *regexp.Regexp 50 | } 51 | 52 | func (g Grammar) TokenREs() (res []TypedRegexp, err error) { 53 | // first get all the literals, and sort them longest first (so smaller ones don't eat larger ones). 54 | literals := map[string]bool{} 55 | g.CollectLiterals(literals) 56 | sortedLiterals := literalSorter{} 57 | for literal := range literals { 58 | sortedLiterals = append(sortedLiterals, literal) 59 | } 60 | sort.Sort(sortedLiterals) 61 | for _, literal := range sortedLiterals { 62 | re, err := regexp.Compile("^(" + regexp.QuoteMeta(literal) + ")") 63 | if err != nil { 64 | panic("regexp.QuoteMeta returned something that didn't compile") 65 | } 66 | res = append(res, TypedRegexp{"RAW", re}) 67 | } 68 | for _, symbol := range g.Symbols { 69 | var re *regexp.Regexp 70 | re, err = regexp.Compile("^" + symbol.Pattern) 71 | if err != nil { 72 | return 73 | } 74 | res = append(res, TypedRegexp{symbol.Name, re}) 75 | } 76 | return 77 | } 78 | 79 | func (g Grammar) IgnoreREs() (res []*regexp.Regexp, err error) { 80 | for _, ls := range g.LexSteps { 81 | if ls.Name == "ignore" { 82 | var re *regexp.Regexp 83 | re, err = regexp.Compile(ls.Pattern) 84 | if err != nil { 85 | return 86 | } 87 | res = append(res, re) 88 | } 89 | } 90 | return 91 | } 92 | 93 | type LexStep struct { 94 | Name string 95 | Pattern string 96 | } 97 | 98 | type Rule struct { 99 | Name string 100 | Expr 101 | } 102 | 103 | func (r Rule) String() string { 104 | return fmt.Sprintf("Rule(%s:%v)", r.Name, r.Expr) 105 | } 106 | 107 | type Symbol struct { 108 | Name string 109 | Pattern string 110 | } 111 | 112 | type Expr []Term 113 | 114 | func (e Expr) CollectLiterals(literals map[string]bool) { 115 | for _, term := range e { 116 | term.CollectLiterals(literals) 117 | } 118 | return 119 | } 120 | 121 | type Term interface { 122 | CollectLiterals(literals map[string]bool) 123 | Parse(g Grammar, tokens []Token, pd *ParseData, parentRuleNames []string) (items []Node, remainingTokens []Token, err error) 124 | } 125 | 126 | type RepeatZeroTerm struct { 127 | Term 128 | } 129 | 130 | func (rzt RepeatZeroTerm) String() string { 131 | return fmt.Sprintf("RepeatZeroTerm(%v)", rzt.Term) 132 | } 133 | 134 | type RepeatOneTerm struct { 135 | Term 136 | } 137 | 138 | func (rot RepeatOneTerm) String() string { 139 | return fmt.Sprintf("RepeatOneTerm(%v)", rot.Term) 140 | } 141 | 142 | type OptionalTerm struct { 143 | Expr 144 | } 145 | 146 | func (ot OptionalTerm) String() string { 147 | return fmt.Sprintf("OptionalTerm(%v)", ot.Expr) 148 | } 149 | 150 | type GroupTerm struct { 151 | Expr 152 | } 153 | 154 | func (gt GroupTerm) String() string { 155 | return fmt.Sprintf("GroupTerm(%v)", gt.Expr) 156 | } 157 | 158 | type noLiterals 
struct{} 159 | 160 | func (n noLiterals) CollectLiterals(literals map[string]bool) { 161 | return 162 | } 163 | 164 | type RuleTerm struct { 165 | Name string 166 | noLiterals 167 | } 168 | 169 | func (rt RuleTerm) String() string { 170 | return fmt.Sprintf("RuleTerm(%s)", rt.Name) 171 | } 172 | 173 | type InlineRuleTerm struct { 174 | Name string 175 | noLiterals 176 | } 177 | 178 | func (irt InlineRuleTerm) String() string { 179 | return fmt.Sprintf("InlineRuleTerm(%s)", irt.Name) 180 | } 181 | 182 | type TagTerm struct { 183 | Tag string 184 | noLiterals 185 | } 186 | 187 | func (tt TagTerm) String() string { 188 | return fmt.Sprintf("TagTerm(%q)", tt.Tag) 189 | } 190 | 191 | type LiteralTerm struct { 192 | Literal string 193 | } 194 | 195 | func (lt LiteralTerm) String() string { 196 | return fmt.Sprintf("LiteralTerm(%q)", lt.Literal) 197 | } 198 | 199 | func (l LiteralTerm) CollectLiterals(literals map[string]bool) { 200 | literals[l.Literal] = true 201 | return 202 | } 203 | 204 | type AST []Node 205 | 206 | type Node interface{} 207 | type Tag string 208 | 209 | func (t Tag) String() string { 210 | return fmt.Sprintf("Tag(%s)", string(t)) 211 | } 212 | 213 | type Literal string 214 | 215 | func (l Literal) String() string { 216 | return fmt.Sprintf("Literal(%s)", strings.Replace(string(l), "\n", `\n`, -1)) 217 | } 218 | 219 | type Identifier string 220 | 221 | func (i Identifier) String() string { 222 | return fmt.Sprintf("Identifier(%s)", string(i)) 223 | } 224 | 225 | type Regexp string 226 | 227 | func (r Regexp) String() string { 228 | return fmt.Sprintf("Regexp(%s)", string(r)) 229 | } 230 | 231 | type SymbolText struct { 232 | Type string 233 | Text string 234 | } 235 | 236 | func (s SymbolText) String() string { 237 | return fmt.Sprintf("<%s:%q>", s.Type, s.Text) 238 | } 239 | -------------------------------------------------------------------------------- /gopp.gopp: -------------------------------------------------------------------------------- 1 | # Copyright 2013 The gopp AUTHORS. All rights reserved. 2 | # Use of this source code is governed by a BSD-style 3 | # license that can be found in the LICENSE file. 4 | 5 | # The first things are lex steps, which are for use by the tokenizer. 6 | # Currently the only recognized lex step is stuff to ignore. 7 | 8 | # We ignore comments, 9 | ignore: /^#.*\n/ 10 | # and whitespace that precedes something more interesting. 11 | ignore: /^(?:[ \t])+/ 12 | 13 | # After the lex steps are the rules. 14 | # The fact that Grammar is first is irrelevant. The name of the starting rule 15 | # needs to be provided in code. 16 | # A Grammar is made up of lists of LexSteps, Rules, and Symbols, in that order, 17 | # and there may be zero LexSteps or Symbols. There must be at least one Rule. 18 | Grammar => {type=Grammar} '\n'* {field=LexSteps} <<LexStep>>* {field=Rules} <<Rule>>+ {field=Symbols} <<Symbol>>* 19 | 20 | # The next three rules define the major types of elements in a grammar. 21 | 22 | # A LexStep is an identifier, a literal ':', and a regexp pattern. If the name 23 | # is 'ignore', then when the lexer goes to get the next token, it will try to 24 | # trim the remaining document using the provided pattern. No other names are 25 | # used, currently. 26 | LexStep => {field=Name} <identifier> ':' {field=Pattern} <regexp> '\n'+ 27 | 28 | # A Rule is an identifier, a literal '=>', an Expr, and ends with one or more 29 | # newlines. 
30 | Rule => {field=Name} <identifier> '=>' {field=Expr} <Expr> '\n'+ 31 | # A Symbol is an identifier, a literal '=', a regexp, and ends with one or more 32 | # newlines. 33 | Symbol => {field=Name} <identifier> '=' {field=Pattern} <regexp> '\n'+ 34 | 35 | # An Expr is one or more Terms. 36 | Expr => <<Term>>+ 37 | 38 | # A Term can be a Term1, 39 | Term => <Term1> 40 | # or a Term2. 41 | Term => <Term2> 42 | 43 | # A Term1 can be a Term2 followed by a literal '*', 44 | Term1 => {type=RepeatZeroTerm} {field=Term} <<Term2>> '*' 45 | # or a Term2 followed by a literal '+'. 46 | Term1 => {type=RepeatOneTerm} {field=Term} <<Term2>> '+' 47 | 48 | # A Term2 can be an Expr surrounded by '[' and ']', 49 | Term2 => {type=OptionalTerm} '[' {field=Expr} <Expr> ']' 50 | # or by '(' and ')', 51 | Term2 => {type=GroupTerm} '(' {field=Expr} <Expr> ')' 52 | # or an identifier surrounded by '<<' and '>>', 53 | Term2 => {type=RuleTerm} '<<' {field=Name} <identifier> '>>' 54 | # or by '<' and '>', 55 | Term2 => {type=InlineRuleTerm} '<' {field=Name} <identifier> '>' 56 | # or a tag, 57 | Term2 => {type=TagTerm} {field=Tag} <tag> 58 | # or a literal. 59 | Term2 => {type=LiteralTerm} {field=Literal} <literal> 60 | 61 | # And last is the symbols, which are regular expressions that can be found in 62 | # the document. Their order is important - it indicates the order in which the 63 | # tokenizer attempts to match them against the rest of the document. So, if two 64 | # symbols could be used starting at the same point in the document, the one 65 | # that is listed first will win. 66 | identifier = /([a-zA-Z][a-zA-Z0-9_]*)/ 67 | literal = /'((?:[\\']|[^'])+?)'/ 68 | tag = /\{((?:[\\']|[^'])+?)\}/ 69 | regexp = /\/((?:\\/|[^\n])+?)\// -------------------------------------------------------------------------------- /literals_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The gopp AUTHORS. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package gopp_test 6 | 7 | import ( 8 | "github.com/skelterjohn/gopp" 9 | "strings" 10 | "testing" 11 | ) 12 | 13 | // test literal syntax 14 | 15 | const literalgopp = ` 16 | Start => <Rule> 17 | 18 | Rule => {field=R} 'Success!' 19 | Rule => {field=S} '1' 20 | Rule => {field=T} <symbol> 21 | Rule => {field=X} {Success!} 'X' 22 | Rule => {field=Y} {1} 'Y' 23 | Rule => {field=Z} {1} 'Z' 24 | 25 | symbol = /(2)/ 26 | ` 27 | 28 | type LiteralTester struct { 29 | R string 30 | S int 31 | T int 32 | X string 33 | Y int 34 | Z uint 35 | } 36 | 37 | var LiteralTestTable = [...]struct { 38 | expected LiteralTester 39 | src string 40 | }{ 41 | {LiteralTester{X: "Success!"}, "X"}, 42 | {LiteralTester{Y: 1}, "Y"}, 43 | {LiteralTester{Z: 1}, "Z"}, 44 | {LiteralTester{R: "Success!"}, "Success!"}, 45 | {LiteralTester{S: 1}, "1"}, 46 | {LiteralTester{T: 2}, "2"}, 47 | } 48 | 49 | func TestLiteral(t *testing.T) { 50 | df, err := gopp.NewDecoderFactory(literalgopp, "Start") 51 | if err != nil { 52 | t.Error(err) 53 | return 54 | } 55 | for _, test := range LiteralTestTable { 56 | dec := df.NewDecoder(strings.NewReader(test.src)) 57 | var lit LiteralTester 58 | err = dec.Decode(&lit) 59 | if err != nil { 60 | t.Error(err) 61 | return 62 | } 63 | if lit != test.expected { 64 | t.Errorf("Expected %+v, got %+v.", test.expected, lit) 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /parse.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The gopp AUTHORS. All rights reserved. 
2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package gopp 6 | 7 | import ( 8 | "errors" 9 | "fmt" 10 | "github.com/skelterjohn/debugtags" 11 | 12 | ) 13 | 14 | func Parse(g Grammar, startRule string, document []byte) (ast AST, err error) { 15 | tokenREs, err := g.TokenREs() 16 | if err != nil { 17 | return 18 | } 19 | ignoreREs, err := g.IgnoreREs() 20 | if err != nil { 21 | return 22 | } 23 | ti := TokenizeInfo{ 24 | TokenREs: tokenREs, 25 | IgnoreREs: ignoreREs, 26 | } 27 | tokens, err := Tokenize(ti, document) 28 | if err != nil { 29 | return 30 | } 31 | rules := g.RulesForName(startRule) 32 | if len(rules) != 1 { 33 | err = fmt.Errorf("Rule %q had %d definitions.", startRule, len(rules)) 34 | return 35 | } 36 | start := rules[0] 37 | pd := NewParseData() 38 | items, remaining, err := start.Parse(g, tokens, pd, []string{}) 39 | 40 | if err != nil { 41 | // TODO: use pd to return informative error messages. 42 | if len(pd.FarthestErrors) > 0 { err = pd.FarthestErrors[0] } 43 | return 44 | } 45 | if len(remaining) != 0 { 46 | err = errors.New("Did not parse entire file.") 47 | } 48 | 49 | ast = items 50 | 51 | return 52 | } 53 | 54 | const debug = false 55 | 56 | func SetTr(e bool) { 57 | tr.Enabled = e 58 | } 59 | 60 | var tr = debugtags.Tracer{Enabled: false} 61 | 62 | type ParseData struct { 63 | accepted bool 64 | LastUnacceptedTokens []Token 65 | errored bool 66 | FarthestErrors []error 67 | TokensForError []Token 68 | } 69 | 70 | func NewParseData() (pd *ParseData) { 71 | pd = &ParseData{} 72 | return 73 | } 74 | 75 | func (pd *ParseData) AcceptUpTo(remaining []Token) { 76 | if !pd.accepted || len(remaining) < len(pd.LastUnacceptedTokens) { 77 | pd.LastUnacceptedTokens = remaining 78 | } 79 | pd.accepted = true 80 | } 81 | 82 | func (pd *ParseData) ErrorWith(err error, remaining []Token) { 83 | if !pd.errored || len(remaining) < len(pd.TokensForError) { 84 | pd.FarthestErrors = append(pd.FarthestErrors, err) 85 | pd.TokensForError = remaining 86 | } 87 | pd.errored = true 88 | } 89 | 90 | func (r Rule) Parse(g Grammar, tokens []Token, pd *ParseData, parentRuleNames []string) (items []Node, remainingTokens []Token, err error) { 91 | rName := fmt.Sprintf("Rule(%q)", r.Name) 92 | tr.In(rName, tokens) 93 | defer func() { 94 | if err == nil { 95 | tr.Out(rName, items) 96 | } else { 97 | tr.Out(rName, err) 98 | } 99 | }() 100 | 101 | for _, n := range parentRuleNames { 102 | if n == r.Name { 103 | err = fmt.Errorf("Rule cycle with %q.", r.Name) 104 | return 105 | } 106 | } 107 | 108 | items, remainingTokens, err = r.Expr.Parse(g, tokens, pd, append(parentRuleNames, r.Name)) 109 | return 110 | } 111 | 112 | func (e Expr) Parse(g Grammar, tokens []Token, pd *ParseData, parentRuleNames []string) (items []Node, remainingTokens []Token, err error) { 113 | rName := "Expr" 114 | tr.In(rName, tokens) 115 | defer func() { 116 | if err == nil { 117 | tr.Out(rName, items) 118 | } else { 119 | tr.Out(rName, err) 120 | } 121 | }() 122 | 123 | startTokens := tokens 124 | 125 | for _, term := range e { 126 | var newItems []Node 127 | var prns []string 128 | if len(startTokens) == len(tokens) { 129 | prns = parentRuleNames 130 | } 131 | newItems, tokens, err = term.Parse(g, tokens, pd, prns) 132 | if err != nil { 133 | return 134 | } 135 | items = append(items, newItems...) 136 | 
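// NOTE: parentRuleNames is only forwarded while the expression has not yet
// consumed any tokens; once progress has been made, re-entering the same
// rule is ordinary recursion rather than a left-recursion cycle.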
} 137 | remainingTokens = tokens 138 | return 139 | } 140 | 141 | func (t RepeatZeroTerm) Parse(g Grammar, tokens []Token, pd *ParseData, parentRuleNames []string) (items []Node, remainingTokens []Token, err error) { 142 | rName := "RepeatZeroTerm" 143 | tr.In(rName, tokens) 144 | defer func() { 145 | if err == nil { 146 | tr.Out(rName, items) 147 | } else { 148 | tr.Out(rName, err) 149 | } 150 | }() 151 | 152 | remainingTokens = tokens 153 | var myitems []Node 154 | first := true 155 | for { 156 | var prns []string 157 | if first { 158 | prns = parentRuleNames 159 | first = false 160 | } 161 | subitems, subtokens, suberr := t.Term.Parse(g, remainingTokens, pd, prns) 162 | if suberr != nil { 163 | break 164 | } 165 | myitems = append(myitems, subitems...) 166 | remainingTokens = subtokens 167 | } 168 | items = []Node{myitems} 169 | return 170 | } 171 | 172 | func (t RepeatOneTerm) Parse(g Grammar, tokens []Token, pd *ParseData, parentRuleNames []string) (items []Node, remainingTokens []Token, err error) { 173 | rName := "RepeatOneTerm" 174 | tr.In(rName, tokens) 175 | defer func() { 176 | if err == nil { 177 | tr.Out(rName, items) 178 | } else { 179 | tr.Out(rName, err) 180 | } 181 | }() 182 | 183 | remainingTokens = tokens 184 | var myitems []Node 185 | first := true 186 | var suberr error 187 | for { 188 | var prns []string 189 | if first { 190 | prns = parentRuleNames 191 | first = false 192 | } 193 | var subitems []Node 194 | var subtokens []Token 195 | subitems, subtokens, suberr = t.Term.Parse(g, remainingTokens, pd, prns) 196 | if suberr != nil { 197 | break 198 | } 199 | myitems = append(myitems, subitems...) 200 | remainingTokens = subtokens 201 | } 202 | items = []Node{myitems} 203 | if len(myitems) == 0 { 204 | err = suberr 205 | pd.ErrorWith(err, tokens) 206 | } 207 | return 208 | } 209 | 210 | func (t OptionalTerm) Parse(g Grammar, tokens []Token, pd *ParseData, parentRuleNames []string) (items []Node, remainingTokens []Token, err error) { 211 | rName := "OptionalTerm" 212 | tr.In(rName, tokens) 213 | defer func() { 214 | if err == nil { 215 | tr.Out(rName, items) 216 | } else { 217 | tr.Out(rName, err) 218 | } 219 | }() 220 | 221 | subitems, subtokens, suberr := t.Expr.Parse(g, tokens, pd, parentRuleNames) 222 | if suberr != nil { 223 | remainingTokens = tokens 224 | return 225 | } 226 | items = subitems 227 | remainingTokens = subtokens 228 | return 229 | } 230 | 231 | func (t RuleTerm) Parse(g Grammar, tokens []Token, pd *ParseData, parentRuleNames []string) (items []Node, remainingTokens []Token, err error) { 232 | rName := fmt.Sprintf("RuleTerm(%q)", t.Name) 233 | tr.In(rName, tokens) 234 | defer func() { 235 | if err == nil { 236 | tr.Out(rName, items) 237 | } else { 238 | tr.Out(rName, err) 239 | } 240 | }() 241 | 242 | rules := g.RulesForName(t.Name) 243 | if len(rules) == 0 { 244 | err = fmt.Errorf("Unknown rule name: %q.", t.Name) 245 | pd.ErrorWith(err, tokens) 246 | return 247 | } 248 | 249 | var subitems []Node 250 | //fmt.Printf("%d rules for %q.\n", len(rules), t.Name) 251 | for _, rule := range rules { 252 | // if tt, ok := rule.Expr[0].(TagTerm); ok { 253 | // fmt.Printf("Trying %q.\n", tt.Tag) 254 | // } 255 | subitems, remainingTokens, err = rule.Parse(g, tokens, pd, parentRuleNames) 256 | 257 | if err == nil { 258 | items = []Node{subitems} 259 | return 260 | } 261 | } 262 | 263 | return 264 | } 265 | 266 | func (t InlineRuleTerm) Parse(g Grammar, tokens []Token, pd *ParseData, parentRuleNames []string) (items 
[]Node, remainingTokens []Token, err error) { 267 | rName := fmt.Sprintf("InlineRuleTerm(%q)", t.Name) 268 | tr.In(rName, tokens) 269 | defer func() { 270 | if err == nil { 271 | tr.Out(rName, items) 272 | } else { 273 | tr.Out(rName, err) 274 | } 275 | }() 276 | 277 | rules := g.RulesForName(t.Name) 278 | for _, rule := range rules { 279 | items, remainingTokens, err = rule.Parse(g, tokens, pd, parentRuleNames) 280 | 281 | if err == nil { 282 | return 283 | } 284 | } 285 | err = nil 286 | if _, ok := g.Symbol(t.Name); ok { 287 | if len(tokens) < 1 { 288 | err = errors.New("Need at least one token to make a symbol.") 289 | pd.ErrorWith(err, tokens) 290 | return 291 | } 292 | if t.Name == tokens[0].Type { 293 | st := SymbolText{ 294 | Type: t.Name, 295 | Text: tokens[0].Text, 296 | } 297 | items = []Node{st} 298 | remainingTokens = tokens[1:] 299 | pd.AcceptUpTo(remainingTokens) 300 | return 301 | } 302 | err = fmt.Errorf("Expected %s at %d:%d.", t.Name, tokens[0].Row, tokens[0].Col) 303 | pd.ErrorWith(err, tokens) 304 | return 305 | } 306 | 307 | err = fmt.Errorf("Unknown rule name: %q.", t.Name) 308 | pd.ErrorWith(err, tokens) 309 | 310 | return 311 | } 312 | 313 | func (t TagTerm) Parse(g Grammar, tokens []Token, pd *ParseData, parentRuleNames []string) (items []Node, remainingTokens []Token, err error) { 314 | tr.Println(Tag(t.Tag)) 315 | items = []Node{Tag(t.Tag)} 316 | remainingTokens = tokens 317 | return 318 | } 319 | 320 | func (t LiteralTerm) Parse(g Grammar, tokens []Token, pd *ParseData, parentRuleNames []string) (items []Node, remainingTokens []Token, err error) { 321 | rName := fmt.Sprintf("LiteralTerm(%q)", t.Literal) 322 | tr.In(rName, tokens) 323 | defer func() { 324 | if err == nil { 325 | tr.Out(rName, items) 326 | } else { 327 | tr.Out(rName, err) 328 | } 329 | }() 330 | 331 | if len(tokens) == 0 { 332 | err = fmt.Errorf("Expected %q at EOF.", t.Literal) 333 | pd.ErrorWith(err, tokens) 334 | return 335 | } 336 | if tokens[0].Type != "RAW" { 337 | err = fmt.Errorf("Expected %q at %d:%d.", t.Literal, tokens[0].Row, tokens[0].Col) 338 | pd.ErrorWith(err, tokens) 339 | return 340 | } 341 | 342 | // Literals are compared against the raw token text; escape sequences were 343 | // already handled when the grammar itself was decoded. 344 | literalText := t.Literal 345 | 346 | if tokens[0].Text != literalText { 347 | err = fmt.Errorf("Expected %q at %d:%d.", t.Literal, tokens[0].Row, tokens[0].Col) 348 | pd.ErrorWith(err, tokens) 349 | return 350 | } 351 | items = []Node{Literal(literalText)} 352 | remainingTokens = tokens[1:] 353 | pd.AcceptUpTo(remainingTokens) 354 | return 355 | } 356 | -------------------------------------------------------------------------------- /parse_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The gopp AUTHORS. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package gopp_test 6 | 7 | import ( 8 | "github.com/skelterjohn/gopp" 9 | "reflect" 10 | "strings" 11 | "testing" 12 | ) 13 | 14 | type Case struct { 15 | Document string 16 | Object, Expected interface{} 17 | } 18 | 19 | type Subject struct { 20 | Name string 21 | Gopp string 22 | Grammar gopp.Grammar 23 | Cases []Case 24 | } 25 | 26 | type XYZ struct { 27 | X, Y, Z string 28 | } 29 | 30 | var Subjects = []Subject{ 31 | Subject{ 32 | Name: "OptionalTest", 33 | Gopp: ` 34 | Start => {field=Y} <Y> [{field=Z} <Z>] 35 | Y = /(y)/ 36 | Z = /(z)/ 37 | `, 38 | Cases: []Case{ 39 | Case{`yz`, &XYZ{}, &XYZ{Y: "y", Z: "z"}}, 40 | Case{`y`, &XYZ{}, &XYZ{Y: "y"}}, 41 | }, 42 | }, 43 | } 44 | 45 | func TestSubjects(t *testing.T) { 46 | subject: 47 | for _, s := range Subjects { 48 | df, err := gopp.NewDecoderFactory(s.Gopp, "Start") 49 | if err != nil { 50 | t.Error(err) 51 | continue subject 52 | } 53 | scase: 54 | for _, c := range s.Cases { 55 | dec := df.NewDecoder(strings.NewReader(c.Document)) 56 | err = dec.Decode(c.Object) 57 | if err != nil { 58 | t.Error(err) 59 | continue scase 60 | } 61 | if !reflect.DeepEqual(c.Object, c.Expected) { 62 | t.Errorf("(%s) With %q, got %+v, expected %+v.", s.Name, c.Document, c.Object, c.Expected) 63 | } 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /parsemath_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The gopp AUTHORS. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package gopp_test 6 | 7 | import ( 8 | "fmt" 9 | "github.com/skelterjohn/gopp" 10 | "strings" 11 | "testing" 12 | ) 13 | 14 | // tests where we create a grammar and parse a document 15 | 16 | const mathgopp = ` 17 | # The root is an equation, with a left-hand and right-hand side. 18 | Eqn => {type=MathEqn} {field=Left} <<Expr>> '=' {field=Right} <<Expr>> '\n' 19 | 20 | # An Expr is either the sum of two terms, 21 | Expr => {type=MathSum} {field=First} <<Term>> '+' {field=Second} <<Expr>> 22 | # or just another term. 23 | Expr => <Term> 24 | 25 | # A Term is either the product of two factors, 26 | Term => {type=MathProduct} {field=First} <<Factor>> '*' {field=Second} <<Term>> 27 | # or just another factor. 28 | Term => <Factor> 29 | 30 | # A factor is either a parenthesized expression, 31 | Factor => {type=MathExprFactor} '(' {field=Expr} <<Expr>> ')' 32 | # or just a number. 33 | Factor => {type=MathNumberFactor} {field=Number} <number> 34 | 35 | # A number is a string of consecutive digits. 
36 | number = /(\d+)/ 37 | ` 38 | 39 | type MathEqn struct { 40 | Left, Right interface{} 41 | } 42 | 43 | func (e MathEqn) String() string { 44 | return fmt.Sprintf("%d=%d", e.Left, e.Right) 45 | } 46 | 47 | type MathSum struct { 48 | First, Second interface{} 49 | } 50 | 51 | func (s MathSum) String() string { 52 | return fmt.Sprintf("%d+%d", s.First, s.Second) 53 | } 54 | 55 | type MathProduct struct { 56 | First, Second interface{} 57 | } 58 | 59 | func (p MathProduct) String() string { 60 | return fmt.Sprintf("%d*%d", p.First, p.Second) 61 | } 62 | 63 | type MathExprFactor struct { 64 | Expr interface{} 65 | } 66 | 67 | func (ef MathExprFactor) String() string { 68 | return fmt.Sprintf("(%d)", ef.Expr) 69 | } 70 | 71 | type MathNumberFactor struct { 72 | Number int 73 | } 74 | 75 | func (nf MathNumberFactor) String() string { 76 | return fmt.Sprint(nf.Number) 77 | } 78 | 79 | func TestMath(t *testing.T) { 80 | df, err := gopp.NewDecoderFactory(mathgopp, "Eqn") 81 | if err != nil { 82 | t.Error(err) 83 | return 84 | } 85 | df.RegisterType(MathExprFactor{}) 86 | df.RegisterType(MathNumberFactor{}) 87 | df.RegisterType(MathSum{}) 88 | df.RegisterType(MathProduct{}) 89 | dec := df.NewDecoder(strings.NewReader("5+1=6\n")) 90 | var eqn MathEqn 91 | err = dec.Decode(&eqn) 92 | if err != nil { 93 | t.Error(err) 94 | return 95 | } 96 | 97 | expectedEqn := MathEqn{ 98 | Left: MathSum{ 99 | First: MathNumberFactor{5}, 100 | Second: MathNumberFactor{1}, 101 | }, 102 | Right: MathNumberFactor{6}, 103 | } 104 | 105 | if eqn != expectedEqn { 106 | t.Errorf("Expected %q, got %q.", expectedEqn, eqn) 107 | } 108 | } 109 | 110 | func TestMathPrecedence(t *testing.T) { 111 | df, err := gopp.NewDecoderFactory(mathgopp, "Eqn") 112 | if err != nil { 113 | t.Error(err) 114 | return 115 | } 116 | df.RegisterType(MathExprFactor{}) 117 | df.RegisterType(MathNumberFactor{}) 118 | df.RegisterType(MathSum{}) 119 | df.RegisterType(MathProduct{}) 120 | dec := df.NewDecoder(strings.NewReader("5+5*2=6*2+3\n")) 121 | var eqn MathEqn 122 | err = dec.Decode(&eqn) 123 | if err != nil { 124 | t.Error(err) 125 | return 126 | } 127 | 128 | expectedEqn := MathEqn{ 129 | Left: MathSum{ 130 | MathNumberFactor{5}, 131 | MathProduct{ 132 | MathNumberFactor{5}, 133 | MathNumberFactor{2}, 134 | }, 135 | }, 136 | Right: MathSum{ 137 | MathProduct{ 138 | MathNumberFactor{6}, 139 | MathNumberFactor{2}, 140 | }, 141 | MathNumberFactor{3}, 142 | }, 143 | } 144 | 145 | if eqn != expectedEqn { 146 | t.Errorf("Expected %q, got %q.", expectedEqn, eqn) 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /parseself_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The gopp AUTHORS. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
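// This file checks gopp's fixed point: goppgopp (the .gopp grammar for
// .gopp itself, defined in tokenizer_test.go) is parsed using the
// hand-built ByHandGrammar, decoded into a Grammar value, and the result
// must be able to parse goppgopp all over again. The core of that loop,
// sketched (df and g as in TestFixedPointDecoder below):
//
//	df, _ := NewDecoderFactory(goppgopp, "Grammar")
//	var g Grammar
//	_ = df.NewDecoder(strings.NewReader(goppgopp)).Decode(&g)
//	df.g = g // swap in the freshly decoded grammar
//	// decoding goppgopp again should yield an identical Grammar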
4 | 
5 | package gopp
6 | 
7 | import (
8 | 	"fmt"
9 | 	"reflect"
10 | 	"regexp"
11 | 	"strings"
12 | 	"testing"
13 | )
14 | 
15 | func TestFixedPointDecoder(t *testing.T) {
16 | 	df, err := NewDecoderFactory(goppgopp, "Grammar")
17 | 	if err != nil {
18 | 		t.Error(err)
19 | 		return
20 | 	}
21 | 	df.RegisterType(RepeatZeroTerm{})
22 | 	df.RegisterType(RepeatOneTerm{})
23 | 	df.RegisterType(OptionalTerm{})
24 | 	df.RegisterType(GroupTerm{})
25 | 	df.RegisterType(RuleTerm{})
26 | 	df.RegisterType(InlineRuleTerm{})
27 | 	df.RegisterType(TagTerm{})
28 | 	df.RegisterType(LiteralTerm{})
29 | 	var g Grammar
30 | 	dec := df.NewDecoder(strings.NewReader(goppgopp))
31 | 	err = dec.Decode(&g)
32 | 	if err != nil {
33 | 		t.Error(err)
34 | 		return
35 | 	}
36 | 	err = compareGrammars(g, ByHandGrammar)
37 | 	if err != nil {
38 | 		t.Error(err)
39 | 		return
40 | 	}
41 | 
42 | 	// now see if the just-populated grammar can generate itself
43 | 	df.g = g
44 | 	dec = df.NewDecoder(strings.NewReader(goppgopp))
45 | 	var g2 Grammar
46 | 	err = dec.Decode(&g2)
47 | 	if err != nil {
48 | 		t.Error(err)
49 | 		return
50 | 	}
51 | 	err = compareGrammars(g, g2)
52 | 	if err != nil {
53 | 		t.Error(err)
54 | 		return
55 | 	}
56 | }
57 | 
58 | func TestDecodeGrammar(t *testing.T) {
59 | 	var g Grammar
60 | 	ast, err := Parse(ByHandGrammar, "Grammar", []byte(goppgopp))
61 | 	if err != nil {
62 | 		t.Error(err)
63 | 	}
64 | 	sa := NewStructuredAST(ast)
65 | 	sa.RegisterType(RepeatZeroTerm{})
66 | 	sa.RegisterType(RepeatOneTerm{})
67 | 	sa.RegisterType(OptionalTerm{})
68 | 	sa.RegisterType(GroupTerm{})
69 | 	sa.RegisterType(RuleTerm{})
70 | 	sa.RegisterType(InlineRuleTerm{})
71 | 	sa.RegisterType(TagTerm{})
72 | 	sa.RegisterType(LiteralTerm{})
73 | 	err = sa.Decode(&g)
74 | 	if err != nil {
75 | 		t.Error(err)
76 | 	}
77 | 	err = compareGrammars(g, ByHandGrammar)
78 | 	if err != nil {
79 | 		t.Error(err)
80 | 	}
81 | }
82 | 
83 | func compareGrammars(g1, g2 Grammar) (err error) {
84 | 	for i := range g1.Rules {
85 | 		r1 := g1.Rules[i]
86 | 		if len(g2.Rules) <= i {
87 | 			err = fmt.Errorf("Not enough rules in g2: got %d, want %d.", len(g2.Rules), len(g1.Rules))
88 | 			return
89 | 		}
90 | 		r2 := g2.Rules[i]
91 | 		err = compareRules(r1, r2)
92 | 		if err != nil {
93 | 			return
94 | 		}
95 | 	}
96 | 	return
97 | }
98 | 
99 | func compareRules(r1, r2 Rule) (err error) {
100 | 	if r1.Name != r2.Name {
101 | 		return fmt.Errorf("Rule names %q and %q don't match.", r1.Name, r2.Name)
102 | 	}
103 | 	err = compareExprs(r1.Expr, r2.Expr)
104 | 	return
105 | }
106 | 
107 | func compareExprs(e1, e2 Expr) (err error) {
108 | 	if len(e1) != len(e2) {
109 | 		err = fmt.Errorf("Expr lengths %d and %d don't match.", len(e1), len(e2))
110 | 		return
111 | 	}
112 | 	for i := range e1 {
113 | 		err = compareTerms(e1[i], e2[i])
114 | 		if err != nil {
115 | 			return
116 | 		}
117 | 	}
118 | 	return
119 | }
120 | 
121 | func compareTerms(t1, t2 Term) (err error) {
122 | 	if reflect.TypeOf(t1) != reflect.TypeOf(t2) {
123 | 		err = fmt.Errorf("Term types %T and %T don't match.", t1, t2)
124 | 		return
125 | 	}
126 | 	switch t1 := t1.(type) {
127 | 	case RepeatZeroTerm:
128 | 		err = compareTerms(t1.Term, t2.(RepeatZeroTerm).Term)
129 | 	case RepeatOneTerm:
130 | 		err = compareTerms(t1.Term, t2.(RepeatOneTerm).Term)
131 | 	case LiteralTerm:
132 | 		if t1.Literal != t2.(LiteralTerm).Literal {
133 | 			err = fmt.Errorf("Literals %q and %q don't match.", t1.Literal, t2.(LiteralTerm).Literal)
134 | 			return
135 | 		}
136 | 	case TagTerm:
137 | 		if t1.Tag != t2.(TagTerm).Tag {
138 | 			err = fmt.Errorf("Tags %q and %q don't match.", t1.Tag, t2.(TagTerm).Tag)
139 | 			return
140 | 		}
141 | 	case RuleTerm:
142 | 		if t1.Name != t2.(RuleTerm).Name {
143 | 			err = fmt.Errorf("Names %q and %q don't match.", t1.Name, t2.(RuleTerm).Name)
144 | 			return
145 | 		}
146 | 	case InlineRuleTerm:
147 | 		if t1.Name != t2.(InlineRuleTerm).Name {
148 | 			err = fmt.Errorf("Names %q and %q don't match.", t1.Name, t2.(InlineRuleTerm).Name)
149 | 			return
150 | 		}
151 | 	default:
152 | 		fmt.Printf("%T\n", t1)
153 | 	}
154 | 	return
155 | }
156 | 
157 | var ByHandGrammarREs []TypedRegexp
158 | var ByHandGrammarIgnoreREs []*regexp.Regexp
159 | var ByHandTokenInfo TokenizeInfo
160 | 
161 | func init() {
162 | 	var err error
163 | 	ByHandGrammarREs, err = ByHandGrammar.TokenREs()
164 | 	if err != nil {
165 | 		panic(err)
166 | 	}
167 | 	ByHandGrammarIgnoreREs, err = ByHandGrammar.IgnoreREs()
168 | 	if err != nil {
169 | 		panic(err)
170 | 	}
171 | 	ByHandTokenInfo = TokenizeInfo{
172 | 		TokenREs: ByHandGrammarREs,
173 | 		IgnoreREs: ByHandGrammarIgnoreREs,
174 | 	}
175 | }
176 | 
177 | func compareNodes(n1, n2 Node) (ok bool, indicesToError []int) {
178 | 	if a, ok := n1.(AST); ok {
179 | 		n1 = []Node(a)
180 | 	}
181 | 	if a, ok := n2.(AST); ok {
182 | 		n2 = []Node(a)
183 | 	}
184 | 	if nl1, isList1 := n1.([]Node); isList1 {
185 | 		if nl2, isList2 := n2.([]Node); isList2 {
186 | 			if len(nl1) != len(nl2) {
187 | 				fmt.Printf("one has length %d, other %d\n", len(nl1), len(nl2))
188 | 				fmt.Println(nl1)
189 | 				fmt.Println(nl2)
190 | 				ok = false
191 | 				return
192 | 			}
193 | 			ok = true
194 | 			for i := range nl1 {
195 | 				var subindices []int
196 | 				ok, subindices = compareNodes(nl1[i], nl2[i])
197 | 				if !ok {
198 | 					indicesToError = append([]int{i}, subindices...)
199 | 				return
200 | 				}
201 | 			}
202 | 			return
203 | 		}
204 | 		fmt.Printf("one is []Node, other is %T\n", n2)
205 | 		ok = false
206 | 		return
207 | 	}
208 | 	ok = n1 == n2
209 | 	return
210 | }
211 | 
212 | type textByHand struct {
213 | 	Name string
214 | 	Text string
215 | 	ByHand Node
216 | }
217 | 
218 | func getGoppASTRules(ast AST) []Node {
219 | 	return ast[5].([]Node)
220 | }
221 | 
222 | var rulesTextAndByHand = []textByHand{
223 | 	{
224 | 		"Grammar",
225 | 		`Grammar => {type=Grammar} '\n'* {field=LexSteps} <<LexStep>>* {field=Rules} <<Rule>>+ {field=Symbols} <<Symbol>>*`,
226 | 		getGoppASTRules(ByHandGoppAST)[0],
227 | 	},
228 | 	{
229 | 		"LexStep",
230 | 		`LexStep => {field=Name} <identifier> ':' {field=Pattern} <regexp> '\n'+`,
231 | 		getGoppASTRules(ByHandGoppAST)[1],
232 | 	},
233 | 	{
234 | 		"Rule",
235 | 		`Rule => {field=Name} <identifier> '=>' {field=Expr} <Expr> '\n'+`,
236 | 		getGoppASTRules(ByHandGoppAST)[2],
237 | 	},
238 | 	{
239 | 		"Symbol",
240 | 		`Symbol => {field=Name} <identifier> '=' {field=Pattern} <regexp> '\n'+`,
241 | 		getGoppASTRules(ByHandGoppAST)[3],
242 | 	},
243 | 	{
244 | 		"Expr",
245 | 		`Expr => <<Term>>+`,
246 | 		getGoppASTRules(ByHandGoppAST)[4],
247 | 	},
248 | 	{
249 | 		"Term.1",
250 | 		`Term => <Term1>`,
251 | 		getGoppASTRules(ByHandGoppAST)[5],
252 | 	},
253 | 	{
254 | 		"Term.2",
255 | 		`Term => <Term2>`,
256 | 		getGoppASTRules(ByHandGoppAST)[6],
257 | 	},
258 | 	{
259 | 		"Term1.1",
260 | 		`Term1 => {type=RepeatZeroTerm} {field=Term} <<Term2>> '*'`,
261 | 		getGoppASTRules(ByHandGoppAST)[7],
262 | 	},
263 | 	{
264 | 		"Term1.2",
265 | 		`Term1 => {type=RepeatOneTerm} {field=Term} <<Term2>> '+'`,
266 | 		getGoppASTRules(ByHandGoppAST)[8],
267 | 	},
268 | 	{
269 | 		"Term2.1",
270 | 		`Term2 => {type=OptionalTerm} '[' {field=Expr} <Expr> ']'`,
271 | 		getGoppASTRules(ByHandGoppAST)[9],
272 | 	},
273 | 	{
274 | 		"Term2.2",
275 | 		`Term2 => {type=GroupTerm} '(' {field=Expr} <Expr> ')'`,
276 | 		getGoppASTRules(ByHandGoppAST)[10],
277 | 	},
278 | 	{
279 | 		"Term2.3",
280 | 		`Term2 => {type=RuleTerm} '<<' {field=Name} <identifier> '>>'`,
281 | 		getGoppASTRules(ByHandGoppAST)[11],
282 | 	},
283 | 	{
284 | 		"Term2.4",
285 | 		`Term2 => {type=InlineRuleTerm} '<' {field=Name} <identifier> '>'`,
286 | 		getGoppASTRules(ByHandGoppAST)[12],
287 | 	},
288 | 	{
289 | 		"Term2.5",
290 | 		`Term2 => {type=TagTerm} {field=Tag} <tag>`,
291 | 		getGoppASTRules(ByHandGoppAST)[13],
292 | 	},
293 | 	{
294 | 		"Term2.6",
295 | 		`Term2 => {type=LiteralTerm} {field=Literal} <literal>`,
296 | 		getGoppASTRules(ByHandGoppAST)[14],
297 | 	},
298 | }
299 | 
300 | func TestParseRulesIndividual(t *testing.T) {
301 | 	for _, th := range rulesTextAndByHand {
302 | 		rule := th.ByHand
303 | 		byHandAST := mkGrammar(
304 | 			[]Node{},
305 | 			[]Node{rule},
306 | 			[]Node{},
307 | 		)
308 | 
309 | 		txt := fmt.Sprintf("\n%s\n", th.Text)
310 | 		tokens, err := Tokenize(ByHandTokenInfo, []byte(txt))
311 | 		if err != nil {
312 | 			t.Errorf("%s: %s", th.Name, err)
313 | 			return
314 | 		}
315 | 		start := ByHandGrammar.RulesForName("Grammar")[0]
316 | 		// tr.Enabled = true
317 | 		pd := &ParseData{}
318 | 		items, remaining, err := start.Parse(ByHandGrammar, tokens, pd, []string{})
319 | 		// tr.Enabled = false
320 | 		if err != nil {
321 | 			fmt.Printf("Remaining: %v\n", pd.TokensForError)
322 | 			for _, err := range pd.FarthestErrors {
323 | 				fmt.Printf(" - %s\n", err)
324 | 			}
325 | 			t.Errorf("%s: %s", th.Name, err)
326 | 			return
327 | 		}
328 | 		if len(remaining) != 0 {
329 | 			t.Errorf("%s: leftover tokens: %v.", th.Name, remaining)
330 | 		}
331 | 
332 | 		if false && th.Name == "Expr" {
333 | 			dig := func(top AST) interface{} {
334 | 				return top[2].([]Node)[0].([]Node)[4].([]Node)[0]
335 | 			}
336 | 			byhand := dig(byHandAST)
337 | 			gen := dig(AST(items))
338 | 			ok, indices := compareNodes(byhand, gen)
339 | 			if !ok {
340 | 				fmt.Println("byhand")
341 | 				printNode(byhand, 0)
342 | 				fmt.Println("generated")
343 | 				printNode(gen, 0)
344 | 				fmt.Println(ok, indices)
345 | 			}
346 | 		}
347 | 
348 | 		ok, indices := compareNodes(byHandAST, AST(items))
349 | 		if !ok {
350 | 			t.Errorf("%s: Generated AST doesn't match by-hand AST at %v.", th.Name, indices)
351 | 		}
352 | 	}
353 | }
354 | 
355 | func TestParseFullGrammar(t *testing.T) {
356 | 	tokens, err := Tokenize(ByHandTokenInfo, []byte(goppgopp))
357 | 	if err != nil {
358 | 		t.Error(err)
359 | 		return
360 | 	}
361 | 	start := ByHandGrammar.RulesForName("Grammar")[0]
362 | 	// tr.Enabled = true
363 | 	pd := &ParseData{}
364 | 	items, remaining, err := start.Parse(ByHandGrammar, tokens, pd, []string{})
365 | 	// tr.Enabled = false
366 | 	if err != nil {
367 | 		fmt.Printf("Remaining: %v\n", pd.TokensForError)
368 | 		for _, err := range pd.FarthestErrors {
369 | 			fmt.Printf(" - %s\n", err)
370 | 		}
371 | 		t.Errorf("%s", err)
372 | 		return
373 | 	}
374 | 	if len(remaining) != 0 {
375 | 		t.Errorf("leftover tokens: %v.", remaining)
376 | 	}
377 | 
378 | 	if true {
379 | 		dig := func(top AST) interface{} {
380 | 			return top[3]
381 | 		}
382 | 		byhand := dig(ByHandGoppAST)
383 | 		gen := dig(AST(items))
384 | 		ok, indices := compareNodes(byhand, gen)
385 | 		if !ok {
386 | 			fmt.Println("byhand")
387 | 			printNode(byhand, 0)
388 | 			fmt.Println("generated")
389 | 			printNode(gen, 0)
390 | 			fmt.Println(ok, indices)
391 | 		}
392 | 	}
393 | 
394 | 	ok, indices := compareNodes(ByHandGoppAST, AST(items))
395 | 	if !ok {
396 | 		t.Errorf("Generated AST doesn't match by-hand AST at %v.", indices)
397 | 	}
398 | }
399 | 
400 | func TestParseEasyGrammar(t *testing.T) {
401 | 	byHandAST := mkGrammar(
402 | 		[]Node{},
403 | 		[]Node{
404 | 			mkRule("X",
405 | 				mkLiteralTerm("y"),
406 | 			),
407 | 		},
408 | 		[]Node{
409 | 			mkSymbol("w", "z"),
410 | 		},
411 | 	)
412 | 
413 | 	tokens, err := Tokenize(ByHandTokenInfo, []byte(`
414 | X => 'y'
415 | w = /z/
416 | `))
417 | 	if err != nil {
418 | 		t.Error(err)
419 | 		return
420 | 	}
421 | 	start := ByHandGrammar.RulesForName("Grammar")[0]
422 | 	pd := &ParseData{}
423 | 	items, remaining, err := start.Parse(ByHandGrammar, tokens, pd, []string{})
424 | 	if err != nil {
425 | 		t.Error(err)
426 | 		return
427 | 	}
428 | 	if len(remaining) != 0 {
429 | 		t.Errorf("Leftover tokens: %v.", remaining)
430 | 	}
431 | 
432 | 	ok, indices := compareNodes(byHandAST, AST(items))
433 | 	if !ok {
434 | 		t.Errorf("Generated AST doesn't match by-hand AST at %v.", indices)
435 | 	}
436 | 
437 | 	if false {
438 | 		dig := func(top AST) interface{} {
439 | 			return top
440 | 		}
441 | 		fmt.Println("byhand")
442 | 		printNode(dig(byHandAST), 0)
443 | 		fmt.Println("generated")
444 | 		printNode(dig(AST(items)), 0)
445 | 	}
446 | }
447 | 
448 | func TestParseMultiRule(t *testing.T) {
449 | 	byHandAST := mkGrammar(
450 | 		[]Node{},
451 | 		[]Node{
452 | 			mkRule("X",
453 | 				mkOptionalTerm(
454 | 					mkLiteralTerm("y"),
455 | 				),
456 | 			),
457 | 			mkRule("Z",
458 | 				mkRepeatOneTerm(
459 | 					mkRuleTerm("X"),
460 | 				),
461 | 			),
462 | 		},
463 | 		[]Node{
464 | 			mkSymbol("w", "z"),
465 | 		},
466 | 	)
467 | 
468 | 	tokens, err := Tokenize(ByHandTokenInfo, []byte(`
469 | X => ['y']
470 | Z => <<X>>+
471 | w = /z/
472 | `))
473 | 	if err != nil {
474 | 		t.Error(err)
475 | 		return
476 | 	}
477 | 	start := ByHandGrammar.RulesForName("Grammar")[0]
478 | 	pd := &ParseData{}
479 | 	items, remaining, err := start.Parse(ByHandGrammar, tokens, pd, []string{})
480 | 	if err != nil {
481 | 		t.Error(err)
482 | 		return
483 | 	}
484 | 	if len(remaining) != 0 {
485 | 		t.Errorf("Leftover tokens: %v.", remaining)
486 | 	}
487 | 
488 | 	ok, indices := compareNodes(byHandAST, AST(items))
489 | 	if !ok {
490 | 		t.Errorf("Generated AST doesn't match by-hand AST at %v.", indices)
491 | 	}
492 | 
493 | 	if false {
494 | 		dig := func(top AST) interface{} {
495 | 			return top[2]
496 | 		}
497 | 		fmt.Println("byhand")
498 | 		printNode(dig(byHandAST), 0)
499 | 		fmt.Println("generated")
500 | 		printNode(dig(AST(items)), 0)
501 | 	}
502 | }
503 | 
504 | func TestParseSymbol(t *testing.T) {
505 | 	tokens, err := Tokenize(ByHandTokenInfo, []byte("'junkinthetrunk' stuff"))
506 | 	if err != nil {
507 | 		t.Error(err)
508 | 		return
509 | 	}
510 | 	term := InlineRuleTerm{Name: "literal"}
511 | 	pd := &ParseData{}
512 | 	items, _, err := term.Parse(ByHandGrammar, tokens, pd, []string{})
513 | 	if err != nil {
514 | 		t.Error(err)
515 | 		return
516 | 	}
517 | 	st, ok := items[0].(SymbolText)
518 | 	if !ok {
519 | 		t.Errorf("Got a %T, expected a SymbolText.", items[0])
520 | 		return
521 | 	}
522 | 	if st.Type != "literal" {
523 | 		t.Errorf("Got a %q, expected a %q.", st.Type, "literal")
524 | 		return
525 | 	}
526 | 	if st.Text != "junkinthetrunk" {
527 | 		t.Errorf("Got %q, expected %q.", st.Text, "junkinthetrunk")
528 | 		return
529 | 	}
530 | }
531 | 
532 | func TestParseTag(t *testing.T) {
533 | 	tokens, err := Tokenize(ByHandTokenInfo, []byte("=> stuff"))
534 | 	if err != nil {
535 | 		t.Error(err)
536 | 		return
537 | 	}
538 | 	term := TagTerm{Tag: "hello"}
539 | 	pd := &ParseData{}
540 | 	items, remaining, err := term.Parse(ByHandGrammar, tokens, pd, []string{})
541 | 	if err != nil {
542 | 		t.Error(err)
543 | 		return
544 | 	}
545 | 	if tag, ok := items[0].(Tag); ok {
546 | 		if tag != "hello" {
547 | 			t.Errorf("Expected %q, got %q.", "hello", tag)
548 | 			return
549 | 		}
550 | 	} else {
551 | 		t.Errorf("Expected Tag, got %T.", items[0])
552 | 		return
553 | 	}
554 | 	if !reflect.DeepEqual(remaining, tokens) {
555 | 		t.Errorf("Got wrong tokens remaining.")
556 | 		return
557 | 	}
558 | }
559 | 
560 | func TestParseLiteral(t *testing.T) {
561 | 	tokens, err := Tokenize(ByHandTokenInfo, []byte("=> stuff"))
562 | 	if err != nil {
563 | 		t.Error(err)
564 | 		return
565 | 	}
566 | 	term := LiteralTerm{Literal: "=>"}
567 | 	pd := &ParseData{}
568 | 	items, remaining, err := term.Parse(ByHandGrammar, tokens, pd, []string{})
569 | 	if err != nil {
570 | 		t.Error(err)
571 | 		return
572 | 	}
573 | 	if lit, ok := items[0].(Literal); ok {
574 | 		if lit != "=>" {
575 | 			t.Errorf("Expected %q, got %q.", "=>", lit)
576 | 			return
577 | 		}
578 | 	} else {
579 | 		t.Errorf("Expected Literal, got %T.", items[0])
580 | 		return
581 | 	}
582 | 	if !reflect.DeepEqual(remaining, tokens[1:]) {
583 | 		t.Errorf("Got wrong tokens remaining.")
584 | 		return
585 | 	}
586 | }
--------------------------------------------------------------------------------
/tokenize.go:
--------------------------------------------------------------------------------
1 | // Copyright 2013 The gopp AUTHORS. All rights reserved.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
4 | 
5 | package gopp
6 | 
7 | import (
8 | 	"fmt"
9 | 	"regexp"
10 | )
11 | 
12 | type Token struct {
13 | 	Type string
14 | 	Raw string
15 | 	Text string
16 | 	Row, Col int
17 | }
18 | 
19 | func (t Token) String() string {
20 | 	return fmt.Sprintf("(%s: %q)", t.Type, t.Text)
21 | }
22 | 
23 | type TokenizeInfo struct {
24 | 	TokenREs []TypedRegexp
25 | 	IgnoreREs []*regexp.Regexp
26 | }
27 | 
28 | func Tokenize(ti TokenizeInfo, document []byte) (tokens []Token, err error) {
29 | 	var row, col int
30 | tokenloop:
31 | 	for len(document) != 0 {
32 | 		// If something to ignore, trim it off.
33 | 		for _, re := range ti.IgnoreREs {
34 | 			matches := re.FindSubmatch(document)
35 | 			if len(matches) == 0 {
36 | 				continue
37 | 			}
38 | 			if string(document[:len(matches[0])]) != string(matches[0]) {
39 | 				err = fmt.Errorf("Regexp matched text not at beginning: %s", re)
40 | 				return
41 | 			}
42 | 			document = document[len(matches[0]):]
43 | 			continue tokenloop
44 | 		}
45 | 
46 | 		var newdocument []byte
47 | 		for _, re := range ti.TokenREs {
48 | 			matches := re.FindSubmatch(document)
49 | 			if len(matches) == 0 {
50 | 				continue
51 | 			}
52 | 
53 | 			matchedText := matches[0]
54 | 			token := Token{
55 | 				Type: re.Type,
56 | 				Raw: string(matchedText),
57 | 				Row: row,
58 | 				Col: col,
59 | 			}
60 | 			// Only index matches[1] when the regexp actually has a capture
61 | 			// group; a groupless regexp would otherwise panic here.
62 | 			if len(matches) > 1 {
63 | 				token.Text = string(matches[1])
64 | 			}
65 | 			// Track row and column for error reporting.
66 | 			for _, c := range matchedText {
67 | 				if c == '\n' {
68 | 					row++
69 | 					col = 0
70 | 				} else {
71 | 					col++
72 | 				}
73 | 			}
74 | 			newdocument = document[len(matchedText):]
75 | 			tokens = append(tokens, token)
76 | 			break
77 | 		}
78 | 		if newdocument == nil {
79 | 			snippet := document
80 | 			if len(snippet) > 80 {
81 | 				snippet = snippet[:80]
82 | 			}
83 | 			err = fmt.Errorf("Could not match starting from %q.", snippet)
84 | 			return
85 | 		}
86 | 		document = newdocument
87 | 	}
88 | 	return
89 | }
--------------------------------------------------------------------------------
/tokenizer_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2013 The gopp AUTHORS. All rights reserved.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
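// The tests below exercise Tokenize directly. The tokenizer is ordered and
// greedy: ignore patterns are stripped first, then the first token regexp
// matching at the start of the input wins. A minimal sketch
// (ByHandTokenInfo is built in parseself_test.go's init):
//
//	tokens, err := Tokenize(ByHandTokenInfo, []byte("X => 'y'\n"))
//	// err == nil; tokens come out roughly as
//	// (identifier: "X") (RAW: "=>") (literal: "y") (RAW: "\n")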
4 | 
5 | package gopp
6 | 
7 | import (
8 | 	"fmt"
9 | 	"testing"
10 | )
11 | 
12 | var _ = fmt.Println
13 | 
14 | func TestCollectLiterals(t *testing.T) {
15 | 	correctLiterals := []string{
16 | 		"=>",
17 | 		"=",
18 | 		":",
19 | 		"[",
20 | 		"]",
21 | 		"(",
22 | 		")",
23 | 		"<",
24 | 		">",
25 | 		"<<",
26 | 		">>",
27 | 		"*",
28 | 		"+",
29 | 		"\n",
30 | 	}
31 | 
32 | 	literals := map[string]bool{}
33 | 	ByHandGrammar.CollectLiterals(literals)
34 | 	if len(literals) != len(correctLiterals) {
35 | 		t.Error("Wrong number of literals")
36 | 	}
37 | 	for _, literal := range correctLiterals {
38 | 		if !literals[literal] {
39 | 			t.Errorf("Could not find %q", literal)
40 | 		}
41 | 	}
42 | }
43 | 
44 | var symbolTests = map[string][][]string{
45 | 	"identifier": [][]string{
46 | 		{"stuff", "stuff"},
47 | 		{"xyz123", "xyz123"},
48 | 		{"x_b", "x_b"},
49 | 	},
50 | }
51 | var symbolFailTests = map[string][]string{
52 | 	"identifier": []string{
53 | 		"123",
54 | 		".sdf-",
55 | 		"!",
56 | 	},
57 | }
58 | 
59 | func TestSymbolTokenize(t *testing.T) {
60 | 	for typ, examples := range symbolTests {
61 | 		for _, example := range examples {
62 | 			tokens, err := Tokenize(TokenizeInfo{TokenREs: ByHandGrammarREs}, []byte(example[0]))
63 | 			if err != nil {
64 | 				t.Error(err)
65 | 				continue
66 | 			}
67 | 			if len(tokens) == 0 {
68 | 				t.Errorf("No tokens for %q.", example[0])
69 | 				continue
70 | 			}
71 | 			if typ != tokens[0].Type {
72 | 				t.Errorf("Expected type %q, got %q.", typ, tokens[0].Type)
73 | 				continue
74 | 			}
75 | 			if example[1] != tokens[0].Text {
76 | 				t.Errorf("Expected %q, got %q.", example[1], tokens[0].Text)
77 | 				continue
78 | 			}
79 | 		}
80 | 	}
81 | }
82 | 
83 | func TestSymbolFailTokenize(t *testing.T) {
84 | 	for typ, examples := range symbolFailTests {
85 | 		for _, example := range examples {
86 | 			tokens, err := Tokenize(TokenizeInfo{TokenREs: ByHandGrammarREs}, []byte(example))
87 | 			if err != nil {
88 | 				continue
89 | 			}
90 | 			if len(tokens) == 0 {
91 | 				continue
92 | 			}
93 | 			if typ == tokens[0].Type {
94 | 				t.Errorf("Mistakenly parsed %q as %q.", example, typ)
95 | 			}
96 | 		}
97 | 	}
98 | }
99 | 
100 | var goppgopp = `
101 | # a comment to ignore
102 | ignore: /^#.*\n/
103 | ignore: /^(?:[ \t])+/
104 | Grammar => {type=Grammar} '\n'* {field=LexSteps} <<LexStep>>* {field=Rules} <<Rule>>+ {field=Symbols} <<Symbol>>*
105 | LexStep => {field=Name} <identifier> ':' {field=Pattern} <regexp> '\n'+
106 | Rule => {field=Name} <identifier> '=>' {field=Expr} <Expr> '\n'+
107 | Symbol => {field=Name} <identifier> '=' {field=Pattern} <regexp> '\n'+
108 | Expr => <<Term>>+
109 | Term => <Term1>
110 | Term => <Term2>
111 | Term1 => {type=RepeatZeroTerm} {field=Term} <<Term2>> '*'
112 | Term1 => {type=RepeatOneTerm} {field=Term} <<Term2>> '+'
113 | Term2 => {type=OptionalTerm} '[' {field=Expr} <Expr> ']'
114 | Term2 => {type=GroupTerm} '(' {field=Expr} <Expr> ')'
115 | Term2 => {type=RuleTerm} '<<' {field=Name} <identifier> '>>'
116 | Term2 => {type=InlineRuleTerm} '<' {field=Name} <identifier> '>'
117 | Term2 => {type=TagTerm} {field=Tag} <tag>
118 | Term2 => {type=LiteralTerm} {field=Literal} <literal>
119 | identifier = /([a-zA-Z][a-zA-Z0-9_]*)/
120 | literal = /'((?:[\\']|[^'])+?)'/
121 | tag = /\{((?:[\\']|[^'])+?)\}/
122 | regexp = /\/((?:\\/|[^\n])+?)\//
123 | `
--------------------------------------------------------------------------------
/util.go:
--------------------------------------------------------------------------------
1 | // Copyright 2013 The gopp AUTHORS. All rights reserved.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
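// Small helpers shared by the tokenizer and parser. literalSorter orders
// literals longest first so "<<" is tried before "<"; escapeString and
// descapeString round-trip Go escape sequences, e.g.:
//
//	s, _ := descapeString(`\n`) // s is a real newline
//	e := escapeString("\n")     // e is the two characters `\n`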
4 | 
5 | package gopp
6 | 
7 | import (
8 | 	"fmt"
9 | 	"strconv"
10 | )
11 | 
12 | // literalSorter orders literals longest first, so that the tokenizer
13 | // tries e.g. "<<" before "<". Equal lengths fall back to comparing
14 | // positions.
15 | type literalSorter []string
16 | 
17 | func (l literalSorter) Len() int {
18 | 	return len(l)
19 | }
20 | 
21 | func (l literalSorter) Swap(i, j int) {
22 | 	l[i], l[j] = l[j], l[i]
23 | }
24 | 
25 | func (l literalSorter) Less(i, j int) bool {
26 | 	if len(l[i]) > len(l[j]) {
27 | 		return true
28 | 	}
29 | 	if len(l[i]) < len(l[j]) {
30 | 		return false
31 | 	}
32 | 	return i < j
33 | }
34 | 
35 | // escapeString renders s with Go escape sequences, without surrounding
36 | // quotes: a newline becomes the two characters \n.
37 | func escapeString(s string) (r string) {
38 | 	r = strconv.Quote(s)
39 | 	r = r[1 : len(r)-1]
40 | 	return
41 | }
42 | 
43 | // descapeString is the inverse of escapeString: the two characters \n
44 | // become a newline.
45 | func descapeString(s string) (r string, err error) {
46 | 	quoted := fmt.Sprintf("\"%s\"", s)
47 | 	r, err = strconv.Unquote(quoted)
48 | 	return
49 | }
50 | 
51 | // printNode dumps a node tree, indenting one space per level; used when
52 | // debugging test failures.
53 | func printNode(node Node, indentCount int) {
54 | 	indent := func(tag string) {
55 | 		for i := 0; i < indentCount; i++ {
56 | 			fmt.Print(" ")
57 | 		}
58 | 		fmt.Println(tag)
59 | 	}
60 | 	switch node := node.(type) {
61 | 	case []Node:
62 | 		indent("[")
63 | 		for _, n := range node {
64 | 			printNode(n, indentCount+1)
65 | 		}
66 | 		indent("]")
67 | 	case AST:
68 | 		indent("[")
69 | 		for _, n := range node {
70 | 			printNode(n, indentCount+1)
71 | 		}
72 | 		indent("]")
73 | 	default:
74 | 		indent(fmt.Sprint(node))
75 | 	}
76 | }
--------------------------------------------------------------------------------
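End-to-end usage, for orientation (an illustrative sketch, not a file in
this repository; the Doc type and the one-rule grammar are invented):

	package main

	import (
		"fmt"
		"strings"

		"github.com/skelterjohn/gopp"
	)

	const docgopp = `
	Start => {field=Word} <word> '\n'
	word = /([a-z]+)/
	`

	type Doc struct {
		Word string
	}

	func main() {
		df, err := gopp.NewDecoderFactory(docgopp, "Start")
		if err != nil {
			panic(err)
		}
		var d Doc
		if err := df.NewDecoder(strings.NewReader("hello\n")).Decode(&d); err != nil {
			panic(err)
		}
		fmt.Println(d.Word) // hello
	}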