├── README.markdown ├── example ├── example └── example.go └── parsec.go /README.markdown: -------------------------------------------------------------------------------- 1 | Go Parse 2 | 3 | A [Parsec](http://hackage.haskell.org/package/parsec-3.0.0)-like library for Go. 4 | 5 | Structure: 6 | 7 | A Vessel is what carries around the input and any user-specified state, as well as internal state such as the position in the input. It should know how to return and set those 3 values, as well as Get from the input, and push/pop (which just adjusts the position). 8 | 9 | A Parser takes a Vessel and returns an Output and whether or not the parse was successful. 10 | 11 | Parsers can typically be combined in many ways. For example Symbol is just String followed by Whitespace, Many takes a Parser and repeatedly applies it, matching 0 or more times (thus, the parse is always successful), and Any takes any number of Parsers and tries them all in order until one succeeds. 12 | 13 | Example: 14 | 15 | func main() { 16 | in := new(StringVessel); 17 | in.SetInput(`< (>)( 18 | < 19 | )( >) < > 20 | > 21 | 22 | >`); 23 | 24 | ltgt := Any(Symbol("<"), Symbol(">")); 25 | 26 | parser := Many(Any(ltgt, Parens(ltgt))); 27 | out, parsed := parser(in); 28 | 29 | fmt.Printf("Matched: %#v\n", parsed); 30 | fmt.Printf("Matches: %v\n", out); 31 | fmt.Printf("Vessel: %+v\n", in); 32 | } 33 | 34 | Output: 35 | 36 | go-parse $ go parsec 37 | Matched: true 38 | Matches: [< > < > < > > >] 39 | Vessel: &{state: input:< (>)( 40 | < 41 | )( >) < > 42 | > 43 | 44 | > position:{Name: Line:0 Column:0 Offset:29}} 45 | 46 | -------------------------------------------------------------------------------- /example/example: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vito/go-parse/ca8122a7499f7b9f0a02603d16daaecb0c023a9e/example/example -------------------------------------------------------------------------------- /example/example.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "unicode" 7 | 8 | . "github.com/vito/go-parse" 9 | ) 10 | 11 | type rChar struct { 12 | char rune 13 | str string 14 | } 15 | type rToken struct { 16 | char rune 17 | str string 18 | } 19 | type rGroup struct { 20 | target interface{} 21 | } 22 | type rOption struct { 23 | target interface{} 24 | } 25 | type rStar struct { 26 | target interface{} 27 | } 28 | 29 | func isMeta(char rune) bool { 30 | switch char { 31 | case '(', ')', '[', ']', '?', '^', '*', '.', '+', '$', '|': 32 | return true 33 | } 34 | 35 | return false 36 | } 37 | 38 | func isSpecial(char rune) bool { 39 | switch char { 40 | case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\': 41 | return true 42 | case 'w', 's', 'd': 43 | return true 44 | } 45 | 46 | return false 47 | } 48 | 49 | func isNotMeta(char rune) bool { return !isMeta(char) } 50 | 51 | func isNotSpecial(char rune) bool { return !isSpecial(char) } 52 | 53 | func special(char rune) Output { 54 | switch char { 55 | case '\\': 56 | return rChar{'\\', "\\"} 57 | case 'a', 'b', 'f': 58 | return rChar{char - rune(90), string(char - rune(90))} 59 | case 'n': 60 | return rChar{'\n', "\n"} 61 | case 'r': 62 | return rChar{'\r', "\r"} 63 | case 't': 64 | return rChar{'\t', "\t"} 65 | case 'v': 66 | return rChar{'\v', "\v"} 67 | case 'w', 's', 'd': 68 | return rToken{char, string(char)} 69 | } 70 | 71 | return nil 72 | } 73 | 74 | func grouped(match Parser) Parser { 75 | return func(in Vessel) (c Output, ok bool) { 76 | call, ok := Between(String("("), String(")"), match)(in) 77 | if !ok { 78 | return 79 | } 80 | 81 | c = rGroup{call} 82 | return 83 | } 84 | } 85 | 86 | func char() Parser { 87 | return func(in Vessel) (out Output, ok bool) { 88 | next, ok := Satisfy(isNotMeta)(in) 89 | if !ok { 90 | return 91 | } 92 | 93 | char := next.(rune) 94 | if char == '\\' { 95 | next, ok = Satisfy(func(c rune) bool { return isMeta(c) || isSpecial(c) })(in) 96 | if ok { 97 | char = next.(rune) 98 | if isSpecial(char) { 99 | out = special(char) 100 | } 101 | } 102 | } else { 103 | out = rChar{char, string(char)} 104 | } 105 | 106 | return 107 | } 108 | } 109 | 110 | func optional() Parser { 111 | return func(in Vessel) (Output, bool) { 112 | result, ok := Collect(Any(char(), grouped(regexp())), String("?"))(in) 113 | if !ok { 114 | return nil, false 115 | } 116 | 117 | return rOption{result.([]interface{})[0]}, true 118 | } 119 | } 120 | 121 | func star() Parser { 122 | return func(in Vessel) (Output, bool) { 123 | result, ok := Collect(Any(char(), grouped(regexp())), String("*"))(in) 124 | 125 | if !ok { 126 | return nil, false 127 | } 128 | 129 | return rStar{result.([]interface{})[0]}, true 130 | } 131 | } 132 | 133 | func regexp() Parser { 134 | return func(in Vessel) (Output, bool) { 135 | return Many( 136 | Any( 137 | Identifier(), 138 | Try(star()), 139 | Try(optional()), 140 | Skip(All(OneLineComment(), String("\n"))), 141 | MultiLineComment(), 142 | Try(char()), 143 | grouped(regexp())))(in) 144 | } 145 | } 146 | 147 | // A hacked-together monstrosity that pretty-prints any complex 148 | // structure with indenting and whitespace and such. 149 | func pretty(thing interface{}) (s string) { 150 | in := fmt.Sprintf("%#v\n", thing) 151 | 152 | indent := 0 153 | inString := false 154 | for i, char := range in { 155 | if !inString || char == '"' { 156 | switch char { 157 | case ',': 158 | s += string(char) + "\n" + strings.Repeat(" ", indent) 159 | case '(', '{': 160 | if in[i+2] != '}' { 161 | indent++ 162 | s += string(char) + "\n" + strings.Repeat(" ", indent) 163 | } else { 164 | s += "{}" 165 | } 166 | case ')', '}': 167 | if in[i-2] != '{' { 168 | indent-- 169 | s += "\n" + strings.Repeat(" ", indent) + string(char) 170 | } 171 | case ':': 172 | s += ": " 173 | case ' ': 174 | if in[i-1] != ',' && in[i-9:i] != "interface" { 175 | s += " " 176 | } 177 | case '"': 178 | inString = !inString 179 | fallthrough 180 | default: 181 | s += string(char) 182 | } 183 | } else { 184 | s += string(char) 185 | } 186 | } 187 | 188 | return 189 | } 190 | 191 | func main() { 192 | in := new(StringVessel) 193 | 194 | in.SetSpec(Spec{ 195 | "{-", 196 | "-}", 197 | "--", 198 | true, 199 | Satisfy(unicode.IsUpper), 200 | Satisfy(unicode.IsLower), 201 | nil, 202 | nil, 203 | []Output{"Foo"}, 204 | nil, 205 | true, 206 | }) 207 | 208 | in.SetInput(`a 日本語 \[\]\({- test -} ( b)?ccc*-- comment 209 | l*{- foo {- {- test -} -}-}Bar FooFizz 210 | Buzz\a\n\t\f\w\s\d*`) 211 | 212 | fmt.Printf("Parsing `%s`...\n", in.GetInput()) 213 | 214 | out, ok := regexp()(in) 215 | 216 | if _, unfinished := in.Next(); unfinished { 217 | fmt.Printf("Incomplete parse: %s\n", pretty(out)) 218 | fmt.Println("Parse error.") 219 | fmt.Printf("Position: %+v\n", in.GetPosition()) 220 | fmt.Printf("State: %+v\n", in.GetState()) 221 | fmt.Printf("Rest: `%s`\n", in.GetInput()) 222 | return 223 | } 224 | 225 | fmt.Printf("Parsed: %#v\n", ok) 226 | fmt.Printf("Tree: %s\n", pretty(out)) 227 | fmt.Printf("Rest: %#v\n", in.GetInput()) 228 | } 229 | -------------------------------------------------------------------------------- /parsec.go: -------------------------------------------------------------------------------- 1 | package parsec 2 | 3 | import ( 4 | "unicode" 5 | ) 6 | 7 | // Container of the input, position, and any user/parser state. 8 | type Vessel interface { 9 | GetState() State 10 | SetState(State) 11 | 12 | GetInput() Input 13 | SetInput(Input) 14 | 15 | GetPosition() Position 16 | SetPosition(Position) 17 | 18 | GetSpec() Spec 19 | SetSpec(Spec) 20 | 21 | Get(int) (Input, bool) 22 | Next() (rune, bool) 23 | Pop(int) 24 | Push(int) 25 | } 26 | 27 | // Specifications for the parser 28 | type Spec struct { 29 | CommentStart string 30 | CommentEnd string 31 | CommentLine string 32 | NestedComments bool 33 | IdentStart Parser 34 | IdentLetter Parser 35 | OpStart Parser 36 | OpLetter Parser 37 | ReservedNames []Output 38 | ReservedOpNames []Output 39 | CaseSensitive bool 40 | } 41 | 42 | // A Parser is a function that takes a vessel and returns any matches 43 | // (Output) and whether or not the match was valid. 44 | type Parser func(Vessel) (Output, bool) 45 | 46 | // Input type used by vessels 47 | type Input interface{} 48 | 49 | // Output of Parsers 50 | type Output interface{} 51 | 52 | // Any value can be a vessel's state. 53 | type State interface{} 54 | 55 | // Position in the input. 56 | type Position struct { 57 | Name string 58 | Line int 59 | Column int 60 | Offset int 61 | } 62 | 63 | // Token that satisfies a condition. 64 | func Satisfy(check func(c rune) bool) Parser { 65 | return func(in Vessel) (Output, bool) { 66 | target, ok := in.Next() 67 | if ok && check(target) { 68 | in.Pop(1) 69 | return target, true 70 | } 71 | 72 | return nil, false 73 | } 74 | } 75 | 76 | // Skip whitespace and comments 77 | func Whitespace() Parser { 78 | return Many(Any(Satisfy(unicode.IsSpace), OneLineComment(), MultiLineComment())) 79 | } 80 | 81 | func OneLineComment() Parser { 82 | return func(in Vessel) (Output, bool) { 83 | if in.GetSpec().CommentLine == "" { 84 | return nil, false 85 | } 86 | 87 | return Skip(All( 88 | Try(String(in.GetSpec().CommentLine)), 89 | Many(Satisfy(func(c rune) bool { return c != '\n' }))))(in) 90 | } 91 | } 92 | 93 | func MultiLineComment() Parser { 94 | return func(in Vessel) (Output, bool) { 95 | spec := in.GetSpec() 96 | 97 | return Skip(All( 98 | String(spec.CommentStart), 99 | InComment()))(in) 100 | } 101 | } 102 | 103 | func InComment() Parser { 104 | return func(in Vessel) (Output, bool) { 105 | if in.GetSpec().NestedComments { 106 | return inMulti()(in) 107 | } 108 | 109 | return inSingle()(in) 110 | } 111 | } 112 | 113 | func inMulti() Parser { 114 | return func(in Vessel) (Output, bool) { 115 | spec := in.GetSpec() 116 | startEnd := spec.CommentStart + spec.CommentEnd 117 | 118 | return Any( 119 | Try(String(spec.CommentEnd)), 120 | All(MultiLineComment(), inMulti()), 121 | All(Many1(NoneOf(startEnd)), inMulti()), 122 | All(OneOf(startEnd), inMulti()))(in) 123 | } 124 | } 125 | 126 | func inSingle() Parser { 127 | return func(in Vessel) (Output, bool) { 128 | spec := in.GetSpec() 129 | startEnd := spec.CommentStart + spec.CommentEnd 130 | 131 | return Any( 132 | Try(String(spec.CommentEnd)), 133 | All(Many1(NoneOf(startEnd)), inSingle()), 134 | All(OneOf(startEnd), inSingle()))(in) 135 | } 136 | } 137 | 138 | func OneOf(cs string) Parser { 139 | return func(in Vessel) (Output, bool) { 140 | next, ok := in.Next() 141 | if !ok { 142 | return nil, false 143 | } 144 | 145 | for _, v := range cs { 146 | if v == next { 147 | in.Pop(1) 148 | return v, true 149 | } 150 | } 151 | 152 | return next, false 153 | } 154 | } 155 | 156 | func NoneOf(cs string) Parser { 157 | return func(in Vessel) (Output, bool) { 158 | next, ok := in.Next() 159 | if !ok { 160 | return nil, false 161 | } 162 | 163 | for _, v := range cs { 164 | if v == next { 165 | return v, false 166 | } 167 | } 168 | 169 | in.Pop(1) 170 | return next, true 171 | } 172 | } 173 | 174 | func Skip(match Parser) Parser { 175 | return func(in Vessel) (Output, bool) { 176 | _, ok := match(in) 177 | return nil, ok 178 | } 179 | } 180 | 181 | func Token() Parser { 182 | return func(in Vessel) (next Output, ok bool) { 183 | next, ok = in.Next() 184 | in.Pop(1) 185 | return 186 | } 187 | } 188 | 189 | // Match a parser and skip whitespace 190 | func Lexeme(match Parser) Parser { 191 | return func(in Vessel) (Output, bool) { 192 | out, matched := match(in) 193 | if !matched { 194 | return nil, false 195 | } 196 | 197 | Whitespace()(in) 198 | 199 | return out, true 200 | } 201 | } 202 | 203 | // Match a parser 0 or more times. 204 | func Many(match Parser) Parser { 205 | return func(in Vessel) (Output, bool) { 206 | matches := []interface{}{} 207 | for { 208 | out, parsed := match(in) 209 | if !parsed { 210 | break 211 | } 212 | 213 | if out != nil { 214 | matches = append(matches, out) 215 | } 216 | } 217 | 218 | return matches, true 219 | } 220 | } 221 | 222 | func Many1(match Parser) Parser { 223 | return func(in Vessel) (Output, bool) { 224 | a, ok := match(in) 225 | if !ok { 226 | return nil, false 227 | } 228 | 229 | rest, ok := Many(match)(in) 230 | if !ok { 231 | return nil, false 232 | } 233 | 234 | as := rest.([]interface{}) 235 | 236 | all := make([]interface{}, len(as)+1) 237 | all[0] = a 238 | for i := 0; i < len(as); i++ { 239 | all[i+1] = as[i] 240 | } 241 | 242 | return all, true 243 | } 244 | } 245 | 246 | // Match a parser seperated by another parser 0 or more times. 247 | // Trailing delimeters are valid. 248 | func SepBy(delim Parser, match Parser) Parser { 249 | return func(in Vessel) (Output, bool) { 250 | matches := []interface{}{} 251 | for { 252 | out, parsed := match(in) 253 | if !parsed { 254 | break 255 | } 256 | 257 | matches = append(matches, out) 258 | 259 | _, sep := delim(in) 260 | if !sep { 261 | break 262 | } 263 | } 264 | 265 | return matches, true 266 | } 267 | } 268 | 269 | // Go through the parsers until one matches. 270 | func Any(parsers ...Parser) Parser { 271 | return func(in Vessel) (Output, bool) { 272 | for _, parser := range parsers { 273 | match, ok := parser(in) 274 | if ok { 275 | return match, ok 276 | } 277 | } 278 | 279 | return nil, false 280 | } 281 | } 282 | 283 | // Match all parsers, returning the final result. If one fails, it stops. 284 | // NOTE: Consumes input on failure. Wrap calls in Try(...) to avoid. 285 | func All(parsers ...Parser) Parser { 286 | return func(in Vessel) (match Output, ok bool) { 287 | for _, parser := range parsers { 288 | match, ok = parser(in) 289 | if !ok { 290 | return 291 | } 292 | } 293 | 294 | return 295 | } 296 | } 297 | 298 | // Match all parsers, collecting their outputs into a vector. 299 | // If one parser fails, the whole thing fails. 300 | // NOTE: Consumes input on failure. Wrap calls in Try(...) to avoid. 301 | func Collect(parsers ...Parser) Parser { 302 | return func(in Vessel) (Output, bool) { 303 | matches := []interface{}{} 304 | for _, parser := range parsers { 305 | match, ok := parser(in) 306 | if !ok { 307 | return nil, false 308 | } 309 | 310 | matches = append(matches, match) 311 | } 312 | 313 | return matches, true 314 | } 315 | } 316 | 317 | // Try matching begin, match, and then end. 318 | func Between(begin Parser, end Parser, match Parser) Parser { 319 | return func(in Vessel) (Output, bool) { 320 | parse, ok := Try(Collect(begin, match, end))(in) 321 | if !ok { 322 | return nil, false 323 | } 324 | 325 | return parse.([]interface{})[1], true 326 | } 327 | } 328 | 329 | // Lexeme parser for `match' wrapped in parens. 330 | func Parens(match Parser) Parser { return Lexeme(Between(Symbol("("), Symbol(")"), match)) } 331 | 332 | // Match a string and consume any following whitespace. 333 | func Symbol(str string) Parser { return Lexeme(String(str)) } 334 | 335 | // Match a string and pop the string's length from the input. 336 | // NOTE: Consumes input on failure. Wrap calls in Try(...) to avoid. 337 | func String(str string) Parser { 338 | return func(in Vessel) (Output, bool) { 339 | for _, v := range str { 340 | next, ok := in.Next() 341 | if !ok || next != v { 342 | return nil, false 343 | } 344 | 345 | in.Pop(1) 346 | } 347 | 348 | return str, true 349 | } 350 | } 351 | 352 | // Try a parse and revert the state and position if it fails. 353 | func Try(match Parser) Parser { 354 | return func(in Vessel) (Output, bool) { 355 | st, pos := in.GetState(), in.GetPosition() 356 | out, ok := match(in) 357 | if !ok { 358 | in.SetState(st) 359 | in.SetPosition(pos) 360 | } 361 | 362 | return out, ok 363 | } 364 | } 365 | 366 | func Ident() Parser { 367 | return func(in Vessel) (name Output, ok bool) { 368 | sp := in.GetSpec() 369 | n, ok := sp.IdentStart(in) 370 | if !ok { 371 | return 372 | } 373 | 374 | ns, ok := Many(sp.IdentLetter)(in) 375 | if !ok { 376 | return 377 | } 378 | 379 | rest := make([]rune, len(ns.([]interface{}))) 380 | for k, v := range ns.([]interface{}) { 381 | rest[k] = v.(rune) 382 | } 383 | 384 | return string(n.(rune)) + string(rest), true 385 | } 386 | } 387 | 388 | func Identifier() Parser { 389 | return Lexeme(Try(func(in Vessel) (name Output, ok bool) { 390 | name, ok = Ident()(in) 391 | if !ok { 392 | return 393 | } 394 | 395 | for _, v := range in.GetSpec().ReservedNames { 396 | if v == name { 397 | return nil, false 398 | } 399 | } 400 | 401 | return 402 | })) 403 | } 404 | 405 | // Basic string vessel for parsing over a string input. 406 | type StringVessel struct { 407 | state State 408 | input string 409 | position Position 410 | spec Spec 411 | } 412 | 413 | func (self *StringVessel) GetState() State { return self.state } 414 | 415 | func (self *StringVessel) SetState(st State) { self.state = st } 416 | 417 | func (self *StringVessel) GetInput() Input { 418 | i := 0 419 | for o, _ := range self.input { 420 | if i == self.position.Offset { 421 | return self.input[o:] 422 | } 423 | i++ 424 | } 425 | 426 | return "" 427 | } 428 | 429 | func (self *StringVessel) Get(i int) (Input, bool) { 430 | if len(self.input) < self.position.Offset+i { 431 | return "", false 432 | } 433 | 434 | s := "" 435 | n := 0 436 | for _, v := range self.input { 437 | if n >= self.position.Offset { 438 | if n > self.position.Offset+i { 439 | break 440 | } 441 | s += string(v) 442 | } 443 | n++ 444 | } 445 | 446 | return s, true 447 | } 448 | 449 | func (self *StringVessel) Next() (rune, bool) { 450 | if len(self.input) < self.position.Offset+1 { 451 | return 0, false 452 | } 453 | 454 | i := 0 455 | for _, v := range self.input { 456 | if i == self.position.Offset { 457 | return rune(v), true 458 | } 459 | i++ 460 | } 461 | 462 | return 0, false 463 | } 464 | 465 | func (self *StringVessel) Pop(i int) { self.position.Offset += i } 466 | 467 | func (self *StringVessel) Push(i int) { self.position.Offset -= i } 468 | 469 | func (self *StringVessel) SetInput(in Input) { self.input = in.(string) } 470 | 471 | func (self *StringVessel) GetPosition() Position { 472 | return self.position 473 | } 474 | 475 | func (self *StringVessel) SetPosition(pos Position) { 476 | self.position = pos 477 | } 478 | 479 | func (self *StringVessel) GetSpec() Spec { return self.spec } 480 | 481 | func (self *StringVessel) SetSpec(sp Spec) { self.spec = sp } 482 | --------------------------------------------------------------------------------