├── go.sum ├── .gitignore ├── grammars ├── .gitignore ├── longtest │ ├── long.peg │ └── long_test.go ├── fexl │ ├── doc │ │ ├── NOTICE │ │ ├── README │ │ └── try.fxl │ ├── fexl_test.go │ └── fexl.peg ├── calculatorast │ ├── calculator_test.go │ ├── calculator.peg │ └── calculator.go ├── calculator │ ├── calculator_test.go │ ├── calculator.peg │ └── calculator.go ├── java │ ├── example-1.java │ ├── java_test.go │ ├── example-2.java │ └── java_1_7.peg └── c │ ├── c_test.go │ └── c.peg ├── bootstrap ├── .gitignore └── main.go ├── go.mod ├── cmd └── peg-bootstrap │ ├── .gitignore │ ├── main.go │ ├── bootstrap.peg │ └── peg.bootstrap.peg ├── .golangci.yml ├── AUTHORS ├── generate-grammars.bash ├── .github └── workflows │ └── build.yml ├── bootstrap.bash ├── docs ├── links.md └── peg-file-syntax.md ├── LICENSE ├── README.md ├── main.go ├── set ├── set_test.go └── set.go ├── peg.peg ├── peg_test.go └── tree ├── peg.go.tmpl └── peg.go /go.sum: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | peg 2 | peg.exe 3 | -------------------------------------------------------------------------------- /grammars/.gitignore: -------------------------------------------------------------------------------- 1 | **/*.peg.go -------------------------------------------------------------------------------- /bootstrap/.gitignore: -------------------------------------------------------------------------------- 1 | bootstrap 2 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/pointlander/peg 2 | 3 | go 1.25 4 | -------------------------------------------------------------------------------- /cmd/peg-bootstrap/.gitignore: 
-------------------------------------------------------------------------------- 1 | peg0 2 | peg1 3 | peg2 4 | peg3 5 | peg-bootstrap 6 | *.peg.go -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | linters: 3 | enable: 4 | - errname 5 | - errorlint 6 | - godot 7 | - revive 8 | exclusions: 9 | presets: 10 | - comments 11 | generated: strict 12 | formatters: 13 | enable: 14 | - gofumpt 15 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | # This is the official list of Go authors for copyright purposes. 2 | 3 | # Names should be added to this file as 4 | # Name or Organization 5 | # The email address is not required for organizations. 6 | 7 | Andrew J Snodgrass 8 | -------------------------------------------------------------------------------- /grammars/longtest/long.peg: -------------------------------------------------------------------------------- 1 | # Copyright 2010 The Go Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style 3 | # license that can be found in the LICENSE file. 4 | 5 | package longtest 6 | 7 | type Long Peg { 8 | 9 | } 10 | 11 | String <- '\"' (!'\"' .)* '\"' !. 
12 | -------------------------------------------------------------------------------- /generate-grammars.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -Eeuo pipefail 4 | 5 | (cd grammars/c/ && go generate) 6 | (cd grammars/calculator/ && go generate) 7 | (cd grammars/calculatorast/ && go generate) 8 | (cd grammars/fexl/ && go generate) 9 | (cd grammars/java/ && go generate) 10 | (cd grammars/longtest/ && go generate) 11 | -------------------------------------------------------------------------------- /grammars/fexl/doc/NOTICE: -------------------------------------------------------------------------------- 1 | Copyright 2011 Patrick Chkoreff 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions 13 | and limitations under the License. 14 | -------------------------------------------------------------------------------- /grammars/fexl/fexl_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2010 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | //go:generate ../../peg -switch -inline fexl.peg 6 | 7 | package fexl 8 | 9 | import ( 10 | "os" 11 | "testing" 12 | ) 13 | 14 | func TestFexl(t *testing.T) { 15 | buffer, err := os.ReadFile("doc/try.fxl") 16 | if err != nil { 17 | t.Fatal(err) 18 | } 19 | 20 | fexl := &Fexl[uint32]{Buffer: string(buffer)} 21 | err = fexl.Init() 22 | if err != nil { 23 | t.Fatal(err) 24 | } 25 | 26 | if err := fexl.Parse(); err != nil { 27 | t.Fatal(err) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /grammars/fexl/fexl.peg: -------------------------------------------------------------------------------- 1 | # fexl 2 | 3 | package fexl 4 | 5 | type Fexl Peg { 6 | 7 | } 8 | 9 | Fexl <- ws Expression+ Input? !. 10 | 11 | Input <- '\\\\' .* 12 | 13 | Expression <- Comment / ';' ws Expression* / Definition / Argument / Term 14 | 15 | Comment <- '#' (![\n\r] .)* ws 16 | 17 | Definition <- '\\' Symbol '=' ws Term / Recursive 18 | 19 | Recursive <- '\\' Symbol '==' ws Term 20 | 21 | Argument <- '\\' Symbol 22 | 23 | Term <- open Expression+ close / Symbol 24 | 25 | Symbol <- (String / (![ \t\n\r\\()"~;=] .)+) ws 26 | 27 | String <- '"' (!'"' .)* '"' / Complex 28 | 29 | Complex <- tilde '@' (!'@' .)* '@' 30 | 31 | tilde <- '~' 32 | 33 | open <- '(' ws 34 | 35 | close <- ')' ws 36 | 37 | ws <- [ \t\n\r]* 38 | -------------------------------------------------------------------------------- /grammars/calculatorast/calculator_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2010 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | //go:generate ../../peg -switch -inline calculator.peg 6 | 7 | package calculatorast 8 | 9 | import ( 10 | "math/big" 11 | "testing" 12 | ) 13 | 14 | func TestCalculator(t *testing.T) { 15 | expression := "( 1 - -3 ) / 3 + 2 * ( 3 + -4 ) + 3 % 2^2" 16 | calc := &Calculator[uint32]{Buffer: expression} 17 | err := calc.Init() 18 | if err != nil { 19 | t.Fatal(err) 20 | } 21 | if err := calc.Parse(); err != nil { 22 | t.Fatal(err) 23 | } 24 | if calc.Eval().Cmp(big.NewInt(2)) != 0 { 25 | t.Fatal("got incorrect result") 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /cmd/peg-bootstrap/main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2010 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | //go:build bootstrap 6 | // +build bootstrap 7 | 8 | package main 9 | 10 | import ( 11 | "io" 12 | "log" 13 | "os" 14 | 15 | "github.com/pointlander/peg/tree" 16 | ) 17 | 18 | func main() { 19 | buffer, err := io.ReadAll(os.Stdin) 20 | if err != nil { 21 | log.Fatal(err) 22 | } 23 | p := &Peg[uint32]{Tree: tree.New(false, false, false), Buffer: string(buffer)} 24 | p.Init(Pretty[uint32](true), Size[uint32](1<<15)) 25 | if err := p.Parse(); err != nil { 26 | log.Fatal(err) 27 | } 28 | p.Execute() 29 | p.Compile("boot.peg.go", os.Args, os.Stdout) 30 | } 31 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | push: 5 | branches: [ master, main ] 6 | pull_request: 7 | branches: [ master, main ] 8 | 9 | jobs: 10 | build: 11 | name: Build 12 | runs-on: ubuntu-latest 13 | steps: 14 | 15 | - name: Checkout 16 | uses: actions/checkout@v5 17 | 18 | - name: Setup Go 19 | uses: actions/setup-go@v6 20 | 
with: 21 | go-version-file: 'go.mod' 22 | 23 | - name: Generated files up-to-date 24 | run: | 25 | go generate 26 | git diff --quiet 27 | 28 | - name: Lint 29 | uses: golangci/golangci-lint-action@v9 30 | with: 31 | # Require: The version of golangci-lint to use. 32 | version: latest 33 | 34 | - name: Test 35 | run: go test -short ./... 36 | -------------------------------------------------------------------------------- /grammars/longtest/long_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2010 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | //go:generate ../../peg -switch -inline long.peg 6 | 7 | package longtest 8 | 9 | import ( 10 | "testing" 11 | ) 12 | 13 | func TestLong(t *testing.T) { 14 | length := 100000 15 | if testing.Short() { 16 | length = 100 17 | } 18 | 19 | expression := "" 20 | long := &Long[uint32]{Buffer: "\"" + expression + "\""} 21 | err := long.Init() 22 | if err != nil { 23 | t.Fatal(err) 24 | } 25 | for range length { 26 | if err := long.Parse(); err != nil { 27 | t.Fatal(err) 28 | } 29 | long.Reset() 30 | expression = expression + "X" 31 | long.Buffer = "\"" + expression + "\"" 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /grammars/calculator/calculator_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2010 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | //go:generate ../../peg -switch -inline calculator.peg 6 | 7 | package calculator 8 | 9 | import ( 10 | "math/big" 11 | "testing" 12 | ) 13 | 14 | func TestCalculator(t *testing.T) { 15 | expression := "( 1 - -3 ) / 3 + 2 * ( 3 + -4 ) + 3 % 2^2" 16 | calc := &Calculator[uint32]{Buffer: expression} 17 | err := calc.Init() 18 | if err != nil { 19 | t.Fatal(err) 20 | } 21 | calc.Expression.Init(expression) 22 | if err := calc.Parse(); err != nil { 23 | t.Fatal(err) 24 | } 25 | calc.Execute() 26 | if calc.Evaluate().Cmp(big.NewInt(2)) != 0 { 27 | t.Fatal("got incorrect result") 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /grammars/calculatorast/calculator.peg: -------------------------------------------------------------------------------- 1 | # Copyright 2010 The Go Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style 3 | # license that can be found in the LICENSE file. 4 | 5 | package calculatorast 6 | 7 | type Calculator Peg { 8 | } 9 | 10 | e <- sp e1 !. 
11 | e1 <- e2 ( add e2 12 | / minus e2 13 | )* 14 | e2 <- e3 ( multiply e3 15 | / divide e3 16 | / modulus e3 17 | )* 18 | e3 <- e4 ( exponentiation e4 19 | )* 20 | e4 <- minus value 21 | / value 22 | value <- number 23 | / sub 24 | number <- < [0-9]+ > sp 25 | sub <- open e1 close 26 | add <- '+' sp 27 | minus <- '-' sp 28 | multiply <- '*' sp 29 | divide <- '/' sp 30 | modulus <- '%' sp 31 | exponentiation <- '^' sp 32 | open <- '(' sp 33 | close <- ')' sp 34 | sp <- ( ' ' / '\t' )* 35 | -------------------------------------------------------------------------------- /bootstrap.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -Eeuo pipefail 4 | 5 | 6 | (cd bootstrap && go build && rm -f bootstrap/bootstrap.peg.go) 7 | 8 | 9 | cd cmd/peg-bootstrap 10 | 11 | # Build peg0 12 | ./../../bootstrap/bootstrap 13 | go build -tags bootstrap -o peg0 14 | rm -f bootstrap.peg.go 15 | 16 | # Build peg1 17 | ./peg0 < bootstrap.peg > peg1.peg.go 18 | go build -tags bootstrap -o peg1 19 | rm -f peg1.peg.go 20 | 21 | # Build peg2 22 | ./peg1 < peg.bootstrap.peg > peg2.peg.go 23 | go build -tags bootstrap -o peg2 24 | rm -f peg2.peg.go 25 | 26 | # Build peg3 27 | ./peg2 < ../../peg.peg > peg3.peg.go 28 | go build -tags bootstrap -o peg3 29 | rm -f peg3.peg.go 30 | 31 | # Build peg-bootstrap 32 | ./peg3 < ../../peg.peg > peg-bootstrap.peg.go 33 | go build -tags bootstrap -o peg-bootstrap 34 | rm -f peg-bootstrap.peg.go 35 | 36 | # Build peg 37 | cd ../.. 
38 | ./cmd/peg-bootstrap/peg-bootstrap < peg.peg > peg.peg.go 39 | go build 40 | ./peg -inline -switch peg.peg 41 | -------------------------------------------------------------------------------- /docs/links.md: -------------------------------------------------------------------------------- 1 | https://www.microsoft.com/en-us/research/blog/fp2-fully-in-place-functional-programming-provides-memory-reuse-for-pure-functional-programs/ 2 | 3 | https://medium.com/@octskyward/graal-truffle-134d8f28fb69#.jo3luf4dn 4 | http://nez-peg.github.io/ 5 | https://en.wikipedia.org/wiki/DFA_minimization 6 | 7 | https://news.ycombinator.com/item?id=14589173 8 | http://jamey.thesharps.us/2017/06/search-based-compiler-code-generation.html 9 | 10 | https://news.ycombinator.com/item?id=15105119 11 | https://en.wikipedia.org/wiki/Tree_transducer 12 | 13 | # Type-Driven Program Synthesis 14 | https://news.ycombinator.com/item?id=18251145 15 | https://www.youtube.com/watch?v=HnOix9TFy1A 16 | http://comcom.csail.mit.edu/comcom/#welcome 17 | https://bitbucket.org/nadiapolikarpova/synquid 18 | 19 | # Formality – An efficient programming language and proof assistant 20 | https://news.ycombinator.com/item?id=18230148 21 | https://github.com/maiavictor/formality 22 | 23 | https://treecalcul.us/ 24 | -------------------------------------------------------------------------------- /grammars/calculator/calculator.peg: -------------------------------------------------------------------------------- 1 | # Copyright 2010 The Go Authors. All rights reserved. 2 | # Use of this source code is governed by a BSD-style 3 | # license that can be found in the LICENSE file. 4 | 5 | package calculator 6 | 7 | type Calculator Peg { 8 | Expression 9 | } 10 | 11 | e <- sp e1 !. 
12 | e1 <- e2 ( add e2 { p.AddOperator(TypeAdd) } 13 | / minus e2 { p.AddOperator(TypeSubtract) } 14 | )* 15 | e2 <- e3 ( multiply e3 { p.AddOperator(TypeMultiply) } 16 | / divide e3 { p.AddOperator(TypeDivide) } 17 | / modulus e3 { p.AddOperator(TypeModulus) } 18 | )* 19 | e3 <- e4 ( exponentiation e4 { p.AddOperator(TypeExponentiation) } 20 | )* 21 | e4 <- minus value { p.AddOperator(TypeNegation) } 22 | / value 23 | value <- < [0-9]+ > sp { p.AddValue(buffer[begin:end]) } 24 | / open e1 close 25 | add <- '+' sp 26 | minus <- '-' sp 27 | multiply <- '*' sp 28 | divide <- '/' sp 29 | modulus <- '%' sp 30 | exponentiation <- '^' sp 31 | open <- '(' sp 32 | close <- ')' sp 33 | sp <- ( ' ' / '\t' )* 34 | -------------------------------------------------------------------------------- /grammars/java/example-1.java: -------------------------------------------------------------------------------- 1 | import java.io.*; 2 | 3 | public class TryWithResourcesDemo { 4 | public static void main(String[] args) { 5 | try (BufferedReader br = new BufferedReader(new FileReader("test.txt"))) { 6 | System.out.println(br.readLine()); 7 | } catch (IOException e) { 8 | e.printStackTrace(); 9 | } 10 | } 11 | } 12 | 13 | public class MultiCatchExample { 14 | public static void main(String[] args) { 15 | try { 16 | int a = 10 / 0; 17 | String str = null; 18 | str.length(); 19 | } catch (ArithmeticException | NullPointerException e) { 20 | System.out.println("Exception caught: " + e.getMessage()); 21 | } 22 | } 23 | } 24 | 25 | public class StringSwitchDemo { 26 | public static void main(String[] args) { 27 | String day = "MONDAY"; 28 | switch (day) { 29 | case "MONDAY": 30 | System.out.println("Start of the workweek!"); 31 | break; 32 | case "FRIDAY": 33 | System.out.println("Almost weekend!"); 34 | break; 35 | default: 36 | System.out.println("Regular day."); 37 | } 38 | } 39 | } 40 | 41 | -------------------------------------------------------------------------------- 
/grammars/java/java_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2010 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | //go:generate ../../peg -switch -inline java_1_7.peg 6 | 7 | package java 8 | 9 | import ( 10 | "io/fs" 11 | "os" 12 | "path/filepath" 13 | "testing" 14 | ) 15 | 16 | var example1 = `public class HelloWorld { 17 | public static void main(String[] args) { 18 | System.out.println("Hello, World"); 19 | } 20 | } 21 | ` 22 | 23 | func TestBasic(t *testing.T) { 24 | java := &Java[uint32]{Buffer: example1} 25 | err := java.Init() 26 | if err != nil { 27 | t.Fatal(err) 28 | } 29 | 30 | if err := java.Parse(); err != nil { 31 | t.Fatal(err) 32 | } 33 | } 34 | 35 | func TestJavaFiles(t *testing.T) { 36 | err := filepath.Walk(".", func(path string, _ fs.FileInfo, err error) error { 37 | if err != nil { 38 | return err 39 | } 40 | if filepath.Ext(path) == ".java" { 41 | b, err := os.ReadFile(path) 42 | if err != nil { 43 | return err 44 | } 45 | 46 | java := &Java[uint32]{Buffer: string(b)} 47 | err = java.Init() 48 | if err != nil { 49 | t.Fatal(err) 50 | } 51 | if err := java.Parse(); err != nil { 52 | t.Fatalf("Parse failed: %v", err) 53 | } 54 | } 55 | return nil 56 | }) 57 | if err != nil { 58 | t.Fatal(err) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /grammars/java/example-2.java: -------------------------------------------------------------------------------- 1 | import java.util.concurrent.RecursiveTask; 2 | import java.util.concurrent.ForkJoinPool; 3 | 4 | class SumTask extends RecursiveTask { 5 | private int[] arr; 6 | private int start, end; 7 | 8 | SumTask(int[] arr, int start, int end) { 9 | this.arr = arr; 10 | this.start = start; 11 | this.end = end; 12 | } 13 | 14 | @Override 15 | protected Integer compute() { 16 | if (end 
- start <= 5) { // base case 17 | int sum = 0; 18 | for (int i = start; i < end; i++) { 19 | sum += arr[i]; 20 | } 21 | return sum; 22 | } else { // split task 23 | int mid = (start + end) / 2; 24 | SumTask leftTask = new SumTask(arr, start, mid); 25 | SumTask rightTask = new SumTask(arr, mid, end); 26 | leftTask.fork(); 27 | int rightResult = rightTask.compute(); 28 | int leftResult = leftTask.join(); 29 | return leftResult + rightResult; 30 | } 31 | } 32 | } 33 | 34 | public class ForkJoinExample { 35 | public static void main(String[] args) { 36 | int[] numbers = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; 37 | ForkJoinPool pool = new ForkJoinPool(); 38 | SumTask task = new SumTask(numbers, 0, numbers.length); 39 | int result = pool.invoke(task); 40 | System.out.println("Sum: " + result); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010, Go Authors 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright notice, 7 | this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright notice, 9 | this list of conditions and the following disclaimer in the documentation and/or 10 | other materials provided with the distribution. 11 | * Neither the name of the Go Authors nor the names of its contributors may be used to 12 | endorse or promote products derived from this software without specific prior written permission. 
13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 15 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 16 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 17 | THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 18 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 19 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 21 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 22 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | -------------------------------------------------------------------------------- /cmd/peg-bootstrap/bootstrap.peg: -------------------------------------------------------------------------------- 1 | # Core bootstrap PE Grammar for peg language. 2 | # Adapted from peg.peg. 3 | 4 | Grammar <- Spacing { p.AddPackage("main") } 5 | { p.AddImport("github.com/pointlander/peg/tree") } 6 | { p.AddPeg("Peg"); p.AddState("*tree.Tree") } 7 | Action* Definition* !. 8 | 9 | Definition <- Identifier { p.AddRule(text) } 10 | LeftArrow Expression { p.AddExpression() } 11 | Expression <- Sequence (Slash Sequence { p.AddAlternate() } )* 12 | Sequence <- Prefix (Prefix { p.AddSequence() } )* 13 | Prefix <- '!' Suffix { p.AddPeekNot() } / Suffix 14 | Suffix <- Primary (Question { p.AddQuery() } 15 | / Star { p.AddStar() } )? 
16 | Primary <- Identifier !LeftArrow { p.AddName(text) } 17 | / Open Expression Close 18 | / Literal / Class / Dot { p.AddDot() } 19 | / Action { p.AddAction(text) } 20 | / Begin Expression End { p.AddPush() } 21 | 22 | Identifier <- < Ident Ident* > Spacing 23 | Ident <- [A-Za-z] 24 | Literal <- ['] !['] Char (!['] Char { p.AddSequence() } )* ['] Spacing 25 | Class <- '[' Range (!']' Range { p.AddAlternate() } )* ']' Spacing 26 | Range <- Char '-' Char { p.AddRange() } / Char 27 | Char <- '\\0x' <[0-9a-f]*> { p.AddHexaCharacter(text) } 28 | / '\\\\' { p.AddCharacter("\\") } 29 | / !'\\' <.> { p.AddCharacter(text) } 30 | 31 | LeftArrow <- '<-' Spacing 32 | Slash <- '/' Spacing 33 | Question <- '?' Spacing 34 | Star <- '*' Spacing 35 | Open <- '(' Spacing 36 | Close <- ')' Spacing 37 | Dot <- '.' Spacing 38 | 39 | Spacing <- (Space / Comment)* 40 | Comment <- '#' (!EndOfLine .)* 41 | Space <- ' ' / '\0x9' / EndOfLine 42 | EndOfLine <- '\0xd\0xa' / '\0xa' / '\0xd' 43 | 44 | Action <- '{' < (![}].)* > '}' Spacing 45 | Begin <- '<' Spacing 46 | End <- '>' Spacing 47 | -------------------------------------------------------------------------------- /grammars/calculator/calculator.go: -------------------------------------------------------------------------------- 1 | // Copyright 2010 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
// Type identifies the kind of instruction stored in a ByteCode.
type Type uint8

// Instruction kinds understood by Expression.Evaluate. TypeNumber is the
// zero value of Type.
const (
	// TypeNumber pushes ByteCode.Value onto the evaluation stack.
	TypeNumber Type = iota
	// TypeNegation negates the value on top of the stack in place.
	TypeNegation
	// TypeAdd replaces the top two stack values with their sum.
	TypeAdd
	// TypeSubtract replaces the top two stack values with their difference.
	TypeSubtract
	// TypeMultiply replaces the top two stack values with their product.
	TypeMultiply
	// TypeDivide replaces the top two stack values with their quotient.
	TypeDivide
	// TypeModulus replaces the top two stack values with their modulus.
	TypeModulus
	// TypeExponentiation replaces the top two stack values with the first
	// raised to the power of the second.
	TypeExponentiation
)

// ByteCode is a single instruction of an Expression program.
type ByteCode struct {
	// T selects the operation to perform.
	T Type
	// Value is the operand pushed by a TypeNumber instruction; the other
	// instruction kinds do not read it.
	Value *big.Int
}

// String returns the textual form of the instruction: the decimal value for
// TypeNumber, the operator symbol for operators ("-" for both negation and
// subtraction), and the empty string for an unknown Type.
func (c *ByteCode) String() string {
	switch c.T {
	case TypeNumber:
		return c.Value.String()
	case TypeAdd:
		return "+"
	case TypeNegation, TypeSubtract:
		return "-"
	case TypeMultiply:
		return "*"
	case TypeDivide:
		return "/"
	case TypeModulus:
		return "%"
	case TypeExponentiation:
		return "^"
	}
	return ""
}

// Expression is a postfix program: values are pushed in order and every
// operator consumes the operands that precede it.
type Expression struct {
	// Code is the instruction buffer; only Code[:Top] holds instructions.
	Code []ByteCode
	// Top is the number of instructions appended so far.
	Top int
}

// Init prepares e to record the byte code for expression. It allocates one
// zeroed slot per input byte and rewinds Top, so the same Expression can be
// initialised again for a new input without indexing past the fresh buffer.
func (e *Expression) Init(expression string) {
	e.Code = make([]ByteCode, len(expression))
	e.Top = 0
}

// AddOperator appends an operator instruction to the program. The buffer is
// grown when it is full, so an Expression that was never initialised (or was
// initialised with too short an input) does not panic.
func (e *Expression) AddOperator(operator Type) {
	for e.Top >= len(e.Code) {
		e.Code = append(e.Code, ByteCode{})
	}
	e.Code[e.Top] = ByteCode{T: operator}
	e.Top++
}

// AddValue appends a TypeNumber instruction whose operand is value parsed as
// a base-10 integer. The buffer is grown when it is full.
func (e *Expression) AddValue(value string) {
	for e.Top >= len(e.Code) {
		e.Code = append(e.Code, ByteCode{})
	}
	v := new(big.Int)
	// The ok result of SetString is not checked, as before: this method has
	// no way to report a failure. Callers are expected to pass decimal digits.
	v.SetString(value, 10)
	// Set T explicitly instead of relying on the slot still being zeroed.
	e.Code[e.Top] = ByteCode{T: TypeNumber, Value: v}
	e.Top++
}

// Evaluate runs the program in Code[:Top] on a fresh stack and returns the
// value left at the bottom of the stack. An empty program evaluates to zero.
//
// A malformed program (an operator without enough operands before it) panics
// with an index out of range. Per the math/big documentation, Div and Mod
// panic on a zero divisor.
func (e *Expression) Evaluate() *big.Int {
	if e.Top == 0 {
		return new(big.Int)
	}
	// The stack can never hold more entries than there are instructions.
	stack, top := make([]big.Int, e.Top), 0
	for _, ins := range e.Code[:e.Top] {
		switch ins.T {
		case TypeNumber:
			stack[top].Set(ins.Value)
			top++
			continue
		case TypeNegation:
			a := &stack[top-1]
			a.Neg(a)
			continue
		}
		// Binary operators: the result overwrites the left operand and the
		// right operand is popped.
		a, b := &stack[top-2], &stack[top-1]
		top--
		switch ins.T {
		case TypeAdd:
			a.Add(a, b)
		case TypeSubtract:
			a.Sub(a, b)
		case TypeMultiply:
			a.Mul(a, b)
		case TypeDivide:
			a.Div(a, b)
		case TypeModulus:
			a.Mod(a, b)
		case TypeExponentiation:
			a.Exp(a, b, nil)
		}
	}
	return &stack[0]
}
-------------------------------------------------------------------------------- /grammars/fexl/doc/README: -------------------------------------------------------------------------------- 1 | Fexl (Function EXpression Language) http://fexl.com 2 | 3 | AUTHOR 4 | 5 | Patrick Chkoreff wrote this software. Please see the NOTICE file for terms of 6 | use. 7 | 8 | 9 | CREDITS 10 | 11 | I thank Moses Schönfinkel, who in 1924 wrote a paper titled "On the building 12 | blocks of mathematical logic". I found this paper in "From Frege to Gödel, A 13 | Source Book in Mathematical Logic, 1879-1931". 14 | 15 | Mr. Schönfinkel observes that all computable functions can be defined in terms 16 | of just two primitive functions C and S applied together in various 17 | combinations. This is a profound and magnificent insight for which I am very 18 | grateful. 19 | 20 | The C function is governed by the rule ((C x) y) = x. This is known as the 21 | "constancy function", or "Konstanzfunktion" in the original German. 22 | 23 | The S function is governed by the rule (((S x) y) z) = ((x z) (y z)). This is 24 | known as the "fusion function", or "Verschmelzungfunktion" in the original 25 | German. 26 | 27 | I also thank Jørgen Steensgaard-Madsen, who in 1989 wrote a paper titled 28 | "Typed Representation of Objects by Functions". I found this paper in the 29 | "ACM Transactions on Programming Languages and Systems, January 1989, Volume 30 | 11 Number 1". 31 | 32 | Mr. Steensgaard-Madsen observes that all of what we ordinarily understand as 33 | "data" can be represented as pure functions. Even a piece of data as humble 34 | as a single bit is in essence just a function. 35 | 36 | 37 | HOW TO INSTALL 38 | 39 | Go into the source code directory and run this command: 40 | 41 | ./build install 42 | 43 | You may be prompted for your sudo (superuser) password. 44 | 45 | That builds the program locally in ../bin, then copies it to the /usr/bin 46 | directory. 
If you need to install it in a different place, change the 47 | install_location variable inside the build script. 48 | 49 | 50 | HOW TO BUILD LOCALLY 51 | 52 | If you wish to enhance or test the program, you might prefer to build it 53 | locally in the ../bin directory and run it from there, without installing into 54 | /usr/bin. You do that with this command: 55 | 56 | ./build 57 | 58 | 59 | HOW TO RUN 60 | 61 | To run a fexl program which is read from standard input: 62 | 63 | fexl 64 | 65 | To run a fexl program which is read from a file named "script": 66 | 67 | fexl script 68 | 69 | You may also use the "shebang" method to create an executable fexl file. For 70 | example, create a file named "script" and put this on the first line: 71 | 72 | #!/usr/bin/fexl 73 | 74 | Then make your script executable with: 75 | 76 | chmod +x script 77 | 78 | Now you can run your script directly this way: 79 | 80 | ./script 81 | -------------------------------------------------------------------------------- /grammars/calculatorast/calculator.go: -------------------------------------------------------------------------------- 1 | // Copyright 2010 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package calculatorast 6 | 7 | import ( 8 | "math/big" 9 | ) 10 | 11 | func (c *Calculator[_]) Eval() *big.Int { 12 | return c.Rulee(c.AST()) 13 | } 14 | 15 | func (c *Calculator[U]) Rulee(node *node[U]) *big.Int { 16 | node = node.up 17 | for node != nil { 18 | switch node.pegRule { 19 | case rulee1: 20 | return c.Rulee1(node) 21 | } 22 | node = node.next 23 | } 24 | return nil 25 | } 26 | 27 | func (c *Calculator[U]) Rulee1(node *node[U]) *big.Int { 28 | node = node.up 29 | var a *big.Int 30 | for node != nil { 31 | switch node.pegRule { 32 | case rulee2: 33 | a = c.Rulee2(node) 34 | case ruleadd: 35 | node = node.next 36 | b := c.Rulee2(node) 37 | a.Add(a, b) 38 | case ruleminus: 39 | node = node.next 40 | b := c.Rulee2(node) 41 | a.Sub(a, b) 42 | } 43 | node = node.next 44 | } 45 | return a 46 | } 47 | 48 | func (c *Calculator[U]) Rulee2(node *node[U]) *big.Int { 49 | node = node.up 50 | var a *big.Int 51 | for node != nil { 52 | switch node.pegRule { 53 | case rulee3: 54 | a = c.Rulee3(node) 55 | case rulemultiply: 56 | node = node.next 57 | b := c.Rulee3(node) 58 | a.Mul(a, b) 59 | case ruledivide: 60 | node = node.next 61 | b := c.Rulee3(node) 62 | a.Div(a, b) 63 | case rulemodulus: 64 | node = node.next 65 | b := c.Rulee3(node) 66 | a.Mod(a, b) 67 | } 68 | node = node.next 69 | } 70 | return a 71 | } 72 | 73 | func (c *Calculator[U]) Rulee3(node *node[U]) *big.Int { 74 | node = node.up 75 | var a *big.Int 76 | for node != nil { 77 | switch node.pegRule { 78 | case rulee4: 79 | a = c.Rulee4(node) 80 | case ruleexponentiation: 81 | node = node.next 82 | b := c.Rulee4(node) 83 | a.Exp(a, b, nil) 84 | } 85 | node = node.next 86 | } 87 | return a 88 | } 89 | 90 | func (c *Calculator[U]) Rulee4(node *node[U]) *big.Int { 91 | node = node.up 92 | minus := false 93 | for node != nil { 94 | switch node.pegRule { 95 | case rulevalue: 96 | a := c.Rulevalue(node) 97 | if minus { 98 | a.Neg(a) 99 | } 100 | return a 101 | case ruleminus: 102 | minus = true 103 | } 
104 | node = node.next 105 | } 106 | return nil 107 | } 108 | 109 | func (c *Calculator[U]) Rulevalue(node *node[U]) *big.Int { 110 | node = node.up 111 | for node != nil { 112 | switch node.pegRule { 113 | case rulenumber: 114 | a := big.NewInt(0) 115 | a.SetString(string(c.buffer[node.begin:node.end]), 10) 116 | return a 117 | case rulesub: 118 | return c.Rulesub(node) 119 | } 120 | node = node.next 121 | } 122 | return nil 123 | } 124 | 125 | func (c *Calculator[U]) Rulesub(node *node[U]) *big.Int { 126 | node = node.up 127 | for node != nil { 128 | switch node.pegRule { 129 | case rulee1: 130 | return c.Rulee1(node) 131 | } 132 | node = node.next 133 | } 134 | return nil 135 | } 136 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PEG, an Implementation of a Packrat Parsing Expression Grammar in Go 2 | 3 | [![Go Reference](https://pkg.go.dev/badge/github.com/pointlander/peg.svg)](https://pkg.go.dev/github.com/pointlander/peg) 4 | [![Go Report Card](https://goreportcard.com/badge/github.com/pointlander/peg)](https://goreportcard.com/report/github.com/pointlander/peg) 5 | 6 | A [Parsing Expression Grammar](https://en.wikipedia.org/wiki/Parsing_expression_grammar) (hence `peg`) is a way to create grammars similar in principle to [regular expressions](https://en.wikipedia.org/wiki/Regular_expression) but which allow better code integration. Specifically, `peg` is an implementation of the [Packrat](https://en.wikipedia.org/wiki/Parsing_expression_grammar#Implementing_parsers_from_parsing_expression_grammars) parser generator originally implemented as [peg/leg](https://www.piumarta.com/software/peg/) by [Ian Piumarta](https://www.piumarta.com/cv/) in C. A Packrat parser is a "recursive descent parser" capable of backtracking and negative look-ahead assertions which are problematic for regular expression engines. 
7 | 8 | ## Installation 9 | 10 | ``` 11 | go install github.com/pointlander/peg@latest 12 | ``` 13 | 14 | 15 | ## Usage 16 | 17 | ### Build executable 18 | 19 | ``` 20 | go generate && go build 21 | ``` 22 | 23 | ### Help 24 | 25 | ``` 26 | ./peg -h 27 | ``` 28 | 29 | 30 | ### Example 31 | 32 | This creates the file `peg.peg.go`: 33 | ``` 34 | ./peg -inline -switch peg.peg 35 | ``` 36 | 37 | 38 | ## PEG file syntax 39 | 40 | See [peg-file-syntax.md](docs/peg-file-syntax.md) 41 | 42 | 43 | ## Development 44 | 45 | ### Requirements 46 | 47 | * [Golang](https://golang.org/doc/install), see [go.mod](go.mod) for version 48 | * [golangci-lint latest version](https://github.com/golangci/golangci-lint#install) (v2 or later) 49 | * [Bash 3.2.x or higher](https://www.gnu.org/software/bash) 50 | 51 | 52 | ### Generate 53 | 54 | Bootstrap and generate grammar *.peg.go. This command should initially be executed once before other commands. 55 | ``` 56 | go generate 57 | ``` 58 | 59 | 60 | ### Build 61 | 62 | ``` 63 | go build 64 | ``` 65 | 66 | ([`go generate`](#generate) required once beforehand) 67 | 68 | 69 | #### Set version 70 | 71 | Use the version from the tag if the current commit has a tag. If not, use the current commit hash. 72 | ``` 73 | go build -ldflags "-X main.Version=$(git describe --tags --exact-match 2>/dev/null || git rev-parse --short HEAD)" 74 | ``` 75 | 76 | Additionally, since [Go 1.18](https://go.dev/doc/go1.18) the go command embeds version control information. Read the information: 77 | ``` 78 | go version -m peg 79 | ``` 80 | 81 | 82 | ### Test 83 | 84 | ``` 85 | go test -short ./... 86 | ``` 87 | 88 | ([`go generate`](#generate) required once beforehand) 89 | 90 | 91 | ### Lint 92 | 93 | ``` 94 | golangci-lint run 95 | ``` 96 | 97 | ([`go generate`](#generate) required once beforehand) 98 | 99 | 100 | ### Format 101 | 102 | ``` 103 | golangci-lint fmt 104 | ``` 105 | 106 | 107 | ### Benchmark 108 | ``` 109 | go test -benchmem -bench . 
110 | ``` 111 | 112 | ([`go generate`](#generate) required once beforehand) 113 | 114 | 115 | ## Author 116 | 117 | Andrew Snodgrass 118 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2010 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package main 6 | 7 | import ( 8 | "flag" 9 | "fmt" 10 | "io" 11 | "log" 12 | "os" 13 | 14 | "github.com/pointlander/peg/tree" 15 | ) 16 | 17 | //go:generate ./bootstrap.bash 18 | //go:generate ./generate-grammars.bash 19 | 20 | var ( 21 | Version = "dev" 22 | 23 | inline = flag.Bool("inline", false, "parse rule inlining") 24 | switchFlag = flag.Bool("switch", false, "replace if-else if-else like blocks with switch blocks") 25 | printFlag = flag.Bool("print", false, "directly dump the syntax tree") 26 | syntax = flag.Bool("syntax", false, "print out the syntax tree") 27 | noast = flag.Bool("noast", false, "disable AST") 28 | strict = flag.Bool("strict", false, "treat compiler warnings as errors") 29 | outputFile = flag.String("output", "", "output to `FILE` (\"-\" for stdout)") 30 | showVersion = flag.Bool("version", false, "print the version and exit") 31 | ) 32 | 33 | // main is the entry point for the PEG compiler. 
34 | func main() { 35 | flag.Parse() 36 | 37 | if *showVersion { 38 | fmt.Println("version:", Version) 39 | return 40 | } 41 | 42 | err := parse( 43 | func(p *Peg[uint32], out io.Writer) error { 44 | if *printFlag { 45 | p.Print() 46 | } 47 | if *syntax { 48 | p.PrintSyntaxTree() 49 | } 50 | 51 | p.Strict = *strict 52 | if err := p.Compile(*outputFile, os.Args, out); err != nil { 53 | return err 54 | } 55 | return nil 56 | }, 57 | ) 58 | if err != nil { 59 | if *strict { 60 | log.Fatal(err) 61 | } 62 | fmt.Fprintln(os.Stderr, "warning:", err) 63 | } 64 | } 65 | 66 | // getIO returns input and output streams based on command-line flags. 67 | func getIO() (in io.ReadCloser, out io.WriteCloser, err error) { 68 | in, out = os.Stdin, os.Stdout 69 | 70 | if flag.NArg() > 0 && flag.Arg(0) != "-" { 71 | in, err = os.Open(flag.Arg(0)) 72 | if err != nil { 73 | return nil, nil, err 74 | } 75 | if *outputFile == "" { 76 | *outputFile = flag.Arg(0) + ".go" 77 | } 78 | } 79 | 80 | if *outputFile != "" && *outputFile != "-" { 81 | out, err = os.OpenFile(*outputFile, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0o644) 82 | if err != nil { 83 | if in != nil && in != os.Stdin { 84 | err := in.Close() 85 | if err != nil { 86 | panic(err) 87 | } 88 | } 89 | return nil, nil, err 90 | } 91 | } 92 | 93 | return in, out, nil 94 | } 95 | 96 | // parse reads input, parses, executes, and compiles the PEG grammar. 
97 | func parse(compile func(*Peg[uint32], io.Writer) error) error { 98 | in, out, err := getIO() 99 | if err != nil { 100 | return err 101 | } 102 | defer func() { 103 | if in != nil && in != os.Stdin { 104 | err := in.Close() 105 | if err != nil { 106 | panic(err) 107 | } 108 | } 109 | if out != nil && out != os.Stdout { 110 | err := out.Close() 111 | if err != nil { 112 | panic(err) 113 | } 114 | } 115 | }() 116 | 117 | buffer, err := io.ReadAll(in) 118 | if err != nil { 119 | return err 120 | } 121 | 122 | p := &Peg[uint32]{Tree: tree.New(*inline, *switchFlag, *noast), Buffer: string(buffer)} 123 | _ = p.Init(Pretty[uint32](true), Size[uint32](1<<15)) 124 | if err = p.Parse(); err != nil { 125 | return err 126 | } 127 | 128 | p.Execute() 129 | 130 | return compile(p, out) 131 | } 132 | -------------------------------------------------------------------------------- /docs/peg-file-syntax.md: -------------------------------------------------------------------------------- 1 | # PEG file syntax 2 | 3 | ## Examples of PEG grammars 4 | 5 | Here are some projects that use `peg` to provide further examples of PEG grammars: 6 | 7 | * https://github.com/tj/go-naturaldate - natural date/time parsing 8 | * https://github.com/gnames/gnparser - scientific names parsing 9 | 10 | ## Go package and imports 11 | 12 | First declare the package name and any import(s) required. 13 | 14 | ``` 15 | package 16 | 17 | import 18 | ``` 19 | 20 | ## Parser 21 | 22 | Then declare the parser: 23 | 24 | ``` 25 | type Peg { 26 | 27 | } 28 | ``` 29 | 30 | ## Rules 31 | 32 | Next declare the rules. Note that the main rules are described below but are based on the [peg/leg rules](https://www.piumarta.com/software/peg/peg.1.html) which provide additional documentation. 33 | 34 | The first rule is the entry point into the parser: 35 | 36 | ``` 37 | <- 38 | ``` 39 | 40 | The first rule should probably end with `!.` to indicate no more input follows: 41 | 42 | ``` 43 | first <- . !. 
44 | ``` 45 | 46 | This is often set to `END` to make PEG rules more readable: 47 | 48 | ``` 49 | END <- !. 50 | ``` 51 | 52 | `.` means any character matches. For zero or more character matches, use: 53 | 54 | ``` 55 | repetition <- .* 56 | ``` 57 | 58 | For one or more character matches, use: 59 | 60 | ``` 61 | oneOrMore <- .+ 62 | ``` 63 | 64 | For an optional character match, use: 65 | 66 | ``` 67 | optional <- .? 68 | ``` 69 | 70 | If specific characters are to be matched, use single quotes: 71 | 72 | ``` 73 | specific <- 'a'* 'bc'+ 'de'? 74 | ``` 75 | 76 | This will match the string `"aaabcbcde"`. 77 | 78 | For choosing between different inputs, use alternates: 79 | 80 | ``` 81 | prioritized <- 'a' 'a'* / 'bc'+ / 'de'? 82 | ``` 83 | 84 | This will match `"aaaa"` or `"bcbc"` or `"de"` or `""`. The matches are attempted in order. 85 | 86 | If the characters are case-insensitive, use double quotes: 87 | 88 | ``` 89 | insensitive <- "abc" 90 | ``` 91 | 92 | This will match `"abc"` or `"Abc"` or `"ABc"` and so on. 93 | 94 | For matching a set of characters, use a character class: 95 | 96 | ``` 97 | class <- [a-z] 98 | ``` 99 | 100 | This will match `"a"` or `"b"` or all the way to `"z"`. 101 | 102 | For an inverse character class, start with a caret: 103 | 104 | ``` 105 | inverse <- [^a-z] 106 | ``` 107 | 108 | This will match anything but `"a"` or `"b"` or all the way to `"z"`. 109 | 110 | If the character class is case-insensitive, use double brackets: 111 | 112 | ``` 113 | insensitive <- [[A-Z]] 114 | ``` 115 | 116 | (Note that this is not available in regular expression syntax.) 
117 | 118 | Use parentheses for grouping: 119 | 120 | ``` 121 | grouping <- (rule1 / rule2) rule3 122 | ``` 123 | 124 | To look ahead for a match (predicate), use: 125 | 126 | ``` 127 | lookAhead <- &rule1 rule2 128 | ``` 129 | 130 | For inverse look ahead, use: 131 | 132 | ``` 133 | inverse <- !rule1 rule2 134 | ``` 135 | 136 | Use curly braces for Go code: 137 | 138 | ``` 139 | gocode <- { fmt.Println("hello world") } 140 | ``` 141 | 142 | For string captures, use less than and greater than: 143 | 144 | ``` 145 | capture <- <'capture'> { fmt.Println(text) } 146 | ``` 147 | 148 | This will print out `"capture"`. The captured string is stored in `buffer[begin:end]`. 149 | 150 | ## Naming convention 151 | 152 | Use caution when picking your names to avoid overwriting existing `.go` files. Since only one PEG grammar is allowed per Go package (currently) the use of the name `grammar.peg` is suggested as a convention. 153 | 154 | ``` 155 | grammar.peg 156 | grammar.peg.go 157 | ``` -------------------------------------------------------------------------------- /grammars/c/c_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2010 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | //go:generate ../../peg -switch -inline c.peg 6 | 7 | package c 8 | 9 | import ( 10 | "fmt" 11 | "io/fs" 12 | "os" 13 | "path/filepath" 14 | "testing" 15 | ) 16 | 17 | func parseCBuffer(buffer string) (*C[uint32], error) { 18 | clang := &C[uint32]{Buffer: buffer} 19 | err := clang.Init() 20 | if err != nil { 21 | return nil, err 22 | } 23 | err = clang.Parse() 24 | return clang, err 25 | } 26 | 27 | func parseC4t(t *testing.T, src string) *C[uint32] { 28 | c, err := parseCBuffer(src) 29 | if err != nil { 30 | t.Fatal(err) 31 | } 32 | return c 33 | } 34 | 35 | func noParseC4t(t *testing.T, src string) { 36 | _, err := parseCBuffer(src) 37 | if err == nil { 38 | t.Fatal("Parsed what should not have parsed.") 39 | } 40 | } 41 | 42 | func TestCParsing_Expressions1(t *testing.T) { 43 | case1src := `int a() { 44 | (es); 45 | 1++; 46 | 1+1; 47 | a+1; 48 | (a)+1; 49 | a->x; 50 | return 0; 51 | }` 52 | parseC4t(t, case1src) 53 | } 54 | 55 | func TestCParsing_Expressions2(t *testing.T) { 56 | parseC4t(t, 57 | `int a() { 58 | if (a) { return (a); } 59 | 60 | return (0); 61 | return a+b; 62 | return (a+b); 63 | return (a)+0; 64 | }`) 65 | 66 | parseC4t(t, `int a() { return (a)+0; }`) 67 | } 68 | 69 | func TestCParsing_Expressions3(t *testing.T) { 70 | parseC4t(t, 71 | `int a() { 72 | 1+(a); 73 | (a)++; 74 | (es)++; 75 | (es)||a; 76 | (es)->a; 77 | return (a)+(b); 78 | return 0+(a); 79 | }`) 80 | } 81 | 82 | func TestCParsing_Expressions4(t *testing.T) { 83 | parseC4t(t, `int a(){1+(a);}`) 84 | } 85 | 86 | func TestCParsing_Expressions5(t *testing.T) { 87 | parseC4t(t, `int a(){return (int)0;}`) 88 | } 89 | 90 | func TestCParsing_Expressions6(t *testing.T) { 91 | parseC4t(t, `int a(){return (in)0;}`) 92 | } 93 | 94 | func TestCParsing_Expressions7(t *testing.T) { 95 | parseC4t(t, `int a() 96 | { return (0); }`) 97 | } 98 | 99 | func TestCParsing_Cast0(t *testing.T) { 100 | parseC4t(t, `int a(){(cast)0;}`) 101 | } 102 | 103 | func TestCParsing_Cast1(t *testing.T) { 104 | 
parseC4t(t, `int a(){(m*)(rsp);}`) 105 | parseC4t(t, `int a(){(struct m*)(rsp);}`) 106 | } 107 | 108 | func TestCParsing_Empty(t *testing.T) { 109 | parseC4t(t, `/** empty is valid. */ `) 110 | } 111 | 112 | func TestCParsing_EmptyStruct(t *testing.T) { 113 | parseC4t(t, `struct empty{};`) 114 | parseC4t(t, `struct {} empty;`) 115 | parseC4t(t, `struct empty {} empty;`) 116 | } 117 | 118 | func TestCParsing_EmptyEmbeddedUnion(t *testing.T) { 119 | parseC4t(t, `struct empty{ 120 | union { 121 | int a; 122 | char b; 123 | }; 124 | };`) 125 | } 126 | 127 | func TestCParsing_ExtraSEMI(t *testing.T) { 128 | parseC4t(t, `int func(){} 129 | ; 130 | struct {} empty; 131 | struct {} empty;; 132 | int foo() {}; 133 | int foo() {};; 134 | `) 135 | 136 | noParseC4t(t, `struct empty{}`) 137 | } 138 | 139 | func TestCParsing_ExtraSEMI2(t *testing.T) { 140 | parseC4t(t, ` 141 | struct a { int b; ; }; 142 | `) 143 | 144 | noParseC4t(t, `struct empty{}`) 145 | } 146 | 147 | func TestCParsing_Escapes(t *testing.T) { 148 | parseC4t(t, ` 149 | int f() { 150 | printf("%s", "\a\b\f\n\r\t\v"); 151 | printf("\\"); 152 | printf("\%"); 153 | printf("\""); 154 | printf('\"'); // <- semantically wrong but syntactically valid. 155 | }`) 156 | } 157 | 158 | func TestCFiles(t *testing.T) { 159 | // TODO: find appropriate c files. 
160 | err := filepath.Walk(".", func(path string, _ fs.FileInfo, err error) error { 161 | if err != nil { 162 | fmt.Printf("prevent panic by handling failure accessing a path %q: %v\n", path, err) 163 | return err 164 | } 165 | if filepath.Ext(path) == ".c" { 166 | b, err := os.ReadFile(path) 167 | if err != nil { 168 | return err 169 | } 170 | clang := &C[uint32]{Buffer: string(b)} 171 | err = clang.Init() 172 | if err != nil { 173 | t.Fatal(err) 174 | } 175 | if err := clang.Parse(); err != nil { 176 | t.Fatalf("Parse failed: %v", err) 177 | } 178 | } 179 | return nil 180 | }) 181 | if err != nil { 182 | t.Fatal(err) 183 | } 184 | } 185 | 186 | func TestCParsing_WideString(t *testing.T) { 187 | parseC4t(t, `wchar_t *msg = L"Hello";`) 188 | } 189 | -------------------------------------------------------------------------------- /set/set_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2010 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package set 6 | 7 | import ( 8 | "math" 9 | "testing" 10 | ) 11 | 12 | func TestString(t *testing.T) { 13 | s := NewSet() 14 | s.AddRange('a', 'c') 15 | s.AddRange('c', 'e') 16 | 17 | if s.String() != "[97 98 99 100 101]" { 18 | t.Fatal("string is broken") 19 | } 20 | } 21 | 22 | func TestCopy(t *testing.T) { 23 | s := NewSet() 24 | s.AddRange('a', 'c') 25 | s.AddRange('c', 'e') 26 | 27 | cp := s.Copy() 28 | if !cp.Equal(s) { 29 | t.Fatal("cp should be a copy of s") 30 | } 31 | } 32 | 33 | func TestAdd(t *testing.T) { 34 | s := NewSet() 35 | s.Add('a') 36 | 37 | if s.Len() != 1 { 38 | t.Fatal("length should be 1", s.Len()) 39 | } 40 | 41 | if !s.Has('a') { 42 | t.Fatal("set should have a") 43 | } 44 | 45 | s.Add('c') 46 | s.Add('e') 47 | s.Add('A') 48 | } 49 | 50 | func TestAddRange(t *testing.T) { 51 | s := NewSet() 52 | s.AddRange('a', 'c') 53 | s.AddRange('c', 'e') 54 | if s.Len() != 5 { 55 | t.Fatal("size should be 5") 56 | } 57 | if !s.Has('b') { 58 | t.Fatal("set should have b") 59 | } 60 | if !s.Has('d') { 61 | t.Fatal("set should have d") 62 | } 63 | 64 | s.AddRange('g', 'i') 65 | if s.Len() != 8 { 66 | t.Log(s.Len()) 67 | t.Fatal("size should be 7") 68 | } 69 | if !s.Has('h') { 70 | t.Fatal("set should have h") 71 | } 72 | 73 | s.AddRange('A', 'C') 74 | if s.Len() != 11 { 75 | t.Log(s.Len()) 76 | t.Fatal("size should be 10") 77 | } 78 | if !s.Has('B') { 79 | t.Fatal("set should have B") 80 | } 81 | 82 | s.AddRange('A', 'z') 83 | if s.Len() != 'z'-'A'+1 { 84 | t.Log(s.Len()) 85 | t.Fatalf("size should be %d", 'z'-'A'+1) 86 | } 87 | if !s.Has('B') { 88 | t.Fatal("set should have B") 89 | } 90 | } 91 | 92 | func TestHas(t *testing.T) { 93 | r := NewSet() 94 | r.AddRange('a', 'c') 95 | 96 | if !r.Has('b') { 97 | t.Fatal("set should have b") 98 | } 99 | 100 | if r.Has('d') { 101 | t.Fatal("set should not have d") 102 | } 103 | } 104 | 105 | func TestComplement(t *testing.T) { 106 | s := NewSet() 107 | s.AddRange('a', 'c') 108 | s.AddRange('c', 'e') 109 
| s.AddRange('g', 'i') 110 | s.AddRange('A', 'C') 111 | c1 := s.Complement(rune(math.MaxInt32)) 112 | c2 := c1.Complement(rune(math.MaxInt32)) 113 | if !s.Equal(c2) { 114 | t.Fatal("sets should be equal") 115 | } 116 | } 117 | 118 | func TestUnion(t *testing.T) { 119 | r := NewSet() 120 | r.AddRange('a', 'c') 121 | r.AddRange('c', 'e') 122 | 123 | s := NewSet() 124 | s.AddRange('a', 'c') 125 | s.AddRange('c', 'e') 126 | s.AddRange('g', 'i') 127 | s.AddRange('A', 'C') 128 | 129 | z := NewSet() 130 | z.AddRange('g', 'i') 131 | z.AddRange('A', 'C') 132 | 133 | z = r.Union(z) 134 | 135 | if !z.Equal(s) { 136 | t.Fatal("sets should be equal") 137 | } 138 | } 139 | 140 | func TestIntersects(t *testing.T) { 141 | r := NewSet() 142 | r.AddRange('a', 'c') 143 | 144 | s := NewSet() 145 | s.AddRange('a', 'c') 146 | s.AddRange('c', 'e') 147 | s.AddRange('g', 'i') 148 | s.AddRange('A', 'C') 149 | 150 | if !r.Intersects(s) { 151 | t.Fatal("sets should intersect") 152 | } 153 | 154 | z := NewSet() 155 | z.Add('z') 156 | 157 | if z.Intersects(s) { 158 | t.Fatal("sets should not intersect") 159 | } 160 | } 161 | 162 | func TestEqual(t *testing.T) { 163 | r := NewSet() 164 | r.AddRange('a', 'c') 165 | r.AddRange('c', 'e') 166 | r.AddRange('g', 'i') 167 | 168 | s := NewSet() 169 | s.AddRange('a', 'c') 170 | s.AddRange('c', 'e') 171 | s.AddRange('g', 'i') 172 | s.AddRange('A', 'C') 173 | 174 | if r.Equal(s) { 175 | t.Fatal("sets should not be equal") 176 | } 177 | 178 | r.AddRange('A', 'C') 179 | 180 | if !r.Equal(s) { 181 | t.Fatal("sets should be equal") 182 | } 183 | } 184 | 185 | func TestLen(t *testing.T) { 186 | r := NewSet() 187 | r.AddRange('a', 'c') 188 | r.AddRange('c', 'e') 189 | r.AddRange('g', 'i') 190 | 191 | s := NewSet() 192 | s.AddRange('a', 'c') 193 | s.AddRange('c', 'e') 194 | s.AddRange('g', 'i') 195 | s.AddRange('A', 'C') 196 | 197 | if r.Len() == s.Len() { 198 | t.Fatal("sets should not be equal in length") 199 | } 200 | 201 | r.AddRange('A', 'C') 202 | 203 | if 
r.Len() != s.Len() { 204 | t.Fatal("sets should be equal in length") 205 | } 206 | } 207 | -------------------------------------------------------------------------------- /cmd/peg-bootstrap/peg.bootstrap.peg: -------------------------------------------------------------------------------- 1 | # PE Grammar for bootstrap peg language 2 | # 3 | # Adapted from peg.peg. 4 | 5 | # Hierarchical syntax 6 | Grammar <- Spacing 'package' MustSpacing Identifier { p.AddPackage(text) } 7 | Import* 8 | 'type' MustSpacing Identifier { p.AddPeg(text) } 9 | 'Peg' Spacing Action { p.AddState(text) } 10 | Definition Definition* EndOfFile 11 | 12 | Import <- 'import' Spacing ["] < ([a-zA-Z_/.]/'-')([a-zA-Z_/.]/'-')* > ["] Spacing { p.AddImport(text) } 13 | 14 | Definition <- Identifier { p.AddRule(text) } 15 | LeftArrow Expression { p.AddExpression() } 16 | Expression <- Sequence (Slash Sequence { p.AddAlternate() } 17 | )* (Slash { p.AddNil(); p.AddAlternate() } 18 | )? 19 | / { p.AddNil() } 20 | Sequence <- Prefix (Prefix { p.AddSequence() } 21 | )* 22 | Prefix <- And Action { p.AddPredicate(text) } 23 | / Not Action { p.AddStateChange(text) } 24 | / And Suffix { p.AddPeekFor() } 25 | / Not Suffix { p.AddPeekNot() } 26 | / Suffix 27 | Suffix <- Primary (Question { p.AddQuery() } 28 | / Star { p.AddStar() } 29 | / Plus { p.AddPlus() } 30 | )? 31 | Primary <- Identifier !LeftArrow { p.AddName(text) } 32 | / Open Expression Close 33 | / Literal 34 | / Class 35 | / Dot { p.AddDot() } 36 | / Action { p.AddAction(text) } 37 | / Begin Expression End { p.AddPush() } 38 | 39 | # Lexical syntax 40 | 41 | Identifier <- < IdentStart IdentCont* > Spacing 42 | IdentStart <- [A-Za-z_] 43 | IdentCont <- IdentStart / [0-9] 44 | Literal <- ['] (!['] Char)? (!['] Char { p.AddSequence() } 45 | )* ['] Spacing 46 | / ["] (!["] DoubleChar)? (!["] DoubleChar { p.AddSequence() } 47 | )* ["] Spacing 48 | Class <- ( '[[' ( '^' DoubleRanges { p.AddPeekNot(); p.AddDot(); p.AddSequence() } 49 | / DoubleRanges )? 
50 | ']]' 51 | / '[' ( '^' Ranges { p.AddPeekNot(); p.AddDot(); p.AddSequence() } 52 | / Ranges )? 53 | ']' ) 54 | Spacing 55 | Ranges <- !']' Range (!']' Range { p.AddAlternate() } 56 | )* 57 | DoubleRanges <- !']]' DoubleRange (!']]' DoubleRange { p.AddAlternate() } 58 | )* 59 | Range <- Char '-' Char { p.AddRange() } 60 | / Char 61 | DoubleRange <- Char '-' Char { p.AddDoubleRange() } 62 | / DoubleChar 63 | Char <- Escape 64 | / !'\\' <.> { p.AddCharacter(text) } 65 | DoubleChar <- Escape 66 | / <[a-zA-Z]> { p.AddDoubleCharacter(text) } 67 | / !'\\' <.> { p.AddCharacter(text) } 68 | Escape <- '\\' [aA] { p.AddCharacter("\a") } # bell 69 | / '\\' [bB] { p.AddCharacter("\b") } # bs 70 | / '\\' [eE] { p.AddCharacter("\x1B") } # esc 71 | / '\\' [fF] { p.AddCharacter("\f") } # ff 72 | / '\\' [nN] { p.AddCharacter("\n") } # nl 73 | / '\\' [rR] { p.AddCharacter("\r") } # cr 74 | / '\\' [tT] { p.AddCharacter("\t") } # ht 75 | / '\\' [vV] { p.AddCharacter("\v") } # vt 76 | / '\\' ['] { p.AddCharacter("'") } 77 | / '\\"' { p.AddCharacter("\"") } 78 | / '\\[' { p.AddCharacter("[") } 79 | / '\\]' { p.AddCharacter("]") } 80 | / '\\-' { p.AddCharacter("-") } 81 | / '\\' '0'[xX] <[0-9a-fA-F][0-9a-fA-F]*> { p.AddHexaCharacter(text) } 82 | / '\\' <[0-3][0-7][0-7]> { p.AddOctalCharacter(text) } 83 | / '\\' <[0-7][0-7]?> { p.AddOctalCharacter(text) } 84 | / '\\\\' { p.AddCharacter("\\") } 85 | LeftArrow <- ('<-' / '\0x2190') Spacing 86 | Slash <- '/' Spacing 87 | And <- '&' Spacing 88 | Not <- '!' Spacing 89 | Question <- '?' Spacing 90 | Star <- '*' Spacing 91 | Plus <- '+' Spacing 92 | Open <- '(' Spacing 93 | Close <- ')' Spacing 94 | Dot <- '.' Spacing 95 | SpaceComment <- (Space / Comment) 96 | Spacing <- SpaceComment* 97 | MustSpacing <- SpaceComment Spacing 98 | Comment <- '#' (!EndOfLine .)* EndOfLine 99 | Space <- ' ' / '\0x9' / EndOfLine 100 | EndOfLine <- '\0xd\0xa' / '\0xa' / '\0xd' 101 | EndOfFile <- !. 
102 | Action <- '{' < ActionBody* > '}' Spacing 103 | ActionBody <- ![{}]. / '{' ActionBody* '}' 104 | Begin <- '<' Spacing 105 | End <- '>' Spacing 106 | 107 | -------------------------------------------------------------------------------- /peg.peg: -------------------------------------------------------------------------------- 1 | # PE Grammar for PE Grammars 2 | # 3 | # Adapted from [1] by Ian Piumarta . 4 | # 5 | # Best viewed using 140 columns monospaced with tabs every 8. 6 | # 7 | # [1] Bryan Ford. "Parsing Expression Grammars: A Recognition-Based Syntactic 8 | # Foundation." Symposium on Principles of Programming Languages, 9 | # January 14--16, 2004, Venice, Italy. 10 | 11 | package main 12 | 13 | import "github.com/pointlander/peg/tree" 14 | 15 | # parser declaration 16 | 17 | type Peg Peg { 18 | *tree.Tree 19 | } 20 | 21 | # Hierarchical syntax 22 | Grammar <- Header 'package' MustSpacing Identifier { p.AddPackage(text) } 23 | Import* 24 | 'type' MustSpacing Identifier { p.AddPeg(text) } 25 | 'Peg' Spacing Action { p.AddState(text) } 26 | Definition+ EndOfFile 27 | 28 | Import <- 'import' Spacing (MultiImport / SingleImport) Spacing 29 | SingleImport <- ImportName 30 | MultiImport <- '(' Spacing (ImportName '\n' Spacing)* Spacing ')' 31 | 32 | ImportName <- ["] < [0-9a-zA-Z_/.\-]+ > ["] { p.AddImport(text) } 33 | 34 | Definition <- Identifier { p.AddRule(text) } 35 | LeftArrow Expression { p.AddExpression() } &(Identifier LeftArrow / !.) 36 | Expression <- Sequence (Slash Sequence { p.AddAlternate() } 37 | )* (Slash { p.AddNil(); p.AddAlternate() } 38 | )? 39 | / { p.AddNil() } 40 | Sequence <- Prefix (Prefix { p.AddSequence() } 41 | )* 42 | Prefix <- And Action { p.AddPredicate(text) } 43 | / Not Action { p.AddStateChange(text) } 44 | / And Suffix { p.AddPeekFor() } 45 | / Not Suffix { p.AddPeekNot() } 46 | / Suffix 47 | Suffix <- Primary (Question { p.AddQuery() } 48 | / Star { p.AddStar() } 49 | / Plus { p.AddPlus() } 50 | )? 
51 | Primary <- Identifier !LeftArrow { p.AddName(text) } 52 | / Open Expression Close 53 | / Literal 54 | / Class 55 | / Dot { p.AddDot() } 56 | / Action { p.AddAction(text) } 57 | / Begin Expression End { p.AddPush() } 58 | 59 | # Lexical syntax 60 | 61 | #PrivateIdentifier <- < [a-z_] IdentCont* > Spacing 62 | Identifier <- < IdentStart IdentCont* > Spacing 63 | IdentStart <- [[a-z_]] 64 | IdentCont <- IdentStart / [0-9] 65 | Literal <- ['] (!['] Char)? (!['] Char { p.AddSequence() } 66 | )* ['] Spacing 67 | / ["] (!["] DoubleChar)? (!["] DoubleChar { p.AddSequence() } 68 | )* ["] Spacing 69 | Class <- ( '[[' ( '^' DoubleRanges { p.AddPeekNot(); p.AddDot(); p.AddSequence() } 70 | / DoubleRanges )? 71 | ']]' 72 | / '[' ( '^' Ranges { p.AddPeekNot(); p.AddDot(); p.AddSequence() } 73 | / Ranges )? 74 | ']' ) 75 | Spacing 76 | Ranges <- !']' Range (!']' Range { p.AddAlternate() } 77 | )* 78 | DoubleRanges <- !']]' DoubleRange (!']]' DoubleRange { p.AddAlternate() } 79 | )* 80 | Range <- Char '-' Char { p.AddRange() } 81 | / Char 82 | DoubleRange <- Char '-' Char { p.AddDoubleRange() } 83 | / DoubleChar 84 | Char <- Escape 85 | / !'\\' <.> { p.AddCharacter(text) } 86 | DoubleChar <- Escape 87 | / <[a-zA-Z]> { p.AddDoubleCharacter(text) } 88 | / !'\\' <.> { p.AddCharacter(text) } 89 | Escape <- "\\a" { p.AddCharacter("\a") } # bell 90 | / "\\b" { p.AddCharacter("\b") } # bs 91 | / "\\e" { p.AddCharacter("\x1B") } # esc 92 | / "\\f" { p.AddCharacter("\f") } # ff 93 | / "\\n" { p.AddCharacter("\n") } # nl 94 | / "\\r" { p.AddCharacter("\r") } # cr 95 | / "\\t" { p.AddCharacter("\t") } # ht 96 | / "\\v" { p.AddCharacter("\v") } # vt 97 | / "\\'" { p.AddCharacter("'") } 98 | / '\\"' { p.AddCharacter("\"") } 99 | / '\\[' { p.AddCharacter("[") } 100 | / '\\]' { p.AddCharacter("]") } 101 | / '\\-' { p.AddCharacter("-") } 102 | / '\\' "0x"<[0-9a-fA-F]+> { p.AddHexaCharacter(text) } 103 | / '\\' <[0-3][0-7][0-7]> { p.AddOctalCharacter(text) } 104 | / '\\' <[0-7][0-7]?> { 
p.AddOctalCharacter(text) } 105 | / '\\\\' { p.AddCharacter("\\") } 106 | LeftArrow <- ('<-' / '\0x2190') Spacing 107 | Slash <- '/' Spacing 108 | And <- '&' Spacing 109 | Not <- '!' Spacing 110 | Question <- '?' Spacing 111 | Star <- '*' Spacing 112 | Plus <- '+' Spacing 113 | Open <- '(' Spacing 114 | Close <- ')' Spacing 115 | Dot <- '.' Spacing 116 | SpaceComment <- (Space / Comment) 117 | Spacing <- SpaceComment* 118 | MustSpacing <- SpaceComment+ 119 | Comment <- ('#' / '//') (!EndOfLine .)* EndOfLine 120 | Space <- ' ' / '\t' / EndOfLine 121 | Header <- HeaderSpaceComment* 122 | HeaderSpaceComment <- (HeaderComment / { p.AddSpace(text) } ) 123 | HeaderComment <- ('#' / '//') <(!EndOfLine .)*> { p.AddComment(text) } EndOfLine 124 | EndOfLine <- '\r\n' / '\n' / '\r' 125 | EndOfFile <- !. 126 | Action <- '{' < ActionBody* > '}' Spacing 127 | ActionBody <- [^{}] / '{' ActionBody* '}' 128 | Begin <- '<' Spacing 129 | End <- '>' Spacing 130 | 131 | -------------------------------------------------------------------------------- /set/set.go: -------------------------------------------------------------------------------- 1 | // Copyright 2010 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package set 6 | 7 | import ( 8 | "fmt" 9 | "math" 10 | ) 11 | 12 | // Node is a node. 13 | type Node struct { 14 | Forward *Node 15 | Backward *Node 16 | Begin rune 17 | End rune 18 | } 19 | 20 | // Set is a set. 21 | type Set struct { 22 | Head Node 23 | Tail Node 24 | } 25 | 26 | // NewSet returns a new set. 27 | func NewSet() *Set { 28 | return &Set{ 29 | Head: Node{ 30 | Begin: math.MaxInt32, 31 | }, 32 | } 33 | } 34 | 35 | // String returns the string of a set. 
36 | func (s *Set) String() string { 37 | codes, space := "[", "" 38 | node := s.Head.Forward 39 | for node.Forward != nil { 40 | for code := node.Begin; code <= node.End; code++ { 41 | codes += space + fmt.Sprintf("%v", code) 42 | space = " " 43 | } 44 | node = node.Forward 45 | } 46 | return codes + "]" 47 | } 48 | 49 | // Copy copies a set. 50 | func (s *Set) Copy() *Set { 51 | set := NewSet() 52 | if s.Head.Forward == nil { 53 | return set 54 | } 55 | a, b := s.Head.Forward, &set.Head 56 | for a.Forward != nil { 57 | node := Node{ 58 | Backward: b, 59 | Begin: a.Begin, 60 | End: a.End, 61 | } 62 | b.Forward = &node 63 | a = a.Forward 64 | b = b.Forward 65 | } 66 | b.Forward = &set.Tail 67 | set.Tail.Backward = b 68 | return set 69 | } 70 | 71 | // Add adds a symbol to the set. 72 | func (s *Set) Add(a rune) { 73 | s.AddRange(a, a) 74 | } 75 | 76 | // AddRange adds to a set. 77 | func (s *Set) AddRange(begin, end rune) { 78 | beginNode := &s.Head 79 | for beginNode.Forward != nil && begin > beginNode.Forward.End { 80 | beginNode = beginNode.Forward 81 | } 82 | endNode := &s.Tail 83 | for endNode.Backward != nil && end < endNode.Backward.Begin { 84 | endNode = endNode.Backward 85 | } 86 | if beginNode.Forward == nil && endNode.Backward == nil { 87 | node := Node{ 88 | Begin: begin, 89 | End: end, 90 | } 91 | node.Forward = endNode 92 | endNode.Backward = &node 93 | node.Backward = beginNode 94 | beginNode.Forward = &node 95 | } else if beginNode.Forward == endNode.Backward { 96 | if begin < beginNode.Forward.Begin { 97 | beginNode.Forward.Begin = begin 98 | } 99 | if end > beginNode.Forward.End { 100 | beginNode.Forward.End = end 101 | } 102 | } else if beginNode.Forward != nil && endNode.Backward == nil { 103 | node := Node{ 104 | Begin: begin, 105 | End: end, 106 | } 107 | node.Backward = beginNode 108 | node.Forward = beginNode.Forward 109 | beginNode.Forward.Backward = &node 110 | beginNode.Forward = &node 111 | } else if beginNode.Forward == nil && 
endNode.Backward != nil { 112 | node := Node{ 113 | Begin: begin, 114 | End: end, 115 | } 116 | node.Forward = endNode 117 | node.Backward = endNode.Backward 118 | endNode.Backward.Forward = &node 119 | endNode.Backward = &node 120 | } else if beginNode.Forward == endNode { 121 | node := Node{ 122 | Begin: begin, 123 | End: end, 124 | } 125 | node.Backward = beginNode 126 | node.Forward = beginNode.Forward 127 | beginNode.Forward.Backward = &node 128 | beginNode.Forward = &node 129 | } else if beginNode == endNode.Backward { 130 | node := Node{ 131 | Begin: begin, 132 | End: end, 133 | } 134 | node.Forward = endNode 135 | node.Backward = endNode.Backward 136 | endNode.Backward.Forward = &node 137 | endNode.Backward = &node 138 | } else { 139 | if begin < beginNode.Forward.Begin { 140 | beginNode.Forward.Begin = begin 141 | } 142 | beginNode.Forward.End = max(end, endNode.Backward.End) 143 | node := beginNode.Forward 144 | node.Forward = endNode 145 | endNode.Backward = node 146 | } 147 | } 148 | 149 | // Has tests if a set has a rune. 150 | func (s *Set) Has(begin rune) bool { 151 | beginNode := &s.Head 152 | for beginNode.Forward != nil && begin > beginNode.Forward.End { 153 | beginNode = beginNode.Forward 154 | } 155 | if beginNode.Forward == nil { 156 | return false 157 | } 158 | return begin >= beginNode.Forward.Begin 159 | } 160 | 161 | // Complement computes the complement of a set. 
162 | func (s *Set) Complement(endSymbol rune) *Set { 163 | set := NewSet() 164 | if s.Len() == 0 { 165 | node := Node{ 166 | Forward: &set.Tail, 167 | Backward: &set.Head, 168 | Begin: 0, 169 | End: endSymbol, 170 | } 171 | set.Head.Forward = &node 172 | set.Tail.Backward = &node 173 | return set 174 | } 175 | if s.Head.Forward.Begin == 0 && s.Head.Forward.End == endSymbol { 176 | return set 177 | } 178 | a, b := &s.Head, &set.Head 179 | pre := rune(0) 180 | if pre == a.Forward.Begin { 181 | a = a.Forward 182 | pre = a.End + 1 183 | } 184 | a = a.Forward 185 | for a.Forward != nil { 186 | node := Node{ 187 | Backward: b, 188 | Begin: pre, 189 | End: a.Begin - 1, 190 | } 191 | if a.End == endSymbol { 192 | pre = endSymbol 193 | } else { 194 | pre = a.End + 1 195 | } 196 | b.Forward = &node 197 | a = a.Forward 198 | b = b.Forward 199 | } 200 | if pre < endSymbol { 201 | node := Node{ 202 | Backward: b, 203 | Begin: pre, 204 | End: endSymbol, 205 | } 206 | b.Forward = &node 207 | b = b.Forward 208 | } 209 | b.Forward = &set.Tail 210 | set.Tail.Backward = b 211 | return set 212 | } 213 | 214 | // Union is the union of two sets. 215 | func (s *Set) Union(a *Set) *Set { 216 | set := s.Copy() 217 | node := a.Head.Forward 218 | if node == nil { 219 | return set 220 | } 221 | for node.Forward != nil { 222 | set.AddRange(node.Begin, node.End) 223 | node = node.Forward 224 | } 225 | return set 226 | } 227 | 228 | // Intersects returns true if two sets intersect. 
229 | func (s *Set) Intersects(b *Set) bool { 230 | x := s.Head.Forward 231 | if x == nil { 232 | return false 233 | } 234 | for x.Forward != nil { 235 | y := b.Head.Forward 236 | if y == nil { 237 | return false 238 | } 239 | for y.Forward != nil { 240 | if y.Begin >= x.Begin && y.Begin <= x.End { 241 | return true 242 | } else if y.End >= x.Begin && y.End <= x.End { 243 | return true 244 | } 245 | y = y.Forward 246 | } 247 | x = x.Forward 248 | } 249 | x = b.Head.Forward 250 | if x == nil { 251 | return false 252 | } 253 | for x.Forward != nil { 254 | y := s.Head.Forward 255 | if y == nil { 256 | return false 257 | } 258 | for y.Forward != nil { 259 | if y.Begin >= x.Begin && y.Begin <= x.End { 260 | return true 261 | } else if y.End >= x.Begin && y.End <= x.End { 262 | return true 263 | } 264 | y = y.Forward 265 | } 266 | x = x.Forward 267 | } 268 | return false 269 | } 270 | 271 | // Equal returns true if two sets are equal. 272 | func (s *Set) Equal(a *Set) bool { 273 | lens, lena := s.Len(), a.Len() 274 | if lens != lena { 275 | return false 276 | } else if lens == 0 && lena == 0 { 277 | return true 278 | } 279 | x, y := s.Head.Forward, a.Head.Forward 280 | for { 281 | if x.Begin != y.Begin || x.End != y.End { 282 | return false 283 | } 284 | x, y = x.Forward, y.Forward 285 | if x == nil && y == nil { 286 | break 287 | } 288 | } 289 | return true 290 | } 291 | 292 | // Len returns the size of the set. 
293 | func (s *Set) Len() int { 294 | size := 0 295 | if s.Head.Forward == nil { 296 | return size 297 | } 298 | beginNode := s.Head.Forward 299 | for beginNode.Forward != nil { 300 | size += int(beginNode.End) - int(beginNode.Begin) + 1 301 | beginNode = beginNode.Forward 302 | } 303 | return size 304 | } 305 | -------------------------------------------------------------------------------- /peg_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "os" 6 | "testing" 7 | 8 | "github.com/pointlander/peg/tree" 9 | ) 10 | 11 | func TestCorrect(t *testing.T) { 12 | buffer := `package p 13 | type T Peg {} 14 | Grammar <- !. 15 | ` 16 | p := &Peg[uint32]{Tree: tree.New(false, false, false), Buffer: buffer} 17 | _ = p.Init() 18 | err := p.Parse() 19 | if err != nil { 20 | t.Fatal(err) 21 | } 22 | 23 | p = &Peg[uint32]{Tree: tree.New(false, false, false), Buffer: buffer} 24 | _ = p.Init(Size[uint32](1 << 15)) 25 | err = p.Parse() 26 | if err != nil { 27 | t.Fatal(err) 28 | } 29 | } 30 | 31 | func TestNoSpacePackage(t *testing.T) { 32 | buffer := `packagenospace 33 | type T Peg {} 34 | Grammar <- !. 35 | ` 36 | p := &Peg[uint32]{Tree: tree.New(false, false, false), Buffer: buffer} 37 | _ = p.Init(Size[uint32](1 << 15)) 38 | err := p.Parse() 39 | if err == nil { 40 | t.Fatal("packagenospace was parsed without error") 41 | } 42 | } 43 | 44 | func TestNoSpaceType(t *testing.T) { 45 | buffer := ` 46 | package p 47 | typenospace Peg {} 48 | Grammar <- !. 
49 | ` 50 | p := &Peg[uint32]{Tree: tree.New(false, false, false), Buffer: buffer} 51 | _ = p.Init(Size[uint32](1 << 15)) 52 | err := p.Parse() 53 | if err == nil { 54 | t.Fatal("typenospace was parsed without error") 55 | } 56 | } 57 | 58 | func TestSame(t *testing.T) { 59 | buffer, err := os.ReadFile("peg.peg") 60 | if err != nil { 61 | t.Fatal(err) 62 | } 63 | 64 | p := &Peg[uint32]{Tree: tree.New(true, true, false), Buffer: string(buffer)} 65 | _ = p.Init(Size[uint32](1 << 15)) 66 | if err = p.Parse(); err != nil { 67 | t.Fatal(err) 68 | } 69 | 70 | p.Execute() 71 | 72 | out := &bytes.Buffer{} 73 | _ = p.Compile("peg.peg.go", []string{"./peg", "-inline", "-switch", "peg.peg"}, out) 74 | 75 | bootstrap, err := os.ReadFile("peg.peg.go") 76 | if err != nil { 77 | t.Fatal(err) 78 | } 79 | 80 | if len(out.Bytes()) != len(bootstrap) { 81 | t.Fatal("code generated from peg.peg is not the same as .go") 82 | } 83 | 84 | for i, v := range out.Bytes() { 85 | if v != bootstrap[i] { 86 | t.Fatal("code generated from peg.peg is not the same as .go") 87 | } 88 | } 89 | } 90 | 91 | func TestStrict(t *testing.T) { 92 | tt := []string{ 93 | // rule used but not defined 94 | ` 95 | package main 96 | type test Peg {} 97 | Begin <- begin !. 98 | `, 99 | // rule defined but not used 100 | ` 101 | package main 102 | type test Peg {} 103 | Begin <- . 
104 | unused <- 'unused' 105 | `, 106 | // left recursive rule 107 | `package main 108 | type test Peg {} 109 | Begin <- Begin 'x' 110 | `, 111 | } 112 | 113 | for i, buffer := range tt { 114 | p := &Peg[uint32]{Tree: tree.New(false, false, false), Buffer: buffer} 115 | _ = p.Init(Size[uint32](1 << 15)) 116 | if err := p.Parse(); err != nil { 117 | t.Fatal(err) 118 | } 119 | p.Execute() 120 | 121 | tempDir := t.TempDir() 122 | 123 | out := &bytes.Buffer{} 124 | p.Strict = true 125 | if err := p.Compile(tempDir, []string{"peg"}, out); err == nil { 126 | t.Fatalf("#%d: expected warning error", i) 127 | } 128 | p.Strict = false 129 | if err := p.Compile(tempDir, []string{"peg"}, out); err != nil { 130 | t.Fatalf("#%d: unexpected error (%v)", i, err) 131 | } 132 | } 133 | } 134 | 135 | func TestCJKCharacter(t *testing.T) { 136 | buffer := ` 137 | package main 138 | 139 | type DiceExprParser Peg { 140 | } 141 | 142 | Expr <- 'CJK' / '汉字' / 'test' 143 | ` 144 | p := &Peg[uint32]{Tree: tree.New(false, true, false), Buffer: buffer} 145 | _ = p.Init(Size[uint32](1 << 15)) 146 | err := p.Parse() 147 | if err != nil { 148 | t.Fatal("cjk character test failed") 149 | } 150 | } 151 | 152 | func TestCheckAlwaysSucceeds(t *testing.T) { 153 | pegHeader := ` 154 | package main 155 | type Test Peg {} 156 | ` 157 | 158 | testCases := []struct { 159 | name string 160 | testRule string 161 | expectedResult bool 162 | }{ 163 | { 164 | name: "Character expression does not always succeed (TypeChar)", 165 | testRule: `A <- 'a'`, 166 | expectedResult: false, 167 | }, 168 | { 169 | name: "Star expression always succeeds (TypeStar)", 170 | testRule: `A <- 'a'*`, 171 | expectedResult: true, 172 | }, 173 | { 174 | name: "Dot expression does not always succeed (TypeDot)", 175 | testRule: `A <- .`, 176 | expectedResult: false, 177 | }, 178 | { 179 | name: "Range expression does not always succeed (TypeRange)", 180 | testRule: `A <- [a-z]`, 181 | expectedResult: false, 182 | }, 183 | { 184 | name: 
"String expression does not always succeed (TypeString)", 185 | testRule: `A <- "abc"`, 186 | expectedResult: false, 187 | }, 188 | { 189 | name: "Predicate expression does not always succeed (TypePredicate)", 190 | testRule: `A <- &{ true } 'a'*`, 191 | expectedResult: false, 192 | }, 193 | { 194 | name: "StateChange expression does not always succeed (TypeStateChange)", 195 | testRule: `A <- !{ false } 'a'*`, 196 | expectedResult: false, 197 | }, 198 | { 199 | name: "Action expression always succeeds (TypeAction)", 200 | testRule: `A <- { } 'a'*`, 201 | expectedResult: true, 202 | }, 203 | { 204 | name: "Space expression does not always succeed (TypeSpace)", 205 | testRule: `A <- ' '`, 206 | expectedResult: false, 207 | }, 208 | { 209 | name: "PeekFor expression does not always succeed (TypePeekFor)", 210 | testRule: `A <- &'a'`, 211 | expectedResult: false, 212 | }, 213 | { 214 | name: "PeekNot expression does not always succeed (TypePeekNot)", 215 | testRule: `A <- !'a'`, 216 | expectedResult: false, 217 | }, 218 | { 219 | name: "Plus expression does not always succeed (TypePlus)", 220 | testRule: `A <- 'a'+`, 221 | expectedResult: false, 222 | }, 223 | { 224 | name: "Push expression always succeeds (TypePush)", 225 | testRule: `A <- <'a'*>`, 226 | expectedResult: true, 227 | }, 228 | { 229 | name: "Nil expression always succeeds (TypeNil)", 230 | testRule: `A <- `, 231 | expectedResult: true, 232 | }, 233 | { 234 | name: "Optional expression always succeeds (TypeQuery)", 235 | testRule: `A <- 'b'?`, 236 | expectedResult: true, 237 | }, 238 | { 239 | name: "Nested star expression always succeeds", 240 | testRule: `A <- ('a' / 'b')*`, 241 | expectedResult: true, 242 | }, 243 | { 244 | name: "Sequence with star always succeeds", 245 | testRule: `A <- 'a'* 'b'*`, 246 | expectedResult: true, 247 | }, 248 | { 249 | name: "Sequence with non-star does not always succeed", 250 | testRule: `A <- 'a'* 'b'`, 251 | expectedResult: false, 252 | }, 253 | { 254 | name: 
"Alternate with star always succeeds", 255 | testRule: `A <- 'a' / 'b'*`, 256 | expectedResult: true, 257 | }, 258 | { 259 | name: "Alternate without star does not always succeed", 260 | testRule: `A <- 'a' / 'b'`, 261 | expectedResult: false, 262 | }, 263 | } 264 | 265 | for _, tc := range testCases { 266 | t.Run(tc.name, func(t *testing.T) { 267 | sourceCode := pegHeader + tc.testRule 268 | 269 | p := &Peg[uint32]{Tree: tree.New(false, true, true), Buffer: sourceCode} 270 | _ = p.Init(Size[uint32](1 << 15)) 271 | if err := p.Parse(); err != nil { 272 | t.Fatal(err) 273 | } 274 | p.Execute() 275 | buf := &bytes.Buffer{} 276 | _ = p.Compile("", []string{"peg"}, buf) 277 | 278 | if len(p.RuleNames) == 0 { 279 | t.Fatal("No rules found in the parsed tree") 280 | } 281 | rule := p.RuleNames[0] 282 | actualResult := rule.CheckAlwaysSucceeds(p.Tree) 283 | if actualResult != tc.expectedResult { 284 | t.Errorf("Rule [%s]: expected CheckAlwaysSucceeds() = %v, got %v", 285 | tc.name, tc.expectedResult, actualResult) 286 | } 287 | }) 288 | } 289 | } 290 | 291 | var pegFileContents = func(files []string) []string { 292 | contents := make([]string, len(files)) 293 | for i, file := range files { 294 | input, err := os.ReadFile(file) 295 | if err != nil { 296 | panic(err) 297 | } 298 | contents[i] = string(input) 299 | } 300 | return contents 301 | }([]string{ 302 | "peg.peg", 303 | "grammars/c/c.peg", 304 | "grammars/calculator/calculator.peg", 305 | "grammars/fexl/fexl.peg", 306 | "grammars/java/java_1_7.peg", 307 | }) 308 | 309 | func BenchmarkInitOnly(b *testing.B) { 310 | for b.Loop() { 311 | for _, peg := range pegFileContents { 312 | p := &Peg[uint32]{Tree: tree.New(true, true, false), Buffer: peg} 313 | _ = p.Init(Size[uint32](1 << 15)) 314 | } 315 | } 316 | } 317 | 318 | func BenchmarkParse(b *testing.B) { 319 | pegs := make([]*Peg[uint32], len(pegFileContents)) 320 | for i, content := range pegFileContents { 321 | p := &Peg[uint32]{Tree: tree.New(true, true, false), 
Buffer: content} 322 | _ = p.Init(Size[uint32](1 << 15)) 323 | pegs[i] = p 324 | } 325 | 326 | for b.Loop() { 327 | for _, peg := range pegs { 328 | if err := peg.Parse(); err != nil { 329 | b.Fatal(err) 330 | } 331 | b.StopTimer() 332 | peg.Reset() 333 | b.StartTimer() 334 | } 335 | } 336 | } 337 | 338 | func BenchmarkParseAndReset(b *testing.B) { 339 | pegs := make([]*Peg[uint32], len(pegFileContents)) 340 | for i, content := range pegFileContents { 341 | p := &Peg[uint32]{Tree: tree.New(true, true, false), Buffer: content} 342 | _ = p.Init(Size[uint32](1 << 15)) 343 | pegs[i] = p 344 | } 345 | 346 | for b.Loop() { 347 | for _, peg := range pegs { 348 | if err := peg.Parse(); err != nil { 349 | b.Fatal(err) 350 | } 351 | peg.Reset() 352 | } 353 | } 354 | } 355 | 356 | func BenchmarkInitAndParse(b *testing.B) { 357 | for b.Loop() { 358 | for _, peg := range pegFileContents { 359 | p := &Peg[uint32]{Tree: tree.New(true, true, false), Buffer: peg} 360 | _ = p.Init(Size[uint32](1 << 15)) 361 | if err := p.Parse(); err != nil { 362 | b.Fatal(err) 363 | } 364 | } 365 | } 366 | } 367 | 368 | func BenchmarkInitParseAndReset(b *testing.B) { 369 | for b.Loop() { 370 | for _, peg := range pegFileContents { 371 | p := &Peg[uint32]{Tree: tree.New(true, true, false), Buffer: peg} 372 | _ = p.Init(Size[uint32](1 << 15)) 373 | if err := p.Parse(); err != nil { 374 | b.Fatal(err) 375 | } 376 | p.Reset() 377 | } 378 | } 379 | } 380 | -------------------------------------------------------------------------------- /tree/peg.go.tmpl: -------------------------------------------------------------------------------- 1 | // Code generated by {{.Generator}}. DO NOT EDIT. 2 | 3 | {{.Comments}} 4 | 5 | package {{.PackageName}} 6 | 7 | import ( 8 | {{range .Imports}}"{{.}}" 9 | {{end}} 10 | ) 11 | 12 | const endSymbol rune = {{.EndSymbol}} 13 | 14 | /* The rule types inferred from the grammar are below. 
*/ 15 | type pegRule {{.PegRuleType}} 16 | 17 | const ( 18 | ruleUnknown pegRule = iota 19 | {{range .RuleNames}}rule{{.String}} 20 | {{end}} 21 | ) 22 | 23 | var rul3s = [...]string { 24 | "Unknown", 25 | {{range .RuleNames}}"{{.String}}", 26 | {{end}} 27 | } 28 | 29 | type Uint interface { 30 | ~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 31 | } 32 | 33 | type token[U Uint] struct { 34 | pegRule 35 | begin, end U 36 | } 37 | 38 | func (t *token[_]) String() string { 39 | // \x1B[34m = blue 40 | // \x1B[m = normal (disable color) 41 | return fmt.Sprintf("\x1B[34m%v\x1B[m %v %v", rul3s[t.pegRule], t.begin, t.end) 42 | } 43 | 44 | {{if .Ast}} 45 | type node[U Uint] struct { 46 | token[U] 47 | up, next *node[U] 48 | } 49 | 50 | func (n *node[U]) print(w io.Writer, pretty bool, buffer string) { 51 | var printFunc func(n *node[U], depth int) 52 | printFunc = func(n *node[U], depth int) { 53 | for n != nil { 54 | for range depth { 55 | fmt.Fprint(w, " ") 56 | } 57 | rule := rul3s[n.pegRule] 58 | quote := strconv.Quote(string([]rune(buffer)[n.begin:n.end])) 59 | if !pretty { 60 | fmt.Fprintf(w, "%v %v\n", rule, quote) 61 | } else { 62 | fmt.Fprintf(w, "\x1B[36m%v\x1B[m %v\n", rule, quote) 63 | } 64 | if n.up != nil { 65 | printFunc(n.up, depth+1) 66 | } 67 | n = n.next 68 | } 69 | } 70 | printFunc(n, 0) 71 | } 72 | 73 | func (n *node[_]) Print(w io.Writer, buffer string) { 74 | n.print(w, false, buffer) 75 | } 76 | 77 | func (n *node[_]) PrettyPrint(w io.Writer, buffer string) { 78 | n.print(w, true, buffer) 79 | } 80 | 81 | type tokens[U Uint] struct { 82 | tree []token[U] 83 | } 84 | 85 | func (t *tokens[_]) Trim(length uint32) { 86 | t.tree = t.tree[:length] 87 | } 88 | 89 | func (t *tokens[_]) Print() { 90 | for _, token := range t.tree { 91 | fmt.Println(token.String()) 92 | } 93 | } 94 | 95 | func (t *tokens[U]) AST() *node[U] { 96 | type element struct { 97 | node *node[U] 98 | down *element 99 | } 100 | tokenSlice := t.Tokens() 101 | var stack *element 102 | for 
_, token := range tokenSlice { 103 | if token.begin == token.end { 104 | continue 105 | } 106 | node := &node[U]{token: token} 107 | for stack != nil && stack.node.begin >= token.begin && stack.node.end <= token.end { 108 | stack.node.next = node.up 109 | node.up = stack.node 110 | stack = stack.down 111 | } 112 | stack = &element{node: node, down: stack} 113 | } 114 | if stack != nil { 115 | return stack.node 116 | } 117 | return nil 118 | } 119 | 120 | func (t *tokens[_]) PrintSyntaxTree(buffer string) { 121 | t.AST().Print(os.Stdout, buffer) 122 | } 123 | 124 | func (t *tokens[_]) WriteSyntaxTree(w io.Writer, buffer string) { 125 | t.AST().Print(w, buffer) 126 | } 127 | 128 | func (t *tokens[_]) PrettyPrintSyntaxTree(buffer string) { 129 | t.AST().PrettyPrint(os.Stdout, buffer) 130 | } 131 | 132 | func (t *tokens[U]) Add(rule pegRule, begin, end, index U) { 133 | tree, i := t.tree, int(index) 134 | newToken := token[U]{pegRule: rule, begin: begin, end: end} 135 | 136 | if i >= len(tree) { 137 | t.tree = append(tree, newToken) 138 | return 139 | } 140 | tree[i] = newToken 141 | } 142 | 143 | func (t *tokens[U]) Tokens() []token[U] { 144 | return t.tree 145 | } 146 | {{end}} 147 | 148 | type {{.StructName}}[U Uint] struct { 149 | {{.StructVariables}} 150 | Buffer string 151 | buffer []rune 152 | rules [{{.RulesCount}}]func() bool 153 | parse func(rule ...int) error 154 | reset func() 155 | Pretty bool 156 | {{if .Ast -}} 157 | disableMemoize bool 158 | tokens[U] 159 | {{end -}} 160 | } 161 | 162 | func (p *{{.StructName}}[_]) Parse(rule ...int) error { 163 | return p.parse(rule...) 
164 | } 165 | 166 | func (p *{{.StructName}}[_]) Reset() { 167 | p.reset() 168 | } 169 | 170 | type textPosition struct { 171 | line, symbol int 172 | } 173 | 174 | type textPositionMap map[int] textPosition 175 | 176 | func translatePositions(buffer []rune, positions []int) textPositionMap { 177 | length := len(positions) 178 | translations := make(textPositionMap, length) 179 | posIdx := 0 180 | line := 1 181 | symbol := 0 182 | 183 | slices.Sort(positions) 184 | 185 | for i, c := range buffer { 186 | if c == '\n' { 187 | line, symbol = line+1, 0 188 | } else { 189 | symbol++ 190 | } 191 | if i == positions[posIdx] { 192 | translations[positions[posIdx]] = textPosition{line, symbol} 193 | for posIdx++; posIdx < length; posIdx++ { 194 | if i == positions[posIdx] { 195 | return translations 196 | } 197 | } 198 | } 199 | if posIdx >= length { 200 | break 201 | } 202 | } 203 | 204 | return translations 205 | } 206 | 207 | type parseError[U Uint] struct { 208 | p *{{.StructName}}[U] 209 | maxToken token[U] 210 | } 211 | 212 | func (e *parseError[U]) Error() string { 213 | tokenSlice, err := []token[U]{e.maxToken}, "\n" 214 | positions, p := make([]int, 2*len(tokenSlice)), 0 215 | for _, t := range tokenSlice { 216 | positions[p], p = int(t.begin), p+1 217 | positions[p], p = int(t.end), p+1 218 | } 219 | translations := translatePositions(e.p.buffer, positions) 220 | format := "parse error near %v (line %v symbol %v - line %v symbol %v):\n%v\n" 221 | if e.p.Pretty { 222 | format = "parse error near \x1B[34m%v\x1B[m (line %v symbol %v - line %v symbol %v):\n%v\n" 223 | } 224 | for _, t := range tokenSlice { 225 | begin, end := int(t.begin), int(t.end) 226 | err += fmt.Sprintf(format, 227 | rul3s[t.pegRule], 228 | translations[begin].line, translations[begin].symbol, 229 | translations[end].line, translations[end].symbol, 230 | strconv.Quote(string(e.p.buffer[begin:end]))) 231 | } 232 | 233 | return err 234 | } 235 | 236 | {{if .Ast}} 237 | func (p *{{.StructName}}[_]) 
PrintSyntaxTree() { 238 | if p.Pretty { 239 | p.tokens.PrettyPrintSyntaxTree(p.Buffer) 240 | } else { 241 | p.tokens.PrintSyntaxTree(p.Buffer) 242 | } 243 | } 244 | 245 | func (p *{{.StructName}}[_]) WriteSyntaxTree(w io.Writer) { 246 | p.tokens.WriteSyntaxTree(w, p.Buffer) 247 | } 248 | 249 | func (p *{{.StructName}}[_]) SprintSyntaxTree() string { 250 | var b bytes.Buffer 251 | p.WriteSyntaxTree(&b) 252 | return b.String() 253 | } 254 | 255 | {{if .HasActions}} 256 | func (p *{{.StructName}}[_]) Execute() { 257 | buffer, _buffer, text, begin, end := p.Buffer, p.buffer, "", 0, 0 258 | for _, t := range p.Tokens() { 259 | switch t.pegRule { 260 | {{if .HasPush}} 261 | case rulePegText: 262 | begin, end = int(t.begin), int(t.end) 263 | text = string(_buffer[begin:end]) 264 | {{end}} 265 | {{range .Actions}}case ruleAction{{.GetID}}: 266 | {{.String}} 267 | {{end}} 268 | } 269 | } 270 | _, _, _, _, _ = buffer, _buffer, text, begin, end 271 | } 272 | {{end}} 273 | {{end}} 274 | 275 | func Pretty[U Uint](pretty bool) func(*{{.StructName}}[U]) error { 276 | return func(p *{{.StructName}}[U]) error { 277 | p.Pretty = pretty 278 | return nil 279 | } 280 | } 281 | 282 | {{if .Ast -}} 283 | func Size[U Uint](size int) func(*{{.StructName}}[U]) error { 284 | return func(p *{{.StructName}}[U]) error { 285 | p.tokens = tokens[U]{tree: make([]token[U], 0, size)} 286 | return nil 287 | } 288 | } 289 | 290 | func DisableMemoize[U Uint]() func(*{{.StructName}}[U]) error { 291 | return func(p *{{.StructName}}[U]) error { 292 | p.disableMemoize = true 293 | return nil 294 | } 295 | } 296 | 297 | type memo[U Uint] struct { 298 | Matched bool 299 | Partial []token[U] 300 | } 301 | 302 | type memoKey[U Uint] struct { 303 | Rule U 304 | Position U 305 | } 306 | {{end -}} 307 | 308 | func (p *{{.StructName}}[U]) Init(options ...func(*{{.StructName}}[U]) error) error { 309 | var ( 310 | maxToken token[U] 311 | position, tokenIndex U 312 | buffer []rune 313 | {{if .Ast -}} 314 | 
memoization map[memoKey[U]]memo[U] 315 | {{end -}} 316 | {{if not .Ast -}} 317 | {{if .HasPush -}} 318 | text string 319 | {{end -}} 320 | {{end -}} 321 | ) 322 | for _, option := range options { 323 | err := option(p) 324 | if err != nil { 325 | return err 326 | } 327 | } 328 | p.reset = func() { 329 | maxToken = token[U]{} 330 | position, tokenIndex = 0, 0 331 | {{if .Ast -}} 332 | memoization = make(map[memoKey[U]]memo[U]) 333 | {{end -}} 334 | 335 | p.buffer = []rune(p.Buffer) 336 | if len(p.buffer) == 0 || p.buffer[len(p.buffer) - 1] != endSymbol { 337 | p.buffer = append(p.buffer, endSymbol) 338 | } 339 | buffer = p.buffer 340 | } 341 | p.reset() 342 | 343 | _rules := p.rules 344 | {{if .Ast -}} 345 | tree := p.tokens 346 | {{end -}} 347 | p.parse = func(rule ...int) error { 348 | r := 1 349 | if len(rule) > 0 { 350 | r = rule[0] 351 | } 352 | matches := p.rules[r]() 353 | {{if .Ast -}} 354 | p.tokens = tree 355 | {{end -}} 356 | if matches { 357 | {{if .Ast -}} 358 | p.Trim(uint32(tokenIndex)) 359 | {{end -}} 360 | return nil 361 | } 362 | return &parseError[U]{p, maxToken} 363 | } 364 | 365 | add := func(rule pegRule, begin U) { 366 | {{if .Ast -}} 367 | tree.Add(rule, begin, position, tokenIndex) 368 | {{end -}} 369 | tokenIndex++ 370 | if begin != position && position > maxToken.end { 371 | maxToken = token[U]{rule, begin, position} 372 | } 373 | } 374 | 375 | {{if .Ast -}} 376 | memoize := func(rule U, begin U, tokenIndexStart U, matched bool) { 377 | if p.disableMemoize { 378 | return 379 | } 380 | key := memoKey[U]{rule, begin} 381 | if !matched { 382 | memoization[key] = memo[U]{Matched: false} 383 | } else { 384 | memoization[key] = memo[U]{ 385 | Matched: true, 386 | Partial: slices.Clone(tree.tree[tokenIndexStart:tokenIndex]), 387 | } 388 | } 389 | } 390 | 391 | memoizedResult := func(m memo[U]) bool { 392 | if !m.Matched { 393 | return false 394 | } 395 | tree.tree = append(tree.tree[:tokenIndex], m.Partial...) 
396 | tokenIndex += U(len(m.Partial)) 397 | position = m.Partial[len(m.Partial)-1].end 398 | if tree.tree[tokenIndex-1].begin != position && position > maxToken.end { 399 | maxToken = tree.tree[tokenIndex-1] 400 | } 401 | return true 402 | } 403 | {{end -}} 404 | 405 | {{if .HasDot}} 406 | matchDot := func() bool { 407 | if buffer[position] != endSymbol { 408 | position++ 409 | return true 410 | } 411 | return false 412 | } 413 | {{end}} 414 | 415 | {{if .HasString}} 416 | matchString := func(s string) bool { 417 | i := position 418 | for _, c := range s { 419 | if buffer[i] != c { 420 | return false 421 | } 422 | i++ 423 | } 424 | position = i 425 | return true 426 | } 427 | {{end}} 428 | 429 | _rules = [...]func() bool { 430 | nil, 431 | -------------------------------------------------------------------------------- /bootstrap/main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2010 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package main 6 | 7 | import ( 8 | "fmt" 9 | "os" 10 | 11 | "github.com/pointlander/peg/tree" 12 | ) 13 | 14 | func main() { 15 | t := tree.New(true, true, false) 16 | 17 | /*package main 18 | 19 | import "fmt" 20 | import "math" 21 | import "sort" 22 | import "strconv" 23 | 24 | type Peg Peg { 25 | *Tree 26 | }*/ 27 | t.AddPackage("main") 28 | t.AddImport("github.com/pointlander/peg/tree") 29 | t.AddPeg("Peg") 30 | t.AddState(` 31 | *tree.Tree 32 | `) 33 | 34 | addDot := t.AddDot 35 | addName := t.AddName 36 | addCharacter := t.AddCharacter 37 | addAction := t.AddAction 38 | 39 | addRule := func(name string, item func()) { 40 | t.AddRule(name) 41 | item() 42 | t.AddExpression() 43 | } 44 | 45 | addSequence := func(items ...func()) { 46 | sequence := false 47 | for _, item := range items { 48 | item() 49 | if sequence { 50 | t.AddSequence() 51 | } else { 52 | sequence = true 53 | } 54 | } 55 | } 56 | 57 | addAlternate := func(items ...func()) { 58 | alternate := false 59 | for _, item := range items { 60 | item() 61 | if alternate { 62 | t.AddAlternate() 63 | } else { 64 | alternate = true 65 | } 66 | } 67 | } 68 | 69 | addString := func(s string) { 70 | sequence := false 71 | for _, r := range s { 72 | t.AddCharacter(string(r)) 73 | if sequence { 74 | t.AddSequence() 75 | } else { 76 | sequence = true 77 | } 78 | } 79 | } 80 | 81 | addRange := func(begin, end string) { 82 | addCharacter(begin) 83 | addCharacter(end) 84 | t.AddRange() 85 | } 86 | 87 | addStar := func(item func()) { 88 | item() 89 | t.AddStar() 90 | } 91 | 92 | addQuery := func(item func()) { 93 | item() 94 | t.AddQuery() 95 | } 96 | 97 | addPush := func(item func()) { 98 | item() 99 | t.AddPush() 100 | } 101 | 102 | addPeekNot := func(item func()) { 103 | item() 104 | t.AddPeekNot() 105 | } 106 | 107 | addPeekFor := func(item func()) { 108 | item() 109 | t.AddPeekFor() 110 | } 111 | 112 | /* Grammar <- Spacing { hdr; } Action* Definition* !. 
*/ 113 | addRule("Grammar", func() { 114 | addSequence( 115 | func() { addName("Spacing") }, 116 | func() { addAction(`p.AddPackage("main")`) }, 117 | func() { addAction(`p.AddImport("github.com/pointlander/peg/tree")`) }, 118 | func() { addAction(`p.AddPeg("Peg")`) }, 119 | func() { addAction(`p.AddState("*tree.Tree")`) }, 120 | func() { addStar(func() { addName("Action") }) }, 121 | func() { addStar(func() { addName("Definition") }) }, 122 | func() { addPeekNot(func() { addDot() }) }, 123 | ) 124 | }) 125 | 126 | /* Definition <- Identifier { p.AddRule(text) } 127 | LeftArrow Expression { p.AddExpression() } &(Identifier LeftArrow / !.)*/ 128 | addRule("Definition", func() { 129 | addSequence( 130 | func() { addName("Identifier") }, 131 | func() { addAction(" p.AddRule(text) ") }, 132 | func() { addName("LeftArrow") }, 133 | func() { addName("Expression") }, 134 | func() { addAction(" p.AddExpression() ") }, 135 | func() { 136 | addPeekFor(func() { 137 | addAlternate( 138 | func() { 139 | addSequence( 140 | func() { addName("Identifier") }, 141 | func() { addName("LeftArrow") }, 142 | ) 143 | }, 144 | func() { addPeekNot(func() { addDot() }) }, 145 | ) 146 | }) 147 | }, 148 | ) 149 | }) 150 | 151 | /* Expression <- Sequence (Slash Sequence { p.AddAlternate() })* */ 152 | addRule("Expression", func() { 153 | addSequence( 154 | func() { addName("Sequence") }, 155 | func() { 156 | addStar(func() { 157 | addSequence( 158 | func() { addName("Slash") }, 159 | func() { addName("Sequence") }, 160 | func() { addAction(" p.AddAlternate() ") }, 161 | ) 162 | }) 163 | }, 164 | ) 165 | }) 166 | 167 | /* Sequence <- Prefix (Prefix { p.AddSequence() } )* */ 168 | addRule("Sequence", func() { 169 | addSequence( 170 | func() { addName("Prefix") }, 171 | func() { 172 | addStar(func() { 173 | addSequence( 174 | func() { addName("Prefix") }, 175 | func() { addAction(" p.AddSequence() ") }, 176 | ) 177 | }) 178 | }, 179 | ) 180 | }) 181 | 182 | /* Prefix <- '!' 
Suffix { p.AddPeekNot() } / Suffix */ 183 | addRule("Prefix", func() { 184 | addAlternate( 185 | func() { 186 | addSequence( 187 | func() { addCharacter(`!`) }, 188 | func() { addName("Suffix") }, 189 | func() { addAction(" p.AddPeekNot() ") }, 190 | ) 191 | }, 192 | func() { addName("Suffix") }, 193 | ) 194 | }) 195 | 196 | /* Suffix <- Primary ( Question { p.AddQuery() } 197 | / Star { p.AddStar() } 198 | )? */ 199 | addRule("Suffix", func() { 200 | addSequence( 201 | func() { addName("Primary") }, 202 | func() { 203 | addQuery(func() { 204 | addAlternate( 205 | func() { 206 | addSequence( 207 | func() { addName("Question") }, 208 | func() { addAction(" p.AddQuery() ") }, 209 | ) 210 | }, 211 | func() { 212 | addSequence( 213 | func() { addName("Star") }, 214 | func() { addAction(" p.AddStar() ") }, 215 | ) 216 | }, 217 | ) 218 | }) 219 | }, 220 | ) 221 | }) 222 | 223 | /* Primary <- Identifier !LeftArrow { p.AddName(text) } 224 | / Open Expression Close 225 | / Literal 226 | / Class 227 | / Dot { p.AddDot() } 228 | / Action { p.AddAction(text) } 229 | / Begin Expression End { p.AddPush() }*/ 230 | addRule("Primary", func() { 231 | addAlternate( 232 | func() { 233 | addSequence( 234 | func() { addName("Identifier") }, 235 | func() { addPeekNot(func() { t.AddName("LeftArrow") }) }, 236 | func() { addAction(" p.AddName(text) ") }, 237 | ) 238 | }, 239 | func() { 240 | addSequence( 241 | func() { addName("Open") }, 242 | func() { addName("Expression") }, 243 | func() { addName("Close") }, 244 | ) 245 | }, 246 | func() { addName("Literal") }, 247 | func() { addName("Class") }, 248 | func() { 249 | addSequence( 250 | func() { addName("Dot") }, 251 | func() { addAction(" p.AddDot() ") }, 252 | ) 253 | }, 254 | func() { 255 | addSequence( 256 | func() { addName("Action") }, 257 | func() { addAction(" p.AddAction(text) ") }, 258 | ) 259 | }, 260 | func() { 261 | addSequence( 262 | func() { addName("Begin") }, 263 | func() { addName("Expression") }, 264 | func() { 
addName("End") }, 265 | func() { addAction(" p.AddPush() ") }, 266 | ) 267 | }, 268 | ) 269 | }) 270 | 271 | /* Identifier <- < Ident Ident* > Spacing */ 272 | addRule("Identifier", func() { 273 | addSequence( 274 | func() { 275 | addPush(func() { 276 | addSequence( 277 | func() { addName("Ident") }, 278 | func() { addStar(func() { addName("Ident") }) }, 279 | ) 280 | }) 281 | }, 282 | func() { addName("Spacing") }, 283 | ) 284 | }) 285 | 286 | /* Ident <- [A-Za-z] */ 287 | addRule("Ident", func() { 288 | addAlternate( 289 | func() { addRange(`A`, `Z`) }, 290 | func() { addRange(`a`, `z`) }, 291 | ) 292 | }) 293 | 294 | /* Literal <- ['] !['] Char (!['] Char { p.AddSequence() } )* ['] Spacing */ 295 | addRule("Literal", func() { 296 | addSequence( 297 | func() { addCharacter(`'`) }, 298 | func() { 299 | addSequence( 300 | func() { addPeekNot(func() { addCharacter(`'`) }) }, 301 | func() { addName("Char") }, 302 | ) 303 | }, 304 | func() { 305 | addStar(func() { 306 | addSequence( 307 | func() { addPeekNot(func() { addCharacter(`'`) }) }, 308 | func() { addName("Char") }, 309 | func() { addAction(` p.AddSequence() `) }, 310 | ) 311 | }) 312 | }, 313 | func() { addCharacter(`'`) }, 314 | func() { addName("Spacing") }, 315 | ) 316 | }) 317 | 318 | /* Class <- '[' Range (!']' Range { p.AddAlternate() })* ']' Spacing */ 319 | addRule("Class", func() { 320 | addSequence( 321 | func() { addCharacter(`[`) }, 322 | func() { addName("Range") }, 323 | func() { 324 | addStar(func() { 325 | addSequence( 326 | func() { addPeekNot(func() { addCharacter(`]`) }) }, 327 | func() { addName("Range") }, 328 | func() { addAction(" p.AddAlternate() ") }, 329 | ) 330 | }) 331 | }, 332 | func() { addCharacter(`]`) }, 333 | func() { addName("Spacing") }, 334 | ) 335 | }) 336 | 337 | /* Range <- Char '-' Char { p.AddRange() } 338 | / Char */ 339 | addRule("Range", func() { 340 | addAlternate( 341 | func() { 342 | addSequence( 343 | func() { addName("Char") }, 344 | func() { addCharacter(`-`) 
}, 345 | func() { addName("Char") }, 346 | func() { addAction(" p.AddRange() ") }, 347 | ) 348 | }, 349 | func() { addName("Char") }, 350 | ) 351 | }) 352 | 353 | /* Char <- Escape 354 | / '\\' "0x"<[0-9a-f]*> { p.AddHexaCharacter(text) } 355 | / '\\\\' { p.AddCharacter("\\") } 356 | / !'\\' <.> { p.AddCharacter(text) } */ 357 | addRule("Char", func() { 358 | addAlternate( 359 | func() { 360 | addSequence( 361 | func() { addCharacter("\\") }, 362 | func() { addCharacter(`0`) }, 363 | func() { addCharacter(`x`) }, 364 | func() { 365 | addPush(func() { 366 | addStar(func() { 367 | addAlternate( 368 | func() { addRange(`0`, `9`) }, 369 | func() { addRange(`a`, `f`) }, 370 | ) 371 | }) 372 | }) 373 | }, 374 | func() { addAction(` p.AddHexaCharacter(text) `) }, 375 | ) 376 | }, 377 | func() { 378 | addSequence( 379 | func() { addCharacter("\\") }, 380 | func() { addCharacter("\\") }, 381 | func() { addAction(` p.AddCharacter("\\") `) }, 382 | ) 383 | }, 384 | func() { 385 | addSequence( 386 | func() { addPeekNot(func() { addCharacter("\\") }) }, 387 | func() { addPush(func() { addDot() }) }, 388 | func() { addAction(` p.AddCharacter(text) `) }, 389 | ) 390 | }, 391 | ) 392 | }) 393 | /* LeftArrow <- '<-' Spacing */ 394 | addRule("LeftArrow", func() { 395 | addSequence( 396 | func() { addString(`<-`) }, 397 | func() { addName("Spacing") }, 398 | ) 399 | }) 400 | 401 | /* Slash <- '/' Spacing */ 402 | addRule("Slash", func() { 403 | addSequence( 404 | func() { addCharacter(`/`) }, 405 | func() { addName("Spacing") }, 406 | ) 407 | }) 408 | 409 | /* Question <- '?' 
Spacing */ 410 | addRule("Question", func() { 411 | addSequence( 412 | func() { addCharacter(`?`) }, 413 | func() { addName("Spacing") }, 414 | ) 415 | }) 416 | 417 | /* Star <- '*' Spacing */ 418 | addRule("Star", func() { 419 | addSequence( 420 | func() { addCharacter(`*`) }, 421 | func() { addName("Spacing") }, 422 | ) 423 | }) 424 | 425 | /* Open <- '(' Spacing */ 426 | addRule("Open", func() { 427 | addSequence( 428 | func() { addCharacter(`(`) }, 429 | func() { addName("Spacing") }, 430 | ) 431 | }) 432 | 433 | /* Close <- ')' Spacing */ 434 | addRule("Close", func() { 435 | addSequence( 436 | func() { addCharacter(`)`) }, 437 | func() { addName("Spacing") }, 438 | ) 439 | }) 440 | 441 | /* Dot <- '.' Spacing */ 442 | addRule("Dot", func() { 443 | addSequence( 444 | func() { addCharacter(`.`) }, 445 | func() { addName("Spacing") }, 446 | ) 447 | }) 448 | 449 | addRule("Spacing", func() { 450 | addStar(func() { 451 | addAlternate( 452 | func() { addName("Space") }, 453 | func() { addName("Comment") }, 454 | ) 455 | }) 456 | }) 457 | 458 | /* Comment <- '#' (!EndOfLine .)* */ 459 | addRule("Comment", func() { 460 | addSequence( 461 | func() { addCharacter(`#`) }, 462 | func() { 463 | addStar(func() { 464 | addSequence( 465 | func() { addPeekNot(func() { addName("EndOfLine") }) }, 466 | func() { addDot() }, 467 | ) 468 | }) 469 | }, 470 | ) 471 | }) 472 | 473 | /* Space <- ' ' / '\t' / EndOfLine */ 474 | addRule("Space", func() { 475 | addAlternate( 476 | func() { addCharacter(` `) }, 477 | func() { addCharacter("\t") }, 478 | func() { addName("EndOfLine") }, 479 | ) 480 | }) 481 | 482 | /* EndOfLine <- '\r\n' / '\n' / '\r' */ 483 | addRule("EndOfLine", func() { 484 | addAlternate( 485 | func() { addString("\r\n") }, 486 | func() { addCharacter("\n") }, 487 | func() { addCharacter("\r") }, 488 | ) 489 | }) 490 | 491 | /* Action <- '{' < (![}].)* > '}' Spacing */ 492 | addRule("Action", func() { 493 | addSequence( 494 | func() { addCharacter(`{`) }, 495 | func() 
{ 496 | addPush(func() { 497 | addStar(func() { 498 | addSequence( 499 | func() { 500 | addPeekNot(func() { 501 | addCharacter(`}`) 502 | }) 503 | }, 504 | func() { addDot() }, 505 | ) 506 | }) 507 | }) 508 | }, 509 | func() { addCharacter(`}`) }, 510 | func() { addName("Spacing") }, 511 | ) 512 | }) 513 | 514 | /* Begin <- '<' Spacing */ 515 | addRule("Begin", func() { 516 | addSequence( 517 | func() { addCharacter(`<`) }, 518 | func() { addName("Spacing") }, 519 | ) 520 | }) 521 | 522 | /* End <- '>' Spacing */ 523 | addRule("End", func() { 524 | addSequence( 525 | func() { addCharacter(`>`) }, 526 | func() { addName("Spacing") }, 527 | ) 528 | }) 529 | 530 | filename := "bootstrap.peg.go" 531 | out, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0o644) 532 | if err != nil { 533 | fmt.Printf("%v: %v\n", filename, err) 534 | return 535 | } 536 | defer func() { 537 | err := out.Close() 538 | if err != nil { 539 | fmt.Printf("%v: %v\n", filename, err) 540 | } 541 | }() 542 | _ = t.Compile(filename, os.Args, out) 543 | } 544 | -------------------------------------------------------------------------------- /grammars/c/c.peg: -------------------------------------------------------------------------------- 1 | #=========================================================================== 2 | # 3 | # Parsing Expression Grammar of C for Mouse 1.1 - 1.5. 4 | # Based on standard ISO/IEC 9899.1999:TC2, without preprocessor. 5 | # Requires semantics class to process Typedefs. 6 | # 7 | #--------------------------------------------------------------------------- 8 | # 9 | # Copyright (C) 2007, 2009, 2010 by Roman R Redziejowski (www.romanredz.se). 10 | # 11 | # The author gives unlimited permission to copy and distribute 12 | # this file, with or without modifications, as long as this notice 13 | # is preserved, and any changes are properly documented. 
14 | # 15 | # This file is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 18 | # 19 | #--------------------------------------------------------------------------- 20 | # 21 | # Latest update 2010-11-19 22 | # 23 | #--------------------------------------------------------------------------- 24 | # 25 | # Modifications to the standard grammar: 26 | # 27 | # Defined # as start of line comment. 28 | # Added FunctionSpecifier "_stdcall". 29 | # Added TypeQualifier "__declspec()". 30 | # Added StorageClassSpecifier "__attribute__(())". 31 | # The scope of TypedefNames is not implemented. 32 | # 33 | #--------------------------------------------------------------------------- 34 | # 35 | # Implementation of typedefs. 36 | # 37 | # A TypedefName is an Identifier that has been declared as such 38 | # by a previous typedef declaration. It can be used as TypeSpecifier 39 | # in DeclarationSpecifiers and SpecifierQualifierList. 40 | # Recognizing it as such is essential for correct parsing. 41 | # In other contexts, TypedefName is treated as an ordinary Identifier. 42 | # 43 | # According to 6.7.2, comment 2, of the Standard, TypedefName can appear 44 | # in DeclarationSpecifiers or SpecifierQualifierList at most once, 45 | # and then as the only TypeSpecifier. To make sure that an Identifier 46 | # is recognized as TypedefName only in these contexts, definitions 47 | # of these items are changed as follows: 48 | # 49 | # - TypedefName is removed as an alternative of TypeSpecifier. 50 | # 51 | # - DeclarationSpecifiers and SpecifierQualifierList are redefined 52 | # to allow either single TypedefName or one or more TypeSpecifiers. 53 | # 54 | # The semantics class, via semantic actions, maintains a table of TypedefNames. 55 | # 56 | # The rule defining TypedefName as Identifier has a semantic action 57 | # that returns true iff the Identifier is in the table. 
58 | # That means TypedefName is accepted iff it is in the table. 59 | # 60 | # According to 6.7.7, comment 3, of the Standard, 61 | # in a Declaration whose StorageClassSpecifier is TYPEDEF, 62 | # each Declarator defines an Identifier to be a TypedefName. 63 | # These Identifiers are entered into the table as follows. 64 | # 65 | # - Each Identifier has itself as semantic value. 66 | # 67 | # - Each DirectDeclarator starts with either Identifier 68 | # or Declarator in parentheses. 69 | # Its semantic value is either that Identifier, 70 | # or the Identifier obtained as semantic value of that Declarator. 71 | # 72 | # - Each Declarator has as semantic value the Identifier 73 | # appearing in its DirectDeclarator. 74 | # 75 | # - Each InitDeclarator has as semantic value the Identifier 76 | # appearing in its Declarator. 77 | # 78 | # - InitDeclaratorList has as semantic value 79 | # the list of Identifiers appearing in its InitDeclarators. 80 | # 81 | # - DeclarationSpecifiers has semantic value "typedef" 82 | # if any of the specifiers is "typedef", or null otherwise. 83 | # 84 | # - Declaration has a semantic action that enters Identifiers 85 | # delivered by InitDeclaratorList into the typedef table 86 | # if DeclarationSpecifiers indicate "typedef". 87 | # 88 | # 89 | #--------------------------------------------------------------------------- 90 | # 91 | # Change log 92 | # 2009-07-13 Posted on Internet. 93 | # 2010-11-19 Removed superfluous '?' after 'Spacing'. 
94 | # 95 | # 96 | #--------------------------------------------------------------------------- 97 | # 98 | # 2013-02-21 Modified to work with github.com/pointlander/peg 99 | # 100 | #=========================================================================== 101 | 102 | 103 | #------------------------------------------------------------------------- 104 | # A.2.4 External definitions 105 | #------------------------------------------------------------------------- 106 | 107 | package c 108 | 109 | type C Peg { 110 | 111 | } 112 | 113 | TranslationUnit <- Spacing ( ExternalDeclaration / SEMI ) * EOT 114 | 115 | ExternalDeclaration <- FunctionDefinition / Declaration 116 | 117 | FunctionDefinition <- DeclarationSpecifiers Declarator DeclarationList? CompoundStatement 118 | 119 | DeclarationList <- Declaration+ 120 | 121 | 122 | #------------------------------------------------------------------------- 123 | # A.2.2 Declarations 124 | #------------------------------------------------------------------------- 125 | 126 | Declaration <- DeclarationSpecifiers InitDeclaratorList? SEMI #{} 127 | 128 | DeclarationSpecifiers 129 | <- (( StorageClassSpecifier 130 | / TypeQualifier 131 | / FunctionSpecifier 132 | )* 133 | TypedefName 134 | ( StorageClassSpecifier 135 | / TypeQualifier 136 | / FunctionSpecifier 137 | )* 138 | ) #{DeclarationSpecifiers} 139 | / ( StorageClassSpecifier 140 | / TypeSpecifier 141 | / TypeQualifier 142 | / FunctionSpecifier 143 | )+ #{DeclarationSpecifiers} 144 | 145 | InitDeclaratorList <- InitDeclarator (COMMA InitDeclarator)* #{} 146 | 147 | InitDeclarator <- Declarator (EQU Initializer)? 
#{} 148 | 149 | StorageClassSpecifier 150 | <- TYPEDEF 151 | / EXTERN 152 | / STATIC 153 | / AUTO 154 | / REGISTER 155 | / ATTRIBUTE LPAR LPAR (!RPAR .)* RPAR RPAR 156 | 157 | TypeSpecifier 158 | <- VOID 159 | / CHAR 160 | / SHORT 161 | / INT 162 | / LONG 163 | / FLOAT 164 | / DOUBLE 165 | / SIGNED 166 | / UNSIGNED 167 | / BOOL 168 | / COMPLEX 169 | / StructOrUnionSpecifier 170 | / EnumSpecifier 171 | 172 | StructOrUnionSpecifier 173 | <- StructOrUnion 174 | ( Identifier? LWING StructDeclaration* RWING 175 | / Identifier 176 | ) 177 | 178 | StructOrUnion <- STRUCT / UNION 179 | 180 | StructDeclaration <- ( SpecifierQualifierList StructDeclaratorList? )? SEMI 181 | 182 | SpecifierQualifierList 183 | <- ( TypeQualifier* 184 | TypedefName 185 | TypeQualifier* 186 | ) 187 | / ( TypeSpecifier 188 | / TypeQualifier 189 | )+ 190 | 191 | StructDeclaratorList <- StructDeclarator (COMMA StructDeclarator)* 192 | 193 | StructDeclarator 194 | <- Declarator? COLON ConstantExpression 195 | / Declarator 196 | 197 | EnumSpecifier 198 | <- ENUM 199 | ( Identifier? LWING EnumeratorList COMMA? RWING 200 | / Identifier 201 | ) 202 | 203 | EnumeratorList <- Enumerator (COMMA Enumerator)* 204 | 205 | Enumerator <- EnumerationConstant (EQU ConstantExpression)? 206 | 207 | TypeQualifier 208 | <- CONST 209 | / RESTRICT 210 | / VOLATILE 211 | / DECLSPEC LPAR Identifier RPAR 212 | 213 | FunctionSpecifier <- INLINE / STDCALL 214 | 215 | Declarator <- Pointer? DirectDeclarator #{} 216 | 217 | DirectDeclarator 218 | <- ( Identifier 219 | / LPAR Declarator RPAR 220 | ) 221 | ( LBRK TypeQualifier* AssignmentExpression? RBRK 222 | / LBRK STATIC TypeQualifier* AssignmentExpression RBRK 223 | / LBRK TypeQualifier+ STATIC AssignmentExpression RBRK 224 | / LBRK TypeQualifier* STAR RBRK 225 | / LPAR ParameterTypeList RPAR 226 | / LPAR IdentifierList? RPAR 227 | )* #{} 228 | 229 | Pointer <- ( STAR TypeQualifier* )+ 230 | 231 | ParameterTypeList <- ParameterList (COMMA ELLIPSIS)? 
232 | 233 | ParameterList <- ParameterDeclaration (COMMA ParameterDeclaration)* 234 | 235 | ParameterDeclaration 236 | <- DeclarationSpecifiers 237 | ( Declarator 238 | / AbstractDeclarator 239 | )? 240 | 241 | IdentifierList <- Identifier (COMMA Identifier)* 242 | 243 | TypeName <- SpecifierQualifierList AbstractDeclarator? 244 | 245 | AbstractDeclarator 246 | <- Pointer? DirectAbstractDeclarator 247 | / Pointer 248 | 249 | DirectAbstractDeclarator 250 | <- ( LPAR AbstractDeclarator RPAR 251 | / LBRK (AssignmentExpression / STAR)? RBRK 252 | / LPAR ParameterTypeList? RPAR 253 | ) 254 | ( LBRK (AssignmentExpression / STAR)? RBRK 255 | / LPAR ParameterTypeList? RPAR 256 | )* 257 | 258 | TypedefName <-Identifier #{&TypedefName} 259 | 260 | Initializer 261 | <- AssignmentExpression 262 | / LWING InitializerList COMMA? RWING 263 | 264 | InitializerList <- Designation? Initializer (COMMA Designation? Initializer)* 265 | 266 | Designation <- Designator+ EQU 267 | 268 | Designator 269 | <- LBRK ConstantExpression RBRK 270 | / DOT Identifier 271 | 272 | 273 | #------------------------------------------------------------------------- 274 | # A.2.3 Statements 275 | #------------------------------------------------------------------------- 276 | 277 | Statement 278 | <- LabeledStatement 279 | / CompoundStatement 280 | / ExpressionStatement 281 | / SelectionStatement 282 | / IterationStatement 283 | / JumpStatement 284 | 285 | LabeledStatement 286 | <- Identifier COLON Statement 287 | / CASE ConstantExpression COLON Statement 288 | / DEFAULT COLON Statement 289 | 290 | CompoundStatement <- LWING ( Declaration / Statement )* RWING 291 | 292 | ExpressionStatement <- Expression? SEMI 293 | 294 | SelectionStatement 295 | <- IF LPAR Expression RPAR Statement (ELSE Statement)? 296 | / SWITCH LPAR Expression RPAR Statement 297 | 298 | IterationStatement 299 | <- WHILE LPAR Expression RPAR Statement 300 | / DO Statement WHILE LPAR Expression RPAR SEMI 301 | / FOR LPAR Expression? 
SEMI Expression? SEMI Expression? RPAR Statement 302 | / FOR LPAR Declaration Expression? SEMI Expression? RPAR Statement 303 | 304 | JumpStatement 305 | <- GOTO Identifier SEMI 306 | / CONTINUE SEMI 307 | / BREAK SEMI 308 | / RETURN Expression? SEMI 309 | 310 | 311 | #------------------------------------------------------------------------- 312 | # A.2.1 Expressions 313 | #------------------------------------------------------------------------- 314 | 315 | PrimaryExpression 316 | <- StringLiteral 317 | / Constant 318 | / Identifier 319 | / LPAR Expression RPAR 320 | 321 | PostfixExpression 322 | <- ( PrimaryExpression 323 | / LPAR TypeName RPAR LWING InitializerList COMMA? RWING 324 | ) 325 | ( LBRK Expression RBRK 326 | / LPAR ArgumentExpressionList? RPAR 327 | / DOT Identifier 328 | / PTR Identifier 329 | / INC 330 | / DEC 331 | )* 332 | 333 | ArgumentExpressionList <- AssignmentExpression (COMMA AssignmentExpression)* 334 | 335 | UnaryExpression 336 | <- PostfixExpression 337 | / INC UnaryExpression 338 | / DEC UnaryExpression 339 | / UnaryOperator CastExpression 340 | / SIZEOF (UnaryExpression / LPAR TypeName RPAR ) 341 | 342 | UnaryOperator 343 | <- AND 344 | / STAR 345 | / PLUS 346 | / MINUS 347 | / TILDA 348 | / BANG 349 | 350 | CastExpression <- (LPAR TypeName RPAR CastExpression) / UnaryExpression 351 | 352 | MultiplicativeExpression <- CastExpression ((STAR / DIV / MOD) CastExpression)* 353 | 354 | AdditiveExpression <- MultiplicativeExpression ((PLUS / MINUS) MultiplicativeExpression)* 355 | 356 | ShiftExpression <- AdditiveExpression ((LEFT / RIGHT) AdditiveExpression)* 357 | 358 | RelationalExpression <- ShiftExpression ((LE / GE / LT / GT) ShiftExpression)* 359 | 360 | EqualityExpression <- RelationalExpression ((EQUEQU / BANGEQU) RelationalExpression)* 361 | 362 | ANDExpression <- EqualityExpression (AND EqualityExpression)* 363 | 364 | ExclusiveORExpression <- ANDExpression (HAT ANDExpression)* 365 | 366 | InclusiveORExpression <- 
ExclusiveORExpression (OR ExclusiveORExpression)* 367 | 368 | LogicalANDExpression <- InclusiveORExpression (ANDAND InclusiveORExpression)* 369 | 370 | LogicalORExpression <- LogicalANDExpression (OROR LogicalANDExpression)* 371 | 372 | ConditionalExpression <- LogicalORExpression (QUERY Expression COLON LogicalORExpression)* 373 | 374 | AssignmentExpression 375 | <- UnaryExpression AssignmentOperator AssignmentExpression 376 | / ConditionalExpression 377 | 378 | AssignmentOperator 379 | <- EQU 380 | / STAREQU 381 | / DIVEQU 382 | / MODEQU 383 | / PLUSEQU 384 | / MINUSEQU 385 | / LEFTEQU 386 | / RIGHTEQU 387 | / ANDEQU 388 | / HATEQU 389 | / OREQU 390 | 391 | Expression <- AssignmentExpression (COMMA AssignmentExpression)* 392 | 393 | ConstantExpression <- ConditionalExpression 394 | 395 | 396 | #------------------------------------------------------------------------- 397 | # A.1.1 Lexical elements 398 | # Tokens are: Keyword, Identifier, Constant, StringLiteral, Punctuator. 399 | # Tokens are separated by Spacing. 
400 | #------------------------------------------------------------------------- 401 | 402 | Spacing 403 | <- ( WhiteSpace 404 | / LongComment 405 | / LineComment 406 | / Pragma 407 | )* 408 | 409 | WhiteSpace <- [ \n\r\t] # 7.4.1.10 [\u000B\u000C] 410 | 411 | LongComment <- '/*' (!'*/'.)* '*/' # 6.4.9 412 | 413 | LineComment <- '//' (!'\n' .)* # 6.4.9 414 | 415 | Pragma <- '#' (!'\n' .)* # Treat pragma as comment 416 | 417 | 418 | #------------------------------------------------------------------------- 419 | # A.1.2 Keywords 420 | #------------------------------------------------------------------------- 421 | 422 | AUTO <- 'auto' !IdChar Spacing 423 | BREAK <- 'break' !IdChar Spacing 424 | CASE <- 'case' !IdChar Spacing 425 | CHAR <- 'char' !IdChar Spacing 426 | CONST <- 'const' !IdChar Spacing 427 | CONTINUE <- 'continue' !IdChar Spacing 428 | DEFAULT <- 'default' !IdChar Spacing 429 | DOUBLE <- 'double' !IdChar Spacing 430 | DO <- 'do' !IdChar Spacing 431 | ELSE <- 'else' !IdChar Spacing 432 | ENUM <- 'enum' !IdChar Spacing 433 | EXTERN <- 'extern' !IdChar Spacing 434 | FLOAT <- 'float' !IdChar Spacing 435 | FOR <- 'for' !IdChar Spacing 436 | GOTO <- 'goto' !IdChar Spacing 437 | IF <- 'if' !IdChar Spacing 438 | INT <- 'int' !IdChar Spacing 439 | INLINE <- 'inline' !IdChar Spacing 440 | LONG <- 'long' !IdChar Spacing 441 | REGISTER <- 'register' !IdChar Spacing 442 | RESTRICT <- 'restrict' !IdChar Spacing 443 | RETURN <- 'return' !IdChar Spacing 444 | SHORT <- 'short' !IdChar Spacing 445 | SIGNED <- 'signed' !IdChar Spacing 446 | SIZEOF <- 'sizeof' !IdChar Spacing 447 | STATIC <- 'static' !IdChar Spacing 448 | STRUCT <- 'struct' !IdChar Spacing 449 | SWITCH <- 'switch' !IdChar Spacing 450 | TYPEDEF <- 'typedef' !IdChar Spacing 451 | UNION <- 'union' !IdChar Spacing 452 | UNSIGNED <- 'unsigned' !IdChar Spacing 453 | VOID <- 'void' !IdChar Spacing 454 | VOLATILE <- 'volatile' !IdChar Spacing 455 | WHILE <- 'while' !IdChar Spacing 456 | BOOL <- '_Bool' !IdChar 
Spacing 457 | COMPLEX <- '_Complex' !IdChar Spacing 458 | STDCALL <- '_stdcall' !IdChar Spacing 459 | DECLSPEC <- '__declspec' !IdChar Spacing 460 | ATTRIBUTE <- '__attribute__' !IdChar Spacing 461 | 462 | Keyword 463 | <- ( 'auto' 464 | / 'break' 465 | / 'case' 466 | / 'char' 467 | / 'const' 468 | / 'continue' 469 | / 'default' 470 | / 'double' 471 | / 'do' 472 | / 'else' 473 | / 'enum' 474 | / 'extern' 475 | / 'float' 476 | / 'for' 477 | / 'goto' 478 | / 'if' 479 | / 'int' 480 | / 'inline' 481 | / 'long' 482 | / 'register' 483 | / 'restrict' 484 | / 'return' 485 | / 'short' 486 | / 'signed' 487 | / 'sizeof' 488 | / 'static' 489 | / 'struct' 490 | / 'switch' 491 | / 'typedef' 492 | / 'union' 493 | / 'unsigned' 494 | / 'void' 495 | / 'volatile' 496 | / 'while' 497 | / '_Bool' 498 | / '_Complex' 499 | / '_Imaginary' 500 | / '_stdcall' 501 | / '__declspec' 502 | / '__attribute__' 503 | ) 504 | !IdChar 505 | 506 | 507 | #------------------------------------------------------------------------- 508 | # A.1.3 Identifiers 509 | # The standard does not explicitly state that identifiers must be 510 | # distinct from keywords, but it seems so. 
511 | #------------------------------------------------------------------------- 512 | 513 | Identifier <- !Keyword IdNondigit IdChar* Spacing #{} 514 | 515 | IdNondigit 516 | <- [a-z] / [A-Z] / [_] 517 | / UniversalCharacter 518 | 519 | IdChar 520 | <- [a-z] / [A-Z] / [0-9] / [_] 521 | / UniversalCharacter 522 | 523 | 524 | #------------------------------------------------------------------------- 525 | # A.1.4 Universal character names 526 | #------------------------------------------------------------------------- 527 | 528 | UniversalCharacter 529 | <- '\\u' HexQuad 530 | / '\\U' HexQuad HexQuad 531 | 532 | HexQuad <- HexDigit HexDigit HexDigit HexDigit 533 | 534 | 535 | #------------------------------------------------------------------------- 536 | # A.1.5 Constants 537 | #------------------------------------------------------------------------- 538 | 539 | Constant 540 | <- FloatConstant 541 | / IntegerConstant # Note: can be a prefix of Float Constant! 542 | / EnumerationConstant 543 | / CharacterConstant 544 | 545 | IntegerConstant 546 | <- ( DecimalConstant 547 | / HexConstant 548 | / OctalConstant 549 | ) 550 | IntegerSuffix? Spacing 551 | 552 | DecimalConstant <- [1-9][0-9]* 553 | 554 | OctalConstant <- '0' [0-7]* 555 | 556 | HexConstant <- HexPrefix HexDigit+ 557 | 558 | HexPrefix <- '0x' / '0X' 559 | 560 | HexDigit <- [a-f] / [A-F] / [0-9] 561 | 562 | IntegerSuffix 563 | <- [uU] Lsuffix? 564 | / Lsuffix [uU]? 565 | 566 | Lsuffix 567 | <- 'll' 568 | / 'LL' 569 | / [lL] 570 | 571 | FloatConstant 572 | <- ( DecimalFloatConstant 573 | / HexFloatConstant 574 | ) 575 | FloatSuffix? Spacing 576 | 577 | DecimalFloatConstant 578 | <- Fraction Exponent? 579 | / [0-9]+ Exponent 580 | 581 | HexFloatConstant 582 | <- HexPrefix HexFraction BinaryExponent? 583 | / HexPrefix HexDigit+ BinaryExponent 584 | 585 | Fraction 586 | <- [0-9]* '.' [0-9]+ 587 | / [0-9]+ '.' 588 | 589 | HexFraction 590 | <- HexDigit* '.' HexDigit+ 591 | / HexDigit+ '.' 
592 | 593 | Exponent <- [eE][+\-]? [0-9]+ 594 | 595 | BinaryExponent <- [pP][+\-]? [0-9]+ 596 | 597 | FloatSuffix <- [flFL] 598 | 599 | EnumerationConstant <- Identifier 600 | 601 | CharacterConstant <- 'L'? ['] Char* ['] Spacing 602 | 603 | Char <- Escape / !['\n\\] . 604 | 605 | Escape 606 | <- SimpleEscape 607 | / OctalEscape 608 | / HexEscape 609 | / UniversalCharacter 610 | 611 | SimpleEscape <- '\\' ['\"?\\%abfnrtv] 612 | OctalEscape <- '\\' [0-7][0-7]?[0-7]? 613 | HexEscape <- '\\x' HexDigit+ 614 | 615 | 616 | #------------------------------------------------------------------------- 617 | # A.1.6 String Literals 618 | #------------------------------------------------------------------------- 619 | 620 | StringLiteral <- 'L'? (["] StringChar* ["] Spacing)+ 621 | 622 | StringChar <- Escape / ![\"\n\\] . 623 | 624 | 625 | #------------------------------------------------------------------------- 626 | # A.1.7 Punctuators 627 | #------------------------------------------------------------------------- 628 | 629 | LBRK <- '[' Spacing 630 | RBRK <- ']' Spacing 631 | LPAR <- '(' Spacing 632 | RPAR <- ')' Spacing 633 | LWING <- '{' Spacing 634 | RWING <- '}' Spacing 635 | DOT <- '.' Spacing 636 | PTR <- '->' Spacing 637 | INC <- '++' Spacing 638 | DEC <- '--' Spacing 639 | AND <- '&' ![&] Spacing 640 | STAR <- '*' ![=] Spacing 641 | PLUS <- '+' ![+=] Spacing 642 | MINUS <- '-' ![\-=>] Spacing 643 | TILDA <- '~' Spacing 644 | BANG <- '!' ![=] Spacing 645 | DIV <- '/' ![=] Spacing 646 | MOD <- '%' ![=>] Spacing 647 | LEFT <- '<<' ![=] Spacing 648 | RIGHT <- '>>' ![=] Spacing 649 | LT <- '<' ![=] Spacing 650 | GT <- '>' ![=] Spacing 651 | LE <- '<=' Spacing 652 | GE <- '>=' Spacing 653 | EQUEQU <- '==' Spacing 654 | BANGEQU <- '!=' Spacing 655 | HAT <- '^' ![=] Spacing 656 | OR <- '|' ![=] Spacing 657 | ANDAND <- '&&' Spacing 658 | OROR <- '||' Spacing 659 | QUERY <- '?' Spacing 660 | COLON <- ':' ![>] Spacing 661 | SEMI <- ';' Spacing 662 | ELLIPSIS <- '...' 
Spacing 663 | EQU <- '=' !"=" Spacing 664 | STAREQU <- '*=' Spacing 665 | DIVEQU <- '/=' Spacing 666 | MODEQU <- '%=' Spacing 667 | PLUSEQU <- '+=' Spacing 668 | MINUSEQU <- '-=' Spacing 669 | LEFTEQU <- '<<=' Spacing 670 | RIGHTEQU <- '>>=' Spacing 671 | ANDEQU <- '&=' Spacing 672 | HATEQU <- '^=' Spacing 673 | OREQU <- '|=' Spacing 674 | COMMA <- ',' Spacing 675 | 676 | EOT <- !. 677 | -------------------------------------------------------------------------------- /grammars/fexl/doc/try.fxl: -------------------------------------------------------------------------------- 1 | #!../bin/fexl 2 | # ^^^ use that line for the locally built version 3 | 4 | #!/usr/bin/fexl 5 | # ^^^ use that line for the installed version 6 | 7 | 8 | # NOTE: If you run ./try.fxl, it will go through a bunch of tests, including 9 | # one at the end where it asks you to type lines of text and terminate with 10 | # Ctrl-D. If you'd like to run the test without having to type anything, and 11 | # compare it with the reference output, do this: 12 | # 13 | # cat try.fxl | ../bin/fexl | cmp - out 14 | # 15 | # That should run quietly with exit code 0. 16 | 17 | 18 | ##### 19 | 20 | # This function halts by simply consuming all arguments given to it. 21 | \halt == (\_ halt) 22 | 23 | # Useful: 24 | \string_from = (\x 25 | string_type x x; 26 | long_type x (long_string x); 27 | double_type x (double_string x); 28 | x) 29 | 30 | \print = (\item string_put (string_from item)) 31 | \nl = (print " 32 | ") 33 | 34 | \T = (\T\F T) 35 | \F = (\T\F F) 36 | 37 | \string_eq=(\x\y string_compare x y F T F) 38 | 39 | \long_le = (\x\y long_compare x y T T F) 40 | \long_lt = (\x\y long_compare x y T F F) 41 | \long_ge = (\x\y long_compare x y F T T) 42 | \long_gt = (\x\y long_compare x y F F T) 43 | \long_ne = (\x\y long_compare x y T F T) 44 | \long_min = (\x\y long_compare x y x x y) 45 | 46 | ### List functions 47 | 48 | # "end" is the empty list. 
49 | \end = (\end\item end) 50 | 51 | # The "item" (cons) list constructor is built-in, but could be defined thus: 52 | # \item = (\head\tail \end\item item head tail) 53 | 54 | # Return the first N items of the list. 55 | \list_prefix == (\list\N 56 | long_le N 0 end; 57 | list end \head\tail 58 | \N = (long_sub N 1) 59 | item head; list_prefix tail N 60 | ) 61 | 62 | # Return the item at position N in the list, or default if no such item. 63 | \list_at == (\list\N\default 64 | list default \head\tail 65 | long_compare N 0 default head 66 | \N = (long_sub N 1) 67 | list_at tail N default 68 | ) 69 | 70 | \list_map == (\next\fun\list 71 | list next \head\tail fun head; list_map next fun tail) 72 | 73 | \list_do = (list_map I) 74 | 75 | \list_print = (\fun list_do \x string_put; fun x) 76 | 77 | # We don't use char_put because of buffering problems. 78 | \chars_print = (list_print long_char) 79 | 80 | \bits_print = (list_print \x x "1" "0") 81 | 82 | # Reverse a list. 83 | \reverse=(\list 84 | \reverse==(\list\result list result \h\t reverse t (item h result)) 85 | reverse list end 86 | ) 87 | 88 | ######## 89 | 90 | \test_hello_world== 91 | ( 92 | print "hello world" nl; 93 | ) 94 | 95 | ######## 96 | 97 | \test_cat== 98 | ( 99 | print "=== Enter lines of text and I'll echo them. Press Ctrl-D to stop";nl; 100 | 101 | \long_lt = (\x\y long_compare x y T F F) 102 | 103 | # The cat program echoes the input to the output. 
104 | \cat == (char_get \ch long_lt ch 0 I; char_put ch; cat) 105 | cat 106 | ) 107 | 108 | ######## 109 | 110 | \test_string_slice== 111 | ( 112 | print (string_slice "abcde" 0 1); nl; 113 | print (string_slice "abcde" 0 2); nl; 114 | print (string_slice "abcde" 0 3); nl; 115 | print (string_slice "abcde" 0 4); nl; 116 | print (string_slice "abcde" 0 5); nl; 117 | print (string_slice "abcde" 0 6); nl; 118 | print (string_slice "abcde" 0 700); nl; 119 | print (string_slice "a" 0 0); nl; 120 | print (string_slice "a" 0 1); nl; 121 | print (string_slice "a" 0 2); nl; 122 | print (string_slice "a" -1 0); nl; 123 | print "=====";nl; 124 | print (string_slice "a" 0 1); nl; 125 | print (string_slice "a" -1 2); nl; 126 | print (string_slice "a" -2 3); nl; 127 | print (string_slice "a" -2 4); nl; 128 | print (string_slice "a" -2 0); nl; 129 | print (string_slice "abcde" 0 5); nl; 130 | print (string_slice "abcde" -1 5); nl; 131 | print (string_slice "abcde" -2 5); nl; 132 | print (string_slice "abcde" -3 5); nl; 133 | print (string_slice "abcde" -4 5); nl; 134 | print (string_slice "abcde" -5 5); nl; 135 | print (string_slice "abcde" -5 6); nl; 136 | print (string_slice "abcde" -5 7); nl; 137 | print (string_slice "abcde" -5 8); nl; 138 | print (string_slice "abcde" -5 9); nl; 139 | print (string_slice "abcde" -5 10); nl; 140 | print (string_slice "abcde" -5 11); nl; 141 | print "=====";nl; 142 | print (string_slice "" 0 0); nl; 143 | print (string_slice "" 0 800); nl; 144 | print (string_slice "" -30 800); nl; 145 | #string_put (string_from (string_slice "a" 0 1));nl; 146 | #string_put (string_from (string_slice "a" 0 1));nl; 147 | #string_put (string_from (string_slice "a" 0 1));nl; 148 | #string_put (string_from (string_slice "a" 0 1));nl; 149 | 150 | #string_put (string_slice "a" 0 0) 151 | ) 152 | 153 | ######## 154 | 155 | \test_write_binary == 156 | ( 157 | # Writing binary 158 | 159 | \string_3014 = 160 | ( 161 | string_append (long_char 03); 162 | string_append 
(long_char 00); 163 | string_append (long_char 01); 164 | string_append (long_char 04); 165 | "" 166 | ) 167 | 168 | string_put string_3014; 169 | ) 170 | 171 | ######## 172 | 173 | \test_string_len== 174 | ( 175 | print (string_len ""); nl; 176 | print (string_len "a"); nl; 177 | print (string_len "ab"); nl; 178 | print (string_len "12345678901234567890123456789012"); nl; 179 | ) 180 | 181 | ######## 182 | 183 | \test_string_at== 184 | ( 185 | print (string_at "abc" -1); nl; 186 | print (string_at "abc" 0); nl; 187 | print (string_at "abc" 1); nl; 188 | print (string_at "abc" 2); nl; 189 | print (string_at "abc" 3); nl; 190 | ) 191 | 192 | ######## 193 | 194 | \test_string_compare== 195 | ( 196 | \string_014 = 197 | ( 198 | string_append (long_char 00); 199 | string_append (long_char 01); 200 | string_append (long_char 04); 201 | "" 202 | ) 203 | 204 | \string_041 = 205 | ( 206 | string_append (long_char 00); 207 | string_append (long_char 04); 208 | string_append (long_char 01); 209 | "" 210 | ) 211 | 212 | \string_0142 = (string_append string_014; long_char 02); 213 | 214 | \do_compare=(\x\y\expect 215 | \result = (string_compare x y "LT" "EQ" "GT") 216 | print "string_compare "; print x; print " "; print y; print " "; 217 | print result; print " "; 218 | print (string_eq result expect "GOOD" "BAD"); 219 | nl; 220 | ) 221 | 222 | do_compare string_0142 string_014 "GT"; 223 | do_compare string_014 string_0142 "LT"; 224 | do_compare string_014 string_014 "EQ"; 225 | do_compare string_014 string_041 "LT"; 226 | do_compare string_041 string_014 "GT"; 227 | do_compare string_041 string_0142 "GT"; 228 | ) 229 | 230 | ######## 231 | \test_string_common == 232 | ( 233 | \string_eq=(\x\y string_compare x y F T F) 234 | \long_eq=(\x\y long_compare x y F T F) 235 | 236 | \check = (\value\expect 237 | \halt == (\_ halt) 238 | \ok = (long_eq value expect) 239 | print " "; print (ok "GOOD" "BAD");nl; 240 | ok I halt 241 | ) 242 | 243 | \test_string_common = (\x\y\expect 244 | 
\len = (string_common x y) 245 | print "string_common ";print x; print " "; print y; print " = "; print len; 246 | check len expect; 247 | ) 248 | 249 | test_string_common "" "" 0; 250 | test_string_common "" "a" 0; 251 | test_string_common "a" "a" 1; 252 | test_string_common "a" "ab" 1; 253 | test_string_common "ab" "a" 1; 254 | test_string_common "ab" "ab" 2; 255 | test_string_common "abc" "abd" 2; 256 | test_string_common "aac" "abd" 1; 257 | test_string_common "abd" "abd" 3; 258 | test_string_common "cbd" "abd" 0; 259 | test_string_common "x" "" 0; 260 | ) 261 | 262 | ######## 263 | 264 | \test_long_add== 265 | ( 266 | \x=(long_add 37 23) 267 | print "The value of x is "; print x; print "."; nl; 268 | ) 269 | 270 | ######## 271 | \test_procedural== 272 | ( 273 | # Make some abbreviations. 274 | \add=double_add 275 | \sub=double_sub 276 | \mul=double_mul 277 | \div=double_div 278 | 279 | 280 | print ~@ 281 | === 282 | Here we demonstrate an ordinary "procedural" style of programming. This works 283 | because definitions are NOT recursive by default. If you want a recursive 284 | definition, you must use "==" instead of just "=". 285 | 286 | @; 287 | 288 | \show=(\name\value print name; print " = "; print value; nl;) 289 | 290 | \x=3.0 291 | \y=4.0 292 | \x=(add x x) 293 | \y=(mul y x) 294 | show "x" x; show "y" y; 295 | \x=(div x; mul y 4.0) 296 | show "x" x; show "y" y; 297 | 298 | \z=(mul x; mul y; add 1.0 y) 299 | show "x" x; show "y" y; show "z" z; 300 | \z=(div z 5.0) 301 | show "z" z; 302 | ) 303 | 304 | \test_eager== 305 | ( 306 | \long_le = (\x\y long_compare x y T T F) 307 | 308 | \sum == (\total\count 309 | long_le count 0 total; 310 | 311 | # This form evaluates eagerly: 312 | \total = (long_add total count) 313 | 314 | # Or if you prefer, you can use "?" to force eager evaluation like this: 315 | #? 
(long_add total count) \total 316 | 317 | sum total (long_sub count 1)) 318 | 319 | \sum = (sum 0) 320 | 321 | \count = 100000 322 | print "The sum of 1 .. ";print count; print " is "; print (sum count);nl; 323 | ) 324 | 325 | \test_double_compare == 326 | ( 327 | \do_compare=(\x\y\expect 328 | \result = (double_compare x y "LT" "EQ" "GT") 329 | print "double_compare "; print x; print " "; print y; print " "; 330 | print result; print " "; 331 | print (string_eq result expect "GOOD" "BAD"); 332 | nl; 333 | ) 334 | do_compare 23.0 23.0 "EQ" 335 | do_compare 23.0 24.0 "LT" 336 | do_compare 23.1 23.2 "LT" 337 | do_compare 24.0 23.0 "GT" 338 | do_compare 24.0 240.0 "LT" 339 | do_compare -1.0 4.0 "LT" 340 | do_compare 4.0 -1.0 "GT" 341 | do_compare -1.0 -1.0 "EQ" 342 | ) 343 | 344 | ####### Some tests with arbitrary precision arithmetic. 345 | 346 | \module_test_arithmetic == 347 | ( 348 | 349 | # These put a binary digit 0 or 1 on the front of a list. 350 | \d0 = (item F) 351 | \d1 = (item T) 352 | 353 | # the natural numbers 0 and 1 354 | \nat_0 = end 355 | \nat_1 = (d1 nat_0) 356 | 357 | # (nat_2x x) is twice x. 358 | \nat_2x=(\x x nat_0 \_\_ d0 x) 359 | 360 | # (nat_2x1 x) is twice x plus 1. 361 | \nat_2x1=d1 362 | 363 | # (nat_eq0 x) is true iff x = 0 364 | \nat_eq0=(\x x T \_\_ F) 365 | 366 | # (nat_inc x) is x+1. (x incremented by 1). Both x and the result are of 367 | # type nat. 368 | \nat_inc==(\x x nat_1 \b\n b (d0; nat_inc n) (d1 n)) 369 | 370 | # (nat_dec x) is x-1 if x > 0, or 0 if x = 0. (x decremented by 1) Both x 371 | # and the result are of type nat. 372 | \nat_dec==(\x x nat_0 \b\n b (nat_eq0 n nat_0 (d0 n)) (d1; nat_dec n)) 373 | 374 | # (nat_add x y) is x+y. (the sum of x and y) The x, y, and result are of 375 | # type nat. 376 | \nat_add == (\x\y x y \bx\nx y x \by\ny 377 | \sum=(nat_add nx ny) 378 | bx 379 | (by (d0; nat_inc sum) (d1 sum)) 380 | (item by sum) 381 | ) 382 | 383 | # (nat_mul x y) is x*y. 
(the product of x and y) The x, y, and result are 384 | # of type nat. 385 | \nat_mul == (\x\y x nat_0 \bx\nx y nat_0 \by\ny 386 | bx 387 | (by (d1; nat_add nx (nat_mul ny x)) (d0; nat_mul ny x)) 388 | (by (d0; nat_mul nx y) (d0; d0; nat_mul nx ny)) 389 | ) 390 | 391 | # (int_ge0 x) is true if int x >= 0. 392 | \int_ge0=(\x x T \s\_ s) 393 | 394 | # (int_abs x) is the absolute value of int x. The result is a nat. 395 | \int_abs=(\x x nat_0 \_\n n) 396 | 397 | \int_0 = end 398 | \int_1 = (d1; d1; int_0) 399 | 400 | # (nat_int x) is nat x converted to the int +x. 401 | \nat_int=(\x nat_eq0 x int_0; d1 x) 402 | # (nat_neg x) is nat x converted to the int -x. 403 | \nat_neg=(\x nat_eq0 x int_0; d0 x) 404 | 405 | # (int_2x x) is twice x. 406 | \int_2x=(\x x int_0 \b\n item b; d0; n) 407 | 408 | # (int_inc x) is int x+1. 409 | \int_inc=(\x x int_1 \b\n b (d1; nat_inc n) (nat_neg (nat_dec n))) 410 | 411 | # (int_dec x) is int x-1. 412 | \int_dec=(\x x (d0; nat_1) \b\n b (nat_int (nat_dec n)) (d0; nat_inc n)) 413 | 414 | # (nat_sub x y) is x-y. (x minus y) The x, y are of type nat, but the 415 | # result is of type int because the result might be negative. 416 | \nat_sub==(\x\y x (nat_neg y) \bx\nx y (nat_int x) \by\ny 417 | \z = (int_2x (nat_sub nx ny)) 418 | bx (by I int_inc) (by int_dec I) z 419 | ) 420 | 421 | # (nat_div x y) divides x by y. It yields a pair <q,r>, where q is the 422 | # quotient and r is the remainder. 423 | # 424 | # The result satisfies the equation x = q*y + r, 0 <= r < y. 425 | # 426 | # NOTE: If you divide by zero, the function yields the pair <0,0>.
427 | 428 | \nat_div==(\x\y\return 429 | x (return nat_0 nat_0) \bx\nx 430 | y (return nat_0 nat_0) \by\ny 431 | by 432 | ( 433 | # divide by odd 434 | nat_div nx y \q\r 435 | \r=(bx nat_2x1 nat_2x r) 436 | \d=(nat_sub r y) 437 | int_ge0 d 438 | (return (nat_2x1 q) (int_abs d)) 439 | (return (nat_2x q) r) 440 | ) 441 | ( 442 | # divide by even 443 | nat_div nx ny \q\r 444 | return q (bx nat_2x1 nat_2x r) 445 | ) 446 | ) 447 | 448 | \nat_compare == (\x\y \lt\eq\gt 449 | x (y eq \_\_ lt) \bx\nx 450 | y gt \by\ny 451 | nat_compare nx ny 452 | lt 453 | (bx (by eq gt) (by lt eq)) 454 | gt 455 | ) 456 | 457 | \nat_le = (\x\y nat_compare x y T T F) 458 | \nat_ge = (\x\y nat_compare x y F T T) 459 | 460 | \nat_2 = (d0 nat_1) 461 | \nat_5 = (d1 nat_2) 462 | \nat_10 = (d0 nat_5) 463 | 464 | \nat_div10 = (\x nat_div x nat_10) 465 | 466 | # Convert a nat into a machine long value, ignoring any overflow. 467 | \nat_long = 468 | ( 469 | \nat_long == (\sum\pow\bits 470 | bits sum \bit\bits 471 | \sum = (bit (long_add pow) I sum) 472 | \pow = (long_mul 2 pow) 473 | nat_long sum pow bits 474 | ) 475 | 476 | nat_long 0 1 477 | ) 478 | 479 | # (nat_base_10_lo n) is the list of ASCII decimal digits for n starting 480 | # with the least significant digit. 481 | \nat_base_10_lo == (\x 482 | nat_div10 x \q\r 483 | \ch = (long_add 48; nat_long r); 484 | item ch; 485 | nat_eq0 q end; 486 | nat_base_10_lo q 487 | ) 488 | 489 | # (nat_base_10 n) is the list of ASCII decimal digits for n starting 490 | # with the most significant digit. 
491 | \nat_base_10=(\n reverse; nat_base_10_lo n) 492 | 493 | \nat_print = (\x chars_print (nat_base_10 x)) 494 | \nat_print_lo = (\x chars_print (nat_base_10_lo x)) 495 | 496 | # for testing: 497 | # show in reverse decimal 498 | #\nat_print = nat_print_lo 499 | # show in binary 500 | #\nat_print = bits_print 501 | 502 | \int_base_10 = (\x 503 | int_ge0 x 504 | (nat_base_10; int_abs x) 505 | (item 45; nat_base_10; int_abs x) 506 | ) 507 | 508 | \int_print = (\x chars_print (int_base_10 x)) 509 | 510 | # LATER maybe char constants? e.g. '0' == 48 '-' == 45 511 | # This would be handled in the standard resolution context. It would not be 512 | # part of the grammar. The symbol "'0'" would simply be resolved to the long 513 | # value 48. 514 | 515 | ###### 516 | 517 | \nat_2 = (d0 nat_1) 518 | \nat_3 = (d1 nat_1) 519 | \nat_4 = (d0 nat_2) 520 | \nat_5 = (d1 nat_2) 521 | \nat_6 = (d0 nat_3) 522 | \nat_7 = (d1 nat_3) 523 | \nat_8 = (d0 nat_4) 524 | \nat_9 = (d1 nat_4) 525 | \nat_10 = (d0 nat_5) 526 | \nat_11 = (d1 nat_5) 527 | \nat_12 = (d0 nat_6) 528 | \nat_13 = (d1 nat_6) 529 | \nat_14 = (d0 nat_7) 530 | \nat_15 = (d1 nat_7) 531 | \nat_16 = (d0 nat_8) 532 | \nat_17 = (d1 nat_8) 533 | \nat_18 = (d0 nat_9) 534 | \nat_19 = (d1 nat_9) 535 | \nat_32 = (d0 nat_16) 536 | \nat_20 = (d0 nat_10) 537 | \nat_24 = (d0 nat_12) 538 | \nat_31 = (d1 nat_15) 539 | \nat_48 = (d0 nat_24) 540 | \nat_49 = (d1 nat_24) 541 | 542 | #### 543 | 544 | \test_fibonacci == 545 | ( 546 | 547 | # This lets you use either built-in arithmetic or arbitrary-precision 548 | # arithmetic. 
549 | 550 | \test_case == 551 | ( 552 | \if_show_all 553 | \number_type 554 | \num_rounds 555 | 556 | if_show_all 557 | ( 558 | print "Print the first ";print num_rounds; print " Fibonacci numbers "; 559 | print "using number type ";print number_type;nl; 560 | ) 561 | ( 562 | print "Print the Fibonacci number at position ";print num_rounds; 563 | print " using number type ";print number_type;nl; 564 | ) 565 | 566 | \choose = 567 | ( 568 | \return 569 | \case = (string_eq number_type) 570 | case "double" (return print double_add 1.0); 571 | case "nat" (return nat_print nat_add nat_1); 572 | halt 573 | ) 574 | 575 | choose \num_print \num_add \num_1 576 | 577 | \nums_print = (list_do \x num_print x; nl) 578 | 579 | # Produces the infinite list of all Fibonacci numbers. 580 | \fibonacci = 581 | ( 582 | \1 583 | \add 584 | 585 | \fibonacci == (\x\y 586 | item x; 587 | \z = (add x y) 588 | fibonacci y z 589 | ) 590 | 591 | fibonacci 1 1 592 | ) 593 | 594 | \fibonacci = (fibonacci num_1 num_add) 595 | 596 | if_show_all 597 | (nums_print; list_prefix fibonacci num_rounds) 598 | (num_print; list_at fibonacci (long_sub num_rounds 1) num_1) 599 | nl; 600 | ) 601 | 602 | #test_case T "nat" 200; 603 | #test_case T "nat" 2000; 604 | #test_case T "nat" 1; 605 | #test_case T "nat" 2; 606 | #test_case T "nat" 3; 607 | #test_case F "nat" 4; 608 | #test_case F "double" 4; 609 | #test_case F "nat" 1000; 610 | #test_case F "nat" 100; 611 | #test_case T "nat" 100; 612 | #test_case F "double" 1000; 613 | #test_case F "nat" 10000; 614 | #test_case T "nat" 100; 615 | #test_case T "nat" 10; 616 | #test_case F "nat" 500; 617 | 618 | #test_case T "double" 200; 619 | #test_case T "nat" 200; 620 | #test_case F "nat" 200; 621 | #test_case F "nat" 2000; 622 | 623 | test_case T "nat" 300; 624 | test_case F "nat" 1600; # 10.208s 625 | ) 626 | 627 | #### 628 | \test_binary_counter == 629 | ( 630 | 631 | \loop == 632 | ( 633 | \count 634 | \num 635 | long_le count 0 I; 636 | print (nat_long num); 
print " "; 637 | bits_print num; 638 | nl; 639 | \count = (long_sub count 1) 640 | \num = (nat_inc num) 641 | loop count num 642 | ) 643 | 644 | loop 50 nat_0 645 | ) 646 | 647 | \test_divide== 648 | ( 649 | # LATER automatically check the constraints 650 | \test_div = (\x\y 651 | nat_div x y \q\r 652 | print "test_div";nl; 653 | \show=(\key\val print key;print " = "; nat_print val; nl;) 654 | show "x" x; 655 | show "y" y; 656 | show "q" q; 657 | show "r" r; 658 | nl; 659 | ) 660 | 661 | test_div nat_0 nat_0; 662 | test_div nat_0 nat_1; 663 | test_div nat_1 nat_0; 664 | test_div nat_1 nat_1; 665 | test_div nat_2 nat_1; 666 | 667 | test_div nat_0 nat_2; 668 | test_div nat_1 nat_2; 669 | test_div nat_2 nat_2; 670 | test_div nat_3 nat_2; 671 | test_div nat_4 nat_2; 672 | 673 | test_div nat_0 nat_3; 674 | test_div nat_1 nat_3; 675 | test_div nat_2 nat_3; 676 | test_div nat_3 nat_3; 677 | test_div nat_4 nat_3; 678 | test_div nat_5 nat_3; 679 | test_div nat_6 nat_3; 680 | test_div nat_7 nat_3; 681 | test_div nat_8 nat_3; 682 | test_div nat_9 nat_3; 683 | test_div nat_10 nat_3; 684 | test_div nat_11 nat_3; 685 | test_div nat_12 nat_3; 686 | 687 | test_div nat_0 nat_4; 688 | test_div nat_1 nat_4; 689 | test_div nat_2 nat_4; 690 | test_div nat_3 nat_4; 691 | test_div nat_4 nat_4; 692 | test_div nat_5 nat_4; 693 | test_div nat_6 nat_4; 694 | test_div nat_7 nat_4; 695 | test_div nat_8 nat_4; 696 | test_div nat_9 nat_4; 697 | test_div nat_10 nat_4; 698 | test_div nat_11 nat_4; 699 | test_div nat_12 nat_4; 700 | test_div nat_12 nat_4; 701 | 702 | test_div nat_0 nat_5; 703 | test_div nat_1 nat_5; 704 | test_div nat_2 nat_5; 705 | test_div nat_3 nat_5; 706 | test_div nat_4 nat_5; 707 | test_div nat_5 nat_5; 708 | test_div nat_6 nat_5; 709 | test_div nat_7 nat_5; 710 | test_div nat_8 nat_5; 711 | test_div nat_9 nat_5; 712 | test_div nat_10 nat_5; 713 | test_div nat_11 nat_5; 714 | test_div nat_12 nat_5; 715 | test_div nat_13 nat_5; 716 | test_div nat_14 nat_5; 717 | test_div 
nat_15 nat_5; 718 | test_div nat_16 nat_5; 719 | test_div nat_17 nat_5; 720 | test_div nat_18 nat_5; 721 | test_div nat_19 nat_5; 722 | 723 | \big_test = 724 | ( 725 | \next 726 | \x = (nat_mul nat_31 nat_19) 727 | \churn = (\x nat_add nat_17; nat_mul x x) 728 | \x = (churn x) 729 | \x = (churn x) 730 | \x = (churn x) 731 | \x = (churn x) 732 | \x = (churn x) 733 | \x = (churn x) 734 | \y =nat_10 735 | \y=(nat_mul y y) 736 | 737 | test_div x y; 738 | test_div (churn x) (churn; churn; churn; churn; churn; churn y); 739 | next 740 | ) 741 | 742 | big_test 743 | #big_test; 744 | #big_test; 745 | #big_test; 746 | #big_test; 747 | ) 748 | 749 | \test_sub == 750 | ( 751 | \test_sub = (\x\y 752 | \z = (nat_sub x y) 753 | print "== test_sub: "; 754 | nat_print x; 755 | print " - "; 756 | nat_print y; 757 | print " = "; 758 | int_print z;nl; 759 | ) 760 | 761 | test_sub nat_0 nat_0 762 | test_sub nat_1 nat_0 763 | test_sub nat_2 nat_0 764 | test_sub nat_3 nat_0 765 | test_sub nat_4 nat_0 766 | 767 | test_sub nat_1 nat_1 768 | test_sub nat_0 nat_1 769 | 770 | test_sub nat_0 nat_2 771 | test_sub nat_1 nat_2 772 | test_sub nat_2 nat_2 773 | test_sub nat_3 nat_2 774 | test_sub nat_4 nat_2 775 | 776 | test_sub nat_0 nat_3 777 | test_sub nat_1 nat_3 778 | test_sub nat_2 nat_3 779 | test_sub nat_3 nat_3 780 | test_sub nat_4 nat_3 781 | test_sub nat_5 nat_3 782 | test_sub nat_6 nat_3 783 | 784 | test_sub nat_0 nat_4 785 | test_sub nat_1 nat_4 786 | test_sub nat_2 nat_4 787 | test_sub nat_3 nat_4 788 | test_sub nat_4 nat_4 789 | test_sub nat_5 nat_4 790 | test_sub nat_6 nat_4 791 | test_sub nat_7 nat_4 792 | 793 | test_sub nat_0 nat_5 794 | test_sub nat_1 nat_5 795 | test_sub nat_2 nat_5 796 | test_sub nat_3 nat_5 797 | test_sub nat_4 nat_5 798 | test_sub nat_5 nat_5 799 | test_sub nat_6 nat_5 800 | test_sub nat_7 nat_5 801 | test_sub nat_8 nat_5 802 | test_sub nat_9 nat_5 803 | 804 | test_sub nat_3 nat_19 805 | test_sub nat_19 nat_19 806 | test_sub nat_49 nat_19 807 | test_sub 
nat_48 nat_19 808 | ) 809 | 810 | \return return test_fibonacci test_binary_counter test_divide 811 | test_sub 812 | ) 813 | 814 | module_test_arithmetic 815 | \test_fibonacci \test_binary_counter \test_divide \test_sub 816 | 817 | ######## 818 | 819 | # Choose your test(s) to run down here. Comment out the ones you don't want to run. 820 | 821 | \test_string_type== 822 | ( 823 | \test_case=(\x\expect 824 | \result = (string_type x "yes" "no") 825 | print "string_type "; print result; 826 | print " ["; 827 | print (string_eq result expect "GOOD" "BAD"); 828 | print "]"; 829 | nl; 830 | ) 831 | 832 | test_case 4 "no" 833 | test_case 2.3 "no" 834 | test_case (\x\y y x) "no" 835 | test_case C "no" 836 | test_case (string_append "hello " "world") "yes" 837 | test_case ((\x\y y x) "hi" I) "yes" 838 | test_case "hey!" "yes" 839 | ) 840 | 841 | \test_double_type== 842 | ( 843 | \test_case=(\x\expect 844 | \result = (double_type x "yes" "no") 845 | print "double_type "; print result; 846 | print " ["; 847 | print (string_eq result expect "GOOD" "BAD"); 848 | print "]"; 849 | nl; 850 | ) 851 | 852 | test_case 4 "no" 853 | test_case 2.3 "yes" 854 | test_case (\x\y y x) "no" 855 | test_case C "no" 856 | test_case (string_append "hello " "world") "no" 857 | test_case ((\x\y y x) (double_add 4.2 2.6) I) "yes" 858 | test_case "hey!" "no" 859 | ) 860 | 861 | \test_long_type== 862 | ( 863 | \test_case=(\x\expect 864 | \result = (long_type x "yes" "no") 865 | print "long_type "; print result; 866 | print " ["; 867 | print (string_eq result expect "GOOD" "BAD"); 868 | print "]"; 869 | nl; 870 | ) 871 | 872 | test_case 4 "yes" 873 | test_case 2.3 "no" 874 | test_case (\x\y y x) "no" 875 | test_case C "no" 876 | test_case (string_append "hello " "world") "no" 877 | test_case ((\x\y y x) (long_add 4 2) I) "yes" 878 | test_case "hey!"
"no" 879 | ) 880 | 881 | \test_string_long == 882 | ( 883 | \test_case=(\x\expect 884 | \quote = ~@ "@ 885 | \result = (string_long x "no" \n string_append "yes " (long_string n)) 886 | print "string_long "; 887 | string_put quote; string_put x; string_put quote 888 | print " : "; print result; 889 | print " ["; 890 | print (string_eq result expect "GOOD" "BAD"); 891 | print "]"; 892 | nl; 893 | ) 894 | 895 | test_case "0" "yes 0" 896 | test_case "1" "yes 1" 897 | test_case "-1" "yes -1" 898 | test_case "123" "yes 123" 899 | test_case "-123" "yes -123" 900 | test_case "x123" "no" 901 | test_case "1x23" "no" 902 | test_case "" "no" 903 | test_case " 456 " "no" 904 | test_case "456 " "no" 905 | test_case "1.6" "no" 906 | test_case "0." "no" 907 | ) 908 | 909 | \test_string_double == 910 | ( 911 | \test_case=(\x\expect 912 | \quote = ~@ "@ 913 | \result = (string_double x "no" \n string_append "yes " (double_string n)) 914 | print "string_double "; 915 | string_put quote; string_put x; string_put quote 916 | print " : "; print result; 917 | print " ["; 918 | print (string_eq result expect "GOOD" "BAD"); 919 | print "]"; 920 | nl; 921 | ) 922 | 923 | test_case "0" "yes 0" 924 | test_case "1" "yes 1" 925 | test_case "-1" "yes -1" 926 | test_case "123" "yes 123" 927 | test_case "-123" "yes -123" 928 | test_case "x123" "no" 929 | test_case "1x23" "no" 930 | test_case "" "no" 931 | test_case " 456 " "no" 932 | test_case " 456.78 " "no" 933 | test_case "456.78" "yes 456.78" 934 | test_case "456 " "no" 935 | test_case "1.6" "yes 1.6" 936 | test_case "0." 
"yes 0" 937 | test_case "-0" "yes -0" 938 | test_case "-0.0" "yes -0" 939 | test_case "-0.0123" "yes -0.0123" 940 | ) 941 | 942 | \test_long_double == 943 | ( 944 | \test_case = ( 945 | \x 946 | \y = (long_double x) 947 | 948 | \x_str = (long_string x) 949 | \y_str = (double_string y) 950 | 951 | print "long x = "; string_put x_str; 952 | print " double y = "; string_put y_str;nl; 953 | ) 954 | 955 | test_case 4 956 | test_case 0 957 | test_case -1 958 | test_case -37 959 | test_case 126478 960 | ) 961 | 962 | \test_double_long == 963 | ( 964 | \test_case = ( 965 | \x 966 | \y = (double_long x) 967 | 968 | \x_str = (double_string x) 969 | \y_str = (long_string y) 970 | 971 | print "double x = "; string_put x_str; 972 | print " long y = "; string_put y_str;nl; 973 | ) 974 | 975 | test_case 4.0 976 | test_case 0.0 977 | test_case -1.0 978 | test_case -37.0 979 | test_case 126478.0 980 | test_case 4.3 981 | test_case 0.3 982 | test_case -1.3 983 | test_case -37.3 984 | test_case 126478.3 985 | test_case 4.9 986 | test_case 0.9 987 | test_case -1.9 988 | test_case -37.9 989 | test_case 126478.9 990 | test_case -126478.9 991 | ) 992 | 993 | #### 994 | 995 | test_string_type 996 | test_double_type 997 | test_long_type 998 | 999 | test_long_double 1000 | test_double_long 1001 | 1002 | test_string_long 1003 | test_string_double 1004 | 1005 | test_hello_world 1006 | test_string_slice 1007 | test_write_binary 1008 | test_string_len 1009 | test_string_at 1010 | test_string_compare 1011 | test_string_common 1012 | test_long_add 1013 | test_double_compare 1014 | test_procedural 1015 | test_eager 1016 | 1017 | test_binary_counter; 1018 | test_divide; 1019 | test_sub; 1020 | test_fibonacci 1021 | test_cat 1022 | 1023 | \\Extra stuff down here becomes input 1024 | to the test_cat function. 
1025 | -------------------------------------------------------------------------------- /grammars/java/java_1_7.peg: -------------------------------------------------------------------------------- 1 | #=========================================================================== 2 | # 3 | # Parsing Expression Grammar for Java 1.7 for Mouse 1.1 - 1.5. 4 | # Based on Chapters 3 and 18 of Java Language Specification, Third Edition, 5 | # at http://java.sun.com/docs/books/jls/third_edition/html/j3TOC.html, 6 | # and description of Java SE 7 enhancements in 7 | # http://download.java.net/jdk7/docs/technotes/guides/language/enhancements.html. 8 | # 9 | #--------------------------------------------------------------------------- 10 | # 11 | # Copyright (C) 2006, 2009, 2010, 2011 12 | # by Roman R Redziejowski(www.romanredz.se). 13 | # 14 | # The author gives unlimited permission to copy and distribute 15 | # this file, with or without modifications, as long as this notice 16 | # is preserved, and any changes are properly documented. 17 | # 18 | # This file is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 21 | # 22 | #--------------------------------------------------------------------------- 23 | # 24 | # Latest update 2011-07-21 25 | # 26 | #--------------------------------------------------------------------------- 27 | # 28 | # Change log 29 | # 2006-12-06 Posted on Internet. 30 | # 2009-04-04 Modified to conform to Mouse syntax: 31 | # Underscore removed from names 32 | # \f in Space replaced by Unicode for FormFeed. 33 | # 2009-07-10 Unused rule THREADSAFE removed. 34 | # 2009-07-10 Copying and distribution conditions relaxed by the author. 35 | # 2010-07-01 Updated Mouse version in the comment. 36 | # 2010-09-15 Updated comment on Java release. 
37 | # 2010-09-18 Updated list of reserved words ("keywords") according to 38 | # JLS 3.9: added "const" and "goto", removed "threadsafe". 39 | # 2010-09-18 Removed superfluous "?" everywhere after "Spacing". 40 | # 2010-10-05 Removed erroneous "TypeArguments?" from "EnumConstant". 41 | # See JLS 8.9, JLS 18.1. 42 | # NB. "Annotations" are optional, but not shown as such in JLS. 43 | # 2010-10-20 Corrected "FormalParameterList" according to JLS 8.4.1. 44 | # NB. "VariableModifiers" in "FormalParameter" and "LastFormalParameter" 45 | # are optional, but not shown as such in JLS. 46 | # 2010-10-20 Corrected "Annotation" according to JLS 9.7. 47 | # Is incorrect in JLS 18.1 (does not allow list of value pairs). 48 | # 2010-10-20 Corrected "LocalVariableDeclarationStatement". 49 | # Is incorrect in JLS 18.1: only FINAL allowed as "VariableModifier". 50 | # Is incorrect in JLS 14.4: "VariableModifiers" not shown as optional. 51 | # 2010-10-20 Corrected "AnnotationTypeElementRest": added SEMI as last alternative. 52 | # See JLS 9.6. NB. Missing in JLS 18.1. 53 | # 2010-10-20 Moved "Identifier" from "AnnotationTypeElementRest" to 54 | # "AnnotationMethodRest". Was incorrect in JLS 18.1. 55 | # 2010-10-21 Inverted order of alternatives in "HexSignificand". 56 | # 2010-10-24 Corrected previous correction: moved SEMI from 57 | # "AnnotationTypeElementRest" to "AnnotationTypeElementDeclaration". 58 | # 2010-10-25 Repeated "u" allowed in UnicodeEscape (JLS 3.3). 59 | # Line terminators not allowed in StringLiteral (JLS 3.10.5). 60 | # (Found thanks to Java PEG for Parboiled, which in turn credits 61 | # Reinier Zwitserloot for finding it.) 62 | # 2011-07-19 Added SEMI after "VariableDeclarators" in "MemberDecl" (JLS 8.3). 63 | # 2011-07-21 Corrected "ArrayInitializer" to allow for "{,}" (JLS 10.6). 
64 | # 65 | #--------------------------------------------------------------------------- 66 | # 67 | # Changes for Java 1.7 68 | # 2011-07-18 Implemented Binary Literals: added "BinaryNumeral". 69 | # 2011-07-19 Implemented Underscores in Numerical Literals: 70 | # Added "Digits" and "HexDigits". Removed "Digit". 71 | # Modified "DecimalNumeral", "HexNumeral", "BinaryNumeral", 72 | # "OctalNumeral", "DecimalFloat", "Exponent", 73 | # "HexSignificand", and "BinaryExponent". 74 | # 2011-07-20 Implemented Type Inference for Generic Instance Creation: 75 | # Added "Diamond". 76 | # Modified "ClassCreatorRest" by adding "Diamond?". 77 | # 2011-07-20 Implemented try-with-resources Statement: 78 | # Added try-with-resources as an alternative of "Statement". 79 | # Added "Resource". (Based on comments to JavacParser). 80 | # 2011-07-20 Implemented catching of multiple exceptions: 81 | # Modified "Catch" to allow multiple exception types. 82 | # Based on a pure guess. 83 | # 84 | #--------------------------------------------------------------------------- 85 | # 86 | # 2013-02-16 Modified to work with github.com/pointlander/peg 87 | # 88 | #=========================================================================== 89 | 90 | #------------------------------------------------------------------------- 91 | # Compilation Unit 92 | #------------------------------------------------------------------------- 93 | 94 | package java 95 | 96 | type Java Peg { 97 | 98 | } 99 | 100 | CompilationUnit <- Spacing PackageDeclaration? ImportDeclaration* TypeDeclaration* EOT 101 | PackageDeclaration <- Annotation* PACKAGE QualifiedIdentifier SEMI 102 | ImportDeclaration <- IMPORT STATIC? QualifiedIdentifier (DOT STAR)? 
SEMI 103 | 104 | TypeDeclaration <- Modifier* (ClassDeclaration 105 | / EnumDeclaration 106 | / InterfaceDeclaration 107 | / AnnotationTypeDeclaration) 108 | / SEMI 109 | 110 | #------------------------------------------------------------------------- 111 | # Class Declaration 112 | #------------------------------------------------------------------------- 113 | 114 | ClassDeclaration <- CLASS Identifier TypeParameters? (EXTENDS ClassType)? (IMPLEMENTS ClassTypeList)? ClassBody 115 | 116 | ClassBody <- LWING ClassBodyDeclaration* RWING 117 | 118 | ClassBodyDeclaration 119 | <- SEMI 120 | / STATIC? Block # Static or Instance Initializer 121 | / Modifier* MemberDecl # ClassMemberDeclaration 122 | 123 | MemberDecl 124 | <- TypeParameters GenericMethodOrConstructorRest # Generic Method or Constructor 125 | / Type Identifier MethodDeclaratorRest # Method 126 | / Type VariableDeclarators SEMI # Field 127 | / VOID Identifier VoidMethodDeclaratorRest # Void method 128 | / Identifier ConstructorDeclaratorRest # Constructor 129 | / InterfaceDeclaration # Interface 130 | / ClassDeclaration # Class 131 | / EnumDeclaration # Enum 132 | / AnnotationTypeDeclaration # Annotation 133 | 134 | GenericMethodOrConstructorRest 135 | <- (Type / VOID) Identifier MethodDeclaratorRest 136 | / Identifier ConstructorDeclaratorRest 137 | 138 | MethodDeclaratorRest 139 | <- FormalParameters Dim* (THROWS ClassTypeList)? (MethodBody / SEMI) 140 | 141 | VoidMethodDeclaratorRest 142 | <- FormalParameters (THROWS ClassTypeList)? (MethodBody / SEMI) 143 | 144 | ConstructorDeclaratorRest 145 | <- FormalParameters (THROWS ClassTypeList)? MethodBody 146 | 147 | MethodBody 148 | <- Block 149 | 150 | #------------------------------------------------------------------------- 151 | # Interface Declaration 152 | #------------------------------------------------------------------------- 153 | 154 | InterfaceDeclaration 155 | <- INTERFACE Identifier TypeParameters? (EXTENDS ClassTypeList)? 
InterfaceBody 156 | 157 | InterfaceBody 158 | <- LWING InterfaceBodyDeclaration* RWING 159 | 160 | InterfaceBodyDeclaration 161 | <- Modifier* InterfaceMemberDecl 162 | / SEMI 163 | 164 | InterfaceMemberDecl 165 | <- InterfaceMethodOrFieldDecl 166 | / InterfaceGenericMethodDecl 167 | / VOID Identifier VoidInterfaceMethodDeclaratorRest 168 | / InterfaceDeclaration 169 | / AnnotationTypeDeclaration 170 | / ClassDeclaration 171 | / EnumDeclaration 172 | 173 | InterfaceMethodOrFieldDecl 174 | <- Type Identifier InterfaceMethodOrFieldRest 175 | 176 | InterfaceMethodOrFieldRest 177 | <- ConstantDeclaratorsRest SEMI 178 | / InterfaceMethodDeclaratorRest 179 | 180 | InterfaceMethodDeclaratorRest 181 | <- FormalParameters Dim* (THROWS ClassTypeList)? SEMI 182 | 183 | InterfaceGenericMethodDecl 184 | <- TypeParameters (Type / VOID) Identifier InterfaceMethodDeclaratorRest 185 | 186 | VoidInterfaceMethodDeclaratorRest 187 | <- FormalParameters (THROWS ClassTypeList)? SEMI 188 | 189 | ConstantDeclaratorsRest 190 | <- ConstantDeclaratorRest (COMMA ConstantDeclarator)* 191 | 192 | ConstantDeclarator 193 | <- Identifier ConstantDeclaratorRest 194 | 195 | ConstantDeclaratorRest 196 | <- Dim* EQU VariableInitializer 197 | 198 | #------------------------------------------------------------------------- 199 | # Enum Declaration 200 | #------------------------------------------------------------------------- 201 | 202 | EnumDeclaration 203 | <- ENUM Identifier (IMPLEMENTS ClassTypeList)? EnumBody 204 | 205 | EnumBody 206 | <- LWING EnumConstants? COMMA? EnumBodyDeclarations? RWING 207 | 208 | EnumConstants 209 | <- EnumConstant (COMMA EnumConstant)* 210 | 211 | EnumConstant 212 | <- Annotation* Identifier Arguments? ClassBody? 
213 | 214 | EnumBodyDeclarations 215 | <- SEMI ClassBodyDeclaration* 216 | 217 | #------------------------------------------------------------------------- 218 | # Variable Declarations 219 | #------------------------------------------------------------------------- 220 | 221 | LocalVariableDeclarationStatement 222 | <- (FINAL / Annotation)* Type VariableDeclarators SEMI 223 | 224 | VariableDeclarators 225 | <- VariableDeclarator (COMMA VariableDeclarator)* 226 | 227 | VariableDeclarator 228 | <- Identifier Dim* (EQU VariableInitializer)? 229 | 230 | #------------------------------------------------------------------------- 231 | # Formal Parameters 232 | #------------------------------------------------------------------------- 233 | 234 | FormalParameters 235 | <- LPAR FormalParameterList? RPAR 236 | 237 | FormalParameter 238 | <- (FINAL / Annotation)* Type VariableDeclaratorId 239 | 240 | LastFormalParameter 241 | <- (FINAL / Annotation)* Type ELLIPSIS VariableDeclaratorId 242 | 243 | FormalParameterList 244 | <- FormalParameter (COMMA FormalParameter)* (COMMA LastFormalParameter)? 245 | / LastFormalParameter 246 | 247 | VariableDeclaratorId 248 | <- Identifier Dim* 249 | 250 | #------------------------------------------------------------------------- 251 | # Statements 252 | #------------------------------------------------------------------------- 253 | 254 | Block 255 | <- LWING BlockStatements RWING 256 | 257 | BlockStatements 258 | <- BlockStatement* 259 | 260 | BlockStatement 261 | <- LocalVariableDeclarationStatement 262 | / Modifier* 263 | ( ClassDeclaration 264 | / EnumDeclaration 265 | ) 266 | / Statement 267 | 268 | Statement 269 | <- Block 270 | / ASSERT Expression (COLON Expression)? SEMI 271 | / IF ParExpression Statement (ELSE Statement)? 272 | / FOR LPAR ForInit? SEMI Expression? SEMI ForUpdate? 
RPAR Statement 273 | / FOR LPAR FormalParameter COLON Expression RPAR Statement 274 | / WHILE ParExpression Statement 275 | / DO Statement WHILE ParExpression SEMI 276 | / TRY LPAR Resource (SEMI Resource)* SEMI? RPAR Block Catch* Finally? 277 | / TRY Block (Catch+ Finally? / Finally) 278 | / SWITCH ParExpression LWING SwitchBlockStatementGroups RWING 279 | / SYNCHRONIZED ParExpression Block 280 | / RETURN Expression? SEMI 281 | / THROW Expression SEMI 282 | / BREAK Identifier? SEMI 283 | / CONTINUE Identifier? SEMI 284 | / SEMI 285 | / StatementExpression SEMI 286 | / Identifier COLON Statement 287 | 288 | Resource 289 | <- Modifier* Type VariableDeclaratorId EQU Expression 290 | 291 | Catch 292 | <- CATCH LPAR (FINAL / Annotation)* Type (OR Type)* VariableDeclaratorId RPAR Block 293 | 294 | Finally 295 | <- FINALLY Block 296 | 297 | SwitchBlockStatementGroups 298 | <- SwitchBlockStatementGroup* 299 | 300 | SwitchBlockStatementGroup 301 | <- SwitchLabel BlockStatements 302 | 303 | SwitchLabel 304 | <- CASE ConstantExpression COLON 305 | / CASE EnumConstantName COLON 306 | / DEFAULT COLON 307 | 308 | ForInit 309 | <- (FINAL / Annotation)* Type VariableDeclarators 310 | / StatementExpression (COMMA StatementExpression)* 311 | 312 | ForUpdate 313 | <- StatementExpression (COMMA StatementExpression)* 314 | 315 | EnumConstantName 316 | <- Identifier 317 | 318 | #------------------------------------------------------------------------- 319 | # Expressions 320 | #------------------------------------------------------------------------- 321 | 322 | StatementExpression 323 | <- Expression 324 | 325 | # This is more generous than definition in section 14.8, which allows only 326 | # specific forms of Expression. 327 | 328 | 329 | ConstantExpression 330 | <- Expression 331 | 332 | Expression 333 | <- ConditionalExpression (AssignmentOperator ConditionalExpression)* 334 | 335 | # This definition is part of the modification in JLS Chapter 18 336 | # to minimize look ahead. 
In JLS Chapter 15.27, Expression is defined 337 | # as AssignmentExpression, which is effectively defined as 338 | # (LeftHandSide AssignmentOperator)* ConditionalExpression. 339 | # The above is obtained by allowing ANY ConditionalExpression 340 | # as LeftHandSide, which results in accepting statements like 5 = a. 341 | 342 | 343 | AssignmentOperator 344 | <- EQU 345 | / PLUSEQU 346 | / MINUSEQU 347 | / STAREQU 348 | / DIVEQU 349 | / ANDEQU 350 | / OREQU 351 | / HATEQU 352 | / MODEQU 353 | / SLEQU 354 | / SREQU 355 | / BSREQU 356 | 357 | ConditionalExpression 358 | <- ConditionalOrExpression (QUERY Expression COLON ConditionalOrExpression)* 359 | 360 | ConditionalOrExpression 361 | <- ConditionalAndExpression (OROR ConditionalAndExpression)* 362 | 363 | ConditionalAndExpression 364 | <- InclusiveOrExpression (ANDAND InclusiveOrExpression)* 365 | 366 | InclusiveOrExpression 367 | <- ExclusiveOrExpression (OR ExclusiveOrExpression)* 368 | 369 | ExclusiveOrExpression 370 | <- AndExpression (HAT AndExpression)* 371 | 372 | AndExpression 373 | <- EqualityExpression (AND EqualityExpression)* 374 | 375 | EqualityExpression 376 | <- RelationalExpression ((EQUAL / NOTEQUAL) RelationalExpression)* 377 | 378 | RelationalExpression 379 | <- ShiftExpression ((LE / GE / LT / GT) ShiftExpression / INSTANCEOF ReferenceType)* 380 | 381 | ShiftExpression 382 | <- AdditiveExpression ((SL / SR / BSR) AdditiveExpression)* 383 | 384 | AdditiveExpression 385 | <- MultiplicativeExpression ((PLUS / MINUS) MultiplicativeExpression)* 386 | 387 | MultiplicativeExpression 388 | <- UnaryExpression ((STAR / DIV / MOD) UnaryExpression)* 389 | 390 | UnaryExpression 391 | <- PrefixOp UnaryExpression 392 | / LPAR Type RPAR UnaryExpression 393 | / Primary (Selector)* (PostfixOp)* 394 | 395 | Primary 396 | <- ParExpression 397 | / NonWildcardTypeArguments (ExplicitGenericInvocationSuffix / THIS Arguments) 398 | / THIS Arguments? 
399 | / SUPER SuperSuffix 400 | / Literal 401 | / NEW Creator 402 | / QualifiedIdentifier IdentifierSuffix? 403 | / BasicType Dim* DOT CLASS 404 | / VOID DOT CLASS 405 | 406 | IdentifierSuffix 407 | <- LBRK ( RBRK Dim* DOT CLASS / Expression RBRK) 408 | / Arguments 409 | / DOT 410 | ( CLASS 411 | / ExplicitGenericInvocation 412 | / THIS 413 | / SUPER Arguments 414 | / NEW NonWildcardTypeArguments? InnerCreator 415 | ) 416 | 417 | ExplicitGenericInvocation 418 | <- NonWildcardTypeArguments ExplicitGenericInvocationSuffix 419 | 420 | NonWildcardTypeArguments 421 | <- LPOINT ReferenceType (COMMA ReferenceType)* RPOINT 422 | 423 | ExplicitGenericInvocationSuffix 424 | <- SUPER SuperSuffix 425 | / Identifier Arguments 426 | 427 | PrefixOp 428 | <- INC 429 | / DEC 430 | / BANG 431 | / TILDA 432 | / PLUS 433 | / MINUS 434 | 435 | PostfixOp 436 | <- INC 437 | / DEC 438 | 439 | Selector 440 | <- DOT Identifier Arguments? 441 | / DOT ExplicitGenericInvocation 442 | / DOT THIS 443 | / DOT SUPER SuperSuffix 444 | / DOT NEW NonWildcardTypeArguments? InnerCreator 445 | / DimExpr 446 | 447 | SuperSuffix 448 | <- Arguments 449 | / DOT Identifier Arguments? 450 | 451 | BasicType 452 | <- ( 'byte' 453 | / 'short' 454 | / 'char' 455 | / 'int' 456 | / 'long' 457 | / 'float' 458 | / 'double' 459 | / 'boolean' 460 | ) !LetterOrDigit Spacing 461 | 462 | Arguments 463 | <- LPAR (Expression (COMMA Expression)*)? RPAR 464 | 465 | Creator 466 | <- NonWildcardTypeArguments? CreatedName ClassCreatorRest 467 | / NonWildcardTypeArguments? (ClassType / BasicType) ArrayCreatorRest 468 | 469 | CreatedName 470 | <- Identifier NonWildcardTypeArguments? (DOT Identifier NonWildcardTypeArguments?)* 471 | 472 | InnerCreator 473 | <- Identifier ClassCreatorRest 474 | 475 | ArrayCreatorRest 476 | <- LBRK ( RBRK Dim* ArrayInitializer / Expression RBRK DimExpr* Dim* ) 477 | 478 | # This is more generous than JLS 15.10. 
According to that definition, 479 | # BasicType must be followed by at least one DimExpr or by ArrayInitializer. 480 | 481 | 482 | ClassCreatorRest 483 | <- Diamond? Arguments ClassBody? 484 | 485 | Diamond 486 | <- LPOINT RPOINT 487 | 488 | ArrayInitializer 489 | <- LWING (VariableInitializer (COMMA VariableInitializer)*)? COMMA? RWING 490 | 491 | VariableInitializer 492 | <- ArrayInitializer 493 | / Expression 494 | 495 | ParExpression 496 | <- LPAR Expression RPAR 497 | 498 | QualifiedIdentifier 499 | <- Identifier (DOT Identifier)* 500 | 501 | Dim 502 | <- LBRK RBRK 503 | 504 | DimExpr 505 | <- LBRK Expression RBRK 506 | 507 | #------------------------------------------------------------------------- 508 | # Types and Modifiers 509 | #------------------------------------------------------------------------- 510 | 511 | Type 512 | <- (BasicType / ClassType) Dim* 513 | 514 | ReferenceType 515 | <- BasicType Dim+ 516 | / ClassType Dim* 517 | 518 | ClassType 519 | <- Identifier TypeArguments? (DOT Identifier TypeArguments?)* 520 | 521 | ClassTypeList 522 | <- ClassType (COMMA ClassType)* 523 | 524 | TypeArguments 525 | <- LPOINT TypeArgument (COMMA TypeArgument)* RPOINT 526 | 527 | TypeArgument 528 | <- ReferenceType 529 | / QUERY ((EXTENDS / SUPER) ReferenceType)? 530 | 531 | TypeParameters 532 | <- LPOINT TypeParameter (COMMA TypeParameter)* RPOINT 533 | 534 | TypeParameter 535 | <- Identifier (EXTENDS Bound)? 536 | 537 | Bound 538 | <- ClassType (AND ClassType)* 539 | 540 | Modifier 541 | <- Annotation 542 | / ( 'public' 543 | / 'protected' 544 | / 'private' 545 | / 'static' 546 | / 'abstract' 547 | / 'final' 548 | / 'native' 549 | / 'synchronized' 550 | / 'transient' 551 | / 'volatile' 552 | / 'strictfp' 553 | ) !LetterOrDigit Spacing 554 | 555 | # This common definition of Modifier is part of the modification 556 | # in JLS Chapter 18 to minimize look ahead. The main body of JLS has 557 | # different lists of modifiers for different language elements. 
558 | 559 | #------------------------------------------------------------------------- 560 | # Annotations 561 | #------------------------------------------------------------------------- 562 | 563 | AnnotationTypeDeclaration 564 | <- AT INTERFACE Identifier AnnotationTypeBody 565 | 566 | AnnotationTypeBody 567 | <- LWING AnnotationTypeElementDeclaration* RWING 568 | 569 | AnnotationTypeElementDeclaration 570 | <- Modifier* AnnotationTypeElementRest 571 | / SEMI 572 | 573 | AnnotationTypeElementRest 574 | <- Type AnnotationMethodOrConstantRest SEMI 575 | / ClassDeclaration 576 | / EnumDeclaration 577 | / InterfaceDeclaration 578 | / AnnotationTypeDeclaration 579 | 580 | AnnotationMethodOrConstantRest 581 | <- AnnotationMethodRest 582 | / AnnotationConstantRest 583 | 584 | AnnotationMethodRest 585 | <- Identifier LPAR RPAR DefaultValue? 586 | 587 | AnnotationConstantRest 588 | <- VariableDeclarators 589 | 590 | DefaultValue 591 | <- DEFAULT ElementValue 592 | 593 | Annotation 594 | <- NormalAnnotation 595 | / SingleElementAnnotation 596 | / MarkerAnnotation 597 | 598 | NormalAnnotation 599 | <- AT QualifiedIdentifier LPAR ElementValuePairs? RPAR 600 | 601 | SingleElementAnnotation 602 | <- AT QualifiedIdentifier LPAR ElementValue RPAR 603 | 604 | MarkerAnnotation 605 | <- AT QualifiedIdentifier 606 | 607 | ElementValuePairs 608 | <- ElementValuePair (COMMA ElementValuePair)* 609 | 610 | ElementValuePair 611 | <- Identifier EQU ElementValue 612 | 613 | ElementValue 614 | <- ConditionalExpression 615 | / Annotation 616 | / ElementValueArrayInitializer 617 | 618 | ElementValueArrayInitializer 619 | <- LWING ElementValues? COMMA? 
RWING 620 | 621 | ElementValues 622 | <- ElementValue (COMMA ElementValue)* 623 | 624 | 625 | #========================================================================= 626 | # Lexical Structure 627 | #========================================================================= 628 | #------------------------------------------------------------------------- 629 | # JLS 3.6-7 Spacing 630 | #------------------------------------------------------------------------- 631 | 632 | Spacing 633 | <- ( [ \t\r\n]+ # WhiteSpace [ \t\r\n\u000C]+ 634 | / '/*' (!'*/' .)* '*/' # TraditionalComment 635 | / '//' (![\r\n] .)* [\r\n] # EndOfLineComment 636 | )* 637 | 638 | #------------------------------------------------------------------------- 639 | # JLS 3.8 Identifiers 640 | #------------------------------------------------------------------------- 641 | 642 | Identifier <- !Keyword Letter LetterOrDigit* Spacing 643 | 644 | Letter <- [a-z] / [A-Z] / [_$] 645 | 646 | LetterOrDigit <- [a-z] / [A-Z] / [0-9] / [_$] 647 | 648 | # These are traditional definitions of letters and digits. 649 | # JLS defines letters and digits as Unicode characters recognized 650 | # as such by special Java procedures, which is difficult 651 | # to express in terms of Parsing Expressions. 652 | 653 | #------------------------------------------------------------------------- 654 | # JLS 3.9 Keywords 655 | # More precisely: reserved words. According to JLS, "true", "false", 656 | # and "null" are technically not keywords - but still must not appear 657 | # as identifiers. Keywords "const" and "goto" are not used; JLS explains 658 | # the reason. 
659 | #------------------------------------------------------------------------- 660 | 661 | Keyword 662 | <- ( 'abstract' 663 | / 'assert' 664 | / 'boolean' 665 | / 'break' 666 | / 'byte' 667 | / 'case' 668 | / 'catch' 669 | / 'char' 670 | / 'class' 671 | / 'const' 672 | / 'continue' 673 | / 'default' 674 | / 'double' 675 | / 'do' 676 | / 'else' 677 | / 'enum' 678 | / 'extends' 679 | / 'false' 680 | / 'finally' 681 | / 'final' 682 | / 'float' 683 | / 'for' 684 | / 'goto' 685 | / 'if' 686 | / 'implements' 687 | / 'import' 688 | / 'interface' 689 | / 'int' 690 | / 'instanceof' 691 | / 'long' 692 | / 'native' 693 | / 'new' 694 | / 'null' 695 | / 'package' 696 | / 'private' 697 | / 'protected' 698 | / 'public' 699 | / 'return' 700 | / 'short' 701 | / 'static' 702 | / 'strictfp' 703 | / 'super' 704 | / 'switch' 705 | / 'synchronized' 706 | / 'this' 707 | / 'throws' 708 | / 'throw' 709 | / 'transient' 710 | / 'true' 711 | / 'try' 712 | / 'void' 713 | / 'volatile' 714 | / 'while' 715 | ) !LetterOrDigit 716 | 717 | ASSERT <- 'assert' !LetterOrDigit Spacing 718 | BREAK <- 'break' !LetterOrDigit Spacing 719 | CASE <- 'case' !LetterOrDigit Spacing 720 | CATCH <- 'catch' !LetterOrDigit Spacing 721 | CLASS <- 'class' !LetterOrDigit Spacing 722 | CONTINUE <- 'continue' !LetterOrDigit Spacing 723 | DEFAULT <- 'default' !LetterOrDigit Spacing 724 | DO <- 'do' !LetterOrDigit Spacing 725 | ELSE <- 'else' !LetterOrDigit Spacing 726 | ENUM <- 'enum' !LetterOrDigit Spacing 727 | EXTENDS <- 'extends' !LetterOrDigit Spacing 728 | FINALLY <- 'finally' !LetterOrDigit Spacing 729 | FINAL <- 'final' !LetterOrDigit Spacing 730 | FOR <- 'for' !LetterOrDigit Spacing 731 | IF <- 'if' !LetterOrDigit Spacing 732 | IMPLEMENTS <- 'implements' !LetterOrDigit Spacing 733 | IMPORT <- 'import' !LetterOrDigit Spacing 734 | INTERFACE <- 'interface' !LetterOrDigit Spacing 735 | INSTANCEOF <- 'instanceof' !LetterOrDigit Spacing 736 | NEW <- 'new' !LetterOrDigit Spacing 737 | PACKAGE <- 'package' 
!LetterOrDigit Spacing 738 | RETURN <- 'return' !LetterOrDigit Spacing 739 | STATIC <- 'static' !LetterOrDigit Spacing 740 | SUPER <- 'super' !LetterOrDigit Spacing 741 | SWITCH <- 'switch' !LetterOrDigit Spacing 742 | SYNCHRONIZED <- 'synchronized' !LetterOrDigit Spacing 743 | THIS <- 'this' !LetterOrDigit Spacing 744 | THROWS <- 'throws' !LetterOrDigit Spacing 745 | THROW <- 'throw' !LetterOrDigit Spacing 746 | TRY <- 'try' !LetterOrDigit Spacing 747 | VOID <- 'void' !LetterOrDigit Spacing 748 | WHILE <- 'while' !LetterOrDigit Spacing 749 | 750 | #------------------------------------------------------------------------- 751 | # JLS 3.10 Literals 752 | #------------------------------------------------------------------------- 753 | 754 | Literal 755 | <- ( FloatLiteral 756 | / IntegerLiteral # May be a prefix of FloatLiteral 757 | / CharLiteral 758 | / StringLiteral 759 | / 'true' !LetterOrDigit 760 | / 'false' !LetterOrDigit 761 | / 'null' !LetterOrDigit 762 | ) Spacing 763 | 764 | IntegerLiteral 765 | <- ( HexNumeral 766 | / BinaryNumeral 767 | / OctalNumeral # May be a prefix of HexNumeral or BinaryNumeral 768 | / DecimalNumeral # May be a prefix of OctalNumeral 769 | ) [lL]? 770 | 771 | DecimalNumeral <- '0' / [1-9] ([_]* [0-9])* 772 | 773 | HexNumeral <- ('0x' / '0X') HexDigits 774 | 775 | BinaryNumeral <- ('0b' / '0B') [01] ([_]* [01])* 776 | 777 | OctalNumeral <- '0' ([_]* [0-7])+ 778 | 779 | FloatLiteral <- HexFloat / DecimalFloat 780 | 781 | DecimalFloat 782 | <- Digits '.' Digits? Exponent? [fFdD]? 783 | / '.' Digits Exponent? [fFdD]? 784 | / Digits Exponent [fFdD]? 785 | / Digits Exponent? [fFdD] 786 | 787 | Exponent <- [eE] [+\-]? Digits 788 | 789 | HexFloat <- HexSignificand BinaryExponent [fFdD]? 790 | 791 | HexSignificand 792 | <- ('0x' / '0X') HexDigits? '.' HexDigits 793 | / HexNumeral '.'? # May be a prefix of above 794 | 795 | BinaryExponent <- [pP] [+\-]? 
Digits 796 | 797 | Digits <- [0-9]([_]*[0-9])* 798 | 799 | HexDigits <- HexDigit ([_]*HexDigit)* 800 | 801 | HexDigit <- [a-f] / [A-F] / [0-9] 802 | 803 | CharLiteral <- ['] (Escape / !['\\] .) ['] 804 | 805 | StringLiteral <- '\"' (Escape / !["\\\n\r] .)* '\"' 806 | 807 | Escape <- '\\' ([btnfr"'\\] / OctalEscape / UnicodeEscape) 808 | 809 | OctalEscape 810 | <- [0-3][0-7][0-7] 811 | / [0-7][0-7] 812 | / [0-7] 813 | 814 | UnicodeEscape 815 | <- 'u'+ HexDigit HexDigit HexDigit HexDigit 816 | 817 | #------------------------------------------------------------------------- 818 | # JLS 3.11-12 Separators, Operators 819 | #------------------------------------------------------------------------- 820 | 821 | AT <- '@' Spacing 822 | AND <- '&'![=&] Spacing 823 | ANDAND <- '&&' Spacing 824 | ANDEQU <- '&=' Spacing 825 | BANG <- '!' !'=' Spacing 826 | BSR <- '>>>' !'=' Spacing 827 | BSREQU <- '>>>=' Spacing 828 | COLON <- ':' Spacing 829 | COMMA <- ',' Spacing 830 | DEC <- '--' Spacing 831 | DIV <- '/' !'=' Spacing 832 | DIVEQU <- '/=' Spacing 833 | DOT <- '.' Spacing 834 | ELLIPSIS <- '...' Spacing 835 | EQU <- '=' !'=' Spacing 836 | EQUAL <- '==' Spacing 837 | GE <- '>=' Spacing 838 | GT <- '>'![=>] Spacing 839 | HAT <- '^' !'=' Spacing 840 | HATEQU <- '^=' Spacing 841 | INC <- '++' Spacing 842 | LBRK <- '[' Spacing 843 | LE <- '<=' Spacing 844 | LPAR <- '(' Spacing 845 | LPOINT <- '<' Spacing 846 | LT <- '<' ![=<] Spacing 847 | LWING <- '{' Spacing 848 | MINUS <- '-' ![=\-] Spacing 849 | MINUSEQU <- '-=' Spacing 850 | MOD <- '%' !'=' Spacing 851 | MODEQU <- '%=' Spacing 852 | NOTEQUAL <- '!=' Spacing 853 | OR <- '|' ![=|] Spacing 854 | OREQU <- '|=' Spacing 855 | OROR <- '||' Spacing 856 | PLUS <- '+' ![=+] Spacing 857 | PLUSEQU <- '+=' Spacing 858 | QUERY <- '?' 
Spacing 859 | RBRK <- ']' Spacing 860 | RPAR <- ')' Spacing 861 | RPOINT <- '>' Spacing 862 | RWING <- '}' Spacing 863 | SEMI <- ';' Spacing 864 | SL <- '<<' !'=' Spacing 865 | SLEQU <- '<<=' Spacing 866 | SR <- '>>' ![=>] Spacing 867 | SREQU <- '>>=' Spacing 868 | STAR <- '*' !'=' Spacing 869 | STAREQU <- '*=' Spacing 870 | TILDA <- '~' Spacing 871 | 872 | EOT <- !. 873 | -------------------------------------------------------------------------------- /tree/peg.go: -------------------------------------------------------------------------------- 1 | // Copyright 2010 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package tree 6 | 7 | import ( 8 | "bytes" 9 | _ "embed" 10 | "fmt" 11 | "go/parser" 12 | "go/printer" 13 | "go/token" 14 | "io" 15 | "iter" 16 | "math" 17 | "os" 18 | "slices" 19 | "strconv" 20 | "strings" 21 | "sync" 22 | "text/template" 23 | "unicode" 24 | 25 | "github.com/pointlander/peg/set" 26 | ) 27 | 28 | //go:embed peg.go.tmpl 29 | var pegHeaderTemplate string 30 | 31 | type Type uint8 32 | 33 | const ( 34 | TypeUnknown Type = iota 35 | TypeRule 36 | TypeName 37 | TypeDot 38 | TypeCharacter 39 | TypeRange 40 | TypeString 41 | TypePredicate 42 | TypeStateChange 43 | TypeCommit 44 | TypeAction 45 | TypeSpace 46 | TypeComment 47 | TypePackage 48 | TypeImport 49 | TypeState 50 | TypeAlternate 51 | TypeUnorderedAlternate 52 | TypeSequence 53 | TypePeekFor 54 | TypePeekNot 55 | TypeQuery 56 | TypeStar 57 | TypePlus 58 | TypePeg 59 | TypePush 60 | TypeImplicitPush 61 | TypeNil 62 | TypeLast 63 | ) 64 | 65 | /* 66 | var TypeMap = [...]string{ 67 | "TypeUnknown", 68 | "TypeRule", 69 | "TypeName", 70 | "TypeDot", 71 | "TypeCharacter", 72 | "TypeRange", 73 | "TypeString", 74 | "TypePredicate", 75 | "TypeStateChange", 76 | "TypeCommit", 77 | "TypeAction", 78 | "TypeSpace", 79 | "TypeComment", 80 | "TypePackage", 81 | "TypeImport", 82 | "TypeState", 83 
| "TypeAlternate", 84 | "TypeUnorderedAlternate", 85 | "TypeSequence", 86 | "TypePeekFor", 87 | "TypePeekNot", 88 | "TypeQuery", 89 | "TypeStar", 90 | "TypePlus", 91 | "TypePeg", 92 | "TypePush", 93 | "TypeImplicitPush", 94 | "TypeNil", 95 | "TypeLast", 96 | } 97 | 98 | func (n *node) debug() { 99 | if len(n.string) == 1 { 100 | fmt.Printf("%v %v '%v' %d\n", n.id, TypeMap[n.Type], n.string, n.string[0]) 101 | return 102 | } 103 | fmt.Printf("%v %v '%v'\n", n.id, TypeMap[n.Type], n.string) 104 | } 105 | */ 106 | 107 | func (t Type) GetType() Type { 108 | return t 109 | } 110 | 111 | type node struct { 112 | Type 113 | string 114 | id int 115 | 116 | front *node 117 | back *node 118 | length int 119 | 120 | /* use hash table here instead of Copy? */ 121 | next *node 122 | 123 | parentDetect bool 124 | parentMultipleKey bool 125 | } 126 | 127 | func (n *node) String() string { 128 | return n.string 129 | } 130 | 131 | func (n *node) Escaped() string { 132 | return escape(n.string) 133 | } 134 | 135 | func (n *node) SetString(s string) { 136 | n.string = s 137 | } 138 | 139 | func (n *node) SetType(t Type) { 140 | n.Type = t 141 | } 142 | 143 | func (n *node) GetID() int { 144 | return n.id 145 | } 146 | 147 | func (n *node) SetID(id int) { 148 | n.id = id 149 | } 150 | 151 | func (n *node) Init() { 152 | n.front = nil 153 | n.back = nil 154 | n.length = 0 155 | } 156 | 157 | func (n *node) Front() *node { 158 | return n.front 159 | } 160 | 161 | func (n *node) Next() *node { 162 | return n.next 163 | } 164 | 165 | func (n *node) PushFront(value *node) { 166 | if n.back == nil { 167 | n.back = value 168 | } else { 169 | value.next = n.front 170 | } 171 | n.front = value 172 | n.length++ 173 | } 174 | 175 | func (n *node) PopFront() *node { 176 | front := n.front 177 | 178 | switch true { 179 | case front == nil: 180 | panic("tree is empty") 181 | case front == n.back: 182 | n.front, n.back = nil, nil 183 | default: 184 | n.front, front.next = front.next, nil 185 | } 
186 | 187 | n.length-- 188 | return front 189 | } 190 | 191 | func (n *node) PushBack(value *node) { 192 | if n.front == nil { 193 | n.front = value 194 | } else { 195 | n.back.next = value 196 | } 197 | n.back = value 198 | n.length++ 199 | } 200 | 201 | func (n *node) Len() (c int) { 202 | return n.length 203 | } 204 | 205 | func (n *node) Copy() *node { 206 | return &node{Type: n.Type, string: n.string, id: n.id, front: n.front, back: n.back, length: n.length} 207 | } 208 | 209 | func (n *node) Iterator() iter.Seq[*node] { 210 | element := n.Front() 211 | return func(yield func(*node) bool) { 212 | for element != nil { 213 | if !yield(element) { 214 | return 215 | } 216 | element = element.Next() 217 | } 218 | } 219 | } 220 | 221 | func (n *node) Iterator2() iter.Seq2[int, *node] { 222 | element := n.Front() 223 | return func(yield func(int, *node) bool) { 224 | i := 0 225 | for element != nil { 226 | if !yield(i, element) { 227 | return 228 | } 229 | i++ 230 | element = element.Next() 231 | } 232 | } 233 | } 234 | 235 | func (n *node) ParentDetect() bool { 236 | return n.parentDetect 237 | } 238 | 239 | func (n *node) SetParentDetect(detect bool) { 240 | n.parentDetect = detect 241 | } 242 | 243 | func (n *node) ParentMultipleKey() bool { 244 | return n.parentMultipleKey 245 | } 246 | 247 | func (n *node) SetParentMultipleKey(multipleKey bool) { 248 | n.parentMultipleKey = multipleKey 249 | } 250 | 251 | func (n *node) CheckAlwaysSucceeds(t *Tree) bool { 252 | visited := make(map[*node]bool) 253 | return n.checkAlwaysSucceedsRecursion(t, visited) 254 | } 255 | 256 | func (n *node) checkAlwaysSucceedsRecursion(t *Tree, visited map[*node]bool) bool { 257 | switch n.GetType() { 258 | case TypeRule: 259 | if child := n.Front(); child != nil { 260 | return child.checkAlwaysSucceedsRecursion(t, visited) 261 | } 262 | return false 263 | case TypeName: 264 | rule := t.Rules[n.String()] 265 | if rule == nil { 266 | return false 267 | } 268 | if visited[rule] { 269 | 
return true 270 | } 271 | visited[rule] = true 272 | result := rule.Front().checkAlwaysSucceedsRecursion(t, visited) 273 | visited[rule] = false 274 | return result 275 | case TypeAlternate, TypeUnorderedAlternate: 276 | for element := range n.Iterator() { 277 | if element.checkAlwaysSucceedsRecursion(t, visited) { 278 | return true 279 | } 280 | } 281 | return false 282 | case TypeSequence: 283 | for element := range n.Iterator() { 284 | if !element.checkAlwaysSucceedsRecursion(t, visited) { 285 | return false 286 | } 287 | } 288 | return true 289 | case TypePush, TypeImplicitPush: 290 | if child := n.Front(); child != nil { 291 | return child.checkAlwaysSucceedsRecursion(t, visited) 292 | } 293 | return false 294 | case TypeAction, TypeQuery, TypeStar, TypeNil: 295 | return true 296 | default: 297 | return false 298 | } 299 | } 300 | 301 | // Tree is a tree data structure into which a PEG can be parsed. 302 | type Tree struct { 303 | Rules map[string]*node 304 | rulesCount map[string]uint 305 | node 306 | inline, _switch, Ast bool 307 | Strict bool 308 | werr error 309 | 310 | Generator string 311 | RuleNames []*node 312 | Comments string 313 | PackageName string 314 | Imports []string 315 | EndSymbol rune 316 | PegRuleType string 317 | StructName string 318 | StructVariables string 319 | RulesCount int 320 | HasActions bool 321 | Actions []*node 322 | HasPush bool 323 | HasCommit bool 324 | HasDot bool 325 | HasCharacter bool 326 | HasString bool 327 | HasRange bool 328 | } 329 | 330 | func New(inline, _switch, noast bool) *Tree { 331 | return &Tree{ 332 | Rules: make(map[string]*node), 333 | rulesCount: make(map[string]uint), 334 | inline: inline, 335 | _switch: _switch, 336 | Ast: !noast, 337 | } 338 | } 339 | 340 | func (t *Tree) AddRule(name string) { 341 | t.PushFront(&node{Type: TypeRule, string: name, id: t.RulesCount}) 342 | t.RulesCount++ 343 | } 344 | 345 | func (t *Tree) AddExpression() { 346 | expression := t.PopFront() 347 | rule := t.PopFront() 348 
| rule.PushBack(expression) 349 | t.PushBack(rule) 350 | } 351 | 352 | func (t *Tree) AddName(text string) { 353 | t.PushFront(&node{Type: TypeName, string: text}) 354 | } 355 | 356 | func (t *Tree) AddDot() { t.PushFront(&node{Type: TypeDot, string: "."}) } 357 | func (t *Tree) AddCharacter(text string) { 358 | t.PushFront(&node{Type: TypeCharacter, string: text}) 359 | } 360 | 361 | func (t *Tree) AddDoubleCharacter(text string) { 362 | t.PushFront(&node{Type: TypeCharacter, string: strings.ToLower(text)}) 363 | t.PushFront(&node{Type: TypeCharacter, string: strings.ToUpper(text)}) 364 | t.AddAlternate() 365 | } 366 | 367 | func (t *Tree) AddHexaCharacter(text string) { 368 | hexa, _ := strconv.ParseInt(text, 16, 32) 369 | t.PushFront(&node{Type: TypeCharacter, string: string(rune(hexa))}) 370 | } 371 | 372 | func (t *Tree) AddOctalCharacter(text string) { 373 | octal, _ := strconv.ParseUint(text, 8, 8) /* unsigned 8-bit parse: ParseInt with bitSize 8 reports a range error and clamps octal values above 177 to 127 */ 374 | t.PushFront(&node{Type: TypeCharacter, string: string(rune(octal))}) 375 | } 376 | func (t *Tree) AddPredicate(text string) { t.PushFront(&node{Type: TypePredicate, string: text}) } 377 | func (t *Tree) AddStateChange(text string) { t.PushFront(&node{Type: TypeStateChange, string: text}) } 378 | func (t *Tree) AddNil() { t.PushFront(&node{Type: TypeNil, string: ""}) } 379 | func (t *Tree) AddAction(text string) { t.PushFront(&node{Type: TypeAction, string: text}) } 380 | func (t *Tree) AddPackage(text string) { t.PushBack(&node{Type: TypePackage, string: text}) } 381 | func (t *Tree) AddSpace(text string) { t.PushBack(&node{Type: TypeSpace, string: text}) } 382 | func (t *Tree) AddComment(text string) { t.PushBack(&node{Type: TypeComment, string: text}) } 383 | func (t *Tree) AddImport(text string) { t.PushBack(&node{Type: TypeImport, string: text}) } 384 | func (t *Tree) AddState(text string) { 385 | peg := t.PopFront() 386 | peg.PushBack(&node{Type: TypeState, string: text}) 387 | t.PushBack(peg) 388 | } 389 | 390 | func (t *Tree) addList(listType Type) {
391 | a := t.PopFront() 392 | b := t.PopFront() 393 | var l *node 394 | if b.GetType() == listType { 395 | l = b 396 | } else { 397 | l = &node{Type: listType} 398 | l.PushBack(b) 399 | } 400 | l.PushBack(a) 401 | t.PushFront(l) 402 | } 403 | func (t *Tree) AddAlternate() { t.addList(TypeAlternate) } 404 | func (t *Tree) AddSequence() { t.addList(TypeSequence) } 405 | func (t *Tree) AddRange() { t.addList(TypeRange) } 406 | func (t *Tree) AddDoubleRange() { 407 | a := t.PopFront() 408 | b := t.PopFront() 409 | 410 | t.AddCharacter(strings.ToLower(b.String())) 411 | t.AddCharacter(strings.ToLower(a.String())) 412 | t.addList(TypeRange) 413 | 414 | t.AddCharacter(strings.ToUpper(b.String())) 415 | t.AddCharacter(strings.ToUpper(a.String())) 416 | t.addList(TypeRange) 417 | 418 | t.AddAlternate() 419 | } 420 | 421 | func (t *Tree) addFix(fixType Type) { 422 | n := &node{Type: fixType} 423 | n.PushBack(t.PopFront()) 424 | t.PushFront(n) 425 | } 426 | func (t *Tree) AddPeekFor() { t.addFix(TypePeekFor) } 427 | func (t *Tree) AddPeekNot() { t.addFix(TypePeekNot) } 428 | func (t *Tree) AddQuery() { t.addFix(TypeQuery) } 429 | func (t *Tree) AddStar() { t.addFix(TypeStar) } 430 | func (t *Tree) AddPlus() { t.addFix(TypePlus) } 431 | func (t *Tree) AddPush() { t.addFix(TypePush) } 432 | 433 | func (t *Tree) AddPeg(text string) { t.PushFront(&node{Type: TypePeg, string: text}) } 434 | 435 | func escape(c string) string { 436 | switch c { 437 | case "'": 438 | return "\\'" 439 | case "\"": 440 | return "\"" 441 | default: 442 | c = strconv.Quote(c) 443 | return c[1 : len(c)-1] 444 | } 445 | } 446 | 447 | func (t *Tree) countRules(n *node, ruleReached []bool) { 448 | switch n.GetType() { 449 | case TypeRule: 450 | name, id := n.String(), n.GetID() 451 | if count, ok := t.rulesCount[name]; ok { 452 | t.rulesCount[name] = count + 1 453 | } else { 454 | t.rulesCount[name] = 1 455 | } 456 | if ruleReached[id] { 457 | return 458 | } 459 | ruleReached[id] = true 460 | 
t.countRules(n.Front(), ruleReached) 461 | case TypeName: 462 | t.countRules(t.Rules[n.String()], ruleReached) 463 | case TypeImplicitPush, TypePush: 464 | t.countRules(n.Front(), ruleReached) 465 | case TypeAlternate, TypeUnorderedAlternate, TypeSequence, 466 | TypePeekFor, TypePeekNot, TypeQuery, TypeStar, TypePlus: 467 | for element := range n.Iterator() { 468 | t.countRules(element, ruleReached) 469 | } 470 | } 471 | } 472 | 473 | func (t *Tree) checkRecursion(n *node, ruleReached []bool) bool { 474 | switch n.GetType() { 475 | case TypeRule: 476 | id := n.GetID() 477 | if ruleReached[id] { 478 | t.warn(fmt.Errorf("possible infinite left recursion in rule '%v'", n)) 479 | return false 480 | } 481 | ruleReached[id] = true 482 | consumes := t.checkRecursion(n.Front(), ruleReached) 483 | ruleReached[id] = false 484 | return consumes 485 | case TypeAlternate: 486 | for element := range n.Iterator() { 487 | if !t.checkRecursion(element, ruleReached) { 488 | return false 489 | } 490 | } 491 | return true 492 | case TypeSequence: 493 | return slices.ContainsFunc(slices.Collect(n.Iterator()), func(n *node) bool { 494 | return t.checkRecursion(n, ruleReached) 495 | }) 496 | case TypeName: 497 | return t.checkRecursion(t.Rules[n.String()], ruleReached) 498 | case TypePlus, TypePush, TypeImplicitPush: 499 | return t.checkRecursion(n.Front(), ruleReached) 500 | case TypeCharacter, TypeString: 501 | return len(n.String()) > 0 502 | case TypeDot, TypeRange: 503 | return true 504 | } 505 | return false 506 | } 507 | 508 | func (t *Tree) warn(e error) { 509 | if t.werr == nil { 510 | t.werr = fmt.Errorf("warning: %w", e) 511 | return 512 | } 513 | t.werr = fmt.Errorf("%w\nwarning: %w", t.werr, e) 514 | } 515 | 516 | func (t *Tree) link(countsForRule *[TypeLast]uint, n *node, counts *[TypeLast]uint, countsByRule *[]*[TypeLast]uint, rule *node) { 517 | nodeType := n.GetType() 518 | id := counts[nodeType] 519 | counts[nodeType]++ 520 | countsForRule[nodeType]++ 521 | switch 
nodeType { 522 | case TypeAction: 523 | n.SetID(int(id)) 524 | cp := n.Copy() 525 | name := fmt.Sprintf("Action%v", id) 526 | t.Actions = append(t.Actions, cp) 527 | n.Init() 528 | n.SetType(TypeName) 529 | n.SetString(name) 530 | n.SetID(t.RulesCount) 531 | 532 | emptyRule := &node{Type: TypeRule, string: name, id: t.RulesCount} 533 | implicitPush := &node{Type: TypeImplicitPush} 534 | emptyRule.PushBack(implicitPush) 535 | implicitPush.PushBack(cp) 536 | implicitPush.PushBack(emptyRule.Copy()) 537 | t.PushBack(emptyRule) 538 | t.RulesCount++ 539 | 540 | t.Rules[name] = emptyRule 541 | t.RuleNames = append(t.RuleNames, emptyRule) 542 | *countsByRule = append(*countsByRule, &[TypeLast]uint{}) 543 | case TypeName: 544 | name := n.String() 545 | if _, ok := t.Rules[name]; !ok { 546 | emptyRule := &node{Type: TypeRule, string: name, id: t.RulesCount} 547 | implicitPush := &node{Type: TypeImplicitPush} 548 | emptyRule.PushBack(implicitPush) 549 | implicitPush.PushBack(&node{Type: TypeNil, string: ""}) 550 | implicitPush.PushBack(emptyRule.Copy()) 551 | t.PushBack(emptyRule) 552 | t.RulesCount++ 553 | 554 | t.Rules[name] = emptyRule 555 | t.RuleNames = append(t.RuleNames, emptyRule) 556 | *countsByRule = append(*countsByRule, &[TypeLast]uint{}) 557 | } 558 | case TypePush: 559 | cp := rule.Copy() 560 | name := "PegText" 561 | cp.SetString(name) 562 | if _, ok := t.Rules[name]; !ok { 563 | emptyRule := &node{Type: TypeRule, string: name, id: t.RulesCount} 564 | emptyRule.PushBack(&node{Type: TypeNil, string: ""}) 565 | t.PushBack(emptyRule) 566 | t.RulesCount++ 567 | 568 | t.Rules[name] = emptyRule 569 | t.RuleNames = append(t.RuleNames, emptyRule) 570 | *countsByRule = append(*countsByRule, &[TypeLast]uint{}) 571 | } 572 | n.PushBack(cp) 573 | fallthrough 574 | case TypeImplicitPush: 575 | t.link(countsForRule, n.Front(), counts, countsByRule, rule) 576 | case TypeRule, TypeAlternate, TypeUnorderedAlternate, TypeSequence, 577 | TypePeekFor, TypePeekNot, TypeQuery, 
TypeStar, TypePlus: 578 | for node := range n.Iterator() { 579 | t.link(countsForRule, node, counts, countsByRule, rule) 580 | } 581 | } 582 | } 583 | 584 | func (t *Tree) Compile(file string, args []string, out io.Writer) (err error) { 585 | t.AddImport("fmt") 586 | if t.Ast { 587 | t.AddImport("io") 588 | t.AddImport("os") 589 | t.AddImport("bytes") 590 | } 591 | t.AddImport("slices") 592 | t.AddImport("strconv") 593 | t.EndSymbol = 0x110000 594 | t.RulesCount++ 595 | 596 | t.Generator = strings.Join(slices.Concat([]string{"peg"}, args[1:]), " ") 597 | 598 | counts := [TypeLast]uint{} 599 | countsByRule := make([]*[TypeLast]uint, t.RulesCount) 600 | 601 | /* first pass */ 602 | for n := range t.Iterator() { 603 | switch n.GetType() { 604 | case TypePackage: 605 | t.PackageName = n.String() 606 | case TypeImport: 607 | t.Imports = append(t.Imports, n.String()) 608 | case TypePeg: 609 | t.StructName = n.String() 610 | t.StructVariables = n.Front().String() 611 | case TypeRule: 612 | if _, ok := t.Rules[n.String()]; !ok { 613 | expression := n.Front() 614 | cp := expression.Copy() 615 | expression.Init() 616 | expression.SetType(TypeImplicitPush) 617 | expression.PushBack(cp) 618 | expression.PushBack(n.Copy()) 619 | 620 | t.Rules[n.String()] = n 621 | t.RuleNames = append(t.RuleNames, n) 622 | } 623 | } 624 | } 625 | /* sort imports to satisfy gofmt */ 626 | slices.Sort(t.Imports) 627 | 628 | /* second pass */ 629 | for _, n := range slices.Collect(t.Iterator()) { 630 | if n.GetType() == TypeRule { 631 | countsForRule := [TypeLast]uint{} 632 | countsByRule[n.GetID()] = &countsForRule 633 | t.link(&countsForRule, n, &counts, &countsByRule, n) 634 | } 635 | } 636 | 637 | usage := [TypeLast]uint{} 638 | 639 | wg := sync.WaitGroup{} 640 | 641 | wg.Go(func() { 642 | ruleReached := make([]bool, t.RulesCount) 643 | for n := range t.Iterator() { 644 | if n.GetType() == TypeRule { 645 | t.countRules(n, ruleReached) 646 | break 647 | } 648 | } 649 | for id, reached := range 
ruleReached { 650 | if reached { 651 | for i, count := range countsByRule[id] { 652 | usage[i] += count 653 | } 654 | } 655 | } 656 | }) 657 | 658 | wg.Go(func() { 659 | ruleReached := make([]bool, t.RulesCount) 660 | for n := range t.Iterator() { 661 | if n.GetType() == TypeRule { 662 | t.checkRecursion(n, ruleReached) 663 | } 664 | } 665 | }) 666 | 667 | wg.Wait() 668 | 669 | if t._switch { 670 | var optimizeAlternates func(node *node) (consumes bool, s *set.Set) 671 | cache := make([]struct { 672 | reached bool 673 | consumes bool 674 | s *set.Set 675 | }, t.RulesCount) 676 | 677 | firstPass := true 678 | for i := range cache { 679 | cache[i].s = set.NewSet() 680 | } 681 | optimizeAlternates = func(n *node) (consumes bool, s *set.Set) { 682 | s = set.NewSet() 683 | /*n.debug()*/ 684 | switch n.GetType() { 685 | case TypeRule: 686 | cache := &cache[n.GetID()] 687 | if cache.reached { 688 | consumes = cache.consumes 689 | s = cache.s 690 | return consumes, s 691 | } 692 | cache.reached = true 693 | consumes, s = optimizeAlternates(n.Front()) 694 | cache.consumes = consumes 695 | cache.s = s 696 | case TypeName: 697 | consumes, s = optimizeAlternates(t.Rules[n.String()]) 698 | case TypeDot: 699 | consumes = true 700 | /* TypeDot set doesn't include the EndSymbol */ 701 | s.Add(t.EndSymbol) 702 | s = s.Complement(t.EndSymbol - 1) 703 | case TypeString, TypeCharacter: 704 | consumes = true 705 | s.Add([]rune(n.String())[0]) 706 | case TypeRange: 707 | consumes = true 708 | element := n.Front() 709 | lower := []rune(element.String())[0] 710 | element = element.Next() 711 | upper := []rune(element.String())[0] 712 | s.AddRange(lower, upper) 713 | case TypeAlternate: 714 | consumes = true 715 | properties := make([]struct { 716 | intersects bool 717 | s *set.Set 718 | }, n.Len()) 719 | 720 | for i := range properties { 721 | properties[i].s = set.NewSet() 722 | } 723 | for i, element := range n.Iterator2() { 724 | consumes, properties[i].s = optimizeAlternates(element) 
725 | s = s.Union(properties[i].s) 726 | } 727 | 728 | if firstPass { 729 | break 730 | } 731 | 732 | intersections := 2 733 | for ai, a := range properties[:len(properties)-1] { 734 | for _, b := range properties[ai+1:] { 735 | if a.s.Intersects(b.s) { 736 | intersections++ 737 | properties[ai].intersects = true 738 | break 739 | } 740 | } 741 | } 742 | if intersections >= len(properties) { 743 | break 744 | } 745 | 746 | unordered := &node{Type: TypeUnorderedAlternate} 747 | ordered := &node{Type: TypeAlternate} 748 | maxVal := 0 749 | for i, element := range n.Iterator2() { 750 | if properties[i].intersects { 751 | ordered.PushBack(element.Copy()) 752 | } else { 753 | class := &node{Type: TypeUnorderedAlternate} 754 | for d := range unicode.MaxRune { 755 | if properties[i].s.Has(d) { 756 | class.PushBack(&node{Type: TypeCharacter, string: string(d)}) 757 | } 758 | } 759 | 760 | sequence := &node{Type: TypeSequence} 761 | predicate := &node{Type: TypePeekFor} 762 | length := properties[i].s.Len() 763 | if length == 0 { 764 | class.PushBack(&node{Type: TypeNil, string: ""}) 765 | } 766 | predicate.PushBack(class) 767 | sequence.PushBack(predicate) 768 | sequence.PushBack(element.Copy()) 769 | 770 | if element.GetType() == TypeNil { 771 | unordered.PushBack(sequence) 772 | } else if length > maxVal { 773 | unordered.PushBack(sequence) 774 | maxVal = length 775 | } else { 776 | unordered.PushFront(sequence) 777 | } 778 | } 779 | } 780 | n.Init() 781 | if ordered.Front() == nil { 782 | n.SetType(TypeUnorderedAlternate) 783 | for element := range unordered.Iterator() { 784 | n.PushBack(element.Copy()) 785 | } 786 | } else { 787 | for element := range ordered.Iterator() { 788 | n.PushBack(element.Copy()) 789 | } 790 | n.PushBack(unordered) 791 | } 792 | case TypeSequence: 793 | classes := make([]struct { 794 | s *set.Set 795 | }, n.Len()) 796 | for i := range classes { 797 | classes[i].s = set.NewSet() 798 | } 799 | elements := slices.Collect(n.Iterator()) 800 | for c, 
element := range elements { 801 | consumes, classes[c].s = optimizeAlternates(element) 802 | if consumes { 803 | elements, classes = elements[c+1:], classes[:c+1] 804 | break 805 | } 806 | } 807 | 808 | for c := range slices.Backward(classes) { 809 | s = s.Union(classes[c].s) 810 | } 811 | 812 | for _, element := range elements { 813 | optimizeAlternates(element) 814 | } 815 | case TypePeekNot, TypePeekFor: 816 | optimizeAlternates(n.Front()) 817 | case TypeQuery, TypeStar: 818 | _, s = optimizeAlternates(n.Front()) 819 | case TypePlus, TypePush, TypeImplicitPush: 820 | consumes, s = optimizeAlternates(n.Front()) 821 | case TypeAction, TypeNil: 822 | // empty 823 | } 824 | return consumes, s 825 | } 826 | for element := range t.Iterator() { 827 | if element.GetType() == TypeRule { 828 | optimizeAlternates(element) 829 | break 830 | } 831 | } 832 | 833 | for i := range cache { 834 | cache[i].reached = false 835 | } 836 | firstPass = false 837 | for element := range t.Iterator() { 838 | if element.GetType() == TypeRule { 839 | optimizeAlternates(element) 840 | break 841 | } 842 | } 843 | } 844 | 845 | var buffer bytes.Buffer 846 | 847 | _print := func(format string, a ...any) { _, _ = fmt.Fprintf(&buffer, format, a...) 
} 848 | printSave := func(n uint) { _print("\n position%d, tokenIndex%d := position, tokenIndex", n, n) } 849 | printRestore := func(n uint) { _print("\n position, tokenIndex = position%d, tokenIndex%d", n, n) } 850 | printMemoSave := func(rule int, n uint64, ret bool) { 851 | _print("\n memoize(%d, position%d, tokenIndex%d, %t)", rule, n, n, ret) 852 | } 853 | printMemoCheck := func(rule int) { 854 | _print("\n if memoized, ok := memoization[memoKey[U]{%d, position}]; ok {", rule) 855 | _print("\n return memoizedResult(memoized)") 856 | _print("\n }") 857 | } 858 | 859 | t.HasActions = usage[TypeAction] > 0 860 | t.HasPush = usage[TypePush] > 0 861 | t.HasCommit = usage[TypeCommit] > 0 862 | t.HasDot = usage[TypeDot] > 0 863 | t.HasCharacter = usage[TypeCharacter] > 0 864 | t.HasString = usage[TypeString] > 0 865 | t.HasRange = usage[TypeRange] > 0 866 | 867 | var printRule func(n *node) 868 | var compile func(expression *node, ko uint) (labelLast bool) 869 | var label uint 870 | labels := make(map[uint]bool) 871 | printBegin := func() { _print("\n {") } 872 | printEnd := func() { _print("\n }") } 873 | printLabel := func(n uint) bool { 874 | _print("\n") 875 | if labels[n] { 876 | _print(" l%d:\t", n) 877 | return true 878 | } 879 | return false 880 | } 881 | printJump := func(n uint) { 882 | _print("\n goto l%d", n) 883 | labels[n] = true 884 | } 885 | printRule = func(n *node) { 886 | switch n.GetType() { 887 | case TypeRule: 888 | _print("%v <- ", n) 889 | printRule(n.Front()) 890 | case TypeDot: 891 | _print(".") 892 | case TypeName: 893 | _print("%v", n) 894 | case TypeCharacter: 895 | _print("'%v'", escape(n.String())) 896 | case TypeString: 897 | s := escape(n.String()) 898 | _print("'%v'", s[1:len(s)-1]) 899 | case TypeRange: 900 | element := n.Front() 901 | lower := element 902 | element = element.Next() 903 | upper := element 904 | _print("[%v-%v]", escape(lower.String()), escape(upper.String())) 905 | case TypePredicate: 906 | _print("&{%v}", n) 907 | 
case TypeStateChange: 908 | _print("!{%v}", n) 909 | case TypeAction: 910 | _print("{%v}", n) 911 | case TypeCommit: 912 | _print("commit") 913 | case TypeAlternate: 914 | _print("(") 915 | elements := slices.Collect(n.Iterator()) 916 | printRule(elements[0]) 917 | for _, element := range elements[1:] { 918 | _print(" / ") 919 | printRule(element) 920 | } 921 | _print(")") 922 | case TypeUnorderedAlternate: 923 | _print("(") 924 | elements := slices.Collect(n.Iterator()) 925 | printRule(elements[0]) 926 | for _, element := range elements[1:] { 927 | _print(" | ") 928 | printRule(element) 929 | } 930 | _print(")") 931 | case TypeSequence: 932 | _print("(") 933 | elements := slices.Collect(n.Iterator()) 934 | printRule(elements[0]) 935 | for _, element := range elements[1:] { 936 | _print(" ") 937 | printRule(element) 938 | } 939 | _print(")") 940 | case TypePeekFor: 941 | _print("&") 942 | printRule(n.Front()) 943 | case TypePeekNot: 944 | _print("!") 945 | printRule(n.Front()) 946 | case TypeQuery: 947 | printRule(n.Front()) 948 | _print("?") 949 | case TypeStar: 950 | printRule(n.Front()) 951 | _print("*") 952 | case TypePlus: 953 | printRule(n.Front()) 954 | _print("+") 955 | case TypePush, TypeImplicitPush: 956 | _print("<") 957 | printRule(n.Front()) 958 | _print(">") 959 | case TypeComment: 960 | case TypeNil: 961 | default: 962 | t.warn(fmt.Errorf("illegal node type: %v", n.GetType())) 963 | } 964 | } 965 | dryCompile := true 966 | 967 | compile = func(n *node, ko uint) (labelLast bool) { 968 | switch n.GetType() { 969 | case TypeRule: 970 | t.warn(fmt.Errorf("internal error #1 (%v)", n)) 971 | case TypeDot: 972 | if n.ParentDetect() { 973 | break 974 | } 975 | _print("\n if !matchDot() {") 976 | /*print("\n if buffer[position] == endSymbol {")*/ 977 | printJump(ko) 978 | /*print("}\nposition++")*/ 979 | _print("}") 980 | case TypeName: 981 | name := n.String() 982 | rule := t.Rules[name] 983 | if t.inline && t.rulesCount[name] == 1 { 984 | element := 
rule.Front() 985 | element.SetParentDetect(n.ParentDetect()) 986 | element.SetParentMultipleKey(n.ParentMultipleKey()) 987 | compile(element, ko) 988 | return labelLast 989 | } 990 | // If the rule always succeeds, do not output the if statement 991 | if rule.CheckAlwaysSucceeds(t) { 992 | _print("\n _rules[rule%v]()", name /*rule.GetID()*/) 993 | } else { 994 | _print("\n if !_rules[rule%v]() {", name /*rule.GetID()*/) 995 | printJump(ko) 996 | _print("}") 997 | } 998 | case TypeRange: 999 | if n.ParentDetect() { 1000 | _print("\nposition++") 1001 | break 1002 | } 1003 | element := n.Front() 1004 | lower := element 1005 | element = element.Next() 1006 | upper := element 1007 | /*print("\n if !matchRange('%v', '%v') {", escape(lower.String()), escape(upper.String()))*/ 1008 | _print("\n if c := buffer[position]; c < '%v' || c > '%v' {", escape(lower.String()), escape(upper.String())) 1009 | printJump(ko) 1010 | _print("}\nposition++") 1011 | case TypeCharacter: 1012 | if n.ParentDetect() && !n.ParentMultipleKey() { 1013 | _print("\nposition++") 1014 | break 1015 | } 1016 | /*print("\n if !matchChar('%v') {", escape(n.String()))*/ 1017 | _print("\n if buffer[position] != '%v' {", escape(n.String())) 1018 | printJump(ko) 1019 | _print("}\nposition++") 1020 | case TypeString: 1021 | _print("\n if !matchString(%v) {", strconv.Quote(n.String())) 1022 | printJump(ko) 1023 | _print("}") 1024 | case TypePredicate: 1025 | _print("\n if !(%v) {", n) 1026 | printJump(ko) 1027 | _print("}") 1028 | case TypeStateChange: 1029 | _print("\n %v", n) 1030 | case TypeAction: 1031 | case TypeCommit: 1032 | case TypePush: 1033 | fallthrough 1034 | case TypeImplicitPush: 1035 | ok, element := label, n.Front() 1036 | element.SetParentDetect(n.ParentDetect()) 1037 | element.SetParentMultipleKey(n.ParentMultipleKey()) 1038 | label++ 1039 | nodeType, rule := element.GetType(), element.Next() 1040 | printBegin() 1041 | if nodeType == TypeAction { 1042 | if t.Ast { 1043 | 
_print("\nadd(rule%v, position)", rule) 1044 | } else { 1045 | // There is no AST support, so inline the rule code 1046 | _print("\n%v", element) 1047 | } 1048 | } else { 1049 | _print("\nposition%d := position", ok) 1050 | compile(element, ko) 1051 | if n.GetType() == TypePush && !t.Ast { 1052 | // This is TypePush and there is no AST support, 1053 | // so inline capture to text right here 1054 | _print("\nbegin := position%d", ok) 1055 | _print("\nend := position") 1056 | _print("\ntext = string(buffer[begin:end])") 1057 | } else { 1058 | _print("\nadd(rule%v, position%d)", rule, ok) 1059 | } 1060 | } 1061 | printEnd() 1062 | case TypeAlternate: 1063 | ok := label 1064 | label++ 1065 | printBegin() 1066 | elements := slices.Collect(n.Iterator()) 1067 | elements[0].SetParentDetect(n.ParentDetect()) 1068 | elements[0].SetParentMultipleKey(n.ParentMultipleKey()) 1069 | printSave(ok) 1070 | for _, element := range elements[:len(elements)-1] { 1071 | next := label 1072 | label++ 1073 | compile(element, next) 1074 | printJump(ok) 1075 | printLabel(next) 1076 | printRestore(ok) 1077 | } 1078 | compile(elements[len(elements)-1], ko) 1079 | printEnd() 1080 | labelLast = printLabel(ok) 1081 | case TypeUnorderedAlternate: 1082 | done := ko 1083 | ok := label 1084 | label++ 1085 | printBegin() 1086 | _print("\n switch buffer[position] {") 1087 | elements := slices.Collect(n.Iterator()) 1088 | elements, last := elements[:len(elements)-1], elements[len(elements)-1].Front().Next() 1089 | for _, element := range elements { 1090 | sequence := element.Front() 1091 | class := sequence.Front() 1092 | sequence = sequence.Next() 1093 | _print("\n case") 1094 | comma := false 1095 | for character := range class.Iterator() { 1096 | if comma { 1097 | _print(",") 1098 | } else { 1099 | comma = true 1100 | } 1101 | _print(" '%s'", escape(character.String())) 1102 | } 1103 | _print(":") 1104 | if !dryCompile { 1105 | sequence.SetParentDetect(true) 1106 | if class.Len() > 1 { 1107 | 
sequence.SetParentMultipleKey(true) 1108 | } 1109 | } 1110 | if compile(sequence, done) { 1111 | _print("\nbreak") 1112 | } 1113 | } 1114 | _print("\n default:") 1115 | if compile(last, done) { 1116 | _print("\nbreak") 1117 | } 1118 | _print("\n }") 1119 | printEnd() 1120 | labelLast = printLabel(ok) 1121 | case TypeSequence: 1122 | elements := slices.Collect(n.Iterator()) 1123 | elements[0].SetParentDetect(n.ParentDetect()) 1124 | elements[0].SetParentMultipleKey(n.ParentMultipleKey()) 1125 | for _, element := range elements { 1126 | labelLast = compile(element, ko) 1127 | } 1128 | case TypePeekFor: 1129 | ok := label 1130 | label++ 1131 | printBegin() 1132 | printSave(ok) 1133 | element := n.Front() 1134 | element.SetParentDetect(n.ParentDetect()) 1135 | element.SetParentMultipleKey(n.ParentMultipleKey()) 1136 | compile(element, ko) 1137 | printRestore(ok) 1138 | printEnd() 1139 | case TypePeekNot: 1140 | ok := label 1141 | label++ 1142 | printBegin() 1143 | printSave(ok) 1144 | element := n.Front() 1145 | element.SetParentDetect(n.ParentDetect()) 1146 | element.SetParentMultipleKey(n.ParentMultipleKey()) 1147 | compile(element, ok) 1148 | printJump(ko) 1149 | printLabel(ok) 1150 | printRestore(ok) 1151 | printEnd() 1152 | case TypeQuery: 1153 | qko := label 1154 | label++ 1155 | qok := label 1156 | label++ 1157 | printBegin() 1158 | printSave(qko) 1159 | element := n.Front() 1160 | element.SetParentDetect(n.ParentDetect()) 1161 | element.SetParentMultipleKey(n.ParentMultipleKey()) 1162 | compile(element, qko) 1163 | printJump(qok) 1164 | printLabel(qko) 1165 | printRestore(qko) 1166 | printEnd() 1167 | labelLast = printLabel(qok) 1168 | case TypeStar: 1169 | again := label 1170 | label++ 1171 | out := label 1172 | label++ 1173 | printLabel(again) 1174 | printBegin() 1175 | printSave(out) 1176 | element := n.Front() 1177 | element.SetParentDetect(n.ParentDetect()) 1178 | element.SetParentMultipleKey(n.ParentMultipleKey()) 1179 | compile(element, out) 1180 | 
printJump(again) 1181 | printLabel(out) 1182 | printRestore(out) 1183 | printEnd() 1184 | case TypePlus: 1185 | again := label 1186 | label++ 1187 | out := label 1188 | label++ 1189 | compile(n.Front(), ko) 1190 | printLabel(again) 1191 | printBegin() 1192 | printSave(out) 1193 | compile(n.Front(), out) 1194 | printJump(again) 1195 | printLabel(out) 1196 | printRestore(out) 1197 | printEnd() 1198 | case TypeComment: 1199 | case TypeNil: 1200 | default: 1201 | t.warn(fmt.Errorf("illegal node type: %v", n.GetType())) 1202 | } 1203 | return labelLast 1204 | } 1205 | 1206 | /* let's figure out which jump labels are going to be used with this dry compile */ 1207 | printTemp, _print := _print, func(_ string, _ ...any) {} 1208 | for element := range t.Iterator() { 1209 | if element.GetType() == TypeComment { 1210 | t.Comments += "//" + element.String() + "\n" 1211 | } else if element.GetType() == TypeSpace { 1212 | t.Comments += element.String() 1213 | } 1214 | if element.GetType() != TypeRule { 1215 | continue 1216 | } 1217 | expression := element.Front() 1218 | if expression.GetType() == TypeNil { 1219 | continue 1220 | } 1221 | ko := label 1222 | label++ 1223 | if count, ok := t.rulesCount[element.String()]; !ok { 1224 | continue 1225 | } else if t.inline && count == 1 && ko != 0 { 1226 | continue 1227 | } 1228 | compile(expression, ko) 1229 | } 1230 | _print = printTemp 1231 | label = 0 1232 | dryCompile = false 1233 | 1234 | /* now for the real compile pass */ 1235 | t.PegRuleType = "uint8" 1236 | if length := int64(t.Len()); length > math.MaxUint32 { 1237 | t.PegRuleType = "uint64" 1238 | } else if length > math.MaxUint16 { 1239 | t.PegRuleType = "uint32" 1240 | } else if length > math.MaxUint8 { 1241 | t.PegRuleType = "uint16" 1242 | } 1243 | 1244 | tmpl, err := template.New("peg").Parse(pegHeaderTemplate) 1245 | if err != nil { 1246 | return err 1247 | } 1248 | 1249 | err = tmpl.Execute(&buffer, t) 1250 | if err != nil { 1251 | return err 1252 | } 1253 | 1254 | 
for element := range t.Iterator() { 1255 | if element.GetType() != TypeRule { 1256 | continue 1257 | } 1258 | expression := element.Front() 1259 | if implicit := expression.Front(); expression.GetType() == TypeNil || implicit.GetType() == TypeNil { 1260 | if element.String() != "PegText" { 1261 | t.warn(fmt.Errorf("rule '%v' used but not defined", element)) 1262 | } 1263 | _print("\n nil,") 1264 | continue 1265 | } 1266 | ko := label 1267 | label++ 1268 | _print("\n /* %v ", element.GetID()) 1269 | printRule(element) 1270 | _print(" */") 1271 | if count, ok := t.rulesCount[element.String()]; !ok { 1272 | t.warn(fmt.Errorf("rule '%v' defined but not used", element)) 1273 | _print("\n nil,") 1274 | continue 1275 | } else if t.inline && count == 1 && ko != 0 { 1276 | _print("\n nil,") 1277 | continue 1278 | } 1279 | _print("\n func() bool {") 1280 | if t.Ast { 1281 | printMemoCheck(element.GetID()) 1282 | } 1283 | if t.Ast || labels[ko] { 1284 | printSave(ko) 1285 | } 1286 | compile(expression, ko) 1287 | // print("\n fmt.Printf(\"%v\\n\")", element.String()) 1288 | if t.Ast { 1289 | printMemoSave(element.GetID(), uint64(ko), true) 1290 | } 1291 | _print("\n return true") 1292 | if labels[ko] { 1293 | printLabel(ko) 1294 | if t.Ast { 1295 | printMemoSave(element.GetID(), uint64(ko), false) 1296 | } 1297 | printRestore(ko) 1298 | _print("\n return false") 1299 | } 1300 | _print("\n },") 1301 | } 1302 | _print("\n }\n p.rules = _rules") 1303 | _print("\n return nil") 1304 | _print("\n}\n") 1305 | 1306 | if t.Strict && t.werr != nil { 1307 | // Treat warnings as errors. 1308 | err = t.werr 1309 | } 1310 | if !t.Strict && t.werr != nil { 1311 | // Display warnings. 
1312 | _, _ = fmt.Fprintln(os.Stderr, t.werr) 1313 | } 1314 | if err != nil { 1315 | return err 1316 | } 1317 | fileSet := token.NewFileSet() 1318 | code, err := parser.ParseFile(fileSet, file, &buffer, parser.ParseComments) 1319 | if err != nil { 1320 | _, _ = buffer.WriteTo(out) 1321 | return err 1322 | } 1323 | formatter := printer.Config{Mode: printer.TabIndent | printer.UseSpaces, Tabwidth: 8} 1324 | err = formatter.Fprint(out, fileSet, code) 1325 | if err != nil { 1326 | _, _ = buffer.WriteTo(out) 1327 | return err 1328 | } 1329 | 1330 | return nil 1331 | } 1332 | --------------------------------------------------------------------------------