├── LICENSE.md
├── README.md
├── build
    ├── boot
    ├── build.rkt
    ├── exe.bat
    ├── make
    ├── make.rkt
    ├── unix
    └── waxeye
├── docs
    └── book
    │   ├── book
    │   ├── scheme.lang
    │   └── waxeye.lang
├── grammars
    ├── calc.waxeye
    ├── json.waxeye
    ├── modular
    │   └── mod.rkt
    ├── num.waxeye
    ├── regexp.waxeye
    ├── templ.waxeye
    └── waxeye.waxeye
├── src
    ├── example
    │   └── racket
    │   │   ├── calculator.rkt
    │   │   └── example.rkt
    ├── racket
    │   └── waxeye
    │   │   ├── ast.rkt
    │   │   ├── fa.rkt
    │   │   ├── parser.rkt
    │   │   └── set.rkt
    └── waxeye
    │   ├── action.rkt
    │   ├── code.rkt
    │   ├── debug.rkt
    │   ├── dfa.rkt
    │   ├── dot.rkt
    │   ├── expand.rkt
    │   ├── file.rkt
    │   ├── gen.rkt
    │   ├── grammar-parser.rkt
    │   ├── header.txt
    │   ├── interp.rkt
    │   ├── load.rkt
    │   ├── main.rkt
    │   ├── nfa.rkt
    │   ├── racket.rkt
    │   ├── set.rkt
    │   ├── tester.rkt
    │   ├── transform.rkt
    │   ├── util.rkt
    │   ├── version.rkt
    │   └── waxeye.rkt
└── test
    └── grammars
        ├── json.rkt
        ├── templ.rkt
        └── waxeye.rkt


/LICENSE.md:
--------------------------------------------------------------------------------
  1 | # PolyForm Noncommercial License 1.0.0
  2 | 
  3 | <https://polyformproject.org/licenses/noncommercial/1.0.0>
  4 | 
  5 | ## Acceptance
  6 | 
  7 | In order to get any license under these terms, you must agree
  8 | to them as both strict obligations and conditions to all
  9 | your licenses.
 10 | 
 11 | ## Copyright License
 12 | 
 13 | The licensor grants you a copyright license for the
 14 | software to do everything you might do with the software
 15 | that would otherwise infringe the licensor's copyright
 16 | in it for any permitted purpose.  However, you may
 17 | only distribute the software according to [Distribution
 18 | License](#distribution-license) and make changes or new works
 19 | based on the software according to [Changes and New Works
 20 | License](#changes-and-new-works-license).
 21 | 
 22 | ## Distribution License
 23 | 
 24 | The licensor grants you an additional copyright license
 25 | to distribute copies of the software.  Your license
 26 | to distribute covers distributing the software with
 27 | changes and new works permitted by [Changes and New Works
 28 | License](#changes-and-new-works-license).
 29 | 
 30 | ## Notices
 31 | 
 32 | You must ensure that anyone who gets a copy of any part of
 33 | the software from you also gets a copy of these terms or the
 34 | URL for them above, as well as copies of any plain-text lines
 35 | beginning with `Required Notice:` that the licensor provided
 36 | with the software.  For example:
 37 | 
 38 | > Required Notice: Copyright Yoyodyne, Inc. (http://example.com)
 39 | 
 40 | ## Changes and New Works License
 41 | 
 42 | The licensor grants you an additional copyright license to
 43 | make changes and new works based on the software for any
 44 | permitted purpose.
 45 | 
 46 | ## Patent License
 47 | 
 48 | The licensor grants you a patent license for the software that
 49 | covers patent claims the licensor can license, or becomes able
 50 | to license, that you would infringe by using the software.
 51 | 
 52 | ## Noncommercial Purposes
 53 | 
 54 | Any noncommercial purpose is a permitted purpose.
 55 | 
 56 | ## Personal Uses
 57 | 
 58 | Personal use for research, experiment, and testing for
 59 | the benefit of public knowledge, personal study, private
 60 | entertainment, hobby projects, amateur pursuits, or religious
 61 | observance, without any anticipated commercial application,
 62 | is use for a permitted purpose.
 63 | 
 64 | ## Noncommercial Organizations
 65 | 
 66 | Use by any charitable organization, educational institution,
 67 | public research organization, public safety or health
 68 | organization, environmental protection organization,
 69 | or government institution is use for a permitted purpose
 70 | regardless of the source of funding or obligations resulting
 71 | from the funding.
 72 | 
 73 | ## Fair Use
 74 | 
 75 | You may have "fair use" rights for the software under the
 76 | law. These terms do not limit them.
 77 | 
 78 | ## No Other Rights
 79 | 
 80 | These terms do not allow you to sublicense or transfer any of
 81 | your licenses to anyone else, or prevent the licensor from
 82 | granting licenses to anyone else.  These terms do not imply
 83 | any other licenses.
 84 | 
 85 | ## Patent Defense
 86 | 
 87 | If you make any written claim that the software infringes or
 88 | contributes to infringement of any patent, your patent license
 89 | for the software granted under these terms ends immediately. If
 90 | your company makes such a claim, your patent license ends
 91 | immediately for work on behalf of your company.
 92 | 
 93 | ## Violations
 94 | 
 95 | The first time you are notified in writing that you have
 96 | violated any of these terms, or done anything with the software
 97 | not covered by your licenses, your licenses can nonetheless
 98 | continue if you come into full compliance with these terms,
 99 | and take practical steps to correct past violations, within
100 | 32 days of receiving notice.  Otherwise, all your licenses
101 | end immediately.
102 | 
103 | ## No Liability
104 | 
105 | ***As far as the law allows, the software comes as is, without
106 | any warranty or condition, and the licensor will not be liable
107 | to you for any damages arising out of these terms or the use
108 | or nature of the software, under any kind of legal claim.***
109 | 
110 | ## Definitions
111 | 
112 | The **licensor** is the individual or entity offering these
113 | terms, and the **software** is the software the licensor makes
114 | available under these terms.
115 | 
116 | **You** refers to the individual or entity agreeing to these
117 | terms.
118 | 
119 | **Your company** is any legal entity, sole proprietorship,
120 | or other kind of organization that you work for, plus all
121 | organizations that have control over, are under the control of,
122 | or are under common control with that organization.  **Control**
123 | means ownership of substantially all the assets of an entity,
124 | or the power to direct its management and policies by vote,
125 | contract, or otherwise.  Control can be direct or indirect.
126 | 
127 | **Your licenses** are all the licenses granted to you for the
128 | software under these terms.
129 | 
130 | **Use** means anything you do with the software requiring one
131 | of your licenses.
132 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Waxeye Parser Generator
 2 | =======================
 3 | 
 4 | Waxeye is a parser generator based on parsing expression grammars (PEGs).
 5 | 
 6 | Currently supported programming languages:
 7 | * Racket
 8 | 
 9 | 
10 | Features
11 | --------
12 | 
13 | * Language-agnostic, modular, composable grammars
14 | 
15 | * Automatic AST generation
16 | 
17 | * Command-line grammar interpreter
18 | 
19 | * Grammar testing DSL
20 | 
21 | 
22 | User Manual
23 | -----------
24 | 
25 | Waxeye's user manual is in `docs/manual.html`. The latest version is also
26 | online at http://waxeye.org/manual.html.
27 | 
28 | 
29 | Installation
30 | ------------
31 | 
32 | ### Unix and OSX
33 | 
34 | 1. Extract the files of the distribution.
35 | 
36 | 2. Copy the `waxeye` directory to where you wish to install it.
37 | 
38 | 3. Add the `bin/waxeye` binary to your search path. e.g. If you have `~/bin` in
39 |    your `PATH` and installed waxeye to `/usr/local/waxeye` then you might do
40 |    the following.
41 | 
42 |    `ln -s /usr/local/waxeye/bin/waxeye ~/bin/`
43 | 
44 | 
45 | ### Windows
46 | 
47 | 1. Extract the files of the distribution.
48 | 
49 | 2. Copy the `waxeye` directory to where you wish to install it.
50 | 
51 | 
52 | Running
53 | -------
54 | 
55 | ### Unix and OSX
56 | 
57 | Use the `waxeye` command.
58 | 
59 | ### Windows
60 | 
61 | Use a command prompt to run `waxeye.exe`. Note: If using the interpreter under
62 | Windows, you will need to press `Ctrl-z` and then 'Enter' after the input you
63 | want to interpret.
64 | 
65 | 
66 | Building from Source
67 | --------------------
68 | 
69 | 1. Install [Racket](http://racket-lang.org)
70 | 
71 | 2. Install Waxeye's backend for Racket.
72 |    * Unix and OSX
73 | 
74 |      `sudo ln -s /usr/local/waxeye/src/racket/waxeye /usr/local/racket/lib/racket/collects/`
75 | 
76 |    * Windows
77 | 
78 |      Copy the directory `src/racket/waxeye` into your Racket `collects`
79 |      directory. For example, `C:\Program Files\Racket\collects`.
80 | 
81 | 3. Build Waxeye
82 |    * Unix and OSX
83 | 
84 |      `./build/unix`
85 | 
86 |    * Windows
87 | 
88 |      - If your Racket installation isn't `C:\Program Files\Racket`, then you
89 |        will need to modify `build\exe.bat` to use the correct path.
90 | 
91 |      - From your Waxeye installation directory, run the `build\exe.bat` script
92 |        in a command prompt.
93 | 
94 | 
95 | License
96 | -------
97 | 
98 | [PolyForm Noncommercial License 1.0.0](https://polyformproject.org/licenses/noncommercial/1.0.0)
99 | 


--------------------------------------------------------------------------------
/build/boot:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Runs the build/waxeye launcher on grammars/waxeye.waxeye (paths are relative, so run from the repository root); presumably regenerates the grammar parser under src/waxeye/ - confirm.
3 | ./build/waxeye -g racket src/waxeye/ -c src/waxeye/header.txt -p grammar grammars/waxeye.waxeye
4 | 


--------------------------------------------------------------------------------
/build/build.rkt:
--------------------------------------------------------------------------------
 1 | #lang racket/base
 2 | 
 3 | (require "make.rkt"
 4 |          "../src/waxeye/version.rkt")
 5 | 
 6 | 
 7 | (define *name* "waxeye") ; not referenced by any target in this script
 8 | (define *doc-book* "/usr/local/docbook") ; book-pdf appends "/fo/docbook.xsl" to this path
 9 | 
10 | ;; clean: runs clean-book, clean-dist and clean-unix, then removes the tmp directory.
11 | (target clean (clean-book clean-dist clean-unix)
12 |         (^ rm -rf tmp))
13 | 
14 | ;; book: has no commands of its own; it only triggers the book-html dependency.
15 | (target book (book-html))
16 | 
17 | ;; book-html: runs asciidoc on docs/book/book and writes the result to docs/manual.html.
18 | (target book-html ()
19 |         (^ asciidoc -a toc -n -o docs/manual.html docs/book/book))
20 | 
21 | ;; book-pdf: asciidoc writes tmp/book/book.xml, xsltproc turns it into tmp/book/book.fo using the stylesheet under *doc-book*, and fop writes docs/manual.pdf.
22 | (target book-pdf ()
23 |         (^ mkdir -p tmp/book)
24 |         (^ asciidoc -a toc -b docbook --doctype=book -o tmp/book/book.xml docs/book/book)
25 |         ($ xsltproc '-o 'tmp/book/book.fo (++ *doc-book* "/fo/docbook.xsl") 'tmp/book/book.xml)
26 |         (^ fop tmp/book/book.fo docs/manual.pdf))
27 | 
28 | ;; clean-book: removes tmp/book and the generated docs/manual.html and docs/manual.pdf.
29 | (target clean-book ()
30 |         (^ rm -rf tmp/book)
31 |         (^ rm -f docs/manual.html docs/manual.pdf))
32 | 
33 | ;; dist: has no commands of its own; it runs clean, dist-src and dist-unix in that order.
34 | (target dist (clean dist-src dist-unix))
35 | 
36 | ;; cp-dist: runs `cp -r` to copy `from` into the directory dist/waxeye-<*version*>/.
37 | (define (cp-dist from)
38 |   ($ cp '-r from (++ "dist/waxeye-" *version* "/")))
39 | 
40 | ;; dist-base: after book, creates dist/waxeye-<*version*>, copies the project files into it and makes the build scripts executable.
41 | (target dist-base (book)
42 | 
43 |  ($ mkdir '-p (++ "dist/waxeye-" *version*))
44 | 
45 |  (cp-dist "build")
46 |  (cp-dist "docs")
47 |  (cp-dist "grammars")
48 |  (cp-dist "lib") ; NOTE(review): clean-unix deletes lib, so when reached through dist this copy may find nothing - confirm intent
49 |  (cp-dist "LICENSE.md")
50 |  (cp-dist "README.md")
51 |  (cp-dist "src")
52 |  (cp-dist "test")
53 | 
54 |  ($ chmod '755 (++ "dist/waxeye-" *version* "/build/make"))
55 |  ($ chmod '755 (++ "dist/waxeye-" *version* "/build/unix"))
56 |  ($ chmod '755 (++ "dist/waxeye-" *version* "/build/waxeye")))
57 | 
58 | ;; dist-src: after dist-base, archives dist/waxeye-<*version*> from inside dist as a -src.zip and a -src.tar.bz2.
59 | (target dist-src (dist-base)
60 |         (cd dist
61 |             ($ zip '-r (++ "waxeye-" *version* "-src.zip waxeye-" *version*))
62 |             ($ tar 'cjf (++ "waxeye-" *version* "-src.tar.bz2 waxeye-" *version*))))
63 | 
64 | ;; dist-unix: after dist-base, runs ./build/unix inside the dist copy, then archives that copy from inside dist as -unix.tar.gz and -unix.tar.bz2.
65 | (target dist-unix (dist-base)
66 |         (cd$ (++ "dist/waxeye-" *version*)
67 |              (^ ./build/unix))
68 |         (cd dist
69 |             ($ tar 'czf (++ "waxeye-" *version* "-unix.tar.gz waxeye-" *version*))
70 |             ($ tar 'cjf (++ "waxeye-" *version* "-unix.tar.bz2 waxeye-" *version*))))
71 | 
72 | ;; clean-dist: removes the dist directory.
73 | (target clean-dist ()
74 |         (^ rm -rf dist))
75 | 
76 | ;; clean-unix: removes the bin and lib directories.
77 | (target clean-unix ()
78 |         (^ rm -rf bin lib))
79 | 
80 | ;; Every target is registered by this point; hand control to run-make from make.rkt.
81 | (run-make)
82 | 


--------------------------------------------------------------------------------
/build/exe.bat:
--------------------------------------------------------------------------------
1 | C:\"Program Files\Racket\raco.exe" exe src\waxeye\waxeye.rkt
2 | C:\"Program Files\Racket\raco.exe" distribute . src\waxeye\waxeye.exe
3 | DEL src\waxeye\waxeye.exe
4 | 


--------------------------------------------------------------------------------
/build/make:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Run the Racket build script; "$@" forwards each argument exactly as given (unquoted $* would re-split arguments containing spaces).
3 | racket build/build.rkt "$@"
4 | 


--------------------------------------------------------------------------------
/build/make.rkt:
--------------------------------------------------------------------------------
 1 | #lang racket/base
 2 | 
 3 | (require (only-in racket/system system)
 4 |          (only-in "../src/waxeye/util.rkt" display-ln))
 5 | 
 6 | (provide ^ $ ++ cd cd$ run-cmd run-make target)
 7 | 
 8 | 
 9 | (define *target-table* (make-hash)) ; target name (symbol) -> thunk stored by the `target` macro
10 | (define *dep-table* (make-hash)) ; targets that run-target has already marked as run
11 | ;; Shorthand for string-append.
12 | (define ++ string-append)
13 | ;; (target name (deps ...) code ...): stores a thunk under 'name in *target-table*; the thunk runs each dep through run-target, then evaluates code.
14 | (define-syntax target
15 |   (syntax-rules ()
16 |     ((_ name (deps ...) code ...)
17 |      ;; bind target name to code
18 |      (hash-set!
19 |        *target-table*
20 |        'name
21 |        (lambda ()
22 |          ;; run dependencies
23 |          (for-each run-target '(deps ...))
24 |          ;; run code
25 |          code ...)))))
26 | 
27 | ;; Run target t at most once per invocation; raise an error if t was never registered.
28 | (define (run-target t)
29 |   (let ((t-code (hash-ref *target-table* t #f)))
30 |     (if t-code
31 |         (unless (hash-ref *dep-table* t #f)
32 |                 (hash-set! *dep-table* t #t) ; mark before running so later requests for t are skipped
33 |                 (t-code)) ; call the thunk directly; a bare () is an illegal empty application in racket/base
34 |         (error 'make (++ "target doesn't exist - " (symbol->string t))))))
35 | 
36 | ;; Entry point for a build script: run the targets named on the command line.
37 | (define (run-make)
38 |   (define requested
39 |     (map string->symbol (vector->list (current-command-line-arguments))))
40 |   (cond
41 |    ;; nothing requested: print every registered target, sorted by name
42 |    ((null? requested)
43 |     (display-ln "possible targets:")
44 |     (for-each display-ln (sort (map symbol->string (hash-map *target-table* (lambda (name thunk) name))) string<?)))
45 |    ;; otherwise run the requested targets in the order given
46 |    (else (for-each run-target requested))))
47 | 
48 | ;; Build a shell command line from prog and args, echo it, then run it with `system`.
49 | (define (run-cmd prog args)
50 |   ;; Turn one command word into a string; any other value is passed through as-is.
51 |   (define (word->string w)
52 |     (cond
53 |      ((symbol? w) (symbol->string w))
54 |      ((char? w) (string w))
55 |      ((number? w) (number->string w))
56 |      (else w)))
57 |   ;; Program name, then each argument preceded by a single space.
58 |   (define cmd-text
59 |     (apply ++ (word->string prog)
60 |            (map (lambda (a) (++ " " (word->string a))) args)))
61 |   (display-ln cmd-text)
62 |   (system cmd-text))
63 | 
64 | ;; ($ prog arg ...): prog is quoted, each arg is evaluated; use when an argument must be computed.
65 | (define-syntax $
66 |   (syntax-rules ()
67 |     ((_ prog arg ...)
68 |      (run-cmd 'prog (list arg ...)))))
69 | 
70 | ;; (^ prog arg ...): prog and every arg are quoted, so the command is written literally.
71 | (define-syntax ^
72 |   (syntax-rules ()
73 |     ((_ prog arg ...)
74 |      (run-cmd 'prog '(arg ...)))))
75 | 
76 | ;; (cd$ dir code ...): evaluates dir (a symbol is converted to a string) and runs code with current-directory parameterized to it.
77 | (define-syntax cd$
78 |   (syntax-rules ()
79 |     ((_ dir code ...)
80 |      (parameterize ((current-directory (let ((d dir))
81 |                                          (if (symbol? d)
82 |                                              (symbol->string d)
83 |                                              d))))
84 |                    code ...))))
85 | 
86 | ;; (cd dir code ...): like cd$, but dir is quoted rather than evaluated.
87 | (define-syntax cd
88 |   (syntax-rules ()
89 |     ((_ dir code ...)
90 |      (cd$ 'dir code ...))))
91 | 


--------------------------------------------------------------------------------
/build/unix:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e  # stop at the first failing step so a failed `raco exe` is not followed by distribute/rm and a misleading exit status
3 | raco exe -o waxeye src/waxeye/waxeye.rkt
4 | raco distribute . waxeye
5 | rm waxeye
6 | 


--------------------------------------------------------------------------------
/build/waxeye:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Run Waxeye from source; "$@" forwards each argument exactly as given, so grammar or output paths containing spaces stay intact.
3 | racket src/waxeye/waxeye.rkt "$@"
4 | 


--------------------------------------------------------------------------------
/docs/book/book:
--------------------------------------------------------------------------------
   1 | Language Development with Waxeye
   2 | ================================
   3 | Orlando Hill
   4 | version 0.9.0-dev, January 2021
   5 | 
   6 | 
   7 | 
   8 | == Introduction ==
   9 | 
  10 | As programmers, we are required to make use of data that is presented in a
  11 | variety of formats. In order to extract and manipulate the desired information,
  12 | we need the ability to navigate the structure of the language the data is
  13 | written in. Unless the language is very simple, we must use a parser that
  14 | understands the language and gives us the data in a form we can more readily
  15 | use.
  16 | 
  17 | Manually creating parsers can be boring and time consuming. It is, therefore,
  18 | common to use a parser generator to do the grunt work of constructing the
  19 | parser. This is where Waxeye comes in handy.
  20 | 
  21 | 
  22 | 
  23 | == Getting Started ==
  24 | 
  25 | === Downloading ===
  26 | 
  27 | You can download the latest version of Waxeye's source code from
  28 | https://github.com/pomanu/waxeye[GitHub].
  29 | 
  30 | 
  31 | === Requirements ===
  32 | 
  33 | There are no external dependencies needed to run a pre-built version of Waxeye.
  34 | If you build from source, you'll need http://racket-lang.org[Racket].
  35 | 
  36 | To use a generated parser, you need a supported programming language to run it
  37 | from.
  38 | 
  39 | 
  40 | === Installation ===
  41 | 
  42 | ==== Unix and MacOSX ====
  43 | 1. Extract the files of the distribution.
  44 | 
  45 | 2. Copy the 'waxeye' directory to where you wish to install it.
  46 | 
  47 | 3. Add the 'bin/waxeye' binary to your search path. e.g. If you have `~/bin` in
  48 |    your PATH and installed waxeye to '/usr/local/waxeye' then you might do the
  49 |    following.
  50 | 
  51 | -------------------------------------------------------------------------------
  52 | ln -s /usr/local/waxeye/bin/waxeye ~/bin/
  53 | -------------------------------------------------------------------------------
  54 | 
  55 | ==== Windows ====
  56 | 
  57 | 1. Extract the files of the distribution.
  58 | 
  59 | 2. Copy the 'waxeye' directory to where you wish to install it.
  60 | 
  61 | 
  62 | === Running ===
  63 | 
  64 | Currently, Waxeye is used from a command-line interface. You can use it as a
  65 | command-line tool or as part of a script or build system. There are plans to
  66 | develop a graphical tool at a later stage.
  67 | 
  68 | ==== Unix and MacOSX ====
  69 | 
  70 | Run Waxeye by executing the `waxeye` binary.
  71 | 
  72 | ==== Windows ====
  73 | 
  74 | Use a command prompt to run `waxeye.exe`.
  75 | 
  76 | 
  77 | 
  78 | == Basic Concepts ==
  79 | 
  80 | === What is a parser? ===
  81 | 
  82 | When we want to understand data that has been written in a language of interest
  83 | ('L'), we need to break our data into units of the language. This process of
  84 | breaking our input into different parts, based on the structure of 'L', is
  85 | called 'parsing'. A program used for parsing is called a 'parser'.
  86 | 
  87 | 
  88 | === What is the result of a parser? ===
  89 | 
  90 | Once your input has been parsed, you need the result to be presented in a form
  91 | that is easy to understand and manipulate. Since the input was organized based
  92 | on the hierarchical structure of the language, it makes sense that the output
  93 | of the parser mimic this structure. The most effective form to do this with is
  94 | a tree.
  95 | 
  96 | Such a tree is known as an Abstract Syntax Tree (AST). A Waxeye parser will
  97 | automatically give you an AST that represents your input. The structure of this
  98 | AST is based on the structure of your language's grammar.
  99 | 
 100 | 
 101 | === What is a parser generator? ===
 102 | 
 103 | If 'L' is simple, it is easy for us to use our programming language of choice
 104 | to, manually, write a parser for 'L'. However, as the structural complexity of
 105 | 'L' increases, so too, does the size and complexity of the parser program.
 106 | Writing and maintaining a large parser, by hand, can quickly become a tedious
 107 | and laborious job. Thankfully, we can use a parser generator to automate the
 108 | work of creating a parser so we can focus on other problems.
 109 | 
 110 | A parser generator is a tool designed to help software developers automate the
 111 | process of creating a parser. Just like compilers and assemblers, a parser
 112 | generator takes a description of a program, automatically does the boring work
 113 | for you and gives you a transformed program as output. Each tool accepts input
 114 | in one language ('L1'), performs various transformations and creates output in
 115 | another language ('L2').
 116 | 
 117 |  L1 --> Compiler         --> L2
 118 |  L1 --> Assembler        --> L2
 119 |  L1 --> Parser Generator --> L2
 120 | 
 121 | The key difference between the three tools is the level of abstraction held by
 122 | the input and output languages. The assembler works at the lowest level by
 123 | taking assembly files and producing machine code. The compiler works above the
 124 | assembler by taking a more abstract programming language and generating
 125 | assembly files or machine code directly. Finally, the parser generator has the
 126 | highest level of abstraction and transforms a 'grammar file' into programming
 127 | language source code for a compiler to process.
 128 | 
 129 | 
 130 | === What is a grammar file? ===
 131 | 
 132 | We can define a language as the set of strings it contains. While it is
 133 | sometimes possible to specify a language simply by enumerating all of its
 134 | strings, such an approach has significant drawbacks. Trying to write each
 135 | string in our language could be very time consuming and, potentially, take
 136 | forever.
 137 | 
 138 | Suppose we need to read time information as part of a larger program. In a
 139 | trivial case, the time information may be presented as two digits for the
 140 | hours, a colon `:`, and then two digits for the minutes.
 141 | 
 142 | -------------------------------------------------------------------------------
 143 | 00:00, 00:01, 00:02, ... 14:23, 14:24, 14:25, ... 23:57, 23:58, 23:59
 144 | -------------------------------------------------------------------------------
 145 | 
 146 | We could describe our time language this way but, writing all 1,440 possible
 147 | hour/minute combinations wouldn't be much fun. Not to mention how bad things
 148 | would be if we extended our language to include date information.
 149 | 
 150 | As another example, consider the language that consists of all strings of one
 151 | or more alphabetic characters.
 152 | 
 153 | -------------------------------------------------------------------------------
 154 | a, b, c, ... z, aa, ab, ac, ... az, aaa, aab, aac, ...
 155 | -------------------------------------------------------------------------------
 156 | 
 157 | Even worse than our time example, this language is infinite. It would be
 158 | impossible for us to explicitly list every string in the language.
 159 | 
 160 | If we want to describe such languages, we need a notation that is more abstract
 161 | than simply writing out strings. We call this notation a 'grammar' and the file
 162 | that contains it a 'grammar file'.
 163 | 
 164 | 
 165 | 
 166 | == Waxeye Grammars ==
 167 | 
 168 | To generate a parser for a language, you must supply the parser generator with
 169 | a grammar file that describes the language. Waxeye grammar files are written as
 170 | text documents and are, by convention, given the `.waxeye` file extension.
 171 | 
 172 | A Waxeye grammar consists of a set of rule definitions, called 'non-terminals'.
 173 | Together, the non-terminals succinctly describe the syntax of the language. By
 174 | default, the first non-terminal is considered the starting point of the
 175 | language definition.
 176 | 
 177 | 
 178 | === Non-terminals ===
 179 | 
 180 | Non-terminals are defined in three parts: a name, a rule type and one or more
 181 | grammar expressions.
 182 | 
 183 | The most common non-terminal type is the tree constructing non-terminal. A tree
 184 | constructing non-terminal has the following form:
 185 | 
 186 | *******************************************************************************
 187 | 'Name' `<-` '+expressions'
 188 | *******************************************************************************
 189 | 
 190 | Where 'Name' matches `[a-zA-Z_] *[a-zA-Z0-9_-]`.
 191 | 
 192 | [source,waxeye]
 193 | .A tree constructing non-terminal
 194 | -------------------------------------------------------------------------------
 195 | Example <- A | B
 196 | -------------------------------------------------------------------------------
 197 | 
 198 | 
 199 | The other common non-terminal type is the void non-terminal. The result of a
 200 | void non-terminal is not included in the AST that is constructed by the parser.
 201 | To define a void non-terminal, use this form:
 202 | 
 203 | *******************************************************************************
 204 | 'Name' `<:` '+expressions'
 205 | *******************************************************************************
 206 | 
 207 | [source,waxeye]
 208 | .A void non-terminal
 209 | -------------------------------------------------------------------------------
 210 | Example <: A | B
 211 | -------------------------------------------------------------------------------
 212 | 
 213 | 
 214 | 
 215 | === Expressions ===
 216 | 
 217 | The most important part of each non-terminal definition is the set of
 218 | expressions it contains. Grammar expressions come in different forms and have
 219 | their own meanings. Places where an expression can be contained within another
 220 | expression are denoted with an 'e'.
 221 | 
 222 | 
 223 | ==== Atomic Expressions ====
 224 | 
 225 | ===== Wildcard =====
 226 | `.`
 227 | 
 228 | Matches any character from the input.
 229 | 
 230 | 
 231 | ===== Literal =====
 232 | `'text'`
 233 | 
 234 | Matches `text` in the input.
 235 | 
 236 | 
 237 | ===== Case-insensitive Literal =====
 238 | `"text"`
 239 | 
 240 | Matches `text` in the input while ignoring case. This is equivalent to the
 241 | expression `[tT][eE][xX][tT]` but, is much more readable.
 242 | 
 243 | 
 244 | ===== Character Class =====
 245 | `[a-z_-]`
 246 | 
 247 | Character-class that matches either a lower-case English character, `_` or
 248 | `-`.
 249 | 
 250 | 
 251 | ===== Non-terminal =====
 252 | `NT`
 253 | 
 254 | References the non-terminal named `NT`.
 255 | 
 256 | 
 257 | ===== Parentheses =====
 258 | `(`'e'`)`
 259 | 
 260 | Raises the precedence of the expression 'e'.
 261 | 
 262 | 
 263 | ///////////////////////////////////////////////////////////////////////////////
 264 | ===== Context Actions =====
 265 | `@action<a, b>`
 266 | 
 267 | References the context-action `action` and gives the action the data held by
 268 | the labels `a` and `b`. These are used for context-sensitive parsing. Not fully
 269 | implemented yet.
 270 | ///////////////////////////////////////////////////////////////////////////////
 271 | 
 272 | 
 273 | ==== Prefix Expressions ====
 274 | 
 275 | ===== Void =====
 276 | `:`'e'
 277 | 
 278 | Doesn't include the result of 'e' when building the AST.
 279 | 
 280 | 
 281 | ===== Closure =====
 282 | `*`'e'
 283 | 
 284 | Puts 'e' within a closure.
 285 | 
 286 | 
 287 | ===== Plus =====
 288 | `+`'e'
 289 | 
 290 | Puts 'e' within a plus-closure.
 291 | 
 292 | 
 293 | ===== Optional =====
 294 | `?`'e'
 295 | 
 296 | Puts 'e' within an optional.
 297 | 
 298 | 
 299 | ===== Negative Check =====
 300 | `!`'e'
 301 | 
 302 | Checks that 'e' fails.
 303 | 
 304 | 
 305 | ===== Positive Check =====
 306 | `&`'e'
 307 | 
 308 | Checks that 'e' succeeds.
 309 | 
 310 | 
 311 | ///////////////////////////////////////////////////////////////////////////////
 312 | ===== Labels =====
 313 | `a=`'e'
 314 | 
 315 | Labels the expression 'e' with the label `a`. Not fully implemented yet.
 316 | ///////////////////////////////////////////////////////////////////////////////
 317 | 
 318 | 
 319 | ==== Sequence Expressions ====
 320 | 'e1 e2'
 321 | 
 322 | Matches 'e1' and 'e2' in sequence.
 323 | 
 324 | 
 325 | ==== Alternation Expressions ====
 326 | 'e1'`|`'e2'
 327 | 
 328 | Tries to match 'e1' and, if that fails, tries to match 'e2'.
 329 | 
 330 | 
 331 | === Precedence ===
 332 | 
 333 | In Waxeye grammars, some expressions can have other expressions nested within
 334 | them. When we use parentheses, we are explicitly denoting the nesting structure
 335 | of the expressions.
 336 | 
 337 | [source,waxeye]
 338 | -------------------------------------------------------------------------------
 339 | ((?A) B) | C
 340 | -------------------------------------------------------------------------------
 341 | 
 342 | At times, this can seem needlessly verbose. In many cases, we are able to omit
 343 | the parentheses in favor of a shorter notation. We do this by exploiting the
 344 | precedence of each expression type.
 345 | 
 346 | [source,waxeye]
 347 | -------------------------------------------------------------------------------
 348 | ?A B | C
 349 | -------------------------------------------------------------------------------
 350 | 
 351 | The precedence of an expression determines the priority it has when resolving
 352 | implicitly nested expressions. Each expression type has a level of precedence
 353 | relative to all other types. There are four different precedence levels in
 354 | Waxeye grammars.
 355 | 
 356 | 
 357 | ==== Level 4 ====
 358 | 
 359 | The highest precedence is held by the atomic expressions. Because these
 360 | expressions cannot, themselves, contain expressions, there is no need to
 361 | consider which expressions are nested within them.
 362 | 
 363 | 
 364 | ==== Level 3 ====
 365 | 
 366 | The prefix expressions hold the next precedence level. Their nesting is
 367 | resolved directly after the atomic expressions.
 368 | 
 369 | 
 370 | ==== Level 2 ====
 371 | 
 372 | Sequences of expressions are formed once the atomic and prefix expressions have
 373 | been resolved.
 374 | 
 375 | 
 376 | ==== Level 1 ====
 377 | 
 378 | Finally, once all other expressions have been resolved, the different choices of
 379 | the alternation expression are resolved.
 380 | 
 381 | 
 382 | 
 383 | === Pruning Non-terminals ===
 384 | 
 385 | Sometimes, creating a new AST node will give us more information than we need.
 386 | We might want to create a new AST node, only if doing so will tell us something
 387 | interesting about our input. If the additional node gives us nothing of
 388 | interest, our tree could be said to contain 'vertical noise'.
 389 | 
 390 | To make it easier to process the AST, we can remove this vertical noise by
 391 | using the 'pruning' non-terminal type. This non-terminal type has the following
 392 | form:
 393 | 
 394 | *******************************************************************************
 395 | 'Name' `<=` '+expressions'
 396 | *******************************************************************************
 397 | 
 398 | When 'Name' has successfully parsed a string, one of three things will happen,
 399 | depending on the number of results to be included from 'Name''s expressions.
 400 | 
 401 | * If there are no expression results to be included, nothing new will be added
 402 |   to the AST.
 403 | 
 404 | * If there is one expression result to be included, that result will take the
 405 |   place of the 'Name' AST node.
 406 | 
 407 | * Otherwise, a new 'Name' AST node will be created, just like a tree
 408 |   constructing non-terminal.
 409 | 
 410 | 
 411 | To help understand how this works, consider an example from a simple arithmetic
 412 | grammar.
 413 | 
 414 | [source,waxeye]
 415 | -------------------------------------------------------------------------------
 416 | Product <- Number *([*/] Number)
 417 | 
 418 | Number  <- +[0-9]
 419 | -------------------------------------------------------------------------------
 420 | 
 421 | If we use the 'Product' rule to parse the string `3*7`, we get a tree with
 422 | 'Product' at the root and, below that, a 'Number', a `*` character and then
 423 | another 'Number'.
 424 | 
 425 | -------------------------------------------------------------------------------
 426 | Product
 427 | ->  Number
 428 |     |   3
 429 | |   *
 430 | ->  Number
 431 |     |   7
 432 | -------------------------------------------------------------------------------
 433 | 
 434 | However, if the 'Product' rule parses a string with just one 'Number' in it, we
 435 | will get a tree that is slightly bigger than we need. Parsing the string `5`
 436 | produces the following tree. 
 437 | 
 438 | -------------------------------------------------------------------------------
 439 | Product
 440 | ->  Number
 441 |     |   5
 442 | -------------------------------------------------------------------------------
 443 | 
 444 | In this case, having a 'Product' node at the root of the AST isn't necessary.
 445 | If we want to, we can rewrite the original grammar to use a pruning
 446 | non-terminal.
 447 | 
 448 | [source,waxeye]
 449 | -------------------------------------------------------------------------------
 450 | Product <= Number *([*/] Number)
 451 | 
 452 | Number  <- +[0-9]
 453 | -------------------------------------------------------------------------------
 454 | 
 455 | Now, when we use 'Product' to parse `3*7`, we will get the same result as
 456 | before but, when parsing `5`, we get an AST with 'Number' as the root.
 457 | 
 458 | -------------------------------------------------------------------------------
 459 | Number
 460 | |   5
 461 | -------------------------------------------------------------------------------
 462 | 
 463 | 
 464 | As a second example, let's look at a grammar for nested parentheses.
 465 | 
 466 | [source,waxeye]
 467 | -------------------------------------------------------------------------------
 468 | A <- :'(' A :')' | B
 469 | 
 470 | B <- 'b'
 471 | -------------------------------------------------------------------------------
 472 | 
 473 | Here are some example inputs and their resulting ASTs:
 474 | 
 475 | Input: `b`
 476 | 
 477 | -------------------------------------------------------------------------------
 478 | A
 479 | ->  B
 480 |     |   b
 481 | -------------------------------------------------------------------------------
 482 | 
 483 | Input: `(b)`
 484 | 
 485 | -------------------------------------------------------------------------------
 486 | A
 487 | ->  A
 488 |     ->  B
 489 |         |   b
 490 | -------------------------------------------------------------------------------
 491 | 
 492 | Input: `(((b)))`
 493 | 
 494 | -------------------------------------------------------------------------------
 495 | A
 496 | ->  A
 497 |     ->  A
 498 |         ->  A
 499 |             ->  B
 500 |                 |   b
 501 | -------------------------------------------------------------------------------
 502 | 
 503 | Unless we want to know the number of parentheses matched, trees like these
 504 | contain more information than we need. Again, we are able to solve this by
 505 | rewriting the grammar using a 'pruning' non-terminal.
 506 | 
 507 | [source,waxeye]
 508 | -------------------------------------------------------------------------------
 509 | A <= :'(' A :')' | B
 510 | 
 511 | B <- 'b'
 512 | -------------------------------------------------------------------------------
 513 | 
 514 | This time, parsing the input `(((b)))` gives us a much shorter tree.
 515 | 
 516 | -------------------------------------------------------------------------------
 517 | B
 518 | |   b
 519 | -------------------------------------------------------------------------------
 520 | 
 521 | 
 522 | === Comments ===
 523 | 
 524 | There are two types of comments in Waxeye grammars: single-line and multi-line.
 525 | 
 526 | ==== Single-line ====
 527 | 
 528 | Single-line comments start at the first `#` outside of an atomic expression and
 529 | extend until the end of the line.
 530 | 
 531 | [source,waxeye]
 532 | -------------------------------------------------------------------------------
 533 | # This is a single-line comment.
 534 | -------------------------------------------------------------------------------
 535 | 
 536 | 
 537 | ==== Multi-line ====
 538 | 
 539 | Multi-line comments are opened at the first `/*` outside of an atomic
 540 | expression and closed with a `*/`.
 541 | 
 542 | 
 543 | [source,waxeye]
 544 | -------------------------------------------------------------------------------
 545 | /* This is a multi-line comment. */
 546 | -------------------------------------------------------------------------------
 547 | 
 548 | [source,waxeye]
 549 | -------------------------------------------------------------------------------
 550 | /* This is, also,
 551 |    a multi-line comment. */
 552 | -------------------------------------------------------------------------------
 553 | 
 554 | 
 555 | As an added convenience when editing a grammar, multi-line comments can be
 556 | nested within each other. This is handy when you want to comment out a section
 557 | of the grammar that already contains a comment.
 558 | 
 559 | 
 560 | [source,waxeye]
 561 | -------------------------------------------------------------------------------
 562 | /*
 563 | 
 564 | This is the outer comment.
 565 | 
 566 | A <- 'a'
 567 | 
 568 | /*
 569 |  * This is the inner comment.
 570 |  */
 571 | B <- 'b'
 572 | 
 573 | */
 574 | -------------------------------------------------------------------------------
 575 | 
 576 | 
 577 | 
 578 | 
 579 | == Using Waxeye ==
 580 | 
 581 | This chapter will show you how to set up Waxeye for your programming language.
 582 | It covers language specific installation requirements and presents some basic
 583 | boilerplate code to get you started. You can find copies of this boilerplate
 584 | code in `src/example/`. I use `$WAXEYE_HOME` to refer to the location where you
 585 | have installed the files of the Waxeye distribution.
 586 | 
 587 | The example grammar we'll be using can be found in `grammars/num.waxeye`. You
 588 | may wish to copy it to the directory you're working in so you can experiment
 589 | with extending and modifying the grammar.
 590 | 
 591 | .grammars/num.waxeye
 592 | [source,waxeye]
 593 | -------------------------------------------------------------------------------
 594 | Num <- '0' | [1-9] *[0-9]
 595 | -------------------------------------------------------------------------------
 596 | 
 597 | Once set up and run, the boilerplate example will use the parser you generated
 598 | to parse the string `42` and print the AST it creates.
 599 | 
 600 | -------------------------------------------------------------------------------
 601 | Num
 602 | |   4
 603 | |   2
 604 | -------------------------------------------------------------------------------
 605 | 
 606 | 
 607 | 
 608 | === Using Waxeye from Racket ===
 609 | 
 610 | Waxeye's Racket runtime is compatible with http://racket-lang.org/[Racket].
 611 | 
 612 | ==== Install ====
 613 | 
 614 | Install the waxeye collection where Racket can find it.
 615 | 
 616 | -------------------------------------------------------------------------------
 617 | # Install the Waxeye collection; change to your install paths as needed
 618 | sudo ln -s /usr/local/waxeye/src/racket/waxeye /usr/local/racket/lib/racket/collects/
 619 | -------------------------------------------------------------------------------
 620 | 
 621 | ==== Generate Parser ====
 622 | 
 623 | -------------------------------------------------------------------------------
 624 | waxeye -g racket . num.waxeye
 625 | -------------------------------------------------------------------------------
 626 | 
 627 | ==== Use Parser ====
 628 | 
 629 | .src/example/racket/example.rkt
 630 | [source,scheme]
 631 | -------------------------------------------------------------------------------
 632 | #lang racket
 633 | 
 634 | (require "parser.rkt")
 635 | 
 636 | ;; Parse our input
 637 | (let ((ast (parser "42")))
 638 |   ;; Print our AST
 639 |   (display-ast ast))
 640 | -------------------------------------------------------------------------------
 641 | 
 642 | ==== Run from Racket ====
 643 | 
 644 | -------------------------------------------------------------------------------
 645 | racket -t example.rkt
 646 | -------------------------------------------------------------------------------
 647 | 
 648 | 
 649 | 
 650 | == Using ASTs and Parse Errors ==
 651 | 
 652 | Since just printing an Abstract Syntax Tree isn't very interesting, let's have
 653 | a look at how to access the information the ASTs contain.
 654 | 
 655 | When you use a Waxeye parser, the result will be one of two things. If the
 656 | parser successfully parsed the input, the result will be an AST. If the input
 657 | doesn't match the syntax of the language, the result will be a 'parse error'.
 658 | 
 659 | 
 660 | === ASTs ===
 661 | 
 662 | ASTs come in three different forms: 'tree', 'char' and 'empty'.
 663 | 
 664 | * A 'tree' AST contains a type, a list of children, and the start and end
 665 |   positions in the input.
 666 | 
 667 | * A 'char' AST contains a single character and has no children.
 668 | 
 669 | * An 'empty' AST simply signifies that parsing was successful. If your starting
 670 |   non-terminal is voided or is pruning and had no children, you will get an
 671 |   empty AST.
 672 | 
 673 | 
 674 | ==== Using an AST node as string ====
 675 | 
 676 | If a given AST node will only ever have 'char' children, you may wish to treat
 677 | that node as a single string.
 678 | 
 679 | 
 680 | ===== From Racket =====
 681 | 
 682 | [source,scheme]
 683 | -------------------------------------------------------------------------------
 684 | (display (list->string (ast-c ast)))
 685 | (newline)
 686 | -------------------------------------------------------------------------------
 687 | 
 688 | 
 689 | 
 690 | === Parse Errors ===
 691 | 
 692 | A parse error contains information about where the input is invalid and hints
 693 | about what is wrong with it.
 694 | 
 695 | 
 696 | 
 697 | === Determining the result type ===
 698 | 
 699 | 
 700 | ==== From Racket ====
 701 | 
 702 | [source,scheme]
 703 | -------------------------------------------------------------------------------
 704 | (cond
 705 |   ((ast? result) "tree ast")
 706 |   ((parse-error? result) "error")
 707 |   (else "empty ast"))
 708 | -------------------------------------------------------------------------------
 709 | 
 710 | 
 711 | 
 712 | 
 713 | == Example: A Calculator ==
 714 | 
 715 | Now that we know how to write grammars, generate parsers and manipulate ASTs, we
 716 | can put these skills together to build a small language interpreter. In this
 717 | chapter, we create a command-line calculator.
 718 | 
 719 | Our calculator reads a line of input, parses it as an arithmetic expression and
 720 | computes the result. The arithmetic language supports the following constructs.
 721 | 
 722 | * floating point numbers
 723 | * binary operators +,-,*,/
 724 | * unary negation
 725 | * parentheses
 726 | 
 727 | 
 728 | .grammars/calc.waxeye
 729 | [source,waxeye]
 730 | -------------------------------------------------------------------------------
 731 | calc  <- ws sum
 732 | 
 733 | sum   <- prod *([+-] ws prod)
 734 | 
 735 | prod  <- unary *([*/] ws unary)
 736 | 
 737 | unary <= '-' ws unary
 738 |        | :'(' ws sum :')' ws
 739 |        | num
 740 | 
 741 | num   <- +[0-9] ?('.' +[0-9]) ws
 742 | 
 743 | ws    <: *[ \t\n\r]
 744 | -------------------------------------------------------------------------------
 745 | 
 746 | 
 747 | === Calculator in Racket ===
 748 | 
 749 | .src/example/racket/calculator.rkt
 750 | [source,scheme]
 751 | -------------------------------------------------------------------------------
 752 | #lang racket
 753 | 
 754 | (require "parser.rkt")
 755 | 
 756 | ;; A commandline arithmetic calculator.
 757 | 
 758 | (define (calc input)
 759 |   (let ((ast (parser input)))
 760 |     (if (ast? ast)
 761 |         (begin (display (sum (car (ast-c ast))))
 762 |                (newline))
 763 |         (display-parse-error ast))))
 764 | 
 765 | 
 766 | (define (bin-op ast fn ch op1 op2)
 767 |   (let* ((chil (list->vector (ast-c ast)))
 768 |          (val (fn (vector-ref chil 0))))
 769 |     (let loop ((i 1))
 770 |       (unless (= i (vector-length chil))
 771 |               ;; Increment val by the operator applied to val and the operand
 772 |               (set! val ((if (equal? (vector-ref chil i) ch) op1 op2)
 773 |                          val (fn (vector-ref chil (+ i 1)))))
 774 |               (loop (+ i 2))))
 775 |     val))
 776 | 
 777 | 
 778 | (define (sum ast)
 779 |   (bin-op ast prod #\+ + -))
 780 | 
 781 | 
 782 | (define (prod ast)
 783 |   (bin-op ast unary #\* * /))
 784 | 
 785 | 
 786 | (define (unary ast)
 787 |   (case (ast-t ast)
 788 |     ((unary) (- (unary (cadr (ast-c ast)))))
 789 |     ((sum) (sum ast))
 790 |     (else (num ast))))
 791 | 
 792 | 
 793 | (define (num ast)
 794 |   (string->number (list->string (ast-c ast))))
 795 | 
 796 | 
 797 | (define (rl)
 798 |   (display "calc> ")
 799 |   (read-line (current-input-port)))
 800 | 
 801 | 
 802 | (let loop ((input (rl)))
 803 |   (if (eof-object? input)
 804 |       (newline)
 805 |       (begin (calc input)
 806 |              (loop (rl)))))
 807 | -------------------------------------------------------------------------------
 808 | 
 809 | 
 810 | 
 811 | 
 812 | ///////////////////////////////////////////////////////////////////////////////
 813 | == A Short Example ==
 814 | 
 815 | This chapter will introduce you to the basic work-flow used with Waxeye. In the
 816 | process, we will iteratively develop the grammar of a simple real-world
 817 | language.
 818 | 
 819 | 
 820 | == Using the Interpreter ==
 821 | todo
 822 | 
 823 | == Extended Example ==
 824 | todo
 825 | ///////////////////////////////////////////////////////////////////////////////
 826 | 
 827 | 
 828 | 
 829 | == Grammar Testing ==
 830 | 
 831 | .test/grammars/waxeye.rkt
 832 | [source,scheme]
 833 | -------------------------------------------------------------------------------
 834 | ;; These are tests for the 'Grammar' non-terminal
 835 | (Grammar ; <- This is the non-terminal's name
 836 | 
 837 |  ;; Following the name are pairs of input string and expected output. The
 838 |  ;; output is either the keyword 'pass', the keyword 'fail' or an AST. The AST
 839 |  ;; specifies the structure of the expected tree, the names of the nodes and
 840 |  ;; the individual characters. If you don't want to specify the whole tree,
 841 |  ;; just use the wild-card symbol '*' for the portion of the tree you want to
 842 |  ;; skip.
 843 | 
 844 |  "" ; <- This is the input
 845 |  (Grammar) ; <- This is the expected output
 846 | 
 847 |  "A <- 'a'"
 848 |  pass ; <- The keyword 'pass'
 849 | 
 850 |  "A"
 851 |  fail ; <- The keyword 'fail'
 852 | 
 853 |  "A <- 'a' B <- 'b'"
 854 |  (Grammar (Definition (Identifier #\A) *)  ; <- Here we skip some of
 855 |           (Definition (Identifier #\B) *)) ;    Definition's children
 856 | 
 857 |  "A <- 'a'"
 858 |  (Grammar (*)) ; <- Here we specify a child tree of any type
 859 | 
 860 |  "A <- [a-z] *[a-z0-9]"
 861 |  (Grammar (Definition (Identifier #\A) (LeftArrow) (Alternation *)))
 862 | 
 863 |  "A <- 'a'"
 864 |  (Grammar (Definition (Identifier #\A)
 865 |             (LeftArrow) (Alternation (Sequence (Unit (Literal (LChar #\a)))))))
 866 |  )
 867 | -------------------------------------------------------------------------------
 868 | 
 869 | 
 870 | 
 871 | == Modular Grammars ==
 872 | 
 873 | It is sometimes desirable to have a grammar split across multiple files and to
 874 | have a final grammar built from those files. We can do this by using a
 875 | modular grammar.
 876 | 
 877 | Having our grammar split in this way provides us with the opportunity to
 878 | manipulate the definition of the non-terminals and, in the process, create new
 879 | languages. Depending on how we compose our final grammar, we can create vastly
 880 | different languages from the same base grammars and only need to change the one
 881 | modular grammar.
 882 | 
 883 | One of the biggest advantages of modular grammars is that they make it very
 884 | easy to embed one language within another. Many languages can be thought of in
 885 | this way. Prime examples are when a programming language is embedded within XML
 886 | or HTML. Or, going the other way, you could embed a data language like SQL
 887 | within a programming language.
 888 | 
 889 | There are also cases where a language's syntax changes subtly over time. In
 890 | those cases, we want parsers for each version of the language without
 891 | duplicating large parts of our grammars.
 892 | 
 893 | 
 894 | === Grammar Composition ===
 895 | 
 896 | A modular grammar is made up of expressions that pull together non-modular
 897 | grammars. Some modular expressions can have other expressions nested within
 898 | them. An expression is one of the following:
 899 | 
 900 | * '"grammar.waxeye"' +
 901 |   A path to a '.waxeye' file. This path should either be relative to the
 902 |   modular grammar or be an absolute path.
 903 | 
 904 | * '(rename modular-exp (old-name . new-name) ...)' +
 905 |   Renames the specified non-terminals with their new names.
 906 | 
 907 | * '(only modular-exp non-term ...)' +
 908 |   Includes only the listed non-terminals.
 909 | 
 910 | * '(all-except modular-exp non-term ...)' +
 911 |   Includes all non-terminals except those listed.
 912 | 
 913 | * '(prefix prefix modular-exp)' +
 914 |    Prefixes the names of non-terminals from 'modular-exp'.
 915 | 
 916 | * '(prefix-only prefix modular-exp non-term ...)' +
 917 |   Prefixes only the listed non-terminals.
 918 | 
 919 | * '(prefix-all-except prefix modular-exp non-term ...)' +
 920 |   Prefixes all non-terminals except those listed.
 921 | 
 922 | * '(join modular-exp ...)' +
 923 |   Combines the results of multiple modular expressions into a single
 924 |   expression. Not needed at the top-level.
 925 | 
 926 | 
 927 | .grammars/modular/mod.rkt
 928 | [source,scheme]
 929 | -------------------------------------------------------------------------------
 930 | ;; A contrived example where we replace the definition of Number in Json with a
 931 | ;; much simpler one that only supports integers.
 932 | 
 933 | (all-except "../json.waxeye" Number)
 934 | 
 935 | (rename (only "../num.waxeye" Num) (Num . Number))
 936 | -------------------------------------------------------------------------------
 937 | 
 938 | 
 939 | 
 940 | == Waxeye Options ==
 941 | 
 942 | -------------------------------------------------------------------------------
 943 | waxeye [ <option> ... ] <grammar>
 944 |  where <option> is one of
 945 |  Waxeye modes:
 946 | / -g <language> <dir> : Generate
 947 | | -i : Interpret
 948 | \ -t <test> : Test
 949 |  Grammar options:
 950 |   -m : Modular Grammar - default: false
 951 |   -s <start> : Starting non-terminal - default: first non-terminal
 952 |  Parser options:
 953 |   -c <comment> : Header comment for generated files - default: none
 954 |   -e <eof> : Check parser consumes all input - default: true
 955 |   -n <namespace> : Module or package namespace - default: none
 956 |   -p <prefix> : Name prefix for generated files - default: none
 957 |  Misc options:
 958 |   --debug : Activates debug information
 959 |   --version : Prints version number and copyright notice
 960 |   --help, -h : Show this help
 961 |   -- : Do not treat any remaining argument as a switch (at this level)
 962 |  /|\ Brackets indicate mutually exclusive options.
 963 |  Multiple single-letter switches can be combined after one `-'; for
 964 |   example: `-h-' is the same as `-h --'
 965 | -------------------------------------------------------------------------------
 966 | 
 967 | === Waxeye Modes ===
 968 | 
 969 | -------------------------------------------------------------------------------
 970 | grammar
 971 | -------------------------------------------------------------------------------
 972 | 
 973 | The grammar file describing the language you want to parse. It is the last
 974 | argument given to Waxeye and is required by all of Waxeye's operating modes.
 975 | 
 976 | 
 977 | ==== Generate ====
 978 | 
 979 | -------------------------------------------------------------------------------
 980 | -g <language> <dir>
 981 | -------------------------------------------------------------------------------
 982 | 
 983 | Creates a parser written in the specified programming language. Writes the
 984 | parser's files to the specified directory.
 985 | 
 986 | Currently supported programming languages:
 987 | 
 988 | * racket
 989 | 
 990 | -------------------------------------------------------------------------------
 991 | waxeye -g racket . grammar.waxeye
 992 | -------------------------------------------------------------------------------
 993 | 
 994 | 
 995 | ==== Interpret ====
 996 | 
 997 | -------------------------------------------------------------------------------
 998 | -i
 999 | -------------------------------------------------------------------------------
1000 | 
1001 | Parses input as a string from the language defined by the grammar. Displays the
1002 | resulting AST or parse error.
1003 | 
1004 | -------------------------------------------------------------------------------
1005 | waxeye -i grammar.waxeye < input.txt
1006 | -------------------------------------------------------------------------------
1007 | 
1008 | 
1009 | ==== Test ====
1010 | 
1011 | -------------------------------------------------------------------------------
1012 | -t <test>
1013 | -------------------------------------------------------------------------------
1014 | 
1015 | Runs the tests in the specified test file for the language defined by the
1016 | grammar. Displays any test errors.
1017 | 
1018 | -------------------------------------------------------------------------------
1019 | waxeye -t tests.rkt grammar.waxeye
1020 | -------------------------------------------------------------------------------
1021 | 
1022 | 
1023 | === Grammar Options ===
1024 | 
1025 | -------------------------------------------------------------------------------
1026 | -m
1027 | -------------------------------------------------------------------------------
1028 | 
1029 | Indicates that the grammar is a modular grammar.
1030 | 
1031 | -------------------------------------------------------------------------------
1032 | -s <start>
1033 | -------------------------------------------------------------------------------
1034 | 
1035 | Specifies the non-terminal that starts the language. Default - The first
1036 | non-terminal in the grammar.
1037 | 
1038 | 
1039 | === Parser Options ===
1040 | 
1041 | -------------------------------------------------------------------------------
1042 | -c <comment>
1043 | -------------------------------------------------------------------------------
1044 | 
1045 | The file to be used as the header comment of generated files. Default - none.
1046 | 
1047 | 
1048 | -------------------------------------------------------------------------------
1049 | -e <eof>
1050 | -------------------------------------------------------------------------------
1051 | 
1052 | Whether to check that the parser consumes all input. Default - true.
1053 | 
1054 | 
1055 | -------------------------------------------------------------------------------
1056 | -n <namespace>
1057 | -------------------------------------------------------------------------------
1058 | 
1059 | The module or package namespace. Default - none.
1060 | 
1061 | 
1062 | -------------------------------------------------------------------------------
1063 | -p <prefix>
1064 | -------------------------------------------------------------------------------
1065 | 
1066 | The name prefix for any generated files. Default - none.
1067 | 
1068 | 
1069 | === Misc Options ===
1070 | 
1071 | -------------------------------------------------------------------------------
1072 | --debug
1073 | -------------------------------------------------------------------------------
1074 | 
1075 | Activates debug information.
1076 | 
1077 | 
1078 | -------------------------------------------------------------------------------
1079 | --version
1080 | -------------------------------------------------------------------------------
1081 | 
1082 | Prints the version number and copyright notice.
1083 | 
1084 | 
1085 | -------------------------------------------------------------------------------
1086 | --help, -h
1087 | -------------------------------------------------------------------------------
1088 | 
1089 | Prints a message describing the available command-line options.
1090 | 
1091 | 
1092 | ///////////////////////////////////////////////////////////////////////////////
1093 | == Grammar Cookbook ==
1094 | 
1095 | This chapter gives you recipes for some of the common situations faced when
1096 | writing grammars.
1097 | 
1098 | === Removing Implicit Information ===
1099 | 
1100 | e.g.
1101 | 
1102 | Assignment <- Variable '=' Expression
1103 | 
1104 | Change to:
1105 | Assignment <- Variable :'=' Expression
1106 | 
1107 | 
1108 | === Whitespace ===
1109 | 
1110 | TODO
1111 | 
1112 | === Comments ===
1113 | 
1114 | * Single-line comments
1115 | * Multi-line comments
1116 | * Nested comments
1117 | 
1118 | === End of Line Encoding ===
1119 | 
1120 | TODO
1121 | 
1122 | === Quoted Strings ===
1123 | 
1124 | TODO
1125 | 
1126 | === Delimited Lists ===
1127 | 
1128 | TODO
1129 | 
1130 | === Optionally Delimited Expressions ===
1131 | 
1132 | TODO
1133 | 
1134 | === Escape Sequences ===
1135 | 
1136 | TODO
1137 | 
1138 | === Nested Expressions ===
1139 | 
1140 | TODO
1141 | 
1142 | === Arithmetic Expressions ===
1143 | 
1144 | TODO
1145 | 
1146 | === Case Insensitive Keywords ===
1147 | 
1148 | TODO
1149 | 
1150 | === Keywords as Identifiers ===
1151 | 
1152 | TODO
1153 | 
1154 | === Embedded Languages ===
1155 | 
1156 | TODO
1157 | ///////////////////////////////////////////////////////////////////////////////
1158 | 
1159 | Copyright (C) 2008-2021 Orlando Hill
1160 | 
1161 | This work is licensed under a
1162 | http://creativecommons.org/licenses/by-nc-sa/4.0/[Creative Commons
1163 | Attribution-NonCommercial-ShareAlike 4.0 International License].
1164 | 


--------------------------------------------------------------------------------
/docs/book/scheme.lang:
--------------------------------------------------------------------------------
 1 | comment delim "#\|" "\|#" multiline nested
 2 | comment start ";"
 3 | 
 4 | environment string delim "\"" "\"" begin
 5 |   specialchar = '\\.'
 6 | end
 7 | 
 8 | type = '#t|#f|#\\[[:alnum:]]+' nonsensitive
 9 | 
10 | vardef words = '(define|define-syntax|syntax-case|syntax-rules|lambda|let|let\*|letrec|let-syntax|letrec-syntax|if|else|cond|case|and|not|or|begin|for-each|map|delay|do|dynamic-wind|call/cc|call-with-current-continuation|call-with-input-file|call-with-output-file)'
11 | 
12 | # Match keywords as long as they aren't just a prefix of an identifier
13 | keyword = $words + '(?![[:alnum:]!$%&*+./:<=>?@^_~-])'
14 | 
15 | # Make any identifier that doesn't start with a keyword be normal
16 | normal = '(?!' + $words +')' + '[[:alpha:]!$%&*+./:<=>?@^_~-][[:alnum:]!$%&*+./:<=>?@^_~-]*'
17 | 
18 | # Highlight the named let expressions
19 | (cbracket,keyword,normal,function) = `(\()(let)([[:blank:]]+)([[:alpha:]!$%&*+./:<=>?@^_~-][[:alnum:]!$%&*+./:<=>?@^_~-]*)`
20 | 
21 | # Highlight the names of defined forms
22 | (cbracket,keyword,normal,function) = `(\()(define(?:-syntax)?)([[:blank:]]+)([[:alpha:]!$%&*+./:<=>?@^_~-][[:alnum:]!$%&*+./:<=>?@^_~-]*)`
23 | (cbracket,keyword,normal,cbracket,function) = `(\()(define)([[:blank:]]+)(\()([[:alpha:]!$%&*+./:<=>?@^_~-][[:alnum:]!$%&*+./:<=>?@^_~-]*)`
24 | 
25 | number = '(\#(x|o|b|X|O|B))?[+-]?((0x[[:xdigit:]]+)|(([[:digit:]]*\.)?[[:digit:]]+([eE][+-]?[[:digit:]]+)?))'
26 | 
27 | cbracket = "(|)"
28 | 


--------------------------------------------------------------------------------
/docs/book/waxeye.lang:
--------------------------------------------------------------------------------
 1 | include "c_string.lang"
 2 | 
 3 | comment start "#"
 4 | 
 5 | comment delim "/*" "*/" multiline nested
 6 | 
 7 | environment symbol delim "[" "]" begin
 8 |   specialchar = '\\.'
 9 | end
10 | 
11 | cbracket = "(|)"
12 | 
13 | keyword = '[[:alpha:]][[:alnum:]_-]*[[:blank:]]*(?=<[:=-])'
14 | 
15 | function = '<[:=-]'
16 | 
17 | preproc = '[!&*+?|]'
18 | 


--------------------------------------------------------------------------------
/grammars/calc.waxeye:
--------------------------------------------------------------------------------
 1 | # A grammar for an arithmetic calculator.
 2 | # Supports +,-,*,/, negation, parentheses and floating point numbers.
 3 | 
 4 | calc  <- ws sum
 5 | 
 6 | sum   <- prod *([+-] ws prod)
 7 | 
 8 | prod  <- unary *([*/] ws unary)
 9 | 
10 | unary <= '-' ws unary
11 |        | :'(' ws sum :')' ws
12 |        | num
13 | 
14 | num   <- +[0-9] ?('.' +[0-9]) ws
15 | 
16 | ws    <: *[ \t\n\r]
17 | 


--------------------------------------------------------------------------------
/grammars/json.waxeye:
--------------------------------------------------------------------------------
 1 | # The JSON data format
 2 | 
 3 | Json    <- Ws Value
 4 | 
 5 | 
 6 | Value   <- ( Object
 7 |            | Array
 8 |            | Number
 9 |            | String
10 |            | Literal)
11 |            Ws
12 | 
13 | 
14 | Object  <- :'{' Ws
15 |            ?( Member *(Com Member))
16 |            :'}'
17 | 
18 | 
19 | Member  <- String Ws Col Value
20 | 
21 | 
22 | Array   <- :'[' Ws
23 |            ?( Value *(Com Value))
24 |            :']'
25 | 
26 | 
27 | Number  <- ?'-'
28 |            ('0' | [1-9] *[0-9])
29 |            ?('.' +[0-9])
30 |            ?([eE] ?[+-] +[0-9])
31 | 
32 | 
33 | String  <- :'"'
34 |            *( :'\\' Escaped
35 |             | !'\\' !'"' . )
36 |            :'"'
37 | 
38 | 
39 | Escaped <- 'u' [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]
40 |          | ["/\\bfnrt]
41 | 
42 | 
43 | Literal <- 'true'
44 |          | 'false'
45 |          | 'null'
46 | 
47 | 
48 | Ws      <: *[ \t\n\r]
49 | 
50 | 
51 | Com     <: ',' Ws
52 | 
53 | 
54 | Col     <: ':' Ws
55 | 


--------------------------------------------------------------------------------
/grammars/modular/mod.rkt:
--------------------------------------------------------------------------------
1 | ;; A contrived example where we replace the definition of Number in Json with a
2 | ;; much simpler one that only supports integers.
3 | 
4 | (all-except "../json.waxeye" Number)
5 | 
6 | (rename (only "../num.waxeye" Num) (Num . Number))
7 | 


--------------------------------------------------------------------------------
/grammars/num.waxeye:
--------------------------------------------------------------------------------
1 | # Non-negative integers
2 | 
3 | Num <- '0' | [1-9] *[0-9]
4 | 


--------------------------------------------------------------------------------
/grammars/regexp.waxeye:
--------------------------------------------------------------------------------
 1 | # A simple regular expression language.
 2 | 
 3 | # alternation has lowest precedence
 4 | Regexp    <- Concat *(:'|' Concat)
 5 | 
 6 | 
 7 | # then concatenation
 8 | Concat    <- +Rep
 9 | 
10 | 
11 | # then repetition
12 | Rep       <- Unit ?[*+?]
13 | 
14 | 
15 | # then parentheses
16 | Unit    <- :'(' Regexp :')'
17 |            | CharClass
18 |            | Wild
19 |            | Start
20 |            | End
21 |            | Literal
22 | 
23 | 
24 | Literal   <- '\\' Escaped
25 |            | '\\' !Escaped .
26 |            | !Escaped .
27 | 
28 | 
29 | Escaped   <- [*?+.^$\\|()[\]]
30 | 
31 | 
32 | CharClass <- :'[' +(!']' Range) :']'
33 | 
34 | 
35 | Range     <- Char ?(:'-' Char)
36 | 
37 | 
38 | Char      <- '\\' [\-\]\\]
39 |            | !'\\' .
40 | 
41 | 
42 | Wild      <- :'.'
43 | 
44 | 
45 | Start     <- :'^'
46 | 
47 | 
48 | End       <- :'$'
49 | 


--------------------------------------------------------------------------------
/grammars/templ.waxeye:
--------------------------------------------------------------------------------
1 | # A grammar for code generation templates.
2 | 
3 | template    <- *(code | string)
4 | 
5 | code        <- :'${' *(:'\\' '}' | !'}' !'\\}' .) :'}'
6 | 
7 | string      <- +(:'\\' '$' | !code .)
8 | 


--------------------------------------------------------------------------------
/grammars/waxeye.waxeye:
--------------------------------------------------------------------------------
 1 | # The Waxeye grammar language.
 2 | 
 3 | Grammar     <- Ws *Definition
 4 | 
 5 | 
 6 | Definition  <- Identifier (LeftArrow | PruneArrow | VoidArrow) Alternation Ws
 7 | 
 8 | 
 9 | Alternation <- Sequence *(Alt Sequence)
10 | 
11 | 
12 | Sequence    <- +Unit
13 | 
14 | 
15 | Unit        <- *(Prefix | Label)
16 |                ( Identifier !(LeftArrow | PruneArrow | VoidArrow)
17 |                | Open Alternation Close
18 |                | Action
19 |                | Literal
20 |                | CaseLiteral
21 |                | CharClass
22 |                | WildCard )
23 | 
24 | 
25 | Prefix      <- [?*+:&!] Ws
26 | 
27 | 
28 | Label       <- Identifier Ws :'=' Ws
29 | 
30 | 
31 | Action      <- :'@' Identifier ?(:'<' Ws Identifier *(Comma Identifier) :'>') Ws
32 | 
33 | 
34 | Identifier  <- [a-zA-Z_] *[a-zA-Z0-9_-] Ws
35 | 
36 | 
37 | Literal     <- :['] +(!['] (LChar | Hex)) :['] Ws
38 | 
39 | 
40 | CaseLiteral <- :["] +(!["] (LChar | Hex)) :["] Ws
41 | 
42 | 
43 | LChar       <- '\\' [nrt'"\\] | !'\\' !EndOfLine .
44 | 
45 | 
46 | CharClass   <- :'[' *(!']' Range) :']' Ws
47 | 
48 | 
49 | Range       <- (Char | Hex) ?(:'-' (Char | Hex))
50 | 
51 | 
52 | Char        <- '\\' [nrt\-\]\\] | !'\\' !']' !EndOfLine .
53 | 
54 | 
55 | Hex         <- :'\\<' [0-9A-Fa-f] [0-9A-Fa-f] :'>'
56 | 
57 | 
58 | WildCard    <- :'.' Ws
59 | 
60 | 
61 | LeftArrow   <- :'<-' Ws
62 | 
63 | 
64 | PruneArrow  <- :'<=' Ws
65 | 
66 | 
67 | VoidArrow   <- :'<:' Ws
68 | 
69 | 
70 | #################
71 | # Always voided #
72 | #################
73 | 
74 | Alt         <: '|' Ws
75 | 
76 | 
77 | Open        <: '(' Ws
78 | 
79 | 
80 | Close       <: ')' Ws
81 | 
82 | 
83 | Comma       <: ',' Ws
84 | 
85 | 
86 | SComment    <: '#' *(!EndOfLine .) (EndOfLine | !.)
87 | 
88 | 
89 | MComment    <: '/*' *(MComment | !'*/' . ) '*/'
90 | 
91 | 
92 | EndOfLine   <: '\r\n' | '\n' | '\r'
93 | 
94 | 
95 | Ws          <: *([ \t] | EndOfLine | SComment | MComment)
96 | 


--------------------------------------------------------------------------------
/src/example/racket/calculator.rkt:
--------------------------------------------------------------------------------
 1 | #lang racket
 2 | 
 3 | (require "parser.rkt")
 4 | 
 5 | ;; A commandline arithmetic calculator.
 6 | 
 7 | ;; Parse one line and print its value; parse errors and division by zero are reported, not raised.
 8 | (define (calc input)
 9 |   (let ((ast (parser input)))
10 |     (if (ast? ast)
11 |         (with-handlers ((exn:fail:contract:divide-by-zero? (lambda (e) (displayln "error: division by zero"))))
12 |           (displayln (sum (car (ast-c ast)))))
13 |         (display-parse-error ast))))
14 | 
15 | ;; Fold a flat child list (operand (operator operand)*) into a single value.
16 | ;; Each operand goes through fn; an operator equal to ch selects op1, any other op2.
17 | (define (bin-op ast fn ch op1 op2)
18 |   (let ((children (ast-c ast)))
19 |     (let loop ((acc (fn (car children)))
20 |                (rest (cdr children)))
21 |       (if (null? rest)
22 |           acc
23 |           (loop ((if (equal? (car rest) ch) op1 op2) acc (fn (cadr rest)))
24 |                 (cddr rest))))))
25 | 
26 | ;; Evaluate a sum node: prod operands combined with + (for #\+) or - (otherwise).
27 | (define (sum ast)
28 |   (bin-op ast prod #\+ + -))
29 | 
30 | ;; Evaluate a prod node: unary operands combined with * (for #\*) or / (otherwise).
31 | (define (prod ast)
32 |   (bin-op ast unary #\* * /))
33 | 
34 | ;; Evaluate an operand: a unary node is negated, a sum node is summed, anything else is read as a num.
35 | (define (unary ast)
36 |   (cond
37 |    ((equal? (ast-t ast) 'unary) (- (unary (cadr (ast-c ast)))))
38 |    ((equal? (ast-t ast) 'sum) (sum ast))
39 |    (else (num ast))))
40 | 
41 | ;; Convert a num node, whose children are its characters, to a number.
42 | (define (num ast)
43 |   (string->number (apply string (ast-c ast))))
44 | 
45 | ;; Print the prompt and read one line from the current input port.
46 | (define (rl)
47 |   (display "calc> ")
48 |   (read-line))
49 | 
50 | ;; REPL: evaluate each line read until end of input, then print a final newline.
51 | (let repl ((line (rl)))
52 |   (cond
53 |    ((eof-object? line) (newline))
54 |    (else (calc line)
55 |          (repl (rl)))))
56 | 


--------------------------------------------------------------------------------
/src/example/racket/example.rkt:
--------------------------------------------------------------------------------
1 | #lang racket
2 | 
3 | (require "parser.rkt")
4 | 
5 | ;; Parse our input
6 | (let ((ast (parser "42")))
7 |   ;; Print our AST
8 |   (display-ast ast))
9 | 


--------------------------------------------------------------------------------
/src/racket/waxeye/ast.rkt:
--------------------------------------------------------------------------------
 1 | #lang racket/base
 2 | 
 3 | (require (only-in racket/list remove-duplicates)
 4 |          (only-in racket/string string-join))
 5 | 
 6 | (provide (all-defined-out))
 7 | 
 8 | 
 9 | ;; ast
10 | ;;
11 | ;; t = The type of the ast as a symbol
12 | ;; c = The list of the ast's children as nested asts or characters
13 | ;; p = The position of the ast in the original string,
14 | ;;     a pair of start and end indexes
15 | (struct ast (t c p) #:mutable)
16 | 
17 | (struct parse-error (pos line col expected received snippet))
18 | 
19 | 
20 | ;; Render an ast as an indented tree, one node per line.
21 | ;;
22 | ;; The root is printed bare. Below it, a nested ast is prefixed with "->  "
23 | ;; and a character with "|   ", each indented four spaces per extra level.
24 | (define (ast->string ast)
25 |   (let ((o (open-output-string)))
26 |     ;; Leading spaces for a node at the given depth
27 |     (define (pad depth)
28 |       (for ((i (in-range 1 depth)))
29 |         (display "    " o)))
30 |     ;; Children that are neither characters nor asts print nothing
31 |     (define (emit node depth)
32 |       (cond
33 |        ((char? node)
34 |         (pad depth)
35 |         (when (> depth 0) (display "|   " o))
36 |         (display node o))
37 |        ((ast? node)
38 |         (pad depth)
39 |         (when (> depth 0) (display "->  " o))
40 |         (display (ast-t node) o)
41 |         (for-each (lambda (child)
42 |                     (newline o)
43 |                     (emit child (+ depth 1)))
44 |                   (ast-c node)))))
45 |     (emit ast 0)
46 |     (get-output-string o)))
47 | 
48 | ;; Print an ast as a tree, a parse-error as its message, and anything else as it is.
49 | (define (display-ast ast)
50 |   (cond
51 |    ((ast? ast) (displayln (ast->string ast)))
52 |    ((parse-error? ast) (displayln (parse-error->string ast)))
53 |    ;; Neither an ast nor a parse-error: print the value itself
54 |    (else (displayln ast))))
55 | 
56 | 
57 | ;; Render an ast as an s-expression string: (type child ...)
58 | (define (ast->string-sexpr ast)
59 |   (let ((o (open-output-string)))
60 |     (define (emit node)
61 |       (display "(" o)
62 |       (display (ast-t node) o)
63 |       (for ((child (in-list (ast-c node))))
64 |         (display " " o)
65 |         (if (ast? child)
66 |             (emit child)
67 |             (display child o)))
68 |       (display ")" o))
69 |     (emit ast)
70 |     (get-output-string o)))
71 | 
72 | 
73 | ;; Format a parse-error as "LINE:COL expected: ... received: ..." followed by
74 | ;; the snippet on its own line.
75 | (define (parse-error->string error)
76 |   (let* ((where (string-append (number->string (parse-error-line error))
77 |                                ":"
78 |                                (number->string (parse-error-col error))))
79 |          (names (remove-duplicates (parse-error-expected error)))
80 |          ;; An empty list of expected names is rendered as <end of input>
81 |          (expected (if (null? names)
82 |                        "<end of input>"
83 |                        (string-append "[" (string-join (map symbol->string names) ", ") "]"))))
84 |     (string-append where
85 |                    " expected: "
86 |                    expected
87 |                    " received: "
88 |                    (parse-error-received error)
89 |                    "\n"
90 |                    (parse-error-snippet error))))
91 | 
92 | ;; Print the parse-error, formatted by parse-error->string, followed by a newline.
93 | (define (display-parse-error error)
94 |   (displayln (parse-error->string error)))
95 | 


--------------------------------------------------------------------------------
/src/racket/waxeye/fa.rkt:
--------------------------------------------------------------------------------
 1 | #lang racket/base
 2 | 
 3 | (provide (all-defined-out))
 4 | 
 5 | 
 6 | ;; t - The transition cost: 'wild, a character, a character set or an automaton index
 7 | ;; s - The index of the state to transition to
 8 | ;; v - If the edge is voided; the parser leaves a voided edge's result out of the tree
 9 | (struct edge (t s v) #:mutable)
10 | 
11 | (struct state (edges match) #:mutable)
12 | 
13 | ;; type - a symbol: the non-terminal's name, or the unwind type (the parser checks for '& and '!)
14 | ;; states - a vector of states
15 | ;; mode - the automaton mode: 'leftArrow, 'pruneArrow or 'voidArrow
16 | (struct fa (type states mode) #:mutable)
17 | 


--------------------------------------------------------------------------------
/src/racket/waxeye/parser.rkt:
--------------------------------------------------------------------------------
  1 | #lang racket/base
  2 | 
  3 | (require waxeye/ast
  4 |          waxeye/fa
  5 |          waxeye/set)
  6 | 
  7 | (provide make-parser)
  8 | 
  9 | ;; A cached automaton result (val) with the input position, line, column and last-cr flag reached after it
 10 | (struct cache-item (val pos line col cr))
 11 | 
 12 | ;; Returns a procedure that parses a string starting from automaton `start`, giving the match result or a parse-error.
 13 | (define (make-parser start eof-check automata)
 14 |   (lambda (input)
 15 |     (let* ((input-len (string-length input))
 16 |            (input-pos 0)
 17 |            (line 1)
 18 |            (column 0)
 19 |            (last-cr #f)
 20 |            (error-pos 0)
 21 |            (error-line 1)
 22 |            (error-col 0)
 23 |            (error-expected '())
 24 |            (fa-stack '())
 25 |            (cache (make-hash)))
 26 |       ;; Match the automaton at `index` from the current position, reusing a cached successful result for the same (index . position)
 27 |       (define (match-automaton index)
 28 |         (let* ((key (cons index input-pos)) (value (hash-ref cache key #f)))
 29 |           (if value
 30 |               (begin
 31 |                 (restore-pos (cache-item-pos value) (cache-item-line value) (cache-item-col value) (cache-item-cr value))
 32 |                 (cache-item-val value))
 33 |               (let* ((automaton (vector-ref automata index))
 34 |                      (type (fa-type automaton))
 35 |                      (states (fa-states automaton))
 36 |                      (automaton-mode (fa-mode automaton)))
 37 |                 ;; Push to the fa-stack
 38 |                 (set! fa-stack (cons (cons automaton #f) fa-stack))
 39 |                 (let ((v (let ((start-pos input-pos)
 40 |                                (start-line line)
 41 |                                (start-col column)
 42 |                                (start-cr last-cr)
 43 |                                (res (match-state (vector-ref states 0))))
 44 |                            (cond
 45 |                             ((equal? type '&)
 46 |                              (restore-pos
 47 |                                start-pos
 48 |                                start-line
 49 |                                start-col
 50 |                                start-cr)
 51 |                              (not (not res)))
 52 |                             ((equal? type '!)
 53 |                              (restore-pos
 54 |                                start-pos
 55 |                                start-line
 56 |                                start-col
 57 |                                start-cr)
 58 |                              (if res
 59 |                                  (update-error)
 60 |                                  #t))
 61 |                             (else
 62 |                              (if res
 63 |                                  (case automaton-mode
 64 |                                    ((voidArrow)
 65 |                                     #t)
 66 |                                    ((pruneArrow)
 67 |                                     (cond
 68 |                                      ((null? res)
 69 |                                       #t)
 70 |                                      ((null? (cdr res))
 71 |                                       (car res))
 72 |                                      (else
 73 |                                       (ast
 74 |                                         type
 75 |                                         res
 76 |                                         (cons start-pos input-pos)))))
 77 |                                    ((leftArrow)
 78 |                                     (ast
 79 |                                       type
 80 |                                       res
 81 |                                       (cons start-pos input-pos)))
 82 |                                    (else
 83 |                                      (error 'waxeye "Unknown automaton mode")))
 84 |                                  ;; Don't need to restore here since we already did
 85 |                                  (update-error)))))))
 86 |                   ;; Pop from the fa-stack
 87 |                   (set! fa-stack (cdr fa-stack))
 88 |                   (hash-set!
 89 |                     cache
 90 |                     key
 91 |                     (cache-item v input-pos line column last-cr))
 92 |                   v)))))
 93 |       ;; Try the state's edges; if none matches, the result is '() for a state whose match field is set, otherwise #f
 94 |       (define (match-state state)
 95 |         (let ((res (match-edges (state-edges state))))
 96 |           (if res
 97 |               res
 98 |               (and (state-match state) '()))))
 99 |       ;; The result of the first edge in the list that matches, or #f when none does
100 |       (define (match-edges edges)
101 |         (if (null? edges)
102 |             #f
103 |             (let ((res (match-edge (car edges))))
104 |               (if res
105 |                   res
106 |                   (match-edges (cdr edges))))))
107 | 
108 |       ;; Try one edge: match its transition, then continue from its target state; the position is restored if that continuation fails
109 |       (define (match-edge e)
110 |         (let* ((start-pos input-pos)
111 |                (start-line line)
112 |                (start-col column)
113 |                (start-cr last-cr)
114 |                (t (edge-t e))
115 |                (res (cond
116 |                      ;; If we have a wild card expression
117 |                      ((equal? 'wild t) (if (< input-pos input-len)
118 |                                            (mv)
119 |                                            (record-error)))
120 |                      ;; If we have a character match
121 |                      ((char? t) (if (and (< input-pos input-len) (equal? (string-ref input input-pos) t))
122 |                                     (mv)
123 |                                     (record-error)))
124 |                      ;; If we have a character class
125 |                      ((pair? t) (if (and (< input-pos input-len) (within-set? t (string-ref input input-pos)))
126 |                                     (mv)
127 |                                     (record-error)))
128 |                      ;; If we have a reference to another automaton
129 |                      ((integer? t) (match-automaton t))
130 |                      (else #f))))
131 |           ;; If we are able to transition to the next state
132 |           (if res
133 |               ;; Move to next state
134 |               (let ((tran-res (match-state (vector-ref (fa-states (caar fa-stack)) (edge-s e)))))
135 |                 (if tran-res
136 |                     (if (or (edge-v e) (equal? res #t))
137 |                         tran-res
138 |                         (cons res tran-res))
139 |                     (begin
140 |                       (restore-pos start-pos start-line start-col start-cr)
141 |                       #f)))
142 |               #f)))
143 |       ;; Consume one character and return it, updating line and column; CR, LF and CR LF each count as one line break
144 |       (define (mv)
145 |         (let ((ch (string-ref input input-pos)))
146 |           (set! input-pos (+ input-pos 1))
147 |           (if (char=? ch #\return)
148 |               (begin
149 |                 (set! line (+ line 1))
150 |                 (set! column 0)
151 |                 (set! last-cr #t))
152 |               (begin
153 |                 (if (char=? ch #\linefeed)
154 |                     (unless last-cr
155 |                             (set! line (+ line 1))
156 |                             (set! column 0))
157 |                     (set! column (+ column 1)))
158 |                 (set! last-cr #f)))
159 |           ch))
160 |       ;; Reset the input position, line, column and last-cr flag to saved values
161 |       (define (restore-pos p l c cr)
162 |         (set! input-pos p)
163 |         (set! line l)
164 |         (set! column c)
165 |         (set! last-cr cr))
166 |       ;; Note a failed terminal match: track the furthest failure position and flag the automaton on top of fa-stack; returns #f
167 |       (define (record-error)
168 |         ;; did we find a deeper error
169 |         (when (< error-pos input-pos)
170 |               (set! error-pos input-pos)
171 |               (set! error-line line)
172 |               (set! error-col column)
173 |               (set! error-expected '()))
174 |         ;; record the name of the non-terminal for errors of same or greater depth
175 |         (when (<= error-pos input-pos)
176 |               (set! fa-stack (cons (cons (caar fa-stack) #t) (cdr fa-stack))))
177 |         #f)
178 |       ;; Add the type of the automaton on top of fa-stack to error-expected if it was flagged; returns #f
179 |       (define (update-error)
180 |         (when (cdar fa-stack) ;; when there was a reported error
181 |               (set! error-expected (cons (fa-type (caar fa-stack)) error-expected)))
182 |         #f)
183 |       ;; Turn a failed match, or (when eof-check is set) unconsumed input, into a parse-error; otherwise return res
184 |       (define (do-eof-check res)
185 |         (if res
186 |             (if (and eof-check (< input-pos input-len))
187 |                 ;; Create a parse error - Not all input consumed
188 |                 (parse-error
189 |                   error-pos
190 |                   error-line
191 |                   error-col
192 |                   error-expected
193 |                   (received)
194 |                   (snippet))
195 |                 res)
196 |             ;; Create a parse error
197 |             (parse-error
198 |               error-pos
199 |               error-line
200 |               error-col
201 |               error-expected
202 |               (received)
203 |               (snippet))))
204 |       ;; The character at the error position as a string, or "<end of input>" when the error is at the end
205 |       (define (received)
206 |         (if (= error-pos input-len)
207 |             "<end of input>"
208 |             (substring input error-pos (+ error-pos 1))))
209 |       ;; The text around the error position, followed by a line with ^ under the error
210 |       (define (snippet)
211 |         (define snippet-length-max 80)
212 |         (define (line-finder index-test index-move)
213 |           (let loop ((i error-pos) (j 0))
214 |             (if (and (index-test i)
215 |                      (< j snippet-length-max)
216 |                      (let ((ch (string-ref input (index-move i))))
217 |                        (not (or (char=? ch #\newline)
218 |                                 (char=? ch #\return)))))
219 |                 (loop (index-move i) (+ j 1))
220 |                 i)))
221 |         (define (find-line-start)
222 |           (line-finder (lambda (i) (> i 0)) sub1))
223 |         (define (find-line-end)
224 |           (line-finder (lambda (i) (< i (- input-len 1))) add1))
225 |         (define (build-snippet start end)
226 |           (string-append (substring input start (min end input-len)) "\n" (make-string (- error-pos start) #\space) "^"))
227 |         (let ((line-start (find-line-start))
228 |               (line-end (find-line-end)))
229 |           (if (< (- line-end line-start) snippet-length-max)
230 |               (build-snippet line-start (+ line-end 1))
231 |               (let* ((s-len (- error-pos line-start))
232 |                      (e-len (- (+ line-end 1) error-pos))
233 |                      (half-max (/ snippet-length-max 2))
234 |                      (s (min s-len half-max))
235 |                      (e (min e-len half-max))
236 |                      (ss (+ s (- half-max e)))
237 |                      (ee (+ e (- half-max s))))
238 |                 (build-snippet (- error-pos ss) (+ error-pos ee))))))
239 | 
240 |       (do-eof-check (match-automaton start)))))
241 | 


--------------------------------------------------------------------------------
/src/racket/waxeye/set.rkt:
--------------------------------------------------------------------------------
 1 | #lang racket/base
 2 | 
 3 | (provide within-set?)
 4 | 
 5 | 
 6 | ;; Is 'b' within set 'a'?
 7 | ;;
 8 | ;; 'a' is a list of characters and (start . end) ranges. The elements are
 9 | ;; assumed to be in ascending order, so the scan gives up at the first one
10 | ;; that lies beyond 'b'.
11 | (define (within-set? a b)
12 |   (let scan ((elems a))
13 |     (cond
14 |      ((null? elems) #f)
15 |      ((char? (car elems))
16 |       (cond
17 |        ((char=? (car elems) b) #t)
18 |        ((char<? (car elems) b) (scan (cdr elems)))
19 |        (else #f)))
20 |      ;; Otherwise the element is a range
21 |      ((char-within-range? (caar elems) (cdar elems) b) #t)
22 |      ((char<? (cdar elems) b) (scan (cdr elems)))
23 |      (else #f))))
24 | 
25 | ;; Is the character within the inclusive character range?
26 | ;; Both comparisons already give a boolean, so no explicit #t / #f is needed.
27 | (define (char-within-range? start end char)
28 |   (and (char<=? start char)
29 |        (char<=? char end)))
30 | 
31 | 
32 | (module+ test
33 |   (require rackunit)
34 | 
35 |   (check-true (within-set? '(#\a) #\a))
36 |   (check-true (within-set? '(#\a #\b) #\a))
37 |   (check-true (within-set? '(#\a #\b) #\b))
38 |   (check-true (within-set? '(#\a #\b #\c) #\a))
39 |   (check-true (within-set? '(#\a #\b #\c) #\b))
40 |   (check-true (within-set? '(#\a #\b #\c) #\c))
41 | 
42 |   (check-false (within-set? '() #\a))
43 |   (check-false (within-set? '() #\space))
44 | 
45 |   (check-false (within-set? '(#\a) #\space))
46 |   (check-false (within-set? '(#\a) #\b))
47 |   (check-false (within-set? '(#\a #\b) #\d))
48 |   (check-false (within-set? '(#\a #\b) #\E))
49 |   (check-false (within-set? '(#\space #\a #\b) #\tab))
50 |   (check-false (within-set? '(#\a #\A #\e #\E) #\æ))
51 |   (check-false (within-set? '(#\e #\E #\o #\O) #\ø))
52 |   (check-false (within-set? '(#\e #\E #\o #\O) #\ø))
53 | 
54 |   ;; test that set is assumed to be in ascending order
55 |   (check-false (within-set? '(#\c #\a #\b) #\a))
56 |   (check-false (within-set? '(#\c #\a #\b) #\b))
57 |   (check-false (within-set? '(#\a #\c #\b) #\b))
58 | 
59 |   (check-true (within-set? '((#\a . #\a)) #\a))
60 |   (check-true (within-set? '((#\a . #\z)) #\a))
61 |   (check-true (within-set? '((#\A . #\Z) (#\a . #\z)) #\c))
62 |   (check-true (within-set? '((#\A . #\Z) (#\a . #\z)) #\C))
63 |   (check-true (within-set? '((#\0 . #\9) (#\A . #\Z) (#\a . #\z)) #\e))
64 |   (check-true (within-set? '((#\0 . #\9) (#\A . #\Z) (#\a . #\z)) #\F))
65 |   (check-true (within-set? '((#\0 . #\9) (#\A . #\Z) (#\a . #\z)) #\7))
66 |   (check-true (within-set? '((#\d . #\g)) #\g))
67 |   (check-true (within-set? '((#\d . #\h)) #\g))
68 |   (check-true (within-set? '(#\a (#\b . #\z)) #\a))
69 |   (check-true (within-set? '(#\a (#\c . #\z)) #\a))
70 |   (check-true (within-set? '(#\a (#\b . #\z)) #\b))
71 |   (check-true (within-set? '(#\a (#\c . #\z)) #\c))
72 |   (check-true (within-set? '(#\a (#\c . #\z)) #\z))
73 |   (check-true (within-set? '(#\a (#\c . #\z)) #\x))
74 |   (check-true (within-set? '(#\a #\b #\c (#\w . #\y)) #\x))
75 |   (check-true (within-set? '(#\a (#\b . #\h) #\c (#\w . #\y)) #\d))
76 | 
77 |   (check-false (within-set? '((#\a . #\a)) #\b))
78 |   (check-false (within-set? '((#\d . #\g)) #\h))
79 |   (check-false (within-set? '((#\d . #\h)) #\b))
80 |   (check-false (within-set? '((#\a . #\z)) #\A))
81 |   (check-false (within-set? '(#\a (#\d . #\z)) #\c))
82 |   (check-false (within-set? '(#\c (#\D . #\Z)) #\C))
83 |   (check-false (within-set? '((#\0 . #\9) (#\A . #\Z)) #\e))
84 |   (check-false (within-set? '((#\A . #\z)) #\4))
85 |   (check-false (within-set? '((#\0 . #\5) (#\6 . #\Z)) #\t))
86 |   (check-false (within-set? '(#\a #\b #\c (#\w . #\y)) #\r))
87 |   (check-false (within-set? '(#\a (#\b . #\h) #\c (#\w . #\y)) #\j))
88 | 
89 |   ;; test that set is assumed to be in ascending order
90 |   (check-false (within-set? '((#\a . #\z) (#\A . #\Z)) #\A))
91 |   (check-false (within-set? '((#\a . #\z) (#\A . #\Z)) #\F))
92 |   (check-false (within-set? '((#\a . #\z) #\A) #\A))
93 |   (check-false (within-set? '((#\A . #\Z) (#\0 . #\9) (#\a . #\z)) #\1))
94 |   (check-false (within-set? '((#\j . #\m) (#\a . #\c)) #\a))
95 |   (check-false (within-set? '(#\a (#\w . #\y) #\b #\c) #\b))
96 |   (check-false (within-set? '((#\b . #\h) #\a #\c (#\w . #\y)) #\a)))
97 | 


--------------------------------------------------------------------------------
/src/waxeye/action.rkt:
--------------------------------------------------------------------------------
 1 | #lang racket/base
 2 | 
 3 | (require waxeye/ast
 4 |          "gen.rkt")
 5 | 
 6 | (provide (all-defined-out))
 7 | 
 8 | 
 9 | (define *action-list* '())
10 | ;; Add every action expression found in the grammar's definitions to *action-list*, then reverse the whole list.
11 | (define (collect-actions grammar)
12 |   (define (visit-action exp)
13 |     (set! *action-list* (cons exp *action-list*)))
14 | 
15 |   (define (visit-alternation exp)
16 |     (for-each visit-sequence (ast-c exp)))
17 | 
18 |   (define (visit-sequence exp)
19 |     (for-each visit-unit (ast-c exp)))
20 |   ;; Only the last child of a unit is visited
21 |   (define (visit-unit exp)
22 |     (let* ((el (ast-c exp)) (el-len (length el)))
23 |       (visit-exp (list-ref el (- el-len 1)))))
24 |   ;; NOTE(review): these types are lower-case while grammars/waxeye.waxeye names its rules Action, Alternation, ... - confirm against the grammar parser
25 |   (define (visit-exp exp)
26 |     (let ((type (ast-t exp)))
27 |       (case type
28 |        ((action) (visit-action exp))
29 |        ((alternation) (visit-alternation exp))
30 |        ((sequence) (visit-sequence exp))
31 |        ((unit) (visit-unit exp)))))
32 |   ;; The third child of a definition is visited as its alternation
33 |   (define (get-def-actions def)
34 |     (visit-alternation (caddr (ast-c def))))
35 | 
36 |   (for-each get-def-actions (get-defs grammar))
37 |   (set! *action-list* (reverse *action-list*)))
38 | 


--------------------------------------------------------------------------------
/src/waxeye/code.rkt:
--------------------------------------------------------------------------------
  1 | #lang racket/base
  2 | 
  3 | (require (only-in "util.rkt" string-concat)
  4 |          "version.rkt")
  5 | 
  6 | (provide (all-defined-out))
  7 | 
  8 | 
  9 | (define *default-header*
 10 |   (list (string-append "Generated by the Waxeye Parser Generator - version " *version*)
 11 |         "www.waxeye.org"))
 12 | 
 13 | (define *indent-unit* 4)
 14 | (define *indent-level* 0)
 15 | 
 16 | (define (indent-unit! val)
 17 |   (set! *indent-unit* val))
 18 | 
 19 | ;; Write the string s to the file at path, replacing the file if it already exists.
 20 | (define (dump-string s path)
 21 |   (call-with-output-file path (lambda (p)
 22 |                                 (display s p)) #:exists 'replace))
 23 | 
 24 | ;; Constructs the indentation string
 25 | ;;
 26 | ;; The result is *indent-unit* spaces for each of the *indent-level* levels,
 27 | ;; so it is empty when either of them is zero or negative.
 28 | (define (ind)
 29 |   (let ((levels (in-range *indent-level*))
 30 |         (columns (in-range *indent-unit*)))
 31 |     (list->string
 32 |      (for*/list ((level levels)
 33 |                  (column columns))
 34 |        #\space))))
 35 | 
 36 | ;; Raise the current indent level by n.
 37 | (define (indent+ n)
 38 |   (set! *indent-level* (+ *indent-level* n)))
 39 | 
 40 | ;; Lower the current indent level by n.
 41 | (define (indent- n)
 42 |   (set! *indent-level* (- *indent-level* n)))
 43 | 
 44 | ;; Evaluate a with the indent level raised by one (see indentn).
 45 | (define-syntax indent
 46 |   (syntax-rules ()
 47 |     ((_ a) (indentn 1 a))))
 48 | 
 49 | ;; Evaluate a with the indent level raised by num, lower it again once a has returned, and give a's value.
 50 | (define-syntax indentn
 51 |   (syntax-rules ()
 52 |     ((_ num a) (begin
 53 |                  (indent+ num)
 54 |                  (let ((val a))
 55 |                    (indent- num)
 56 |                    val)))))
 57 | 
 58 | ;; The string "true" for any true value, "false" for #f.
 59 | (define (bool->s b)
 60 |   (if b "true" "false"))
 61 | 
 62 | ;; A comment block at the current indent: the top line, the lines prefixed with unit, then the bot line.
 63 | (define (comment-bookend top unit bot lines)
 64 |   (string-append (ind) top "\n" (comment-base unit lines) (ind) bot "\n"))
 65 | 
 66 | ;; One output line per entry of lines: indent, unit, a space and the text. An empty entry gets just indent and unit.
 67 | (define (comment-base unit lines)
 68 |   (string-concat (map (lambda (a)
 69 |                         (if (equal? "" a)
 70 |                             (format "~a~a\n" (ind) unit)
 71 |                             (format "~a~a ~a\n" (ind) unit a)))
 72 |                       lines)))
 73 | 
 74 | ;; Comment lines that use "#" as the comment marker.
 75 | (define (script-comment lines)
 76 |   (comment-base "#" lines))
 77 | 
 78 | ;; Lower-case the first character of s; the rest is unchanged.
 79 | (define (camel-case-lower s)
 80 |   (if (string=? s "")
 81 |       ""
 82 |       (string-append (string (char-downcase (string-ref s 0)))
 83 |                      (substring s 1))))
 84 | 
 85 | ;; Upper-case the first character of s; the rest is unchanged.
 86 | (define (camel-case-upper s)
 87 |   (if (string=? s "")
 88 |       ""
 89 |       (string-append (string (char-upcase (string-ref s 0)))
 90 |                      (substring s 1))))
 91 | 
 92 | ;; Upper-case every character of s individually with char-upcase.
 93 | (define (string->upper s)
 94 |   (build-string (string-length s) (lambda (i) (char-upcase (string-ref s i)))))
 95 | 
 96 | ;; Is ch a backslash or a single quote?
 97 | (define (escape-for-java-char? ch)
 98 |   (if (memv ch '(#\\ #\')) #t #f))
 99 | 
100 | ;; Escape every double quote in s with a backslash; all else is left alone.
101 | (define (escape-java-string s)
102 |   ;; The characters that stand in for ch in the output
103 |   (define (escaped ch)
104 |     (if (equal? ch #\")
105 |         (list #\\ #\")
106 |         (list ch)))
107 |   (list->string
108 |    (apply append
109 |           (map escaped (string->list s)))))
110 | 


--------------------------------------------------------------------------------
/src/waxeye/debug.rkt:
--------------------------------------------------------------------------------
 1 | #lang racket/base
 2 | 
 3 | (provide debug
 4 |          *debug*
 5 |          debug!)
 6 | 
 7 | 
 8 | (define *debug* #f)
 9 | ;; Set the *debug* flag to v.
10 | (define (debug! v)
11 |   (set! *debug* v))
12 | ;; Evaluate the body forms only when *debug* is true.
13 | (define-syntax debug
14 |   (syntax-rules ()
15 |     ((_ a ...)
16 |      (when *debug*
17 |            a ...))))
18 | 


--------------------------------------------------------------------------------
/src/waxeye/dfa.rkt:
--------------------------------------------------------------------------------
  1 | #lang racket/base
  2 | 
  3 | (require waxeye/ast
  4 |          waxeye/fa
  5 |          waxeye/set
  6 |          "debug.rkt"
  7 |          "gen.rkt"
  8 |          "nfa.rkt"
  9 |          "set.rkt"
 10 |          "util.rkt")
 11 | 
 12 | (provide make-automata)
 13 | ;; Build the vector of automata (fa structs): one per child of the grammar ast, then one per entry of `unwinds`.
 14 | ;; NOTE(review): `unwinds` is read in the same let that runs make-dfa - presumably make-dfa fills it first; confirm in nfa.rkt
 15 | (define (make-automata grammar)
 16 |   (reset-nfa-builder)
 17 |   (let ((dfas (map make-dfa (ast-c grammar)))
 18 |         (unwind-dfas (map make-unwind-dfa unwinds))
 19 |         (nt-table (make-hash))
 20 |         (i 0))
 21 |     ;; Replace each non-term string reference in the state with the
 22 |     ;; non-term's index
 23 |     (define (nt-edges s)
 24 |       (set-state-edges!
 25 |         s
 26 |         (map
 27 |           (lambda (a)
 28 |             ;; If the transition is a non-terminal or unwind
 29 |             (if (or (string? (edge-t a)) (integer? (edge-t a)))
 30 |               ;; Create edge using the new index
 31 |               (edge (hash-ref nt-table (edge-t a)) (edge-s a) (edge-v a))
 32 |               ;; Otherwise, leave as it is
 33 |               a))
 34 |           (state-edges s))))
 35 |     (for-each (lambda (a) (hash-set! nt-table (car a) i) (set! i (+ i 1))) dfas) ;; Hash the nt indexes against their names
 36 |     ;; Hash the unwind indexes against their old index
 37 |     (let loop ((u-dfas unwind-dfas) (j 0))
 38 |       (when (not (null? u-dfas))
 39 |             (hash-set! nt-table j i)
 40 |             (set! i (+ i 1))
 41 |             (loop (cdr u-dfas) (+ j 1))))
 42 |     (set! dfas (append dfas unwind-dfas))
 43 |     (for-each (lambda (a) (for-each nt-edges (cadr a))) dfas) ;; Replace the non-term names with indexes
 44 |     (list->vector (map (lambda (a)
 45 |                          (fa (if (string? (car a))
 46 |                                  (string->symbol (car a))
 47 |                                  (car a))
 48 |                              (list->vector (cadr a)) (caddr a)))
 49 |                        dfas))))
 50 | 
 51 | 
 52 | (define (make-unwind-dfa unwind-nfa)
 53 |   (let ((type (car unwind-nfa))
 54 |         (nfa (cdr unwind-nfa)))
 55 |     (debug
 56 |      (display-ln type " NFA:")
 57 |      (display-states (vector->list nfa))
 58 |      (newline))
 59 |     (let ((dfa (nfa->dfa nfa)))
 60 |       (debug
 61 |        (display-ln type " DFA:")
 62 |        (display-states dfa)
 63 |        (newline))
 64 |       (list type dfa 'voidArrow))))
 65 | 
 66 | 
 67 | ;; Creates an automaton from a non-term definition
 68 | ;; Returns a list of the non-term's name followed by the states of the automaton
 69 | (define (make-dfa nt)
 70 |   (let ((nfa (make-nfa nt)))
 71 |     (debug
 72 |      (display-ln "NFA:")
 73 |      (display-states (vector->list nfa))
 74 |      (newline))
 75 |     (let ((dfa (nfa->dfa nfa)))
 76 |       (debug
 77 |        (display-ln "DFA:")
 78 |        (display-states dfa)
 79 |        (newline))
 80 |       (list (get-non-term nt) dfa (get-arrow nt)))))
 81 | 
 82 | 
;; Converts an NFA (a vector of states indexed by state number) into a list
;; of DFA states. Each DFA state stands for a list of NFA state indexes; the
;; DFA state's number is its position in the returned list, and the start
;; state (the e-closure of NFA state 0) is number 0.
(define (nfa->dfa nfa)
  (let ((state-table (make-hash)) ; list of NFA indexes -> DFA state number
        (state-list '())          ; DFA states created so far, newest first
        (state-count 0))          ; next DFA state number to hand out

    ;; Returns a list of states reachable by 'e' moves
    ;; This includes the starting state
    (define (e-closure state-index)
      (define (e-closure-rec state-index e-table)
        (let ((hv (hash-ref e-table state-index #f)))
          (if hv
              hv
              (let ((l (list state-index)))
                ;; Mark the state as in progress. '() is a true value, so a
                ;; revisit through a cycle of 'e' edges returns '() instead
                ;; of recursing again.
                (hash-set! e-table state-index '())
                (for-each (lambda (a)
                            (when (and (equal? (edge-t a) 'e) (not (member (edge-s a) l)))
                                  (set! l (append l (e-closure-rec (edge-s a) e-table)))))
                          (state-edges (vector-ref nfa state-index)))
                (hash-set! e-table state-index l)
                l))))
      ;; Each top-level call starts with a fresh memo table
      (e-closure-rec state-index (make-hash)))

    ;; Builds the outgoing DFA edges for a set of NFA states: the set's edges
    ;; are collected, compacted and grouped, and each group's targets are
    ;; replaced by the number of the DFA state for their combined e-closures.
    (define (make-dfa-edges state-set)
      (map (lambda (a)
             (edge
               (edge-t a)
               (get-dfa-state (list-concat (map e-closure (edge-s a))))
               (edge-v a)))
           (group-edges (compact-edges (get-edges nfa state-set)))))

    ;; Returns the DFA state number for a list of NFA state indexes, creating
    ;; the DFA state (and, recursively, the states it leads to) on first use.
    (define (get-dfa-state state-set)
      (let ((state-num (hash-ref state-table state-set #f)))
        (if state-num
            state-num
            (begin
              ;; Register the number before building the edges so that edges
              ;; leading back to this same set find it in the table.
              (hash-set! state-table state-set state-count)
              (set! state-num state-count)
              (set! state-count (+ state-count 1))
              ;; The edges start out as #f and are filled in below
              (let ((new-state (state
                                 #f
                                 ;; Is our state-set an end state?
                                 (not (not (memf
                                             (lambda (a)
                                               (state-match (vector-ref nfa a)))
                                             state-set))))))
                ;; Ensure prefix traversal
                (set! state-list (cons new-state state-list))

                (set-state-edges! new-state (make-dfa-edges state-set)))
              state-num))))

    (get-dfa-state (e-closure 0))
    ;; state-list was built newest first; reverse it into numbering order
    (reverse state-list)))
136 | 
137 | 
;; Merges runs of adjacent edges that share both transition and `v` value.
;; Every edge in the result carries a list of target states instead of a
;; single target. The list is processed from the right.
(define (group-edges edge-list)
  (foldr (lambda (cur grouped)
           (let ((t (edge-t cur))
                 (v (edge-v cur)))
             (if (and (pair? grouped)
                      (equal? t (edge-t (car grouped)))
                      (equal? v (edge-v (car grouped))))
                 ;; Same transition as the group to the right: join it
                 (cons (edge t (cons (edge-s cur) (edge-s (car grouped))) v)
                       (cdr grouped))
                 ;; Otherwise start a new single-target group
                 (cons (edge t (list (edge-s cur)) v)
                       grouped))))
         '()
         edge-list))
152 | 
153 | 
;; Drops later edges that are made redundant by an earlier edge with the same
;; target state and `v` value: exact duplicates, and edges whose transition
;; is covered by the earlier edge's transition list according to `subset?`
;; (list transitions) or `within-set?` (single-character transitions).
(define (compact-edges edges)
  (if (null? edges)
      '()
      (let* ((head (car edges))
             (ht (edge-t head))
             (hs (edge-s head))
             (hv (edge-v head)))
        ;; Is edge `a` made redundant by `head`?
        (define (covered-by-head? a)
          (let ((t (edge-t a)))
            (and (equal? (edge-s a) hs)
                 (equal? (edge-v a) hv)
                 (or (equal? t ht)
                     (and (list? ht) (list? t) (subset? ht t))
                     (and (list? ht) (char? t) (within-set? ht t))))))
        (cons head
              (compact-edges
               (filter (lambda (a) (not (covered-by-head? a)))
                       (cdr edges)))))))
168 | 
169 | 
;; Collects, in order, the non-'e' edges of every state in `state-set`.
;; An 'e' edge is followed in place, so the edges of the state it points to
;; appear where the 'e' edge stood. Each state is expanded at most once.
(define (get-edges state-vector state-set)
  (define seen (make-hash))
  (define collected '())
  (define (collect! idx)
    (unless (hash-ref seen idx #f)
      (hash-set! seen idx #t)
      (for ((e (in-list (state-edges (vector-ref state-vector idx)))))
        (cond
         ((equal? (edge-t e) 'e) (collect! (edge-s e)))
         (else (set! collected (cons e collected)))))))
  (for-each collect! state-set)
  ;; Edges were consed on newest first
  (reverse collected))
187 | 
188 | 
;; Prints one line per state: the state's edges as "(t s v) " groups inside
;; parentheses, followed by the state's match flag.
;; Despite the parameter name, `state-vector` must be a list of states.
(define (display-states state-vector)
  (define (show-edge e)
    (display "(")
    (print (edge-t e))
    (display " ")
    (print (edge-s e))
    (display " ")
    (print (edge-v e))
    (display ") "))
  (for ((st (in-list state-vector)))
    (display "(")
    (for-each show-edge (state-edges st))
    (display-ln ") " (state-match st))))
203 | 


--------------------------------------------------------------------------------
/src/waxeye/dot.rkt:
--------------------------------------------------------------------------------
 1 | #lang racket/base
 2 | 
 3 | (require "util.rkt")
 4 | 
 5 | 
 6 | (define (display-dot name state)
 7 |   (let ((visited (make-hash)))
 8 |     (hash-set! visited state "match")
 9 |     (display-ln "digraph " name " {")
10 |     (display-state visited state)
11 |     (display-ln "\"match\" [ label = \"match\" ];")
12 |     (display-ln "}")))
13 | 
14 | 
;; Writes the Graphviz node line and outgoing edge lines for `state`, then
;; does the same for every state its edges lead to.
;;
;; visited - mutable hash mapping states to the node names used in the
;;           output; states without a name yet are given a fresh gensym
;; state   - the state to start from; states for which `state-match` is
;;           true are not written and their edges are not followed
;;
;; Edges are read as pairs: the car is the edge label, the cdr the target.
;; Every state is written at most once per call. Without that check a cycle
;; between states would recurse forever, and a state reachable along several
;; paths would be written repeatedly.
(define (display-state visited state)
  (define emitted (make-hash)) ; states already written during this call

  ;; Returns the node name of `s`, assigning a fresh one on first use
  (define (state-name s)
    (or (hash-ref visited s #f)
        (let ((fresh (gensym)))
          (hash-set! visited s fresh)
          fresh)))

  (let walk ((s state))
    (unless (or (state-match s) (hash-ref emitted s #f))
      (hash-set! emitted s #t)
      (display-ln "\"" (state-name s) "\""
                  "[ label = \"\" ];")
      (for-each (lambda (a)
                  (display-ln "\"" (state-name s) "\""
                              "->"
                              "\"" (state-name (cdr a)) "\""
                              "[ label = \"" (car a) "\" ];")
                  (walk (cdr a)))
                (state-edges s)))))
34 | 


--------------------------------------------------------------------------------
/src/waxeye/expand.rkt:
--------------------------------------------------------------------------------
  1 | #lang racket/base
  2 | 
  3 | (require waxeye/ast
  4 |          "gen.rkt")
  5 | 
  6 | (provide (all-defined-out))
  7 | 
  8 | 
  9 | (define (expand-grammar grammar)
 10 |   (define (lift-only-sub-exp visitor exp)
 11 |     (let ((chil (ast-c exp)))
 12 |       (for-each visitor chil)
 13 |       (when (= (length chil) 1) ; When we only have the one exp
 14 |             (let ((only (car chil))); Lift that to become our new expression
 15 |               (set-ast-t! exp (ast-t only))
 16 |               (set-ast-c! exp (ast-c only))
 17 |               (set-ast-p! exp (ast-p only))))))
 18 | 
 19 |   (define (visit-alternation exp)
 20 |     (lift-only-sub-exp visit-sequence exp))
 21 | 
 22 |   (define (visit-sequence exp)
 23 |     (set-ast-c! exp (map expand-unit (ast-c exp)))
 24 |     (lift-only-sub-exp visit-exp exp))
 25 | 
 26 |   (define (visit-only-child exp)
 27 |     (visit-exp (car (ast-c exp))))
 28 | 
 29 |   (define (visit-exp exp)
 30 |     (let ((type (ast-t exp)))
 31 |       (case type
 32 |        ((action) (void))
 33 |        ((alternation) (visit-alternation exp))
 34 |        ((and) (visit-only-child exp))
 35 |        ((caseLiteral) (visit-case-literal exp))
 36 |        ((charClass) (visit-char-class exp))
 37 |        ((closure) (visit-only-child exp))
 38 |        ((identifier) (void))
 39 |        ((label) (void))
 40 |        ((literal) (visit-literal exp))
 41 |        ((not) (visit-only-child exp))
 42 |        ((optional) (visit-only-child exp))
 43 |        ((plus) (visit-only-child exp))
 44 |        ((sequence) (visit-sequence exp))
 45 |        ((void) (visit-only-child exp))
 46 |        ((wildCard) (void))
 47 |        (else (error 'expand-grammar "unknown expression type: ~s" type)))))
 48 | 
 49 |   (define (expand-def def)
 50 |     (visit-alternation (caddr (ast-c def))))
 51 |   (for-each expand-def (get-defs grammar)))
 52 | 
 53 | 
 54 | (define (expand-unit exp)
 55 |   (define (make-prefix v e)
 56 |     (let ((r (car (ast-c v))))
 57 |       (ast
 58 |        (cond
 59 |         ((equal? r #\*) 'closure)
 60 |         ((equal? r #\+) 'plus)
 61 |         ((equal? r #\?) 'optional)
 62 |         ((equal? r #\:) 'void)
 63 |         ((equal? r #\&) 'and)
 64 |         ((equal? r #\!) 'not)
 65 |         (else (error 'make-prefix "unknown expression type: ~s" r)))
 66 |        (list e)
 67 |        (cons 0 0))))
 68 | 
 69 |   (define (make-label v e)
 70 |     (let ((r (car (ast-c v))))
 71 |       (ast 'label (list e) (cons 0 0))))
 72 | 
 73 |   (define (expand-unit-iter el)
 74 |     (let ((rest (cdr el)))
 75 |       (if (null? rest)
 76 |           (car el)
 77 |           (let ((type (ast-t (car el))))
 78 |             ((case type
 79 |               ((prefix) make-prefix)
 80 |               ((label) make-label)
 81 |               (else (error 'expand-unit-iter "unknown expression type: ~s" type)))
 82 |              (car el)
 83 |              (expand-unit-iter rest))))))
 84 |   (expand-unit-iter (ast-c exp)))
 85 | 
 86 | 
 87 | (define (visit-case-literal exp)
 88 |   (define (cc-chil c)
 89 |     (if (char-alphabetic? c)
 90 |         (list (char-upcase c) (char-downcase c))
 91 |         (list c)))
 92 |   (convert-chars! exp)
 93 |   (let ((letters (ast-c exp)))
 94 |     (if (memf char-alphabetic? letters)
 95 |         (if (null? (cdr letters))
 96 |             (let ((c (car letters)))
 97 |               (set-ast-t! exp 'charClass)
 98 |               (set-ast-c! exp (cc-chil c)))
 99 |             (begin
100 |               (set-ast-t! exp 'sequence)
101 |               (set-ast-c! exp (map (lambda (a)
102 |                                  (ast 'charClass (cc-chil a) (cons 0 0)))
103 |                                letters))))
104 |         (set-ast-t! exp 'literal))))
105 | 
106 | 
;; Converts a parsed character node into the character it denotes.
;;  - a 'hex node: its children are read as base-16 digits of the code point
;;  - a node with one child: that child
;;  - otherwise the second child is an escaped character; n, t and r map to
;;    linefeed, tab and return, anything else stands for itself
(define (convert-char c)
  (if (equal? (ast-t c) 'hex)
      (integer->char (string->number (list->string (ast-c c)) 16))
      (let ((kids (ast-c c)))
        (if (= (length kids) 1)
            (car kids)
            (let ((escaped (cadr kids)))
              (case escaped
                ((#\n) #\linefeed)
                ((#\t) #\tab)
                ((#\r) #\return)
                (else escaped)))))))
123 | 
124 | 
;; Replaces each child of `exp` with the character it denotes.
(define (convert-chars! exp)
  (set-ast-c! exp
              (for/list ((child (in-list (ast-c exp))))
                (convert-char child))))


;; A literal only needs its children converted to characters.
(define (visit-literal exp)
  (convert-chars! exp))
131 | 
132 | 
;; Normalises a character class in place: its children become a list,
;; sorted by starting character, of single characters and (low . high)
;; ranges in which duplicate, overlapping and adjacent entries are merged.
(define (visit-char-class exp)
  ;; Convert one part of the class to a character or a (low . high) pair.
  ;; A range whose ends are equal collapses to a character, and a range
  ;; written back to front is flipped.
  (define (cc-part part)
    (let ((range (ast-c part)))
      (if (= (length range) 1)
          (convert-char (car range))
          (let ((r1 (convert-char (car range))) (r2 (convert-char (cadr range))))
            (cond
             ((char=? r1 r2) r1)
             ((char<? r1 r2) (cons r1 r2))
             (else
              (cons r2 r1)))))))

  ;; The order of entries with the same start doesn't matter as they get
  ;; merged no matter what their ends are.
  (define (cc-less-than? a b)
    (char<? (if (char? a)
                a
                (car a))
            (if (char? b)
                b
                (car b))))

  ;; Merge neighbouring entries of a list sorted by starting character
  (define (minimise cc)
    (define (next-to? a b)
      (= (- (char->integer b) (char->integer a)) 1))
    (define (char-min a b) (if (char<? b a) b a))
    (define (char-max a b) (if (char<? a b) b a))
    (if (or (null? cc) (null? (cdr cc)))
        cc
        (let ((a (car cc)) (b (cadr cc)) (more (cddr cc)))
          ;; Replace a and b by `merged` and look at it again
          (define (merge-as merged)
            (minimise (cons merged more)))
          ;; a can't be merged with b; keep it and carry on from b
          (define (keep-a)
            (cons a (minimise (cdr cc))))
          (cond
           ((and (char? a) (char? b))
            (cond
             ((char=? a b) (merge-as a)) ; Is duplicate char?
             ((next-to? a b) (merge-as (cons a b))) ; Is a next to b?
             (else (keep-a))))
           ((char? a) ; b is a range
            ;; Does range b start at a, or right after a?
            ;; A char equal to the start of the range is already covered
            (if (or (char=? a (car b))
                    (next-to? a (car b)))
                (merge-as (cons a (cdr b)))
                (keep-a)))
           ((char? b) ; a is a range
            (cond
             ((char<=? b (cdr a)) (merge-as a)) ; Is b within range a?
             ((next-to? (cdr a) b) (merge-as (cons (car a) b))) ; Is b next to range a?
             (else (keep-a))))
           (else ; both are ranges
            (if (or (char<=? (car b) (cdr a)) ; Can we merge the ranges?
                    (next-to? (cdr a) (car b)))
                (merge-as (cons (char-min (car a) (car b))
                                (char-max (cdr a) (cdr b))))
                (keep-a)))))))

  (set-ast-c! exp (minimise (sort (map cc-part (ast-c exp)) cc-less-than?))))
190 | 


--------------------------------------------------------------------------------
/src/waxeye/file.rkt:
--------------------------------------------------------------------------------
 1 | #lang racket/base
 2 | 
 3 | (provide (all-defined-out))
 4 | 
 5 | 
 6 | (define (input-as-string port)
 7 |   (define (input-as-iter)
 8 |     (let ((ch (read-char port)))
 9 |       (if (eof-object? ch)
10 |           '()
11 |           (cons ch (input-as-iter)))))
12 |   (list->string (input-as-iter)))
13 | 
14 | 
;; Opens the file at `path` for reading and returns its whole contents as a
;; single string.
(define (file-as-string path)
  (call-with-input-file path
    (lambda (in)
      (input-as-string in))))
18 | 
19 | 
;; Returns the lines of the file at `path` as a list of strings.
;; Lines are split on linefeed only, so a carriage return before a linefeed
;; stays at the end of its line.
(define (file-as-string-lines path)
  (call-with-input-file path
    (lambda (in)
      (for/list ((line (in-lines in 'linefeed)))
        line))))
27 | 


--------------------------------------------------------------------------------
/src/waxeye/gen.rkt:
--------------------------------------------------------------------------------
 1 | #lang racket/base
 2 | 
 3 | (require (only-in racket/list index-of)
 4 |          waxeye/ast)
 5 | 
 6 | (provide (all-defined-out))
 7 | 
 8 | 
 9 | (define *eof-check* #t)
10 | (define *expression-level* '())
11 | (define *file-header* #f)
12 | (define *module-name* #f)
13 | (define *name-prefix* #f)
14 | (define *start-index* 0)
15 | (define *start-name* "")
16 | 
17 | 
18 | (define (eof-check! val)
19 |   (set! *eof-check* val))
20 | 
21 | 
22 | (define (file-header! val)
23 |   (set! *file-header* val))
24 | 
25 | 
26 | (define (module-name! val)
27 |   (set! *module-name* val))
28 | 
29 | 
30 | (define (name-prefix! val)
31 |   (set! *name-prefix* val))
32 | 
33 | 
34 | (define (start-index! val)
35 |   (set! *start-index* val))
36 | 
37 | 
38 | (define (start-name! val)
39 |   (set! *start-name* val))
40 | 
41 | 
;; Selects the starting non-terminal of `grammar` and records it in
;; *start-name* and *start-index*.
;;
;; name    - name of the non-terminal to start from, or "" to use the first
;;           definition of the grammar
;; grammar - the grammar whose definitions are searched
;;
;; Raises an error when `name` is "" and the grammar has no definitions, or
;; when no definition is called `name`.
(define (start-nt! name grammar)
  (set! *start-name* name)
  (let ((defs (get-defs grammar)))
    (cond
     ((equal? name "")
      (when (null? defs)
        (error 'waxeye "Can't choose a starting non-terminal: the grammar has no definitions"))
      (start-name! (get-non-term (car defs)))
      ;; Reset the index too, so that a value left over from an earlier call
      ;; can't point away from the first definition
      (start-index! 0))
     (else
      (let ((si (index-of (map get-non-term defs) name)))
        (if si
            (start-index! si)
            (error 'waxeye "Can't find definition of starting non-terminal: ~a" name)))))))
51 | 
52 | 
;; *expression-level* is used as a stack with its top at the front.

;; Push `level` onto the stack.
(define (push-exp-level level)
  (let ((below *expression-level*))
    (set! *expression-level* (cons level below))))


;; Remove the top of the stack and return it.
;; The stack must not be empty.
(define (pop-exp-level)
  (begin0
    (car *expression-level*)
    (set! *expression-level* (cdr *expression-level*))))


;; Return the top of the stack without removing it.
;; The stack must not be empty.
(define (peek-exp-level)
  (let ((stack *expression-level*))
    (car stack)))
65 | 
66 | 
;; Accessors for a grammar tree. A grammar's children are its definitions;
;; a definition's children are read as: first the name node, second the
;; arrow node, third the alternation.

;; Names of all definitions of the grammar, as strings.
(define (get-non-terms grammar)
  (for/list ((def (in-list (ast-c grammar))))
    (get-non-term def)))


;; Name of a definition: the characters of its first child, as a string.
(define (get-non-term def)
  (let ((name-node (car (ast-c def))))
    (list->string (ast-c name-node))))


;; The list of definitions of the grammar.
(define (get-defs grammar)
  (let ((defs (ast-c grammar)))
    defs))


;; Arrow type of a definition: the type of its second child.
(define (get-arrow def)
  (let ((arrow-node (cadr (ast-c def))))
    (ast-t arrow-node)))


;; The alternation of a definition: its third child.
(define (get-alternation def)
  (let ((parts (ast-c def)))
    (caddr parts)))
85 | 


--------------------------------------------------------------------------------
/src/waxeye/grammar-parser.rkt:
--------------------------------------------------------------------------------
  1 | ;; This is the parser for Waxeye grammar files. It was generated from
  2 | ;; the grammar 'grammars/waxeye.waxeye'.
  3 | 
  4 | #lang racket/base
  5 | 
  6 | (require waxeye/ast
  7 |          waxeye/fa
  8 |          waxeye/parser)
  9 | 
 10 | (provide (all-from-out waxeye/ast)
 11 |          grammar-parser)
 12 | 
 13 | 
;; Generated table: one automaton (`fa`) per non-terminal of the Waxeye
;; grammar language, followed by the automata named '!.
;; Each `fa` holds a name, a vector of states and an arrow type. Each state
;; holds a list of edges and a flag telling whether it is a match state.
;; An edge is (edge transition target-state-index flag). The transition is
;; an integer index into this vector (for example 27 is 'ws), a character,
;; a list of characters and (low . high) ranges, or 'wild.
;; NOTE(review): the third edge field looks like a "voided" marker - confirm
;; against waxeye/fa.
;; Do not edit by hand; regenerate from 'grammars/waxeye.waxeye'.
(define automata
  (vector
   (fa 'grammar (vector
    (state (list
     (edge 27 1 #f)) #f)
    (state (list
     (edge 1 1 #f)) #t)) 'leftArrow)
   (fa 'definition (vector
    (state (list
     (edge 8 1 #f)) #f)
    (state (list
     (edge 17 2 #f)
     (edge 18 2 #f)
     (edge 19 2 #f)) #f)
    (state (list
     (edge 2 3 #f)) #f)
    (state (list
     (edge 27 4 #f)) #f)
    (state (list) #t)) 'leftArrow)
   (fa 'alternation (vector
    (state (list
     (edge 3 1 #f)) #f)
    (state (list
     (edge 20 2 #f)) #t)
    (state (list
     (edge 3 1 #f)) #f)) 'leftArrow)
   (fa 'sequence (vector
    (state (list
     (edge 4 1 #f)) #f)
    (state (list
     (edge 4 1 #f)) #t)) 'leftArrow)
   (fa 'unit (vector
    (state (list
     (edge 5 0 #f)
     (edge 6 0 #f)
     (edge 8 1 #f)
     (edge 21 3 #f)
     (edge 7 2 #f)
     (edge 9 2 #f)
     (edge 10 2 #f)
     (edge 12 2 #f)
     (edge 16 2 #f)) #f)
    (state (list
     (edge 28 2 #f)) #f)
    (state (list) #t)
    (state (list
     (edge 2 4 #f)) #f)
    (state (list
     (edge 22 2 #f)) #f)) 'leftArrow)
   (fa 'prefix (vector
    (state (list
     (edge (list #\! #\& (cons #\* #\+) #\: #\?) 1 #f)) #f)
    (state (list
     (edge 27 2 #f)) #f)
    (state (list) #t)) 'leftArrow)
   (fa 'label (vector
    (state (list
     (edge 8 1 #f)) #f)
    (state (list
     (edge 27 2 #f)) #f)
    (state (list
     (edge #\= 3 #t)) #f)
    (state (list
     (edge 27 4 #f)) #f)
    (state (list) #t)) 'leftArrow)
   (fa 'action (vector
    (state (list
     (edge #\@ 1 #t)) #f)
    (state (list
     (edge 8 2 #f)) #f)
    (state (list
     (edge #\< 3 #t)
     (edge 27 8 #f)) #f)
    (state (list
     (edge 27 4 #f)) #f)
    (state (list
     (edge 8 5 #f)) #f)
    (state (list
     (edge 23 6 #f)
     (edge #\> 7 #t)) #f)
    (state (list
     (edge 8 5 #f)) #f)
    (state (list
     (edge 27 8 #f)) #f)
    (state (list) #t)) 'leftArrow)
   (fa 'identifier (vector
    (state (list
     (edge (list (cons #\A #\Z) #\_ (cons #\a #\z)) 1 #f)) #f)
    (state (list
     (edge (list #\- (cons #\0 #\9) (cons #\A #\Z) #\_ (cons #\a #\z)) 1 #f)
     (edge 27 2 #f)) #f)
    (state (list) #t)) 'leftArrow)
   (fa 'literal (vector
    (state (list
     (edge (list #\') 1 #t)) #f)
    (state (list
     (edge 30 2 #f)) #f)
    (state (list
     (edge 11 3 #f)
     (edge 15 3 #f)) #f)
    (state (list
     (edge 29 4 #f)
     (edge (list #\') 5 #t)) #f)
    (state (list
     (edge 11 3 #f)
     (edge 15 3 #f)) #f)
    (state (list
     (edge 27 6 #f)) #f)
    (state (list) #t)) 'leftArrow)
   (fa 'caseLiteral (vector
    (state (list
     (edge (list #\") 1 #t)) #f)
    (state (list
     (edge 32 2 #f)) #f)
    (state (list
     (edge 11 3 #f)
     (edge 15 3 #f)) #f)
    (state (list
     (edge 31 4 #f)
     (edge (list #\") 5 #t)) #f)
    (state (list
     (edge 11 3 #f)
     (edge 15 3 #f)) #f)
    (state (list
     (edge 27 6 #f)) #f)
    (state (list) #t)) 'leftArrow)
   (fa 'lChar (vector
    (state (list
     (edge #\\ 1 #f)
     (edge 34 3 #f)) #f)
    (state (list
     (edge (list #\" #\' #\\ #\n #\r #\t) 2 #f)) #f)
    (state (list) #t)
    (state (list
     (edge 33 4 #f)) #f)
    (state (list
     (edge 'wild 2 #f)) #f)) 'leftArrow)
   (fa 'charClass (vector
    (state (list
     (edge #\[ 1 #t)) #f)
    (state (list
     (edge 35 2 #f)
     (edge #\] 3 #t)) #f)
    (state (list
     (edge 13 1 #f)) #f)
    (state (list
     (edge 27 4 #f)) #f)
    (state (list) #t)) 'leftArrow)
   (fa 'range (vector
    (state (list
     (edge 14 1 #f)
     (edge 15 1 #f)) #f)
    (state (list
     (edge #\- 2 #t)) #t)
    (state (list
     (edge 14 3 #f)
     (edge 15 3 #f)) #f)
    (state (list) #t)) 'leftArrow)
   (fa 'char (vector
    (state (list
     (edge #\\ 1 #f)
     (edge 38 3 #f)) #f)
    (state (list
     (edge (list #\- (cons #\\ #\]) #\n #\r #\t) 2 #f)) #f)
    (state (list) #t)
    (state (list
     (edge 37 4 #f)) #f)
    (state (list
     (edge 36 5 #f)) #f)
    (state (list
     (edge 'wild 2 #f)) #f)) 'leftArrow)
   (fa 'hex (vector
    (state (list
     (edge #\\ 1 #t)) #f)
    (state (list
     (edge #\< 2 #t)) #f)
    (state (list
     (edge (list (cons #\0 #\9) (cons #\A #\F) (cons #\a #\f)) 3 #f)) #f)
    (state (list
     (edge (list (cons #\0 #\9) (cons #\A #\F) (cons #\a #\f)) 4 #f)) #f)
    (state (list
     (edge #\> 5 #t)) #f)
    (state (list) #t)) 'leftArrow)
   (fa 'wildCard (vector
    (state (list
     (edge #\. 1 #t)) #f)
    (state (list
     (edge 27 2 #f)) #f)
    (state (list) #t)) 'leftArrow)
   (fa 'leftArrow (vector
    (state (list
     (edge #\< 1 #t)) #f)
    (state (list
     (edge #\- 2 #t)) #f)
    (state (list
     (edge 27 3 #f)) #f)
    (state (list) #t)) 'leftArrow)
   (fa 'pruneArrow (vector
    (state (list
     (edge #\< 1 #t)) #f)
    (state (list
     (edge #\= 2 #t)) #f)
    (state (list
     (edge 27 3 #f)) #f)
    (state (list) #t)) 'leftArrow)
   (fa 'voidArrow (vector
    (state (list
     (edge #\< 1 #t)) #f)
    (state (list
     (edge #\: 2 #t)) #f)
    (state (list
     (edge 27 3 #f)) #f)
    (state (list) #t)) 'leftArrow)
   (fa 'alt (vector
    (state (list
     (edge #\| 1 #f)) #f)
    (state (list
     (edge 27 2 #f)) #f)
    (state (list) #t)) 'voidArrow)
   (fa 'open (vector
    (state (list
     (edge #\( 1 #f)) #f)
    (state (list
     (edge 27 2 #f)) #f)
    (state (list) #t)) 'voidArrow)
   (fa 'close (vector
    (state (list
     (edge #\) 1 #f)) #f)
    (state (list
     (edge 27 2 #f)) #f)
    (state (list) #t)) 'voidArrow)
   (fa 'comma (vector
    (state (list
     (edge #\, 1 #f)) #f)
    (state (list
     (edge 27 2 #f)) #f)
    (state (list) #t)) 'voidArrow)
   (fa 'sComment (vector
    (state (list
     (edge #\# 1 #f)) #f)
    (state (list
     (edge 40 2 #f)
     (edge 26 3 #f)
     (edge 39 3 #f)) #f)
    (state (list
     (edge 'wild 1 #f)) #f)
    (state (list) #t)) 'voidArrow)
   (fa 'mComment (vector
    (state (list
     (edge #\/ 1 #f)) #f)
    (state (list
     (edge #\* 2 #f)) #f)
    (state (list
     (edge 25 2 #f)
     (edge 41 3 #f)
     (edge #\* 4 #f)) #f)
    (state (list
     (edge 'wild 2 #f)) #f)
    (state (list
     (edge #\/ 5 #f)) #f)
    (state (list) #t)) 'voidArrow)
   (fa 'endOfLine (vector
    (state (list
     (edge #\return 1 #f)
     (edge #\newline 2 #f)
     (edge #\return 2 #f)) #f)
    (state (list
     (edge #\newline 2 #f)) #f)
    (state (list) #t)) 'voidArrow)
   (fa 'ws (vector
    (state (list
     (edge (list #\tab #\space) 0 #f)
     (edge 26 0 #f)
     (edge 24 0 #f)
     (edge 25 0 #f)) #t)) 'voidArrow)
   (fa '! (vector
    (state (list
     (edge 17 1 #f)
     (edge 18 1 #f)
     (edge 19 1 #f)) #f)
    (state (list) #t)) 'voidArrow)
   (fa '! (vector
    (state (list
     (edge (list #\') 1 #f)) #f)
    (state (list) #t)) 'voidArrow)
   (fa '! (vector
    (state (list
     (edge (list #\') 1 #f)) #f)
    (state (list) #t)) 'voidArrow)
   (fa '! (vector
    (state (list
     (edge (list #\") 1 #f)) #f)
    (state (list) #t)) 'voidArrow)
   (fa '! (vector
    (state (list
     (edge (list #\") 1 #f)) #f)
    (state (list) #t)) 'voidArrow)
   (fa '! (vector
    (state (list
     (edge 26 1 #f)) #f)
    (state (list) #t)) 'voidArrow)
   (fa '! (vector
    (state (list
     (edge #\\ 1 #f)) #f)
    (state (list) #t)) 'voidArrow)
   (fa '! (vector
    (state (list
     (edge #\] 1 #f)) #f)
    (state (list) #t)) 'voidArrow)
   (fa '! (vector
    (state (list
     (edge 26 1 #f)) #f)
    (state (list) #t)) 'voidArrow)
   (fa '! (vector
    (state (list
     (edge #\] 1 #f)) #f)
    (state (list) #t)) 'voidArrow)
   (fa '! (vector
    (state (list
     (edge #\\ 1 #f)) #f)
    (state (list) #t)) 'voidArrow)
   (fa '! (vector
    (state (list
     (edge 'wild 1 #f)) #f)
    (state (list) #t)) 'voidArrow)
   (fa '! (vector
    (state (list
     (edge 26 1 #f)) #f)
    (state (list) #t)) 'voidArrow)
   (fa '! (vector
    (state (list
     (edge #\* 1 #f)) #f)
    (state (list
     (edge #\/ 2 #f)) #f)
    (state (list) #t)) 'voidArrow)))
349 | 
;; The parser for Waxeye grammar files, built from the table above.
;; It starts at automaton 0 ('grammar); the second argument sits in the
;; position where interp.rkt passes its end-of-file check setting.
(define grammar-parser (make-parser 0 #t automata))
351 | 


--------------------------------------------------------------------------------
/src/waxeye/header.txt:
--------------------------------------------------------------------------------
1 | This is the parser for Waxeye grammar files. It was generated from
2 | the grammar 'grammars/waxeye.waxeye'.
3 | 


--------------------------------------------------------------------------------
/src/waxeye/interp.rkt:
--------------------------------------------------------------------------------
 1 | #lang racket/base
 2 | 
 3 | (require waxeye/ast
 4 |          waxeye/parser
 5 |          "dfa.rkt"
 6 |          "gen.rkt"
 7 |          "racket.rkt"
 8 |          "util.rkt")
 9 | 
10 | (provide dynamic-parser
11 |          interpreter)
12 | 
13 | 
;; Builds a parser for `grammar` directly from its automata, using the
;; current *start-index* and *eof-check* settings.
(define (dynamic-parser grammar)
  (let* ((start *start-index*)
         (eof-check *eof-check*)
         (automata (make-automata grammar)))
    (make-parser start eof-check automata)))
16 | 
17 | 
;; Parses `input` with a parser built from `grammar` and prints the outcome:
;; the parse error if parsing failed, the resulting tree otherwise.
(define (interpreter grammar input)
  (define parse (dynamic-parser grammar))
  (define result (parse input))
  (cond
   ((parse-error? result) (display-parse-error result))
   (else (display-ast result))))
23 | 


--------------------------------------------------------------------------------
/src/waxeye/load.rkt:
--------------------------------------------------------------------------------
  1 | #lang racket/base
  2 | 
  3 | (require waxeye/ast
  4 |          "file.rkt"
  5 |          "gen.rkt"
  6 |          "grammar-parser.rkt"
  7 |          "interp.rkt"
  8 |          "util.rkt")
  9 | 
 10 | (provide load-grammar
 11 |          modular-grammar!)
 12 | 
 13 | ;; Flag read by load-grammar to pick the loader; change it through modular-grammar!.
 14 | (define *modular-grammar* #f)
 15 | (define (modular-grammar! val)
 16 |   (set! *modular-grammar* val))
 17 | 
 18 | ;; Parsed grammar trees, keyed by the path handed to load-waxeye-grammar.
 19 | (define *load-cache* (make-hash))
 20 | 
 21 | ;; Loads the grammar at `path` with the modular loader when that mode is on.
 22 | (define (load-grammar path)
 23 |   ((if *modular-grammar*
 24 |        load-modular-grammar
 25 |        load-waxeye-grammar) path))
 26 | 
 27 | ;; Parses the grammar file at `path` (a string or a path) and caches the tree per path.
 28 | (define (load-waxeye-grammar path)
 29 |   (or (hash-ref *load-cache* path #f)
 30 |       (let ((grammar-tree (grammar-parser (file-as-string path))))
 31 |         (unless (ast? grammar-tree)
 32 |           ;; Format with ~a: string-append rejects the path objects built by
 33 |           ;; resolve-modular, and a "~" in the text must not act as a directive.
 34 |           (error 'waxeye "syntax error in grammar ~a\n~a"
 35 |                  path (parse-error->string grammar-tree)))
 36 |         (hash-set! *load-cache* path grammar-tree)
 37 |         grammar-tree)))
 38 | 
 39 | ;; Builds one 'grammar AST from the modular grammar expressions read from `path`.
 40 | (define (load-modular-grammar path)
 41 |   ;; Collects every datum `read` yields from port `in`, in file order
 42 |   (define (read-modular in)
 43 |     (let loop ((acc '()))
 44 |       (let ((datum (read in)))
 45 |         (if (eof-object? datum)
 46 |             (reverse acc)
 47 |             (loop (cons datum acc))))))
 48 |   ;; Only the first value of split-path (the base) is used
 49 |   (let-values (((base-path name must-be-dir?) (split-path path)))
 50 |     (let ((exps (call-with-input-file path read-modular)))
 51 |       (ast
 52 |         'grammar
 53 |         (list-concat (map (lambda (m) (resolve-modular m base-path)) exps))
 54 |         (cons 0 0)))))
 55 | 
 56 | 
 57 | ;; Resolves the modular expression `m` into the list of definitions it denotes.
 58 | ;; A string names a grammar file; a list is an operator form such as (rename ...).
 59 | (define (resolve-modular m base-path)
 60 |   (cond
 61 |    ((string? m)
 62 |     (let ((join? (and base-path (not (equal? base-path 'relative)) (not (absolute-path? m)))))
 63 |       (ast-c (load-waxeye-grammar (if join? (build-path base-path m) m)))))
 64 |    ((list? m)
 65 |     (let ((resolver (case (car m)
 66 |                       ((rename) resolve-rename)
 67 |                       ((only) resolve-only)
 68 |                       ((all-except) resolve-all-except)
 69 |                       ((prefix) resolve-prefix)
 70 |                       ((prefix-only) resolve-prefix-only)
 71 |                       ((prefix-all-except) resolve-prefix-all-except)
 72 |                       ((join) resolve-join)
 73 |                       (else (error 'load-modular-grammar "Bad modular grammar expression type: ~s" (car m))))))
 74 |       (apply resolver base-path (cdr m))))
 75 |    (else (error 'load-modular-grammar "Bad modular grammar expression: ~s" m))))
 76 | 
 77 | ;; Renames non-terminals in the definitions `nts`; `names` is a list of (old . new) symbol pairs.
 78 | (define (rename-list nts names)
 79 |   (let ((t (make-hash)))
 80 |     (define (visit-alternation exp)
 81 |       (visit-multi-child visit-sequence exp))
 82 |     ;; The children of a sequence are rewritten as general expressions
 83 |     (define (visit-sequence exp)
 84 |       (visit-multi-child visit-exp exp))
 85 |     ;; Rebuilds `exp` with `visitor` applied to every child, keeping type and position
 86 |     (define (visit-multi-child visitor exp)
 87 |       (ast (ast-t exp) (map visitor (ast-c exp)) (ast-p exp)))
 88 |     ;; Rewrites only the last child of a unit; the children before it are kept as they are
 89 |     (define (visit-unit exp)
 90 |       (define (visit-unit-children cs)
 91 |         (let ((c (car cs)) (rest (cdr cs)))
 92 |           (if (null? rest)
 93 |               (list (visit-exp c))
 94 |               (cons c (visit-unit-children rest)))))
 95 |       (ast
 96 |         (ast-t exp)
 97 |         (visit-unit-children (ast-c exp))
 98 |         (ast-p exp)))
 99 |     ;; Replaces an identifier's characters when its name has an entry in table `t`
100 |     (define (visit-ident exp)
101 |       (let* ((name (string->symbol (list->string (ast-c exp))))
102 |              (new-name (hash-ref t name #f)))
103 |         (if new-name
104 |             (ast
105 |               (ast-t exp)
106 |               (string->list (symbol->string new-name))
107 |               (ast-p exp))
108 |             exp)))
109 |     ;; Only alternation, identifier, sequence and unit are rewritten; unknown types raise
110 |     (define (visit-exp exp)
111 |       (let ((type (ast-t exp)))
112 |         (case type
113 |           ((action) exp)
114 |           ((alternation) (visit-alternation exp))
115 |           ((caseLiteral) exp)
116 |           ((charClass) exp)
117 |           ((identifier) (visit-ident exp))
118 |           ((label) exp)
119 |           ((literal) exp)
120 |           ((sequence) (visit-sequence exp))
121 |           ((unit) (visit-unit exp))
122 |           ((wildCard) exp)
123 |           (else (error 'expand-grammar "unknown expression type: ~s" type)))))
124 |     ;; Rebuilds a definition: fresh identifier (position (0 . 0)), same second child, rewritten body
125 |     (define (rename nt)
126 |       (let* ((name (string->symbol (get-non-term nt)))
127 |              (new-name (hash-ref t name #f)))
128 |         (ast
129 |          (ast-t nt)
130 |          `(,(ast
131 |               'identifier
132 |               (string->list (symbol->string (if new-name
133 |                                                 new-name
134 |                                                 name)))
135 |               (cons 0 0))
136 |            ,(cadr (ast-c nt))
137 |            ,(visit-alternation (caddr (ast-c nt))))
138 |          (ast-p nt))))
139 |     (for-each (lambda (a)
140 |                 (hash-set! t (car a) (cdr a)))
141 |               names)
142 |     (map rename nts)))
143 | 
144 | ;; Keeps only the definitions of `exp` whose non-terminal names are listed in `non-terms`.
145 | (define (resolve-only base-path exp . non-terms)
146 |   (define (wanted? def)
147 |     (member (string->symbol (get-non-term def)) non-terms))
148 |   (filter wanted? (resolve-modular exp base-path)))
149 | 
150 | ;; Drops the definitions of `exp` whose non-terminal names are listed in `non-terms`.
151 | (define (resolve-all-except base-path exp . non-terms)
152 |   (define (excluded? def)
153 |     (member (string->symbol (get-non-term def)) non-terms))
154 |   (filter (lambda (def) (not (excluded? def))) (resolve-modular exp base-path)))
155 | 
156 | ;; Renames non-terminals of `exp`; `names` is passed to rename-list as its (old . new) pairs.
157 | (define (resolve-rename base-path exp . names)
158 |   (rename-list (resolve-modular exp base-path) names))
159 | 
160 | ;; Prepends `prefix` (a symbol) to the name of every non-terminal defined by `exp`.
161 | (define (resolve-prefix base-path prefix exp)
162 |   (let* ((nts (resolve-modular exp base-path))
163 |          (p (symbol->string prefix))
164 |          (old-names (map get-non-term nts)))
165 |     (rename-list nts (map (lambda (n) (cons (string->symbol n) (string->symbol (string-append p n))))
166 |                           old-names))))
167 | 
168 | ;; Prepends `prefix` to the non-terminals of `exp` listed in `non-terms`; others keep their names.
169 | (define (resolve-prefix-only base-path prefix exp . non-terms)
170 |   (let* ((nts (resolve-modular exp base-path))
171 |          (p (symbol->string prefix))
172 |          (chosen (filter (lambda (nt) (member (string->symbol (get-non-term nt)) non-terms)) nts)))
173 |     (rename-list nts (map (lambda (nt)
174 |                             (let ((n (get-non-term nt)))
175 |                               (cons (string->symbol n) (string->symbol (string-append p n)))))
176 |                           chosen))))
177 | 
178 | ;; Prepends `prefix` to every non-terminal of `exp` except those listed in `non-terms`.
179 | (define (resolve-prefix-all-except base-path prefix exp . non-terms)
180 |   (let* ((nts (resolve-modular exp base-path))
181 |          (p (symbol->string prefix))
182 |          (chosen (filter (lambda (nt) (not (member (string->symbol (get-non-term nt)) non-terms))) nts)))
183 |     (rename-list nts (map (lambda (nt)
184 |                             (let ((n (get-non-term nt)))
185 |                               (cons (string->symbol n) (string->symbol (string-append p n)))))
186 |                           chosen))))
187 | 
188 | ;; Concatenates, in order, the definitions resolved from each expression in `exps`.
189 | (define (resolve-join base-path . exps)
190 |   (define (resolve-one m)
191 |     (resolve-modular m base-path))
192 |   (list-concat (map resolve-one exps)))
193 | 


--------------------------------------------------------------------------------
/src/waxeye/main.rkt:
--------------------------------------------------------------------------------
  1 | #lang racket/base
  2 | 
  3 | (require (only-in racket/cmdline command-line)
  4 |          waxeye/ast
  5 |          "debug.rkt"
  6 |          "file.rkt"
  7 |          "gen.rkt"
  8 |          "interp.rkt"
  9 |          "load.rkt"
 10 |          "racket.rkt"
 11 |          "tester.rkt"
 12 |          "transform.rkt"
 13 |          "util.rkt"
 14 |          "version.rkt")
 15 | 
 16 | (provide main)
 17 | 
 18 | ;; Command-line settings: written by parse-args, read by main.
 19 | (define *grammar-path* #f)
 20 | (define *grammar-test* #f)
 21 | (define *header-path* #f)
 22 | (define *interpret* #f)
 23 | (define *output-path* #f)
 24 | (define *target-lang* #f)
 25 | 
 26 | ;; Entry point: processes `args`, then interprets, tests or generates from the grammar.
 27 | (define (main args)
 28 |   (process-args args)
 29 |   (when *grammar-path*
 30 |     (let ((grammar-tree (load-grammar *grammar-path*)))
 31 |       (transform-grammar grammar-tree)
 32 |       (start-nt! *start-name* grammar-tree)
 33 |       (cond
 34 |        (*interpret*
 35 |         (interpreter grammar-tree (input-as-string (current-input-port))))
 36 |        (*grammar-test*
 37 |         (tester grammar-tree *grammar-test*))
 38 |        ((and *target-lang* *output-path*)
 39 |         (when *header-path*
 40 |           (file-header! (file-as-string-lines *header-path*)))
 41 |         (display-version)
 42 |         (for-each (lambda (generated) (display-ln "generated: " generated))
 43 |                   (*target-lang* grammar-tree *output-path*)))
 44 |        (else (display-help))))))
 45 | 
 46 | ;; Shows the version for "--version", version plus help for no arguments, else parses `args`.
 47 | (define (process-args args)
 48 |   (cond
 49 |    ((member "--version" args)
 50 |     (display-version))
 51 |    ((null? args)
 52 |     (display-version)
 53 |     (newline)
 54 |     (display-help))
 55 |    (else (parse-args args))))
 56 | 
 57 | ;; Parses the command line `args`, storing each option in its setting and the grammar path last.
 58 | (define (parse-args args)
 59 |   (command-line
 60 |    #:program "waxeye"
 61 |    #:argv args
 62 | 
 63 |    #:help-labels "Waxeye modes:"
 64 |    ;; An empty -g dir is kept as "" so output names stay relative; string-ref would fail on it
 65 |    #:once-any
 66 |    ("-g" language dir
 67 |     "Generate"
 68 |     (set! *target-lang* (case (string->symbol language)
 69 |                           ((racket) gen-racket)
 70 |                           (else #f)))
 71 |     (set! *output-path* (if (or (string=? dir "") (equal? (string-ref dir (- (string-length dir) 1)) #\/))
 72 |                             dir
 73 |                             (string-append dir "/"))))
 74 |    ("-i" "Interpret"
 75 |     (set! *interpret* #t))
 76 |    ("-t" test
 77 |     "Test"
 78 |     (set! *grammar-test* test))
 79 | 
 80 |    #:help-labels "Grammar options:"
 81 | 
 82 |    #:once-each
 83 |    ("-m"
 84 |     "Modular Grammar - default: false"
 85 |     (modular-grammar! #t))
 86 |    ("-s" start
 87 |     "Starting non-terminal - default: first non-terminal"
 88 |     (start-name! start))
 89 | 
 90 |    #:help-labels "Parser options:"
 91 | 
 92 |    #:once-each
 93 |    ("-c" comment
 94 |     "Header comment for generated files - default: none"
 95 |     (set! *header-path* comment))
 96 |    ("-e" eof
 97 |     "Check parser consumes all input - default: true"
 98 |     (eof-check! (equal? eof "true")))
 99 |    ("-n" namespace
100 |     "Module or package namespace - default: none"
101 |     (module-name! namespace))
102 |    ("-p" prefix
103 |     "Name prefix for generated files - default: none"
104 |     (name-prefix! prefix))
105 | 
106 |    #:help-labels "Misc options:"
107 | 
108 |    #:once-each
109 |    ("--debug" "Activates debug information"
110 |     (debug! #t))
111 |    ("--version" "Prints version number and copyright notice"
112 |     (void))
113 | 
114 |    ;; expects one grammar path
115 |    #:args (grammar)
116 |    ;; set the grammar path when done
117 |    (set! *grammar-path* grammar)))
118 | 
119 | ;; Prints the tool name followed by *version*.
120 | (define (display-version)
121 |   (display-ln "Waxeye Parser Generator v" *version*))
122 | 
123 | ;; Shows the usage text by running the command-line parser on "--help".
124 | (define (display-help)
125 |   (parse-args '("--help")))
126 | 


--------------------------------------------------------------------------------
/src/waxeye/nfa.rkt:
--------------------------------------------------------------------------------
  1 | #lang racket/base
  2 | 
  3 | (require waxeye/ast
  4 |          waxeye/fa
  5 |          "util.rkt")
  6 | 
  7 | (provide make-nfa
  8 |          reset-nfa-builder
  9 |          unwinds)
 10 | 
 11 | ;; Builder state: `is-void` is stored in each new edge; `unwinds` collects the NFAs built for & and !.
 12 | (define is-void #f)
 13 | (define unwinds '())
 14 | ;; Puts both builder variables back to their initial values
 15 | (define (reset-nfa-builder)
 16 |   (set! is-void #f)
 17 |   (set! unwinds '()))
 18 | ;; Builds the NFA for `exp`, appends (type . states) to `unwinds` and returns its index there.
 19 | (define (build-unwind-nfa type exp)
 20 |   (let* ((states (nfa->vector (build-states exp (state '() #t)))) (index (length unwinds)))
 21 |     (set! unwinds (append unwinds (list (cons type states))))
 22 |     index))
 23 | 
 24 | ;; Builds the state vector for the third child of definition `def`, ending in a matching state.
 25 | (define (make-nfa def)
 26 |   (nfa->vector (build-states (caddr (ast-c def)) (state '() #t))))
 27 | 
 28 | 
 29 | ;; Converts an NFA into a vector of its states
 30 | ;; References between states are changed to indexes into the vector
 31 | (define (nfa->vector nfa)
 32 |   (let ((visited-table (make-hash)) (state-list '()) (state-count 0))
 33 |     ;; Points edge `e` at the index of its target state, visiting the target if needed
 34 |     (define (add-edge e)
 35 |       (set-edge-s! e (add-state (edge-s e))))
 36 |     ;; Returns the index of `to-add`, numbering it and walking its edges on the first visit
 37 |     (define (add-state to-add)
 38 |       (let ((h-index (hash-ref visited-table to-add #f)))
 39 |         (if h-index
 40 |             h-index
 41 |             (let ((new-index state-count))
 42 |               (hash-set! visited-table to-add state-count)
 43 | 
 44 |               (set! state-count (+ state-count 1))
 45 | 
 46 |               ;; Create a deep copy of the nfa state since, we are about to destroy the
 47 |               ;; original edges but still need to hash against them
 48 |               (let ((state-copy (state (map
 49 |                                          (lambda (a)
 50 |                                            (edge (edge-t a) (edge-s a) (edge-v a)))
 51 |                                          (state-edges to-add))
 52 |                                        (state-match to-add))))
 53 |                 (set! state-list (cons state-copy state-list))
 54 | 
 55 |                 ;; Add the states of the edges
 56 |                 (for-each add-edge (state-edges state-copy)))
 57 | 
 58 |               new-index))))
 59 | 
 60 |     (add-state nfa)
 61 |     (list->vector (reverse state-list))))
 62 | 
 63 | ;; Builds the NFA fragment for `exp` leading to state `end`, dispatching on the AST type.
 64 | (define (build-states exp end)
 65 |   (define type (ast-t exp))
 66 |   (define builder (case type
 67 |                     ((action) build-action)
 68 |                     ((alternation) build-alternation)
 69 |                     ((and) build-and)
 70 |                     ((charClass) build-char-class)
 71 |                     ((closure) build-closure)
 72 |                     ((identifier) build-identifier)
 73 |                     ((label) build-label)
 74 |                     ((literal) build-literal)
 75 |                     ((not) build-not)
 76 |                     ((optional) build-optional)
 77 |                     ((plus) build-plus)
 78 |                     ((sequence) build-sequence)
 79 |                     ((void) build-void)
 80 |                     ((wildCard) build-wildCard)
 81 |                     (else (error 'build-states "unknown expression type: ~s" type))))
 82 |   (builder exp end))
 83 | 
 84 | ;; Actions are not handled by the NFA builder yet; this always raises an error.
 85 | (define (build-action exp end)
 86 |   (error 'build-action "actions not done yet"))
 87 | 
 88 | ;; Returns a non-matching state that carries the entry edges of every
 89 | ;; alternative of `exp`, each alternative being built to continue into `end`.
 90 | (define (build-alternation exp end)
 91 |   (define (entry-edges alt)
 92 |     (state-edges (build-states alt end)))
 93 |   (let ((alternatives (ast-c exp)))
 94 |     (state
 95 |       (list-concat (map entry-edges alternatives))
 96 |       #f)))
 97 | 
 98 | ;; One edge to `end` whose transition is the index of the '& unwind NFA built for the operand.
 99 | (define (build-and exp end)
100 |   (let ((unwind-index (build-unwind-nfa '& (car (ast-c exp)))))
101 |     (state (list (edge unwind-index end is-void)) #f)))
102 | 
103 | ;; One edge to `end` whose transition is the children list of the charClass node.
104 | (define (build-char-class exp end)
105 |   (let ((transition (edge (ast-c exp) end is-void)))
106 |     (state (list transition) #f)))
107 | 
108 | ;; `s` is made first so the operand can lead back to it; it then gets the operand's entry edges plus an 'e edge to `end`.
109 | (define (build-closure exp end)
110 |   (let* ((s (state #f #f))
111 |          (e (build-states (car (ast-c exp)) s)))
112 |     (set-state-edges! s (append (state-edges e) (list (edge 'e end is-void))))
113 |     s))
114 | 
115 | ;; One edge to `end` whose transition is the identifier's characters joined into a string.
116 | (define (build-identifier exp end)
117 |   (state (list (edge (list->string (ast-c exp)) end is-void)) #f))
118 | 
119 | ;; Labels are not handled by the NFA builder yet; this always raises an error.
120 | (define (build-label exp end)
121 |   (error 'build-label "labels not done yet"))
122 | 
123 | ;; Chains one single-edge state per child (character) of the literal; the
124 | ;; last edge leads to `end`.
125 | (define (build-literal exp end)
126 |   (define (chain chars)
127 |     (let* ((c (car chars))
128 |            (more (cdr chars))
129 |            ;; the states for the later characters are built before this one
130 |            (next (if (null? more) end (chain more))))
131 |       (state (list (edge c next is-void)) #f)))
132 |   (chain (ast-c exp)))
133 | 
134 | ;; One edge to `end` whose transition is the index of the '! unwind NFA built for the operand.
135 | (define (build-not exp end)
136 |   (state (list (edge (build-unwind-nfa '! (car (ast-c exp))) end is-void)) #f))
137 | 
138 | ;; Builds the operand, then adds an 'e edge to `end` after its entry edges so it can be skipped.
139 | (define (build-optional exp end)
140 |   (let ((s (build-states (car (ast-c exp)) end)))
141 |     (set-state-edges! s (append (state-edges s) (list (edge 'e end is-void))))
142 |     s))
143 | 
144 | ;; One copy of the operand that continues into the closure built from the same node.
145 | (define (build-plus exp end)
146 |   (build-states (car (ast-c exp)) (build-closure exp end)))
147 | 
148 | ;; Builds the items of the sequence back to front so each one continues into the next.
149 | (define (build-sequence exp end)
150 |   (define (chain items)
151 |     (let* ((item (car items))
152 |            (more (cdr items))
153 |            (next (if (null? more) end (chain more))))
154 |       (build-states item next)))
155 |   (chain (ast-c exp)))
156 | 
157 | ;; Builds the operand with `is-void` set to #t, then restores the value it had before.
158 | (define (build-void exp end)
159 |   (let ((old-void is-void))
160 |     (set! is-void #t)
161 |     (let ((res (build-states (car (ast-c exp)) end)))
162 |       (set! is-void old-void)
163 |       res)))
164 | 
165 | ;; One edge to `end` with the 'wild transition.
166 | (define (build-wildCard exp end)
167 |   (state (list (edge 'wild end is-void)) #f))
168 | 


--------------------------------------------------------------------------------
/src/waxeye/racket.rkt:
--------------------------------------------------------------------------------
  1 | #lang racket/base
  2 | 
  3 | (require waxeye/ast
  4 |          waxeye/fa
  5 |          "code.rkt"
  6 |          "dfa.rkt"
  7 |          "gen.rkt"
  8 |          "util.rkt")
  9 | 
 10 | (provide gen-racket
 11 |          gen-racket-parser)
 12 | 
 13 | ;; Generates the Racket parser source for `grammar`, hands it to dump-string with
 14 | ;; a file path under `path`, and returns a one-element list holding that path.
 15 | (define (gen-racket grammar path)
 16 |   (indent-unit! 1)
 17 |   (let* ((file-name (if *name-prefix* (string-append *name-prefix* "-parser.rkt") "parser.rkt"))
 18 |          (file-path (string-append path file-name)))
 19 |     (dump-string (gen-racket-parser grammar) file-path)
 20 |     (list file-path)))
 21 | 
 22 | ;; Passes `lines` to comment-base with ";;" as the comment marker.
 23 | (define (racket-comment lines)
 24 |   (comment-base ";;" lines))
 25 | 
 26 | ;; Renders an edge transition as Racket source: a quoted symbol, a (list ...) of
 27 | ;; chars and (cons a b) pairs, or any other value written with ~s.
 28 | (define (gen-racket-trans a)
 29 |   (define (gen-list-item item)
 30 |     (cond ((char? item) (format "~s" item))
 31 |           (else (format "(cons ~s ~s)" (car item) (cdr item)))))
 32 |   (cond
 33 |    ((symbol? a) (format "'~s" a))
 34 |    ((list? a)
 35 |     (let ((first-item (gen-list-item (car a)))
 36 |           (other-items (map (lambda (b)
 37 |                               (format " ~a" (gen-list-item b)))
 38 |                             (cdr a))))
 39 |       (format "(list ~a~a)" first-item (string-concat other-items))))
 40 |    (else (format "~s" a))))
 41 | 
 42 | ;; Renders edge `a` as "(edge <transition> <target> <void flag>)" on a new line at the current indent.
 43 | (define (gen-racket-edge a)
 44 |   (let ((prefix (ind))
 45 |         (trans (gen-racket-trans (edge-t a))))
 46 |     (format "\n~a(edge ~a ~a ~a)"
 47 |             prefix trans
 48 |             (edge-s a) (edge-v a))))
 49 | 
 50 | ;; Renders `edges` as a "(list ...)" form; presumably `indent` deepens (ind) for the nested edges -- TODO confirm.
 51 | (define (gen-racket-edges edges)
 52 |   (indent (format "(list~a)" (string-concat (map gen-racket-edge edges)))))
 53 | 
 54 | ;; Renders state `a` as "(state <edges> <match flag>)" on a new line at the current indent.
 55 | (define (gen-racket-state a)
 56 |   (format "\n~a(state ~a ~a)"
 57 |           (ind)
 58 |           (gen-racket-edges (state-edges a))
 59 |           (state-match a)))
 60 | 
 61 | ;; Renders the vector `states` as a "(vector ...)" form with one rendered state per element.
 62 | (define (gen-racket-states states)
 63 |   (indent (format "(vector~a)" (string-concat (map gen-racket-state (vector->list states))))))
 64 | 
 65 | ;; Renders automaton `a` as "(fa '<type> <states> '<mode>)", passing the type name through camel-case-lower.
 66 | (define (gen-racket-fa a)
 67 |   (format "\n~a(fa '~a ~a '~a)"
 68 |           (ind)
 69 |           (camel-case-lower (symbol->string (fa-type a)))
 70 |           (gen-racket-states (fa-states a))
 71 |           (fa-mode a)))
 72 | 
 73 | ;; Returns the source text of a parser module for `grammar`: header comment, prelude, automata vector, parser definition.
 74 | (define (gen-racket-parser grammar)
 75 |   (let ((parser-name (if *name-prefix*
 76 |                          (string-append *name-prefix* "-parser")
 77 |                          "parser")))
 78 |     (format
 79 | #<<EOF
 80 | ~a
 81 | #lang racket/base
 82 | 
 83 | (require waxeye/ast
 84 |          waxeye/fa
 85 |          waxeye/parser)
 86 | 
 87 | (provide (all-from-out waxeye/ast)
 88 |          ~a)
 89 | 
 90 | 
 91 | ~a
 92 | EOF
 93 | 
 94 | (if *file-header*
 95 |     (racket-comment *file-header*)
 96 |     (racket-comment *default-header*))
 97 | 
 98 | parser-name
 99 | 
100 | (indentn 2 (format
101 | 
102 | #<<EOF
103 | (define automata
104 | ~a~a)
105 | 
106 | (define ~a (make-parser ~a ~a automata))
107 | EOF
108 | 
109 | (ind)
110 | (indent (format "(vector~a)" (string-concat (map gen-racket-fa (vector->list (make-automata grammar))))))
111 | parser-name
112 | *start-index*
113 | *eof-check*
114 | ))
115 | 
116 | )))
117 | 


--------------------------------------------------------------------------------
/src/waxeye/set.rkt:
--------------------------------------------------------------------------------
 1 | #lang racket/base
 2 | 
 3 | (provide subset?)
 4 | 
 5 | 
 6 | ;; Is 'b' a subset of 'a'? Both are lists of chars and (lo . hi) char pairs,
 7 | ;; presumably sorted in ascending order -- TODO confirm against the callers.
 8 | (define (subset? a b)
 9 |   ;; Does element `x` of 'a' contain element `y` of 'b'?
10 |   (define (covers? x y)
11 |     (cond
12 |      ((char? x) (and (char? y) (char=? x y)))
13 |      ((char? y) (and (char<=? (car x) y) (char<=? y (cdr x))))
14 |      (else (and (char<=? (car x) (car y)) (char<=? (cdr y) (cdr x))))))
15 |   (cond
16 |    ((null? b) #t)
17 |    ((null? a) #f)
18 |    ;; A char in 'a' is consumed by its match; a range may cover more of 'b'
19 |    ((covers? (car a) (car b)) (subset? (if (char? (car a)) (cdr a) a) (cdr b)))
20 |    ;; Otherwise move on to the next element of 'a'. A char in 'a' that differed
21 |    ;; from the head of 'b' used to answer #f at once, so '(#\a #\b) did not
22 |    ;; contain '(#\b) although ranges in the same position were skipped.
23 |    (else (subset? (cdr a) b))))
24 | 


--------------------------------------------------------------------------------
/src/waxeye/tester.rkt:
--------------------------------------------------------------------------------
 1 | #lang racket/base
 2 | 
 3 | (require (only-in racket/list take)
 4 |          waxeye/ast
 5 |          "gen.rkt"
 6 |          "interp.rkt"
 7 |          "racket.rkt"
 8 |          (only-in "util.rkt" display-ln))
 9 | 
10 | (provide tester)
11 | 
12 | ;; Counters of passed and failed tests; reset by tester and incremented by run-test.
13 | (define *num-pass* 0)
14 | (define *num-fail* 0)
15 | 
16 | ;; Runs every test group read from the file `tests` against `grammar`, then prints a summary.
17 | (define (tester grammar tests)
18 |   (define (read-tests i)
19 |     (let ((test (read i)))
20 |       (unless (eof-object? test)
21 |         (start-nt! (symbol->string (car test)) grammar)
22 |         (run-test-iter (dynamic-parser grammar) (cdr test))
23 |         (read-tests i))))
24 |   (eof-check! #t)
25 |   (set! *num-pass* 0)
26 |   (set! *num-fail* 0)
27 |   (call-with-input-file tests read-tests)
28 |   (display-ln "Waxeye Grammar Tester")
29 |   (display "------------------------------------------------------------------------------\n")
30 |   ;; An empty test file leaves t-count at 0; report 0.0% instead of dividing by zero
31 |   (let* ((t-count (+ *num-pass* *num-fail*))
32 |          (ratio (if (zero? t-count) 0.0 (exact->inexact (/ (* *num-pass* 100) t-count))))
33 |          (cent (let ((cl (string->list (number->string ratio)))) (list->string (take cl (min (length cl) 5))))))
34 |     (display (format "passed ~a | failed ~a | success ~a%\n" *num-pass* *num-fail* cent)))
35 |   (display "------------------------------------------------------------------------------\n"))
36 | 
37 | ;; Runs the tests in `pairs`, a flat list alternating an input and its expected result.
38 | (define (run-test-iter parser pairs)
39 |   (unless (null? pairs)
40 |           (run-test parser (car pairs) (cadr pairs))
41 |           (run-test-iter parser (cddr pairs))))
42 | 
43 | ;; Runs one test and updates the counters. NOTE(review): a result matching no cond clause yields void, which counts as a pass.
44 | (define (run-test parser input expect)
45 |   (let ((result (parser input)))
46 |     (if (cond
47 |          ((ast? result)
48 |           (or (equal? expect 'pass) (is-expected? result expect)))
49 |          ((parse-error? result)
50 |           (equal? expect 'fail))
51 |          ((equal? result #t)
52 |           (equal? expect 'pass)))
53 |         (set! *num-pass* (+ *num-pass* 1))
54 |         (begin
55 |           (set! *num-fail* (+ *num-fail* 1)) 
56 |           (report-error input expect result)))))
57 | 
58 | ;; Prints a failed test: the current start non-terminal, the input, the
59 | ;; expectation and the actual outcome (an AST as s-expression, else fail/pass).
60 | (define (report-error input expect actual)
61 |   (display-ln "Error! @ " *start-name*)
62 |   (display-ln "input    = " input)
63 |   (display-ln "expected = " expect)
64 |   (display "actual   = ")
65 |   (display-ln (cond
66 |                ((ast? actual) (ast->string-sexpr actual))
67 |                ((parse-error? actual) 'fail)
68 |                (else 'pass)))
69 |   (newline))
70 | 
71 | ;; Does `result` match `expect`? An expected list headed by * matches any AST.
72 | (define (is-expected? result expect)
73 |   (if (and (ast? result) (list? expect))
74 |       (let ((type (car expect)))
75 |         (or (equal? type '*)
76 |             (and (equal? (ast-t result) type)
77 |                  (children-match? (ast-c result) (cdr expect)))))
78 |       ;; Otherwise only two identical characters match
79 |       (and (char? result) (char? expect) (char=? result expect))))
80 | 
81 | ;; Matches result children against expected ones; a * in `expect` accepts all that remain.
82 | (define (children-match? res expect)
83 |   (cond
84 |    ((null? res) (or (null? expect) (equal? (car expect) '*)))
85 |    ((null? expect) #f)
86 |    ((equal? (car expect) '*) #t)
87 |    (else (and (is-expected? (car res) (car expect))
88 |               (children-match? (cdr res) (cdr expect))))))
89 | 


--------------------------------------------------------------------------------
/src/waxeye/transform.rkt:
--------------------------------------------------------------------------------
 1 | #lang racket/base
 2 | 
 3 | (require waxeye/ast
 4 |          "action.rkt"
 5 |          "expand.rkt"
 6 |          "gen.rkt"
 7 |          "util.rkt")
 8 | 
 9 | (provide (all-defined-out))
10 | 
11 | 
12 | ;; The hash table for the names of the non-terminals
13 | (define nt-names (make-hash))
14 | 
15 | ;; NOTE(review): the steps are chained with `and`, so a step that returns #f skips the rest.
16 | ;; Transforms the grammar and performs sanity checks
17 | (define (transform-grammar g)
18 |   (and (check-not-empty g)
19 |        (collect-actions g)
20 |        (collect-nt-names g)
21 |        (check-refs g)
22 |        (expand-grammar g)))
23 | 
24 | ;; Raises an error when the grammar node `g` has no children.
25 | (define (check-not-empty g)
26 |   (when (null? (ast-c g))
27 |         (error 'check-not-empty "grammar is empty")))
28 | 
29 | ;; Records each non-terminal name in nt-names and returns #t; a name that is
30 | ;; already present raises a duplicate-definition error.
31 | (define (collect-nt-names g)
32 |   (define (record! def)
33 |     (let ((name (get-non-term def)))
34 |       ;; `error` does not return, so the table is only updated for new names
35 |       (when (hash-ref nt-names name #f)
36 |         (error 'check-duplicate "duplicate definition of non-terminal: ~a" name))
37 |       (hash-set! nt-names name name)))
38 |   (for-each record! (ast-c g))
39 |   ;; Reaching this point means no duplicate was found
40 |   #t)
41 | 
42 | 
43 | ;; Checks that referenced non-terminals have been defined
44 | (define (check-refs grammar)
45 |   (define (visit-nt exp)
46 |     (let ((name (list->string (ast-c exp))))
47 |       (unless (hash-ref nt-names name #f)
48 |               (error 'waxeye "undefined reference to non-terminal: ~a" name))))
49 |   ;; An alternation's children are visited as sequences, a sequence's as units
50 |   (define (visit-alternation exp)
51 |     (for-each visit-sequence (ast-c exp)))
52 | 
53 |   (define (visit-sequence exp)
54 |     (for-each visit-unit (ast-c exp)))
55 |   ;; Only the last child of a unit is checked
56 |   (define (visit-unit exp)
57 |     (let* ((el (ast-c exp)) (el-len (length el)))
58 |       (visit-exp (list-ref el (- el-len 1)))))
59 |   ;; Expression types without a clause below are ignored
60 |   (define (visit-exp exp)
61 |     (let ((type (ast-t exp)))
62 |       (case type
63 |        ((alternation) (visit-alternation exp))
64 |        ((identifier) (visit-nt exp))
65 |        ((sequence) (visit-sequence exp))
66 |        ((unit) (visit-unit exp)))))
67 | 
68 |   (define (check-nt-refs def)
69 |     (visit-alternation (caddr (ast-c def))))
70 | 
71 |   (for-each check-nt-refs (get-defs grammar)))
72 | 


--------------------------------------------------------------------------------
/src/waxeye/util.rkt:
--------------------------------------------------------------------------------
 1 | #lang racket/base
 2 | 
 3 | (provide (all-defined-out))
 4 | 
 5 | 
 6 | ;; Writes every argument with `print`, then a newline
 7 | (define (print-ln . e)
 8 |   (for-each (lambda (v) (print v)) e)
 9 |   (newline))
10 | 
11 | 
12 | ;; Writes every argument with `display`, then a newline
13 | (define (display-ln . e)
14 |   (for-each (lambda (v) (display v)) e)
15 |   (newline))
16 | 
17 | 
18 | ;; Appends the lists in `sl`, in order, into a single list
19 | (define (list-concat sl)
20 |   (foldr (lambda (l acc) (append l acc)) '() sl))
21 | 
22 | 
23 | ;; Concatenates a list of strings with a single string-append call (no repeated copying)
24 | (define (string-concat sl)
25 |   (apply string-append sl))
26 | 


--------------------------------------------------------------------------------
/src/waxeye/version.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 | 
3 | (provide *version*)
4 | 
5 | ;; Version string of the tool
6 | (define *version* "0.9.0-dev")
7 | 


--------------------------------------------------------------------------------
/src/waxeye/waxeye.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 | 
3 | (require "main.rkt")
4 | 
5 | ;; Runs main on the command-line arguments, converted from a vector to a list
6 | (main (vector->list (current-command-line-arguments)))
7 | 


--------------------------------------------------------------------------------
/test/grammars/json.rkt:
--------------------------------------------------------------------------------
  1 | (Object
  2 | 
  3 |  ""
  4 |  fail
  5 | 
  6 |  "{}"
  7 |  (Object)
  8 | 
  9 |  "{ 1 }"
 10 |  fail
 11 | 
 12 |  "{ \"s\" }"
 13 |  fail
 14 | 
 15 |  "{ \"m\" : 1 }"
 16 |  (Object (Member (String #\m) (Value (Number #\1))))
 17 | 
 18 |  "{ \"\":{}, \"\":[]}"
 19 |  (Object (Member (String) (Value (Object))) (Member (String) (Value (Array))))
 20 |  )
 21 | 
 22 | 
 23 | (Array
 24 | 
 25 |  ""
 26 |  fail
 27 | 
 28 |  "[]"
 29 |  (Array)
 30 | 
 31 |  "[1,2]"
 32 |  (Array (Value (Number #\1)) (Value (Number #\2)))
 33 | 
 34 |  "[ 3 , [] , {} ]"
 35 |  (Array (Value (Number #\3)) (Value (Array)) (Value (Object)))
 36 |  )
 37 | 
 38 | 
 39 | (Number
 40 | 
 41 |  ""
 42 |  fail
 43 | 
 44 |  "0"
 45 |  (Number #\0)
 46 | 
 47 |  "1"
 48 |  (Number #\1)
 49 | 
 50 |  "01"
 51 |  fail
 52 | 
 53 |  "42"
 54 |  (Number #\4 #\2)
 55 | 
 56 |  "-4"
 57 |  (Number #\- #\4)
 58 | 
 59 |  "0.8"
 60 |  (Number #\0 #\. #\8)
 61 | 
 62 |  "7.635"
 63 |  (Number #\7 #\. #\6 #\3 #\5)
 64 | 
 65 |  "9e10"
 66 |  (Number #\9 #\e #\1 #\0)
 67 | 
 68 |  "4E+3"
 69 |  (Number #\4 #\E #\+ #\3)
 70 | 
 71 |  "-0e-517"
 72 |  (Number #\- #\0 #\e #\- #\5 #\1 #\7)
 73 |  )
 74 | 
 75 | 
 76 | (String
 77 | 
 78 |  ""
 79 |  fail
 80 | 
 81 |  "\"\""
 82 |  (String)
 83 | 
 84 |  "\"a\""
 85 |  (String #\a)
 86 | 
 87 |  "\"ab\""
 88 |  (String #\a #\b)
 89 | 
 90 |  "\"\\\"\""
 91 |  (String (Escaped #\"))
 92 | 
 93 |  "\"\\\\\""
 94 |  (String (Escaped #\\))
 95 |  )
 96 | 
 97 | 
 98 | (Escaped
 99 | 
100 |  "u0000"
101 |  (Escaped #\u #\0 #\0 #\0 #\0)
102 | 
103 |  "ua9F3"
104 |  (Escaped #\u #\a #\9 #\F #\3)
105 | 
106 |  "u"
107 |  fail
108 | 
109 |  "n"
110 |  (Escaped #\n)
111 | 
112 |  "z"
113 |  fail
114 |  )
115 | 


--------------------------------------------------------------------------------
/test/grammars/templ.rkt:
--------------------------------------------------------------------------------
 1 | (template
 2 | 
 3 |  "foo=${bar};"
 4 |  (template (string #\f #\o #\o #\=) (code #\b #\a #\r) (string #\;))
 5 | 
 6 |  "public static void ${(get-method-name)}() {
 7 |     System.out.println(${(get-expr)});
 8 | }
 9 | "
10 |  (template (string *) (code *) (string *) (code *) (string *))
11 | 
12 |  "def ${(get-name)}(${(get-args)}):${(i)}
13 | ${(get-code)}${(u)}"
14 |  (template (string *) (code *) (string *) (code *) (string *) (code *) (string *) (code *) (code *))
15 | )
16 | 
17 | (code
18 | 
19 |  "${}"
20 |  (code)
21 | 
22 |  "${(foo 1)}"
23 |  (code #\( #\f #\o #\o #\space #\1 #\))
24 | 
25 |  "${ a + b}"
26 |  (code #\space #\a #\space #\+ #\space #\b)
27 | 
28 |  "${\\}}"
29 |  (code #\})
30 | 
31 |  "${\\n}"
32 |  (code #\\ #\n)
33 | 
34 |  "${$}"
35 |  (code #\$)
36 | 
37 |  "${${}"
38 |  (code #\$ #\{)
39 | 
40 |  "${}}"
41 |  fail
42 | 
43 |  "${{}}"
44 |  fail
45 | 
46 |  "${${}}"
47 |  fail
48 | 
49 |  "${\\}"
50 |  fail
51 | 
52 |  "${${\\}}"
53 |  (code #\$ #\{ #\})
54 | )
55 | 
56 | (string ; Tests for the 'string' non-terminal; every case here expects only 'pass' or 'fail', no tree
57 | 
58 |  ";oisdcn;aosc;p981y2ep9nC"
59 |  pass
60 | 
61 |  "$" ; a lone '$'
62 |  pass
63 | 
64 |  "${" ; a code opener that is never closed
65 |  pass
66 | 
67 |  "$" ; NOTE(review): same input as the '$' case above; confirm whether a different input was intended
68 |  pass
69 | 
70 |  "\\${}" ; a backslash followed by '${}'
71 |  pass
72 | 
73 |  "${\\}" ; this same input is expected to fail in the 'code' tests
74 |  pass
75 | 
76 |  "" ; empty input
77 |  fail
78 | )
79 | 


--------------------------------------------------------------------------------
/test/grammars/waxeye.rkt:
--------------------------------------------------------------------------------
 1 | ;; These are tests for the 'Grammar' non-terminal
 2 | (Grammar ; <- This is the non-terminal's name
 3 | 
 4 |  ;; Following the name are pairs of input string and expected output. The
 5 |  ;; output is either the keyword 'pass', the keyword 'fail' or an AST. The AST
 6 |  ;; specifies the structure of the expected tree, the names of the nodes and
 7 |  ;; the individual characters. If you don't want to specify the whole tree,
 8 |  ;; just use the wild-card symbol '*' for the portion of the tree you want to
 9 |  ;; skip.
10 | 
11 |  "" ; <- This is the input
12 |  (Grammar) ; <- This is the expected output
13 | 
14 |  "A <- 'a'"
15 |  pass ; <- The keyword 'pass'
16 | 
17 |  "A"
18 |  fail ; <- The keyword 'fail'
19 | 
20 |  "A <- 'a' B <- 'b'"
21 |  (Grammar (Definition (Identifier #\A) *)  ; <- Here we skip some of
22 |           (Definition (Identifier #\B) *)) ;    Definition's children
23 | 
24 |  "A <- 'a'"
25 |  (Grammar (*)) ; <- Here we specify a child tree of any type
26 | 
27 |  "A <- [a-z] *[a-z0-9]"
28 |  (Grammar (Definition (Identifier #\A) (LeftArrow) (Alternation *))) ; <- Here only Alternation's children are skipped
29 | 
30 |  "A <- 'a'" ; <- Same input as above, now with the whole tree specified
31 |  (Grammar (Definition (Identifier #\A)
32 |             (LeftArrow) (Alternation (Sequence (Unit (Literal (LChar #\a)))))))
33 |  )
34 | 
35 | 
36 | (Literal ; Tests for the 'Literal' non-terminal: pairs of input and expected output
37 |  "'in'" ; each character between the single quotes becomes its own LChar node
38 |  (Literal (LChar #\i) (LChar #\n))
39 | 
40 |  "''" ; a pair of single quotes with nothing between them
41 |  fail
42 |  )
43 | 
44 | 
45 | (Range ; Tests for the 'Range' non-terminal: pairs of input and expected output
46 |  "" ; empty input
47 |  fail
48 | 
49 |  "-" ; a lone '-' is expected to parse as an ordinary Char
50 |  (Range (Char #\-))
51 | 
52 |  "a" ; a single character is a Range with one Char
53 |  (Range (Char #\a))
54 | 
55 |  "a-z" ; both endpoints are in the tree, the '-' between them is not
56 |  (Range (Char #\a) (Char #\z))
57 | 
58 |  "\\<0C>" ; a hex escape gives a Hex node holding its two digits
59 |  (Range (Hex #\0 #\C))
60 | 
61 |  "\\<30>-\\<39>" ; both endpoints written as hex escapes
62 |  (Range (Hex #\3 #\0) (Hex #\3 #\9))
63 | 
64 |  "0-\\<39>" ; Char start, Hex end
65 |  (Range (Char #\0) (Hex #\3 #\9))
66 | 
67 |  "\\<30>-9" ; Hex start, Char end
68 |  (Range (Hex #\3 #\0) (Char #\9))
69 |  )
70 | 
71 | 
72 | (Alt ; Tests for the 'Alt' non-terminal; only 'pass' or 'fail' is checked, no tree
73 |  "|"
74 |  pass
75 | 
76 |  "| " ; a space after the bar is accepted
77 |  pass
78 | 
79 |  " | " ; a space before the bar is not
80 |  fail
81 |  )
82 | 


--------------------------------------------------------------------------------