├── .github
    └── workflows
    │   ├── ci.yml
    │   └── contents.yml
├── Changelog.md
├── INTERNALS.md
├── LICENSE
├── README.md
├── config.nims
├── doc
    ├── README.md
    ├── example-graph.png
    ├── example-railroad.png
    ├── npeg.png
    ├── papers
    │   ├── 2008_A_Text_Pattern-Matching_Tool_based_on_Parsing_Expression_Grammars.pdf
    │   ├── 2008_Packrat parsers can support left recursion.pdf
    │   ├── 2008_slides-lpeg-workshop2008.pdf
    │   ├── 2009_A_Parsing_Machine_For_PEGs.pdf
    │   ├── 2010_Converting_regexes_to_Parsing_Expression_Grammars.pdf
    │   ├── 2010_Direct_left-recursive_parsing_expression_grammars.pdf
    │   ├── 2011_From_EBNF_to_PEG.pdf
    │   ├── 2011_From_Regular_Expressions_to_Parsing_Expression_Grammars.pdf
    │   ├── 2011_Parsing_Expression_Grammars_for_Structured_Data.pdf
    │   ├── 2013_Exception_Handling_for_Error_Reporting_in_Parsing_Expression_Grammars.pdf
    │   ├── 2014_Left_recursion_in_parsing_expression_grammars.pdf
    │   ├── 2018_An_efficient_parsing_machine_for_PEGs.pdf
    │   ├── 2021_Incremental_PEG_Parsing.pdf
    │   └── README.md
    └── syntax-diagram.png
├── misc
    ├── README
    ├── indent.nim
    ├── java.nim
    ├── mouse2npeg.nim
    └── rod.nim
├── npeg.nimble
├── src
    ├── npeg.nim
    └── npeg
    │   ├── capture.nim
    │   ├── codegen.nim
    │   ├── common.nim
    │   ├── dot.nim
    │   ├── grammar.nim
    │   ├── lib
    │       ├── core.nim
    │       ├── rfc3339.nim
    │       ├── types.nim
    │       ├── uri.nim
    │       └── utf8.nim
    │   ├── parsepatt.nim
    │   ├── patt.nim
    │   ├── railroad.nim
    │   └── stack.nim
└── tests
    ├── basics.nim
    ├── captures.nim
    ├── config.nims
    ├── examples.nim
    ├── json-32M.bzip2
    ├── lexparse.nim
    ├── lib.nim
    ├── nimversion.nim
    ├── performance.nim
    ├── precedence.nim
    ├── testdata
    └── tests.nim


/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
  1 | name: CI
  2 | on:  # triggered by every push and every pull request (no branch or path filters)
  3 |   push:
  4 |   pull_request:
  5 | 
  6 | concurrency:
  7 |   group: ci-${{ github.ref }}  # one concurrency group per git ref
  8 |   cancel-in-progress: true  # a newer run in the same group cancels the one still running
  9 | 
 10 | jobs:
 11 |   build:  # runs the test suite once per compiler in the matrix; one entry also builds the docs
 12 |     strategy:
 13 |       fail-fast: false  # keep the other matrix entries running when one of them fails
 14 |       matrix:
 15 |         compiler:
 16 |           - name: nim
 17 |             version: devel
 18 |           - name: nim
 19 |             version: version-2-0
 20 |           - name: nimskull
 21 |             version: "0.1.0-dev.21405"
 22 |           - name: nimskull
 23 |             version: "*"
 24 | 
 25 |         include:
 26 |           - compiler:  # NOTE(review): presumably merges into the nim/devel entry above instead of adding a job - confirm
 27 |               name: nim
 28 |               version: devel
 29 |             build_doc: true  # flag checked by the "Build docs" and "Upload GitHub Pages artifact" steps
 30 | 
 31 |     name: ${{ matrix.compiler.name }} ${{ matrix.compiler.version }}
 32 |     runs-on: ubuntu-latest
 33 | 
 34 |     defaults:
 35 |       run:
 36 |         shell: bash
 37 |         working-directory: npeg  # same directory the Checkout step places the repository in
 38 | 
 39 |     steps:
 40 |       - name: Checkout
 41 |         uses: actions/checkout@v4.1.1
 42 |         with:
 43 |           path: npeg
 44 | 
 45 |       - name: Setup Nim
 46 |         if: matrix.compiler.name == 'nim'  # matrix names are only nim or nimskull, so one setup step runs per entry
 47 |         uses: alaviss/setup-nim@0.1.1
 48 |         with:
 49 |           path: nim
 50 |           version: ${{ matrix.compiler.version }}
 51 | 
 52 |       - name: Setup nimskull
 53 |         id: nimskull
 54 |         if: matrix.compiler.name == 'nimskull'
 55 |         uses: nim-works/setup-nimskull@0.1.1
 56 |         with:
 57 |           nimskull-version: ${{ matrix.compiler.version }}
 58 | 
 59 |       - name: Run tests
 60 |         run: nim r --path:src tests/tests.nim  # compile and run tests/tests.nim with src/ on the module path
 61 | 
 62 |       - name: Build docs
 63 |         if: matrix.build_doc
 64 |         shell: bash  # repeats defaults.run.shell
 65 |         run: |
 66 |           branch=$GITHUB_REF
 67 |           branch=${branch##*/}
 68 |           for i in src/npeg.nim src/npeg/*.nim; do
 69 |             nim doc --project --outdir:htmldocs \
 70 |               --path:src \
 71 |               "--git.url:https://github.com/$GITHUB_REPOSITORY" \
 72 |               "--git.commit:$GITHUB_SHA" \
 73 |               "--git.devel:$branch" \
 74 |               "$i"
 75 |           done
 76 |           # Make npeg module the default page
 77 |           cp htmldocs/{npeg,index}.html
 78 | 
 79 |       - name: Upload GitHub Pages artifact
 80 |         if: matrix.build_doc
 81 |         uses: actions/upload-pages-artifact@v3.0.1
 82 |         with:
 83 |           path: npeg/htmldocs  # the htmldocs output directory inside the npeg checkout
 84 | 
 85 |   deploy:  # publishes the Pages artifact uploaded by the build job
 86 |     needs:
 87 |       - build
 88 |     if: github.ref == 'refs/heads/master'  # only runs for the master branch ref
 89 | 
 90 |     permissions:
 91 |       actions: read
 92 |       pages: write
 93 |       id-token: write
 94 | 
 95 |     environment:
 96 |       name: github-pages
 97 |       url: ${{ steps.deployment.outputs.page_url }}  # output of the step with id "deployment" below
 98 | 
 99 |     name: Deploy docs to GitHub Pages
100 |     runs-on: ubuntu-latest
101 |     steps:
102 |       - name: Deploy page
103 |         id: deployment
104 |         uses: actions/deploy-pages@v4.0.4
105 | 
106 |   passed:  # inverted gate: this job only runs, and then fails, when the build job did not succeed
107 |     needs: build
108 |     if: failure() || cancelled()  # skipped when every build job succeeded
109 |     name: All tests passed  # NOTE(review): presumably meant as the single required status check - confirm
110 | 
111 |     runs-on: ubuntu-latest
112 |     steps:
113 |       - run: exit 1  # always fails, so the "All tests passed" check goes red on failure or cancellation
114 | 


--------------------------------------------------------------------------------
/.github/workflows/contents.yml:
--------------------------------------------------------------------------------
 1 | name: Make table of contents
 2 | on:  # runs only on pushes that touch README.md, on any branch
 3 |   push:
 4 |     paths:
 5 |       - README.md
 6 |     branches:
 7 |       - '**' 
 8 | jobs:
 9 |   make:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |         - uses: actions/checkout@v3
13 |         - uses: thatrandomperson5/AutoMarkdownContents@v1.1.1  # operates on README.md (see "file" below)
14 |           with:
15 |             file: README.md
16 |             auto: true
17 |             skip-first: true
18 |         - name: Pull request
19 |           uses: peter-evans/create-pull-request@v4
20 |           with:
21 |             token: ${{ secrets.GITHUB_TOKEN }}
22 |             title: "Add md table of contents"
23 |             commit-message: ":clipboard: Added markdown table of contents"
24 |             body: |
25 |               :clipboard: Added markdown table of contents
26 |             base: ${{ github.head_ref }} # NOTE(review): head_ref is only populated for pull_request events, but this workflow runs on push only; presumably this is empty and the action falls back to its default base - confirm
27 |             branch: actions/automd  # branch used for the generated pull request
28 | 
29 | 


--------------------------------------------------------------------------------
/Changelog.md:
--------------------------------------------------------------------------------
  1 | 
  2 | 1.3.0 - 2024-08-22
  3 | ==================
  4 | 
  5 | - Added CI (thanks Leorize)
  6 | - Performance optimization
  7 | - Some small refactorings
  8 | 
  9 | 1.2.1 - 2023-03-04
 10 | ==================
 11 | 
 12 | - fixes for --styleCheck=usages
 13 | 
 14 | 1.2.0 - 2023-01-17
 15 | ==================
 16 | 
 17 | - Split NPegException into more specific errors, updated documentation
 18 | 
 19 | 1.1.2 - 2023-01-08
 20 | ==================
 21 | 
 22 | - Fixed compat with Nim 1.0.11
 23 | 
 24 | 1.1.1 - 2023-01-08
 25 | ==================
 26 | 
 27 | - Disabled test with '∙' to avoid breaking older Nim
 28 | 
 29 | 1.1.0 - 2023-01-08
 30 | ==================
 31 | 
 32 | - Added alternate `∙` concatenation operator
 33 | - Fixed fixBareExceptionWarning in Nim devel
 34 | - Added table of contents to README.md
 35 | 
 36 | 1.0.1 - 2022-12-10
 37 | ==================
 38 | 
 39 | - Bugfix release, fixes "expression 'discard' has no type (or is ambiguous)" in 
 40 |   rare cases
 41 | 
 42 | 1.0.0 - 2022-11-27
 43 | ==================
 44 | 
 45 | - Improved stack trace handling
 46 | - Fixed matchFile() for empty files
 47 | 
 48 | 0.27.0 - 2022-11-06
 49 | ===================
 50 | 
 51 | - Augment the Nim stack trace with the NPeg return stack on exceptions
 52 | - Documentation updates
 53 | 
 54 | 0.26.0 - 2021-11-27
 55 | ===================
 56 | 
 57 | - Improved lineinfo in code blocks for better backtraces
 58 | - Some documentation improvements
 59 | 
 60 | 0.25.0 - 2021-09-11
 61 | ===================
 62 | 
 63 | - Omit the `.computedGoto.` in the inner parser loop for grammars with more
 64 |   than 10k instructions to work around the nim compiler limitation
 65 | 
 66 | 0.24.1 - 2021-01-16
 67 | ===================
 68 | 
 69 | - Added mixin for 'repr' to allow clean tracing of user types
 70 | 
 71 | 0.24.0 - 2020-11-20
 72 | ===================
 73 | 
 74 | - Added -d:npegGcsafe
 75 | 
 76 | 0.23.2 - 2020-11-06
 77 | ===================
 78 | 
 79 | - Small improvement in npeg syntax checking
 80 | 
 81 | 0.23.0 - 2020-09-23
 82 | ===================
 83 | 
 84 | - Reinstated [] out of bound check for captures
 85 | - Dropped profiler support, the implementation was bad
 86 | - Small documentation improvements
 87 | - Added RFC3339 date parser to libs
 88 | 
 89 | 0.22.2 - 2019-12-27
 90 | ===================
 91 | 
 92 | - Skip --gc:arc tests for nim <1.1 to fix Nim CI builds.
 93 | 
 94 | 0.22.1 - 2019-12-27
 95 | ===================
 96 | 
 97 | - Bugfix in codegen causing problems with ^1 notation in code blocks.
 98 | 
 99 | 0.22.0 - 2019-12-24
100 | ===================
101 | 
102 | - Changed the parsing subject from `openArray[char]` to `openArray[T]` and
103 |   added a 'literal' atom to the grammar. This allows NPeg to parse lists of
104 |   any type, making it suitable for separate lexer and parser stages. See
105 |   tests/lexparse.nim for a concise example.
106 | 
107 | - Added `@` syntactic sugar to access the match offset inside code block
108 |   captures.
109 | 
110 | - Dropped Json and AST captures - no complaints heard since deprecation, and it
111 |   simplifies the code base to aid the development of new features.
112 | 
113 | 0.21.3 - 2019-12-06
114 | ===================
115 | 
116 | - Fixed off-by-one error in range `P[m..n]` operator, which would also match
117 |   `P` times `n+1`
118 | 
119 | - Various documentation improvements
120 | 
121 | 0.21.2 - 2019-11-26
122 | ===================
123 | 
124 | - Fixed the way dollar captures are rewritten to avoid the name space clash
125 |   which was introduced by Nim PR #12712.
126 | 
127 | 0.21.1 - 2019-11-19
128 | ===================
129 | 
130 | - Bugfix for templates generating ordered choices
131 | 
132 | 0.21.0 - 2019-10-28
133 | ===================
134 | 
135 | - anonymous `patt` patterns now also take a code block
136 | 
137 | - deprecated AST and Json captures. AST captures are not flexible enough, and
138 |   the functionality can be better implemented using code block captures and
139 |   domain-specific AST object types. The Json captures were added in the early
140 |   days of NPeg as a flexible way to store captures, but this does not mix well
141 |   with custom captures and can not handle things like string unescaping. Both
142 |   capture types were removed from the documentation and a .deprecated. pragma
143 |   was added to the implementation. If you use Json or AST captures and think
144 |   deprecation is a mistake, let me know.
145 | 
146 | 0.20.0 - 2019-10-18
147 | ===================
148 | 
149 | - Added precedence operators - this allows constructions of Pratt parsers with
150 |   bounded left recursion and operator precedence.
151 | - Added run time profiler, enable with -d:npegProfile
152 | - Performance improvements
153 | 
154 | 0.19.0 - 2019-10-11
155 | ===================
156 | 
157 | - Significant performance improvements
158 | - Changed semantics of code block captures: $0 now always captures the
159 |   total subject captured in a rule. This is a minor API change that only
160 |   affects code using the `capture[]` notation inside code blocks
161 | - Added fail() function to force a parser fail in a code block capture
162 | - Added push() function to allow code block captures to push captures
163 |   back on the stack
164 | - Check for loops caused by repeat of empty strings at compile time
165 | 
166 | 0.18.0 - 2019-09-26
167 | ===================
168 | 
169 | - Runtime performance improvements
170 | 
171 | 0.17.1 - 2019-09-19
172 | ===================
173 | 
174 | - Bugfix release (removed lingering debug echo)
175 | 
176 | 0.17.0 - 2019-09-17
177 | ===================
178 | 
179 | - Various runtime and compiletime performance improvements
180 | 
181 | 0.16.0 - 2019-09-08
182 | ===================
183 | 
184 | - Templates can now also be used in libraries
185 | - Added railroad diagram generation with -d:npegGraph
186 | - Improved error reporting
187 | 
188 | 0.15.0 - 2019-08-31
189 | ===================
190 | 
191 | - Generic parser API changed: the peg() macro now explicitly passes the
192 |   userdata type and identifier.
193 | 
194 | 0.14.1 - 2019-08-28
195 | ===================
196 | 
197 | - Added templates / parameterised rules
198 | - Added custom match validation in code block capture
199 | - Added basic types, utf8 and uri libs
200 | - Added global pattern library support
201 | - Proc matchFile() now uses memfiles/mmap for zero copy parsers
202 | - Implemented method to pass user variable to code block captures
203 | - Added AST capture type for building simple abstract syntax trees
204 | - Added Jb() capture for Json booleans
205 | 
206 | 0.13.0 - 2019-07-21
207 | ===================
208 | 
209 | - The capture[] variable available inside code block matches now allows access
210 |   to the match offset as well. This is an API change since the type of capture
211 |   changed from seq[string] to seq[Capture].
212 | 
213 | 0.12.0 - 2019-07-14
214 | ===================
215 | 
216 | - Documentation updates
217 | - Made some error bounds compile-time configurable
218 | - Fix for more strict Nim compiler checks
219 | 
220 | 0.11.0 - 2019-05-29
221 | ===================
222 | 
223 | - Added support for named backreferences
224 | - Added safeguards to prevent grammars growing out of bounds
225 | - Added Graphviz .dot debugging output for parser debugging
226 | - Added `matchLen` and `matchMax` fields to `NPegException`
227 | - Improved pattern syntax error messages
228 | 
229 | 0.10.0 - 2019-04-24
230 | ===================
231 | 
232 | - Fixed 'Graph' character class
233 | 
234 | 0.9.0 - 2019-03-31
235 | ==================
236 | 
237 | - Some syntax changes to fix compilation with mainline Nim 0.19.4
238 | 
239 | 0.8.0 - 2019-03-30
240 | ==================
241 | 
242 | - Added syntactic sugar for accessing the captures[] seq in capture
243 |   code blocks with dollar-number variables $1..$9
244 | 
245 | 0.7.0 - 2019-03-29
246 | ==================
247 | 
248 | - Action callbacks (%) dropped in favour of Nim code block callbacks.
249 | 
250 | 0.6.0 - 2019-03-27
251 | ==================
252 | 
253 | - API change: count syntax changed from {n} to [n].
254 | 
255 | - Optimizations in code generation
256 | 
257 | 0.5.0 - 2019-03-27
258 | ==================
259 | 
260 | - API change: peg() and patt() now return an object of type Parser
261 |   instead of a proc, and the function match(p: Parser) is now used for
262 |   matching the subject. match() can match string and cstring types, 
263 |   matchFile() matches a file using memFile.
264 | 
265 | - Added builtin atoms Upper, Lower, Digit, HexDigit, Alpha
266 | 
267 | - Added `@` search operator
268 | 
269 | - Added `&` and-predicate
270 | 
271 | 0.4.0 - 2019-03-24
272 | ==================
273 | 
274 | - Improved tracing output, during trace the originating rule name
275 |   for each instruction is dumped.
276 | 
277 | - Optimizations
278 | 


--------------------------------------------------------------------------------
/INTERNALS.md:
--------------------------------------------------------------------------------
  1 | 
  2 | ## Introduction
  3 | 
  4 | This document briefly describes the inner workings of NPeg.
  5 | 
  6 | The main PEG algorithm is based on the Paper "A Text Pattern-Matching Tool
  7 | based on Parsing Expression Grammars" by Roberto Ierusalimschy, who is also the
  8 | author of LPEG. While LPEG uses a VM approach for parsing, NPeg adds an
  9 | additional step where the VM code is compiled to native Nim code which does the
 10 | parsing.
 11 | 
 12 | This is how NPeg works in short:
 13 | 
 14 | - The grammar is parsed by a Nim macro which recursively transforms this into
 15 |   a sequence of VM instructions for each grammar rule.
 16 | 
 17 | - The set of instructions is 'linked' into a complete program of instructions
 18 | 
 19 | - The linked program is translated/compiled into a state machine, implemented
 20 |   as a large Nim `case` statement that performs the parsing of the subject
 21 |   string.
 22 | 
 23 | 
 24 | ## Data structures
 25 | 
 26 | The following data structures are used for compiling the grammar:
 27 | 
 28 | - `Inst`, short for "instruction": This is an object variant which implements a
 29 |   basic VM instruction. It consists of the opcode and a number of data fields.
 30 | 
 31 | - `Patt`, short for "pattern": A pattern is a sequence of instructions
 32 |   `seq[Inst]` which typically match an atom from the grammar.
 33 | 
 34 | - `Rule`: One complete, named pattern which is part of a grammar.
 35 | 
 36 | - `Grammar`: A grammar is a collection of named patterns implemented as a
 37 |   `table[string, Patt]`. This is used as the intermediate representation of the
 38 |   complete compiled grammar and holds patterns for each of the named rules.
 39 | 
 40 | - `Program`: A complete linked program, consisting of a pattern and its debug
 41 |   info (symbol table, textual listing)
 42 | 
 43 | - `Parser`: object holding the compiled Nim matching function
 44 | 
 45 | For captures the following data structures are relevant:
 46 | 
 47 | - `CapFrame`: A capframe is a frame of a specific type on the capture stack
 48 |   that points to an offset in the subject string. For each capture open and
 49 |   close pair a frame exists on the stack, thus allowing for nested captures.
 50 | 
 51 | - `Capture`: A capture is a completed capture that is collected and finalized
 52 |   when a capture is closed and finished. 
 53 | 
 54 | For the generic procs and types, the following convention is used:
 55 | 
 56 | - `[T]` is the type of optional "user data" that gets passed into the parser.
 57 |   When this is not explicitly given with the `peg` macro, NPeg will stub this
 58 |   with an unused bool
 59 | 
 60 | - `[S]` is the type of the subject. This is typically a string, although NPeg
 61 |   is generic enough and can parse any `seq[S]`
 62 | 
 63 | ## Building a grammar
 64 | 
 65 | The first step in building a parser is the translation of the grammar into
 66 | snippets of VM instructions which match the data and perform flow control. For
 67 | details of these instructions, refer to the paper by Ierusalimschy.
 68 | 
 69 | The `Patt` data type is used to store a sequence of instructions. This section
 70 | describes how a pattern is built from Nim code, all of which lives in
 71 | `patt.nim`; this mechanism is later used by the macro which is parsing the
 72 | actual PEG grammar.
 73 | 
 74 | The basic atoms are constructed by the `newPatt()` procedures. These take an
 75 | argument describing what needs to be matched in the subject, and deliver a
 76 | short sequence of instructions. For example, the `newPatt("foo")` procedure
 77 | will create a pattern consisting of a single instruction: 
 78 | 
 79 | ```
 80 |    1: line           opStr "foo"
 81 | ```
 82 | 
 83 | There are a number of operators defined which act on one or more patterns.
 84 | These operators are used to combine multiple patterns into larger patterns.
 85 | 
 86 | For example, the `|` operator is used for the PEG ordered choice. This takes
 87 | two patterns, and results in a pattern that tries to match the first one and
 88 | then skips the second, or tries to match the second if the first fails:
 89 | 
 90 | ```
 91 |    0: line           opChoice 3
 92 |    1: line           opStr "foo"
 93 |    2: line           opCommit 4
 94 |    3: line           opStr "bar"
 95 |    4:                opReturn
 96 | ```
 97 | 
 98 | A number of patterns can be combined into a grammar, which is simply a table
 99 | of patterns indexed by name.
100 | 
101 | 
102 | ## PEG DSL to grammar
103 | 
104 | The user defines their NPeg grammar in a Nim code block, which consists of a
105 | number of named patterns. The whole grammar is handled by `parseGrammar()`,
106 | which iterates over all individual named patterns. Each pattern is passed to the
107 | `parsePatt()` macro, which transforms the Nim code block AST into a NPeg
108 | grammar. This macro recursively goes through the Nim AST and calls `newPatt()`
109 | for building atoms, and calls the various operators acting on patterns to grow
110 | the grammar.
111 | 
112 | 
113 | ## Grammar to Nim code
114 | 
115 | The `genCode()` procedure is used to convert the list of instructions into Nim
116 | code which implements the actual parser. This procedure builds a `case`
117 | statement with one branch for each VM instruction, and inserts a template call
118 | for that instruction's opcode in each branch.
119 | 
120 | 
121 | ## Example
122 | 
123 | The following grammar is specified by the user:
124 | 
125 | ```
126 |     lines <- *line                                                          
127 |     line <- "foo" | "bar"
128 | ```
129 | 
130 | This is translated into the following VM program:
131 | 
132 | ```
133 | lines:
134 |    0: lines          opChoice 3
135 |    1: lines          opCall 4 line
136 |    2: lines          opPartCommit 1
137 |    3:                opReturn
138 | 
139 | line:
140 |    4: line           opChoice 7
141 |    5: line           opStr "foo"
142 |    6: line           opCommit 8
143 |    7: line           opStr "bar"
144 |    8:                opReturn
145 | ```
146 | 
147 | which is then translated into the following `case` statement:
148 | 
149 | ```
150 |   while true:
151 |     case ip
152 |     of 0:
153 |       opChoiceFn(3, "lines")
154 |     of 1:
155 |       opCallFn("line", 3, "lines")
156 |     of 2:
157 |       opPartCommitFn(1, "lines")
158 |     of 3:
159 |       opReturnFn("")
160 |     of 4:
161 |       opChoiceFn(7, "line")
162 |     of 5:
163 |       opStrFn("foo", "line")
164 |     of 6:
165 |       opCommitFn(8, "line")
166 |     of 7:
167 |       opStrFn("bar", "line")
168 |     of 8:
169 |       opReturnFn("")
170 |     else:
171 |       opFailFn()
172 | ```
173 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright 2019 Ico Doornekamp <npeg@zevv.nl>
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 4 | this software and associated documentation files (the "Software"), to deal in
 5 | the Software without restriction, including without limitation the rights to
 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 7 | of the Software, and to permit persons to whom the Software is furnished to do
 8 | so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 | 
21 | 


--------------------------------------------------------------------------------
/config.nims:
--------------------------------------------------------------------------------
1 | --styleCheck:usages  # style-check mode "usages" (see the Nim compiler's --styleCheck documentation)
2 | if (NimMajor, NimMinor) < (1, 6):  # compilers older than Nim 1.6
3 |   --styleCheck:hint  # report style findings as hints only
4 | else:
5 |   --styleCheck:error  # Nim 1.6 and newer: report style findings as errors
6 | 


--------------------------------------------------------------------------------
/doc/README.md:
--------------------------------------------------------------------------------
1 | 
2 | This directory contains various papers which were used for inspiration when
3 | building Npeg.
4 | 


--------------------------------------------------------------------------------
/doc/example-graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/example-graph.png


--------------------------------------------------------------------------------
/doc/example-railroad.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/example-railroad.png


--------------------------------------------------------------------------------
/doc/npeg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/npeg.png


--------------------------------------------------------------------------------
/doc/papers/2008_A_Text_Pattern-Matching_Tool_based_on_Parsing_Expression_Grammars.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2008_A_Text_Pattern-Matching_Tool_based_on_Parsing_Expression_Grammars.pdf


--------------------------------------------------------------------------------
/doc/papers/2008_Packrat parsers can support left recursion.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2008_Packrat parsers can support left recursion.pdf


--------------------------------------------------------------------------------
/doc/papers/2008_slides-lpeg-workshop2008.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2008_slides-lpeg-workshop2008.pdf


--------------------------------------------------------------------------------
/doc/papers/2009_A_Parsing_Machine_For_PEGs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2009_A_Parsing_Machine_For_PEGs.pdf


--------------------------------------------------------------------------------
/doc/papers/2010_Converting_regexes_to_Parsing_Expression_Grammars.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2010_Converting_regexes_to_Parsing_Expression_Grammars.pdf


--------------------------------------------------------------------------------
/doc/papers/2010_Direct_left-recursive_parsing_expression_grammars.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2010_Direct_left-recursive_parsing_expression_grammars.pdf


--------------------------------------------------------------------------------
/doc/papers/2011_From_EBNF_to_PEG.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2011_From_EBNF_to_PEG.pdf


--------------------------------------------------------------------------------
/doc/papers/2011_From_Regular_Expressions_to_Parsing_Expression_Grammars.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2011_From_Regular_Expressions_to_Parsing_Expression_Grammars.pdf


--------------------------------------------------------------------------------
/doc/papers/2011_Parsing_Expression_Grammars_for_Structured_Data.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2011_Parsing_Expression_Grammars_for_Structured_Data.pdf


--------------------------------------------------------------------------------
/doc/papers/2013_Exception_Handling_for_Error_Reporting_in_Parsing_Expression_Grammars.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2013_Exception_Handling_for_Error_Reporting_in_Parsing_Expression_Grammars.pdf


--------------------------------------------------------------------------------
/doc/papers/2014_Left_recursion_in_parsing_expression_grammars.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2014_Left_recursion_in_parsing_expression_grammars.pdf


--------------------------------------------------------------------------------
/doc/papers/2018_An_efficient_parsing_machine_for_PEGs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2018_An_efficient_parsing_machine_for_PEGs.pdf


--------------------------------------------------------------------------------
/doc/papers/2021_Incremental_PEG_Parsing.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2021_Incremental_PEG_Parsing.pdf


--------------------------------------------------------------------------------
/doc/papers/README.md:
--------------------------------------------------------------------------------
1 | 
2 | This is a collection of papers somehow relevant to NPeg.
3 | 


--------------------------------------------------------------------------------
/doc/syntax-diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/syntax-diagram.png


--------------------------------------------------------------------------------
/misc/README:
--------------------------------------------------------------------------------
1 | 
2 | This directory contains various snippets, examples or other helpful things
3 | that I want to keep around but do not fit in elsewhere.
4 | 


--------------------------------------------------------------------------------
/misc/indent.nim:
--------------------------------------------------------------------------------
 1 |   # Indent syntax: parse key=value pairs whose values may be nested, indented blocks of pairs
 2 | 
 3 |   let data = """
 4 | a=123
 5 | b=
 6 |   c=567
 7 |   e=42
 8 | f=18
 9 | g=
10 |   b=44
11 |   c=22
12 | """
13 | 
14 |   var indentStack = @[""]  # stack of indentation strings; starts with the empty top-level indent
15 |   template top[T](s: seq[T]): T = s[s.high]  # last element of a seq, used as the top of the stack
16 | 
17 | 
18 |   let p = peg doc:
19 |     doc <- pairs * !1  # NOTE(review): `!1` presumably asserts end of input - confirm
20 |     pairs <- pair * *('\n' * pair)  # one or more pairs separated by newlines
21 |     pair <- indSame * key * '=' * val  # each pair must start at the current indentation
22 |     indentPairs <- '\n' * &indIn * pairs * &('\n' * indOut)  # nested block, guarded by the indIn / indOut predicates
23 |     key <- +Alpha:
24 |       echo "key ", $0
25 |     number <- +Digit:
26 |       echo "val ", $0
27 |     val <- number | indentPairs  # a value is either a number or a nested block of pairs
28 | 
29 |     indSame <- *' ':  # accepted only when the spaces equal the indentation on top of the stack
30 |       validate $0 == indentStack.top
31 | 
32 |     indIn <- *' ':  # accepted only when longer than the current indentation, which is then pushed
33 |       validate len($0) > len(indentStack.top)
34 |       indentStack.add $0
35 |     
36 |     indOut <- *' ':  # pops the current indentation, then requires a match with the enclosing one
37 |       discard indentStack.pop
38 |       validate $0 == indentStack.top
39 | 
40 |   echo p.match(data).ok  # print the `ok` field of the match result
41 | 


--------------------------------------------------------------------------------
/misc/java.nim:
--------------------------------------------------------------------------------
  1 | 
  2 | #
  3 | # This grammar has been auto-generated with mouse2npeg from the Mouse Java-1.6
  4 | # grammar at http://www.romanredz.se/Mouse/Java.1.6.peg. It is not nice to look
  5 | # at, but it does parse Java
  6 | # 
  7 | 
  8 | import npeg
  9 | let r = peg CompilationUnit:
 10 |   CompilationUnit <- Spacing * ?PackageDeclaration * *ImportDeclaration * *TypeDeclaration * EOT
 11 |   PackageDeclaration <- *Annotation * PACKAGE * QualifiedIdentifier * SEMI
 12 |   ImportDeclaration <- IMPORT * ?STATIC * QualifiedIdentifier * ?( DOT * STAR ) * SEMI
 13 |   TypeDeclaration <- *Modifier * ( ClassDeclaration | EnumDeclaration | InterfaceDeclaration | AnnotationTypeDeclaration ) | SEMI
 14 |   ClassDeclaration <- CLASS * Identifier * ?TypeParameters * ?( EXTENDS * ClassType ) * ?( IMPLEMENTS * ClassTypeList ) * ClassBody
 15 |   ClassBody <- LWING * *ClassBodyDeclaration * RWING
 16 |   ClassBodyDeclaration <- SEMI | ?STATIC * Block | *Modifier * MemberDecl
 17 |   MemberDecl <- TypeParameters * GenericMethodOrConstructorRest | Type * Identifier * MethodDeclaratorRest | Type * VariableDeclarators * SEMI | VOID * Identifier * VoidMethodDeclaratorRest | Identifier * ConstructorDeclaratorRest | InterfaceDeclaration | ClassDeclaration | EnumDeclaration | AnnotationTypeDeclaration
 18 |   GenericMethodOrConstructorRest <- ( Type | VOID ) * Identifier * MethodDeclaratorRest | Identifier * ConstructorDeclaratorRest
 19 |   MethodDeclaratorRest <- FormalParameters * *Dim * ?( THROWS * ClassTypeList ) * ( MethodBody | SEMI )
 20 |   VoidMethodDeclaratorRest <- FormalParameters * ?( THROWS * ClassTypeList ) * ( MethodBody | SEMI )
 21 |   ConstructorDeclaratorRest <- FormalParameters * ?( THROWS * ClassTypeList ) * MethodBody
 22 |   MethodBody <- Block
 23 |   InterfaceDeclaration <- INTERFACE * Identifier * ?TypeParameters * ?( EXTENDS * ClassTypeList ) * InterfaceBody
 24 |   InterfaceBody <- LWING * *InterfaceBodyDeclaration * RWING
 25 |   InterfaceBodyDeclaration <- *Modifier * InterfaceMemberDecl | SEMI
 26 |   InterfaceMemberDecl <- InterfaceMethodOrFieldDecl | InterfaceGenericMethodDecl | VOID * Identifier * VoidInterfaceMethodDeclaratorRest | InterfaceDeclaration | AnnotationTypeDeclaration | ClassDeclaration | EnumDeclaration
 27 |   InterfaceMethodOrFieldDecl <- Type * Identifier * InterfaceMethodOrFieldRest
 28 |   InterfaceMethodOrFieldRest <- ConstantDeclaratorsRest * SEMI | InterfaceMethodDeclaratorRest
 29 |   InterfaceMethodDeclaratorRest <- FormalParameters * *Dim * ?( THROWS * ClassTypeList ) * SEMI
 30 |   InterfaceGenericMethodDecl <- TypeParameters * ( Type | VOID ) * Identifier * InterfaceMethodDeclaratorRest
 31 |   VoidInterfaceMethodDeclaratorRest <- FormalParameters * ?( THROWS * ClassTypeList ) * SEMI
 32 |   ConstantDeclaratorsRest <- ConstantDeclaratorRest * *( COMMA * ConstantDeclarator )
 33 |   ConstantDeclarator <- Identifier * ConstantDeclaratorRest
 34 |   ConstantDeclaratorRest <- *Dim * EQU * VariableInitializer
 35 |   EnumDeclaration <- ENUM * Identifier * ?( IMPLEMENTS * ClassTypeList ) * EnumBody
 36 |   EnumBody <- LWING * ?EnumConstants * ?COMMA * ?EnumBodyDeclarations * RWING
 37 |   EnumConstants <- EnumConstant * *( COMMA * EnumConstant )
 38 |   EnumConstant <- *Annotation * Identifier * ?Arguments * ?ClassBody
 39 |   EnumBodyDeclarations <- SEMI * *ClassBodyDeclaration
 40 |   LocalVariableDeclarationStatement <- *( FINAL | Annotation ) * Type * VariableDeclarators * SEMI
 41 |   VariableDeclarators <- VariableDeclarator * *( COMMA * VariableDeclarator )
 42 |   VariableDeclarator <- Identifier * *Dim * ?( EQU * VariableInitializer )
 43 |   FormalParameters <- LPAR * ?FormalParameterList * RPAR
 44 |   FormalParameter <- *( FINAL | Annotation ) * Type * VariableDeclaratorId
 45 |   LastFormalParameter <- *( FINAL | Annotation ) * Type * ELLIPSIS * VariableDeclaratorId
 46 |   FormalParameterList <- FormalParameter * *( COMMA * FormalParameter ) * ?( COMMA * LastFormalParameter ) | LastFormalParameter
 47 |   VariableDeclaratorId <- Identifier * *Dim
 48 |   Block <- LWING * BlockStatements * RWING
 49 |   BlockStatements <- *BlockStatement
 50 |   BlockStatement <- LocalVariableDeclarationStatement | *Modifier * ( ClassDeclaration | EnumDeclaration ) | Statement
 51 |   Statement <- Block | ASSERT * Expression * ?( COLON * Expression ) * SEMI | IF * ParExpression * Statement * ?( ELSE * Statement ) | FOR * LPAR * ?ForInit * SEMI * ?Expression * SEMI * ?ForUpdate * RPAR * Statement | FOR * LPAR * FormalParameter * COLON * Expression * RPAR * Statement | WHILE * ParExpression * Statement | DO * Statement * WHILE * ParExpression * SEMI | TRY * Block * ( +Catch * ?Finally | Finally ) | SWITCH * ParExpression * LWING * SwitchBlockStatementGroups * RWING | SYNCHRONIZED * ParExpression * Block | RETURN * ?Expression * SEMI | THROW * Expression * SEMI | BREAK * ?Identifier * SEMI | CONTINUE * ?Identifier * SEMI | SEMI | StatementExpression * SEMI | Identifier * COLON * Statement
 52 |   Catch <- CATCH * LPAR * FormalParameter * RPAR * Block
 53 |   Finally <- FINALLY * Block
 54 |   SwitchBlockStatementGroups <- *SwitchBlockStatementGroup
 55 |   SwitchBlockStatementGroup <- SwitchLabel * BlockStatements
 56 |   SwitchLabel <- CASE * ConstantExpression * COLON | CASE * EnumConstantName * COLON | DEFAULT * COLON
 57 |   ForInit <- *( FINAL | Annotation ) * Type * VariableDeclarators | StatementExpression * *( COMMA * StatementExpression )
 58 |   ForUpdate <- StatementExpression * *( COMMA * StatementExpression )
 59 |   EnumConstantName <- Identifier
 60 |   StatementExpression <- Expression
 61 |   ConstantExpression <- Expression
 62 |   Expression <- ConditionalExpression * *( AssignmentOperator * ConditionalExpression )
 63 |   AssignmentOperator <- EQU | PLUSEQU | MINUSEQU | STAREQU | DIVEQU | ANDEQU | OREQU | HATEQU | MODEQU | SLEQU | SREQU | BSREQU
 64 |   ConditionalExpression <- ConditionalOrExpression * *( QUERY * Expression * COLON * ConditionalOrExpression )
 65 |   ConditionalOrExpression <- ConditionalAndExpression * *( OROR * ConditionalAndExpression )
 66 |   ConditionalAndExpression <- InclusiveOrExpression * *( ANDAND * InclusiveOrExpression )
 67 |   InclusiveOrExpression <- ExclusiveOrExpression * *( OR * ExclusiveOrExpression )
 68 |   ExclusiveOrExpression <- AndExpression * *( HAT * AndExpression )
 69 |   AndExpression <- EqualityExpression * *( AND * EqualityExpression )
 70 |   EqualityExpression <- RelationalExpression * *( ( EQUAL | NOTEQUAL ) * RelationalExpression )
 71 |   RelationalExpression <- ShiftExpression * *( ( LE | GE | LT | GT ) * ShiftExpression | INSTANCEOF * ReferenceType )
 72 |   ShiftExpression <- AdditiveExpression * *( ( SL | SR | BSR ) * AdditiveExpression )
 73 |   AdditiveExpression <- MultiplicativeExpression * *( ( PLUS | MINUS ) * MultiplicativeExpression )
 74 |   MultiplicativeExpression <- UnaryExpression * *( ( STAR | DIV | MOD ) * UnaryExpression )
 75 |   UnaryExpression <- PrefixOp * UnaryExpression | LPAR * Type * RPAR * UnaryExpression | Primary * *( Selector ) * *( PostfixOp )
 76 |   Primary <- ParExpression | NonWildcardTypeArguments * ( ExplicitGenericInvocationSuffix | THIS * Arguments ) | THIS * ?Arguments | SUPER * SuperSuffix | Literal | NEW * Creator | QualifiedIdentifier * ?IdentifierSuffix | BasicType * *Dim * DOT * CLASS | VOID * DOT * CLASS
 77 |   IdentifierSuffix <- LBRK * ( RBRK * *Dim * DOT * CLASS | Expression * RBRK ) | Arguments | DOT * ( CLASS | ExplicitGenericInvocation | THIS | SUPER * Arguments | NEW * ?NonWildcardTypeArguments * InnerCreator )
 78 |   ExplicitGenericInvocation <- NonWildcardTypeArguments * ExplicitGenericInvocationSuffix
 79 |   NonWildcardTypeArguments <- LPOINT * ReferenceType * *( COMMA * ReferenceType ) * RPOINT
 80 |   ExplicitGenericInvocationSuffix <- SUPER * SuperSuffix | Identifier * Arguments
 81 |   PrefixOp <- INC | DEC | BANG | TILDA | PLUS | MINUS
 82 |   PostfixOp <- INC | DEC
 83 |   Selector <- DOT * Identifier * ?Arguments | DOT * ExplicitGenericInvocation | DOT * THIS | DOT * SUPER * SuperSuffix | DOT * NEW * ?NonWildcardTypeArguments * InnerCreator | DimExpr
 84 |   SuperSuffix <- Arguments | DOT * ?NonWildcardTypeArguments * Identifier * ?Arguments
 85 |   BasicType <- ( "byte" | "short" | "char" | "int" | "long" | "float" | "double" | "boolean" ) * !LetterOrDigit * Spacing
 86 |   Arguments <- LPAR * ?( Expression * *( COMMA * Expression ) ) * RPAR
 87 |   Creator <- ?NonWildcardTypeArguments * CreatedName * ClassCreatorRest | ?NonWildcardTypeArguments * ( ClassType | BasicType ) * ArrayCreatorRest
 88 |   CreatedName <- Identifier * ?NonWildcardTypeArguments * *( DOT * Identifier * ?NonWildcardTypeArguments )
 89 |   InnerCreator <- Identifier * ClassCreatorRest
 90 |   ArrayCreatorRest <- LBRK * ( RBRK * *Dim * ArrayInitializer | Expression * RBRK * *DimExpr * *Dim )
 91 |   ClassCreatorRest <- Arguments * ?ClassBody
 92 |   ArrayInitializer <- LWING * ?( VariableInitializer * *( COMMA * VariableInitializer ) ) * ?COMMA * RWING
 93 |   VariableInitializer <- ArrayInitializer | Expression
 94 |   ParExpression <- LPAR * Expression * RPAR
 95 |   QualifiedIdentifier <- Identifier * *( DOT * Identifier )
 96 |   Dim <- LBRK * RBRK
 97 |   DimExpr <- LBRK * Expression * RBRK
 98 |   Type <- ( BasicType | ClassType ) * *Dim
 99 |   ReferenceType <- BasicType * +Dim | ClassType * *Dim
100 |   ClassType <- Identifier * ?TypeArguments * *( DOT * Identifier * ?TypeArguments )
101 |   ClassTypeList <- ClassType * *( COMMA * ClassType )
102 |   TypeArguments <- LPOINT * TypeArgument * *( COMMA * TypeArgument ) * RPOINT
103 |   TypeArgument <- ReferenceType | QUERY * ?( ( EXTENDS | SUPER ) * ReferenceType )
104 |   TypeParameters <- LPOINT * TypeParameter * *( COMMA * TypeParameter ) * RPOINT
105 |   TypeParameter <- Identifier * ?( EXTENDS * Bound )
106 |   Bound <- ClassType * *( AND * ClassType )
107 |   Modifier <- Annotation | ( "public" | "protected" | "private" | "static" | "abstract" | "final" | "native" | "synchronized" | "transient" | "volatile" | "strictfp" ) * !LetterOrDigit * Spacing
108 |   AnnotationTypeDeclaration <- AT * INTERFACE * Identifier * AnnotationTypeBody
109 |   AnnotationTypeBody <- LWING * *AnnotationTypeElementDeclaration * RWING
110 |   AnnotationTypeElementDeclaration <- *Modifier * AnnotationTypeElementRest | SEMI
111 |   AnnotationTypeElementRest <- Type * AnnotationMethodOrConstantRest * SEMI | ClassDeclaration | EnumDeclaration | InterfaceDeclaration | AnnotationTypeDeclaration
112 |   AnnotationMethodOrConstantRest <- AnnotationMethodRest | AnnotationConstantRest
113 |   AnnotationMethodRest <- Identifier * LPAR * RPAR * ?DefaultValue
114 |   AnnotationConstantRest <- VariableDeclarators
115 |   DefaultValue <- DEFAULT * ElementValue
116 |   Annotation <- NormalAnnotation | SingleElementAnnotation | MarkerAnnotation
117 |   NormalAnnotation <- AT * QualifiedIdentifier * LPAR * ?ElementValuePairs * RPAR
118 |   SingleElementAnnotation <- AT * QualifiedIdentifier * LPAR * ElementValue * RPAR
119 |   MarkerAnnotation <- AT * QualifiedIdentifier
120 |   ElementValuePairs <- ElementValuePair * *( COMMA * ElementValuePair )
121 |   ElementValuePair <- Identifier * EQU * ElementValue
122 |   ElementValue <- ConditionalExpression | Annotation | ElementValueArrayInitializer
123 |   ElementValueArrayInitializer <- LWING * ?ElementValues * ?COMMA * RWING
124 |   ElementValues <- ElementValue * *( COMMA * ElementValue )
125 |   Spacing <- *( +{' ','\t','\r','\n','\x0c'} | "/*" * *( !"*/" * 1 ) * "*/" | "//" * *( !{'\r','\n'} * 1 ) * {'\r','\n'} )
126 |   Identifier <- !Keyword * Letter * *LetterOrDigit * Spacing
127 |   Letter <- {'a'..'z'} | {'A'..'Z'} | {'_','$'}
128 |   LetterOrDigit <- {'a'..'z'} | {'A'..'Z'} | {'0'..'9'} | {'_','$'}
129 |   Keyword <- ( "abstract" | "assert" | "boolean" | "break" | "byte" | "case" | "catch" | "char" | "class" | "const" | "continue" | "default" | "double" | "do" | "else" | "enum" | "extends" | "false" | "finally" | "final" | "float" | "for" | "goto" | "if" | "implements" | "import" | "interface" | "int" | "instanceof" | "long" | "native" | "new" | "null" | "package" | "private" | "protected" | "public" | "return" | "short" | "static" | "strictfp" | "super" | "switch" | "synchronized" | "this" | "throws" | "throw" | "transient" | "true" | "try" | "void" | "volatile" | "while" ) * !LetterOrDigit
130 |   ASSERT <- "assert" * !LetterOrDigit * Spacing
131 |   BREAK <- "break" * !LetterOrDigit * Spacing
132 |   CASE <- "case" * !LetterOrDigit * Spacing
133 |   CATCH <- "catch" * !LetterOrDigit * Spacing
134 |   CLASS <- "class" * !LetterOrDigit * Spacing
135 |   CONTINUE <- "continue" * !LetterOrDigit * Spacing
136 |   DEFAULT <- "default" * !LetterOrDigit * Spacing
137 |   DO <- "do" * !LetterOrDigit * Spacing
138 |   ELSE <- "else" * !LetterOrDigit * Spacing
139 |   ENUM <- "enum" * !LetterOrDigit * Spacing
140 |   EXTENDS <- "extends" * !LetterOrDigit * Spacing
141 |   FINALLY <- "finally" * !LetterOrDigit * Spacing
142 |   FINAL <- "final" * !LetterOrDigit * Spacing
143 |   FOR <- "for" * !LetterOrDigit * Spacing
144 |   IF <- "if" * !LetterOrDigit * Spacing
145 |   IMPLEMENTS <- "implements" * !LetterOrDigit * Spacing
146 |   IMPORT <- "import" * !LetterOrDigit * Spacing
147 |   INTERFACE <- "interface" * !LetterOrDigit * Spacing
148 |   INSTANCEOF <- "instanceof" * !LetterOrDigit * Spacing
149 |   NEW <- "new" * !LetterOrDigit * Spacing
150 |   PACKAGE <- "package" * !LetterOrDigit * Spacing
151 |   RETURN <- "return" * !LetterOrDigit * Spacing
152 |   STATIC <- "static" * !LetterOrDigit * Spacing
153 |   SUPER <- "super" * !LetterOrDigit * Spacing
154 |   SWITCH <- "switch" * !LetterOrDigit * Spacing
155 |   SYNCHRONIZED <- "synchronized" * !LetterOrDigit * Spacing
156 |   THIS <- "this" * !LetterOrDigit * Spacing
157 |   THROWS <- "throws" * !LetterOrDigit * Spacing
158 |   THROW <- "throw" * !LetterOrDigit * Spacing
159 |   TRY <- "try" * !LetterOrDigit * Spacing
160 |   VOID <- "void" * !LetterOrDigit * Spacing
161 |   WHILE <- "while" * !LetterOrDigit * Spacing
162 |   Literal <- ( FloatLiteral | IntegerLiteral | CharLiteral | StringLiteral | "true" * !LetterOrDigit | "false" * !LetterOrDigit | "null" * !LetterOrDigit ) * Spacing
163 |   IntegerLiteral <- ( HexNumeral | OctalNumeral | DecimalNumeral ) * ?{'l','L'}
164 |   DecimalNumeral <- "0" | {'1'..'9'} * *{'0'..'9'}
165 |   HexNumeral <- ( "0x" | "0X" ) * +HexDigit
166 |   HexDigit <- {'a'..'f'} | {'A'..'F'} | {'0'..'9'}
167 |   OctalNumeral <- "0" * +{'0'..'7'}
168 |   FloatLiteral <- HexFloat | DecimalFloat
169 |   DecimalFloat <- +Digit * "." * *Digit * ?Exponent * ?{'f','F','d','D'} | "." * +Digit * ?Exponent * ?{'f','F','d','D'} | +Digit * Exponent * ?{'f','F','d','D'} | +Digit * ?Exponent * {'f','F','d','D'}
170 |   Exponent <- {'e','E'} * ?{'+','-'} * +Digit  # optional sign is '+' or '-' only (a backslash is not a sign)
171 |   Digit <- {'0'..'9'}
172 |   HexFloat <- HexSignificand * BinaryExponent * ?{'f','F','d','D'}
173 |   HexSignificand <- ( "0x" | "0X" ) * *HexDigit * "." * +HexDigit | HexNumeral * ?"."
174 |   BinaryExponent <- {'p','P'} * ?{'+','-'} * +Digit  # optional sign is '+' or '-' only (a backslash is not a sign)
175 |   CharLiteral <- "\'" * ( Escape | !{'\'','\\','\n','\r'} * 1 ) * "\'"
176 |   StringLiteral <- "\"" * *( Escape | !{'"','\\','\n','\r'} * 1 ) * "\""
177 |   Escape <- "\\" * ( {'b','t','n','f','r','"','\'','\\'} | OctalEscape | UnicodeEscape )
178 |   OctalEscape <- {'0'..'3'} * {'0'..'7'} * {'0'..'7'} | {'0'..'7'} * {'0'..'7'} | {'0'..'7'}
179 |   UnicodeEscape <- +"u" * HexDigit * HexDigit * HexDigit * HexDigit
180 |   AT <- "@" * Spacing
181 |   AND <- "&" * !{'=','&'} * Spacing
182 |   ANDAND <- "&&" * Spacing
183 |   ANDEQU <- "&=" * Spacing
184 |   BANG <- "!" * !"=" * Spacing
185 |   BSR <- ">>>" * !"=" * Spacing
186 |   BSREQU <- ">>>=" * Spacing
187 |   COLON <- ":" * Spacing
188 |   COMMA <- "," * Spacing
189 |   DEC <- "--" * Spacing
190 |   DIV <- "/" * !"=" * Spacing
191 |   DIVEQU <- "/=" * Spacing
192 |   DOT <- "." * Spacing
193 |   ELLIPSIS <- "..." * Spacing
194 |   EQU <- "=" * !"=" * Spacing
195 |   EQUAL <- "==" * Spacing
196 |   GE <- ">=" * Spacing
197 |   GT <- ">" * !{'=','>'} * Spacing
198 |   HAT <- "^" * !"=" * Spacing
199 |   HATEQU <- "^=" * Spacing
200 |   INC <- "++" * Spacing
201 |   LBRK <- "[" * Spacing
202 |   LE <- "<=" * Spacing
203 |   LPAR <- "(" * Spacing
204 |   LPOINT <- "<" * Spacing
205 |   LT <- "<" * !{'=','<'} * Spacing
206 |   LWING <- "{" * Spacing
207 |   MINUS <- "-" * !{'=','-'} * Spacing  # a lone "-": not the start of "-=" (MINUSEQU) or "--" (DEC)
208 |   MINUSEQU <- "-=" * Spacing
209 |   MOD <- "%" * !"=" * Spacing
210 |   MODEQU <- "%=" * Spacing
211 |   NOTEQUAL <- "!=" * Spacing
212 |   OR <- "|" * !{'=','|'} * Spacing
213 |   OREQU <- "|=" * Spacing
214 |   OROR <- "||" * Spacing
215 |   PLUS <- "+" * !{'=','+'} * Spacing
216 |   PLUSEQU <- "+=" * Spacing
217 |   QUERY <- "?" * Spacing
218 |   RBRK <- "]" * Spacing
219 |   RPAR <- ")" * Spacing
220 |   RPOINT <- ">" * Spacing
221 |   RWING <- "}" * Spacing
222 |   SEMI <- ";" * Spacing
223 |   SL <- "<<" * !"=" * Spacing
224 |   SLEQU <- "<<=" * Spacing
225 |   SR <- ">>" * !{'=','>'} * Spacing
226 |   SREQU <- ">>=" * Spacing
227 |   STAR <- "*" * !"=" * Spacing
228 |   STAREQU <- "*=" * Spacing
229 |   TILDA <- "~" * Spacing
230 |   EOT <- !1
231 | 
232 | 


--------------------------------------------------------------------------------
/misc/mouse2npeg.nim:
--------------------------------------------------------------------------------
  1 | #
  2 | # Convert a Mouse PEG grammar into NPeg grammar
  3 | # http://www.romanredz.se/Mouse/
  4 | #
  5 | 
  6 | import npeg
  7 | import npeg/common
  8 | import strutils
  9 | 
 10 | # Parse the Mouse grammar into an ASTNode tree. A backslash escapes the next
 11 | # character; `char` captures the whole escape so `dump` can translate it.
 12 | let mouse = peg "mouse":
 13 |   mouse     <- A("mouse", *rule) * ?s * !1
 14 |   rule      <- ?s * A("rule", >name * s * "=" * s * patt)
 15 |   patt      <- A("patt", choice * ?sem * s * ';')
 16 |   sem       <- ('{' * @'}')
 17 |   choice    <- A("choice", seq * s * *('/' * s * seq))
 18 |   seq       <- A("seq", prefixed * *(s * prefixed) * s)
 19 |   nonterm   <- A("nonterm", >name)
 20 |   prefixed  <- A("pre", ?>'!' * postfixed)
 21 |   postfixed <- A("post", (paren | nonterm | lit) * >?postfix)
 22 |   lit       <- any | range | set | string
 23 |   any       <- A("any", '_')
 24 |   range     <- A("range", '[' * >(char * '-' * char) * ']')
 25 |   set       <- A("set", '[' * +(char-']') * ']')
 26 |   string    <- A("string", '"' * +(char-'"') * '"')
 27 |   paren     <- A("paren", '(' * s * choice * s * ')')
 28 |   postfix   <- {'+','*','?'}
 29 |   name      <- +Alpha
 30 |   char      <- A("char", >( ("\\u" * Xdigit[4]) | ('\\' * 1) | 1))
 31 |   nl        <- {'\r','\n'}
 32 |   s         <- *( +Space | comment | sem )
 33 |   comment   <- "//" * >*(1-nl)
 34 | 
 35 | # Dump the PEG ast tree into NPeg form; an unknown node kind aborts with exit code 1
 36 | proc dump(a: ASTNode): string =
 37 |   proc unescapeChar(s: string): string =  # one captured Mouse character -> Nim literal text
 38 |     if s == "'":
 39 |       result = "\\'"
 40 |     elif s == "\\":
 41 |       result = "\\\\"
 42 |     elif s.len == 6:  # \uXXXX
 43 |       result = $(parseHexInt(s[2..5]).char.escapeChar)
 44 |     elif s.len == 2 and s[0] == '\\' and s[1] notin {'\\','r','n','t','"'}:
 45 |       result = unescapeChar(s[1..1])  # any other escaped character stands for itself, e.g. "\-" is '-'
 46 |     else:
 47 |       result = s
 48 |   case a.id:
 49 |     of "mouse":
 50 |       for c in a:
 51 |         result.add dump(c)
 52 |     of "rule":
 53 |       return "  " & $a.val & " <- " & dump(a["patt"]) & "\n"
 54 |     of "patt":
 55 |       return dump a[0]
 56 |     of "choice":
 57 |       var parts: seq[string]
 58 |       for c in a:
 59 |         parts.add dump(c)
 60 |       return parts.join(" | ")
 61 |     of "seq":
 62 |       var parts: seq[string]
 63 |       for c in a:
 64 |         parts.add dump(c)
 65 |       return parts.join(" * ")
 66 |     of "paren":
 67 |       return "( " & dump(a[0]) & " )"
 68 |     of "pre":
 69 |       return a.val & dump(a[0])  # a.val is the captured "!" or empty
 70 |     of "post":
 71 |       return a.val & dump(a[0])  # NPeg writes * + ? in front of the operand
 72 |     of "nonterm":
 73 |       return a.val
 74 |     of "any":
 75 |       return "1"
 76 |     of "string":
 77 |       result.add '"'
 78 |       for c in a:
 79 |         result.add unescapeChar(c.val)
 80 |       result.add '"'
 81 |     of "set":
 82 |       var cs: seq[string]
 83 |       for c in a: cs.add unescapeChar(c.val)
 84 |       return "{'" & cs.join("','") & "'}"
 85 |     of "range":  # NOTE(review): indexes 0 and 2 assume both endpoints are single, unescaped characters
 86 |       return "{'" & escapeChar(a.val[0]) & "'..'" & escapeChar(a.val[2]) & "'}"
 87 |     else:
 88 |       stderr.writeLine "\nUnhandled " & a.id  # stderr: stdout carries the generated grammar
 89 |       quit 1
 90 | 
 91 | 
 92 | # http://www.romanredz.se/Mouse/Java.1.6.peg
 93 | 
 94 | let r = mouse.matchFile("/tmp/Java.1.6.peg")
 95 | if not r.ok:
 96 |   stderr.writeLine "Error parsing at ", r.matchMax  # stderr: stdout is reserved for the generated grammar
 97 |   quit 1
 98 | # Emit the generated module on stdout: fixed preamble first, then the converted rules.
 99 | echo "import npeg"
100 | echo "let r = peg CompilationUnit:"
101 | 
102 | echo dump(r.capturesAst())
103 | 
104 | 


--------------------------------------------------------------------------------
/misc/rod.nim:
--------------------------------------------------------------------------------
  1 | import npeg
  2 | import strutils
  3 | 
  4 | # Rod AST node types
  5 | 
  6 | type
  7 |   NodeKind* = enum  # tag selecting which variant of `Node` is active
  8 |     nkEmpty
  9 |     nkScript, nkBlock
 10 |     nkBool, nkNumber, nkString, nkIdent
 11 |     nkPrefix, nkInfix, nkDot, nkIndex
 12 |     nkVar, nkLet
 13 |     nkIf, nkWhile, nkFor
 14 |     nkBreak, nkContinue
 15 |     nkCall
 16 |     nkGeneric
 17 |     nkObject, nkObjFields, nkObjConstr
 18 |   Node* = ref object
 19 |     ln*, col*: int  # rendered as "ln:col" by `$` when showLineInfo is true
 20 |     file*: string
 21 |     case kind*: NodeKind
 22 |     of nkEmpty: discard
 23 |     of nkBool:
 24 |       boolVal*: bool
 25 |     of nkNumber:
 26 |       numberVal*: float
 27 |     of nkString:
 28 |       stringVal*: string
 29 |     of nkIdent:
 30 |       ident*: string
 31 |     else:  # all remaining kinds are inner nodes holding child nodes
 32 |       children*: seq[Node]
 33 | 
 34 | type
 35 |   ParseStack = seq[Node]  # handed to the grammar's code blocks as `ps`
 36 | 
 37 | 
 38 | # Pretty printing
 39 | 
 40 | proc `$`*(node: Node, showLineInfo = false): string =
 41 |   ## Render `node`: leaves print their value, other nodes print as "(kind child ...)" (kind omitted for prefix/infix).
 42 |   const LeafNodes = { nkEmpty, nkBool, nkNumber, nkString, nkIdent, nkPrefix, nkInfix }
 43 |   case node.kind
 44 |   of nkEmpty: return "<empty>"
 45 |   of nkBool: return $node.boolVal
 46 |   of nkNumber: return $node.numberVal
 47 |   of nkString: return escape(node.stringVal)
 48 |   of nkIdent: return node.ident
 49 |   else: discard
 50 |   if showLineInfo:
 51 |     result.add($node.ln & ":" & $node.col & " ")
 52 |   result.add("(")
 53 |   if node.kind notin {nkPrefix, nkInfix}:
 54 |     result.add($node.kind & " ")
 55 |   let manyChildren = node.children.len > 1
 56 |   for i, child in node.children:
 57 |     let rendered = `$`(child, showLineInfo)
 58 |     if manyChildren and child.kind notin LeafNodes: result.add("\n" & indent(rendered, 2))
 59 |     elif i > 0: result.add(" " & rendered)
 60 |     else: result.add(rendered)
 61 |   result.add(")")
 62 | 
 63 | proc `$`*(ps: ParseStack): string =
 64 |   ## Lists every stack entry as "<index>:" on one line followed by the rendered node.
 65 |   for idx in 0 .. ps.high: result.add($idx & ":\n" & $ps[idx] & "\n")
 66 |   result.add("\n")
 67 | 
 68 | 
 69 | 
 70 | proc addToParent(ps: var ParseStack, ns: varargs[Node]) =
 71 |   ps[^1].children.add(ns)  # append all of `ns` to the node currently on top of the stack
 72 | 
 73 | proc swap(ps: var ParseStack) =
 74 |   ## Exchange the two topmost entries of the parse stack.
 75 |   system.swap(ps[ps.high-1], ps[ps.high])
 76 | 
 77 | let p = peg(rod, ps: ParseStack):
 78 | 
 79 |   S <- *Space
 80 | 
 81 |   # Basic tokens
 82 | 
 83 |   tokColon    <- ":" * S
 84 |   tokEquals   <- "=" * S
 85 |   tokComma    <- "," * S
 86 |   tokPlus     <- "+" * S
 87 |   tokMinus    <- "-" * S
 88 |   tokMul      <- "*" * S
 89 |   tokDiv      <- "/" * S
 90 |   tokParOpen  <- "(" * S
 91 |   tokParClose <- ")" * S
 92 |   tokCurOpen  <- "{" * S
 93 |   tokCurClose <- "}" * S
 94 |   tokVar      <- "var" * S
 95 |   tokLet      <- "let" * S
 96 |   tokIf       <- "if" * S
 97 |   tokElif     <- "elif" * S
 98 |   tokElse     <- "else" * S
 99 |   tokWhile    <- "while" * S
100 |   tokObject   <- "object" * S
101 |   
102 |   keyWords    <- "var" | "let" | "if" | "elif" | "else" | "while" | "object"
103 | 
104 |   # Atoms
105 | 
106 |   tokNumber   <- >+Digit * S:
107 |     ps.add Node(kind: nkNumber, numberVal: parseFloat($1))
108 | 
109 |   tokType     <- Alpha * *Alnum * S
110 |   
111 |   tokBool     <- >("true" | "false") * S:
112 |     ps.add Node(kind: nkBool, boolval: $1 == "true")
113 | 
114 |   tokIdent    <- >((Alpha * *Alnum) - keyWords) * S:
115 |     ps.add Node(kind: nkIdent, ident: $1)
116 | 
117 |   # Block
118 | 
119 |   blockOpen   <- tokCurOpen:
120 |     ps.add Node(kind: nkBlock)
121 | 
122 |   blockStmt   <- stmt:
123 |     ps.addToParent ps.pop()
124 | 
125 |   blockSec    <- blockOpen * *blockStmt * tokCurClose
126 | 
127 |   # Var section
128 | 
129 |   varOpen     <- (tokVar | tokLet):
130 |     ps.add Node(kind: nkVar)
131 |   
132 |   varDef      <- tokIdent * ?(tokColon * tokType) * ?(tokEquals * exprSec):
133 |     ps.swap()
134 |     ps.addToParent Node(kind: nkVar,
135 |                         children: @[Node(kind: nkIdent, ident: "="), ps.pop(), ps.pop()])
136 | 
137 |   varSec      <- varOpen * +varDef * *(tokComma * varDef):
138 |     ps.add ps.pop()
139 | 
140 |   # While statement
141 | 
142 |   whileSec    <- tokWhile * exprSec * blockSec:
143 |     ps.swap()
144 |     ps.add Node(kind: nkWhile, children: @[ps.pop(), ps.pop()])
145 | 
146 |   # If expressions
147 | 
148 |   ifOpen      <- tokIf * exprSec * blockSec:
149 |     let (nBlock, nExpr) = (ps.pop(), ps.pop())
150 |     ps.add Node(kind: nkIf, children: @[nExpr, nBlock])
151 | 
152 |   ifElif      <- (tokElif * exprSec * blockSec):
153 |     ps.swap()
154 |     ps.addtoParent ps.pop(), ps.pop()
155 | 
156 |   ifElse      <- tokElse * blockSec:  # not optional here: ifExpr already writes `?ifElse`
157 |     ps.addToParent ps.pop()  # must only run after a real else-block was pushed
158 | 
159 |   ifExpr      <- ifOpen * *ifElif * ?ifElse
160 | 
161 |   # Object
162 | 
163 |   objectSec   <- tokObject * tokIdent * tokCurOpen * objFields * tokCurClose
164 | 
165 |   objFields   <- tokIdent * *(tokComma * tokIdent) * tokColon * tokType
166 | 
167 |   stmt        <- blockSec | varSec | objectSec | whileSec | exprSec
168 | 
169 |   rod         <- S * +stmt * !1
170 | 
171 |   # Expressions: Pratt parser
172 | 
173 |   exprSec <- exp
174 | 
175 |   exp <- S * prefix * *infix
176 | 
177 |   prefix <- ifExpr | tokBool | tokNumber | parenExp | uniMinus | tokIdent
178 |   uniMinus <- >'-' * exp
179 |   parenExp <- ( tokParOpen * exp * tokParClose ) ^ 0
180 | 
181 |   infix <- >("not" | "->" | "$")                     * exp ^ 1 |
182 |            >("=")                                    * exp ^ 2 |
183 |            >("or" | "xor")                           * exp ^ 3 |
184 |            >("and")                                  * exp ^ 4 |
185 |            >("==" | "<=" | "<" | ">=" | ">" | "!=" | 
186 |              "in" | "notin" | "is" | "isnot" | "of") * exp ^ 5 |
187 |            >(".." | "..<")                           * exp ^ 6 |
188 |            >("&")                                    * exp ^ 7 |
189 |            >("+" | "-")                              * exp ^ 8 |
190 |            >("*" | "/" | "%")                        * exp ^ 9 |
191 |            >("div" | "mod" | "shl" | "shr")          * exp ^ 10 |
192 |            >("^")                                    * exp ^^ 11:
193 | 
194 |     let (f2, f1) = (ps.pop(), ps.pop())
195 |     ps.add Node(kind: nkInfix, children:
196 |                 @[Node(kind: nkIdent, ident: $1), f1, f2])
197 | 
198 | 
199 | proc compile(source:string) =
200 |   ## Echo `source`, parse it with `p` and, on success, echo the stack wrapped in an nkBlock node.
201 |   var ps: ParseStack
202 |   echo "---------------"
203 |   echo source
204 |   if not p.match(source, ps).ok: return
205 |   echo "---------------"
206 |   echo Node(kind: nkBlock, children: ps)
207 | 
208 | when false:
209 |   compile """
210 |     if a > 3 {
211 |       var w = 42
212 |     }
213 |   """
214 | 
215 | when false:
216 |     compile("""
217 |     var
218 |       a = 2 + 2,
219 |       b = 2 + a
220 |     """)
221 |   
222 | when true:
223 |     compile("""
224 |     { var a = 10
225 |       { var a = a } }
226 |     { var a = 12
227 |       a = a + 3 }
228 |     """)
229 |   
230 | when false:
231 |     compile("""
232 |     let x = true
233 |     if x {
234 |       var x = 2
235 |     } 
236 |     """)
237 | 
238 | when false:
239 |     compile("""
240 |     let x = true
241 |     if x {
242 |       var x = 2
243 |     } elif false {
244 |       var y = 3
245 |     } elif false {
246 |       var z = 4
247 |     } else {
248 |       var w = 5
249 |     }
250 |     """)
251 | 
252 | when false:
253 |     compile("""
254 |     let x = if true { 2 }
255 |             else { 4 }
256 |     """)
257 |  
258 | when false:
259 |     compile("""
260 |     let x = true
261 |     while x {
262 |       let y = 1
263 |     }
264 |     """)
265 | 
266 | when false:
267 |     compile("""
268 |     while true {
269 |       let y = 1
270 |     }
271 |     """)
272 | 
273 | when false:
274 |     compile("""
275 |     while false {
276 |       let y = 1
277 |     }
278 |     """)
279 | 
280 | when false:
281 |     compile("""
282 |     var
283 |       x = 0,
284 |       stop = false
285 |     while x {
286 |     }
287 |     """)
288 | 


--------------------------------------------------------------------------------
/npeg.nimble:
--------------------------------------------------------------------------------
 1 | # Package
 2 | 
 3 | version       = "1.3.0"
 4 | author        = "Ico Doornekamp"
 5 | description   = "a PEG library"
 6 | license       = "MIT"
 7 | srcDir        = "src"
 8 | installExt    = @["nim"]
 9 | 
10 | # Dependencies
11 | 
12 | requires "nim >= 0.19.0"
13 | 
14 | # Test
15 | 
16 | task test, "Runs the test suite":
17 |   exec "nimble testc && nimble testcpp && nimble testarc && nimble testjs"
18 | 
19 | task testc, "C tests":
20 |   exec "nim c -r tests/tests.nim"
21 | 
22 | task testcpp, "CPP tests":
23 |   exec "nim cpp -r tests/tests.nim"
24 | 
25 | task testjs, "JS tests":
26 |   exec "nim js -r tests/tests.nim"
27 | 
28 | task testdanger, "Runs the test suite in danger mode":
29 |   exec "nim c -d:danger -r tests/tests.nim"
30 | 
31 | task testwin, "Mingw tests":
32 |   exec "nim c -d:mingw tests/tests.nim && wine tests/tests.exe"
33 | 
34 | task test32, "32 bit tests":
35 |   exec "nim c --cpu:i386 --passC:-m32 --passL:-m32 tests/tests.nim && tests/tests"
36 | 
37 | task testall, "Test all":
38 |   exec "nimble test && nimble testdanger && nimble testwin"  # `test` already runs testc, testcpp, testarc and testjs
39 | 
40 | when (NimMajor, NimMinor) >= (1, 1):
41 |   task testarc, "--gc:arc tests":
42 |     exec "nim c --gc:arc -r tests/tests.nim"
43 | else:
44 |   task testarc, "--gc:arc tests":
45 |     exec "true"
46 | 
47 | task perf, "Test performance":
48 |   exec "nim cpp -r -d:danger tests/performance.nim"
49 | 


--------------------------------------------------------------------------------
/src/npeg.nim:
--------------------------------------------------------------------------------
  1 | 
  2 | #
  3 | # Copyright (c) 2019 Ico Doornekamp
  4 | #
  5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | # of this software and associated documentation files (the "Software"), to deal
  7 | # in the Software without restriction, including without limitation the rights
  8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | # copies of the Software, and to permit persons to whom the Software is
 10 | # furnished to do so, subject to the following conditions:
 11 | #
 12 | # The above copyright notice and this permission notice shall be included in
 13 | # all copies or substantial portions of the Software.
 14 | #
 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 21 | # THE SOFTWARE.
 22 | #
 23 | # This parser implementation is based on the following papers:
 24 | #
 25 | # - A Text Pattern-Matching Tool based on Parsing Expression Grammars
 26 | #   (Roberto Ierusalimschy)
 27 | #
 28 | # - An efficient parsing machine for PEGs
 29 | #   (Jos Craaijo)
 30 | #
 31 | 
 32 | ## Note: This document is rather terse, for the complete NPeg manual please refer
 33 | ## to the README.md or the git project page at https://github.com/zevv/npeg
 34 | ##   
 35 | ## NPeg is a pure Nim pattern matching library. It provides macros to compile
 36 | ## patterns and grammars (PEGs) to Nim procedures which will parse a string and
 37 | ## collect selected parts of the input. PEGs are not unlike regular
 38 | ## expressions, but offer more power and flexibility, and have fewer ambiguities.
 39 | ##
 40 | ## Here is a simple example showing the power of NPeg: The macro `peg` compiles a
 41 | ## grammar definition into a `parser` object, which is used to match a string and
 42 | ## place the key-value pairs into the Nim table `words`:
 43 | 
 44 | runnableExamples:
 45 | 
 46 |   import npeg, strutils, tables
 47 | 
 48 |   var words: Table[string, int]
 49 | 
 50 |   let parser = peg "pairs":
 51 |     pairs <- pair * *(',' * pair) * !1
 52 |     word <- +Alpha
 53 |     number <- +Digit
 54 |     pair <- >word * '=' * >number:
 55 |       words[$1] = parseInt($2)
 56 | 
 57 |   doAssert parser.match("one=1,two=2,three=3,four=4").ok
 58 | 
 59 | 
 60 | import tables
 61 | import macros
 62 | import strutils
 63 | import npeg/[common,codegen,capture,parsepatt,grammar,dot]
 64 | 
 65 | export NPegException,
 66 |        NPegStackOverflowError,
 67 |        NPegUnknownBackrefError,
 68 |        NPegCaptureOutOfRangeError,
 69 |        NpegParseError,
 70 |        contains, `[]`, len
 71 | 
 72 | # Create a parser for a PEG grammar
 73 | 
 74 | proc pegAux(name: string, subjectType, userDataType, userDataId, n: NimNode): NimNode =
 75 |   ## Parse grammar `n`, link it starting at rule `name` and generate the parser code
 76 |   var graph = newDot(name)
 77 |   var rules = parseGrammar(n, graph)
 78 |   var linked = rules.link(name, graph)
 79 |   result = linked.genCode(subjectType, userDataType, userDataId)
 80 |   graph.dump()
 81 | 
 82 | macro peg*(name: untyped, n: untyped): untyped =
 83 |   ## Construct a parser from the given PEG grammar. `name` is the initial
 84 |   ## grammar rule where parsing starts. This macro returns a `Parser` type
 85 |   ## which can later be used for matching subjects with the `match()` proc
 86 |   pegAux name.strVal, ident "char", ident "bool", ident "userdata", n
 87 | 
 88 | macro peg*(name: untyped, userData: untyped, n: untyped): untyped =
 89 |   ## Construct a parser from the given PEG grammar. `name` is the initial
 90 |   ## grammar rule where parsing starts. This macro returns a `Parser` type
 91 |   ## which can later be used for matching subjects with the `match()` proc
 92 |   ##
 93 |   ## The `userData` argument is a colon expression with an identifier and a
 94 |   ## type, this identifier is available in code block captures during parsing.
 95 |   expectKind(userData, nnkExprColonExpr)
 96 |   pegAux name.strVal, ident "char", userData[1], userData[0], n
 97 | 
 98 | macro peg*(name: untyped, subjectType, userData, n: untyped): untyped =
 99 |   ## Construct a parser from the given PEG grammar. `name` is the initial
100 |   ## grammar rule where parsing starts. This macro returns a `Parser` type
101 |   ## which can later be used for matching subjects with the `match()` proc
102 |   ##
103 |   ## The `subjectType` argument is a Nim type which should match the base
104 |   ## type of the subject passed to `match()`.
105 |   ##
106 |   ## The `userData` argument is a colon expression with an identifier and a
107 |   ## type, this identifier is available in code block captures during parsing.
108 |   expectKind(userData, nnkExprColonExpr)
109 |   pegAux name.strVal, subjectType, userData[1], userData[0], n
110 | 
111 | template patt*(n: untyped): untyped =
112 |   ## Construct a parser from a single PEG rule. This is similar to the regular
113 |   ## `peg()` macro, but useful for short regexp-like parsers that do not need a
114 |   ## complete grammar.
115 |   peg anonymous:
116 |     anonymous <- n
117 | 
118 | template patt*(n: untyped, code: untyped): untyped =
119 |   ## Construct a parser from a single PEG rule. This is similar to the regular
120 |   ## `peg()` macro, but useful for short regexp-like parsers that do not need a
121 |   ## complete grammar. This variant takes a code block which will be used as
122 |   ## code block capture for the anonymous rule.
123 |   peg anonymous:
124 |     anonymous <- n:
125 |       code
126 | 
127 | macro grammar*(libNameNode: untyped, n: untyped) =
128 |   ## This macro defines a collection of rules to be stored in NPeg's global
129 |   ## grammar library.
130 |   let libName = libNameNode.strVal
131 |   let grammar = parseGrammar(n, dumpRailroad = libName != "")
132 |   libStore(libName, grammar)
133 | 
134 | 
135 | proc match*[S, T](p: Parser, s: openArray[S], userData: var T): MatchResult[S] =
136 |   ## Match a subject string with the given generic parser. The returned
137 |   ## `MatchResult` contains the result of the match and can be used to query
138 |   ## any captures.
139 |   var ms = p.fn_init()
140 |   p.fn_run(ms, s, userData)
141 | 
142 | 
143 | proc match*[S](p: Parser, s: openArray[S]): MatchResult[S] =
144 |   ## Match a subject string with the given parser. The returned `MatchResult`
145 |   ## contains the result of the match and can be used to query any captures.
146 |   var userData: bool # dummy if user does not provide a type
147 |   p.match(s, userData)
148 | 
149 | 
150 | # Match a file
151 | 
152 | when defined(windows) or defined(posix):
153 |   import memfiles, os
154 |   proc matchFile*[T](p: Parser, fname: string, userData: var T): MatchResult[char] =
155 |     ## Match the memory-mapped contents of file `fname`; memfiles.open() throws on empty files, so those are matched as ""
156 |     if os.getFileSize(fname) > 0:
157 |       var m = memfiles.open(fname)
158 |       defer: m.close() # always release the mapping, also when the parser raises
159 |       let a = cast[ptr UncheckedArray[char]](m.mem)
160 |       var ms = p.fn_init()
161 |       result = p.fn_run(ms, toOpenArray(a, 0, m.size-1), userData)
162 |     else:
163 |       result = match(p, "", userData)
164 |   
165 |   proc matchFile*(p: Parser, fname: string): MatchResult[char] =
166 |     var userData: bool # dummy if user does not provide a type
167 |     matchFile(p, fname, userData)
168 | 
169 | 
170 | proc captures*(mr: MatchResult[char]): seq[string] =
171 |   ## Return all plain string captures from the match result
172 |   for cap in collectCaptures(mr.cs):
173 |     result.add cap.s
174 | 
175 | proc captures*[S](mr: MatchResult[S]): seq[S] =
176 |   ## Return all captures from the match result as a seq of the subject type `S`
177 |   for cap in collectCaptures(mr.cs):
178 |     result.add cap.s
179 | 
180 | template nimBug22740*() =
181 |   ## Provide stub templates as a workaround for https://github.com/nim-lang/Nim/issues/22740.
182 |   ## Invoke this template in your code if you want to define a parser in a generic proc.
183 |   template `>`(a: untyped): untyped = discard
184 |   template `*`(a: untyped): untyped = discard
185 |   template `-`(a: untyped): untyped = discard
186 |   template `+`(a: untyped): untyped = discard
187 |   template `?`(a: untyped): untyped = discard
188 |   template `!`(a: untyped): untyped = discard
189 |   template `$`(a: untyped): untyped = discard
190 | 
191 | 
192 | import npeg/lib/core
193 | 
194 | 


--------------------------------------------------------------------------------
/src/npeg/capture.nim:
--------------------------------------------------------------------------------
  1 | 
  2 | import strutils
  3 | import sequtils
  4 | import npeg/[stack,common]
  5 | 
  6 | type
  7 | 
  8 |   Capture*[S] = object
  9 |     ck: CapKind
 10 |     si*: int
 11 |     name: string
 12 |     len: int
 13 |     when S is char:
 14 |       s*: string
 15 |     else:
 16 |       s*: S
 17 | 
 18 |   Captures*[S] = object
 19 |     capList*: seq[Capture[S]]
 20 | 
 21 |   FixMethod* = enum
 22 |     FixAll, FixOpen
 23 | 
 24 | # Search the capStack for cftOpen matching the cftClose on top
 25 | 
 26 | proc findTop[S](capStack: var Stack[CapFrame[S]], fm: FixMethod): int =
 27 |   ## Index of the cftOpen frame that balances the cftClose on top for FixOpen, 0 otherwise
 28 |   if fm == FixOpen:
 29 |     var (idx, nesting) = (capStack.top - 1, 0)
 30 |     while true:
 31 |       if capStack[idx].cft == cftClose: inc nesting else: dec nesting
 32 |       if nesting == 0: break
 33 |       dec idx
 34 |     result = idx
 35 | 
 36 | # Convert all closed CapFrames on the capture stack to a list of Captures, all
 37 | # consumed frames are removed from the CapStack
 38 | 
 39 | proc fixCaptures*[S](s: openArray[S], capStack: var Stack[CapFrame[S]], fm: FixMethod): Captures[S] =
 40 | 
 41 |   assert capStack.top > 0
 42 |   assert capStack.peek.cft == cftClose
 43 |   when npegDebug: echo $capStack
 44 | 
 45 |   # Convert the closed frames to a seq[Capture]
 46 | 
 47 |   var stack = initStack[int]("captures", 8)
 48 |   let iFrom = findTop(capStack, fm)
 49 | 
 50 |   for i in iFrom..<capStack.top:
 51 |     let c = capStack[i]
 52 |     if c.cft == cftOpen:
 53 |       stack.push result.capList.len
 54 |       result.capList.add Capture[S](ck: c.ck, si: c.si, name: c.name)
 55 |     else:
 56 |       let i2 = stack.pop()
 57 |       assert result[i2].ck == c.ck
 58 |       result.capList[i2].s = if c.ck == ckPushed:
 59 |         c.sPushed
 60 |       else:
 61 |         s.slice(result[i2].si, c.si)
 62 |       result.capList[i2].len = result.capList.len - i2 - 1
 63 |   assert stack.top == 0
 64 | 
 65 |   # Remove closed captures from the cap stack
 66 | 
 67 |   capStack.top = iFrom
 68 | 
 69 | 
 70 | proc collectCaptures*[S](caps: Captures[S]): Captures[S] =
 71 |   result = Captures[S](
 72 |     capList: caps.capList.filterIt(it.ck in {ckVal, ckPushed, ckCodeBlock})
 73 |   )
 74 | 
 75 | proc collectCapturesRef*(caps: Captures): Ref =
 76 |   for cap in caps.capList: # every capture overwrites the result, so the last one in the list wins
 77 |     result.key = cap.name
 78 |     result.val = cap.s
 79 | 
 80 | # The `Captures[S]` type is a seq wrapped in an object to allow boundary
 81 | # checking on acesses with nicer error messages. The procs below allow easy
 82 | # access to the captures from Nim code.
 83 | 
 84 | proc getCapture[S](cs: Captures[S], i: int): Capture[S] =
 85 |   ## Return capture `i`; raises NPegCaptureOutOfRangeError for any index outside the list, including negative ones produced by `^n`
 86 |   if i < 0 or i >= cs.capList.len:
 87 |     raise newException(NPegCaptureOutOfRangeError, "Capture out of range, " & $i & " is not in [0.." & $cs.capList.high & "]")
 88 |   cs.capList[i]
 89 | 
 90 | proc `[]`*[S](cs: Captures[S], i: int): Capture[S] =
 91 |   cs.getCapture(i)
 92 | 
 93 | proc `[]`*[S](cs: Captures[S], i: BackwardsIndex): Capture[S] =
 94 |   cs.getCapture(cs.capList.len-i.int)
 95 | 
 96 | proc `[]`*[S](cs: Captures[S], range: HSlice[system.int, system.int]): seq[Capture[S]] =
 97 |   for i in range:
 98 |     result.add cs.getCapture(i)
 99 | 
100 | iterator items*[S](captures: Captures[S]): Capture[S] =
101 |   for c in captures.capList:
102 |     yield c
103 | 
104 | proc len*[S](captures: Captures[S]): int =
105 |   captures.capList.len
106 | 
107 | 


--------------------------------------------------------------------------------
/src/npeg/codegen.nim:
--------------------------------------------------------------------------------
  1 | 
  2 | import macros except quote, stamp
  3 | import strutils
  4 | import tables
  5 | import npeg/[common,patt,stack,capture]
  6 | 
  7 | type
  8 | 
  9 |   RetFrame = int
 10 | 
 11 |   BackFrame = object
 12 |     ip*: int # Instruction pointer
 13 |     si*: int # Subject index
 14 |     rp*: int # Retstack top pointer
 15 |     cp*: int # Capstack top pointer
 16 |     pp*: int # PrecStack top pointer
 17 | 
 18 |   PrecFrame = int
 19 | 
 20 |   MatchResult*[S] = object
 21 |     ok*: bool
 22 |     matchLen*: int
 23 |     matchMax*: int
 24 |     cs*: Captures[S]
 25 | 
 26 |   MatchState*[S] = object
 27 |     ip*: int
 28 |     si*: int
 29 |     simax*: int
 30 |     refs*: Table[string, string]
 31 |     retStack*: Stack[RetFrame]
 32 |     capStack*: Stack[CapFrame[S]]
 33 |     backStack*: Stack[BackFrame]
 34 |     precStack*: Stack[PrecFrame]
 35 | 
 36 |   Parser*[S, T] = object
 37 |     fn_init*: proc(): MatchState[S]
 38 |     when npegGcsafe:
 39 |       fn_run*: proc(ms: var MatchState[S], s: openArray[S], u: var T): MatchResult[S] {.gcsafe.}
 40 |     else:
 41 |       fn_run*: proc(ms: var MatchState[S], s: openArray[S], u: var T): MatchResult[S]
 42 | 
 43 | when declared(macros.stamp): # nimskull
 44 |   template quote(body: untyped): NimNode =
 45 |     macros.stamp(body)
 46 | else:
 47 |   template quote(body: untyped): NimNode =
 48 |     macros.quote(body)
 49 | 
 50 | # This macro translates `$1`.. into `capture[1].s`.. and `@1` into `capture[1].si` 
 51 | # for use in code block captures. The source nimnode lineinfo is recursively
 52 | # copied to the newly generated node to make sure "Capture out of range"
 53 | # exceptions are properly traced.
 54 | 
 55 | proc doSugar(n, captureId: NimNode): NimNode =
 56 |   proc cli(n2: NimNode) =
 57 |     n2.copyLineInfo(n)
 58 |     for nc in n2: cli(nc)
 59 |   let isIntPrefix =  n.kind == nnkPrefix and n[0].kind == nnkIdent and n[1].kind == nnkIntLit
 60 |   if isIntPrefix and n[0].eqIdent("$"):
 61 |     result = newDotExpr(nnkBracketExpr.newTree(captureId, n[1]), ident("s"))
 62 |     cli result
 63 |   elif isIntPrefix and n[0].eqIdent("@"):
 64 |     result = newDotExpr(nnkBracketExpr.newTree(captureId, n[1]), ident("si"))
 65 |     cli result
 66 |   else:
 67 |     result = copyNimNode(n)
 68 |     for nc in n:
 69 |       result.add doSugar(nc, captureId)
 70 | 
 71 | 
 72 | # Generate the parser main loop. The .computedGoto. pragma will generate code
 73 | # using C computed gotos, which will get highly optimized, mostly eliminating
 74 | # the inner parser loop. Nim limits computed goto to a maximum of 10_000
 75 | # cases; if our program is this large, emit a warning and do not use a
 76 | # computed goto
 77 | 
 78 | proc genLoopCode(program: Program, casesCode: NimNode): NimNode =
 79 |   ## Wrap `casesCode` in a `while true` loop, with computedGoto unless the program is too large
 80 |   let body = nnkStmtList.newTree()
 81 |   if program.patt.len >= 10_000:
 82 |     warning "Grammar too large for computed goto, falling back to normal 'case'"
 83 |   else:
 84 |     body.add nnkPragma.newTree(ident("computedGoto"))
 85 |   result = nnkWhileStmt.newTree(newLit(true), body.add(casesCode))
 86 | 
 87 | # Generate out all the case handlers for the parser program
 88 | 
 89 | proc genCasesCode*(program: Program, sType, uType, uId: NimNode, ms, s, si, simax, ip: NimNode): NimNode =
 90 | 
 91 |   result = quote:
 92 |     case `ip`
 93 | 
 94 |   for ipNow, i in program.patt.pairs:
 95 | 
 96 |     let
 97 |       ipNext = ipNow + 1
 98 |       opName = newLit(repeat(" ", i.indent) & ($i.op).toLowerAscii[2..^1])
 99 |       iname = newLit(i.name)
100 |       ipFail = if i.failOffset == 0:
101 |         program.patt.high
102 |       else:
103 |         ipNow + i.failOffset
104 | 
105 |     var call = case i.op:
106 | 
107 |       of opChr:
108 |         let ch = newLit(i.ch)
109 |         quote:
110 |           trace `ms`, `iname`, `opName`, `s`, "\"" & escapeChar(`ch`) & "\""
111 |           if `si` < `s`.len and `s`[`si`] == `ch`.char:
112 |             inc `si`
113 |             `ip` = `ipNext`
114 |           else:
115 |             `ip` = `ipFail`
116 | 
117 |       of opLit:
118 |         let lit = i.lit
119 |         quote:
120 |           trace `ms`, `iname`, `opName`, `s`, `lit`.repr
121 |           if `si` < `s`.len and `s`[`si`] == `lit`:
122 |             inc `si`
123 |             `ip` = `ipNext`
124 |           else:
125 |             `ip` = `ipFail`
126 | 
127 |       of opSet:
128 |         let cs = newLit(i.cs)
129 |         quote:
130 |           trace `ms`, `iname`, `opName`, `s`, dumpSet(`cs`)
131 |           if `si` < `s`.len and `s`[`si`] in `cs`:
132 |             inc `si`
133 |             `ip` = `ipNext`
134 |           else:
135 |             `ip` = `ipFail`
136 | 
137 |       of opSpan:
138 |         let cs = newLit(i.cs)
139 |         quote:
140 |           trace `ms`, `iname`, `opName`, `s`, dumpSet(`cs`)
141 |           while `si` < `s`.len and `s`[`si`] in `cs`:
142 |             inc `si`
143 |           `ip` = `ipNext`
144 | 
145 |       of opChoice:
146 |         let ip2 = newLit(ipNow + i.ipOffset)
147 |         let siOffset = newLit(i.siOffset)
148 |         quote:
149 |           trace `ms`, `iname`, `opName`, `s`, $`ip2`
150 |           push(`ms`.backStack, BackFrame(ip:`ip2`, si:`si`+`siOffset`, rp:`ms`.retStack.top, cp:`ms`.capStack.top, pp:`ms`.precStack.top))
151 |           `ip` = `ipNext`
152 | 
153 |       of opCommit:
154 |         let ip2 = newLit(ipNow + i.ipOffset)
155 |         quote:
156 |           trace `ms`, `iname`, `opName`, `s`, $`ip2`
157 |           discard pop(`ms`.backStack)
158 |           `ip` = `ip2`
159 | 
160 |       of opCall:
161 |         let label = newLit(i.callLabel)
162 |         let ip2 = newLit(ipNow + i.callOffset)
163 |         quote:
164 |           trace `ms`, `iname`, `opName`, `s`, `label` & ":" & $`ip2`
165 |           push(`ms`.retStack, `ipNext`)
166 |           `ip` = `ip2`
167 | 
168 |       of opJump:
169 |         let label = newLit(i.callLabel)
170 |         let ip2 = newLit(ipNow + i.callOffset)
171 |         quote:
172 |           trace `ms`, `iname`, `opName`, `s`, `label` & ":" & $`ip2`
173 |           `ip` = `ip2`
174 | 
175 |       of opCapOpen:
176 |         let capKind = newLit(i.capKind)
177 |         let capName = newLit(i.capName)
178 |         let capSiOffset = newLit(i.capSiOffset)
179 |         quote:
180 |           trace `ms`, `iname`, `opName`, `s`, $`capKind` & " -> " & $`si`
181 |           push(`ms`.capStack, CapFrame[`sType`](cft: cftOpen, si: `si`+`capSiOffset`, ck: `capKind`, name: `capName`))
182 |           `ip` = `ipNext`
183 | 
184 |       of opCapClose:
185 |         let ck = newLit(i.capKind)
186 | 
187 |         case i.capKind:
188 |           of ckCodeBlock:
189 |             let captureId = ident "capture"
190 |             let code = doSugar(i.capAction, captureId)
191 |             quote:
192 |               trace `ms`, `iname`, `opName`, `s`, "ckCodeBlock -> " & $`si`
193 |               push(`ms`.capStack, CapFrame[`sType`](cft: cftClose, si: `si`, ck: `ck`))
194 |               let capture = collectCaptures(fixCaptures[`sType`](`s`, `ms`.capStack, FixOpen))
195 |               proc fn(`captureId`: Captures[`sType`], `ms`: var MatchState[`sType`], `uId`: var `uType`): bool =
196 |                 result = true
197 |                 `code`
198 |               if fn(capture, `ms`, `uId`):
199 |                 `ip` = `ipNext`
200 |               else:
201 |                 `ip` = `ipFail`
202 | 
203 |           of ckRef:
204 |             quote:
205 |               trace `ms`, `iname`, `opName`, `s`, "ckRef -> " & $`si`
206 |               push(`ms`.capStack, CapFrame[`sType`](cft: cftClose, si: `si`, ck: `ck`))
207 |               let r = collectCapturesRef(fixCaptures[`sType`](`s`, `ms`.capStack, FixOpen))
208 |               `ms`.refs[r.key] = r.val
209 |               `ip` = `ipNext`
210 | 
211 |           else:
212 |             quote:
213 |               trace `ms`, `iname`, `opName`, `s`, $`ck` & " -> " & $`si`
214 |               push(`ms`.capStack, CapFrame[`sType`](cft: cftClose, si: `si`, ck: `ck`))
215 |               `ip` = `ipNext`
216 | 
217 |       of opBackref:
218 |         let refName = newLit(i.refName)
219 |         quote:
220 |           if `refName` in `ms`.refs:
221 |             let s2 = `ms`.refs[`refName`]
222 |             trace `ms`, `iname`, `opName`, `s`, `refName` & ":\"" & s2 & "\""
223 |             if subStrCmp(`s`, `s`.len, `si`, s2):
224 |               inc `si`, s2.len
225 |               `ip` = `ipNext`
226 |             else:
227 |               `ip` = `ipFail`
228 |           else:
229 |             raise newException(NPegUnknownBackrefError, "Unknown back reference '" & `refName` & "'")
230 | 
231 |       of opErr:
232 |         let msg = newLit(i.msg)
233 |         quote:
234 |           trace `ms`, `iname`, `opName`, `s`, `msg`
235 |           var e = newException(NPegParseError, `msg`)
236 |           `simax` = max(`simax`, `si`)
237 |           raise e
238 | 
239 |       of opReturn:
240 |         quote:
241 |           trace `ms`, `iname`, `opName`, `s`
242 |           if `ms`.retStack.top > 0:
243 |             `ip` = pop(`ms`.retStack)
244 |           else:
245 |             result.ok = true
246 |             `simax` = max(`simax`, `si`)
247 |             break
248 | 
249 |       of opAny:
250 |         quote:
251 |           trace `ms`, `iname`, `opName`, `s`
252 |           if `si` < `s`.len:
253 |             inc `si`
254 |             `ip` = `ipNext`
255 |           else:
256 |             `ip` = `ipFail`
257 | 
258 |       of opNop:
259 |         quote:
260 |           trace `ms`, `iname`, `opName`, `s`
261 |           `ip` = `ipNext`
262 | 
263 |       of opPrecPush:
264 |         if i.prec == 0:
265 |           quote:
266 |             push(`ms`.precStack, 0)
267 |             `ip` = `ipNext`
268 |         else:
269 |           let (iPrec, iAssoc) = (i.prec.newLit, i.assoc.newLit)
270 |           let exp = if i.assoc == assocLeft:
271 |             quote: peek(`ms`.precStack) < `iPrec`
272 |           else:
273 |             quote: peek(`ms`.precStack) <= `iPrec`
274 |           quote:
275 |             if `exp`:
276 |               push(`ms`.precStack, `iPrec`)
277 |               `ip` = `ipNext`
278 |             else:
279 |               `ip` = `ipFail`
280 | 
281 |       of opPrecPop:
282 |         quote:
283 |             discard `ms`.precStack.pop()
284 |             `ip` = `ipNext`
285 | 
286 |       of opFail:
287 |         quote:
288 |           `simax` = max(`simax`, `si`)
289 |           if `ms`.backStack.top > 0:
290 |             trace `ms`, "", "opFail", `s`, "(backtrack)"
291 |             let t = pop(`ms`.backStack)
292 |             (`ip`, `si`, `ms`.retStack.top, `ms`.capStack.top, `ms`.precStack.top) = (t.ip, t.si, t.rp, t.cp, t.pp)
293 |           else:
294 |             trace `ms`, "", "opFail", `s`, "(error)"
295 |             break
296 | 
297 |     # Recursively copy the line info from the original instruction NimNode into
298 |     # the generated Nim code
299 |     proc aux(n: NimNode) =
300 |       n.copyLineInfo(i.nimNode)
301 |       for nc in n: aux(nc)
302 |     aux(call)
303 | 
304 |     result.add nnkOfBranch.newTree(newLit(ipNow), call)
305 | 
306 | 
307 | # Generate code for tracing the parser. An empty stub is generated if tracing
308 | # is disabled
309 | 
310 | proc genTraceCode*(program: Program, sType, uType, uId, ms, s, si, simax, ip: NimNode): NimNode =
311 |   
312 |   when npegTrace:
313 |     result = quote:
314 |       proc doTrace[sType](`ms`: var MatchState, iname, opname: string, ip: int, s: openArray[sType], si: int, ms: var MatchState, msg: string) {.nimcall.} =
315 |           echo align(if ip >= 0: $ip else: "", 3) &
316 |             "|" & align($(peek(ms.precStack)), 3) &
317 |             "|" & align($si, 3) &
318 |             "|" & alignLeft(dumpSubject(s, si, 24), 24) &
319 |             "|" & alignLeft(iname, 15) &
320 |             "|" & alignLeft(opname & " " & msg, 40) &
321 |             "|" & repeat("*", ms.backStack.top)
322 | 
323 |       template trace(`ms`: var MatchState, iname, opname: string, `s`: openArray[`sType`], msg = "") =
324 |         doTrace(`ms`, iname, opname, `ip`, `s`, `si`, `ms`, msg)
325 | 
326 |   else:
327 |     result = quote:
328 |       template trace(`ms`: var MatchState, iname, opname: string, `s`: openArray[`sType`], msg = "") =
329 |         discard
330 | 
331 | 
332 | # Augment exception stack traces with the NPeg return stack and re-raise
333 | 
334 | proc genExceptionCode(ms, ip, si, simax, symTab: NimNode): NimNode =
335 |   quote:
336 | 
337 |     # Helper proc to add a stack frame for the given ip
338 |     var trace: seq[StackTraceEntry]
339 |     let symTab = `symTab`
340 |     proc aux(ip: int) =
341 |       let sym = symTab[ip]
342 |       trace.insert StackTraceEntry(procname: cstring(sym.repr), filename: cstring(sym.lineInfo.filename), line: sym.lineInfo.line)
343 |       # On older Nim versions e.trace is not accessible, in this case just
344 |       # echo each frame to stdout if npegStacktrace is enabled
345 |       when npegStacktrace:
346 |         echo $(sym.lineInfo) & ": " & sym.repr
347 | 
348 |     # Emit current IP and unwind all addresses from the return stack
349 |     aux(`ip`)
350 |     while `ms`.retStack.top > 0:
351 |       aux(`ms`.retStack.pop())
352 | 
353 |     let e = getCurrentException()
354 | 
355 |     when compiles(e.trace.pop()):
356 |       # drop the generated parser fn() from the trace and replace by the NPeg frames
357 |       discard e.trace.pop()
358 |       e.trace.add trace
359 | 
360 |     # Re-raise the exception with the augmented stack trace and match index filled in
361 |     if e of NPegException:
362 |       let eref = (ref NPegException)(e)
363 |       eref.matchLen = `si`
364 |       eref.matchMax = `simax`
365 |     raise
366 | 
366 | 
367 | 
368 | # Convert the list of parser instructions into a Nim finite state machine
369 | #
370 | # - sType is the base type of the subject; typically `char` but can be specified
371 | #   to be another type by the user
372 | # - uType is the type of the userdata, if not used this defaults to `bool`
373 | # - uId is the identifier of the userdata, if not used this defaults to `userdata`
374 | 
375 | proc genCode*(program: Program, sType, uType, uId: NimNode): NimNode =
376 | 
377 |   let
378 |     count = program.patt.high
379 |     suffix = "_NP"
380 |     ms = ident "ms" & suffix
381 |     s = ident "s" & suffix
382 |     si = ident "si" & suffix
383 |     ip = ident "ip" & suffix
384 |     simax = ident "simax" & suffix
385 | 
386 |     casesCode = genCasesCode(program, sType, uType, uId, ms, s, si, simax, ip)
387 |     loopCode = genLoopCode(program, casesCode)
388 |     traceCode = genTraceCode(program, sType, uType, uId, ms, s, si, simax, ip)
389 |     exceptionCode = genExceptionCode(ms, ip, si, simax, newLit(program.symTab))
390 | 
391 |   result = quote:
392 | 
393 |     proc fn_init(): MatchState[`sType`] {.gensym.} =
394 |       result = MatchState[`sType`](
395 |         retStack: initStack[RetFrame]("return", 8, npegRetStackSize),
396 |         capStack: initStack[CapFrame[`sType`]]("capture", 8),
397 |         backStack: initStack[BackFrame]("backtrace", 8, npegBackStackSize),
398 |         precStack: initStack[PrecFrame]("precedence", 8, 16),
399 |       )
400 |       push(result.precStack, 0)
401 | 
402 | 
403 |     proc fn_run(`ms`: var MatchState[`sType`], `s`: openArray[`sType`], `uId`: var `uType`): MatchResult[`sType`] {.gensym.} =
404 | 
405 |       # Create local instances of performance-critical MatchState vars, this
406 |       # saves a dereference on each access
407 | 
408 |       var
409 |         `ip`: range[0..`count`] = `ms`.ip
410 |         `si` = `ms`.si
411 |         `simax` = `ms`.simax
412 | 
413 |       # These templates are available for code blocks
414 | 
415 |       template validate(o: bool) {.used.} =
416 |         if not o: return false
417 | 
418 |       template fail() {.used.} =
419 |         return false
420 | 
421 |       template push(`s`: string|`sType`) {.used.} =
422 |         push(`ms`.capStack, CapFrame[`sType`](cft: cftOpen, ck: ckPushed))
423 |         push(`ms`.capStack, CapFrame[`sType`](cft: cftClose, ck: ckPushed, sPushed: `s`))
424 | 
425 |       # Emit trace and loop code
426 | 
427 |       try:
428 |         `traceCode`
429 |         `loopCode`
430 |       except CatchableError:
431 |         `exceptionCode`
432 | 
433 |       # When the parsing machine is done, copy the local copies of the
434 |       # matchstate back, close the capture stack and collect all the captures
435 |       # in the match result
436 | 
437 |       `ms`.ip = `ip`
438 |       `ms`.si = `si`
439 |       `ms`.simax = `simax`
440 |       result.matchLen = `ms`.si
441 |       result.matchMax = `ms`.simax
442 |       if result.ok and `ms`.capStack.top > 0:
443 |         result.cs = fixCaptures(`s`, `ms`.capStack, FixAll)
444 | 
445 |     # This is the result of genCode: a Parser object with two function
446 |     # pointers: fn_init: initializes a MatchState object for this parser
447 |     # fn_run: performs the parsing of the subject on the given matchstate
448 | 
449 |     Parser[`sType`,`uType`](fn_init: fn_init, fn_run: fn_run)
450 | 
451 |   when npegGcsafe:
452 |     result[0].addPragma(ident("gcsafe"))
453 | 
454 |   when npegExpand:
455 |     echo repr result
456 | 
457 | 


--------------------------------------------------------------------------------
/src/npeg/common.nim:
--------------------------------------------------------------------------------
  1 | 
  2 | import strutils
  3 | import tables
  4 | import macros
  5 | import bitops
  6 | 
  7 | 
  8 | const
  9 | 
 10 |   # Some constants with "sane" defaults, configurable with compiler flags
 11 | 
 12 |   npegPattMaxLen* {.intdefine.} = 4096
 13 |   npegInlineMaxLen* {.intdefine.} = 30
 14 |   npegRetStackSize* {.intdefine.} = 1024
 15 |   npegBackStackSize* {.intdefine.} = 1024
 16 |   npegOptimize* {.intdefine.} = 255
 17 |   npegDebug* = defined(npegDebug)
 18 |   npegTrace* = defined(npegTrace)
 19 |   npegExpand* = defined(npegExpand)
 20 |   npegGraph* = defined(npegGraph)
 21 |   npegGcsafe* = defined(npegGcsafe)
 22 |   npegStacktrace* = defined(npegStacktrace)
 23 | 
 24 |   # Various optimizations. These can be disabled for testing purposes
 25 |   # or when suspecting bugs in the optimization stages
 26 | 
 27 |   npegOptSets* = npegOptimize.testBit(0)
 28 |   npegOptHeadFail* = npegOptimize.testBit(1)
 29 |   npegOptCapShift* = npegOptimize.testBit(2)
 30 |   npegOptChoiceCommit* = npegOptimize.testBit(3)
 31 | 
 32 | type
 33 | 
 34 |   NPegException* = object of CatchableError # Base type of the NPeg errors below
 35 |     matchLen*: int # NOTE(review): presumably how far the match got - confirm
 36 |     matchMax*: int # NOTE(review): presumably the furthest index reached - confirm
 37 | 
 38 |   NPegParseError* = object of NPegException
 39 |   NPegStackOverflowError* = object of NPegException
 40 |   NPegUnknownBackrefError* = object of NPegException
 41 |   NPegCaptureOutOfRangeError* = object of NPegException
 42 | 
 43 |   CapFrameType* = enum cftOpen, cftClose # Frame either opens or closes a capture
 44 | 
 45 |   CapKind* = enum
 46 |     ckVal,          # Value capture
 47 |     ckPushed,       # Pushed capture
 48 |     ckCodeBlock,    # Code block capture
 49 |     ckRef           # Reference
 50 | 
 51 |   CapFrame*[S] = object
 52 |     cft*: CapFrameType # Capture frame type
 53 |     name*: string      # Capture name
 54 |     si*: int           # Subject index
 55 |     ck*: CapKind       # Capture kind
 56 |     when S is char:
 57 |       sPushed*: string # Pushed capture, overrides subject slice
 58 |     else:
 59 |       sPushed*: S      # Pushed capture, overrides subject slice
 60 | 
 61 |   Ref* = object # Key/value pair of strings
 62 |     key*: string
 63 |     val*: string
 64 | 
 65 |   Opcode* = enum
 66 |     opChr,          # Matching: Character
 67 |     opLit,          # Matching: Literal
 68 |     opSet,          # Matching: Character set and/or range
 69 |     opAny,          # Matching: Any character
 70 |     opNop,          # Matching: Always matches, consumes nothing
 71 |     opSpan          # Matching: Match a sequence of 0 or more character sets
 72 |     opChoice,       # Flow control: stores current position
 73 |     opCommit,       # Flow control: commit previous choice
 74 |     opCall,         # Flow control: call another rule
 75 |     opJump,         # Flow control: jump to target
 76 |     opReturn,       # Flow control: return from earlier call
 77 |     opFail,         # Fail: unwind stack until last frame
 78 |     opCapOpen,      # Capture open
 79 |     opCapClose,     # Capture close
 80 |     opBackref       # Back reference
 81 |     opErr,          # Error handler
 82 |     opPrecPush,     # Precedence stack push
 83 |     opPrecPop,      # Precedence stack pop
 84 | 
 85 |   CharSet* = set[char]
 86 | 
 87 |   Assoc* = enum assocLeft, assocRight # Associativity carried by opPrecPush
 88 | 
 89 |   Inst* = object # One instruction; which fields exist depends on `op`
 90 |     case op*: Opcode
 91 |       of opChoice, opCommit:
 92 |         ipOffset*: int # Instruction offset, relative to this instruction
 93 |         siOffset*: int
 94 |       of opChr:
 95 |         ch*: char
 96 |       of opLit:
 97 |         lit*: NimNode
 98 |       of opCall, opJump:
 99 |         callLabel*: string # Name of the target rule
100 |         callOffset*: int   # Target, relative to this instruction; set by link()
101 |       of opSet, opSpan:
102 |         cs*: CharSet
103 |       of opCapOpen, opCapClose:
104 |         capKind*: CapKind
105 |         capAction*: NimNode
106 |         capName*: string
107 |         capSiOffset*: int
108 |       of opErr:
109 |         msg*: string
110 |       of opFail, opReturn, opAny, opNop, opPrecPop:
111 |         discard
112 |       of opBackref:
113 |         refName*: string
114 |       of opPrecPush:
115 |         prec*: int
116 |         assoc*: Assoc
117 |     failOffset*: int # Printed by `$` as ip+failOffset when non-zero
118 |     # Debug info
119 |     name*: string    # Rule name, filled in by addRule() when empty
120 |     nimNode*: NimNode # AST node recorded by parsePatt()
121 |     indent*: int     # Nesting depth for listings, computed by link()
122 | 
123 |   Patt* = seq[Inst] # A pattern is a sequence of instructions
124 | 
125 |   Symbol* = object # Describes the rule that starts at instruction `ip`
126 |     ip*: int
127 |     name*: string
128 |     repr*: string
129 |     lineInfo*: LineInfo
130 | 
131 |   SymTab* = object # Symbols in the order they were added
132 |     syms*: seq[Symbol]
133 | 
134 |   Rule* = object # A named pattern plus its source text and location
135 |     name*: string
136 |     patt*: Patt
137 |     repr*: string
138 |     lineInfo*: LineInfo
139 | 
140 |   Program* = object # Result of link(): one flat pattern and its symbols
141 |     patt*: Patt
142 |     symTab*: SymTab
143 | 
144 |   Template* = ref object
145 |     name*: string
146 |     args*: seq[string] # Parameter names
147 |     code*: NimNode     # Body in which the parameters get substituted
148 | 
149 |   Grammar* = ref object # Rules and templates, both looked up by name
150 |     rules*: Table[string, Rule]
151 |     templates*: Table[string, Template]
153 | #
154 | # SymTab implementation
155 | #
156 | 
157 | proc add*(s: var SymTab, ip: int, name: string, repr: string = "", lineInfo: LineInfo = LineInfo()) =
158 |   # Append a symbol for the rule `name` that starts at instruction `ip`
159 |   s.syms.add Symbol(ip: ip, name: name, repr: repr, lineInfo: lineInfo)
160 | 
161 | proc `[]`*(s: SymTab, ip: int): Symbol =
162 |   # Last symbol, in insertion order, whose start address is not beyond `ip`
163 |   for idx in 0 ..< s.syms.len:
164 |     if s.syms[idx].ip <= ip: result = s.syms[idx]
165 | 
166 | proc `[]`*(s: SymTab, name: string): Symbol =
167 |   # First symbol called `name`; a default-initialised Symbol if there is none
168 |   for idx in 0 ..< s.syms.len:
169 |     if s.syms[idx].name == name: return s.syms[idx]
170 | 
171 | proc contains*(s: SymTab, ip: int): bool =
172 |   # True if some symbol starts exactly at instruction `ip`
173 |   for entry in s.syms:
174 |     if entry.ip == ip: return true
175 | 
176 | proc contains*(s: SymTab, name: string): bool =
177 |   # True if a symbol called `name` has been added
178 |   for entry in s.syms:
179 |     if entry.name == name: return true
180 | 
181 | #
182 | # Some glue to report parse errors without having to pass the original
183 | # NimNode all the way down the call stack
184 | #
185 | 
186 | var gCurErrorNode {.compileTime} = newEmptyNode()  # location used by `krak(msg)`
187 | 
188 | proc setKrakNode*(n: NimNode) =
189 |   copyLineInfo(gCurErrorNode, n)  # only the line info of `n` is kept
190 | 
191 | template krak*(n: NimNode, msg: string) =
192 |   error("NPeg: error at '" & repr(n) & "': " & msg & "\n", n)
193 | 
194 | template krak*(msg: string) =
195 |   krak(gCurErrorNode, msg)
196 | 
197 | 
198 | #
199 | # Misc helper functions
200 | #
201 | 
202 | proc subStrCmp*(s: openArray[char], slen: int, si: int, s2: string): bool =
203 |   # True if `s2` occurs in `s` at offset `si`; `slen` is the usable length of `s`
204 |   result = si + s2.len <= slen
205 |   var k = 0
206 |   while result and k < s2.len:
207 |     result = s[si+k] == s2[k]
208 |     inc k
209 | 
210 | 
211 | proc subIStrCmp*(s: openArray[char], slen: int, si: int, s2: string): bool =
212 |   # Like subStrCmp, but ASCII letters are compared without regard to case
213 |   result = si + s2.len <= slen
214 |   var k = 0
215 |   while result and k < s2.len:
216 |     result = toLowerAscii(s[si+k]) == toLowerAscii(s2[k])
217 |     inc k
218 | 
219 | 
220 | proc truncate*(s: string, len: int): string =
221 |   # Cap `s` at `len` characters, marking any cut with a trailing "..."
222 |   if s.len <= len: return s
223 |   s[0 ..< len] & "..."
224 | 
225 | # This proc flattens AST trees of `|` operators into a single call to
226 | # `choice()` with all arguments in one call. e.g, it will convert `A | B | C`
227 | # into `choice(A, B, C)`.
228 | 
229 | proc flattenChoice*(n: NimNode, nChoice: NimNode = nil): NimNode =
230 |   proc isChoice(x: NimNode): bool =
231 |     x.kind == nnkInfix and x[0].eqIdent("|")
232 |   proc collect(x, into: NimNode) =
233 |     # Append the alternatives found below `x` to `into`, left to right
234 |     if isChoice(x):
235 |       for k in 1..2: collect(x[k], into)
236 |     else:
237 |       into.add flattenChoice(x)
238 |   if isChoice(n):
239 |     result = nnkCall.newTree(ident "choice")
240 |     collect(n, result)
241 |   else:
242 |     result = copyNimNode(n)  # same node, children rebuilt recursively
243 |     for kid in n: result.add flattenChoice(kid)
244 | 
245 | 
246 | # Create a short and friendly text representation of a character set.
247 | 
248 | proc escapeChar*(c: char): string =
249 |   # Printable form of `c`: \n, \r, \t, the character itself, or \xNN in lower case
250 |   case c
251 |   of '\n': "\\n"
252 |   of '\r': "\\r"
253 |   of '\t': "\\t"
254 |   of ' '..'~': $c
255 |   else: "\\x" & toLowerAscii(toHex(int(c), 2))
256 | 
257 | proc dumpSet*(cs: CharSet): string =
258 |   # Render `cs` as a set literal, folding runs of adjacent members into ranges
259 |   var parts: seq[string]
260 |   var c = 0
261 |   while c <= 255:
262 |     let lo = c
263 |     while c <= 255 and char(c) in cs: inc c
264 |     let hi = c - 1                  # last member of the run; lo-1 if no run
265 |     if hi == lo:
266 |       parts.add "'" & escapeChar(char(lo)) & "'"
267 |     elif hi > lo:
268 |       parts.add "'" & escapeChar(char(lo)) & "'..'" & escapeChar(char(hi)) & "'"
269 |     inc c                           # `c` is known not to be a member: skip it
270 |   "{" & parts.join(",") & "}"
271 | 
272 | # Create a friendly version of the given string, escaping non-printable
273 | # characters and keeping the result shorter than `l`
274 | 
275 | proc dumpSubject*[S](s: openArray[S], o:int=0, l:int=1024): string =
276 |   # Render the elements of `s` from offset `o` onwards: chars go through
277 |   # escapeChar, any other element type through its `repr`. Stops as soon as
278 |   # adding the next piece would bring the output to `l` characters or more.
279 |   mixin repr
280 |   for idx in o ..< s.len:
281 |     let piece =
282 |       when S is char: escapeChar(s[idx])
283 |       else: repr(s[idx])
284 |     if result.len + piece.len >= l:
285 |       break
286 |     result.add piece
287 | 
288 | 
289 | proc `$`*(i: Inst, ip=0): string =
290 |   # One listing line for `i`: rule name, indented mnemonic plus operands, and
291 |   # a shortened source snippet. Relative offsets are printed as `ip` + offset.
292 |   var operands =
293 |     case i.op
294 |     of opChr: " '" & escapeChar(i.ch) & "'"
295 |     of opChoice, opCommit: " " & $(ip+i.ipOffset)
296 |     of opCall, opJump: " " & $(ip+i.callOffset)
297 |     of opCapOpen, opCapClose:
298 |       if i.capSiOffset != 0: " " & $i.capKind & "(" & $i.capSiOffset & ")"
299 |       else: " " & $i.capKind
300 |     of opBackref: " " & i.refName
301 |     of opPrecPush: " @" & $i.prec
302 |     else: ""
303 |   if i.failOffset != 0:
304 |     operands &= " " & $(ip+i.failOffset)
305 | 
306 |   # The mnemonic is the enum name in lower case without its "op" prefix
307 |   let mnemonic = toLowerAscii($i.op)[2..^1]
308 |   let snippet =
309 |     if i.nimNode == nil: ""
310 |     else: truncate(repr(i.nimNode), 30)
311 |   let body = repeat(" ", i.indent) & mnemonic & operands
312 |   alignLeft(i.name, 15) & alignLeft(body, 25) & " " & snippet
313 | 
314 | proc `$`*(program: Program): string =
315 |   # Listing of every instruction; a rule's source text precedes its first one
316 |   for ip in 0 ..< program.patt.len:
317 |     if program.symTab.contains(ip): result.add "\n" & program.symTab[ip].repr & "\n"
318 |     result.add align($ip, 4) & ": " & `$`(program.patt[ip], ip) & "\n"
319 | 
320 | 
321 | proc slice*(s: openArray[char], iFrom, iTo: int): string =
322 |   # Copy of the characters from s[iFrom] up to, but not including, s[iTo]
323 |   result = newString(iTo - iFrom)
324 |   for pos in iFrom ..< iTo:
325 |     result[pos - iFrom] = s[pos]
326 | 
327 | proc slice*[S](s: openArray[S], iFrom, iTo: int): S =
328 |   s[iFrom]  # only the element at `iFrom` is returned; `iTo` is not used
329 | 
330 | proc `$`*(t: Template): string =
331 |   t.name & "(" & join(t.args, ", ") & ") = " & repr(t.code)  # name(args) = body
332 | 
333 | 


--------------------------------------------------------------------------------
/src/npeg/dot.nim:
--------------------------------------------------------------------------------
 1 | 
 2 | import tables
 3 | import strutils
 4 | 
 5 | type
 6 |   Dot* = ref object # Collects the nodes and edges of one graph
 7 |     name: string # Base name of the .dot file written by dump()
 8 |     edges: Table[string, bool] # Edge lines, used as a set: the value is always true
 9 |     nodes: seq[string] # Node lines, in the order addPatt() appended them
10 | 
11 | const colors = { # Edge colour for each `meth` that add() accepts
12 |   "inline": "grey60",
13 |   "call": "blue",
14 | }.toTable()
15 | 
16 | 
17 | proc escape(s: string): string =
18 |   s.replace('.', '_').replace('-', '_')  # dots and dashes become underscores
19 | 
20 | proc newDot*(name: string): Dot =
21 |   result = Dot(name: name)  # edges and nodes start out empty
22 | 
23 | proc add*(d: Dot, n1, n2: string, meth: string) =
24 |   # Record an edge n1 -> n2 coloured according to `meth`; a nil `d` is ignored
25 |   if d == nil: return
26 |   d.edges["  " & escape(n1) & " -> " & escape(n2) & " [ color=" & colors[meth] & "];"] = true
27 | 
28 | proc addPatt*(d: Dot, name: string, len: int) =
29 |   # Record a node for pattern `name`, labelled with its length `len`; the
30 |   # border colour marks lengths above 10 and above 100. A nil `d` is ignored.
31 |   if d == nil: return
32 |   let color = if len > 100: "red" elif len > 10: "orange" else: "black"
33 |   d.nodes.add "  " & escape(name) & " [ fillcolor=lightgrey color=" & color &
34 |               " label=\"" & name & "/" & $len & "\"];"
35 | 
36 | proc dump*(d: Dot) =
37 |   # Write the collected graph to <npegDotDir>/<name>.dot. Nothing happens
38 |   # unless the program is compiled with -d:npegDotDir=<directory>.
39 |   const npegDotDir {.strdefine.}: string = ""
40 |   when npegDotDir != "":
41 |     let fname = npegDotDir & "/" & d.name & ".dot"
42 |     echo "Dumping dot graph file to " & fname & "..."
43 |     var lines = @[
44 |       "digraph dot {",
45 |       "  graph [ center=true, margin=0.2, nodesep=0.1, ranksep=0.3 ];",
46 |       "  node [ shape=box, style=\"rounded,filled\" width=0, height=0, fontname=Helvetica, fontsize=10];",
47 |       "  edge [ fontname=Helvetica, fontsize=10];",
48 |     ]
49 |     for edge in d.edges.keys: lines.add edge
50 |     lines.add d.nodes
51 |     lines.add "}"
52 |     writeFile fname, lines.join("\n") & "\n"
53 | 
54 | 


--------------------------------------------------------------------------------
/src/npeg/grammar.nim:
--------------------------------------------------------------------------------
  1 | 
  2 | import tables
  3 | import macros
  4 | import strutils
  5 | import npeg/[common,dot]
  6 | 
  7 | # This is the global instance of the pattern library. It is itself a grammar
  8 | # where all patterns are stored with qualified names in the form of
  9 | # <libname>.<pattname>.  At grammar link time all unresolved patterns are
 10 | # looked up from this global table.
 11 | 
 12 | var gPattLib {.compileTime.} = new Grammar
 13 | 
 14 | 
 15 | 
 16 | # Store a grammar in the library.  The rule names and all unqualified
 17 | # identifiers in the grammar are expanded to qualified names in the form
 18 | # <libname>.<pattname> to make sure they are easily resolved when they are
 19 | # later imported by other grammars.
 20 | 
 21 | proc libStore*(libName: string, grammar: Grammar) =
 22 |   # An empty `libName` stores everything under its unqualified name.
 23 |   let prefix =
 24 |     if libName.len > 0: libName & "."
 25 |     else: ""
 26 | 
 27 |   for rulename, rule in grammar.rules:
 28 |     var stored = Rule(name: prefix & rulename)
 29 |     stored.patt = rule.patt
 30 |     for inst in stored.patt.mitems:
 31 |       # Call labels that contain no dot get the library prefix as well
 32 |       if inst.op == opCall and "." notin inst.callLabel:
 33 |         inst.callLabel = prefix & inst.callLabel
 34 |     gPattLib.rules[stored.name] = stored
 35 | 
 36 |   for tname, t in grammar.templates:
 37 |     # The template object itself is shared; only its key is qualified
 38 |     gPattLib.templates[prefix & tname] = t
 39 | 
 40 | #
 41 | # Add a rule to a grammar
 42 | #
 43 | 
 44 | proc addRule*(grammar: Grammar, name: string, patt: Patt, repr: string = "", lineInfo: LineInfo = LineInfo()) =
 45 |   # Store `patt` as rule `name`; replacing an existing rule only gives a warning
 46 |   if grammar.rules.hasKey(name): warning("Redefinition of rule '" & name & "'")
 47 |   var named = patt
 48 |   for inst in named.mitems:
 49 |     # Instructions that already carry a rule name keep it
 50 |     if inst.name.len == 0: inst.name = name
 51 |   grammar.rules[name] = Rule(name: name, patt: named, repr: repr, lineInfo: lineInfo)
 52 | 
 53 | # Try to import the given rule from the pattern library into a grammar. Returns
 54 | # true if import succeeded, false if not found.
 55 | 
 56 | proc libImportRule*(name: string, grammar: Grammar): bool =
 57 |   result = gPattLib.rules.hasKey(name)
 58 |   if result:  # copy the library pattern into `grammar` under the same name
 59 |     addRule(grammar, name, gPattLib.rules[name].patt)
 60 |     when npegDebug:
 61 |       echo "importing ", name
 62 | 
 63 | 
 64 | proc libImportTemplate*(name: string): Template =
 65 |   # The library template stored under `name`, or nil when there is none
 66 |   gPattLib.templates.getOrDefault(name)
 67 | 
 68 | 
 69 | # Shadow the given name in the grammar by creating an unique new name,
 70 | # and moving the original rule
 71 | 
 72 | proc shadow*(grammar: Grammar, name: string): string =
 73 |   var gShadowId {.global.} = 0  # one counter for all calls, so names stay unique
 74 |   gShadowId += 1
 75 |   result = name & "-" & $gShadowId
 76 |   when npegDebug:
 77 |     echo "  shadow ", name, " -> ", result
 78 |   # Re-key the existing rule under the new name, then drop the old key
 79 |   grammar.rules[result] = grammar.rules[name]
 80 |   grammar.rules.del name
 81 | 
 82 | 
 83 | # Link a list of patterns into a grammar, which is itself again a valid
 84 | # pattern. Start with the initial rule, add all other non terminals and fixup
 85 | # opCall addresses
 86 | 
 87 | proc link*(grammar: Grammar, initial_name: string, dot: Dot = nil): Program =
 88 |   # Aborts compilation if `initial_name` or any called rule cannot be resolved.
 89 |   if initial_name notin grammar.rules:
 90 |     error "initial rule '" & initial_name & "' not found"
 91 | 
 92 |   var retPatt: Patt    # instructions of all rules emitted so far
 93 |   var symTab: SymTab   # start address of every emitted rule
 94 | 
 95 |   # Recursively emit a pattern and all patterns it calls which are
 96 |   # not yet emitted. Each emitted rule is followed by an opReturn. Rules
 97 |   # missing from the grammar are imported from the pattern library.
 98 | 
 99 |   proc emit(name: string) =
100 |     when npegDebug:
101 |       echo "emit ", name
102 |     let rule = grammar.rules[name]
103 |     if rule.patt.len > 0:
104 |       let ip = retPatt.len
105 |       symTab.add(ip, name, rule.repr, rule.lineInfo)
106 |       retPatt.add rule.patt
107 |       retPatt.add Inst(op: opReturn, name: rule.patt[0].name)
108 | 
109 |     for i in rule.patt:
110 |       if i.op == opCall and i.callLabel notin symTab:
111 |         if i.callLabel notin grammar.rules and not libImportRule(i.callLabel, grammar):
112 |           error "Npeg: rule \"" & name & "\" is referencing undefined rule \"" & i.callLabel & "\""
113 |         dot.add(name, i.callLabel, "call")
114 |         emit i.callLabel
115 | 
116 |   emit initial_name
117 | 
118 |   # Fixup call addresses and do tail call optimization: a call that is
119 |   # directly followed by a return becomes a plain jump
120 |   for ip, i in retPatt.mpairs:
121 |     if i.op == opCall:
122 |       i.callOffset = symTab[i.callLabel].ip - ip
123 |     if i.op == opCall and retPatt[ip+1].op == opReturn:
124 |       i.op = opJump
125 | 
126 |   # Choice/Commit pairs that touch because of head fail optimization can be
127 |   # replaced by a jump and a nop
128 | 
129 |   when npegOptChoiceCommit:
130 |     for i in 0..<retPatt.high:
131 |       if retPatt[i+0].op == opChoice and retPatt[i+1].op == opCommit:
132 |         retPatt[i+0] = Inst(op: opJump, callOffset: retPatt[i+1].ipOffset + 1)
133 |         retPatt[i+1] = Inst(op: opNop)
134 | 
135 |   # Trailing opFail is used by the codegen
136 | 
137 |   symTab.add(retPatt.len, "_fail")
138 |   retPatt.add Inst(op: opFail)
139 | 
140 |   # Calc indent level for instructions
141 | 
142 |   var indent = 0
143 |   for ip, i in retPatt.mpairs:
144 |     if i.op in {opCapClose, opCommit}: dec indent
145 |     i.indent = indent
146 |     if i.op in {opCapOpen, opChoice}: inc indent
147 | 
148 |   result = Program(patt: retPatt, symTab: symTab)
149 | 
150 |   when npegTrace:
151 |     echo result
152 | 
153 | 


--------------------------------------------------------------------------------
/src/npeg/lib/core.nim:
--------------------------------------------------------------------------------
 1 | 
 2 | #
 3 | # This library file is special: it is imported by default, and provides rules
 4 | # which do not live in a separate namespace.
 5 | #
 6 | 
 7 | when defined(nimHasUsed): {.used.}
 8 | 
 9 | import npeg
10 | 
11 | grammar "":
12 |   Alnum  <- {'A'..'Z','a'..'z','0'..'9'} # Alphanumeric characters
13 |   Alpha  <- {'A'..'Z','a'..'z'}          # Alphabetic characters
14 |   Blank  <- {' ','\t'}                   # Space and tab
15 |   Cntrl  <- {'\x00'..'\x1f','\x7f'}      # Control characters, including DEL
16 |   Digit  <- {'0'..'9'}                   # Decimal digits
17 |   Graph  <- {'\x21'..'\x7e'}             # Visible characters
18 |   Lower  <- {'a'..'z'}                   # Lowercase characters
19 |   Print  <- {'\x21'..'\x7e',' '}         # Visible characters and spaces
20 |   Space  <- {'\9'..'\13',' '}            # Whitespace: codes 9 to 13 and space
21 |   Upper  <- {'A'..'Z'}                   # Uppercase characters
22 |   Xdigit <- {'A'..'F','a'..'f','0'..'9'} # Hexadecimal digits
23 | 
24 | 


--------------------------------------------------------------------------------
/src/npeg/lib/rfc3339.nim:
--------------------------------------------------------------------------------
 1 | 
 2 | #
 3 | # This library provides rules for RFC 3339 date and time notation
 4 | #
 5 | 
 6 | import npeg
 7 | 
 8 | when defined(nimHasUsed): {.used.}
 9 | 
10 | grammar "rfc3339":
11 |    # The value ranges noted below are not enforced: every field takes any digits
12 |    date_fullyear   <- Digit[4]
13 |    date_month      <- Digit[2]  # 01-12
14 |    date_mday       <- Digit[2]  # 01-28, 01-29, 01-30, 01-31 based on
15 |                                 # month/year
16 |    time_hour       <- Digit[2]  # 00-23
17 |    time_minute     <- Digit[2]  # 00-59
18 |    time_second     <- Digit[2]  # 00-58, 00-59, 00-60 based on leap second
19 |                                # rules
20 |    time_secfrac    <- "." * +Digit
21 |    time_numoffset  <- ("+" | "-") * time_hour * ":" * time_minute
22 |    time_offset     <- "Z" | time_numoffset
23 | 
24 |    partial_time    <- time_hour * ":" * time_minute * ":" * time_second * ?time_secfrac
25 |    full_date       <- date_fullyear * "-" * date_month * "-" * date_mday
26 |    full_time       <- partial_time * time_offset
27 |    # Date and time are separated by an upper case "T" or a single space
28 |    date_time       <- full_date * ("T" | " ") * full_time
29 | 


--------------------------------------------------------------------------------
/src/npeg/lib/types.nim:
--------------------------------------------------------------------------------
 1 | 
 2 | #
 3 | # This library provides a number of common types
 4 | #
 5 | 
 6 | import npeg
 7 | 
 8 | when defined(nimHasUsed): {.used.}
 9 | 
10 | template checkRange*(T: typedesc, parseFn: untyped, s: string): bool =
11 |   # True if `parseFn(s)` lies within T.low..T.high; errors from `parseFn` propagate
12 |   BiggestInt(parseFn(s)) in BiggestInt(low(T)) .. BiggestInt(high(T))
13 | 
14 | grammar "types":
15 | 
16 |   bool    <- "true" | "false"
17 | 
18 |   # Unsigned decimal
19 | 
20 |   uint    <- +Digit
21 |   uint8   <- >+uint: validate checkRange(uint8,  parseInt, $1)
22 |   uint16  <- >+uint: validate checkRange(uint16, parseInt, $1)
23 |   uint32  <- >+uint: validate checkRange(uint32, parseInt, $1)
24 | 
25 |   # Signed decimal
26 | 
27 |   int     <- ?'-' * uint
28 |   int8    <- >int: validate checkRange(int8,   parseInt, $1)
29 |   int16   <- >int: validate checkRange(int16,  parseInt, $1)
30 |   int32   <- >int: validate checkRange(int32,  parseInt, $1)
31 |   int64   <- >int: validate checkRange(int64,  parseInt, $1)
32 | 
33 |   # Hexadecimal: a mandatory 0x or 0X prefix followed by hex digits. The
34 |   # capture includes the prefix, which parseHexInt skips.
35 |   hex    <- '0' * {'x','X'} * +Xdigit
36 |   hex8   <- >hex: validate checkRange(uint8,  parseHexInt, $1)
37 |   hex16  <- >hex: validate checkRange(uint16, parseHexInt, $1)
38 |   hex32  <- >hex: validate checkRange(uint32, parseHexInt, $1)
39 | 
40 | 


--------------------------------------------------------------------------------
/src/npeg/lib/uri.nim:
--------------------------------------------------------------------------------
 1 | import npeg
 2 | 
 3 | when defined(nimHasUsed): {.used.}
 4 | 
 5 | # The grammar below is a literal translation of the ABNF notation of the
 6 | # RFC. Optimizations can be made to limit backtracking, but this is a nice
 7 | # example how to create a parser from a RFC protocol description.
 8 | 
 9 | grammar "uri":
10 |   # Rule names follow RFC 3986, with '-' written as '_'
11 |   URI <- scheme * ":" * hier_part * ?( "?" * query) * ?( "#" * fragment) * !1
12 | 
13 |   hier_part <- "//" * authority * path
14 | 
15 |   URI_reference <- URI | relative_ref
16 | 
17 |   absolute_uri <- scheme * ":" * hier_part * ?( "?" * query)
18 | 
19 |   relative_ref <- relative_part * ?( "?" * query) * ?( "#" * fragment)
20 | 
21 |   relative_part <- "//" * authority * path_abempty |
22 |                    path_absolute |
23 |                    path_noscheme |
24 |                    path_empty
25 | 
26 |   scheme <- (Alpha * *( Alpha | Digit | "+" | "-" | "." ))
27 | 
28 |   authority <- ?(userinfo * "@") * host * ?( ":" * port)
29 |   userinfo <- *(unreserved | pct_encoded | sub_delims | ":")
30 | 
31 |   host <- (IP_literal | IPv4address | reg_name)
32 |   port <- *Digit
33 | 
34 |   IP_literal <- "[" * (IPv6address | IPvFuture) * "]"
35 | 
36 |   IPvFuture <- "v" * +Xdigit * "." * +(unreserved | sub_delims | ":")
37 | 
38 |   IPv6address <-                                     (h16 * ":")[6] * ls32 |
39 |                                               "::" * (h16 * ":")[5] * ls32 |
40 |                ?( h16                     ) * "::" * (h16 * ":")[4] * ls32 |
41 |                ?( h16 * (":" * h16)[0..1] ) * "::" * (h16 * ":")[3] * ls32 |
42 |                ?( h16 * (":" * h16)[0..2] ) * "::" * (h16 * ":")[2] * ls32 |
43 |                ?( h16 * (":" * h16)[0..3] ) * "::" * (h16 * ":")    * ls32 |
44 |                ?( h16 * (":" * h16)[0..4] ) * "::" *                  ls32 |
45 |                ?( h16 * (":" * h16)[0..5] ) * "::" *                  h16  |
46 |                ?( h16 * (":" * h16)[0..6] ) * "::"
47 | 
48 |   h16 <- Xdigit[1..4]
49 |   ls32 <- (h16 * ":" * h16) | IPv4address
50 |   IPv4address <- dec_octet * "." * dec_octet * "." * dec_octet * "." * dec_octet
51 | 
52 |   dec_octet <- Digit[1..3]
53 | 
54 |   reg_name <- *(unreserved | pct_encoded | sub_delims)
55 | 
56 |   path <- path_abempty  | # begins with "/" or is empty
57 |           path_absolute | # begins with "/" but not "//"
58 |           path_noscheme | # begins with a non-colon segment
59 |           path_rootless | # begins with a segment
60 |           path_empty      # zero characters
61 | 
62 |   path_abempty  <- (*( "/" * segment ))
63 |   path_absolute <- ("/" * ?( segment_nz * *( "/" * segment ) ))
64 |   path_noscheme <- (segment_nz_nc * *( "/" * segment ))
65 |   path_rootless <- (segment_nz * *( "/" * segment ))
66 |   path_empty    <- 0
67 | 
68 |   segment       <- *pchar
69 |   segment_nz    <- +pchar
70 |   segment_nz_nc <- +( unreserved | pct_encoded | sub_delims | "@" )
71 |                 # non_zero_length segment without any colon ":"
72 | 
73 |   pchar         <- unreserved | pct_encoded | sub_delims | ":" | "@"
74 |   # Query and fragment allow "/" and "?" in addition to pchar
75 |   query         <- *( pchar | "/" | "?" )
76 | 
77 |   fragment      <- *( pchar | "/" | "?" )
78 | 
79 |   pct_encoded   <- "%" * Xdigit * Xdigit
80 | 
81 |   unreserved    <- Alpha | Digit | "-" | "." | "_" | "~"
82 |   reserved      <- gen_delims | sub_delims
83 |   gen_delims    <- ":" | "/" | "?" | "#" | "[" | "]" | "@"
84 |   sub_delims    <- "!" | "$" | "&" | "'" | "(" | ")" | "*" | "+" | "," | ";" | "="
85 | 
86 | 


--------------------------------------------------------------------------------
/src/npeg/lib/utf8.nim:
--------------------------------------------------------------------------------
 1 | 
 2 | import npeg
 3 | 
 4 | when defined(nimHasUsed): {.used.}
 5 | 
 6 | grammar "utf8":
 7 |   # Continuation byte
 8 |   cont <- {128..191}
 9 | 
10 |   # Matches any utf-8 codepoint glyph
11 | 
12 |   any <- {0..127} |
13 |          {194..223} * cont[1] |
14 |          {224..239} * cont[2] |
15 |          {240..244} * cont[3]
16 |   # Byte order mark: U+FEFF encoded as UTF-8
17 |   bom <- "\xef\xbb\xbf"
18 | 
19 |   # Check for UTF-8 character classes. Depends on the tables from
20 |   # the nim unicode module
21 | 
22 |   space <- >utf8.any: validate unicode.isSpace($1)
23 |   lower <- >utf8.any: validate unicode.isLower(runeAt($1, 0))
24 |   upper <- >utf8.any: validate unicode.isUpper(runeAt($1, 0))
25 |   alpha <- >utf8.any: validate unicode.isAlpha(runeAt($1, 0))
26 |   title <- >utf8.any: validate unicode.isTitle(runeAt($1, 0))
27 | 


--------------------------------------------------------------------------------
/src/npeg/parsepatt.nim:
--------------------------------------------------------------------------------
  1 | 
  2 | import tables, macros, sequtils, strutils, algorithm
  3 | import npeg/[common,patt,dot,grammar]
  4 | 
  5 | when npegGraph:
  6 |   import npeg/[railroad]
  7 | 
  8 | 
  9 | # Recursively compile a PEG rule to a Pattern
 10 | 
 11 | proc parsePatt*(pattName: string, nn: NimNode, grammar: Grammar, dot: Dot = nil): Patt =
 12 |   # `pattName` is the rule being compiled; `grammar` holds the rules known so far
 13 |   when npegDebug:
 14 |     echo "parse ", pattName, " <- ", nn.repr
 15 | 
 16 |   proc aux(n: NimNode): Patt =
 17 |     # Compile one AST node; recurses into the children
 18 |     setKrakNode(n)
 19 | 
 20 |     proc inlineOrCall(callName: string): Patt =
 21 |       # Reference to rule `callName`: either a copy of its pattern or a call
 22 |       # Try to import symbol early so we might be able to inline or shadow it
 23 |       if callName notin grammar.rules:
 24 |         discard libImportRule(callName, grammar)
 25 | 
 26 |       if pattName == callName: # rule refers to its own name
 27 |         if pattName in grammar.rules: # an earlier rule with this name exists
 28 |           let nameShadowed = grammar.shadow(pattName)
 29 |           return newCallPatt(nameShadowed)
 30 | 
 31 |       if callName in grammar.rules and grammar.rules[callName].patt.len < npegInlineMaxLen:
 32 |         when npegDebug:
 33 |           echo "  inline ", callName
 34 |         dot.add(pattName, callName, "inline")
 35 |         return grammar.rules[callName].patt
 36 | 
 37 |       else:
 38 |         when npegDebug:
 39 |           echo "  call ", callName
 40 |         dot.add(pattName, callName, "call")
 41 |         return newCallPatt(callName)
 42 |     # Expand template `tName` for the call node `arg`; nil if no such template
 43 |     proc applyTemplate(tName: string, arg: NimNode): NimNode =
 44 |       let t = if tName in grammar.templates:
 45 |         grammar.templates[tName]
 46 |       else:
 47 |         libImportTemplate(tName)
 48 |       if t != nil:
 49 |         if arg.len-1 != t.args.len: # arg[0] is the template name itself
 50 |           krak arg, "Wrong number of arguments for template " & tName & "(" & $(t.args.join(",")) & ")"
 51 |         proc aux(n: NimNode): NimNode =
 52 |           if n.kind == nnkIdent and n.strVal in t.args:
 53 |             result = arg[ find(t.args, n.strVal)+1 ]
 54 |           else:
 55 |             result = copyNimNode(n)
 56 |             for nc in n:
 57 |               result.add aux(nc)
 58 |         result = aux(t.code).flattenChoice()
 59 |         when npegDebug:
 60 |           echo "template ", tName, " = \n  in:  ", n.repr, "\n  out: ", result.repr
 61 | 
 62 |     case n.kind:
 63 | 
 64 |       of nnkPar:
 65 |         if n.len > 1:
 66 |           krak n, "syntax error. Did you mean '|'?"
 67 |         result = aux n[0]
 68 | 
 69 |       of nnkIntLit:
 70 |         result = newPatt(n.intVal)
 71 | 
 72 |       of nnkStrLit:
 73 |         result = newPatt(n.strVal)
 74 | 
 75 |       of nnkCharLit:
 76 |         result = newPatt($n.intVal.char)
 77 | 
 78 |       of nnkCall: # template use, flattened choice(), or R(...) forms
 79 |         var name: string
 80 |         if n[0].kind == nnkIdent:
 81 |           name = n[0].strVal
 82 |         elif n[0].kind == nnkDotExpr:
 83 |           name = n[0].repr
 84 |         else:
 85 |           krak n, "syntax error"
 86 |         let n2 = applyTemplate(name, n)
 87 |         if n2 != nil:
 88 |           result = aux n2
 89 |         elif name == "choice":
 90 |           result = choice(n[1..^1].map(aux))
 91 |         elif n.len == 2:
 92 |           case name
 93 |             of "R": result = newBackrefPatt(n[1].strVal)
 94 |         elif n.len == 3:
 95 |           case name
 96 |             of "R": result = newPatt(aux n[2], ckRef, n[1].strVal)
 97 |         if result.len == 0: # none of the branches above produced a pattern
 98 |           krak n, "Unknown template or capture '" & name & "'"
 99 | 
100 |       of nnkPrefix:
101 |         # Nim combines all prefix chars into one string. Handle prefixes
102 |         # chars right to left
103 |         var p = aux n[1]
104 |         for c in n[0].strVal.reversed:
105 |           case c:
106 |             of '?': p = ?p
107 |             of '+': p = +p
108 |             of '*': p = *p
109 |             of '!': p = !p
110 |             of '&': p = &p
111 |             of '>': p = >p
112 |             of '@': p = @p
113 |             else: krak n, "Unhandled prefix operator"
114 |         result = p
115 | 
116 |       of nnkInfix:
117 |         case n[0].strVal:
118 |           of "*", "∙": result = aux(n[1]) * aux(n[2])
119 |           of "-": result = aux(n[1]) - aux(n[2])
120 |           of "^": result = newPattAssoc(aux(n[1]), intVal(n[2]), assocLeft)
121 |           of "^^": result = newPattAssoc(aux(n[1]), intVal(n[2]), assocRight)
122 |           else: krak n, "Unhandled infix operator"
123 | 
124 |       of nnkBracketExpr: # patt[n] or patt[lo..hi]
125 |         let p = aux(n[0])
126 |         if n[1].kind == nnkIntLit:
127 |           result = p{n[1].intVal}
128 |         elif n[1].kind == nnkInfix and n[1][0].eqIdent(".."):
129 |           result = p{n[1][1].intVal..n[1][2].intVal}
130 |         else: krak n, "syntax error"
131 | 
132 |       of nnkIdent:
133 |         result = inlineOrCall(n.strVal)
134 | 
135 |       of nnkDotExpr:
136 |         result = inlineOrCall(n.repr)
137 | 
138 |       of nnkCurly: # set of characters and/or `a..b` ranges
139 |         var cs: CharSet
140 |         for nc in n:
141 |           if nc.kind == nnkCharLit:
142 |             cs.incl nc.intVal.char
143 |           elif nc.kind == nnkInfix:
144 |             if nc[0].kind == nnkIdent and nc[0].eqIdent(".."):
145 |               for c in nc[1].intVal..nc[2].intVal:
146 |                 cs.incl c.char
147 |             else:
148 |               krak n, "syntax error"
149 |           else:
150 |             krak n, "syntax error"
151 |         if cs.card == 0: # `{}`: no set is built, newPatt(1) is used instead
152 |           result = newPatt(1)
153 |         else:
154 |           result = newPatt(cs)
155 | 
156 |       of nnkCallStrLit: # prefixed string literals: i"..." and E"..."
157 |         case n[0].strVal:
158 |           of "i": 
159 |             for c in n[1].strVal:
160 |               result.add newPatt({c.toLowerAscii, c.toUpperAscii})
161 |           of "E": result = newErrorPatt(n[1].strVal)
162 |           else: krak n, "unhandled string prefix"
163 | 
164 |       of nnkBracket:
165 |         result.add newLitPatt n[0]
166 | 
167 |       else:
168 |         echo n.astGenRepr
169 |         krak n, "syntax error"
170 |     # Instructions without an AST node yet are attributed to `n`
171 |     for i in result.mitems:
172 |       if i.nimNode == nil:
173 |         i.nimNode = n
174 | 
175 |   result = aux(nn.flattenChoice())
176 |   dot.addPatt(pattName, result.len)
177 | 
178 | 
179 | #
180 | # Parse a grammar. A grammar consists of named rules, where each rule is one
181 | # pattern
182 | #
183 | 
184 | proc parseGrammar*(ns: NimNode, dot: Dot=nil, dumpRailroad = true): Grammar =
185 |   result = new Grammar
186 |   # Every statement must have the form `<lhs> <- <pattern>`
187 |   for n in ns:
188 | 
189 |     if n.kind == nnkInfix and n[0].eqIdent("<-"):
190 | 
191 |       case n[1].kind
192 |       of nnkIdent, nnkDotExpr, nnkPrefix: # rule: `name`, `lib.name` or `>name`
193 |         let name = if n[1].kind == nnkPrefix:
194 |                      when declared(expectIdent):
195 |                        expectIdent n[1][0], ">"
196 |                      n[1][1].repr
197 |                    else: n[1].repr
198 |         var patt = parsePatt(name, n[2], result, dot)
199 |         if n.len == 4: # a fourth child is the code block attached to the rule
200 |           patt = newPatt(patt, ckCodeBlock)
201 |           patt[patt.high].capAction = n[3]
202 |         result.addRule(name, if n[1].kind == nnkPrefix: >patt else: patt, n.repr, n.lineInfoObj)
203 | 
204 |         when npegGraph:
205 |           if dumpRailroad:
206 |             echo parseRailroad(n[2], result).wrap(name)
207 | 
208 |       of nnkCall: # template definition: `name(arg, ...) <- body`
209 |         if n.len > 3:
210 |           error "Code blocks can not be used on templates", n[3]
211 |         var t = Template(name: n[1][0].strVal, code: n[2])
212 |         for i in 1..<n[1].len:
213 |           t.args.add n[1][i].strVal
214 |         result.templates[t.name] = t
215 | 
216 |       else:
217 |         error "Expected PEG rule name but got " & $n[1].kind, n
218 | 
219 |     else:
220 |       error "Expected PEG rule (name <- ...)", n
221 | 
222 | 


--------------------------------------------------------------------------------
/src/npeg/patt.nim:
--------------------------------------------------------------------------------
  1 | 
  2 | import macros
  3 | import sequtils
  4 | 
  5 | import npeg/[common,stack]
  6 | 
  7 | 
  8 | # Some tests on patterns
  9 | 
 10 | proc isSet(p: Patt): bool {.used.} =
 11 |   ## True when `p` is exactly one instruction and that instruction is opSet.
 12 |   if p.len != 1:
 13 |     return false
 14 |   result = p[0].op == opSet
 12 | 
 13 | 
 14 | proc toSet(p: Patt, cs: var CharSet): bool =
 15 |   ## If `p` is a single opSet, opChr or opAny instruction, writes the set
 16 |   ## of characters it accepts to `cs` and returns true. In every other
 17 |   ## case, and always when `npegOptSets` is off, `cs` is left untouched
 18 |   ## and the result is false.
 19 |   when npegOptSets:
 20 |     if p.len == 1:
 21 |       let inst = p[0]
 22 |       case inst.op
 23 |       of opSet:
 24 |         cs = inst.cs
 25 |         result = true
 26 |       of opChr:
 27 |         cs = { inst.ch }
 28 |         result = true
 29 |       of opAny:
 30 |         cs = {low(char)..high(char)}
 31 |         result = true
 32 |       else:
 33 |         discard
 27 | 
 28 | 
 29 | proc checkSanity(p: Patt) =
 30 |   ## Reports an error through `krak` once `p` holds `npegPattMaxLen`
 31 |   ## instructions or more. The message prints the same `>=` relation the
 32 |   ## check uses, so it is also true when `p.len` equals the limit.
 33 |   if p.len >= npegPattMaxLen:
 34 |     krak "NPeg: grammar too complex, (" & $p.len & " >= " & $npegPattMaxLen & ").\n" &
 35 |          "If you think this is a mistake, increase the maximum size with -d:npegPattMaxLen=N"
 33 | 
 34 | 
 35 | # Decides whether a pattern can succeed on an empty subject by stepping
 36 | # through its instructions as if there were nothing to consume:
 37 | # - opChoice/opCommit push and pop backtrack targets like the real matcher
 38 | # - opAny, opChr, opLit, opSet, opBackref and opFail count as a failure:
 39 | #   follow failOffset when set, otherwise backtrack, otherwise give up
 40 | # - opErr, opReturn and opCall make the answer false
 41 | # Running off the end of the pattern means it matched.
 42 | 
 43 | proc matchesEmpty(patt: Patt): bool =
 44 |   var
 45 |     pc = 0
 46 |     pending = initStack[int]("backtrack", 8, 32)
 47 |   while pc < patt.len:
 48 |     let inst = patt[pc]
 49 |     case inst.op
 50 |     of opChoice:
 51 |       push(pending, pc + inst.ipOffset)
 52 |       pc += 1
 53 |     of opCommit:
 54 |       discard pop(pending)
 55 |       pc += inst.ipOffset
 56 |     of opJump:
 57 |       pc += inst.callOffset
 58 |     of opCapOpen, opCapClose, opNop, opSpan, opPrecPush, opPrecPop:
 59 |       pc += 1
 60 |     of opErr, opReturn, opCall:
 61 |       return false
 62 |     of opAny, opChr, opLit, opSet, opBackref, opFail:
 63 |       if inst.failOffset != 0:
 64 |         pc += inst.failOffset
 65 |       elif pending.top > 0:
 66 |         pc = pop(pending)
 67 |       else:
 68 |         return false
 69 |   result = true
 61 | 
 62 | 
 63 | # Calculate how far a capture or choice can be shifted into this pattern
 64 | # without consequences; this allows the pattern to fail before pushing to the
 65 | # backStack or capStack.
 66 | #
 67 | # Returns (siShift, ipShift): (1, 1) when the first instruction is an opChr,
 68 | # opAny or opSet whose failOffset is still 0, and (0, 0) otherwise. (0, 0) is
 69 | # also returned when `enable` is false, so the optimization flag passed by
 70 | # the caller is honoured, and for an empty pattern, which has no first
 71 | # instruction to inspect.
 72 | 
 73 | proc canShift(p: Patt, enable: static[bool]): (int, int) =
 74 |   when enable:
 75 |     if p.len > 0:
 76 |       let i = p[0]
 77 |       if i.failOffset == 0:
 78 |         case i.op
 79 |         of opChr, opAny, opSet:
 80 |           result = (1, 1)
 81 |         else:
 82 |           discard
 75 | 
 76 | ### Atoms
 77 | 
 78 | proc newPatt*(s: string): Patt =
 79 |   ## One opChr instruction per character of `s`, in order. An empty
 80 |   ## string yields a pattern without instructions.
 81 |   for idx in 0..<s.len:
 82 |     result.add Inst(op: opChr, ch: s[idx])
 81 | 
 82 | proc newLitPatt*(n: NimNode): Patt =
 83 |   ## A single opLit instruction that carries the NimNode `n`.
 84 |   let inst = Inst(op: opLit, lit: n)
 85 |   result.add inst
 84 | 
 85 | proc newPatt*(p: Patt, ck: CapKind, name = ""): Patt =
 86 |   ## Wraps `p` in an opCapOpen/opCapClose pair of kind `ck`. The leading
 87 |   ## instructions that canShift allows to be skipped are emitted in front
 88 |   ## of the opCapOpen, which stores the negated subject shift in
 89 |   ## `capSiOffset` and `name` in `capName`.
 90 |   let shift = p.canShift(npegOptCapShift)
 91 |   let head = p[0 ..< shift[1]]
 92 |   let tail = p[shift[1] .. ^1]
 93 |   result.add head
 94 |   result.add Inst(op: opCapOpen, capKind: ck, capSiOffset: -shift[0], capName: name)
 95 |   result.add tail
 96 |   result.add Inst(op: opCapClose, capKind: ck)
 91 | 
 92 | proc newCallPatt*(label: string): Patt =
 93 |   ## A single opCall instruction whose `callLabel` is `label`.
 94 |   let inst = Inst(op: opCall, callLabel: label)
 95 |   result.add inst
 94 | 
 95 | proc newPatt*(n: BiggestInt): Patt =
 96 |   ## `n` opAny instructions for a positive `n`; a single opNop for
 97 |   ## zero or a negative `n`.
 98 |   if n <= 0:
 99 |     result.add Inst(op: opNop)
100 |     return
101 |   var left = n
102 |   while left > 0:
103 |     result.add Inst(op: opAny)
104 |     dec left
101 | 
102 | proc newPatt*(cs: CharSet): Patt =
103 |   ## A single opSet instruction over the character set `cs`.
104 |   let inst = Inst(op: opSet, cs: cs)
105 |   result.add inst
104 | 
105 | proc newBackrefPatt*(refName: string): Patt =
106 |   ## A single opBackref instruction referring to `refName`.
107 |   let inst = Inst(op: opBackref, refName: refName)
108 |   result.add inst
107 | 
108 | proc newReturnPatt*(): Patt =
109 |   ## A pattern consisting of one opReturn instruction.
110 |   let inst = Inst(op: opReturn)
111 |   result.add inst
110 | 
111 | proc newErrorPatt*(msg: string): Patt =
112 |   ## A single opErr instruction carrying the message `msg`.
113 |   let inst = Inst(op: opErr, msg: msg)
114 |   result.add inst
113 | 
114 | 
115 | # Appends pattern P to addTo, enclosed in an opChoice/opCommit pair.
116 | # Head-fail optimization: the instructions canShift allows to be skipped are
117 | # emitted in front of the opChoice with their failOffset pointing at the
118 | # choice target, and the opChoice offsets are reduced by the shift.
119 | 
120 | proc addChoiceCommit(addTo: var Patt, p: Patt, choiceOffset, commitOffset: int) =
121 |   let shift = p.canShift(npegOptHeadFail)
122 |   var idx = 0
123 |   while idx < shift[1]:
124 |     addTo.add p[idx]
125 |     addTo[addTo.high].failOffset = choiceOffset - idx
126 |     inc idx
127 |   let open = Inst(op: opChoice, ipOffset: choiceOffset - shift[1], siOffset: -shift[0])
128 |   addTo.add open
129 |   addTo.add p[shift[1] .. ^1]
130 |   addTo.add Inst(op: opCommit, ipOffset: commitOffset)
126 | 
127 | 
128 | ### Prefixes
129 | 
130 | proc `?`*(p: Patt): Patt =
131 |   ## Zero or one: `p` inside a choice/commit pair. The choice target lies
132 |   ## just past the pair and the commit steps to the next instruction.
133 |   let past = p.len + 2
134 |   result.addChoiceCommit(p, past, 1)
132 | 
133 | proc `*`*(p: Patt): Patt =
134 |   ## Zero or more. A pattern that toSet can reduce to a character set
135 |   ## becomes one opSpan; anything else becomes a choice/commit loop whose
136 |   ## commit jumps back to the start. A body that matches the empty
137 |   ## subject is reported through `krak`.
138 |   var cs: CharSet
139 |   if p.toSet(cs):
140 |     result.add Inst(op: opSpan, cs: cs)
141 |     return
142 |   if matchesEmpty(p):
143 |     krak "'*' repeat argument matches empty subject"
144 |   let n = p.len
145 |   result.addChoiceCommit(p, n + 2, -n - 1)
141 | 
142 | proc `+`*(p: Patt): Patt =
143 |   ## One or more: `p` followed by `*p`.
144 |   let repeated = *p
145 |   result.add p
146 |   result.add repeated
145 | 
146 | proc `>`*(p: Patt): Patt =
147 |   ## Capture: `p` wrapped as a capture of kind ckVal.
148 |   result = newPatt(p, ckVal)
148 | 
149 | proc `!`*(p: Patt): Patt =
150 |   ## Not-predicate: choice/commit around `p` followed by an opFail. The
151 |   ## commit lands on the opFail, while the choice target lies just past it.
152 |   let past = p.len + 3
153 |   result.addChoiceCommit(p, past, 1)
154 |   result.add Inst(op: opFail)
152 | 
153 | proc `&`*(p: Patt): Patt =
154 |   ## And-predicate, built as a double negation of `p`.
155 |   let negated = !p
156 |   result.add !negated
155 | 
156 | proc `@`*(p: Patt): Patt =
157 |   ## Search: choice/commit around `p`, then an opAny and an opJump back
158 |   ## to the start. The commit skips over both trailing instructions; the
159 |   ## choice target is the opAny.
160 |   let n = p.len
161 |   result.addChoiceCommit(p, n + 2, 3)
162 |   result.add Inst(op: opAny)
163 |   result.add Inst(op: opJump, callOffset: -n - 3)
160 | 
161 | ### Infixes
162 | 
163 | proc `*`*(p1, p2: Patt): Patt =
164 |   ## Concatenation: the instructions of `p1` followed by those of `p2`.
165 |   ## The combined pattern is passed to checkSanity.
166 |   for part in [p1, p2]:
167 |     result.add part
168 |   checkSanity(result)
167 | 
168 | 
169 | # choice() is generated from | operators by flattenChoice().
170 | #
171 | # Two shapes are produced:
172 | # - when toSet() succeeds for every alternative, a single opSet holding
173 | #   the union of all their character sets
174 | # - otherwise every alternative but the last gets a choice/commit pair
175 | #   (with head-fail shifting done by addChoiceCommit) whose commit jumps
176 | #   past the whole choice; the last alternative is appended as is
177 | 
178 | proc choice*(ps: openArray[Patt]): Patt =
179 |   block unionOfSets:
180 |     var merged: CharSet
181 |     for alt in ps:
182 |       var cs: CharSet
183 |       if not alt.toSet(cs):
184 |         break unionOfSets
185 |       merged = merged + cs
186 |     result.add Inst(op: opSet, cs: merged)
187 |     return result
188 | 
189 |   # every alternative is counted with two extra slots for choice + commit
190 |   let total = foldl(ps, a + b.len+2, 0)
191 |   var emitted = 0
192 |   for idx in 0 .. ps.high:
193 |     let alt = ps[idx]
194 |     if idx == ps.high:
195 |       result.add alt
196 |     else:
197 |       result.addChoiceCommit(alt, alt.len + 2, total - emitted - alt.len - 3)
198 |       emitted += alt.len + 2
197 | 
198 | proc `-`*(p1, p2: Patt): Patt =
199 |   ## Difference. When both operands reduce to character sets the result
200 |   ## is one opSet over `cs1 - cs2`; otherwise it is `!p2` followed by `p1`.
201 |   var keep, drop: CharSet
202 |   let bothSets = p1.toSet(keep) and p2.toSet(drop)
203 |   if not bothSets:
204 |     result.add !p2
205 |     result.add p1
206 |     return
207 |   result.add Inst(op: opSet, cs: keep - drop)
205 | 
206 | proc newPattAssoc*(p: Patt, prec: BiggestInt, assoc: Assoc): Patt =
207 |   ## `p` bracketed by an opPrecPush, carrying `prec` (converted to int)
208 |   ## and `assoc`, and a closing opPrecPop.
209 |   let enter = Inst(op: opPrecPush, prec: prec.int, assoc: assoc)
210 |   let leave = Inst(op: opPrecPop)
211 |   result.add enter
212 |   result.add p
213 |   result.add leave
210 | 
211 | 
212 | ### Others
213 | 
214 | proc `{}`*(p: Patt, n: BiggestInt): Patt =
215 |   ## `n` back-to-back copies of `p`; nothing is emitted when `n` < 1.
216 |   var left = n
217 |   while left >= 1:
218 |     result.add p
219 |     dec left
217 | 
218 | proc `{}`*(p: Patt, range: HSlice[system.BiggestInt, system.BiggestInt]): Patt =
219 |   ## `range.a` mandatory copies of `p` followed by `range.b - range.a`
220 |   ## optional (`?p`) copies.
221 |   let mandatory = p{range.a}
222 |   result.add mandatory
223 |   var extra = range.a
224 |   while extra < range.b:
225 |     result.add ?p
226 |     inc extra
222 | 
223 | 
224 | 


--------------------------------------------------------------------------------
/src/npeg/railroad.nim:
--------------------------------------------------------------------------------
  1 | 
  2 | import macros, unicode, tables, strutils, sequtils
  3 | import npeg/[grammar,common]
  4 | 
  5 | when not defined(js):
  6 |   import terminal
  7 | else:
  8 |   type ForeGroundColor = enum
  9 |     fgYellow, fgMagenta, fgGreen, fgWhite, fgCyan, fgRed
 10 | 
 11 | const
 12 |   fgName = fgYellow
 13 |   fgLit = fgMagenta
 14 |   fgLine = fgGreen
 15 |   fgCap = fgWhite
 16 |   fgNonterm = fgCyan
 17 |   fgError = fgRed
 18 | 
 19 | type
 20 | 
 21 |   Sym = object  ## one character placed at (x, y) relative to its node
 22 |     x, y: int
 23 |     c: Char
 24 | 
 25 |   Char = object  ## a rune together with its foreground colour
 26 |     r: Rune
 27 |     fg: ForeGroundColor
 28 | 
 29 |   Line = seq[Char]  ## one row of the output grid
 30 | 
 31 |   Grid = seq[Line]  ## all rows of the output grid
 32 | 
 33 |   Node = ref object  ## diagram fragment: width w, rows y0..y1, own symbols and child nodes
 34 |     w, y0, y1: int
 35 |     syms: seq[Sym]
 36 |     kids: seq[Kid]
 37 | 
 38 |   Kid = object  ## child node n drawn at offset (dx, dy) from its parent
 39 |     dx, dy: int
 40 |     n: Node
 41 | 
 42 | # Provide ASCII alternative of box drawing for windows
 43 | 
 44 | when defined(windows) or defined(js):
 45 |   const asciiTable = [ ("│", "|"), ("─", "-"), ("╭", "."), ("╮", "."),
 46 |                        ("╰", "`"), ("╯", "'"), ("┬", "-"), ("├", "|"),
 47 |                        ("┤", "|"), ("┴", "-"), ("━", "=") ]
 48 | 
 49 | #
 50 | # Renders a node to text output: the node and all of its kids are painted
 51 | # onto a grid of n.w columns by (n.y1 - n.y0 + 1) rows, which is then
 52 | # written out row by row
 53 | #
 54 | 
 55 | proc `$`*(n: Node): string =
 56 |   let top = n.y0
 57 |   let rows = n.y1 - top + 1
 58 |   var blank: Line
 59 |   for col in 0..<n.w:
 60 |     blank.add Char(r: ' '.Rune)
 61 |   var grid: Grid
 62 |   for row in 0..<rows:
 63 |     grid.add blank
 64 | 
 65 |   # Kids are painted before the node's own symbols, so a node's symbols
 66 |   # overwrite whatever its kids left in the same cell.
 67 |   proc paint(node: Node, ox, oy: int) =
 68 |     for kid in node.kids:
 69 |       paint(kid.n, ox + kid.dx, oy + kid.dy)
 70 |     for sym in node.syms:
 71 |       grid[oy + sym.y - top][ox + sym.x] = sym.c
 72 |   paint(n, 0, 0)
 73 | 
 74 |   when defined(windows) or defined(js):
 75 |     # runes are mapped through asciiTable, rows end in CR LF, no colours
 76 |     for row in grid:
 77 |       for cell in row:
 78 |         result.add ($cell.r).multiReplace(asciiTable)
 79 |       result.add "\r\n"
 80 |   else:
 81 |     # a colour code is emitted only where the colour changes; the output
 82 |     # ends with the code for fgLine
 83 |     var current = fgLine
 84 |     for row in grid:
 85 |       for cell in row:
 86 |         if cell.fg != current:
 87 |           current = cell.fg
 88 |           result.add ansiForegroundColorCode(current)
 89 |         result.add $cell.r
 90 |       result.add "\n"
 91 |     result.add ansiForegroundColorCode(fgLine)
 86 | 
 87 | proc poke(n: Node, fg: ForeGroundColor, cs: varArgs[tuple[x, y: int, s: string]]) =
 88 |   ## Appends one symbol to `n` for every (x, y, s) entry: the first rune
 89 |   ## of `s`, coloured `fg`, at position (x, y).
 90 |   for entry in cs:
 91 |     let glyph = Char(r: entry.s.runeAt(0), fg: fg)
 92 |     n.syms.add Sym(x: entry.x, y: entry.y, c: glyph)
 90 | 
 91 | proc pad(n: Node, left, right, top, bottom = 0): Node = 
 92 |   ## New node that holds `n` as its only kid, shifted `left` columns to
 93 |   ## the right. The width grows by `left + right`, y0 drops by `top` and
 94 |   ## y1 rises by `bottom`. The added columns get a "─" on row 0.
 95 |   let total = n.w + left + right
 96 |   result = Node(w: total, y0: n.y0 - top, y1: n.y1 + bottom)
 97 |   result.kids.add Kid(n: n, dx: left, dy: 0)
 98 |   var x = 0
 99 |   while x < left:
100 |     result.poke fgLine, (x, 0, "─")
101 |     inc x
102 |   x = n.w + left
103 |   while x < total:
104 |     result.poke fgLine, (x, 0, "─")
105 |     inc x
 98 | 
 99 | proc wrap*(n: Node, name: string): Node =
100 |   ## Frames `n` as a named rule: `name` plus one space on the left,
101 |   ## then "o─", the node itself, and "─o" on the right, all on row 0.
102 |   let label = (name & " ").toRunes()
103 |   let width = label.len()
104 |   result = n.pad(width+2, 2)
105 |   result.poke fgLine, (width+0, 0, "o"), (width+1, 0, "─"), (result.w-2, 0, "─"), (result.w-1, 0, "o")
106 |   for i, r in label:
107 |     result.poke fgName, (i, 0, $r)
106 | 
107 | proc newNode(s: string, fg = fgLine): Node =
108 |   ## Text node: the runes of `s.dumpSubject()` on row 0 in colour `fg`,
109 |   ## padded with one column on either side.
110 |   let runes = s.dumpSubject().toRunes()
111 |   let text = Node(w: runes.len)
112 |   for x, r in runes:
113 |     text.poke fg, (x, 0, $r)
114 |   result = text.pad(1, 1)
113 | 
114 | proc newCapNode(n: Node, name = ""): Node =
115 |   ## Draws a box around `n`: two extra columns left and right, one extra
116 |   ## row above and below. The vertical edges skip row 0 so that the rail
117 |   ## stays visible. `name` is written onto the top edge.
118 |   result = pad(n, 2, 2, 1, 1)
119 |   let
120 |     left = 1
121 |     right = result.w - 2
122 |     top = result.y0
123 |     bottom = result.y1
124 |   result.poke fgCap, (left, top, "╭"), (right, top, "╮"), (left, bottom, "╰"), (right, bottom, "╯")
125 |   for x in left+1 .. right-1:
126 |     result.poke fgCap, (x, top, "╶"), (x, bottom, "╶")
127 |   for y in top+1 .. bottom-1:
128 |     if y == 0:
129 |       continue
130 |     result.poke fgCap, (left, y, "┆"), (right, y, "┆")
131 |   let label = name.toRunes()
132 |   let start = (right + left - label.len) /% 2
133 |   for i, r in label:
134 |     result.poke fgCap, (start + i, top, $r)
128 | 
129 | proc newPrecNode(n: Node, prec: BiggestInt, lr: string): Node =
130 |   ## Adds one row above `n` and writes the label `lr & $prec & lr` on
131 |   ## it. When the label is wider than `n`, the node is first widened on
132 |   ## the left by the difference.
133 |   ##
134 |   ## The label goes on the row that was just added (`result.y0`). A
135 |   ## fixed row of -1 is only correct for nodes whose y0 is 0; for a node
136 |   ## that already extends above row 0 it would draw over the node.
137 |   let label = lr & $prec & lr
138 |   let extra = if label.len > n.w: label.len - n.w else: 0
139 |   result = pad(n, extra, 0, 1)
140 |   for i, c in label:
141 |     result.poke fgCap, (result.w/%2 - label.len/%2 + i, result.y0, $c)
134 | 
135 | proc `*`(n1, n2: Node): Node =
136 |   ## Sequence: `n1` and `n2` side by side with a "»" column in between.
137 |   ## The result spans the rows of both operands.
138 |   let top = min(n1.y0, n2.y0)
139 |   let bottom = max(n1.y1, n2.y1)
140 |   result = Node(w: n1.w + n2.w + 1, y0: top, y1: bottom)
141 |   result.poke fgGreen, (n1.w, 0, "»")
142 |   result.kids.add Kid(n: n1, dx: 0)
143 |   result.kids.add Kid(n: n2, dx: n1.w+1)
140 | 
141 | proc `?`(n: Node): Node =
142 |   ## Optional: a bypass arc on the row above `n`, branching off row 0
143 |   ## one column to the left and to the right of the node.
144 |   result = n.pad(1, 1, 1, 0)
145 |   let left = 0
146 |   let right = n.w + 1
147 |   let arcRow = -1 + n.y0
148 |   result.poke fgLine, (left, arcRow, "╭"), (left, 0, "┴"), (right, arcRow, "╮"), (right, 0, "┴")
149 |   for x in left+1 .. right-1:
150 |     result.poke fgLine, (x, arcRow, "─")
151 |   for y in arcRow+1 .. -1:
152 |     result.poke fgLine, (left, y, "│"), (right, y, "│")
153 |   result.poke fgLine, ((left+right)/%2, arcRow, "»")
150 | 
151 | proc `+`(n: Node): Node =
152 |   ## One or more: a return arc on the row below `n`, branching off
153 |   ## row 0 one column to the left and to the right of the node.
154 |   result = n.pad(1, 1, 0, 1)
155 |   let left = 0
156 |   let right = n.w + 1
157 |   let arcRow = n.y1 + 1
158 |   result.poke fgLine, (left, 0, "┬"), (left, arcRow, "╰"), (right, 0, "┬"), (right, arcRow, "╯")
159 |   for x in left+1 .. right-1:
160 |     result.poke fgLine, (x, arcRow, "─")
161 |   for y in 1 .. arcRow-1:
162 |     result.poke fgLine, (left, y, "│"), (right, y, "│")
163 |   result.poke fgLine, ((left+right)/%2, arcRow, "«")
160 | 
161 | proc `!`(n: Node): Node =
162 |   ## Not-predicate: adds a row above `n` and draws a red "━" bar on it,
163 |   ## leaving the first and last column free.
164 |   result = n.pad(0, 0, 1)
165 |   var x = 1
166 |   while x <= result.w - 2:
167 |     result.poke fgRed, (x, result.y0, "━")
168 |     inc x
166 | 
167 | proc `-`*(p1, p2: Node): Node =
168 |   ## Difference, drawn as the negation of `p2` followed by `p1`.
169 |   let excluded = !p2
170 |   result = excluded * p1
169 | 
170 | proc `*`(n: Node): Node =
171 |   ## Zero or more, drawn as an optional one-or-more.
172 |   let repeated = +n
173 |   result = ?repeated
171 | 
172 | proc `@`(n: Node): Node =
173 |   ## Search, drawn as `*(!n * 1) * n`.
174 |   let skipOne = !n * newNode("1")
175 |   let skipped = *skipOne
176 |   result = skipped * n
174 | 
175 | proc `&`(n: Node): Node =
176 |   ## And-predicate, drawn as a double negation.
177 |   let negated = !n
178 |   result = !negated
177 | 
178 | proc choice(ns: varArgs[Node]): Node =
179 |   ## Ordered choice: the alternatives are stacked top to bottom, each
180 |   ## padded on the right to the widest one, and joined by a vertical
181 |   ## rail in column 1 and in column wmax+2.
182 |   var wmax = 0
183 |   for alt in ns:
184 |     if alt.w > wmax:
185 |       wmax = alt.w
186 | 
187 |   # dys[i] is the row on which alternative i has its row 0
188 |   var dys = @[0]
189 |   for i in 1 .. ns.len-1:
190 |     dys.add dys[i-1] + ns[i-1].y1 - ns[i].y0 + 1
191 |   let lastRow = dys[dys.high]
192 | 
193 |   result = Node(w: wmax+4, y0: ns[0].y0, y1: lastRow+ns[ns.high].y1)
194 |   let railL = 1
195 |   let railR = wmax+2
196 |   result.poke fgLine, (0, 0, "─"), (result.w-1, 0, "─")
197 |   for i, alt in ns:
198 |     result.kids.add Kid(n: alt.pad(0, wmax-alt.w), dx: 2, dy: dys[i])
199 |   for y in 1..<lastRow:
200 |     result.poke fgLine, (railL, y, "│"), (railR, y, "│")
201 |   result.poke fgLine, (railL, 0, "┬"), (railR, 0, "┬")
202 |   # junctions for every alternative except the first and the last
203 |   for i in 1 ..< ns.len-1:
204 |     result.poke fgLine, (railL, dys[i], "├"), (railR, dys[i], "┤")
205 |   result.poke fgLine, (railL, lastRow, "╰"), (railR, lastRow, "╯")
201 | 
202 | proc `{}`*(p: Node, n: BiggestInt): Node =
203 |   ## `p` chained `n` times in sequence. For `n` below 2 the result is
204 |   ## `p` itself.
205 |   result = p
206 |   var copies = 1
207 |   while copies < n:
208 |     result = result * p
209 |     inc copies
206 | 
207 | proc `{}`*(p: Node, range: HSlice[system.BiggestInt, system.BiggestInt]): Node =
208 |   ## `range.a` mandatory copies of `p` followed by optional copies up to
209 |   ## `range.b` in total.
210 |   ##
211 |   ## `p{0}` returns `p` itself, so a lower bound of 0 would otherwise
212 |   ## draw one mandatory copy. For `0..b` with `b` > 0 the chain
213 |   ## therefore consists of `b` optional copies only.
214 |   if range.a == 0 and range.b > 0:
215 |     result = ?p
216 |     for i in 1..<range.b:
217 |       result = result * ?p
218 |   else:
219 |     result = p{range.a}
220 |     for i in range.a..<range.b:
221 |       result = result * ?p
211 | 
212 | # This is a simplified parser based on parsePatt(), but lacking any error
213 | # checking. This will always run after parsePatt(), so any errors would already
214 | # have been caught there
215 | 
216 | proc parseRailRoad*(nn: NimNode, grammar: Grammar): Node =
217 |   ## Builds the railroad diagram Node for pattern `nn` (after flattenChoice).
218 |   proc aux(n: NimNode): Node =
219 |     # Converts one pattern NimNode into a Node; applyTemplate returns the expanded template body, or nil when none is found
220 |     proc applyTemplate(name: string, arg: NimNode): NimNode =
221 |       let t = if name in grammar.templates:
222 |         grammar.templates[name]
223 |       else:
224 |         libImportTemplate(name)
225 |       if t != nil:
226 |         proc aux(n: NimNode): NimNode =
227 |           if n.kind == nnkIdent and n.strVal in t.args:
228 |             result = arg[ find(t.args, n.strVal)+1 ]
229 |           else:
230 |             result = copyNimNode(n)
231 |             for nc in n:
232 |               result.add aux(nc)
233 |         result = aux(t.code).flattenChoice()
234 |     # Dispatch on the node kind; kinds that are not handled leave the result nil
235 |     case n.kind:
236 | 
237 |       of nnKPar:
238 |         result = aux n[0]
239 |       # Literals are drawn in the literal colour
240 |       of nnkIntLit:
241 |         result = newNode($n.intVal, fgLit)
242 | 
243 |       of nnkStrLit:
244 |         result = newNode("\"" & $n.strval.dumpSubject() & "\"", fgLit)
245 | 
246 |       of nnkCharLit:
247 |         result = newNode("'" & $n.intVal.char & "'", fgLit)
248 |       # Calls: template expansion first, then choice(), else a capture box (named when called with two arguments)
249 |       of nnkCall:
250 |         var name: string
251 |         if n[0].kind == nnkIdent:
252 |           name = n[0].strVal
253 |         elif n[0].kind == nnkDotExpr:
254 |           name = n[0].repr
255 |         let n2 = applyTemplate(name, n)
256 |         if n2 != nil:
257 |           result = aux n2
258 |         elif name == "choice":
259 |           result = choice(n[1..^1].map(aux))
260 |         elif n.len == 2:
261 |           result = newCapNode aux(n[1])
262 |         elif n.len == 3:
263 |           result = newCapNode(aux(n[2]), n[1].strVal)
264 | 
265 |       of nnkPrefix:
266 |         # Nim combines all prefix chars into one string. Handle prefixes
267 |         # chars right to left
268 |         let cs = n[0].strVal
269 |         var p = aux n[1]
270 |         for i in 1..cs.len:
271 |           case cs[cs.len-i]:
272 |             of '?': p = ?p
273 |             of '+': p = +p
274 |             of '*': p = *p
275 |             of '!': p = !p
276 |             of '@': p = @p
277 |             of '&': p = &p
278 |             of '>': p = newCapNode(p)
279 |             else: p = p
280 |         result = p
281 |       # Binary operators; operators that are not listed leave the result nil
282 |       of nnkInfix:
283 |         case n[0].strVal:
284 |           of "*", "∙": result = aux(n[1]) * aux(n[2])
285 |           of "-": result = aux(n[1]) - aux(n[2])
286 |           of "^": result = newPrecNode(aux(n[1]), intVal(n[2]), "<")
287 |           of "^^": result = newPrecNode(aux(n[1]), intVal(n[2]), ">")
288 |           else: discard
289 |       # Repetition counts: p[n] or p[m..n]
290 |       of nnkBracketExpr:
291 |         let p = aux(n[0])
292 |         if n[1].kind == nnkIntLit:
293 |           result = p{n[1].intVal}
294 |         elif n[1].kind == nnkInfix and n[1][0].eqIdent(".."):
295 |           result = p{n[1][1].intVal..n[1][2].intVal}
296 |         else: discard
297 |       # Identifiers and dotted names are drawn as [name]
298 |       of nnkIdent:
299 |         result = newNode("[" & n.strVal & "]", fgNonterm)
300 | 
301 |       of nnkDotExpr:
302 |         result = newNode("[" & n.repr & "]", fgNonterm)
303 |       # Character sets: collected into a CharSet; an empty set is drawn as "1"
304 |       of nnkCurly:
305 |         var cs: CharSet
306 |         for nc in n:
307 |           if nc.kind == nnkCharLit:
308 |             cs.incl nc.intVal.char
309 |           elif nc.kind == nnkInfix:
310 |             if nc[0].kind == nnkIdent and nc[0].eqIdent(".."):
311 |               for c in nc[1].intVal..nc[2].intVal:
312 |                 cs.incl c.char
313 |         if cs.card == 0:
314 |           result = newNode("1", fgNonterm)
315 |         else:
316 |           result = newNode(dumpSet(cs), fgLit)
317 |       # Prefixed string literals: i"..." shows its text, E"..." shows ERROR
318 |       of nnkCallStrLit:
319 |         case n[0].strVal:
320 |           of "i": result = newNode(n[1].strval)
321 |           of "E": result = newNode("ERROR", fgError)
322 |       # A bracket is drawn as the repr of its first element inside [ ]
323 |       of nnkBracket:
324 |         result = newNode("[" & n[0].repr & "]", fgNonterm)
325 | 
326 |       else:
327 |         discard
328 | 
329 |   let nnf = nn.flattenChoice
330 |   result = aux(nnf)
331 | 
332 | 
333 | 


--------------------------------------------------------------------------------
/src/npeg/stack.nim:
--------------------------------------------------------------------------------
 1 | 
 2 | # This module implements a basic stack[T]. This is used instead of seq[T]
 3 | # because the latter has bad performance when unwinding more than one frame at
 4 | # a time (i.e., setLen). These stacks keep track of their own top and do not
 5 | # shrink the underlying seq when popping or unwinding.
 6 | 
 7 | type
 8 |   Stack*[T] = object
 9 |     name: string  ## used in the overflow error message raised by grow()
10 |     top*: int  ## number of frames in use; push() writes to frames[top]
11 |     max: int  ## grow() raises once top has reached this value
12 |     frames: seq[T]  ## backing storage; enlarged by grow(), never shrunk by pop()
13 | 
14 | 
15 | proc `$`*[T](s: Stack[T]): string =
16 |   ## One "<index>: <frame>" line for every frame below `s.top`,
17 |   ## starting at index 0.
18 |   var idx = 0
19 |   while idx < s.top:
20 |     result.add $idx
21 |     result.add ": "
22 |     result.add $s.frames[idx]
23 |     result.add "\n"
24 |     inc idx
18 | 
19 | proc initStack*[T](name: string, len: int, max: int=int.high): Stack[T] =
20 |   ## Empty stack called `name` with `len` preallocated frames and a
21 |   ## depth limit of `max`.
22 |   result = Stack[T](name: name, max: max, frames: newSeq[T](len))
23 | 
24 | proc grow*[T](s: var Stack[T]) =
25 |   ## Doubles the number of preallocated frames. Raises
26 |   ## NPegStackOverflowError instead when `s.top` has reached `s.max`.
27 |   ##
28 |   ## A stack created with zero frames grows to one frame; doubling zero
29 |   ## would leave it empty and make the following push() index out of
30 |   ## bounds.
31 |   if s.top >= s.max:
32 |     mixin NPegStackOverflowError
33 |     raise newException(NPegStackOverflowError, s.name & " stack overflow, depth>" & $s.max)
34 |   s.frames.setLen max(1, s.frames.len * 2)
29 | 
30 | template push*[T](s: var Stack[T], frame: T) =
31 |   ## Stores `frame` at index `s.top` and advances `s.top` by one,
32 |   ## calling grow() first when all preallocated frames are in use.
33 |   if s.frames.len <= s.top:
34 |     grow(s)
35 |   s.frames[s.top] = frame
36 |   s.top += 1
34 | 
35 | template pop*[T](s: var Stack[T]): T =
36 |   ## Lowers `s.top` by one and yields the frame stored at the new top
37 |   ## index. Asserts that the stack is not empty.
38 |   assert s.top > 0
39 |   s.top -= 1
40 |   s.frames[s.top]
39 | 
40 | template peek*[T](s: Stack[T]): T =  # yields the frame at index top-1 without changing top
41 |   assert s.top > 0  # the stack must not be empty
42 |   s.frames[s.top-1]
43 | 
44 | template `[]`*[T](s: Stack[T], idx: int): T =  # yields the frame stored at index idx
45 |   assert idx < s.top  # only an upper bound is asserted here
46 |   s.frames[idx]
47 | 
48 | template update*[T](s: Stack[T], field: untyped, val: untyped) =  # assigns val to `field` of the frame at index top-1
49 |   assert s.top > 0  # the stack must not be empty
50 |   s.frames[s.top-1].field = val
51 | 
52 | 


--------------------------------------------------------------------------------
/tests/basics.nim:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | import strutils
  3 | import npeg
  4 |   
  5 | {.push warning[Spacing]: off.}
  6 | 
  7 | 
  8 | suite "unit tests":
  9 | 
 10 |   test "atoms":
 11 |     doAssert     patt(0 * "a").match("a").ok
 12 |     doAssert     patt(1).match("a").ok
 13 |     doAssert     patt(1).match("a").ok
 14 |     doAssert     patt(2).match("a").ok == false
 15 |     doAssert     patt("a").match("a").ok
 16 |     doAssert     patt("a").match("b").ok == false
 17 |     doAssert     patt("abc").match("abc").ok
 18 |     doAssert     patt({'a'}).match("a").ok
 19 |     doAssert     patt({'a'}).match("b").ok == false
 20 |     doAssert     patt({'a','b'}).match("a").ok
 21 |     doAssert     patt({'a','b'}).match("b").ok
 22 |     doAssert     patt({'a','b'}).match("c").ok == false
 23 |     doAssert     patt({'a'..'c'}).match("a").ok
 24 |     doAssert     patt({'a'..'c'}).match("b").ok
 25 |     doAssert     patt({'a'..'c'}).match("c").ok
 26 |     doAssert     patt({'a'..'c'}).match("d").ok == false
 27 |     doAssert     patt({'a'..'c'}).match("a").ok
 28 |     doAssert     patt("").match("abcde").matchLen == 0
 29 |     doAssert     patt("a").match("abcde").matchLen == 1
 30 |     doAssert     patt("ab").match("abcde").matchLen == 2
 31 |     doAssert     patt(i"ab").match("AB").ok
 32 | 
 33 |   test "*: concatenation":
 34 |     doAssert     patt("a" * "b").match("ab").ok
 35 |     #doAssert     patt("a" ∙ "b").match("ab").ok
 36 | 
 37 |   test "?: zero or one":
 38 |     doAssert     patt("a" * ?"b" * "c").match("abc").ok
 39 |     doAssert     patt("a" * ?"b" * "c").match("ac").ok
 40 | 
 41 |   test "+: one or more":
 42 |     doAssert     patt("a" * +"b" * "c").match("abc").ok
 43 |     doAssert     patt("a" * +"b" * "c").match("abbc").ok
 44 |     doAssert     patt("a" * +"b" * "c").match("ac").ok == false
 45 | 
 46 |   test "*: zero or more":
 47 |     doAssert     patt(*'a').match("aaaa").ok
 48 |     doAssert     patt(*'a' * 'b').match("aaaab").ok
 49 |     doAssert     patt(*'a' * 'b').match("bbbbb").ok
 50 |     doAssert     patt(*'a' * 'b').match("caaab").ok == false
 51 |     doAssert     patt(+'a' * 'b').match("aaaab").ok
 52 |     doAssert     patt(+'a' * 'b').match("ab").ok
 53 |     doAssert     patt(+'a' * 'b').match("b").ok == false
 54 | 
 55 |   test "!: not predicate":
 56 |     doAssert     patt('a' * !'b').match("ac").ok
 57 |     doAssert     patt('a' * !'b').match("ab").ok == false
 58 | 
 59 |   test "&: and predicate":
 60 |     doAssert     patt(&"abc").match("abc").ok
 61 |     doAssert     patt(&"abc").match("abd").ok == false
 62 |     doAssert     patt(&"abc").match("abc").matchLen == 0
 63 | 
 64 |   test "@: search":
 65 |     doAssert     patt(@"fg").match("abcdefghijk").matchLen == 7
 66 | 
 67 |   test "[n]: count":
 68 |     doAssert     patt(1[3]).match("aaaa").ok
 69 |     doAssert     patt(1[4]).match("aaaa").ok
 70 |     doAssert     patt(1[5]).match("aaaa").ok == false
 71 | 
 72 |   test "[m..n]: count":
 73 |     doAssert     patt('a'[2..4] * !1).match("").ok == false
 74 |     doAssert     patt('a'[2..4] * !1).match("a").ok == false
 75 |     doAssert     patt('a'[2..4] * !1).match("aa").ok
 76 |     doAssert     patt('a'[2..4] * !1).match("aaa").ok
 77 |     doAssert     patt('a'[2..4] * !1).match("aaaa").ok
 78 |     doAssert     patt('a'[2..4] * !1).match("aaaaa").ok == false
 79 | 
 80 |     doAssert     patt('a'[0..1] * !1).match("").ok
 81 |     doAssert     patt('a'[0..1] * !1).match("a").ok
 82 |     doAssert     patt('a'[0..1] * !1).match("aa").ok == false
 83 | 
 84 |   test "|: ordered choice":
 85 |     doAssert     patt("ab" | "cd").match("ab").ok
 86 |     doAssert     patt("ab" | "cd").match("cd").ok
 87 |     doAssert     patt("ab" | "cd").match("ef").ok == false
 88 |     doAssert     patt(("ab" | "cd") | "ef").match("ab").ok == true
 89 |     doAssert     patt(("ab" | "cd") | "ef").match("cd").ok == true
 90 |     doAssert     patt(("ab" | "cd") | "ef").match("ef").ok == true
 91 |     doAssert     patt("ab" | ("cd") | "ef").match("ab").ok == true
 92 |     doAssert     patt("ab" | ("cd") | "ef").match("cd").ok == true
 93 |     doAssert     patt("ab" | ("cd") | "ef").match("ef").ok == true
 94 | 
 95 |   test "-: difference":
 96 |     doAssert     patt("abcd" - "abcdef").match("abcdefgh").ok == false
 97 |     doAssert     patt("abcd" - "abcdf").match("abcdefgh").ok
 98 | 
 99 |   test "Builtins":
100 |     doAssert     patt(Digit).match("1").ok
101 |     doAssert     patt(Digit).match("a").ok == false
102 |     doAssert     patt(Upper).match("A").ok
103 |     doAssert     patt(Upper).match("a").ok == false
104 |     doAssert     patt(Lower).match("a").ok
105 |     doAssert     patt(Lower).match("A").ok == false
106 |     doAssert     patt(+Digit).match("12345").ok
107 |     doAssert     patt(+Xdigit).match("deadbeef").ok
108 |     doAssert     patt(+Graph).match(" x").ok == false
109 | 
110 |   test "Misc combos":
111 |     doAssert     patt('a' | ('b' * 'c')).match("a").ok
112 |     doAssert     patt('a' | ('b' * 'c') | ('d' * 'e' * 'f')).match("a").ok
113 |     doAssert     patt('a' | ('b' * 'c') | ('d' * 'e' * 'f')).match("bc").ok
114 |     doAssert     patt('a' | ('b' * 'c') | ('d' * 'e' * 'f')).match("def").ok
115 | 
116 |   test "Compile time 1":
117 |     proc doTest(): string {.compileTime.} =
118 |       var n: string
119 |       let p = peg "number":
120 |         number <- >+Digit:
121 |           n = $1
122 |       doAssert p.match("12345").ok
123 |       return n
124 |     const v = doTest()
125 |     doAssert v == "12345"
126 | 
127 |   test "Compile time 2":
128 |     static:
129 |       var n: string
130 |       let p = peg "number":
131 |         number <- >+Digit:
132 |           n = $1
133 |       doAssert p.match("12345").ok
134 |       doAssert n == "12345"
135 | 
136 |   test "matchMax":
137 |     let s = peg "line":
138 |       line   <- one | two
139 |       one    <- +Digit * 'c' * 'd' * 'f'
140 |       two    <- +Digit * 'b'
141 |     let r = s.match("1234cde")
142 |     doAssert r.ok == false
143 |     doAssert r.matchLen == 4
144 |     doAssert r.matchMax == 6
145 | 
146 |   test "grammar1":
147 |     let a = peg "r1":
148 |       r1 <- "abc"
149 |       r2 <- r1 * r1
150 |     doAssert a.match("abcabc").ok
151 | 
152 |   test "grammar2":
153 |     let a = peg "r1":
154 |       r2 <- r1 * r1
155 |       r1 <- "abc"
156 |     doAssert a.match("abcabc").ok
157 |   
158 |   test "backref":
159 |     doAssert patt(R("sep", Alpha) * *(1 - R("sep")) * R("sep") * !1).match("abbbba").ok
160 |     doAssert patt(R("sep", Alpha) * *(1 - R("sep")) * R("sep") * !1).match("abbbbc").ok == false
161 | 
162 |   test "raise exception 1":
163 |     let a = patt E"boom"
164 |     expect NPegParseError:
165 |       doAssert a.match("abcabc").ok
166 | 
167 |   test "raise exception 2":
168 |     let a = patt 4 * E"boom"
169 |     try:
170 |       doAssert a.match("abcabc").ok
171 |     except NPegParseError as e:
172 |       doAssert e.matchLen == 4
173 |       doAssert e.matchMax == 4
174 | 
175 |   test "out of range capture exception 1":
176 |     expect NPegCaptureOutOfRangeError:
177 |       let a = patt 1:
178 |         echo capture[10].s
179 |       doAssert a.match("c").ok
180 | 
181 |   test "out of range capture exception 2":
182 |     expect NPegCaptureOutOfRangeError:
183 |       let a = patt 1:
184 |         echo $9
185 |       doAssert a.match("c").ok
186 | 
187 |   test "unknown backref error":
188 |     expect NPegUnknownBackrefError:
189 |       discard patt(R("sep", Alpha) * *(1 - R("sep")) * R("sap") * !1).match("abbbba")
190 | 
191 |   test "user validation":
192 |     let p = peg "line":
193 |       line <- uint8 * "," * uint8 * !1
194 |       uint8 <- >+Digit:
195 |         let v = parseInt($1)
196 |         validate(v>=0 and v<=255)
197 |     doAssert p.match("10,10").ok
198 |     doAssert p.match("0,255").ok
199 |     doAssert not p.match("10,300").ok
200 |     doAssert not p.match("300,10").ok
201 | 
202 |   test "user fail":
203 |     let p = peg "line":
204 |       line <- 1:
205 |         fail()
206 |     doAssert not p.match("a").ok
207 | 
208 |   test "templates":
209 |     let p = peg "a":
210 |       list(patt, sep) <- patt * *(sep * patt)
211 |       commaList(patt) <- list(patt, ",")
212 |       a <- commaList(>+Digit)
213 |     doAssert p.match("11,22,3").captures == ["11","22","3"]
214 | 
215 |   test "templates with choices":
216 |     let p = peg aap:
217 |       one() <- "one"
218 |       two() <- "one"
219 |       three() <- "flip" | "flap"
220 |       aap <- one() | two() | three()
221 |     doAssert p.match("onetwoflip").ok
222 | 
223 | 


--------------------------------------------------------------------------------
/tests/captures.nim:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import npeg
 3 | import strutils
 4 | import json
 5 |   
 6 | {.push warning[Spacing]: off.}
 7 | 
 8 | 
 9 | suite "captures":
10 | 
11 |   test "no captures":
12 |     doAssert    patt(1).match("a").captures == @[]
13 | 
14 |   test "string captures":
15 |     doAssert     patt(>1).match("ab").captures == @["a"]
16 |     doAssert     patt(>(>1)).match("ab").captures == @["a", "a"]
17 |     doAssert     patt(>1 * >1).match("ab").captures == @["a", "b"]
18 |     doAssert     patt(>(>1 * >1)).match("ab").captures == @["ab", "a", "b"]
19 |     doAssert     patt(>(>1 * >1)).match("ab").captures == @["ab", "a", "b"]
20 | 
21 |   test "code block captures":
22 |     let p = peg "foo":
23 |       foo <- >1:
24 |         doAssert $1 == "a"
25 |         doAssert @1 == 0
26 |     doAssert p.match("a").ok
27 | 
28 |   test "code block captures 2":
29 |     let p = peg("foo", v: string):
30 |       foo <- >1: v = $1
31 |     var a: string
32 |     doAssert p.match("a", a).ok
33 |     doAssert a == "a"
34 |   
35 |   test "code block captures 3":
36 |     var a: string
37 |     let p = patt >1:
38 |         a = $1
39 |     doAssert p.match("a").ok
40 |     doAssert a == "a"
41 |   
42 |   test "code block captures 4":
43 |     let p = peg "foo":
44 |       foo <- +Digit * >1:
45 |         doAssert $1 == "a"
46 |         doAssert @1 == 4
47 |     doAssert p.match("1234a").ok
48 | 
49 |   test "code block captures with typed parser":
50 | 
51 |     type Thing = object
52 |       word: string
53 |       number: int
54 | 
55 |     let s = peg("foo", t: Thing):
56 |       foo <- word * number
57 |       word <- >+Alpha:
58 |         t.word = $1
59 |       number <- >+Digit:
60 |         t.number = parseInt($1)
61 | 
62 |     var t = Thing()
63 |     doAssert s.match("foo123", t).ok == true
64 |     doAssert t.word == "foo"
65 |     doAssert t.number == 123
66 | 
67 |   when not defined(gcDestructors):
68 |     test "Capture out of range":
69 |       expect NPegException:
70 |         let p = peg "l":
71 |           l <- 1: echo $1
72 |         discard p.match("a")
73 | 
74 |   test "push":
75 |     let p = peg "m":
76 |       m <- >n * '+' * >n:
77 |         push $(parseInt($1) + parseInt($2))
78 |       n <- +Digit
79 |     let r = p.match("12+34")
80 |     doAssert r.captures()[0] == "46"
81 |   
82 |   test "nested":
83 |     doAssert patt(>(>1 * >1)).match("ab").captures == @["ab", "a", "b"]
84 | 
85 |   test "nested codeblock":
86 |     let p = peg foo:
87 |       foo <- >(>1 * b)
88 |       b <- >1: push $1
89 |     doAssert p.match("ab").captures() == @["ab", "a", "b"]
90 | 
91 |   test "clyybber":
92 |     let p = peg "m":
93 |       m <- n * '+' * n:
94 |         push $(parseInt($1) + parseInt($2))
95 |       >n <- +Digit
96 |     let r = p.match("12+34")
97 |     doAssert r.captures()[0] == "46"
98 | 


--------------------------------------------------------------------------------
/tests/config.nims:
--------------------------------------------------------------------------------
1 | switch("path", "$projectDir/../src")
2 | switch("hints", "off")
3 | 


--------------------------------------------------------------------------------
/tests/examples.nim:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | import npeg
  3 | import json
  4 | import strutils
  5 | import math
  6 | import tables
  7 | import npeg/lib/uri
  8 | 
  9 | {.push warning[Spacing]: off.}
 10 | 
 11 | 
 12 | suite "examples":
 13 | 
 14 |   ######################################################################
 15 | 
 16 |   test "misc":
 17 | 
 18 |     let p1 = patt +{'a'..'z'}
 19 |     doAssert p1.match("lowercaseword").ok
 20 | 
 21 |     let p2 = peg "ident":
 22 |       lower <- {'a'..'z'}
 23 |       ident <- +lower
 24 |     doAssert p2.match("lowercaseword").ok
 25 | 
 26 |   ######################################################################
 27 | 
 28 |   test "shadowing":
 29 |     
 30 |     let parser = peg "line":
 31 |       line <- uri.URI
 32 |       uri.scheme <- >uri.scheme
 33 |       uri.host <- >uri.host
 34 |       uri.port <- >+Digit
 35 |       uri.path <- >uri.path
 36 |     
 37 |     let r = parser.match("http://nim-lang.org:8080/one/two/three")
 38 |     doAssert r.captures == @["http", "nim-lang.org", "8080", "/one/two/three"]
 39 | 
 40 |   ######################################################################
 41 | 
 42 |   test "matchFile":
 43 | 
 44 |     when defined(windows) or defined(posix):
 45 | 
 46 |       let parser = peg "pairs":
 47 |         pairs <- pair * *(',' * pair)
 48 |         word <- +Alnum
 49 |         number <- +Digit
 50 |         pair <- (>word * '=' * >number)
 51 | 
 52 |       let r = parser.matchFile "tests/testdata"
 53 |       doAssert r.ok
 54 |       doAssert r.captures == @["one", "1", "two", "2", "three", "3", "four", "4"]
 55 | 
 56 |   ######################################################################
 57 | 
 58 |   test "JSON parser":
 59 |     # Validates a JSON document with a PEG; only the match result is checked.
 60 |     let json = """
 61 |       {
 62 |           "glossary": {
 63 |               "title": "example glossary",
 64 |               "GlossDiv": {
 65 |                   "title": "S",
 66 |                   "GlossList": {
 67 |                       "GlossEntry": {
 68 |                           "ID": "SGML",
 69 |                               "SortAs": "SGML",
 70 |                               "GlossTerm": "Standard Generalized Markup Language",
 71 |                               "Acronym": "SGML",
 72 |                               "Abbrev": "ISO 8879:1986",
 73 |                               "GlossDef": {
 74 |                               "para": "A meta-markup language, used to create markup languages such as DocBook.",
 75 |                               "GlossSeeAlso": ["GML", "XML"]
 76 |                           },
 77 |                           "GlossSee": "markup"
 78 |                       }
 79 |                   }
 80 |               }
 81 |           }
 82 |       }
 83 |       """
 84 |     # `doc` is the start rule: one JSON value followed by end of input.
 85 |     let s = peg "doc":
 86 |       S              <- *Space
 87 |       jtrue          <- "true"
 88 |       jfalse         <- "false"
 89 |       jnull          <- "null"
 90 |       # A backslash may be followed by one of " \ / b f n r t, or by uXXXX.
 91 |       unicodeEscape  <- 'u' * Xdigit[4]
 92 |       escape         <- '\\' * ({ '"', '\\', '/', 'b', 'f', 'n', 'r', 't' } | unicodeEscape)
 93 |       stringBody     <- *escape * *( +( {'\x20'..'\xff'} - {'"'} - {'\\'}) * *escape)
 94 |       jstring        <- ?S * '"' * stringBody * '"' * ?S
 95 |       # Numbers: optional sign, integer part, optional fraction and exponent.
 96 |       minus          <- '-'
 97 |       intPart        <- '0' | (Digit-'0') * *Digit
 98 |       fractPart      <- "." * +Digit
 99 |       expPart        <- ( 'e' | 'E' ) * ?( '+' | '-' ) * +Digit
100 |       jnumber        <- ?minus * intPart * ?fractPart * ?expPart
101 |       # Values, objects and arrays are mutually recursive through `JSON`.
102 |       doc            <- JSON * !1
103 |       JSON           <- ?S * ( jnumber | jobject | jarray | jstring | jtrue | jfalse | jnull ) * ?S
104 |       jobject        <- '{' * ( jstring * ":" * JSON * *( "," * jstring * ":" * JSON ) | ?S ) * "}"
105 |       jarray         <- "[" * ( JSON * *( "," * JSON ) | ?S ) * "]"
106 | 
107 |     doAssert s.match(json).ok
108 | 
109 |   ######################################################################
110 | 
111 |   test "HTTP with action captures to Nim object":
112 |     # Parses an HTTP response into a `Request` object using code block captures.
113 |     type
114 |       Request = object
115 |         proto: string
116 |         version: string
117 |         code: int
118 |         message: string
119 |         headers: Table[string, string]
120 |     # `crlf` accepts CRLF as well as the bare LF line endings used by `data` below.
121 |     let s = peg("http", userdata: Request):
122 |       space       <- ' '
123 |       crlf        <- ?'\r' * '\n'
124 |       url         <- +(Alpha | Digit | '/' | '_' | '.')
125 |       eof         <- !1
126 |       header_name <- +(Alpha | '-')
127 |       header_val  <- +(1-{'\n'}-{'\r'})
128 |       proto       <- >(+Alpha):
129 |         userdata.proto = $1
130 |       version     <- >(+Digit * '.' * +Digit):
131 |         userdata.version = $1
132 |       code        <- >+Digit:
133 |         userdata.code = parseInt($1)
134 |       msg         <- >(+(1 - '\r' - '\n')):
135 |         userdata.message = $1
136 |       header      <- >header_name * ": " * >header_val:
137 |         userdata.headers[$1] = $2
138 | 
139 |       response    <- proto * '/' * version * space * code * space * msg
140 |       headers     <- *(header * crlf)
141 |       http        <- response * crlf * headers * eof
142 | 
143 |     let data = """
144 | HTTP/1.1 301 Moved Permanently
145 | Content-Length: 162
146 | Content-Type: text/html
147 | Location: https://nim.org/
148 | """
149 | 
150 |     var req: Request
151 |     let res = s.match(data, req)
152 |     doAssert res.ok
153 |     doAssert req.proto == "HTTP"
154 |     doAssert req.version == "1.1"
155 |     doAssert req.code == 301
156 |     doAssert req.message == "Moved Permanently"
157 |     doAssert req.headers["Content-Length"] == "162"
158 |     doAssert req.headers["Content-Type"] == "text/html"
159 |     doAssert req.headers["Location"] == "https://nim.org/"
160 | 
161 |   ######################################################################
162 | 
163 |   test "UTF-8":
164 | 
165 |     let b = "  añyóng  ♜♞♝♛♚♝♞♜ оживлённым   "
166 | 
167 |     let m = peg "s":
168 | 
169 |       cont <- {128..191}
170 | 
171 |       utf8 <- {0..127} |
172 |               {194..223} * cont[1] |
173 |               {224..239} * cont[2] |
174 |               {240..244} * cont[3]
175 | 
176 |       s <- *(@ > +(utf8-' '))
177 | 
178 |     let r = m.match(b)
179 |     doAssert r.ok
180 |     let c = r.captures
181 |     doAssert c == @["añyóng", "♜♞♝♛♚♝♞♜", "оживлённым"]
182 | 
183 |   ######################################################################
184 | 
185 |   test "Back references":
186 | 
187 |     let p = peg "doc":
188 |       S <- *Space
189 |       doc <- +word * "<<" * R("sep", sep) * S * >heredoc * R("sep") * S * +word
190 |       word <- +Alpha * S
191 |       sep <- +Alpha
192 |       heredoc <- +(1 - R("sep"))
193 | 
194 |     let d = """This is a <<EOT here document
195 |     with multiple lines EOT end"""
196 | 
197 |     let r = p.match(d)
198 |     doAssert r.ok
199 |     doAssert r.captures[0] == "here document\n    with multiple lines "
200 | 
201 |   ######################################################################
202 |   
203 |   test "RFC3986: Uniform Resource Identifier (URI): Generic Syntax":
204 | 
205 |     type Uri = object
206 |       scheme: string
207 |       userinfo: string
208 |       host: string
209 |       path: string
210 |       port: string
211 |       query: string
212 |       fragment: string
213 |     # The grammar below follows the ABNF notation of the RFC rule by rule and
214 |     # is a nice example of how to create a parser from an RFC description.
215 |     # Because PEG choice is ordered, alternatives that are a prefix of another
216 |     # (see `dec_octet`) are listed longest first. Optimizations can be made to
217 |     # limit backtracking.
218 |     let p = peg("URI", userdata: Uri):
219 | 
220 |       URI <- scheme * ":" * hier_part * ?( "?" * query) * ?( "#" * fragment) * !1
221 | 
222 |       hier_part <- "//" * authority * path_abempty |
223 |                    path_absolute |
224 |                    path_rootless |
225 |                    path_empty
226 | 
227 |       URI_reference <- URI | relative_ref
228 | 
229 |       absolute_uri <- scheme * ":" * hier_part * ?( "?" * query)
230 | 
231 |       relative_ref <- relative_part * ?( "?" * query) * ?( "#" * fragment)
232 | 
233 |       relative_part <- "//" * authority * path_abempty |
234 |                        path_absolute |
235 |                        path_noscheme |
236 |                        path_empty
237 | 
238 |       scheme <- >(Alpha * *( Alpha | Digit | "+" | "-" | "." )): userdata.scheme = $1
239 | 
240 |       authority <- ?(userinfo * "@") * host * ?( ":" * port)
241 |       userinfo <- >*(unreserved | pct_encoded | sub_delims | ":"):
242 |         userdata.userinfo = $1
243 | 
244 |       host <- >(IP_literal | IPv4address | reg_name): userdata.host = $1
245 |       port <- >*Digit: userdata.port = $1
246 | 
247 |       IP_literal <- "[" * (IPv6address | IPvFuture) * "]"
248 | 
249 |       IPvFuture <- "v" * +Xdigit * "." * +(unreserved | sub_delims | ":")
250 | 
251 |       IPv6address <-                                     (h16 * ":")[6] * ls32 |
252 |                                                   "::" * (h16 * ":")[5] * ls32 |
253 |                    ?( h16                     ) * "::" * (h16 * ":")[4] * ls32 |
254 |                    ?( h16 * (":" * h16)[0..1] ) * "::" * (h16 * ":")[3] * ls32 |
255 |                    ?( h16 * (":" * h16)[0..2] ) * "::" * (h16 * ":")[2] * ls32 |
256 |                    ?( h16 * (":" * h16)[0..3] ) * "::" * (h16 * ":")    * ls32 |
257 |                    ?( h16 * (":" * h16)[0..4] ) * "::" *                  ls32 |
258 |                    ?( h16 * (":" * h16)[0..5] ) * "::" *                  h16  |
259 |                    ?( h16 * (":" * h16)[0..6] ) * "::"
260 | 
261 |       h16 <- Xdigit[1..4]
262 |       ls32 <- (h16 * ":" * h16) | IPv4address
263 |       IPv4address <- dec_octet * "." * dec_octet * "." * dec_octet * "." * dec_octet
264 | 
265 |       dec_octet <- "25" * {'0'..'5'}        | # 250-255
266 |                   "2" * {'0'..'4'} * Digit | # 200-249
267 |                   "1" * Digit * Digit      | # 100-199
268 |                   {'1'..'9'} * Digit       | # 10-99
269 |                   Digit                      # 0-9
270 | 
271 |       reg_name <- *(unreserved | pct_encoded | sub_delims)
272 | 
273 |       path <- path_abempty  | # begins with "/" or is empty
274 |               path_absolute | # begins with "/" but not "//"
275 |               path_noscheme | # begins with a non-colon segment
276 |               path_rootless | # begins with a segment
277 |               path_empty      # zero characters
278 | 
279 |       path_abempty  <- >(*( "/" * segment )): userdata.path = $1
280 |       path_absolute <- >("/" * ?( segment_nz * *( "/" * segment ) )): userdata.path = $1
281 |       path_noscheme <- >(segment_nz_nc * *( "/" * segment )): userdata.path = $1
282 |       path_rootless <- >(segment_nz * *( "/" * segment )): userdata.path = $1
283 |       path_empty    <- 0
284 | 
285 |       segment       <- *pchar
286 |       segment_nz    <- +pchar
287 |       segment_nz_nc <- +( unreserved | pct_encoded | sub_delims | "@" )
288 |                     # non_zero_length segment without any colon ":"
289 | 
290 |       pchar         <- unreserved | pct_encoded | sub_delims | ":" | "@"
291 | 
292 |       query         <- >*( pchar | "/" | "?" ): userdata.query = $1
293 | 
294 |       fragment      <- >*( pchar | "/" | "?" ): userdata.fragment = $1
295 | 
296 |       pct_encoded   <- "%" * Xdigit * Xdigit
297 | 
298 |       unreserved    <- Alpha | Digit | "-" | "." | "_" | "~"
299 |       reserved      <- gen_delims | sub_delims
300 |       gen_delims    <- ":" | "/" | "?" | "#" | "[" | "]" | "@"
301 |       sub_delims    <- "!" | "$" | "&" | "'" | "(" | ")" | "*" | "+" | "," | ";" | "="
302 | 
303 |     let urls = @[
304 |       "s3://somebucket/somefile.txt",
305 |       "scheme://user:pass@xn--mgbh0fb.xn--kgbechtv",
306 |       "scheme://user:pass@host:81/path?query#fragment",
307 |       "ScheMe://user:pass@HoSt:81/path?query#fragment",
308 |       "scheme://HoSt:81/path?query#fragment",
309 |       "scheme://@HoSt:81/path?query#fragment",
310 |       "scheme://user:pass@host/path?query#fragment",
311 |       "scheme://user:pass@host:/path?query#fragment",
312 |       "scheme://host/path?query#fragment",
313 |       "scheme://10.0.0.2/p?q#f",
314 |       "scheme://[vAF.1::2::3]/p?q#f",
315 |       "scheme:path?query#fragment",
316 |       "scheme:///path?query#fragment",
317 |       "scheme://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]?query#fragment",
318 |       "scheme:path#fragment",
319 |       "scheme:path?#fragment",
320 |       "ldap://[2001:db8::7]/c=GB?objectClass?one",
321 |       "http://example.org/hello:12?foo=bar#test",
322 |       "android-app://org.wikipedia/http/en.m.wikipedia.org/wiki/The_Hitchhiker%27s_Guide_to_the_Galaxy",
323 |       "ftp://:/p?q#f",
324 |       "scheme://user:pass@host:000000000081/path?query#fragment",
325 |       "scheme://user:pass@host:81/path?query#fragment",
326 |       "ScheMe://user:pass@HoSt:81/path?query#fragment",
327 |       "scheme://HoSt:81/path?query#fragment",
328 |       "scheme://@HoSt:81/path?query#fragment",
329 |       "scheme://user:pass@host/path?query#fragment",
330 |       "scheme://user:pass@host:/path?query#fragment",
331 |       "scheme://user:pass@host/path?query#fragment",
332 |       "scheme://host/path?query#fragment",
333 |       "scheme://10.0.0.2/p?q#f",
334 |       "scheme:path?query#fragment",
335 |       "scheme:///path?query#fragment",
336 |       "scheme://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]?query#fragment",
337 |       "scheme:path#fragment",
338 |       "scheme:path?#fragment",
339 |       "tel:05000",
340 |       "scheme:path#",
341 |       "https://thephpleague.com./p?#f",
342 |       "http://a_.!~*\'(-)n0123Di%25%26:pass;:&=+$,word@www.zend.com",
343 |       "http://",
344 |       "http:::/path",
345 |       "ldap://[2001:db8::7]/c=GB?objectClass?one",
346 |       "http://example.org/hello:12?foo=bar#test",
347 |       "android-app://org.wikipedia/http/en.m.wikipedia.org/wiki/The_Hitchhiker%27s_Guide_to_the_Galaxy",
348 |       "scheme://user:pass@xn--mgbh0fb.xn--kgbechtv",
349 |       "http://download.linuxjournal.com/pdf/get-doc.php?code=2c230d54e20e7cb595c660da48be7622&tcode=epub-301-"
350 |     ]
351 | 
352 |     # Every URL in the list must be accepted by the grammar. A failing
353 |     # assertion names the offending URL and fails only this test, instead of
354 |     # terminating the whole test run.
355 |     for s in urls:
356 |       var uri: Uri
357 |       doAssert p.match(s, uri).ok, "failed to parse URI: " & s


--------------------------------------------------------------------------------
/tests/json-32M.bzip2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/tests/json-32M.bzip2


--------------------------------------------------------------------------------
/tests/lexparse.nim:
--------------------------------------------------------------------------------
 1 | import npeg, strutils, sequtils, unittest
 2 | 
 3 | type
 4 | 
 5 |   Token* = enum
 6 |     tInt
 7 |     tAdd
 8 |     cAddExpr
 9 | 
10 |   Node = ref object
11 |     case kind: Token
12 |     of tInt:
13 |       intVal: int
14 |     of tAdd:
15 |       discard
16 |     of cAddExpr:
17 |       l, r: Node
18 | 
19 |   State = ref object
20 |     tokens: seq[Node]
21 |     stack: seq[Node]
22 | 
23 | # Npeg uses `==` to check if a subject matches a literal
24 | 
25 | proc `==`(n: Node, t: Token): bool = n.kind == t
26 | 
27 | proc `$`(n: Node): string =
28 |   result = case n.kind  # text form of a node; cAddExpr renders both children recursively
29 |     of tInt: $n.intVal
30 |     of tAdd: "+"
31 |     of cAddExpr: "(" & $n.l & " + " & $n.r & ")"
32 | 
33 | let lexer = peg(tokens, st: State):
34 |   s      <- *Space
35 |   tokens <- s * *(token * s)
36 |   token  <- int | add
37 |   int    <- +Digit:
38 |     st.tokens.add Node(kind: tInt, intVal: parseInt($0))
39 |   add    <- '+':
40 |     st.tokens.add Node(kind: tAdd)
41 | 
42 | let parser = peg(g, Node, st: State):
43 |   g   <- int * *add * !1
44 |   int <- [tInt]:
45 |     st.stack.add $0
46 |   add <- [tAdd] * int:
47 |     st.stack.add Node(kind: cAddExpr, r: st.stack.pop, l: st.stack.pop)
48 | 
49 | suite "lexer/parser":
50 | 
51 |   test "run":
52 | 
53 |     var st = State()
54 |     doAssert lexer.match("1 + 2 + 3", st).ok
55 |     doAssert parser.match(st.tokens, st).ok
56 |     doAssert $st.stack[0] == "((1 + 2) + 3)"
57 | 
58 | 
59 | 
60 | 


--------------------------------------------------------------------------------
/tests/lib.nim:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import strutils
 3 | import unicode
 4 | import npeg
 5 | import npeg/lib/types
 6 | import npeg/lib/utf8
 7 | 
 8 | {.push warning[Spacing]: off.}
 9 | 
10 | 
11 | suite "unit tests":
12 | 
13 |   test "types":
14 |     doAssert     patt(types.uint8).match("0").ok
15 |     doAssert     patt(types.uint8).match("255").ok
16 |     doAssert not patt(types.uint8).match("256").ok
17 | 
18 |     doAssert     patt(types.int8).match("-128").ok
19 |     doAssert     patt(types.int8).match("127").ok
20 |     doAssert not patt(types.int8).match("-129").ok
21 |     doAssert not patt(types.int8).match("128").ok
22 |     
23 |     when defined(cpu64):
24 |       doAssert     patt(types.uint32).match("4294967295").ok
25 |       doAssert not patt(types.uint32).match("4294967296").ok
26 | 
27 | 
28 |   test "utf8 runes":
29 |     doAssert     patt(utf8.any[4] * !1).match("abcd").ok
30 |     doAssert     patt(utf8.any[4] * !1).match("ａｂｃｄ").ok
31 |     doAssert     patt(utf8.any[4] * !1).match("всех").ok
32 |     doAssert     patt(utf8.any[4] * !1).match("乪乫乬乭").ok
33 | 
34 |   test "utf8 character classes":
35 |     doAssert     patt(utf8.upper).match("Ɵ").ok
36 |     doAssert not patt(utf8.upper).match("ë").ok
37 |     doAssert not patt(utf8.lower).match("Ɵ").ok
38 |     doAssert     patt(utf8.lower).match("ë").ok
39 | 


--------------------------------------------------------------------------------
/tests/nimversion.nim:
--------------------------------------------------------------------------------
 1 | 
 2 | import strutils
 3 | import npeg
 4 | 
 5 | type
 6 |   NimType = enum Nim, NimSkull
 7 | 
 8 |   Version = object
 9 |     maj, min, rev: int
10 |     extra: string
11 | 
12 |   NimVersion = object
13 |     typ: NimType
14 |     version: Version
15 |     os: string
16 |     cpu: string
17 |     date: string
18 |     git: string
19 |     boot_switches: seq[string]
20 | 
21 | 
22 | let p = peg("nimversion", nv: NimVersion):
23 | 
24 |   S <- *{' ','\t','\n','\r'}
25 |   nimversion <- oldnim_version | nimskull_version
26 | 
27 |   oldnim_version <- header * S *
28 |                     "Compiled at " * date * S *
29 |                     "Copyright (c) " * +Graph * " by Andreas Rumpf" * S *
30 |                     "git hash:" * S * git * S * 
31 |                     "active boot switches:" * S * boot_switches
32 | 
33 |   nimskull_version <- header * S *
34 |                       "Source hash: " * git * S *
35 |                       "Source date: " * date
36 | 
37 |   header <- typ * S * "Compiler Version" * S * version * S * "[" * os * ":" * S * cpu * "]" * S
38 | 
39 |   typ <- typ_nimskull | typ_nim
40 |   typ_nim <- "Nim": nv.typ = NimType.Nim
41 |   typ_nimskull <- "Nimskull": nv.typ = NimType.NimSkull
42 | 
43 |   int <- +{'0'..'9'}
44 |   os <- >+Alnum: nv.os = $1
45 |   cpu <- >+Alnum: nv.cpu = $1
46 |   git <- >+{'0'..'9','a'..'f'}: nv.git = $1
47 |   boot_switches <- *(boot_switch * S)
48 |   boot_switch <- >+Graph: nv.boot_switches.add($1)
49 |   date <- >+{'0'..'9','-'}: nv.date = $1
50 |   version <- >int * "." * >int * "." * >int * ?"-" * >*Graph:
51 |     nv.version.maj = parseInt($1)
52 |     nv.version.min = parseInt($2)
53 |     nv.version.rev = parseInt($3)
54 |     nv.version.extra = $4
55 | 
56 | 
57 | let vnim = """Nim Compiler Version 2.1.1 [Linux: amd64]
58 | Compiled at 2024-03-01
59 | Copyright (c) 2006-2024 by Andreas Rumpf
60 | 
61 | git hash: 1e7ca2dc789eafccdb44304f7e42206c3702fc13
62 | active boot switches: -d:release -d:danger
63 | """
64 | 
65 | let vskull = """Nimskull Compiler Version 0.1.0-dev.21234 [linux: amd64]
66 | 
67 | Source hash: 4948ae809f7d84ef6d765111a7cd0c7cf2ae77d2
68 | Source date: 2024-02-18
69 | """
70 | 
71 | var nv: NimVersion
72 | 
73 | block:
74 |   let r = p.match(vnim, nv)
75 |   if r.ok:
76 |     echo nv.repr
77 | 
78 | block:
79 |   let r = p.match(vskull, nv)
80 |   if r.ok:
81 |     echo nv.repr
82 | 
83 | 


--------------------------------------------------------------------------------
/tests/performance.nim:
--------------------------------------------------------------------------------
  1 | 
  2 | import npeg
  3 | import os
  4 | import streams
  5 | import strutils
  6 | import tables
  7 | import json
  8 | import times
  9 | #import packedjson
 10 | import osproc
 11 | 
 12 | let js = execProcess("bzip2 -d < tests/json-32M.bzip2").string
 13 | 
 14 | let hostname = readFile("/etc/hostname").strip()
 15 | 
 16 | let expectTime = {
 17 |   "platdoos": { 
 18 |     "json": 0.651,
 19 |     "parsejson": 3.962,
 20 |     "words": 0.920,
 21 |     "search": 0.057,
 22 |     "search1": 0.231,
 23 |     "search2": 1.419,
 24 |     "search3": 0.292,
 25 |   }.toTable(),
 26 |   "fe2": { 
 27 |     "json": 3.975,
 28 |     "parsejson": 8.739,
 29 |     "words": 2.391,
 30 |     "search": 0.373,
 31 |     "search1": 2.014,
 32 |     "search2": 2.871,
 33 |     "search3": 0.771,
 34 |   }.toTable(),
 35 | }.toTable()
 36 | 
 37 | 
 38 | # Wake up the governor a bit
 39 | 
 40 | var v = 0
 41 | for i in 1..100000:
 42 |   for j in 1..1000000:
 43 |     inc v
 44 | 
 45 | 
 46 | template measureTime*(what: string, code: untyped) =
 47 |   ## Runs `code`, then prints its CPU time and, when a reference time is known for this host, the ratio to it.
 48 |   var expect = 0.0
 49 |   if hostname in expectTime:
 50 |     if what in expectTime[hostname]:
 51 |       expect = expectTime[hostname][what]
 52 |   # No reference entry leaves `expect` at 0.0; skip the ratio rather than divide by zero.
 53 |   let start = cpuTime()
 54 |   block:
 55 |     code
 56 |   let duration = cpuTime() - start
 57 |   let perc = if expect > 0.0: (100.0 * duration / expect).formatFloat(ffDecimal, 1) & "%" else: "n/a"
 58 |   echo what & ": ", duration.formatFloat(ffDecimal, 3), "s ", perc
 59 | 
 60 | 
 61 | measureTime "json":
 62 | 
 63 |   ## Json parsing with npeg
 64 | 
 65 |   let p = peg JSON:
 66 |     S              <- *{' ','\t','\r','\n'}
 67 |     True           <- "true"
 68 |     False          <- "false"
 69 |     Null           <- "null"
 70 | 
 71 |     UnicodeEscape  <- 'u' * Xdigit[4]
 72 |     Escape         <- '\\' * ({ '"', '\\', '/', 'b', 'f', 'n', 'r', 't' } | UnicodeEscape)
 73 |     StringBody     <- *Escape * *( +( {'\x20'..'\xff'} - {'"'} - {'\\'}) * *Escape) 
 74 |     String         <- '"' * StringBody * '"':
 75 |       discard
 76 | 
 77 |     Minus          <- '-'
 78 |     IntPart        <- '0' | {'1'..'9'} * *{'0'..'9'}
 79 |     FractPart      <- "." * +{'0'..'9'}
 80 |     ExpPart        <- ( 'e' | 'E' ) * ?( '+' | '-' ) * +{'0'..'9'}
 81 |     Number         <- ?Minus * IntPart * ?FractPart * ?ExpPart:
 82 |       discard
 83 | 
 84 |     DOC            <- Value * !1
 85 |     ObjPair        <- S * String * S * ":" * Value
 86 |     Object         <- '{' * ( ObjPair * *( "," * ObjPair ) | S ) * "}"
 87 |     Array          <- "[" * ( Value * *( "," * Value ) | S ) * "]"
 88 |     Value          <- S * ( Number | String | Object | Array | True | False | Null ) * S
 89 | 
 90 |     JSON           <- Value * !1
 91 | 
 92 |   for i in 1..10:
 93 |     doAssert p.match(js).ok
 94 | 
 95 | 
 96 | let s = newStringStream(js)
 97 | measureTime "parsejson":
 98 |   # JSON parsing with Nim's 'parsejson' module.
 99 |   for i in 1..10:
100 |     s.setPosition(0)
101 |     var p: JsonParser
102 |     open(p, s, "json")
103 |     while true:
104 |       p.next()
105 |       if p.kind == jsonError or p.kind == jsonEof:
106 |         break
107 | 
108 | 
109 | measureTime "words":
110 | 
111 |   var v = 0
112 |   let p = peg foo:
113 |     foo <- +word
114 |     word <- @>+Alpha:
115 |       inc v
116 |   discard p.match(js).ok
117 | 
118 | 
119 | measureTime "search":
120 |   # Search using built in search operator
121 |   var v = 0
122 |   let p = peg search:
123 |     search <- @"CALIFORNIA":
124 |       inc v
125 |   for i in 1..10:
126 |     discard p.match(js).ok
127 | 
128 | 
129 | measureTime "search1":
130 |   # Searches using tail recursion.
131 |   let p = peg SS:
132 |     SS <- +S
133 |     S <- "CALIFORNIA" | 1 * S
134 |   for i in 1..10:
135 |     discard p.match(js).ok
136 | 
137 | measureTime "search2":
138 |   # Searches using an explicit negative-lookahead loop.
139 |   let p = peg SS:
140 |     SS <- +S
141 |     S <- *( !"CALIFORNIA" * 1) * "CALIFORNIA"
142 |   for i in 1..10:
143 |     discard p.match(js).ok
144 | 
145 | measureTime "search3":
146 |    # using an optimization to skip false starts.
147 |   let p = peg SS:
148 |     SS <- +S
149 |     S <- "CALIFORNIA" | 1 * *(1-'C') * S
150 |   for i in 1..10:
151 |     discard p.match(js).ok
152 | 
153 | 


--------------------------------------------------------------------------------
/tests/precedence.nim:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | import strutils
  3 | import math
  4 | import tables
  5 | import npeg
  6 | 
  7 | {.push warning[Spacing]: off.}
  8 | 
  9 | 
 10 | suite "precedence operator":
 11 | 
 12 |   # The PEG below implements a Pratt parser. The ^ and ^^ operators are used to
 13 |   # implement precedence climbing; this allows rules to be left recursive while
 14 |   # still avoiding unbounded recursion.
 15 |   #
 16 |   # The parser local state `seq[int]` is used as a stack to store captures and
 17 |   # intermediate results while parsing, the end result of the expression will
 18 |   # be available in element 0 when the parser finishes
 19 | 
 20 |   test "expr evaluator":
 21 | 
 22 |     # Table of binary operators - this maps the operator string to a proc
 23 |     # performing the operation:
 24 | 
 25 |     template map(op: untyped): untyped = (proc(a, b: int): int = op(a, b))
 26 | 
 27 |     var binOps = {
 28 |       "+": map(`+`),
 29 |       "-": map(`-`),
 30 |       "*": map(`*`),
 31 |       "/": map(`/%`),
 32 |       "^": map(`^`),
 33 |     }.toTable()
 34 | 
 35 |     let p = peg(exp, st: seq[int]):
 36 | 
 37 |       S <- *Space
 38 | 
 39 |       # Capture a number and put it on the stack
 40 | 
 41 |       number <- >+Digit * S:
 42 |         st.add parseInt($1)
 43 | 
 44 |       # Reset the precedence level to 0 when parsing sub-expressions
 45 |       # in parentheses
 46 | 
 47 |       parenExp <- ( "(" * exp * ")" ) ^ 0
 48 | 
 49 |       # Unary minus: take last element of the stack, negate and push back
 50 | 
 51 |       uniMinus <- '-' * exp:
 52 |         st.add(-st.pop)
 53 | 
 54 |       # The prefix is a number, a sub expression in parentheses or the unary
 55 |       # `-` operator.
 56 | 
 57 |       prefix <- number | parenExp | uniMinus
 58 | 
 59 |       # Parse an infix operator. Bounded by the precedence operator that makes
 60 |       # sure `exp` is only parsed if the current precedence is lower than the
 61 |       # given precedence. Note that the power operator has right associativity.
 62 | 
 63 |       infix <- >{'+','-'}    * exp ^  1 |
 64 |                >{'*','/'}    * exp ^  2 |
 65 |                >{'^'}        * exp ^^ 3 :
 66 | 
 67 |         # Takes two results off the stack, applies the operator and push
 68 |         # back the result
 69 | 
 70 |         let (f2, f1) = (st.pop, st.pop)
 71 |         st.add binOps[$1](f1, f2)
 72 | 
 73 |       # An expression consists of a prefix followed by zero or more infix
 74 |       # operators
 75 | 
 76 |       exp <- S * prefix * *infix
 77 | 
 78 | 
 79 |     # Evaluate the given expression
 80 | 
 81 |     proc eval(expr: string): int =
 82 |       var st: seq[int]
 83 |       doAssert p.match(expr, st).ok
 84 |       st[0]
 85 | 
 86 | 
 87 |     # Test cases
 88 | 
 89 |     doAssert eval("2+1") == 2+1
 90 |     doAssert eval("(((2+(1))))") == 2+1
 91 |     doAssert eval("3+2") == 3+2
 92 | 
 93 |     doAssert eval("3+2+4") == 3+2+4
 94 |     doAssert eval("(3+2)+4") == 3+2+4
 95 |     doAssert eval("3+(2+4)") == 3+2+4
 96 |     doAssert eval("(3+2+4)") == 3+2+4
 97 | 
 98 |     doAssert eval("3*2*4") == 3*2*4
 99 |     doAssert eval("(3*2)*4") == 3*2*4
100 |     doAssert eval("3*(2*4)") == 3*2*4
101 |     doAssert eval("(3*2*4)") == 3*2*4
102 | 
103 |     doAssert eval("3-2-4") == 3-2-4
104 |     doAssert eval("(3-2)-4") == (3-2)-4
105 |     doAssert eval("3-(2-4)") == 3-(2-4)
106 |     doAssert eval("(3-2-4)") == 3-2-4
107 | 
108 |     doAssert eval("3/8/4") == 3/%8/%4
109 |     doAssert eval("(3/8)/4") == (3/%8)/%4
110 |     doAssert eval("3/(8/4)") == 3/%(8/%4)
111 |     doAssert eval("(3/8/4)") == 3/%8/%4
112 | 
113 |     doAssert eval("(3*8/4)") == 3*8/%4
114 |     doAssert eval("(3/8*4)") == 3/%8*4
115 |     doAssert eval("3*(8/4)") == 3*(8/%4)
116 | 
117 |     doAssert eval("4^3^2") == 4^3^2
118 |     doAssert eval("(4^3)^2") == (4^3)^2
119 |     doAssert eval("4^(3^2)") == 4^(3^2)
120 | 
121 | 


--------------------------------------------------------------------------------
/tests/testdata:
--------------------------------------------------------------------------------
1 | one=1,two=2,three=3,four=4
2 | 


--------------------------------------------------------------------------------
/tests/tests.nim:
--------------------------------------------------------------------------------
1 | include "basics.nim"
2 | include "examples.nim"
3 | include "captures.nim"
4 | include "precedence.nim"
5 | include "lib.nim"
6 | include "lexparse.nim"
7 | 
8 | 


--------------------------------------------------------------------------------