├── .github └── workflows │ ├── ci.yml │ └── contents.yml ├── Changelog.md ├── INTERNALS.md ├── LICENSE ├── README.md ├── config.nims ├── doc ├── README.md ├── example-graph.png ├── example-railroad.png ├── npeg.png ├── papers │ ├── 2008_A_Text_Pattern-Matching_Tool_based_on_Parsing_Expression_Grammars.pdf │ ├── 2008_Packrat parsers can support left recursion.pdf │ ├── 2008_slides-lpeg-workshop2008.pdf │ ├── 2009_A_Parsing_Machine_For_PEGs.pdf │ ├── 2010_Converting_regexes_to_Parsing_Expression_Grammars.pdf │ ├── 2010_Direct_left-recursive_parsing_expression_grammars.pdf │ ├── 2011_From_EBNF_to_PEG.pdf │ ├── 2011_From_Regular_Expressions_to_Parsing_Expression_Grammars.pdf │ ├── 2011_Parsing_Expression_Grammars_for_Structured_Data.pdf │ ├── 2013_Exception_Handling_for_Error_Reporting_in_Parsing_Expression_Grammars.pdf │ ├── 2014_Left_recursion_in_parsing_expression_grammars.pdf │ ├── 2018_An_efficient_parsing_machine_for_PEGs.pdf │ ├── 2021_Incremental_PEG_Parsing.pdf │ └── README.md └── syntax-diagram.png ├── misc ├── README ├── indent.nim ├── java.nim ├── mouse2npeg.nim └── rod.nim ├── npeg.nimble ├── src ├── npeg.nim └── npeg │ ├── capture.nim │ ├── codegen.nim │ ├── common.nim │ ├── dot.nim │ ├── grammar.nim │ ├── lib │ ├── core.nim │ ├── rfc3339.nim │ ├── types.nim │ ├── uri.nim │ └── utf8.nim │ ├── parsepatt.nim │ ├── patt.nim │ ├── railroad.nim │ └── stack.nim └── tests ├── basics.nim ├── captures.nim ├── config.nims ├── examples.nim ├── json-32M.bzip2 ├── lexparse.nim ├── lib.nim ├── nimversion.nim ├── performance.nim ├── precedence.nim ├── testdata └── tests.nim /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | push: 4 | pull_request: 5 | 6 | concurrency: 7 | group: ci-${{ github.ref }} 8 | cancel-in-progress: true 9 | 10 | jobs: 11 | build: 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | compiler: 16 | - name: nim 17 | version: devel 18 | - name: nim 19 | version: 
version-2-0 20 | - name: nimskull 21 | version: "0.1.0-dev.21405" 22 | - name: nimskull 23 | version: "*" 24 | 25 | include: 26 | - compiler: 27 | name: nim 28 | version: devel 29 | build_doc: true 30 | 31 | name: ${{ matrix.compiler.name }} ${{ matrix.compiler.version }} 32 | runs-on: ubuntu-latest 33 | 34 | defaults: 35 | run: 36 | shell: bash 37 | working-directory: npeg 38 | 39 | steps: 40 | - name: Checkout 41 | uses: actions/checkout@v4.1.1 42 | with: 43 | path: npeg 44 | 45 | - name: Setup Nim 46 | if: matrix.compiler.name == 'nim' 47 | uses: alaviss/setup-nim@0.1.1 48 | with: 49 | path: nim 50 | version: ${{ matrix.compiler.version }} 51 | 52 | - name: Setup nimskull 53 | id: nimskull 54 | if: matrix.compiler.name == 'nimskull' 55 | uses: nim-works/setup-nimskull@0.1.1 56 | with: 57 | nimskull-version: ${{ matrix.compiler.version }} 58 | 59 | - name: Run tests 60 | run: nim r --path:src tests/tests.nim 61 | 62 | - name: Build docs 63 | if: matrix.build_doc 64 | shell: bash 65 | run: | 66 | branch=$GITHUB_REF 67 | branch=${branch##*/} 68 | for i in src/npeg.nim src/npeg/*.nim; do 69 | nim doc --project --outdir:htmldocs \ 70 | --path:src \ 71 | "--git.url:https://github.com/$GITHUB_REPOSITORY" \ 72 | "--git.commit:$GITHUB_SHA" \ 73 | "--git.devel:$branch" \ 74 | "$i" 75 | done 76 | # Make npeg module the default page 77 | cp htmldocs/{npeg,index}.html 78 | 79 | - name: Upload GitHub Pages artifact 80 | if: matrix.build_doc 81 | uses: actions/upload-pages-artifact@v3.0.1 82 | with: 83 | path: npeg/htmldocs 84 | 85 | deploy: 86 | needs: 87 | - build 88 | if: github.ref == 'refs/heads/master' 89 | 90 | permissions: 91 | actions: read 92 | pages: write 93 | id-token: write 94 | 95 | environment: 96 | name: github-pages 97 | url: ${{ steps.deployment.outputs.page_url }} 98 | 99 | name: Deploy docs to GitHub Pages 100 | runs-on: ubuntu-latest 101 | steps: 102 | - name: Deploy page 103 | id: deployment 104 | uses: actions/deploy-pages@v4.0.4 105 | 106 | passed: 107 
| needs: build 108 | if: failure() || cancelled() 109 | name: All tests passed 110 | 111 | runs-on: ubuntu-latest 112 | steps: 113 | - run: exit 1 114 | -------------------------------------------------------------------------------- /.github/workflows/contents.yml: -------------------------------------------------------------------------------- 1 | name: Make table of contents 2 | on: 3 | push: 4 | paths: 5 | - README.md 6 | branches: 7 | - '**' 8 | jobs: 9 | make: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v3 13 | - uses: thatrandomperson5/AutoMarkdownContents@v1.1.1 14 | with: 15 | file: README.md 16 | auto: true 17 | skip-first: true 18 | - name: Pull request 19 | uses: peter-evans/create-pull-request@v4 20 | with: 21 | token: ${{ secrets.GITHUB_TOKEN }} 22 | title: "Add md table of contents" 23 | commit-message: ":clipboard: Added markdown table of contents" 24 | body: | 25 | :clipboard: Added markdown table of contents 26 | base: ${{ github.head_ref }} # Creates pull request onto pull request or commit branch 27 | branch: actions/automd 28 | 29 | -------------------------------------------------------------------------------- /Changelog.md: -------------------------------------------------------------------------------- 1 | 2 | 1.3.0 - 2024-08-22 3 | ================== 4 | 5 | - Added CI (thanks Leorize) 6 | - Performance optimization 7 | - Some small refactorings 8 | 9 | 1.2.1 - 2023-03-04 10 | ================== 11 | 12 | - fixes for --styleCheck=usages 13 | 14 | 1.2.0 - 2023-01-17 15 | ================== 16 | 17 | - Split NPegException into more specific errors, updated documentation 18 | 19 | 1.1.2 - 2023-01-08 20 | ================== 21 | 22 | - Fixed compat with Nim 1.0.11 23 | 24 | 1.1.1 - 2023-01-08 25 | ================== 26 | 27 | - Disabled test with '∙' to avoid breaking older Nim 28 | 29 | 1.1.0 - 2023-01-08 30 | ================== 31 | 32 | - Added alternate `∙` concatenation operator 33 | - Fixed
fixBareExceptionWarning in Nim devel 34 | - Added table of contents to README.md 35 | 36 | 1.0.1 - 2022-12-10 37 | ================== 38 | 39 | - Bugfix release, fixes "expression 'discard' has no type (or is ambiguous)" in 40 | rare cases 41 | 42 | 1.0.0 - 2022-11-27 43 | ================== 44 | 45 | - Improved stack trace handling 46 | - Fixed matchFile() for empty files 47 | 48 | 0.27.0 - 2022-11-06 49 | =================== 50 | 51 | - Augment the Nim stack trace with the NPeg return stack on exceptions 52 | - Documentation updates 53 | 54 | 0.26.0 - 2021-11-27 55 | =================== 56 | 57 | - Improved lineinfo in code blocks for better backtraces 58 | - Some documentation improvements 59 | 60 | 0.25.0 - 2021-09-11 61 | =================== 62 | 63 | - Omit the `.computedGoto.` in the inner parser loop for grammars with more 64 | than 10k instructions to work around the nim compiler limitation 65 | 66 | 0.24.1 - 2021-01-16 67 | =================== 68 | 69 | - Added mixin for 'repr' to allow clean tracing of user types 70 | 71 | 0.24.0 - 2020-11-20 72 | =================== 73 | 74 | - Added -d:npegGcsafe 75 | 76 | 0.23.2 - 2020-11-06 77 | =================== 78 | 79 | - Small improvement in npeg syntax checking 80 | 81 | 0.23.0 - 2020-09-23 82 | =================== 83 | 84 | - Reinstated [] out of bound check for capturest 85 | - Dropped profiler support, the implementation was bad 86 | - Small documentation improvements 87 | - Added RFC3339 date parser to libs 88 | 89 | 0.22.2 - 2019-12-27 90 | =================== 91 | 92 | - Skip --gc:arc tests for nim <1.1 to fix Nim CI builds. 93 | 94 | 0.22.1 - 2019-12-27 95 | =================== 96 | 97 | - Bugfix in codegen causing problems with ^1 notation in code blocks. 98 | 99 | 0.22.0 - 2019-12-24 100 | =================== 101 | 102 | - Changed the parsing subject from `openArray[char]` to `openArray[T]` and 103 | added a 'literal' atom to the grammar.
This allows NPeg to parse lists of 104 | any type, making it suitable for separate lexer and parser stages. See 105 | tests/lexparse.nim for a concise example. 106 | 107 | - Added `@` syntactic sugar to access the match offset inside code block 108 | captures. 109 | 110 | - Dropped Json and AST captures - no complaints heard since deprecation, and it 111 | simplifies the code base to aid the development of new features. 112 | 113 | 0.21.3 - 2019-12-06 114 | =================== 115 | 116 | - Fixed off-by-one error in range `P[m..n]` operator, which would also match 117 | `P` times `n+1` 118 | 119 | - Various documentation improvements 120 | 121 | 0.21.2 - 2019-11-26 122 | =================== 123 | 124 | - Fixed the way dollar captures are rewritten to avoid the name space clash 125 | which was introduced by Nim PR #12712. 126 | 127 | 0.21.1 - 2019-11-19 128 | =================== 129 | 130 | - Bugfix for templates generating ordered choices 131 | 132 | 0.21.0 - 2019-10-28 133 | =================== 134 | 135 | - anonymous `patt` patterns now also take a code block 136 | 137 | - deprecated AST and Json captures. AST captures are not flexible enough, and 138 | the functionality can be better implemented using code block captures and 139 | domain-specific AST object types. The Json captures were added in the early 140 | days of NPeg as a flexible way to store captures, but this does not mix well 141 | with custom captures and can not handle things like string unescaping. Both 142 | capture types were removed from the documentation and a .deprecated. pragma 143 | was added to the implementation. If you use Json or AST captures and think 144 | deprecation is a mistake, let me know. 145 | 146 | 0.20.0 - 2019-10-18 147 | =================== 148 | 149 | - Added precedence operators - this allows constructions of Pratt parsers with 150 | bounded left recursion and operator precedence.
151 | - Added run time profiler, enable with -d:npegProfile 152 | - Performance improvements 153 | 154 | 0.19.0 - 2019-10-11 155 | =================== 156 | 157 | - Significant performance improvements 158 | - Changed semantics of code block captures: $0 now always captures the 159 | total subject captured in a rule. This is a minor API change that only 160 | affects code using the `capture[]` notation inside code blocks 161 | - Added fail() function to force a parser fail in a code block capture 162 | - Added push() function to allow code block captures to push captures 163 | back on the stack 164 | - Check for loops caused by repeat of empty strings at compile time 165 | 166 | 0.18.0 - 2019-09-26 167 | =================== 168 | 169 | - Runtime performance improvements 170 | 171 | 0.17.1 - 2019-09-19 172 | =================== 173 | 174 | - Bugfix release (removed lingering debug echo) 175 | 176 | 0.17.0 - 2019-09-17 177 | =================== 178 | 179 | - Various runtime and compiletime performance improvements 180 | 181 | 0.16.0 - 2019-09-08 182 | =================== 183 | 184 | - Templates can now also be used in libraries 185 | - Added railroad diagram generation with -d:npegGraph 186 | - Improved error reporting 187 | 188 | 0.15.0 - 2019-08-31 189 | =================== 190 | 191 | - Generic parser API changed: the peg() macro now explicitly passes the 192 | userdata type and identifier.
193 | 194 | 0.14.1 - 2019-08-28 195 | =================== 196 | 197 | - Added templates / parameterised rules 198 | - Added custom match validation in code block capture 199 | - Added basic types, utf8 and uri libs 200 | - Added global pattern library support 201 | - Proc matchFile() now uses memfiles/mmap for zero copy parsers 202 | - Implemented method to pass user variable to code block captures 203 | - Added AST capture type for building simple abstract syntax trees 204 | - Added Jb() capture for Json booleans 205 | 206 | 0.13.0 - 2019-07-21 207 | =================== 208 | 209 | - The capture[] variable available inside code block matches now allows access 210 | to the match offset as well. This is an API change since the type of capture 211 | changed from seq[string] to seq[Capture]. 212 | 213 | 0.12.0 - 2019-07-14 214 | =================== 215 | 216 | - Documentation updates 217 | - Made some error bounds compile-time configurable 218 | - Fix for more strict Nim compiler checks 219 | 220 | 0.11.0 - 2019-05-29 221 | =================== 222 | 223 | - Added support for named backreferences 224 | - Added safeguards to prevent grammars growing out of bounds 225 | - Added Graphviz .dot debugging output for parser debugging 226 | - Added `matchLen` and `matchMax` fields to `NPegException` 227 | - Improved pattern syntax error messages 228 | 229 | 0.10.0 - 2019-04-24 230 | =================== 231 | 232 | - Fixed 'Graph' character class 233 | 234 | 0.9.0 - 2019-03-31 235 | ================== 236 | 237 | - Some syntax changes to fix compilation with mainline Nim 0.19.4 238 | 239 | 0.8.0 - 2019-03-30 240 | ================== 241 | 242 | - Added syntactic sugar for accessing the captures[] seq in capture 243 | code blocks with dollar-number variables $1..$9 244 | 245 | 0.7.0 - 2019-03-29 246 | ================== 247 | 248 | - Action callbacks (%) dropped in favour of Nim code block callbacks. 
249 | 250 | 0.6.0 - 2019-03-27 251 | ================== 252 | 253 | - API change: count syntax changed from {n} to [n]. 254 | 255 | - Optimizations in code generation 256 | 257 | 0.5.0 - 2019-03-27 258 | ================== 259 | 260 | - API change: peg() and patt() now return an object of type Parser 261 | instead of a proc, and the function match(p: Parser) is now used for 262 | matching the subject. match() can match string and cstring types, 263 | matchFile() matches a file using memFile. 264 | 265 | - Added builtin atoms Upper, Lower, Digit, HexDigit, Alpha 266 | 267 | - Added `@` search operator 268 | 269 | - Added `&` and predicate 270 | 271 | 0.4.0 - 2019-03-24 272 | ================== 273 | 274 | - Improved tracing output, during trace the originating rule name 275 | for each instruction is dumped. 276 | 277 | - Optimizations 278 | -------------------------------------------------------------------------------- /INTERNALS.md: -------------------------------------------------------------------------------- 1 | 2 | ## Introduction 3 | 4 | This document briefly describes the inner workings of NPeg. 5 | 6 | The main PEG algorithm is based on the Paper "A Text Pattern-Matching Tool 7 | based on Parsing Expression Grammars" by Roberto Ierusalimschy, who is also the 8 | author of LPEG. While LPEG uses a VM approach for parsing, NPeg adds an 9 | additional step where the VM code is compiled to native Nim code which does the 10 | parsing. 11 | 12 | This is how NPeg works in short: 13 | 14 | - The grammar is parsed by a Nim macro which recursively transforms this into 15 | a sequence of VM instructions for each grammar rule. 16 | 17 | - The set of instructions is 'linked' into a complete program of instructions 18 | 19 | - The linked program is translated/compiled into a state machine, implemented 20 | as a large Nim `case` statement that performs the parsing of the subject 21 | string.
22 | 23 | 24 | ## Data structures 25 | 26 | The following data structures are used for compiling the grammar: 27 | 28 | - `Inst`, short for "instruction": This is an object variant which implements a 29 | basic VM instruction. It consists of the opcode and a number of data fields. 30 | 31 | - `Patt`, short for "pattern": A pattern is a sequence of instructions 32 | `seq[Inst]` which typically match an atom from the grammar. 33 | 34 | - `Rule`: One complete, named pattern which is part of a grammar. 35 | 36 | - `Grammar`: A grammar is a collection of named patterns implemented as a 37 | `table[string, Patt]`. This is used as the intermediate representation of the 38 | complete compiled grammar and holds patterns for each of the named rules. 39 | 40 | - `Program`: A complete linked program, consisting of a pattern and its debug 41 | info (symbol table, textual listing) 42 | 43 | - `Parser`: object holding the compiled Nim matching function 44 | 45 | For captures the following data structures are relevant: 46 | 47 | - `CapFrame`: A capframe is a frame of a specific type on the capture stack 48 | that points to an offset in the subject string. For each capture open and 49 | close pair a frame exists on the stack, thus allowing for nested captures. 50 | 51 | - `Capture`: A capture is a completed capture that is collected and finalized 52 | when a capture is closed and finished. 53 | 54 | For the generic procs and types, the following convention is used: 55 | 56 | - `[T]` is the type of optional "user data" that gets passed into the parser. 57 | When this is not explicitly given with the `peg` macro, NPeg will stub this 58 | with an unused bool 59 | 60 | - `[S]` is the type of the subject. This is typically a string, although NPeg 61 | is generic enough and can parse any `seq[S]` 62 | 63 | ## Building a grammar 64 | 65 | The first step in building a parser is the translation of the grammar into 66 | snippets of VM instructions which match the data and perform flow control.
For 67 | details of these instructions, refer to the paper by Ierusalimschy. 68 | 69 | The `Patt` data type is used to store a sequence of instructions. This section 70 | describes how a pattern is built from Nim code, all of which lives in `patt.nim` 71 | - this mechanism is later used by the macro which is parsing the actual PEG 72 | grammar. 73 | 74 | The basic atoms are constructed by the `newPatt()` procedures. These take an 75 | argument describing what needs to be matched in the subject, and deliver a 76 | short sequence of instructions. For example, the `newPatt("foo")` procedure 77 | will create a pattern consisting of a single instruction: 78 | 79 | ``` 80 | 1: line opStr "foo" 81 | ``` 82 | 83 | There are a number of operators defined which act on one or more patterns. 84 | These operators are used to combine multiple patterns into larger patterns. 85 | 86 | For example, the `|` operator is used for the PEG ordered choice. This takes 87 | two patterns, and results in a pattern that tries to match the first one and 88 | then skips the second, or tries to match the second if the first fails: 89 | 90 | ``` 91 | 0: line opChoice 3 92 | 1: line opStr "foo" 93 | 2: line opCommit 4 94 | 3: line opStr "bar" 95 | 4: opReturn 96 | ``` 97 | 98 | A number of patterns can be combined into a grammar, which is simply a table 99 | of patterns indexed by name. 100 | 101 | 102 | ## PEG DSL to grammar 103 | 104 | The user defines their NPeg grammar in a Nim code block, which consists of a 105 | number of named patterns. The whole grammar is handled by the `parseGrammar()` 106 | which iterates all individual named patterns. Each pattern is passed to the 107 | `parsePatt()` macro, which transforms the Nim code block AST into an NPeg 108 | grammar. This macro recursively goes through the Nim AST and calls `newPatt()` 109 | for building atoms, and calls the various operators acting on patterns to grow 110 | the grammar.
111 | 112 | 113 | ## Grammar to Nim code 114 | 115 | The `genCode()` procedure is used to convert the list of instructions into Nim 116 | code which implements the actual parser. This procedure builds a `case` 117 | statement for each VM instruction, and inserts a template for each opcode for 118 | each case. 119 | 120 | 121 | ## Example 122 | 123 | The following grammar is specified by the user: 124 | 125 | ``` 126 | lines <- *line 127 | line <- "foo" | "bar" 128 | ``` 129 | 130 | This is translated into the following VM program: 131 | 132 | ``` 133 | lines: 134 | 0: lines opChoice 3 135 | 1: lines opCall 4 line 136 | 2: lines opPartCommit 1 137 | 3: opReturn 138 | 139 | line: 140 | 4: line opChoice 7 141 | 5: line opStr "foo" 142 | 6: line opCommit 8 143 | 7: line opStr "bar" 144 | 8: opReturn 145 | ``` 146 | 147 | which is then translated into the following `case` statement: 148 | 149 | ``` 150 | while true: 151 | case ip 152 | of 0: 153 | opChoiceFn(3, "lines") 154 | of 1: 155 | opCallFn("line", 3, "lines") 156 | of 2: 157 | opPartCommitFn(1, "lines") 158 | of 3: 159 | opReturnFn("") 160 | of 4: 161 | opChoiceFn(7, "line") 162 | of 5: 163 | opStrFn("foo", "line") 164 | of 6: 165 | opCommitFn(8, "line") 166 | of 7: 167 | opStrFn("bar", "line") 168 | of 8: 169 | opReturnFn("") 170 | else: 171 | opFailFn() 172 | ``` 173 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2019 Ico Doornekamp 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | of the Software, and to permit persons to whom the Software is furnished to do 8 | so, subject to the following 
conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | 21 | -------------------------------------------------------------------------------- /config.nims: -------------------------------------------------------------------------------- 1 | --styleCheck:usages 2 | if (NimMajor, NimMinor) < (1, 6): 3 | --styleCheck:hint 4 | else: 5 | --styleCheck:error 6 | -------------------------------------------------------------------------------- /doc/README.md: -------------------------------------------------------------------------------- 1 | 2 | This directory contains various papers which were used for inspiration when 3 | building Npeg. 
4 | -------------------------------------------------------------------------------- /doc/example-graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/example-graph.png -------------------------------------------------------------------------------- /doc/example-railroad.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/example-railroad.png -------------------------------------------------------------------------------- /doc/npeg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/npeg.png -------------------------------------------------------------------------------- /doc/papers/2008_A_Text_Pattern-Matching_Tool_based_on_Parsing_Expression_Grammars.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2008_A_Text_Pattern-Matching_Tool_based_on_Parsing_Expression_Grammars.pdf -------------------------------------------------------------------------------- /doc/papers/2008_Packrat parsers can support left recursion.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2008_Packrat parsers can support left recursion.pdf -------------------------------------------------------------------------------- /doc/papers/2008_slides-lpeg-workshop2008.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2008_slides-lpeg-workshop2008.pdf -------------------------------------------------------------------------------- /doc/papers/2009_A_Parsing_Machine_For_PEGs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2009_A_Parsing_Machine_For_PEGs.pdf -------------------------------------------------------------------------------- /doc/papers/2010_Converting_regexes_to_Parsing_Expression_Grammars.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2010_Converting_regexes_to_Parsing_Expression_Grammars.pdf -------------------------------------------------------------------------------- /doc/papers/2010_Direct_left-recursive_parsing_expression_grammars.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2010_Direct_left-recursive_parsing_expression_grammars.pdf -------------------------------------------------------------------------------- /doc/papers/2011_From_EBNF_to_PEG.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2011_From_EBNF_to_PEG.pdf -------------------------------------------------------------------------------- /doc/papers/2011_From_Regular_Expressions_to_Parsing_Expression_Grammars.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2011_From_Regular_Expressions_to_Parsing_Expression_Grammars.pdf 
-------------------------------------------------------------------------------- /doc/papers/2011_Parsing_Expression_Grammars_for_Structured_Data.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2011_Parsing_Expression_Grammars_for_Structured_Data.pdf -------------------------------------------------------------------------------- /doc/papers/2013_Exception_Handling_for_Error_Reporting_in_Parsing_Expression_Grammars.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2013_Exception_Handling_for_Error_Reporting_in_Parsing_Expression_Grammars.pdf -------------------------------------------------------------------------------- /doc/papers/2014_Left_recursion_in_parsing_expression_grammars.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2014_Left_recursion_in_parsing_expression_grammars.pdf -------------------------------------------------------------------------------- /doc/papers/2018_An_efficient_parsing_machine_for_PEGs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2018_An_efficient_parsing_machine_for_PEGs.pdf -------------------------------------------------------------------------------- /doc/papers/2021_Incremental_PEG_Parsing.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/papers/2021_Incremental_PEG_Parsing.pdf -------------------------------------------------------------------------------- 
/doc/papers/README.md: -------------------------------------------------------------------------------- 1 | 2 | This is a collection of papers somehow relevant to NPeg. 3 | -------------------------------------------------------------------------------- /doc/syntax-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/doc/syntax-diagram.png -------------------------------------------------------------------------------- /misc/README: -------------------------------------------------------------------------------- 1 | 2 | This directory contains various snippets, examples or other helpful things 3 | that I want to keep around but do not fit in elsewhere. 4 | -------------------------------------------------------------------------------- /misc/indent.nim: -------------------------------------------------------------------------------- 1 | # Indent syntax 2 | 3 | let data = """ 4 | a=123 5 | b= 6 | c=567 7 | e=42 8 | f=18 9 | g= 10 | b=44 11 | c=22 12 | """ 13 | 14 | var indentStack = @[""] 15 | template top[T](s: seq[T]): T = s[s.high] 16 | 17 | 18 | let p = peg doc: 19 | doc <- pairs * !1 20 | pairs <- pair * *('\n' * pair) 21 | pair <- indSame * key * '=' * val 22 | indentPairs <- '\n' * &indIn * pairs * &('\n' * indOut) 23 | key <- +Alpha: 24 | echo "key ", $0 25 | number <- +Digit: 26 | echo "val ", $0 27 | val <- number | indentPairs 28 | 29 | indSame <- *' ': 30 | validate $0 == indentStack.top 31 | 32 | indIn <- *' ': 33 | validate len($0) > len(indentStack.top) 34 | indentStack.add $0 35 | 36 | indOut <- *' ': 37 | discard indentStack.pop 38 | validate $0 == indentStack.top 39 | 40 | echo p.match(data).ok 41 | -------------------------------------------------------------------------------- /misc/java.nim: -------------------------------------------------------------------------------- 1 | 2 | # 3 | # This grammar has been 
auto-generated with mouse2npeg from the Mouse Java-1.6 4 | # grammar at http://www.romanredz.se/Mouse/Java.1.6.peg. It is not nice to look 5 | # at, but it does parse Java 6 | # 7 | 8 | import npeg 9 | let r = peg CompilationUnit: 10 | CompilationUnit <- Spacing * ?PackageDeclaration * *ImportDeclaration * *TypeDeclaration * EOT 11 | PackageDeclaration <- *Annotation * PACKAGE * QualifiedIdentifier * SEMI 12 | ImportDeclaration <- IMPORT * ?STATIC * QualifiedIdentifier * ?( DOT * STAR ) * SEMI 13 | TypeDeclaration <- *Modifier * ( ClassDeclaration | EnumDeclaration | InterfaceDeclaration | AnnotationTypeDeclaration ) | SEMI 14 | ClassDeclaration <- CLASS * Identifier * ?TypeParameters * ?( EXTENDS * ClassType ) * ?( IMPLEMENTS * ClassTypeList ) * ClassBody 15 | ClassBody <- LWING * *ClassBodyDeclaration * RWING 16 | ClassBodyDeclaration <- SEMI | ?STATIC * Block | *Modifier * MemberDecl 17 | MemberDecl <- TypeParameters * GenericMethodOrConstructorRest | Type * Identifier * MethodDeclaratorRest | Type * VariableDeclarators * SEMI | VOID * Identifier * VoidMethodDeclaratorRest | Identifier * ConstructorDeclaratorRest | InterfaceDeclaration | ClassDeclaration | EnumDeclaration | AnnotationTypeDeclaration 18 | GenericMethodOrConstructorRest <- ( Type | VOID ) * Identifier * MethodDeclaratorRest | Identifier * ConstructorDeclaratorRest 19 | MethodDeclaratorRest <- FormalParameters * *Dim * ?( THROWS * ClassTypeList ) * ( MethodBody | SEMI ) 20 | VoidMethodDeclaratorRest <- FormalParameters * ?( THROWS * ClassTypeList ) * ( MethodBody | SEMI ) 21 | ConstructorDeclaratorRest <- FormalParameters * ?( THROWS * ClassTypeList ) * MethodBody 22 | MethodBody <- Block 23 | InterfaceDeclaration <- INTERFACE * Identifier * ?TypeParameters * ?( EXTENDS * ClassTypeList ) * InterfaceBody 24 | InterfaceBody <- LWING * *InterfaceBodyDeclaration * RWING 25 | InterfaceBodyDeclaration <- *Modifier * InterfaceMemberDecl | SEMI 26 | InterfaceMemberDecl <- InterfaceMethodOrFieldDecl | 
InterfaceGenericMethodDecl | VOID * Identifier * VoidInterfaceMethodDeclaratorRest | InterfaceDeclaration | AnnotationTypeDeclaration | ClassDeclaration | EnumDeclaration 27 | InterfaceMethodOrFieldDecl <- Type * Identifier * InterfaceMethodOrFieldRest 28 | InterfaceMethodOrFieldRest <- ConstantDeclaratorsRest * SEMI | InterfaceMethodDeclaratorRest 29 | InterfaceMethodDeclaratorRest <- FormalParameters * *Dim * ?( THROWS * ClassTypeList ) * SEMI 30 | InterfaceGenericMethodDecl <- TypeParameters * ( Type | VOID ) * Identifier * InterfaceMethodDeclaratorRest 31 | VoidInterfaceMethodDeclaratorRest <- FormalParameters * ?( THROWS * ClassTypeList ) * SEMI 32 | ConstantDeclaratorsRest <- ConstantDeclaratorRest * *( COMMA * ConstantDeclarator ) 33 | ConstantDeclarator <- Identifier * ConstantDeclaratorRest 34 | ConstantDeclaratorRest <- *Dim * EQU * VariableInitializer 35 | EnumDeclaration <- ENUM * Identifier * ?( IMPLEMENTS * ClassTypeList ) * EnumBody 36 | EnumBody <- LWING * ?EnumConstants * ?COMMA * ?EnumBodyDeclarations * RWING 37 | EnumConstants <- EnumConstant * *( COMMA * EnumConstant ) 38 | EnumConstant <- *Annotation * Identifier * ?Arguments * ?ClassBody 39 | EnumBodyDeclarations <- SEMI * *ClassBodyDeclaration 40 | LocalVariableDeclarationStatement <- *( FINAL | Annotation ) * Type * VariableDeclarators * SEMI 41 | VariableDeclarators <- VariableDeclarator * *( COMMA * VariableDeclarator ) 42 | VariableDeclarator <- Identifier * *Dim * ?( EQU * VariableInitializer ) 43 | FormalParameters <- LPAR * ?FormalParameterList * RPAR 44 | FormalParameter <- *( FINAL | Annotation ) * Type * VariableDeclaratorId 45 | LastFormalParameter <- *( FINAL | Annotation ) * Type * ELLIPSIS * VariableDeclaratorId 46 | FormalParameterList <- FormalParameter * *( COMMA * FormalParameter ) * ?( COMMA * LastFormalParameter ) | LastFormalParameter 47 | VariableDeclaratorId <- Identifier * *Dim 48 | Block <- LWING * BlockStatements * RWING 49 | BlockStatements <- *BlockStatement 50 | 
BlockStatement <- LocalVariableDeclarationStatement | *Modifier * ( ClassDeclaration | EnumDeclaration ) | Statement 51 | Statement <- Block | ASSERT * Expression * ?( COLON * Expression ) * SEMI | IF * ParExpression * Statement * ?( ELSE * Statement ) | FOR * LPAR * ?ForInit * SEMI * ?Expression * SEMI * ?ForUpdate * RPAR * Statement | FOR * LPAR * FormalParameter * COLON * Expression * RPAR * Statement | WHILE * ParExpression * Statement | DO * Statement * WHILE * ParExpression * SEMI | TRY * Block * ( +Catch * ?Finally | Finally ) | SWITCH * ParExpression * LWING * SwitchBlockStatementGroups * RWING | SYNCHRONIZED * ParExpression * Block | RETURN * ?Expression * SEMI | THROW * Expression * SEMI | BREAK * ?Identifier * SEMI | CONTINUE * ?Identifier * SEMI | SEMI | StatementExpression * SEMI | Identifier * COLON * Statement 52 | Catch <- CATCH * LPAR * FormalParameter * RPAR * Block 53 | Finally <- FINALLY * Block 54 | SwitchBlockStatementGroups <- *SwitchBlockStatementGroup 55 | SwitchBlockStatementGroup <- SwitchLabel * BlockStatements 56 | SwitchLabel <- CASE * ConstantExpression * COLON | CASE * EnumConstantName * COLON | DEFAULT * COLON 57 | ForInit <- *( FINAL | Annotation ) * Type * VariableDeclarators | StatementExpression * *( COMMA * StatementExpression ) 58 | ForUpdate <- StatementExpression * *( COMMA * StatementExpression ) 59 | EnumConstantName <- Identifier 60 | StatementExpression <- Expression 61 | ConstantExpression <- Expression 62 | Expression <- ConditionalExpression * *( AssignmentOperator * ConditionalExpression ) 63 | AssignmentOperator <- EQU | PLUSEQU | MINUSEQU | STAREQU | DIVEQU | ANDEQU | OREQU | HATEQU | MODEQU | SLEQU | SREQU | BSREQU 64 | ConditionalExpression <- ConditionalOrExpression * *( QUERY * Expression * COLON * ConditionalOrExpression ) 65 | ConditionalOrExpression <- ConditionalAndExpression * *( OROR * ConditionalAndExpression ) 66 | ConditionalAndExpression <- InclusiveOrExpression * *( ANDAND * InclusiveOrExpression ) 
67 | InclusiveOrExpression <- ExclusiveOrExpression * *( OR * ExclusiveOrExpression ) 68 | ExclusiveOrExpression <- AndExpression * *( HAT * AndExpression ) 69 | AndExpression <- EqualityExpression * *( AND * EqualityExpression ) 70 | EqualityExpression <- RelationalExpression * *( ( EQUAL | NOTEQUAL ) * RelationalExpression ) 71 | RelationalExpression <- ShiftExpression * *( ( LE | GE | LT | GT ) * ShiftExpression | INSTANCEOF * ReferenceType ) 72 | ShiftExpression <- AdditiveExpression * *( ( SL | SR | BSR ) * AdditiveExpression ) 73 | AdditiveExpression <- MultiplicativeExpression * *( ( PLUS | MINUS ) * MultiplicativeExpression ) 74 | MultiplicativeExpression <- UnaryExpression * *( ( STAR | DIV | MOD ) * UnaryExpression ) 75 | UnaryExpression <- PrefixOp * UnaryExpression | LPAR * Type * RPAR * UnaryExpression | Primary * *( Selector ) * *( PostfixOp ) 76 | Primary <- ParExpression | NonWildcardTypeArguments * ( ExplicitGenericInvocationSuffix | THIS * Arguments ) | THIS * ?Arguments | SUPER * SuperSuffix | Literal | NEW * Creator | QualifiedIdentifier * ?IdentifierSuffix | BasicType * *Dim * DOT * CLASS | VOID * DOT * CLASS 77 | IdentifierSuffix <- LBRK * ( RBRK * *Dim * DOT * CLASS | Expression * RBRK ) | Arguments | DOT * ( CLASS | ExplicitGenericInvocation | THIS | SUPER * Arguments | NEW * ?NonWildcardTypeArguments * InnerCreator ) 78 | ExplicitGenericInvocation <- NonWildcardTypeArguments * ExplicitGenericInvocationSuffix 79 | NonWildcardTypeArguments <- LPOINT * ReferenceType * *( COMMA * ReferenceType ) * RPOINT 80 | ExplicitGenericInvocationSuffix <- SUPER * SuperSuffix | Identifier * Arguments 81 | PrefixOp <- INC | DEC | BANG | TILDA | PLUS | MINUS 82 | PostfixOp <- INC | DEC 83 | Selector <- DOT * Identifier * ?Arguments | DOT * ExplicitGenericInvocation | DOT * THIS | DOT * SUPER * SuperSuffix | DOT * NEW * ?NonWildcardTypeArguments * InnerCreator | DimExpr 84 | SuperSuffix <- Arguments | DOT * ?NonWildcardTypeArguments * Identifier * ?Arguments 
85 | BasicType <- ( "byte" | "short" | "char" | "int" | "long" | "float" | "double" | "boolean" ) * !LetterOrDigit * Spacing 86 | Arguments <- LPAR * ?( Expression * *( COMMA * Expression ) ) * RPAR 87 | Creator <- ?NonWildcardTypeArguments * CreatedName * ClassCreatorRest | ?NonWildcardTypeArguments * ( ClassType | BasicType ) * ArrayCreatorRest 88 | CreatedName <- Identifier * ?NonWildcardTypeArguments * *( DOT * Identifier * ?NonWildcardTypeArguments ) 89 | InnerCreator <- Identifier * ClassCreatorRest 90 | ArrayCreatorRest <- LBRK * ( RBRK * *Dim * ArrayInitializer | Expression * RBRK * *DimExpr * *Dim ) 91 | ClassCreatorRest <- Arguments * ?ClassBody 92 | ArrayInitializer <- LWING * ?( VariableInitializer * *( COMMA * VariableInitializer ) ) * ?COMMA * RWING 93 | VariableInitializer <- ArrayInitializer | Expression 94 | ParExpression <- LPAR * Expression * RPAR 95 | QualifiedIdentifier <- Identifier * *( DOT * Identifier ) 96 | Dim <- LBRK * RBRK 97 | DimExpr <- LBRK * Expression * RBRK 98 | Type <- ( BasicType | ClassType ) * *Dim 99 | ReferenceType <- BasicType * +Dim | ClassType * *Dim 100 | ClassType <- Identifier * ?TypeArguments * *( DOT * Identifier * ?TypeArguments ) 101 | ClassTypeList <- ClassType * *( COMMA * ClassType ) 102 | TypeArguments <- LPOINT * TypeArgument * *( COMMA * TypeArgument ) * RPOINT 103 | TypeArgument <- ReferenceType | QUERY * ?( ( EXTENDS | SUPER ) * ReferenceType ) 104 | TypeParameters <- LPOINT * TypeParameter * *( COMMA * TypeParameter ) * RPOINT 105 | TypeParameter <- Identifier * ?( EXTENDS * Bound ) 106 | Bound <- ClassType * *( AND * ClassType ) 107 | Modifier <- Annotation | ( "public" | "protected" | "private" | "static" | "abstract" | "final" | "native" | "synchronized" | "transient" | "volatile" | "strictfp" ) * !LetterOrDigit * Spacing 108 | AnnotationTypeDeclaration <- AT * INTERFACE * Identifier * AnnotationTypeBody 109 | AnnotationTypeBody <- LWING * *AnnotationTypeElementDeclaration * RWING 110 | 
AnnotationTypeElementDeclaration <- *Modifier * AnnotationTypeElementRest | SEMI 111 | AnnotationTypeElementRest <- Type * AnnotationMethodOrConstantRest * SEMI | ClassDeclaration | EnumDeclaration | InterfaceDeclaration | AnnotationTypeDeclaration 112 | AnnotationMethodOrConstantRest <- AnnotationMethodRest | AnnotationConstantRest 113 | AnnotationMethodRest <- Identifier * LPAR * RPAR * ?DefaultValue 114 | AnnotationConstantRest <- VariableDeclarators 115 | DefaultValue <- DEFAULT * ElementValue 116 | Annotation <- NormalAnnotation | SingleElementAnnotation | MarkerAnnotation 117 | NormalAnnotation <- AT * QualifiedIdentifier * LPAR * ?ElementValuePairs * RPAR 118 | SingleElementAnnotation <- AT * QualifiedIdentifier * LPAR * ElementValue * RPAR 119 | MarkerAnnotation <- AT * QualifiedIdentifier 120 | ElementValuePairs <- ElementValuePair * *( COMMA * ElementValuePair ) 121 | ElementValuePair <- Identifier * EQU * ElementValue 122 | ElementValue <- ConditionalExpression | Annotation | ElementValueArrayInitializer 123 | ElementValueArrayInitializer <- LWING * ?ElementValues * ?COMMA * RWING 124 | ElementValues <- ElementValue * *( COMMA * ElementValue ) 125 | Spacing <- *( +{' ','\t','\r','\n','\x0c'} | "/*" * *( !"*/" * 1 ) * "*/" | "//" * *( !{'\r','\n'} * 1 ) * {'\r','\n'} ) 126 | Identifier <- !Keyword * Letter * *LetterOrDigit * Spacing 127 | Letter <- {'a'..'z'} | {'A'..'Z'} | {'_','$'} 128 | LetterOrDigit <- {'a'..'z'} | {'A'..'Z'} | {'0'..'9'} | {'_','$'} 129 | Keyword <- ( "abstract" | "assert" | "boolean" | "break" | "byte" | "case" | "catch" | "char" | "class" | "const" | "continue" | "default" | "double" | "do" | "else" | "enum" | "extends" | "false" | "finally" | "final" | "float" | "for" | "goto" | "if" | "implements" | "import" | "interface" | "int" | "instanceof" | "long" | "native" | "new" | "null" | "package" | "private" | "protected" | "public" | "return" | "short" | "static" | "strictfp" | "super" | "switch" | "synchronized" | "this" | "throws" 
| "throw" | "transient" | "true" | "try" | "void" | "volatile" | "while" ) * !LetterOrDigit 130 | ASSERT <- "assert" * !LetterOrDigit * Spacing 131 | BREAK <- "break" * !LetterOrDigit * Spacing 132 | CASE <- "case" * !LetterOrDigit * Spacing 133 | CATCH <- "catch" * !LetterOrDigit * Spacing 134 | CLASS <- "class" * !LetterOrDigit * Spacing 135 | CONTINUE <- "continue" * !LetterOrDigit * Spacing 136 | DEFAULT <- "default" * !LetterOrDigit * Spacing 137 | DO <- "do" * !LetterOrDigit * Spacing 138 | ELSE <- "else" * !LetterOrDigit * Spacing 139 | ENUM <- "enum" * !LetterOrDigit * Spacing 140 | EXTENDS <- "extends" * !LetterOrDigit * Spacing 141 | FINALLY <- "finally" * !LetterOrDigit * Spacing 142 | FINAL <- "final" * !LetterOrDigit * Spacing 143 | FOR <- "for" * !LetterOrDigit * Spacing 144 | IF <- "if" * !LetterOrDigit * Spacing 145 | IMPLEMENTS <- "implements" * !LetterOrDigit * Spacing 146 | IMPORT <- "import" * !LetterOrDigit * Spacing 147 | INTERFACE <- "interface" * !LetterOrDigit * Spacing 148 | INSTANCEOF <- "instanceof" * !LetterOrDigit * Spacing 149 | NEW <- "new" * !LetterOrDigit * Spacing 150 | PACKAGE <- "package" * !LetterOrDigit * Spacing 151 | RETURN <- "return" * !LetterOrDigit * Spacing 152 | STATIC <- "static" * !LetterOrDigit * Spacing 153 | SUPER <- "super" * !LetterOrDigit * Spacing 154 | SWITCH <- "switch" * !LetterOrDigit * Spacing 155 | SYNCHRONIZED <- "synchronized" * !LetterOrDigit * Spacing 156 | THIS <- "this" * !LetterOrDigit * Spacing 157 | THROWS <- "throws" * !LetterOrDigit * Spacing 158 | THROW <- "throw" * !LetterOrDigit * Spacing 159 | TRY <- "try" * !LetterOrDigit * Spacing 160 | VOID <- "void" * !LetterOrDigit * Spacing 161 | WHILE <- "while" * !LetterOrDigit * Spacing 162 | Literal <- ( FloatLiteral | IntegerLiteral | CharLiteral | StringLiteral | "true" * !LetterOrDigit | "false" * !LetterOrDigit | "null" * !LetterOrDigit ) * Spacing 163 | IntegerLiteral <- ( HexNumeral | OctalNumeral | DecimalNumeral ) * ?{'l','L'} 164 | 
DecimalNumeral <- "0" | {'1'..'9'} * *{'0'..'9'} 165 | HexNumeral <- ( "0x" | "0X" ) * +HexDigit 166 | HexDigit <- {'a'..'f'} | {'A'..'F'} | {'0'..'9'} 167 | OctalNumeral <- "0" * +{'0'..'7'} 168 | FloatLiteral <- HexFloat | DecimalFloat 169 | DecimalFloat <- +Digit * "." * *Digit * ?Exponent * ?{'f','F','d','D'} | "." * +Digit * ?Exponent * ?{'f','F','d','D'} | +Digit * Exponent * ?{'f','F','d','D'} | +Digit * ?Exponent * {'f','F','d','D'} 170 | Exponent <- {'e','E'} * ?{'+','\\','-'} * +Digit 171 | Digit <- {'0'..'9'} 172 | HexFloat <- HexSignificand * BinaryExponent * ?{'f','F','d','D'} 173 | HexSignificand <- ( "0x" | "0X" ) * *HexDigit * "." * +HexDigit | HexNumeral * ?"." 174 | BinaryExponent <- {'p','P'} * ?{'+','\\','-'} * +Digit 175 | CharLiteral <- "\'" * ( Escape | !{'\'','\\','\n','\r'} * 1 ) * "\'" 176 | StringLiteral <- "\"" * *( Escape | !{'"','\\','\n','\r'} * 1 ) * "\"" 177 | Escape <- "\\" * ( {'b','t','n','f','r','"','\'','\\'} | OctalEscape | UnicodeEscape ) 178 | OctalEscape <- {'0'..'3'} * {'0'..'7'} * {'0'..'7'} | {'0'..'7'} * {'0'..'7'} | {'0'..'7'} 179 | UnicodeEscape <- +"u" * HexDigit * HexDigit * HexDigit * HexDigit 180 | AT <- "@" * Spacing 181 | AND <- "&" * !{'=','&'} * Spacing 182 | ANDAND <- "&&" * Spacing 183 | ANDEQU <- "&=" * Spacing 184 | BANG <- "!" * !"=" * Spacing 185 | BSR <- ">>>" * !"=" * Spacing 186 | BSREQU <- ">>>=" * Spacing 187 | COLON <- ":" * Spacing 188 | COMMA <- "," * Spacing 189 | DEC <- "--" * Spacing 190 | DIV <- "/" * !"=" * Spacing 191 | DIVEQU <- "/=" * Spacing 192 | DOT <- "." * Spacing 193 | ELLIPSIS <- "..." 
* Spacing 194 | EQU <- "=" * !"=" * Spacing 195 | EQUAL <- "==" * Spacing 196 | GE <- ">=" * Spacing 197 | GT <- ">" * !{'=','>'} * Spacing 198 | HAT <- "^" * !"=" * Spacing 199 | HATEQU <- "^=" * Spacing 200 | INC <- "++" * Spacing 201 | LBRK <- "[" * Spacing 202 | LE <- "<=" * Spacing 203 | LPAR <- "(" * Spacing 204 | LPOINT <- "<" * Spacing 205 | LT <- "<" * !{'=','<'} * Spacing 206 | LWING <- "{" * Spacing 207 | MINUS <- "-" * !{'=','\\','-'} * Spacing 208 | MINUSEQU <- "-=" * Spacing 209 | MOD <- "%" * !"=" * Spacing 210 | MODEQU <- "%=" * Spacing 211 | NOTEQUAL <- "!=" * Spacing 212 | OR <- "|" * !{'=','|'} * Spacing 213 | OREQU <- "|=" * Spacing 214 | OROR <- "||" * Spacing 215 | PLUS <- "+" * !{'=','+'} * Spacing 216 | PLUSEQU <- "+=" * Spacing 217 | QUERY <- "?" * Spacing 218 | RBRK <- "]" * Spacing 219 | RPAR <- ")" * Spacing 220 | RPOINT <- ">" * Spacing 221 | RWING <- "}" * Spacing 222 | SEMI <- ";" * Spacing 223 | SL <- "<<" * !"=" * Spacing 224 | SLEQU <- "<<=" * Spacing 225 | SR <- ">>" * !{'=','>'} * Spacing 226 | SREQU <- ">>=" * Spacing 227 | STAR <- "*" * !"=" * Spacing 228 | STAREQU <- "*=" * Spacing 229 | TILDA <- "~" * Spacing 230 | EOT <- !1 231 | 232 | -------------------------------------------------------------------------------- /misc/mouse2npeg.nim: -------------------------------------------------------------------------------- 1 | # 2 | # Convert a Mouse PEG grammar into NPeg grammar 3 | # http://www.romanredz.se/Mouse/ 4 | # 5 | 6 | import npeg 7 | import npeg/common 8 | import strutils 9 | 10 | # Parse the Mouse grammar into an ASTNode tree 11 | 12 | let mouse = peg "mouse": 13 | mouse <- A("mouse", *rule) * ?s * !1 14 | rule <- ?s * A("rule", >name * s * "=" * s * patt) 15 | patt <- A("patt", choice * ?sem * s * ';') 16 | sem <- ('{' * @'}') 17 | choice <- A("choice", seq * s * *('/' * s * seq)) 18 | seq <- A("seq", prefixed * *(s * prefixed) * s) 19 | nonterm <- A("nonterm", >name) 20 | prefixed <- A("pre", ?>'!' 
* postfixed) 21 | postfixed <- A("post", (paren | nonterm | lit) * >?postfix) 22 | lit <- any | range | set | string 23 | any <- A("any", '_') 24 | range <- A("range", '[' * >(char * '-' * char) * ']') 25 | set <- A("set", '[' * +(char-']') * ']') 26 | string <- A("string", '"' * +(char-'"') * '"') 27 | paren <- A("paren", '(' * s * choice * s * ')') 28 | postfix <- {'+','*','?'} 29 | name <- +Alpha 30 | char <- A("char", >( ("\\u" * Xdigit[4]) | ('\\' * {'\\','r','n','t','"'}) | 1)) 31 | nl <- {'\r','\n'} 32 | s <- *( +Space | comment | sem ) 33 | comment <- "//" * >*(1-nl) 34 | 35 | 36 | # Dump the PEG ast tree into NPeg form 37 | 38 | proc dump(a: ASTNode): string = 39 | proc unescapeChar(s: string): string = 40 | if s == "'": 41 | result = "\\'" 42 | elif s == "\\": 43 | result = "\\\\" 44 | elif s.len == 6: 45 | result = $(parseHexInt(s[2..5]).char.escapeChar) 46 | else: 47 | result = s 48 | case a.id: 49 | of "mouse": 50 | for c in a: 51 | result.add dump(c) 52 | of "rule": 53 | return " " & $a.val & " <- " & dump(a["patt"]) & "\n" 54 | of "patt": 55 | return dump a[0] 56 | of "choice": 57 | var parts: seq[string] 58 | for c in a: 59 | parts.add dump(c) 60 | return parts.join(" | ") 61 | of "seq": 62 | var parts: seq[string] 63 | for c in a: 64 | parts.add dump(c) 65 | return parts.join(" * ") 66 | of "paren": 67 | return "( " & dump(a[0]) & " )" 68 | of "pre": 69 | return a.val & dump(a[0]) 70 | of "post": 71 | return a.val & dump(a[0]) 72 | of "nonterm": 73 | return a.val 74 | of "any": 75 | return "1" 76 | of "string": 77 | result.add '"' 78 | for c in a: 79 | result.add unescapeChar(c.val) 80 | result.add '"' 81 | of "set": 82 | var cs: seq[string] 83 | for c in a: cs.add unescapeChar(c.val) 84 | return "{'" & cs.join("','") & "'}" 85 | of "range": 86 | return "{'" & escapeChar(a.val[0]) & "'..'" & escapeChar(a.val[2]) & "'}" 87 | else: 88 | echo "\nUnhnandled " & a.id 89 | quit 1 90 | 91 | 92 | # http://www.romanredz.se/Mouse/Java.1.6.peg 93 | 94 | let r 
= mouse.matchFile("/tmp/Java.1.6.peg") 95 | if not r.ok: 96 | echo "Error parsing at ", r.matchMax 97 | quit 1 98 | 99 | echo "import npeg" 100 | echo "let r = peg CompilationUnit:" 101 | 102 | echo dump(r.capturesAst()) 103 | 104 | -------------------------------------------------------------------------------- /misc/rod.nim: -------------------------------------------------------------------------------- 1 | import npeg 2 | import strutils 3 | 4 | # Rod AST node types 5 | 6 | type 7 | NodeKind* = enum 8 | nkEmpty 9 | nkScript, nkBlock 10 | nkBool, nkNumber, nkString, nkIdent 11 | nkPrefix, nkInfix, nkDot, nkIndex 12 | nkVar, nkLet 13 | nkIf, nkWhile, nkFor 14 | nkBreak, nkContinue 15 | nkCall 16 | nkGeneric 17 | nkObject, nkObjFields, nkObjConstr 18 | Node* = ref object 19 | ln*, col*: int 20 | file*: string 21 | case kind*: NodeKind 22 | of nkEmpty: discard 23 | of nkBool: 24 | boolVal*: bool 25 | of nkNumber: 26 | numberVal*: float 27 | of nkString: 28 | stringVal*: string 29 | of nkIdent: 30 | ident*: string 31 | else: 32 | children*: seq[Node] 33 | 34 | type 35 | ParseStack = seq[Node] 36 | 37 | 38 | # Pretty printing 39 | 40 | proc `$`*(node: Node, showLineInfo = false): string = 41 | const LeafNodes = { nkEmpty, nkBool, nkNumber, nkString, nkIdent, nkPrefix, nkInfix } 42 | case node.kind 43 | of nkEmpty: result = "" 44 | of nkBool: result = $node.boolVal 45 | of nkNumber: result = $node.numberVal 46 | of nkString: result = escape(node.stringVal) 47 | of nkIdent: result = node.ident 48 | else: 49 | result = (if showLineInfo: $node.ln & ":" & $node.col & " " else: "") & 50 | "(" & (case node.kind 51 | of nkPrefix, nkInfix: "" 52 | else: $node.kind & " ") 53 | for i, child in node.children: 54 | if child.kind notin LeafNodes and node.children.len > 1: 55 | result.add("\n") 56 | result.add(indent(`$`(child, showLineInfo), 2)) 57 | else: 58 | if i > 0: 59 | result.add(" ") 60 | result.add(`$`(child, showLineInfo)) 61 | result.add(")") 62 | 63 | proc `$`*(ps: 
ParseStack): string = 64 | for i, n in ps: 65 | result &= $i & ":\n" & $n & "\n" 66 | result &= "\n" 67 | 68 | 69 | 70 | proc addToParent(ps: var ParseStack, ns: varargs[Node]) = 71 | ps[ps.high].children.add ns 72 | 73 | proc swap(ps: var ParseStack) = 74 | ps.add ps[ps.high-1] 75 | ps.delete ps.high-2 76 | 77 | let p = peg(rod, ps: ParseStack): 78 | 79 | S <- *Space 80 | 81 | # Basic tokens 82 | 83 | tokColon <- ":" * S 84 | tokEquals <- "=" * S 85 | tokComma <- "," * S 86 | tokPlus <- "+" * S 87 | tokMinus <- "-" * S 88 | tokMul <- "*" * S 89 | tokDiv <- "/" * S 90 | tokParOpen <- "(" * S 91 | tokParClose <- ")" * S 92 | tokCurOpen <- "{" * S 93 | tokCurClose <- "}" * S 94 | tokVar <- "var" * S 95 | tokLet <- "let" * S 96 | tokIf <- "if" * S 97 | tokElif <- "elif" * S 98 | tokElse <- "else" * S 99 | tokWhile <- "while" * S 100 | tokObject <- "object" * S 101 | 102 | keyWords <- "var" | "let" | "if" | "elif" | "else" | "while" | "object" 103 | 104 | # Atoms 105 | 106 | tokNumber <- >+Digit * S: 107 | ps.add Node(kind: nkNumber, numberVal: parseFloat($1)) 108 | 109 | tokType <- Alpha * *Alnum * S 110 | 111 | tokBool <- >("true" | "false") * S: 112 | ps.add Node(kind: nkBool, boolval: $1 == "true") 113 | 114 | tokIdent <- >((Alpha * *Alnum) - keyWords) * S: 115 | ps.add Node(kind: nkIdent, ident: $1) 116 | 117 | # Block 118 | 119 | blockOpen <- tokCurOpen: 120 | ps.add Node(kind: nkBlock) 121 | 122 | blockStmt <- stmt: 123 | ps.addToParent ps.pop() 124 | 125 | blockSec <- blockOpen * *blockStmt * tokCurClose 126 | 127 | # Var section 128 | 129 | varOpen <- (tokVar | tokLet): 130 | ps.add Node(kind: nkVar) 131 | 132 | varDef <- tokIdent * ?(tokColon * tokType) * ?(tokEquals * exprSec): 133 | ps.swap() 134 | ps.addToParent Node(kind: nkVar, 135 | children: @[Node(kind: nkIdent, ident: "="), ps.pop(), ps.pop()]) 136 | 137 | varSec <- varOpen * +varDef * *(tokComma * varDef): 138 | ps.add ps.pop() 139 | 140 | # While statement 141 | 142 | whileSec <- tokWhile * exprSec 
* blockSec: 143 | ps.swap() 144 | ps.add Node(kind: nkWhile, children: @[ps.pop(), ps.pop()]) 145 | 146 | # If expressions 147 | 148 | ifOpen <- tokIf * exprSec * blockSec: 149 | let (nBlock, nExpr) = (ps.pop(), ps.pop()) 150 | ps.add Node(kind: nkIf, children: @[nExpr, nBlock]) 151 | 152 | ifElif <- (tokElif * exprSec * blockSec): 153 | ps.swap() 154 | ps.addtoParent ps.pop(), ps.pop() 155 | 156 | ifElse <- ?(tokElse * blockSec): 157 | ps.addToParent ps.pop() 158 | 159 | ifExpr <- ifOpen * *ifElif * ?ifElse 160 | 161 | # Object 162 | 163 | objectSec <- tokObject * tokIdent * tokCurOpen * objFields * tokCurClose 164 | 165 | objFields <- tokIdent * *(tokComma * tokIdent) * tokColon * tokType 166 | 167 | stmt <- blockSec | varSec | objectSec | whileSec | exprSec 168 | 169 | rod <- S * +stmt * !1 170 | 171 | # Expressions: Pratt parser 172 | 173 | exprSec <- exp 174 | 175 | exp <- S * prefix * *infix 176 | 177 | prefix <- ifExpr | tokBool | tokNumber | parenExp | uniMinus | tokIdent 178 | uniMinus <- >'-' * exp 179 | parenExp <- ( tokParOpen * exp * tokParClose ) ^ 0 180 | 181 | infix <- >("not" | "->" | "$") * exp ^ 1 | 182 | >("=") * exp ^ 2 | 183 | >("or" | "xor") * exp ^ 3 | 184 | >("and") * exp ^ 4 | 185 | >("==" | "<=" | "<" | ">=" | ">" | "!=" | 186 | "in" | "notin" | "is" | "isnot" | "of") * exp ^ 5 | 187 | >(".." 
| "..<") * exp ^ 6 | 188 | >("&") * exp ^ 7 | 189 | >("+" | "-") * exp ^ 8 | 190 | >("*" | "/" | "%") * exp ^ 9 | 191 | >("div" | "mod" | "shl" | "shr") * exp ^ 10 | 192 | >("^") * exp ^^ 11: 193 | 194 | let (f2, f1) = (ps.pop(), ps.pop()) 195 | ps.add Node(kind: nkInfix, children: 196 | @[Node(kind: nkIdent, ident: $1), f1, f2]) 197 | 198 | 199 | proc compile(source:string) = 200 | var ps: ParseStack 201 | echo "---------------" 202 | echo source 203 | if p.match(source, ps).ok: 204 | echo "---------------" 205 | let n = Node(kind: nkBlock, children: ps) 206 | echo n 207 | 208 | when false: 209 | compile """ 210 | if a > 3 { 211 | var w = 42 212 | } 213 | """ 214 | 215 | when false: 216 | compile(""" 217 | var 218 | a = 2 + 2, 219 | b = 2 + a 220 | """) 221 | 222 | when true: 223 | compile(""" 224 | { var a = 10 225 | { var a = a } } 226 | { var a = 12 227 | a = a + 3 } 228 | """) 229 | 230 | when false: 231 | compile(""" 232 | let x = true 233 | if x { 234 | var x = 2 235 | } 236 | """) 237 | 238 | when false: 239 | compile(""" 240 | let x = true 241 | if x { 242 | var x = 2 243 | } elif false { 244 | var y = 3 245 | } elif false { 246 | var z = 4 247 | } else { 248 | var w = 5 249 | } 250 | """) 251 | 252 | when false: 253 | compile(""" 254 | let x = if true { 2 } 255 | else { 4 } 256 | """) 257 | 258 | when false: 259 | compile(""" 260 | let x = true 261 | while x { 262 | let y = 1 263 | } 264 | """) 265 | 266 | when false: 267 | compile(""" 268 | while true { 269 | let y = 1 270 | } 271 | """) 272 | 273 | when false: 274 | compile(""" 275 | while false { 276 | let y = 1 277 | } 278 | """) 279 | 280 | when false: 281 | compile(""" 282 | var 283 | x = 0, 284 | stop = false 285 | while x { 286 | } 287 | """) 288 | -------------------------------------------------------------------------------- /npeg.nimble: -------------------------------------------------------------------------------- 1 | # Package 2 | 3 | version = "1.3.0" 4 | author = "Ico Doornekamp" 5 | 
description = "a PEG library" 6 | license = "MIT" 7 | srcDir = "src" 8 | installExt = @["nim"] 9 | 10 | # Dependencies 11 | 12 | requires "nim >= 0.19.0" 13 | 14 | # Test 15 | 16 | task test, "Runs the test suite": 17 | exec "nimble testc && nimble testcpp && nimble testarc && nimble testjs" 18 | 19 | task testc, "C tests": 20 | exec "nim c -r tests/tests.nim" 21 | 22 | task testcpp, "CPP tests": 23 | exec "nim cpp -r tests/tests.nim" 24 | 25 | task testjs, "JS tests": 26 | exec "nim js -r tests/tests.nim" 27 | 28 | task testdanger, "Runs the test suite in danger mode": 29 | exec "nim c -d:danger -r tests/tests.nim" 30 | 31 | task testwin, "Mingw tests": 32 | exec "nim c -d:mingw tests/tests.nim && wine tests/tests.exe" 33 | 34 | task test32, "32 bit tests": 35 | exec "nim c --cpu:i386 --passC:-m32 --passL:-m32 tests/tests.nim && tests/tests" 36 | 37 | task testall, "Test all": 38 | exec "nimble test && nimble testcpp && nimble testdanger && nimble testjs && nimble testwin" 39 | 40 | when (NimMajor, NimMinor) >= (1, 1): 41 | task testarc, "--gc:arc tests": 42 | exec "nim c --gc:arc -r tests/tests.nim" 43 | else: 44 | task testarc, "--gc:arc tests": 45 | exec "true" 46 | 47 | task perf, "Test performance": 48 | exec "nim cpp -r -d:danger tests/performance.nim" 49 | -------------------------------------------------------------------------------- /src/npeg.nim: -------------------------------------------------------------------------------- 1 | 2 | # 3 | # Copyright (c) 2019 Ico Doornekamp 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | 
# The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 22 | # 23 | # This parser implementation is based on the following papers: 24 | # 25 | # - A Text Pattern-Matching Tool based on Parsing Expression Grammars 26 | # (Roberto Ierusalimschy) 27 | # 28 | # - An efficient parsing machine for PEGs 29 | # (Jos Craaijo) 30 | # 31 | 32 | ## Note: This document is rather terse; for the complete NPeg manual please refer 33 | ## to the README.md or the git project page at https://github.com/zevv/npeg 34 | ## 35 | ## NPeg is a pure Nim pattern matching library. It provides macros to compile 36 | ## patterns and grammars (PEGs) to Nim procedures which will parse a string and 37 | ## collect selected parts of the input. PEGs are not unlike regular 38 | ## expressions, but offer more power and flexibility, and have fewer ambiguities. 
39 | ## 40 | ## Here is a simple example showing the power of NPeg: The macro `peg` compiles a 41 | ## grammar definition into a `parser` object, which is used to match a string and 42 | ## place the key-value pairs into the Nim table `words`: 43 | 44 | runnableExamples: 45 | 46 | import npeg, strutils, tables 47 | 48 | var words: Table[string, int] 49 | 50 | let parser = peg "pairs": 51 | pairs <- pair * *(',' * pair) * !1 52 | word <- +Alpha 53 | number <- +Digit 54 | pair <- >word * '=' * >number: 55 | words[$1] = parseInt($2) 56 | 57 | doAssert parser.match("one=1,two=2,three=3,four=4").ok 58 | 59 | 60 | import tables 61 | import macros 62 | import strutils 63 | import npeg/[common,codegen,capture,parsepatt,grammar,dot] 64 | 65 | export NPegException, 66 | NPegStackOverflowError, 67 | NPegUnknownBackrefError, 68 | NPegCaptureOutOfRangeError, 69 | NpegParseError, 70 | contains, `[]`, len 71 | 72 | # Create a parser for a PEG grammar 73 | 74 | proc pegAux(name: string, subjectType, userDataType, userDataId, n: NimNode): NimNode = 75 | var dot = newDot(name) 76 | var grammar = parseGrammar(n, dot) 77 | var program = grammar.link(name, dot) 78 | let code = program.genCode(subjectType, userDataType, userDataId) 79 | dot.dump() 80 | code 81 | 82 | macro peg*(name: untyped, n: untyped): untyped = 83 | ## Construct a parser from the given PEG grammar. `name` is the initial 84 | ## grammar rule where parsing starts. This macro returns a `Parser` type 85 | ## which can later be used for matching subjects with the `match()` proc 86 | pegAux name.strVal, ident "char", ident "bool", ident "userdata", n 87 | 88 | macro peg*(name: untyped, userData: untyped, n: untyped): untyped = 89 | ## Construct a parser from the given PEG grammar. `name` is the initial 90 | ## grammar rule where parsing starts. 
This macro returns a `Parser` type 91 | ## which can later be used for matching subjects with the `match()` proc 92 | ## 93 | ## The `userdata` argument is a colon expression with an identifier and a 94 | ## type, this identifier is available in code block captures during parsing. 95 | expectKind(userData, nnkExprColonExpr) 96 | pegAux name.strVal, ident "char", userData[1], userData[0], n 97 | 98 | macro peg*(name: untyped, subjectType, userData, n: untyped): untyped = 99 | ## Construct a parser from the given PEG grammar. `name` is the initial 100 | ## grammar rule where parsing starts. This macro returns a `Parser` type 101 | ## which can later be used for matching subjects with the `match()` proc 102 | ## 103 | ## The `subjectType` argument is a Nim type which should match the base 104 | ## type of the subject passed to `match()`. 105 | ## 106 | ## The `userdata` argument is a colon expression with an identifier and a 107 | ## type, this identifier is available in code block captures during parsing. 108 | expectKind(userData, nnkExprColonExpr) 109 | pegAux name.strVal, subjectType, userData[1], userData[0], n 110 | 111 | template patt*(n: untyped): untyped = 112 | ## Construct a parser from a single PEG rule. This is similar to the regular 113 | ## `peg()` macro, but useful for short regexp-like parsers that do not need a 114 | ## complete grammar. 115 | peg anonymous: 116 | anonymous <- n 117 | 118 | template patt*(n: untyped, code: untyped): untyped = 119 | ## Construct a parser from a single PEG rule. This is similar to the regular 120 | ## `peg()` macro, but useful for short regexp-like parsers that do not need a 121 | ## complete grammar. This variant takes a code block which will be used as 122 | ## code block capture for the anonymous rule. 
123 | peg anonymous: 124 | anonymous <- n: 125 | code 126 | 127 | macro grammar*(libNameNode: untyped, n: untyped) = 128 | ## This macro defines a collection of rules to be stored in NPeg's global 129 | ## grammar library. 130 | let libName = libNameNode.strVal 131 | let grammar = parseGrammar(n, dumpRailroad = libName != "") 132 | libStore(libName, grammar) 133 | 134 | 135 | proc match*[S, T](p: Parser, s: openArray[S], userData: var T): MatchResult[S] = 136 | ## Match a subject string with the given generic parser. The returned 137 | ## `MatchResult` contains the result of the match and can be used to query 138 | ## any captures. 139 | var ms = p.fn_init() 140 | p.fn_run(ms, s, userData) 141 | 142 | 143 | proc match*[S](p: Parser, s: openArray[S]): MatchResult[S] = 144 | ## Match a subject string with the given parser. The returned `MatchResult` 145 | ## contains the result of the match and can be used to query any captures. 146 | var userData: bool # dummy if user does not provide a type 147 | p.match(s, userData) 148 | 149 | 150 | # Match a file 151 | 152 | when defined(windows) or defined(posix): 153 | import memfiles, os 154 | proc matchFile*[T](p: Parser, fname: string, userData: var T): MatchResult[char] = 155 | # memfiles.open() throws on empty files, work around that 156 | if os.getFileSize(fname) > 0: 157 | var m = memfiles.open(fname) 158 | var a: ptr UncheckedArray[char] = cast[ptr UncheckedArray[char]](m.mem) 159 | var ms = p.fn_init() 160 | result = p.fn_run(ms, toOpenArray(a, 0, m.size-1), userData) 161 | m.close() 162 | else: 163 | result = match(p, "", userData) 164 | 165 | proc matchFile*(p: Parser, fname: string): MatchResult[char] = 166 | var userData: bool # dummy if user does not provide a type 167 | matchFile(p, fname, userData) 168 | 169 | 170 | proc captures*(mr: MatchResult[char]): seq[string] = 171 | ## Return all plain string captures from the match result 172 | for cap in collectCaptures(mr.cs): 173 | result.add cap.s 174 | 175 | proc 
captures*[S](mr: MatchResult[S]): seq[S] = 176 | ## Return all plain captures from the match result as a seq of `S` 177 | for cap in collectCaptures(mr.cs): 178 | result.add cap.s 179 | 180 | template nimBug22740*() = 181 | ## Provide stub templates as a workaround for https://github.com/nim-lang/Nim/issues/22740. 182 | ## Invoke this template in your code if you want to define a parser in a generic proc. 183 | template `>`(a: untyped): untyped = discard 184 | template `*`(a: untyped): untyped = discard 185 | template `-`(a: untyped): untyped = discard 186 | template `+`(a: untyped): untyped = discard 187 | template `?`(a: untyped): untyped = discard 188 | template `!`(a: untyped): untyped = discard 189 | template `$`(a: untyped): untyped = discard 190 | 191 | 192 | import npeg/lib/core 193 | 194 | -------------------------------------------------------------------------------- /src/npeg/capture.nim: -------------------------------------------------------------------------------- 1 | 2 | import strutils 3 | import sequtils 4 | import npeg/[stack,common] 5 | 6 | type 7 | 8 | Capture*[S] = object 9 | ck: CapKind 10 | si*: int 11 | name: string 12 | len: int 13 | when S is char: 14 | s*: string 15 | else: 16 | s*: S 17 | 18 | Captures*[S] = object 19 | capList*: seq[Capture[S]] 20 | 21 | FixMethod* = enum 22 | FixAll, FixOpen 23 | 24 | # Search the capStack for cftOpen matching the cftClose on top 25 | 26 | proc findTop[S](capStack: var Stack[CapFrame[S]], fm: FixMethod): int = 27 | if fm == FixOpen: 28 | var i = capStack.top - 1 29 | var depth = 0 30 | while true: 31 | if capStack[i].cft == cftClose: inc depth else: dec depth 32 | if depth == 0: break 33 | dec i 34 | result = i 35 | 36 | # Convert all closed CapFrames on the capture stack to a list of Captures, all 37 | # consumed frames are removed from the CapStack 38 | 39 | proc fixCaptures*[S](s: openArray[S], capStack: var Stack[CapFrame[S]], fm: FixMethod): Captures[S] = 40 | 41 | assert capStack.top > 0 42 | assert 
capStack.peek.cft == cftClose 43 | when npegDebug: echo $capStack 44 | 45 | # Convert the closed frames to a seq[Capture] 46 | 47 | var stack = initStack[int]("captures", 8) 48 | let iFrom = findTop(capStack, fm) 49 | 50 | for i in iFrom..= cs.capList.len: 86 | let msg = "Capture out of range, " & $i & " is not in [0.." & $cs.capList.high & "]" 87 | raise newException(NPegCaptureOutOfRangeError, msg) 88 | cs.capList[i] 89 | 90 | proc `[]`*[S](cs: Captures[S], i: int): Capture[S] = 91 | cs.getCapture(i) 92 | 93 | proc `[]`*[S](cs: Captures[S], i: BackwardsIndex): Capture[S] = 94 | cs.getCapture(cs.capList.len-i.int) 95 | 96 | proc `[]`*[S](cs: Captures[S], range: HSlice[system.int, system.int]): seq[Capture[S]] = 97 | for i in range: 98 | result.add cs.getCapture(i) 99 | 100 | iterator items*[S](captures: Captures[S]): Capture[S] = 101 | for c in captures.capList: 102 | yield c 103 | 104 | proc len*[S](captures: Captures[S]): int = 105 | captures.capList.len 106 | 107 | -------------------------------------------------------------------------------- /src/npeg/codegen.nim: -------------------------------------------------------------------------------- 1 | 2 | import macros except quote, stamp 3 | import strutils 4 | import tables 5 | import npeg/[common,patt,stack,capture] 6 | 7 | type 8 | 9 | RetFrame = int 10 | 11 | BackFrame = object 12 | ip*: int # Instruction pointer 13 | si*: int # Subject index 14 | rp*: int # Retstack top pointer 15 | cp*: int # Capstack top pointer 16 | pp*: int # PrecStack top pointer 17 | 18 | PrecFrame = int 19 | 20 | MatchResult*[S] = object 21 | ok*: bool 22 | matchLen*: int 23 | matchMax*: int 24 | cs*: Captures[S] 25 | 26 | MatchState*[S] = object 27 | ip*: int 28 | si*: int 29 | simax*: int 30 | refs*: Table[string, string] 31 | retStack*: Stack[RetFrame] 32 | capStack*: Stack[CapFrame[S]] 33 | backStack*: Stack[BackFrame] 34 | precStack*: Stack[PrecFrame] 35 | 36 | Parser*[S, T] = object 37 | fn_init*: proc(): MatchState[S] 38 | 
when npegGcsafe: 39 | fn_run*: proc(ms: var MatchState[S], s: openArray[S], u: var T): MatchResult[S] {.gcsafe.} 40 | else: 41 | fn_run*: proc(ms: var MatchState[S], s: openArray[S], u: var T): MatchResult[S] 42 | 43 | when declared(macros.stamp): # nimskull 44 | template quote(body: untyped): NimNode = 45 | macros.stamp(body) 46 | else: 47 | template quote(body: untyped): NimNode = 48 | macros.quote(body) 49 | 50 | # This proc translates `$1`.. into `capture[1].s`.. and `@1` into `capture[1].si` 51 | # for use in code block captures. The source nimnode lineinfo is recursively 52 | # copied to the newly generated node to make sure "Capture out of range" 53 | # exceptions are properly traced. 54 | 55 | proc doSugar(n, captureId: NimNode): NimNode = 56 | proc cli(n2: NimNode) = 57 | n2.copyLineInfo(n) 58 | for nc in n2: cli(nc) 59 | let isIntPrefix = n.kind == nnkPrefix and n[0].kind == nnkIdent and n[1].kind == nnkIntLit 60 | if isIntPrefix and n[0].eqIdent("$"): 61 | result = newDotExpr(nnkBracketExpr.newTree(captureId, n[1]), ident("s")) 62 | cli result 63 | elif isIntPrefix and n[0].eqIdent("@"): 64 | result = newDotExpr(nnkBracketExpr.newTree(captureId, n[1]), ident("si")) 65 | cli result 66 | else: 67 | result = copyNimNode(n) 68 | for nc in n: 69 | result.add doSugar(nc, captureId) 70 | 71 | 72 | # Generate the parser main loop. The .computedGoto. pragma will generate code 73 | # using C computed gotos, which will get highly optimized, mostly eliminating 74 | # the inner parser loop. 
Nim limits computed goto to a maximum of 10_000 75 | # cases; if our program is this large, emit a warning and do not use a 76 | # computed goto 77 | 78 | proc genLoopCode(program: Program, casesCode: NimNode): NimNode= 79 | result = nnkWhileStmt.newTree(true.newLit, nnkStmtList.newTree()) 80 | if program.patt.len < 10_000: 81 | result[1].add nnkPragma.newTree("computedGoto".ident) 82 | else: 83 | warning "Grammar too large for computed goto, falling back to normal 'case'" 84 | result[1].add casesCode 85 | 86 | 87 | # Generate out all the case handlers for the parser program 88 | 89 | proc genCasesCode*(program: Program, sType, uType, uId: NimNode, ms, s, si, simax, ip: NimNode): NimNode = 90 | 91 | result = quote: 92 | case `ip` 93 | 94 | for ipNow, i in program.patt.pairs: 95 | 96 | let 97 | ipNext = ipNow + 1 98 | opName = newLit(repeat(" ", i.indent) & ($i.op).toLowerAscii[2..^1]) 99 | iname = newLit(i.name) 100 | ipFail = if i.failOffset == 0: 101 | program.patt.high 102 | else: 103 | ipNow + i.failOffset 104 | 105 | var call = case i.op: 106 | 107 | of opChr: 108 | let ch = newLit(i.ch) 109 | quote: 110 | trace `ms`, `iname`, `opName`, `s`, "\"" & escapeChar(`ch`) & "\"" 111 | if `si` < `s`.len and `s`[`si`] == `ch`.char: 112 | inc `si` 113 | `ip` = `ipNext` 114 | else: 115 | `ip` = `ipFail` 116 | 117 | of opLit: 118 | let lit = i.lit 119 | quote: 120 | trace `ms`, `iname`, `opName`, `s`, `lit`.repr 121 | if `si` < `s`.len and `s`[`si`] == `lit`: 122 | inc `si` 123 | `ip` = `ipNext` 124 | else: 125 | `ip` = `ipFail` 126 | 127 | of opSet: 128 | let cs = newLit(i.cs) 129 | quote: 130 | trace `ms`, `iname`, `opName`, `s`, dumpSet(`cs`) 131 | if `si` < `s`.len and `s`[`si`] in `cs`: 132 | inc `si` 133 | `ip` = `ipNext` 134 | else: 135 | `ip` = `ipFail` 136 | 137 | of opSpan: 138 | let cs = newLit(i.cs) 139 | quote: 140 | trace `ms`, `iname`, `opName`, `s`, dumpSet(`cs`) 141 | while `si` < `s`.len and `s`[`si`] in `cs`: 142 | inc `si` 143 | `ip` = `ipNext` 144 | 
145 | of opChoice: 146 | let ip2 = newLit(ipNow + i.ipOffset) 147 | let siOffset = newLit(i.siOffset) 148 | quote: 149 | trace `ms`, `iname`, `opName`, `s`, $`ip2` 150 | push(`ms`.backStack, BackFrame(ip:`ip2`, si:`si`+`siOffset`, rp:`ms`.retStack.top, cp:`ms`.capStack.top, pp:`ms`.precStack.top)) 151 | `ip` = `ipNext` 152 | 153 | of opCommit: 154 | let ip2 = newLit(ipNow + i.ipOffset) 155 | quote: 156 | trace `ms`, `iname`, `opName`, `s`, $`ip2` 157 | discard pop(`ms`.backStack) 158 | `ip` = `ip2` 159 | 160 | of opCall: 161 | let label = newLit(i.callLabel) 162 | let ip2 = newLit(ipNow + i.callOffset) 163 | quote: 164 | trace `ms`, `iname`, `opName`, `s`, `label` & ":" & $`ip2` 165 | push(`ms`.retStack, `ipNext`) 166 | `ip` = `ip2` 167 | 168 | of opJump: 169 | let label = newLit(i.callLabel) 170 | let ip2 = newLit(ipNow + i.callOffset) 171 | quote: 172 | trace `ms`, `iname`, `opName`, `s`, `label` & ":" & $`ip2` 173 | `ip` = `ip2` 174 | 175 | of opCapOpen: 176 | let capKind = newLit(i.capKind) 177 | let capName = newLit(i.capName) 178 | let capSiOffset = newLit(i.capSiOffset) 179 | quote: 180 | trace `ms`, `iname`, `opName`, `s`, $`capKind` & " -> " & $`si` 181 | push(`ms`.capStack, CapFrame[`sType`](cft: cftOpen, si: `si`+`capSiOffset`, ck: `capKind`, name: `capName`)) 182 | `ip` = `ipNext` 183 | 184 | of opCapClose: 185 | let ck = newLit(i.capKind) 186 | 187 | case i.capKind: 188 | of ckCodeBlock: 189 | let captureId = ident "capture" 190 | let code = doSugar(i.capAction, captureId) 191 | quote: 192 | trace `ms`, `iname`, `opName`, `s`, "ckCodeBlock -> " & $`si` 193 | push(`ms`.capStack, CapFrame[`sType`](cft: cftClose, si: `si`, ck: `ck`)) 194 | let capture = collectCaptures(fixCaptures[`sType`](`s`, `ms`.capStack, FixOpen)) 195 | proc fn(`captureId`: Captures[`sType`], `ms`: var MatchState[`sType`], `uId`: var `uType`): bool = 196 | result = true 197 | `code` 198 | if fn(capture, `ms`, `uId`): 199 | `ip` = `ipNext` 200 | else: 201 | `ip` = `ipFail` 202 | 203 | 
of ckRef: 204 | quote: 205 | trace `ms`, `iname`, `opName`, `s`, "ckRef -> " & $`si` 206 | push(`ms`.capStack, CapFrame[`sType`](cft: cftClose, si: `si`, ck: `ck`)) 207 | let r = collectCapturesRef(fixCaptures[`sType`](`s`, `ms`.capStack, FixOpen)) 208 | `ms`.refs[r.key] = r.val 209 | `ip` = `ipNext` 210 | 211 | else: 212 | quote: 213 | trace `ms`, `iname`, `opName`, `s`, $`ck` & " -> " & $`si` 214 | push(`ms`.capStack, CapFrame[`sType`](cft: cftClose, si: `si`, ck: `ck`)) 215 | `ip` = `ipNext` 216 | 217 | of opBackref: 218 | let refName = newLit(i.refName) 219 | quote: 220 | if `refName` in `ms`.refs: 221 | let s2 = `ms`.refs[`refName`] 222 | trace `ms`, `iname`, `opName`, `s`, `refName` & ":\"" & s2 & "\"" 223 | if subStrCmp(`s`, `s`.len, `si`, s2): 224 | inc `si`, s2.len 225 | `ip` = `ipNext` 226 | else: 227 | `ip` = `ipFail` 228 | else: 229 | raise newException(NPegUnknownBackrefError, "Unknown back reference '" & `refName` & "'") 230 | 231 | of opErr: 232 | let msg = newLit(i.msg) 233 | quote: 234 | trace `ms`, `iname`, `opName`, `s`, `msg` 235 | var e = newException(NPegParseError, `msg`) 236 | `simax` = max(`simax`, `si`) 237 | raise e 238 | 239 | of opReturn: 240 | quote: 241 | trace `ms`, `iname`, `opName`, `s` 242 | if `ms`.retStack.top > 0: 243 | `ip` = pop(`ms`.retStack) 244 | else: 245 | result.ok = true 246 | `simax` = max(`simax`, `si`) 247 | break 248 | 249 | of opAny: 250 | quote: 251 | trace `ms`, `iname`, `opName`, `s` 252 | if `si` < `s`.len: 253 | inc `si` 254 | `ip` = `ipNext` 255 | else: 256 | `ip` = `ipFail` 257 | 258 | of opNop: 259 | quote: 260 | trace `ms`, `iname`, `opName`, `s` 261 | `ip` = `ipNext` 262 | 263 | of opPrecPush: 264 | if i.prec == 0: 265 | quote: 266 | push(`ms`.precStack, 0) 267 | `ip` = `ipNext` 268 | else: 269 | let (iPrec, iAssoc) = (i.prec.newLit, i.assoc.newLit) 270 | let exp = if i.assoc == assocLeft: 271 | quote: peek(`ms`.precStack) < `iPrec` 272 | else: 273 | quote: peek(`ms`.precStack) <= `iPrec` 274 | quote: 
275 | if `exp`: 276 | push(`ms`.precStack, `iPrec`) 277 | `ip` = `ipNext` 278 | else: 279 | `ip` = `ipFail` 280 | 281 | of opPrecPop: 282 | quote: 283 | discard `ms`.precStack.pop() 284 | `ip` = `ipNext` 285 | 286 | of opFail: 287 | quote: 288 | `simax` = max(`simax`, `si`) 289 | if `ms`.backStack.top > 0: 290 | trace `ms`, "", "opFail", `s`, "(backtrack)" 291 | let t = pop(`ms`.backStack) 292 | (`ip`, `si`, `ms`.retStack.top, `ms`.capStack.top, `ms`.precStack.top) = (t.ip, t.si, t.rp, t.cp, t.pp) 293 | else: 294 | trace `ms`, "", "opFail", `s`, "(error)" 295 | break 296 | 297 | # Recursively copy the line info from the original instruction NimNode into 298 | # the generated Nim code 299 | proc aux(n: NimNode) = 300 | n.copyLineInfo(i.nimNode) 301 | for nc in n: aux(nc) 302 | aux(call) 303 | 304 | result.add nnkOfBranch.newTree(newLit(ipNow), call) 305 | 306 | 307 | # Generate code for tracing the parser. An empty stub is generated if tracing 308 | # is disabled 309 | 310 | proc genTraceCode*(program: Program, sType, uType, uId, ms, s, si, simax, ip: NimNode): NimNode = 311 | 312 | when npegTrace: 313 | result = quote: 314 | proc doTrace[sType](`ms`: var MatchState, iname, opname: string, ip: int, s: openArray[sType], si: int, ms: var MatchState, msg: string) {.nimcall.} = 315 | echo align(if ip >= 0: $ip else: "", 3) & 316 | "|" & align($(peek(ms.precStack)), 3) & 317 | "|" & align($si, 3) & 318 | "|" & alignLeft(dumpSubject(s, si, 24), 24) & 319 | "|" & alignLeft(iname, 15) & 320 | "|" & alignLeft(opname & " " & msg, 40) & 321 | "|" & repeat("*", ms.backStack.top) 322 | 323 | template trace(`ms`: var MatchState, iname, opname: string, `s`: openArray[`sType`], msg = "") = 324 | doTrace(`ms`, iname, opname, `ip`, `s`, `si`, `ms`, msg) 325 | 326 | else: 327 | result = quote: 328 | template trace(`ms`: var MatchState, iname, opname: string, `s`: openArray[`sType`], msg = "") = 329 | discard 330 | 331 | 332 | # Augment exception stack traces with the NPeg return stack 
and re-raise 333 | 334 | proc genExceptionCode(ms, ip, si, simax, symTab: NimNode): NimNode = 335 | quote: 336 | 337 | # Helper proc to add a stack frame for the given ip 338 | var trace: seq[StackTraceEntry] 339 | let symTab = `symTab` 340 | proc aux(ip: int) = 341 | let sym = symTab[ip] 342 | trace.insert StackTraceEntry(procname: cstring(sym.repr), filename: cstring(sym.lineInfo.filename), line: sym.lineInfo.line) 343 | # On older Nim versions e.trace is not accessible, in this case just 344 | # echo the NPeg frame to stdout if npegStacktrace is enabled 345 | when npegStacktrace: 346 | echo $(sym.lineInfo) & ": " & sym.repr 347 | 348 | # Emit current IP and unwind all addresses from the return stack 349 | aux(`ip`) 350 | while `ms`.retStack.top > 0: 351 | aux(`ms`.retStack.pop()) 352 | 353 | let e = getCurrentException() 354 | 355 | when compiles(e.trace.pop()): 356 | # drop the generated parser fn() from the trace and replace by the NPeg frames 357 | discard e.trace.pop() 358 | e.trace.add trace 359 | 360 | # Re-raise the exception with the augmented stack trace and match index filled in 361 | if e of NPegException: 362 | let eref = (ref NPegException)(e) 363 | eref.matchLen = `si` 364 | eref.matchMax = `simax` 365 | raise 366 | 367 | 368 | # Convert the list of parser instructions into a Nim finite state machine 369 | # 370 | # - sType is the base type of the subject; typically `char` but can be specified 371 | # to be another type by the user 372 | # - uType is the type of the userdata, if not used this defaults to `bool` 373 | # - uId is the identifier of the userdata, if not used this defaults to `userdata` 374 | 375 | proc genCode*(program: Program, sType, uType, uId: NimNode): NimNode = 376 | 377 | let 378 | count = program.patt.high 379 | suffix = "_NP" 380 | ms = ident "ms" & suffix 381 | s = ident "s" & suffix 382 | si = ident "si" & suffix 383 | ip = ident "ip" & suffix 384 | simax = ident "simax" & suffix 385 | 386 | casesCode = genCasesCode(program, 
sType, uType, uId, ms, s, si, simax, ip) 387 | loopCode = genLoopCode(program, casesCode) 388 | traceCode = genTraceCode(program, sType, uType, uId, ms, s, si, simax, ip) 389 | exceptionCode = genExceptionCode(ms, ip, si, simax, newLit(program.symTab)) 390 | 391 | result = quote: 392 | 393 | proc fn_init(): MatchState[`sType`] {.gensym.} = 394 | result = MatchState[`sType`]( 395 | retStack: initStack[RetFrame]("return", 8, npegRetStackSize), 396 | capStack: initStack[CapFrame[`sType`]]("capture", 8), 397 | backStack: initStack[BackFrame]("backtrace", 8, npegBackStackSize), 398 | precStack: initStack[PrecFrame]("precedence", 8, 16), 399 | ) 400 | push(result.precStack, 0) 401 | 402 | 403 | proc fn_run(`ms`: var MatchState[`sType`], `s`: openArray[`sType`], `uId`: var `uType`): MatchResult[`sType`] {.gensym.} = 404 | 405 | # Create local instances of performance-critical MatchState vars, this 406 | # saves a dereference on each access 407 | 408 | var 409 | `ip`: range[0..`count`] = `ms`.ip 410 | `si` = `ms`.si 411 | `simax` = `ms`.simax 412 | 413 | # These templates are available for code blocks 414 | 415 | template validate(o: bool) {.used.} = 416 | if not o: return false 417 | 418 | template fail() {.used.} = 419 | return false 420 | 421 | template push(`s`: string|`sType`) {.used.} = 422 | push(`ms`.capStack, CapFrame[`sType`](cft: cftOpen, ck: ckPushed)) 423 | push(`ms`.capStack, CapFrame[`sType`](cft: cftClose, ck: ckPushed, sPushed: `s`)) 424 | 425 | # Emit trace and loop code 426 | 427 | try: 428 | `traceCode` 429 | `loopCode` 430 | except CatchableError: 431 | `exceptionCode` 432 | 433 | # When the parsing machine is done, copy the local copies of the 434 | # matchstate back, close the capture stack and collect all the captures 435 | # in the match result 436 | 437 | `ms`.ip = `ip` 438 | `ms`.si = `si` 439 | `ms`.simax = `simax` 440 | result.matchLen = `ms`.si 441 | result.matchMax = `ms`.simax 442 | if result.ok and `ms`.capStack.top > 0: 443 | result.cs = 
fixCaptures(`s`, `ms`.capStack, FixAll) 444 | 445 | # This is the result of genCode: a Parser object with two function 446 | # pointers: fn_init: initializes a MatchState object for this parser 447 | # fn_run: performs the parsing of the subject on the given matchstate 448 | 449 | Parser[`sType`,`uType`](fn_init: fn_init, fn_run: fn_run) 450 | 451 | when npegGcsafe: 452 | result[0].addPragma(ident("gcsafe")) 453 | 454 | when npegExpand: 455 | echo repr result 456 | 457 | -------------------------------------------------------------------------------- /src/npeg/common.nim: -------------------------------------------------------------------------------- 1 | 2 | import strutils 3 | import tables 4 | import macros 5 | import bitops 6 | 7 | 8 | const 9 | 10 | # Some constants with "sane" defaults, configurable with compiler flags 11 | 12 | npegPattMaxLen* {.intdefine.} = 4096 13 | npegInlineMaxLen* {.intdefine.} = 30 14 | npegRetStackSize* {.intdefine.} = 1024 15 | npegBackStackSize* {.intdefine.} = 1024 16 | npegOptimize* {.intdefine.} = 255 17 | npegDebug* = defined(npegDebug) 18 | npegTrace* = defined(npegTrace) 19 | npegExpand* = defined(npegExpand) 20 | npegGraph* = defined(npegGraph) 21 | npegGcsafe* = defined(npegGcsafe) 22 | npegStacktrace* = defined(npegStacktrace) 23 | 24 | # Various optimizations. 
These can be disabled for testing purposes 25 | # or when suspecting bugs in the optimization stages 26 | 27 | npegOptSets* = npegOptimize.testBit(0) 28 | npegOptHeadFail* = npegOptimize.testBit(1) 29 | npegOptCapShift* = npegOptimize.testBit(2) 30 | npegOptChoiceCommit* = npegOptimize.testBit(3) 31 | 32 | type 33 | 34 | NPegException* = object of CatchableError 35 | matchLen*: int 36 | matchMax*: int 37 | 38 | NPegParseError* = object of NPegException 39 | NPegStackOverflowError* = object of NPegException 40 | NPegUnknownBackrefError* = object of NPegException 41 | NPegCaptureOutOfRangeError* = object of NPegException 42 | 43 | CapFrameType* = enum cftOpen, cftClose 44 | 45 | CapKind* = enum 46 | ckVal, # Value capture 47 | ckPushed, # Pushed capture 48 | ckCodeBlock, # Code block capture 49 | ckRef # Reference 50 | 51 | CapFrame*[S] = object 52 | cft*: CapFrameType # Capture frame type 53 | name*: string # Capture name 54 | si*: int # Subject index 55 | ck*: CapKind # Capture kind 56 | when S is char: 57 | sPushed*: string # Pushed capture, overrides subject slice 58 | else: 59 | sPushed*: S # Pushed capture, overrides subject slice 60 | 61 | Ref* = object 62 | key*: string 63 | val*: string 64 | 65 | Opcode* = enum 66 | opChr, # Matching: Character 67 | opLit, # Matching: Literal 68 | opSet, # Matching: Character set and/or range 69 | opAny, # Matching: Any character 70 | opNop, # Matching: Always matches, consumes nothing 71 | opSpan # Matching: Match a sequence of 0 or more character sets 72 | opChoice, # Flow control: stores current position 73 | opCommit, # Flow control: commit previous choice 74 | opCall, # Flow control: call another rule 75 | opJump, # Flow control: jump to target 76 | opReturn, # Flow control: return from earlier call 77 | opFail, # Fail: unwind stack until last frame 78 | opCapOpen, # Capture open 79 | opCapClose, # Capture close 80 | opBackref # Back reference 81 | opErr, # Error handler 82 | opPrecPush, # Precedence stack push 83 | 
opPrecPop, # Precedence stack pop 84 | 85 | CharSet* = set[char] 86 | 87 | Assoc* = enum assocLeft, assocRight 88 | 89 | Inst* = object 90 | case op*: Opcode 91 | of opChoice, opCommit: 92 | ipOffset*: int 93 | siOffset*: int 94 | of opChr: 95 | ch*: char 96 | of opLit: 97 | lit*: NimNode 98 | of opCall, opJump: 99 | callLabel*: string 100 | callOffset*: int 101 | of opSet, opSpan: 102 | cs*: CharSet 103 | of opCapOpen, opCapClose: 104 | capKind*: CapKind 105 | capAction*: NimNode 106 | capName*: string 107 | capSiOffset*: int 108 | of opErr: 109 | msg*: string 110 | of opFail, opReturn, opAny, opNop, opPrecPop: 111 | discard 112 | of opBackref: 113 | refName*: string 114 | of opPrecPush: 115 | prec*: int 116 | assoc*: Assoc 117 | failOffset*: int 118 | # Debug info 119 | name*: string 120 | nimNode*: NimNode 121 | indent*: int 122 | 123 | Patt* = seq[Inst] 124 | 125 | Symbol* = object 126 | ip*: int 127 | name*: string 128 | repr*: string 129 | lineInfo*: LineInfo 130 | 131 | SymTab* = object 132 | syms*: seq[Symbol] 133 | 134 | Rule* = object 135 | name*: string 136 | patt*: Patt 137 | repr*: string 138 | lineInfo*: LineInfo 139 | 140 | Program* = object 141 | patt*: Patt 142 | symTab*: SymTab 143 | 144 | Template* = ref object 145 | name*: string 146 | args*: seq[string] 147 | code*: NimNode 148 | 149 | Grammar* = ref object 150 | rules*: Table[string, Rule] 151 | templates*: Table[string, Template] 152 | 153 | # 154 | # SymTab implementation 155 | # 156 | 157 | proc add*(s: var SymTab, ip: int, name: string, repr: string = "", lineInfo: LineInfo = LineInfo()) = 158 | let symbol = Symbol(ip: ip, name: name, repr: repr, lineInfo: lineInfo) 159 | s.syms.add(symbol) 160 | 161 | proc `[]`*(s: SymTab, ip: int): Symbol = 162 | for sym in s.syms: 163 | if ip >= sym.ip: 164 | result = sym 165 | 166 | proc `[]`*(s: SymTab, name: string): Symbol = 167 | for sym in s.syms: 168 | if name == sym.name: 169 | return sym 170 | 171 | proc contains*(s: SymTab, ip: int): bool = 
172 | for sym in s.syms: 173 | if ip == sym.ip: 174 | return true 175 | 176 | proc contains*(s: SymTab, name: string): bool = 177 | for sym in s.syms: 178 | if name == sym.name: 179 | return true 180 | 181 | # 182 | # Some glue to report parse errors without having to pass the original 183 | # NimNode all the way down the call stack 184 | # 185 | 186 | var gCurErrorNode {.compileTime} = newEmptyNode() 187 | 188 | proc setKrakNode*(n: NimNode) = 189 | gCurErrorNode.copyLineInfo(n) 190 | 191 | template krak*(n: NimNode, msg: string) = 192 | error "NPeg: error at '" & n.repr & "': " & msg & "\n", n 193 | 194 | template krak*(msg: string) = 195 | krak gCurErrorNode, msg 196 | 197 | 198 | # 199 | # Misc helper functions 200 | # 201 | 202 | proc subStrCmp*(s: openArray[char], slen: int, si: int, s2: string): bool = 203 | if si > slen - s2.len: 204 | return false 205 | for i in 0.. slen - s2.len: 213 | return false 214 | for i in 0.. len: 223 | result = result[0..len-1] & "..." 224 | 225 | # This proc flattens AST trees of `|` operators into a single call to 226 | # `choice()` with all arguments in one call. E.g. it will convert `A | B | C` 227 | # into `choice(A, B, C)`. 228 | 229 | proc flattenChoice*(n: NimNode, nChoice: NimNode = nil): NimNode = 230 | proc addToChoice(n, nc: NimNode) = 231 | if n.kind == nnkInfix and n[0].eqIdent("|"): 232 | addToChoice(n[1], nc) 233 | addToChoice(n[2], nc) 234 | else: 235 | nc.add flattenChoice(n) 236 | if n.kind == nnkInfix and n[0].eqIdent("|"): 237 | result = nnkCall.newTree(ident "choice") 238 | addToChoice(n[1], result) 239 | addToChoice(n[2], result) 240 | else: 241 | result = copyNimNode(n) 242 | for nc in n: 243 | result.add flattenChoice(nc) 244 | 245 | 246 | # Create a short and friendly text representation of a character set. 
247 | 248 | proc escapeChar*(c: char): string = 249 | const escapes = { '\n': "\\n", '\r': "\\r", '\t': "\\t" }.toTable() 250 | if c in escapes: 251 | result = escapes[c] 252 | elif c >= ' ' and c <= '~': 253 | result = $c 254 | else: 255 | result = "\\x" & toHex(c.int, 2).toLowerAscii 256 | 257 | proc dumpSet*(cs: CharSet): string = 258 | result.add "{" 259 | var c = 0 260 | while c <= 255: 261 | let first = c 262 | while c <= 255 and c.char in cs: 263 | inc c 264 | if (c - 1 == first): 265 | result.add "'" & escapeChar(first.char) & "'," 266 | elif c - 1 > first: 267 | result.add "'" & escapeChar(first.char) & "'..'" & escapeChar((c-1).char) & "'," 268 | inc c 269 | if result[result.len-1] == ',': result.setLen(result.len-1) 270 | result.add "}" 271 | 272 | # Create a friendly version of the given subject starting at offset `o`, 273 | # escaping non-printables and no longer than `l` 274 | 275 | proc dumpSubject*[S](s: openArray[S], o:int=0, l:int=1024): string = 276 | var i = o 277 | while i < s.len: 278 | when S is char: 279 | let a = escapeChar s[i] 280 | else: 281 | mixin repr 282 | let a = s[i].repr 283 | if result.len >= l-a.len: 284 | return 285 | result.add a 286 | inc i 287 | 288 | 289 | proc `$`*(i: Inst, ip=0): string = 290 | var args: string 291 | case i.op: 292 | of opChr: 293 | args = " '" & escapeChar(i.ch) & "'" 294 | of opChoice, opCommit: 295 | args = " " & $(ip+i.ipOffset) 296 | of opCall, opJump: 297 | args = " " & $(ip+i.callOffset) 298 | of opCapOpen, opCapClose: 299 | args = " " & $i.capKind 300 | if i.capSiOffset != 0: 301 | args &= "(" & $i.capSiOffset & ")" 302 | of opBackref: 303 | args = " " & i.refName 304 | of opPrecPush: 305 | args = " @" & $i.prec 306 | else: 307 | discard 308 | if i.failOffset != 0: 309 | args.add " " & $(ip+i.failOffset) 310 | let tmp = if i.nimNode != nil: i.nimNode.repr.truncate(30) else: "" 311 | result.add alignLeft(i.name, 15) & 312 | alignLeft(repeat(" ", i.indent) & ($i.op).toLowerAscii[2..^1] & args, 25) & " " & tmp 313 | 314 | 
proc `$`*(program: Program): string = 315 | for ip, i in program.patt.pairs: 316 | if ip in program.symTab: 317 | result.add "\n" & program.symTab[ip].repr & "\n" 318 | result.add align($ip, 4) & ": " & `$`(i, ip) & "\n" 319 | 320 | 321 | proc slice*(s: openArray[char], iFrom, iTo: int): string = 322 | let len = iTo - iFrom 323 | result.setLen(len) 324 | for i in 0.. " & n2.escape & " [ color=" & colors[meth] & "];" 26 | d.edges[l] = true 27 | 28 | proc addPatt*(d: Dot, name: string, len: int) = 29 | if d != nil: 30 | var color = "black" 31 | if len > 10: color = "orange" 32 | if len > 100: color = "red" 33 | d.nodes.add " " & name.escape & 34 | " [ fillcolor=lightgrey color=" & color & " label=\"" & name & "/" & $len & "\"];" 35 | 36 | proc dump*(d: Dot) = 37 | const npegDotDir {.strdefine.}: string = "" 38 | when npegDotDir != "": 39 | let fname = npegDotDir & "/" & d.name & ".dot" 40 | echo "Dumping dot graph file to " & fname & "..." 41 | 42 | var o: string 43 | o.add "digraph dot {\n" 44 | o.add " graph [ center=true, margin=0.2, nodesep=0.1, ranksep=0.3 ];\n" 45 | o.add " node [ shape=box, style=\"rounded,filled\" width=0, height=0, fontname=Helvetica, fontsize=10];\n" 46 | o.add " edge [ fontname=Helvetica, fontsize=10];\n" 47 | for k, v in d.edges: 48 | o.add k & "\n" 49 | for n in d.nodes: 50 | o.add n & "\n" 51 | o.add "}\n" 52 | writeFile fname, o 53 | 54 | -------------------------------------------------------------------------------- /src/npeg/grammar.nim: -------------------------------------------------------------------------------- 1 | 2 | import tables 3 | import macros 4 | import strutils 5 | import npeg/[common,dot] 6 | 7 | # This is the global instance of the pattern library. This is itself a grammar 8 | # where all patterns are stored with qualified names in the form of 9 | # `libname.pattname`. At grammar link time all unresolved patterns are 10 | # looked up from this global table. 
11 | 12 | var gPattLib {.compileTime.} = new Grammar 13 | 14 | 15 | 16 | # Store a grammar in the library. The rule names and all unqualified 17 | # identifiers in the grammar are expanded to qualified names in the form 18 | # `libname.name` to make sure they are easily resolved when they are 19 | # later imported by other grammars. 20 | 21 | proc libStore*(libName: string, grammar: Grammar) = 22 | 23 | proc qualify(name: string): string = 24 | if libName.len > 0: libName & "." & name else: name 25 | 26 | for rulename, rule in grammar.rules: 27 | var rulename2 = qualify(rulename) 28 | var rule2 = Rule(name: rulename2) 29 | for i in rule.patt.items: 30 | var i2 = i 31 | if i2.op == opCall: 32 | if "." notin i2.callLabel: 33 | i2.callLabel = qualify(i2.callLabel) 34 | rule2.patt.add i2 35 | gPattLib.rules[rulename2] = rule2 36 | 37 | for tname, t in grammar.templates: 38 | gPattLib.templates[qualify(tname)] = t 39 | 40 | # 41 | # Add a rule to a grammar 42 | # 43 | 44 | proc addRule*(grammar: Grammar, name: string, patt: Patt, repr: string = "", lineInfo: LineInfo = LineInfo()) = 45 | if name in grammar.rules: 46 | warning "Redefinition of rule '" & name & "'" 47 | var rule = Rule(name: name, patt: patt, repr: repr, lineInfo: lineInfo) 48 | for i in rule.patt.mitems: 49 | if i.name == "": 50 | i.name = name 51 | grammar.rules[name] = rule 52 | 53 | # Try to import the given rule from the pattern library into a grammar. Returns 54 | # true if import succeeded, false if not found. 
55 | 56 | proc libImportRule*(name: string, grammar: Grammar): bool = 57 | if name in gPattLib.rules: 58 | grammar.addRule name, gPattLib.rules[name].patt 59 | when npegDebug: 60 | echo "importing ", name 61 | return true 62 | 63 | 64 | proc libImportTemplate*(name: string): Template = 65 | if name in gPattLib.templates: 66 | result = gPattLib.templates[name] 67 | 68 | 69 | # Shadow the given name in the grammar by creating an unique new name, 70 | # and moving the original rule 71 | 72 | proc shadow*(grammar: Grammar, name: string): string = 73 | var gShadowId {.global.} = 0 74 | inc gShadowId 75 | let name2 = name & "-" & $gShadowId 76 | when npegDebug: 77 | echo " shadow ", name, " -> ", name2 78 | grammar.rules[name2] = grammar.rules[name] 79 | grammar.rules.del name 80 | return name2 81 | 82 | 83 | # Link a list of patterns into a grammar, which is itself again a valid 84 | # pattern. Start with the initial rule, add all other non terminals and fixup 85 | # opCall addresses 86 | 87 | proc link*(grammar: Grammar, initial_name: string, dot: Dot = nil): Program = 88 | 89 | if initial_name notin grammar.rules: 90 | error "inital rule '" & initial_name & "' not found" 91 | 92 | var retPatt: Patt 93 | var symTab: SymTab 94 | var ruleRepr: Table[int, string] 95 | 96 | # Recursively emit a pattern and all patterns it calls which are 97 | # not yet emitted 98 | 99 | proc emit(name: string) = 100 | if npegDebug: 101 | echo "emit ", name 102 | let rule = grammar.rules[name] 103 | if rule.patt.len > 0: 104 | let ip = retPatt.len 105 | symTab.add(ip, name, rule.repr, rule.lineInfo) 106 | retPatt.add rule.patt 107 | retPatt.add Inst(op: opReturn, name: rule.patt[0].name) 108 | 109 | for i in rule.patt: 110 | if i.op == opCall and i.callLabel notin symTab: 111 | if i.callLabel notin grammar.rules and not libImportRule(i.callLabel, grammar): 112 | error "Npeg: rule \"" & name & "\" is referencing undefined rule \"" & i.callLabel & "\"" 113 | dot.add(name, i.callLabel, "call") 
114 | emit i.callLabel 115 | 116 | emit initial_name 117 | 118 | # Fixup call addresses and do tail call optimization 119 | 120 | for ip, i in retPatt.mpairs: 121 | if i.op == opCall: 122 | i.callOffset = symTab[i.callLabel].ip - ip 123 | if i.op == opCall and retPatt[ip+1].op == opReturn: 124 | i.op = opJump 125 | 126 | # Choice/Commit pairs that touch because of head fail optimization can be 127 | # replaced by a jump and a nop 128 | 129 | when npegOptChoiceCommit: 130 | for i in 0..= T.low.BiggestInt and v <= T.high.BiggestInt 13 | 14 | grammar "types": 15 | 16 | bool <- "true" | "false" 17 | 18 | # Unsigned decimal 19 | 20 | uint <- +Digit 21 | uint8 <- >+uint: validate checkRange(uint8, parseInt, $1) 22 | uint16 <- >+uint: validate checkRange(uint16, parseInt, $1) 23 | uint32 <- >+uint: validate checkRange(uint32, parseInt, $1) 24 | 25 | # Signed decimal 26 | 27 | int <- ?'-' * uint 28 | int8 <- >int: validate checkRange(int8, parseInt, $1) 29 | int16 <- >int: validate checkRange(int16, parseInt, $1) 30 | int32 <- >int: validate checkRange(int32, parseInt, $1) 31 | int64 <- >int: validate checkRange(int64, parseInt, $1) 32 | 33 | # Hexadecimal 34 | # "0x"/"0X" followed by hex digits; the whole match is handed to parseHexInt, which accepts the prefix 35 | hex <- '0' * {'x','X'} * +Xdigit 36 | hex8 <- >hex: validate checkRange(uint8, parseHexInt, $1) 37 | hex16 <- >hex: validate checkRange(uint16, parseHexInt, $1) 38 | hex32 <- >hex: validate checkRange(uint32, parseHexInt, $1) 39 | 40 | -------------------------------------------------------------------------------- /src/npeg/lib/uri.nim: -------------------------------------------------------------------------------- 1 | import npeg 2 | 3 | when defined(nimHasUsed): {.used.} 4 | 5 | # The grammar below is a literal translation of the ABNF notation of the 6 | # RFC. Optimizations can be made to limit backtracking, but this is a nice 7 | # example of how to create a parser from an RFC protocol description. 8 | 9 | grammar "uri": 10 | 11 | URI <- scheme * ":" * hier_part * ?( "?" 
* query) * ?( "#" * fragment) * !1 12 | 13 | hier_part <- "//" * authority * path 14 | 15 | URI_reference <- URI | relative_ref 16 | 17 | absolute_uri <- scheme * ":" * hier_part * ?( "?" * query) 18 | 19 | relative_ref <- relative_part * ?( "?" * query) * ?( "#" * fragment) 20 | 21 | relative_part <- "//" * authority * path_abempty | 22 | path_absolute | 23 | path_noscheme | 24 | path_empty 25 | 26 | scheme <- (Alpha * *( Alpha | Digit | "+" | "-" | "." )) 27 | 28 | authority <- ?(userinfo * "@") * host * ?( ":" * port) 29 | userinfo <- *(unreserved | pct_encoded | sub_delims | ":") 30 | 31 | host <- (IP_literal | IPv4address | reg_name) 32 | port <- *Digit 33 | 34 | IP_literal <- "[" * (IPv6address | IPvFuture) * "]" 35 | 36 | IPvFuture <- "v" * +Xdigit * "." * +(unreserved | sub_delims | ":") 37 | 38 | IPv6address <- (h16 * ":")[6] * ls32 | 39 | "::" * (h16 * ":")[5] * ls32 | 40 | ?( h16 ) * "::" * (h16 * ":")[4] * ls32 | 41 | ?( h16 * (":" * h16)[0..1] ) * "::" * (h16 * ":")[3] * ls32 | 42 | ?( h16 * (":" * h16)[0..2] ) * "::" * (h16 * ":")[2] * ls32 | 43 | ?( h16 * (":" * h16)[0..3] ) * "::" * (h16 * ":") * ls32 | 44 | ?( h16 * (":" * h16)[0..4] ) * "::" * ls32 | 45 | ?( h16 * (":" * h16)[0..5] ) * "::" * h16 | 46 | ?( h16 * (":" * h16)[0..6] ) * "::" 47 | 48 | h16 <- Xdigit[1..4] 49 | ls32 <- (h16 * ":" * h16) | IPv4address 50 | IPv4address <- dec_octet * "." * dec_octet * "." * dec_octet * "." 
* dec_octet 51 | 52 | dec_octet <- Digit[1..3] 53 | 54 | reg_name <- *(unreserved | pct_encoded | sub_delims) 55 | 56 | path <- path_abempty | # begins with "/" or is empty 57 | path_absolute | # begins with "/" but not "//" 58 | path_noscheme | # begins with a non-colon segment 59 | path_rootless | # begins with a segment 60 | path_empty # zero characters 61 | 62 | path_abempty <- (*( "/" * segment )) 63 | path_absolute <- ("/" * ?( segment_nz * *( "/" * segment ) )) 64 | path_noscheme <- (segment_nz_nc * *( "/" * segment )) 65 | path_rootless <- (segment_nz * *( "/" * segment )) 66 | path_empty <- 0 67 | 68 | segment <- *pchar 69 | segment_nz <- +pchar 70 | segment_nz_nc <- +( unreserved | pct_encoded | sub_delims | "@" ) 71 | # non_zero_length segment without any colon ":" 72 | 73 | pchar <- unreserved | pct_encoded | sub_delims | ":" | "@" 74 | 75 | query <- *( pchar | "/" | "?" ) 76 | 77 | fragment <- *( pchar | "/" | "?" ) 78 | 79 | pct_encoded <- "%" * Xdigit * Xdigit 80 | 81 | unreserved <- Alpha | Digit | "-" | "." | "_" | "~" 82 | reserved <- gen_delims | sub_delims 83 | gen_delims <- ":" | "/" | "?" | "#" | "[" | "]" | "@" 84 | sub_delims <- "!" | "$" | "&" | "'" | "(" | ")" | "*" | "+" | "," | ";" | "=" 85 | 86 | -------------------------------------------------------------------------------- /src/npeg/lib/utf8.nim: -------------------------------------------------------------------------------- 1 | 2 | import npeg 3 | 4 | when defined(nimHasUsed): {.used.} 5 | 6 | grammar "utf8": 7 | 8 | cont <- {128..191} 9 | 10 | # Matches any utf-8 codepoint glyph 11 | 12 | any <- {0..127} | 13 | {194..223} * cont[1] | 14 | {224..239} * cont[2] | 15 | {240..244} * cont[3] 16 | 17 | bom <- "\xef\xbb\xbf" # U+FEFF encoded as UTF-8 18 | 19 | # Check for UTF-8 character classes. 
Depends on the tables from 20 | # the nim unicode module 21 | 22 | space <- >utf8.any: validate unicode.isSpace($1) 23 | lower <- >utf8.any: validate unicode.isLower(runeAt($1, 0)) 24 | upper <- >utf8.any: validate unicode.isUpper(runeAt($1, 0)) 25 | alpha <- >utf8.any: validate unicode.isAlpha(runeAt($1, 0)) 26 | title <- >utf8.any: validate unicode.isTitle(runeAt($1, 0)) 27 | -------------------------------------------------------------------------------- /src/npeg/parsepatt.nim: -------------------------------------------------------------------------------- 1 | 2 | import tables, macros, sequtils, strutils, algorithm 3 | import npeg/[common,patt,dot,grammar] 4 | 5 | when npegGraph: 6 | import npeg/[railroad] 7 | 8 | 9 | # Recursively compile a PEG rule to a Pattern 10 | 11 | proc parsePatt*(pattName: string, nn: NimNode, grammar: Grammar, dot: Dot = nil): Patt = 12 | 13 | when npegDebug: 14 | echo "parse ", pattName, " <- ", nn.repr 15 | 16 | proc aux(n: NimNode): Patt = 17 | 18 | setKrakNode(n) 19 | 20 | proc inlineOrCall(callName: string): Patt = 21 | 22 | # Try to import symbol early so we might be able to inline or shadow it 23 | if callName notin grammar.rules: 24 | discard libImportRule(callName, grammar) 25 | 26 | if pattName == callName: 27 | if pattName in grammar.rules: 28 | let nameShadowed = grammar.shadow(pattName) 29 | return newCallPatt(nameShadowed) 30 | 31 | if callName in grammar.rules and grammar.rules[callName].patt.len < npegInlineMaxLen: 32 | when npegDebug: 33 | echo " inline ", callName 34 | dot.add(pattName, callName, "inline") 35 | return grammar.rules[callName].patt 36 | 37 | else: 38 | when npegDebug: 39 | echo " call ", callName 40 | dot.add(pattName, callName, "call") 41 | return newCallPatt(callName) 42 | 43 | proc applyTemplate(tName: string, arg: NimNode): NimNode = 44 | let t = if tName in grammar.templates: 45 | grammar.templates[tName] 46 | else: 47 | libImportTemplate(tName) 48 | if t != nil: 49 | if arg.len-1 != t.args.len: 
50 | krak arg, "Wrong number of arguments for template " & tName & "(" & $(t.args.join(",")) & ")" 51 | proc aux(n: NimNode): NimNode = 52 | if n.kind == nnkIdent and n.strVal in t.args: 53 | result = arg[ find(t.args, n.strVal)+1 ] 54 | else: 55 | result = copyNimNode(n) 56 | for nc in n: 57 | result.add aux(nc) 58 | result = aux(t.code).flattenChoice() 59 | when npegDebug: 60 | echo "template ", tName, " = \n in: ", n.repr, "\n out: ", result.repr 61 | 62 | case n.kind: 63 | 64 | of nnkPar: 65 | if n.len > 1: 66 | krak n, "syntax error. Did you mean '|'?" 67 | result = aux n[0] 68 | 69 | of nnkIntLit: 70 | result = newPatt(n.intVal) 71 | 72 | of nnkStrLit: 73 | result = newPatt(n.strVal) 74 | 75 | of nnkCharLit: 76 | result = newPatt($n.intVal.char) 77 | 78 | of nnkCall: 79 | var name: string 80 | if n[0].kind == nnkIdent: 81 | name = n[0].strVal 82 | elif n[0].kind == nnkDotExpr: 83 | name = n[0].repr 84 | else: 85 | krak n, "syntax error" 86 | let n2 = applyTemplate(name, n) 87 | if n2 != nil: 88 | result = aux n2 89 | elif name == "choice": 90 | result = choice(n[1..^1].map(aux)) 91 | elif n.len == 2: 92 | case name 93 | of "R": result = newBackrefPatt(n[1].strVal) 94 | elif n.len == 3: 95 | case name 96 | of "R": result = newPatt(aux n[2], ckRef, n[1].strVal) 97 | if result.len == 0: 98 | krak n, "Unknown template or capture '" & name & "'" 99 | 100 | of nnkPrefix: 101 | # Nim combines all prefix chars into one string. 
Handle prefixes 102 | # chars right to left 103 | var p = aux n[1] 104 | for c in n[0].strVal.reversed: 105 | case c: 106 | of '?': p = ?p 107 | of '+': p = +p 108 | of '*': p = *p 109 | of '!': p = !p 110 | of '&': p = &p 111 | of '>': p = >p 112 | of '@': p = @p 113 | else: krak n, "Unhandled prefix operator" 114 | result = p 115 | 116 | of nnkInfix: 117 | case n[0].strVal: 118 | of "*", "∙": result = aux(n[1]) * aux(n[2]) 119 | of "-": result = aux(n[1]) - aux(n[2]) 120 | of "^": result = newPattAssoc(aux(n[1]), intVal(n[2]), assocLeft) 121 | of "^^": result = newPattAssoc(aux(n[1]), intVal(n[2]), assocRight) 122 | else: krak n, "Unhandled infix operator" 123 | 124 | of nnkBracketExpr: 125 | let p = aux(n[0]) 126 | if n[1].kind == nnkIntLit: 127 | result = p{n[1].intVal} 128 | elif n[1].kind == nnkInfix and n[1][0].eqIdent(".."): 129 | result = p{n[1][1].intVal..n[1][2].intVal} 130 | else: krak n, "syntax error" 131 | 132 | of nnkIdent: 133 | result = inlineOrCall(n.strVal) 134 | 135 | of nnkDotExpr: 136 | result = inlineOrCall(n.repr) 137 | 138 | of nnkCurly: 139 | var cs: CharSet 140 | for nc in n: 141 | if nc.kind == nnkCharLit: 142 | cs.incl nc.intVal.char 143 | elif nc.kind == nnkInfix: 144 | if nc[0].kind == nnkIdent and nc[0].eqIdent(".."): 145 | for c in nc[1].intVal..nc[2].intVal: 146 | cs.incl c.char 147 | else: 148 | krak n, "syntax error" 149 | else: 150 | krak n, "syntax error" 151 | if cs.card == 0: 152 | result = newPatt(1) 153 | else: 154 | result = newPatt(cs) 155 | 156 | of nnkCallStrLit: 157 | case n[0].strVal: 158 | of "i": 159 | for c in n[1].strVal: 160 | result.add newPatt({c.toLowerAscii, c.toUpperAscii}) 161 | of "E": result = newErrorPatt(n[1].strVal) 162 | else: krak n, "unhandled string prefix" 163 | 164 | of nnkBracket: 165 | result.add newLitPatt n[0] 166 | 167 | else: 168 | echo n.astGenRepr 169 | krak n, "syntax error" 170 | 171 | for i in result.mitems: 172 | if i.nimNode == nil: 173 | i.nimNode = n 174 | 175 | result = 
aux(nn.flattenChoice()) 176 | dot.addPatt(pattName, result.len) 177 | 178 | 179 | # 180 | # Parse a grammar. A grammar consists of named rules, where each rule is one 181 | # pattern 182 | # 183 | 184 | proc parseGrammar*(ns: NimNode, dot: Dot=nil, dumpRailroad = true): Grammar = 185 | result = new Grammar 186 | 187 | for n in ns: 188 | 189 | if n.kind == nnkInfix and n[0].eqIdent("<-"): 190 | 191 | case n[1].kind 192 | of nnkIdent, nnkDotExpr, nnkPrefix: 193 | let name = if n[1].kind == nnkPrefix: 194 | when declared(expectIdent): 195 | expectIdent n[1][0], ">" 196 | n[1][1].repr 197 | else: n[1].repr 198 | var patt = parsePatt(name, n[2], result, dot) 199 | if n.len == 4: 200 | patt = newPatt(patt, ckCodeBlock) 201 | patt[patt.high].capAction = n[3] 202 | result.addRule(name, if n[1].kind == nnkPrefix: >patt else: patt, n.repr, n.lineInfoObj) 203 | 204 | when npegGraph: 205 | if dumpRailroad: 206 | echo parseRailroad(n[2], result).wrap(name) 207 | 208 | of nnkCall: 209 | if n.len > 3: 210 | error "Code blocks can not be used on templates", n[3] 211 | var t = Template(name: n[1][0].strVal, code: n[2]) 212 | for i in 1..= npegPattMaxLen: 31 | krak "NPeg: grammar too complex, (" & $p.len & " > " & $npegPattMaxLen & ").\n" & 32 | "If you think this is a mistake, increase the maximum size with -d:npegPattMaxLen=N" 33 | 34 | 35 | # Checks if the passed patt matches an empty subject. This is done by executing 36 | # the pattern as if it was passed an empty subject and see how it terminates. 
37 | 38 | proc matchesEmpty(patt: Patt): bool = 39 | var backStack = initStack[int]("backtrack", 8, 32) 40 | var ip: int 41 | while ip < patt.len: 42 | let i = patt[ip] 43 | case i.op 44 | of opChoice: 45 | push(backStack, ip+i.ipOffset) 46 | inc ip 47 | of opCommit: 48 | discard pop(backStack) 49 | ip += i.ipOffset 50 | of opJump: ip += i.callOffset 51 | of opCapOpen, opCapClose, opNop, opSpan, opPrecPush, opPrecPop: inc ip 52 | of opErr, opReturn, opCall: return false 53 | of opAny, opChr, opLit, opSet, opBackref, opFail: 54 | if i.failOffset != 0: 55 | ip += i.failOffset 56 | elif backStack.top > 0: 57 | ip = pop(backStack) 58 | else: 59 | return false 60 | return true 61 | 62 | 63 | # Calculate how far captures or choices can be shifted into this pattern 64 | # without consequences; this allows the pattern to fail before pushing to the 65 | # backStack or capStack. Yields (0, 0) when `enable` is false or `p` is empty 66 | 67 | proc canShift(p: Patt, enable: static[bool]): (int, int) = 68 | if enable and p.len > 0 and p[0].failOffset == 0: 69 | case p[0].op 70 | of opChr, opAny, opSet: 71 | result = (1, 1) 72 | else: 73 | discard 74 | 75 | 76 | ### Atoms 77 | 78 | proc newPatt*(s: string): Patt = 79 | for ch in s: 80 | result.add Inst(op: opChr, ch: ch) 81 | 82 | proc newLitPatt*(n: NimNode): Patt = 83 | result.add Inst(op: opLit, lit: n) 84 | 85 | proc newPatt*(p: Patt, ck: CapKind, name = ""): Patt = 86 | let (siShift, ipShift) = p.canShift(npegOptCapShift) 87 | result.add p[0.. 
0: 97 | for i in 1..n: 98 | result.add Inst(op: opAny) 99 | else: 100 | result.add Inst(op: opNop) 101 | 102 | proc newPatt*(cs: CharSet): Patt = 103 | result.add Inst(op: opSet, cs: cs) 104 | 105 | proc newBackrefPatt*(refName: string): Patt = 106 | result.add Inst(op: opBackref, refName: refName) 107 | 108 | proc newReturnPatt*(): Patt = 109 | result.add Inst(op: opReturn) 110 | 111 | proc newErrorPatt*(msg: string): Patt = 112 | result.add Inst(op: opErr, msg: msg) 113 | 114 | 115 | # Add a choice/commit pair around pattern P, try to optimize head 116 | # fails when possible 117 | 118 | proc addChoiceCommit(addTo: var Patt, p: Patt, choiceOffset, commitOffset: int) = 119 | let (siShift, ipShift) = p.canShift(npegOptHeadFail) 120 | for n in 0..`*(p: Patt): Patt = 147 | return newPatt(p, ckVal) 148 | 149 | proc `!`*(p: Patt): Patt = 150 | result.addChoiceCommit(p, p.len+3, 1) 151 | result.add Inst(op: opFail) 152 | 153 | proc `&`*(p: Patt): Patt = 154 | result.add !(!p) 155 | 156 | proc `@`*(p: Patt): Patt = 157 | result.addChoiceCommit(p, p.len+2, 3) 158 | result.add Inst(op: opAny) 159 | result.add Inst(op: opJump, callOffset: - p.len - 3) 160 | 161 | ### Infixes 162 | 163 | proc `*`*(p1, p2: Patt): Patt = 164 | result.add p1 165 | result.add p2 166 | result.checkSanity 167 | 168 | 169 | # choice() is generated from | operators by flattenChoice(). 
170 | # 171 | # Optimizations done here: 172 | # - convert to union if all elements can be represented as a set 173 | # - head fails: when possible, opChoice is shifted into a pattern to 174 | # allow the pattern to fail before emitting the opChoice 175 | 176 | proc choice*(ps: openArray[Patt]): Patt = 177 | var csUnion: CharSet 178 | var allSets = true 179 | for p in ps: 180 | var cs: CharSet 181 | if p.toSet(cs): 182 | csUnion = csUnion + cs 183 | else: 184 | allSets = false 185 | if allSets: 186 | result.add Inst(op: opSet, cs: csUnion) 187 | return result 188 | 189 | var lenTot, ip: int 190 | lenTot = foldl(ps, a + b.len+2, 0) 191 | for i, p in ps: 192 | if i < ps.high: 193 | result.addChoiceCommit(p, p.len+2, lenTot-ip-p.len-3) 194 | ip += p.len + 2 195 | else: 196 | result.add p 197 | 198 | proc `-`*(p1, p2: Patt): Patt = 199 | var cs1, cs2: CharSet 200 | if p1.toSet(cs1) and p2.toSet(cs2): 201 | result.add Inst(op: opSet, cs: cs1 - cs2) 202 | else: 203 | result.add !p2 204 | result.add p1 205 | 206 | proc newPattAssoc*(p: Patt, prec: BiggestInt, assoc: Assoc): Patt = 207 | result.add Inst(op: opPrecPush, prec: prec.int, assoc: assoc) 208 | result.add p 209 | result.add Inst(op: opPrecPop) 210 | 211 | 212 | ### Others 213 | 214 | proc `{}`*(p: Patt, n: BiggestInt): Patt = 215 | for i in 1..n: 216 | result.add p 217 | 218 | proc `{}`*(p: Patt, range: HSlice[system.BiggestInt, system.BiggestInt]): Patt = 219 | result.add p{range.a} 220 | for i in range.a.. 
n.w: l.len-n.w else: 0, 0, 1) 132 | for i, c in l: 133 | result.poke fgCap, (result.w/%2 - l.len/%2 + i, -1, $c) 134 | 135 | proc `*`(n1, n2: Node): Node = 136 | result = Node(w: n1.w + n2.w + 1, y0: min(n1.y0, n2.y0), y1: max(n1.y1, n2.y1)) 137 | result.poke fgGreen, (n1.w, 0, "»") 138 | result.kids.add Kid(n: n1, dx: 0) 139 | result.kids.add Kid(n: n2, dx: n1.w+1) 140 | 141 | proc `?`(n: Node): Node = 142 | result = n.pad(1, 1, 1, 0) 143 | let (x1, x2, y1, y2) = (0, n.w+1, -1 + n.y0, 0) 144 | result.poke fgLine, (x1, y1, "╭"), (x1, y2, "┴"), (x2, y1, "╮"), (x2, y2, "┴") 145 | for x in x1+1..x2-1: 146 | result.poke fgLine, (x, y1, "─") 147 | for y in y1+1..y2-1: 148 | result.poke fgLine, (x1, y, "│"), (x2, y, "│") 149 | result.poke fgLine, ((x1+x2)/%2, y1, "»") 150 | 151 | proc `+`(n: Node): Node = 152 | result = n.pad(1, 1, 0, 1) 153 | let (x1, x2, y1, y2) = (0, n.w+1, 0, n.y1+1) 154 | result.poke fgLine, (x1, y1, "┬"), (x1, y2, "╰"), (x2, y1, "┬"), (x2, y2, "╯") 155 | for x in x1+1..x2-1: 156 | result.poke fgLine, (x, y2, "─") 157 | for y in y1+1..y2-1: 158 | result.poke fgLine, (x1, y, "│"), (x2, y, "│") 159 | result.poke fgLine, ((x1+x2)/%2, y2, "«") 160 | 161 | proc `!`(n: Node): Node = 162 | result = n.pad(0, 0, 1) 163 | let (x0, x1) = (1, result.w-2) 164 | for x in x0..x1: 165 | result.poke fgRed, (x, result.y0, "━") 166 | 167 | proc `-`*(p1, p2: Node): Node = 168 | return !p2 * p1 169 | 170 | proc `*`(n: Node): Node = ? + n 171 | 172 | proc `@`(n: Node): Node = 173 | result = *(!n * newNode("1")) * n 174 | 175 | proc `&`(n: Node): Node = 176 | result = ! ! n 177 | 178 | proc choice(ns: varArgs[Node]): Node = 179 | var wmax = 0 180 | for n in ns: 181 | wmax = max(wmax, n.w) 182 | var dys = @[0] 183 | var dy = 0 184 | for i in 0.. 
0: 199 | result.poke fgLine, (x0, dys[i], "├"), (x1, dys[i], "┤") 200 | result.poke fgLine, (x0, dys[dys.high], "╰"), (x1, dys[dys.high], "╯") 201 | 202 | proc `{}`*(p: Node, n: BiggestInt): Node = 203 | result = p 204 | for i in 1..': p = newCapNode(p) 279 | else: p = p 280 | result = p 281 | 282 | of nnkInfix: 283 | case n[0].strVal: 284 | of "*", "∙": result = aux(n[1]) * aux(n[2]) 285 | of "-": result = aux(n[1]) - aux(n[2]) 286 | of "^": result = newPrecNode(aux(n[1]), intVal(n[2]), "<") 287 | of "^^": result = newPrecNode(aux(n[1]), intVal(n[2]), ">") 288 | else: discard 289 | 290 | of nnkBracketExpr: 291 | let p = aux(n[0]) 292 | if n[1].kind == nnkIntLit: 293 | result = p{n[1].intVal} 294 | elif n[1].kind == nnkInfix and n[1][0].eqIdent(".."): 295 | result = p{n[1][1].intVal..n[1][2].intVal} 296 | else: discard 297 | 298 | of nnkIdent: 299 | result = newNode("[" & n.strVal & "]", fgNonterm) 300 | 301 | of nnkDotExpr: 302 | result = newNode("[" & n.repr & "]", fgNonterm) 303 | 304 | of nnkCurly: 305 | var cs: CharSet 306 | for nc in n: 307 | if nc.kind == nnkCharLit: 308 | cs.incl nc.intVal.char 309 | elif nc.kind == nnkInfix: 310 | if nc[0].kind == nnkIdent and nc[0].eqIdent(".."): 311 | for c in nc[1].intVal..nc[2].intVal: 312 | cs.incl c.char 313 | if cs.card == 0: 314 | result = newNode("1", fgNonterm) 315 | else: 316 | result = newNode(dumpSet(cs), fgLit) 317 | 318 | of nnkCallStrLit: 319 | case n[0].strVal: 320 | of "i": result = newNode(n[1].strval) 321 | of "E": result = newNode("ERROR", fgError) 322 | 323 | of nnkBracket: 324 | result = newNode("[" & n[0].repr & "]", fgNonterm) 325 | 326 | else: 327 | discard 328 | 329 | let nnf = nn.flattenChoice 330 | result = aux(nnf) 331 | 332 | 333 | -------------------------------------------------------------------------------- /src/npeg/stack.nim: -------------------------------------------------------------------------------- 1 | 2 | # This module implements a basic stack[T]. 
This is used instead of seq[T] 3 | # because the latter has bad performance when unwinding more then one frame at 4 | # a time (ie, setlen). These stacks keep track of their own top and do not 5 | # shrink the underlying seq when popping or unwinding. 6 | 7 | type 8 | Stack*[T] = object 9 | name: string 10 | top*: int 11 | max: int 12 | frames: seq[T] 13 | 14 | 15 | proc `$`*[T](s: Stack[T]): string = 16 | for i in 0..= s.max: 26 | mixin NPegStackOverflowError 27 | raise newException(NPegStackOverflowError, s.name & " stack overflow, depth>" & $s.max) 28 | s.frames.setLen s.frames.len * 2 29 | 30 | template push*[T](s: var Stack[T], frame: T) = 31 | if s.top >= s.frames.len: grow(s) 32 | s.frames[s.top] = frame 33 | inc s.top 34 | 35 | template pop*[T](s: var Stack[T]): T = 36 | assert s.top > 0 37 | dec s.top 38 | s.frames[s.top] 39 | 40 | template peek*[T](s: Stack[T]): T = 41 | assert s.top > 0 42 | s.frames[s.top-1] 43 | 44 | template `[]`*[T](s: Stack[T], idx: int): T = 45 | assert idx < s.top 46 | s.frames[idx] 47 | 48 | template update*[T](s: Stack[T], field: untyped, val: untyped) = 49 | assert s.top > 0 50 | s.frames[s.top-1].field = val 51 | 52 | -------------------------------------------------------------------------------- /tests/basics.nim: -------------------------------------------------------------------------------- 1 | import unittest 2 | import strutils 3 | import npeg 4 | 5 | {.push warning[Spacing]: off.} 6 | 7 | 8 | suite "unit tests": 9 | 10 | test "atoms": 11 | doAssert patt(0 * "a").match("a").ok 12 | doAssert patt(1).match("a").ok 13 | doAssert patt(1).match("a").ok 14 | doAssert patt(2).match("a").ok == false 15 | doAssert patt("a").match("a").ok 16 | doAssert patt("a").match("b").ok == false 17 | doAssert patt("abc").match("abc").ok 18 | doAssert patt({'a'}).match("a").ok 19 | doAssert patt({'a'}).match("b").ok == false 20 | doAssert patt({'a','b'}).match("a").ok 21 | doAssert patt({'a','b'}).match("b").ok 22 | doAssert 
patt({'a','b'}).match("c").ok == false 23 | doAssert patt({'a'..'c'}).match("a").ok 24 | doAssert patt({'a'..'c'}).match("b").ok 25 | doAssert patt({'a'..'c'}).match("c").ok 26 | doAssert patt({'a'..'c'}).match("d").ok == false 27 | doAssert patt({'a'..'c'}).match("a").ok 28 | doAssert patt("").match("abcde").matchLen == 0 29 | doAssert patt("a").match("abcde").matchLen == 1 30 | doAssert patt("ab").match("abcde").matchLen == 2 31 | doAssert patt(i"ab").match("AB").ok 32 | 33 | test "*: concatenation": 34 | doAssert patt("a" * "b").match("ab").ok 35 | #doAssert patt("a" ∙ "b").match("ab").ok 36 | 37 | test "?: zero or one": 38 | doAssert patt("a" * ?"b" * "c").match("abc").ok 39 | doAssert patt("a" * ?"b" * "c").match("ac").ok 40 | 41 | test "+: one or more": 42 | doAssert patt("a" * +"b" * "c").match("abc").ok 43 | doAssert patt("a" * +"b" * "c").match("abbc").ok 44 | doAssert patt("a" * +"b" * "c").match("ac").ok == false 45 | 46 | test "*: zero or more": 47 | doAssert patt(*'a').match("aaaa").ok 48 | doAssert patt(*'a' * 'b').match("aaaab").ok 49 | doAssert patt(*'a' * 'b').match("bbbbb").ok 50 | doAssert patt(*'a' * 'b').match("caaab").ok == false 51 | doAssert patt(+'a' * 'b').match("aaaab").ok 52 | doAssert patt(+'a' * 'b').match("ab").ok 53 | doAssert patt(+'a' * 'b').match("b").ok == false 54 | 55 | test "!: not predicate": 56 | doAssert patt('a' * !'b').match("ac").ok 57 | doAssert patt('a' * !'b').match("ab").ok == false 58 | 59 | test "&: and predicate": 60 | doAssert patt(&"abc").match("abc").ok 61 | doAssert patt(&"abc").match("abd").ok == false 62 | doAssert patt(&"abc").match("abc").matchLen == 0 63 | 64 | test "@: search": 65 | doAssert patt(@"fg").match("abcdefghijk").matchLen == 7 66 | 67 | test "[n]: count": 68 | doAssert patt(1[3]).match("aaaa").ok 69 | doAssert patt(1[4]).match("aaaa").ok 70 | doAssert patt(1[5]).match("aaaa").ok == false 71 | 72 | test "[m..n]: count": 73 | doAssert patt('a'[2..4] * !1).match("").ok == false 74 | doAssert 
patt('a'[2..4] * !1).match("a").ok == false 75 | doAssert patt('a'[2..4] * !1).match("aa").ok 76 | doAssert patt('a'[2..4] * !1).match("aaa").ok 77 | doAssert patt('a'[2..4] * !1).match("aaaa").ok 78 | doAssert patt('a'[2..4] * !1).match("aaaaa").ok == false 79 | 80 | doAssert patt('a'[0..1] * !1).match("").ok 81 | doAssert patt('a'[0..1] * !1).match("a").ok 82 | doAssert patt('a'[0..1] * !1).match("aa").ok == false 83 | 84 | test "|: ordered choice": 85 | doAssert patt("ab" | "cd").match("ab").ok 86 | doAssert patt("ab" | "cd").match("cd").ok 87 | doAssert patt("ab" | "cd").match("ef").ok == false 88 | doAssert patt(("ab" | "cd") | "ef").match("ab").ok == true 89 | doAssert patt(("ab" | "cd") | "ef").match("cd").ok == true 90 | doAssert patt(("ab" | "cd") | "ef").match("ef").ok == true 91 | doAssert patt("ab" | ("cd") | "ef").match("ab").ok == true 92 | doAssert patt("ab" | ("cd") | "ef").match("cd").ok == true 93 | doAssert patt("ab" | ("cd") | "ef").match("ef").ok == true 94 | 95 | test "-: difference": 96 | doAssert patt("abcd" - "abcdef").match("abcdefgh").ok == false 97 | doAssert patt("abcd" - "abcdf").match("abcdefgh").ok 98 | 99 | test "Builtins": 100 | doAssert patt(Digit).match("1").ok 101 | doAssert patt(Digit).match("a").ok == false 102 | doAssert patt(Upper).match("A").ok 103 | doAssert patt(Upper).match("a").ok == false 104 | doAssert patt(Lower).match("a").ok 105 | doAssert patt(Lower).match("A").ok == false 106 | doAssert patt(+Digit).match("12345").ok 107 | doAssert patt(+Xdigit).match("deadbeef").ok 108 | doAssert patt(+Graph).match(" x").ok == false 109 | 110 | test "Misc combos": 111 | doAssert patt('a' | ('b' * 'c')).match("a").ok 112 | doAssert patt('a' | ('b' * 'c') | ('d' * 'e' * 'f')).match("a").ok 113 | doAssert patt('a' | ('b' * 'c') | ('d' * 'e' * 'f')).match("bc").ok 114 | doAssert patt('a' | ('b' * 'c') | ('d' * 'e' * 'f')).match("def").ok 115 | 116 | test "Compile time 1": 117 | proc doTest(): string {.compileTime.} = 118 | var n: 
string 119 | let p = peg "number": 120 | number <- >+Digit: 121 | n = $1 122 | doAssert p.match("12345").ok 123 | return n 124 | const v = doTest() 125 | doAssert v == "12345" 126 | 127 | test "Compile time 2": 128 | static: 129 | var n: string 130 | let p = peg "number": 131 | number <- >+Digit: 132 | n = $1 133 | doAssert p.match("12345").ok 134 | doAssert n == "12345" 135 | 136 | test "matchMax": 137 | let s = peg "line": 138 | line <- one | two 139 | one <- +Digit * 'c' * 'd' * 'f' 140 | two <- +Digit * 'b' 141 | let r = s.match("1234cde") 142 | doAssert r.ok == false 143 | doAssert r.matchLen == 4 144 | doAssert r.matchMax == 6 145 | 146 | test "grammar1": 147 | let a = peg "r1": 148 | r1 <- "abc" 149 | r2 <- r1 * r1 150 | doAssert a.match("abcabc").ok 151 | 152 | test "grammar2": 153 | let a = peg "r1": 154 | r2 <- r1 * r1 155 | r1 <- "abc" 156 | doAssert a.match("abcabc").ok 157 | 158 | test "backref": 159 | doAssert patt(R("sep", Alpha) * *(1 - R("sep")) * R("sep") * !1).match("abbbba").ok 160 | doAssert patt(R("sep", Alpha) * *(1 - R("sep")) * R("sep") * !1).match("abbbbc").ok == false 161 | 162 | test "raise exception 1": 163 | let a = patt E"boom" 164 | expect NPegParseError: 165 | doAssert a.match("abcabc").ok 166 | 167 | test "raise exception 2": 168 | let a = patt 4 * E"boom" 169 | try: 170 | doAssert a.match("abcabc").ok 171 | except NPegParseError as e: 172 | doAssert e.matchLen == 4 173 | doAssert e.matchMax == 4 174 | 175 | test "out of range capture exception 1": 176 | expect NPegCaptureOutOfRangeError: 177 | let a = patt 1: 178 | echo capture[10].s 179 | doAssert a.match("c").ok 180 | 181 | test "out of range capture exception 2": 182 | expect NPegCaptureOutOfRangeError: 183 | let a = patt 1: 184 | echo $9 185 | doAssert a.match("c").ok 186 | 187 | test "unknown backref error": 188 | expect NPegUnknownBackrefError: 189 | discard patt(R("sep", Alpha) * *(1 - R("sep")) * R("sap") * !1).match("abbbba") 190 | 191 | test "user validation": 192 | let 
p = peg "line": 193 | line <- uint8 * "," * uint8 * !1 194 | uint8 <- >+Digit: 195 | let v = parseInt($1) 196 | validate(v>=0 and v<=255) 197 | doAssert p.match("10,10").ok 198 | doAssert p.match("0,255").ok 199 | doAssert not p.match("10,300").ok 200 | doAssert not p.match("300,10").ok 201 | 202 | test "user fail": 203 | let p = peg "line": 204 | line <- 1: 205 | fail() 206 | doAssert not p.match("a").ok 207 | 208 | test "templates": 209 | let p = peg "a": 210 | list(patt, sep) <- patt * *(sep * patt) 211 | commaList(patt) <- list(patt, ",") 212 | a <- commaList(>+Digit) 213 | doAssert p.match("11,22,3").captures == ["11","22","3"] 214 | 215 | test "templates with choices": 216 | let p = peg aap: 217 | one() <- "one" 218 | two() <- "one" 219 | three() <- "flip" | "flap" 220 | aap <- one() | two() | three() 221 | doAssert p.match("onetwoflip").ok 222 | 223 | -------------------------------------------------------------------------------- /tests/captures.nim: -------------------------------------------------------------------------------- 1 | import unittest 2 | import npeg 3 | import strutils 4 | import json 5 | 6 | {.push warning[Spacing]: off.} 7 | 8 | 9 | suite "captures": 10 | 11 | test "no captures": 12 | doAssert patt(1).match("a").captures == @[] 13 | 14 | test "string captures": 15 | doAssert patt(>1).match("ab").captures == @["a"] 16 | doAssert patt(>(>1)).match("ab").captures == @["a", "a"] 17 | doAssert patt(>1 * >1).match("ab").captures == @["a", "b"] 18 | doAssert patt(>(>1 * >1)).match("ab").captures == @["ab", "a", "b"] 19 | doAssert patt(>(>1 * >1)).match("ab").captures == @["ab", "a", "b"] 20 | 21 | test "code block captures": 22 | let p = peg "foo": 23 | foo <- >1: 24 | doAssert $1 == "a" 25 | doAssert @1 == 0 26 | doAssert p.match("a").ok 27 | 28 | test "code block captures 2": 29 | let p = peg("foo", v: string): 30 | foo <- >1: v = $1 31 | var a: string 32 | doAssert p.match("a", a).ok 33 | doAssert a == "a" 34 | 35 | test "code block captures 
3": 36 | var a: string 37 | let p = patt >1: 38 | a = $1 39 | doAssert p.match("a").ok 40 | doAssert a == "a" 41 | 42 | test "code block captures 4": 43 | let p = peg "foo": 44 | foo <- +Digit * >1: 45 | doAssert $1 == "a" 46 | doAssert @1 == 4 47 | doAssert p.match("1234a").ok 48 | 49 | test "code block captures with typed parser": 50 | 51 | type Thing = object 52 | word: string 53 | number: int 54 | 55 | let s = peg("foo", t: Thing): 56 | foo <- word * number 57 | word <- >+Alpha: 58 | t.word = $1 59 | number <- >+Digit: 60 | t.number = parseInt($1) 61 | 62 | var t = Thing() 63 | doAssert s.match("foo123", t).ok == true 64 | doAssert t.word == "foo" 65 | doAssert t.number == 123 66 | 67 | when not defined(gcDestructors): 68 | test "Capture out of range": 69 | expect NPegException: 70 | let p = peg "l": 71 | l <- 1: echo $1 72 | discard p.match("a") 73 | 74 | test "push": 75 | let p = peg "m": 76 | m <- >n * '+' * >n: 77 | push $(parseInt($1) + parseInt($2)) 78 | n <- +Digit 79 | let r = p.match("12+34") 80 | doAssert r.captures()[0] == "46" 81 | 82 | test "nested": 83 | doAssert patt(>(>1 * >1)).match("ab").captures == @["ab", "a", "b"] 84 | 85 | test "nested codeblock": 86 | let p = peg foo: 87 | foo <- >(>1 * b) 88 | b <- >1: push $1 89 | doAssert p.match("ab").captures() == @["ab", "a", "b"] 90 | 91 | test "clyybber": 92 | let p = peg "m": 93 | m <- n * '+' * n: 94 | push $(parseInt($1) + parseInt($2)) 95 | >n <- +Digit 96 | let r = p.match("12+34") 97 | doAssert r.captures()[0] == "46" 98 | -------------------------------------------------------------------------------- /tests/config.nims: -------------------------------------------------------------------------------- 1 | switch("path", "$projectDir/../src") 2 | switch("hints", "off") 3 | -------------------------------------------------------------------------------- /tests/examples.nim: -------------------------------------------------------------------------------- 1 | import unittest 2 | import npeg 3 | 
import json 4 | import strutils 5 | import math 6 | import tables 7 | import npeg/lib/uri 8 | 9 | {.push warning[Spacing]: off.} 10 | 11 | 12 | suite "examples": 13 | 14 | ###################################################################### 15 | 16 | test "misc": 17 | 18 | let p1 = patt +{'a'..'z'} 19 | doAssert p1.match("lowercaseword").ok 20 | 21 | let p2 = peg "ident": 22 | lower <- {'a'..'z'} 23 | ident <- +lower 24 | doAssert p2.match("lowercaseword").ok 25 | 26 | ###################################################################### 27 | 28 | test "shadowing": 29 | 30 | let parser = peg "line": 31 | line <- uri.URI 32 | uri.scheme <- >uri.scheme 33 | uri.host <- >uri.host 34 | uri.port <- >+Digit 35 | uri.path <- >uri.path 36 | 37 | let r = parser.match("http://nim-lang.org:8080/one/two/three") 38 | doAssert r.captures == @["http", "nim-lang.org", "8080", "/one/two/three"] 39 | 40 | ###################################################################### 41 | 42 | test "matchFile": 43 | 44 | when defined(windows) or defined(posix): 45 | 46 | let parser = peg "pairs": 47 | pairs <- pair * *(',' * pair) 48 | word <- +Alnum 49 | number <- +Digit 50 | pair <- (>word * '=' * >number) 51 | 52 | let r = parser.matchFile "tests/testdata" 53 | doAssert r.ok 54 | doAssert r.captures == @["one", "1", "two", "2", "three", "3", "four", "4"] 55 | 56 | ###################################################################### 57 | 58 | test "JSON parser": 59 | 60 | let json = """ 61 | { 62 | "glossary": { 63 | "title": "example glossary", 64 | "GlossDiv": { 65 | "title": "S", 66 | "GlossList": { 67 | "GlossEntry": { 68 | "ID": "SGML", 69 | "SortAs": "SGML", 70 | "GlossTerm": "Standard Generalized Markup Language", 71 | "Acronym": "SGML", 72 | "Abbrev": "ISO 8879:1986", 73 | "GlossDef": { 74 | "para": "A meta-markup language, used to create markup languages such as DocBook.", 75 | "GlossSeeAlso": ["GML", "XML"] 76 | }, 77 | "GlossSee": "markup" 78 | } 79 | } 80 | } 81 | } 82 | } 
83 | """ 84 | 85 | let s = peg "doc": 86 | S <- *Space 87 | jtrue <- "true" 88 | jfalse <- "false" 89 | jnull <- "null" 90 | # String escapes follow RFC 8259 (quote, backslash, slash, b, f, n, r, t, uXXXX); a string may start with any number of escapes 91 | unicodeEscape <- 'u' * Xdigit[4] 92 | escape <- '\\' * ({ '"', '\\', '/', 'b', 'f', 'n', 'r', 't' } | unicodeEscape) 93 | stringBody <- *escape * *( +( {'\x20'..'\xff'} - {'"'} - {'\\'}) * *escape) 94 | jstring <- ?S * '"' * stringBody * '"' * ?S 95 | 96 | minus <- '-' 97 | intPart <- '0' | (Digit-'0') * *Digit 98 | fractPart <- "." * +Digit 99 | expPart <- ( 'e' | 'E' ) * ?( '+' | '-' ) * +Digit 100 | jnumber <- ?minus * intPart * ?fractPart * ?expPart 101 | 102 | doc <- JSON * !1 103 | JSON <- ?S * ( jnumber | jobject | jarray | jstring | jtrue | jfalse | jnull ) * ?S 104 | jobject <- '{' * ( jstring * ":" * JSON * *( "," * jstring * ":" * JSON ) | ?S ) * "}" 105 | jarray <- "[" * ( JSON * *( "," * JSON ) | ?S ) * "]" 106 | 107 | doAssert s.match(json).ok 108 | 109 | ###################################################################### 110 | 111 | test "HTTP with action captures to Nim object": 112 | 113 | type 114 | Request = object 115 | proto: string 116 | version: string 117 | code: int 118 | message: string 119 | headers: Table[string, string] 120 | 121 | let s = peg("http", userdata: Request): 122 | space <- ' ' 123 | crlf <- '\n' * ?'\r' 124 | url <- +(Alpha | Digit | '/' | '_' | '.') 125 | eof <- !1 126 | header_name <- +(Alpha | '-') 127 | header_val <- +(1-{'\n'}-{'\r'}) 128 | proto <- >(+Alpha): 129 | userdata.proto = $1 130 | version <- >(+Digit * '.' 
* +Digit): 131 | userdata.version = $1 132 | code <- >+Digit: 133 | userdata.code = parseInt($1) 134 | msg <- >(+(1 - '\r' - '\n')): 135 | userdata.message = $1 136 | header <- >header_name * ": " * >header_val: 137 | userdata.headers[$1] = $2 138 | 139 | response <- proto * '/' * version * space * code * space * msg 140 | headers <- *(header * crlf) 141 | http <- response * crlf * headers * eof 142 | 143 | let data = """ 144 | HTTP/1.1 301 Moved Permanently 145 | Content-Length: 162 146 | Content-Type: text/html 147 | Location: https://nim.org/ 148 | """ 149 | 150 | var req: Request 151 | let res = s.match(data, req) 152 | doAssert res.ok 153 | doAssert req.proto == "HTTP" 154 | doAssert req.version == "1.1" 155 | doAssert req.code == 301 156 | doAssert req.message == "Moved Permanently" 157 | doAssert req.headers["Content-Length"] == "162" 158 | doAssert req.headers["Content-Type"] == "text/html" 159 | doAssert req.headers["Location"] == "https://nim.org/" 160 | 161 | ###################################################################### 162 | 163 | test "UTF-8": 164 | 165 | let b = " añyóng ♜♞♝♛♚♝♞♜ оживлённым " 166 | 167 | let m = peg "s": 168 | 169 | cont <- {128..191} 170 | 171 | utf8 <- {0..127} | 172 | {194..223} * cont[1] | 173 | {224..239} * cont[2] | 174 | {240..244} * cont[3] 175 | 176 | s <- *(@ > +(utf8-' ')) 177 | 178 | let r = m.match(b) 179 | doAssert r.ok 180 | let c = r.captures 181 | doAssert c == @["añyóng", "♜♞♝♛♚♝♞♜", "оживлённым"] 182 | 183 | ###################################################################### 184 | 185 | test "Back references": 186 | 187 | let p = peg "doc": 188 | S <- *Space 189 | doc <- +word * "<<" * R("sep", sep) * S * >heredoc * R("sep") * S * +word 190 | word <- +Alpha * S 191 | sep <- +Alpha 192 | heredoc <- +(1 - R("sep")) 193 | 194 | let d = """This is a <(Alpha * *( Alpha | Digit | "+" | "-" | "." 
)): userdata.scheme = $1 239 | 240 | authority <- ?(userinfo * "@") * host * ?( ":" * port) 241 | userinfo <- >*(unreserved | pct_encoded | sub_delims | ":"): 242 | userdata.userinfo = $1 243 | 244 | host <- >(IP_literal | IPv4address | reg_name): userdata.host = $1 245 | port <- >*Digit: userdata.port = $1 246 | 247 | IP_literal <- "[" * (IPv6address | IPvFuture) * "]" 248 | 249 | IPvFuture <- "v" * +Xdigit * "." * +(unreserved | sub_delims | ":") 250 | 251 | IPv6address <- (h16 * ":")[6] * ls32 | 252 | "::" * (h16 * ":")[5] * ls32 | 253 | ?( h16 ) * "::" * (h16 * ":")[4] * ls32 | 254 | ?( h16 * (":" * h16)[0..1] ) * "::" * (h16 * ":")[3] * ls32 | 255 | ?( h16 * (":" * h16)[0..2] ) * "::" * (h16 * ":")[2] * ls32 | 256 | ?( h16 * (":" * h16)[0..3] ) * "::" * (h16 * ":") * ls32 | 257 | ?( h16 * (":" * h16)[0..4] ) * "::" * ls32 | 258 | ?( h16 * (":" * h16)[0..5] ) * "::" * h16 | 259 | ?( h16 * (":" * h16)[0..6] ) * "::" 260 | 261 | h16 <- Xdigit[1..4] 262 | ls32 <- (h16 * ":" * h16) | IPv4address 263 | IPv4address <- dec_octet * "." * dec_octet * "." * dec_octet * "." 
* dec_octet 264 | # PEG choice is ordered and commits to the first match, so the longest alternatives must come first 265 | dec_octet <- "25" * {'0'..'5'} | # 250-255 266 | "2" * {'0'..'4'} * Digit | # 200-249 267 | "1" * Digit * Digit | # 100-199 268 | {'1'..'9'} * Digit | # 10-99 269 | Digit # 0-9 270 | 271 | reg_name <- *(unreserved | pct_encoded | sub_delims) 272 | 273 | path <- path_abempty | # begins with "/" or is empty 274 | path_absolute | # begins with "/" but not "//" 275 | path_noscheme | # begins with a non-colon segment 276 | path_rootless | # begins with a segment 277 | path_empty # zero characters 278 | 279 | path_abempty <- >(*( "/" * segment )): userdata.path = $1 280 | path_absolute <- >("/" * ?( segment_nz * *( "/" * segment ) )): userdata.path = $1 281 | path_noscheme <- >(segment_nz_nc * *( "/" * segment )): userdata.path = $1 282 | path_rootless <- >(segment_nz * *( "/" * segment )): userdata.path = $1 283 | path_empty <- 0 284 | 285 | segment <- *pchar 286 | segment_nz <- +pchar 287 | segment_nz_nc <- +( unreserved | pct_encoded | sub_delims | "@" ) 288 | # non_zero_length segment without any colon ":" 289 | 290 | pchar <- unreserved | pct_encoded | sub_delims | ":" | "@" 291 | # RFC 3986: query and fragment are *( pchar / "/" / "?" ) 292 | query <- >*( pchar | "/" | "?" ): userdata.query = $1 293 | 294 | fragment <- >*( pchar | "/" | "?" ): userdata.fragment = $1 295 | 296 | pct_encoded <- "%" * Xdigit * Xdigit 297 | 298 | unreserved <- Alpha | Digit | "-" | "." | "_" | "~" 299 | reserved <- gen_delims | sub_delims 300 | gen_delims <- ":" | "/" | "?" | "#" | "[" | "]" | "@" 301 | sub_delims <- "!" 
| "$" | "&" | "'" | "(" | ")" | "*" | "+" | "," | ";" | "=" 302 | 303 | let urls = @[ 304 | "s3://somebucket/somefile.txt", 305 | "scheme://user:pass@xn--mgbh0fb.xn--kgbechtv", 306 | "scheme://user:pass@host:81/path?query#fragment", 307 | "ScheMe://user:pass@HoSt:81/path?query#fragment", 308 | "scheme://HoSt:81/path?query#fragment", 309 | "scheme://@HoSt:81/path?query#fragment", 310 | "scheme://user:pass@host/path?query#fragment", 311 | "scheme://user:pass@host:/path?query#fragment", 312 | "scheme://host/path?query#fragment", 313 | "scheme://10.0.0.2/p?q#f", 314 | "scheme://[vAF.1::2::3]/p?q#f", 315 | "scheme:path?query#fragment", 316 | "scheme:///path?query#fragment", 317 | "scheme://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]?query#fragment", 318 | "scheme:path#fragment", 319 | "scheme:path?#fragment", 320 | "ldap://[2001:db8::7]/c=GB?objectClass?one", 321 | "http://example.org/hello:12?foo=bar#test", 322 | "android-app://org.wikipedia/http/en.m.wikipedia.org/wiki/The_Hitchhiker%27s_Guide_to_the_Galaxy", 323 | "ftp://:/p?q#f", 324 | "scheme://user:pass@host:000000000081/path?query#fragment", 325 | "scheme://user:pass@host:81/path?query#fragment", 326 | "ScheMe://user:pass@HoSt:81/path?query#fragment", 327 | "scheme://HoSt:81/path?query#fragment", 328 | "scheme://@HoSt:81/path?query#fragment", 329 | "scheme://user:pass@host/path?query#fragment", 330 | "scheme://user:pass@host:/path?query#fragment", 331 | "scheme://user:pass@host/path?query#fragment", 332 | "scheme://host/path?query#fragment", 333 | "scheme://10.0.0.2/p?q#f", 334 | "scheme:path?query#fragment", 335 | "scheme:///path?query#fragment", 336 | "scheme://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]?query#fragment", 337 | "scheme:path#fragment", 338 | "scheme:path?#fragment", 339 | "tel:05000", 340 | "scheme:path#", 341 | "https://thephpleague.com./p?#f", 342 | "http://a_.!~*\'(-)n0123Di%25%26:pass;:&=+$,word@www.zend.com", 343 | "http://", 344 | "http:::/path", 345 | 
"ldap://[2001:db8::7]/c=GB?objectClass?one", 346 | "http://example.org/hello:12?foo=bar#test", 347 | "android-app://org.wikipedia/http/en.m.wikipedia.org/wiki/The_Hitchhiker%27s_Guide_to_the_Galaxy", 348 | "scheme://user:pass@xn--mgbh0fb.xn--kgbechtv", 349 | "http://download.linuxjournal.com/pdf/get-doc.php?code=2c230d54e20e7cb595c660da48be7622&tcode=epub-301-" 350 | ] 351 | 352 | for s in urls: 353 | var uri: Uri 354 | let r = p.match(s, uri) 355 | if not r.ok: 356 | echo s 357 | quit 1 358 | -------------------------------------------------------------------------------- /tests/json-32M.bzip2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zevv/npeg/409f6796d0e880b3f0222c964d1da7de6e450811/tests/json-32M.bzip2 -------------------------------------------------------------------------------- /tests/lexparse.nim: -------------------------------------------------------------------------------- 1 | import npeg, strutils, sequtils, unittest 2 | 3 | type 4 | 5 | Token* = enum 6 | tInt 7 | tAdd 8 | cAddExpr 9 | 10 | Node = ref object 11 | case kind: Token 12 | of tInt: 13 | intVal: int 14 | of tAdd: 15 | discard 16 | of cAddExpr: 17 | l, r: Node 18 | 19 | State = ref object 20 | tokens: seq[Node] 21 | stack: seq[Node] 22 | 23 | # Npeg uses `==` to check if a subject matches a literal 24 | 25 | proc `==`(n: Node, t: Token): bool = n.kind == t 26 | 27 | proc `$`(n: Node): string = 28 | case n.kind 29 | of tInt: return $n.intVal 30 | of tAdd: return "+" 31 | of cAddExpr: return "(" & $n.l & " + " & $n.r & ")" 32 | 33 | let lexer = peg(tokens, st: State): 34 | s <- *Space 35 | tokens <- s * *(token * s) 36 | token <- int | add 37 | int <- +Digit: 38 | st.tokens.add Node(kind: tInt, intVal: parseInt($0)) 39 | add <- '+': 40 | st.tokens.add Node(kind: tAdd) 41 | 42 | let parser = peg(g, Node, st: State): 43 | g <- int * *add * !1 44 | int <- [tInt]: 45 | st.stack.add $0 46 | add <- [tAdd] * int: 47 | 
st.stack.add Node(kind: cAddExpr, r: st.stack.pop, l: st.stack.pop) 48 | 49 | suite "lexer/parser": 50 | 51 | test "run": 52 | 53 | var st = State() 54 | doAssert lexer.match("1 + 2 + 3", st).ok 55 | doAssert parser.match(st.tokens, st).ok 56 | doAssert $st.stack[0] == "((1 + 2) + 3)" 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /tests/lib.nim: -------------------------------------------------------------------------------- 1 | import unittest 2 | import strutils 3 | import unicode 4 | import npeg 5 | import npeg/lib/types 6 | import npeg/lib/utf8 7 | 8 | {.push warning[Spacing]: off.} 9 | 10 | 11 | suite "unit tests": 12 | 13 | test "types": 14 | doAssert patt(types.uint8).match("0").ok 15 | doAssert patt(types.uint8).match("255").ok 16 | doAssert not patt(types.uint8).match("256").ok 17 | 18 | doAssert patt(types.int8).match("-128").ok 19 | doAssert patt(types.int8).match("127").ok 20 | doAssert not patt(types.int8).match("-129").ok 21 | doAssert not patt(types.int8).match("128").ok 22 | 23 | when defined(cpu64): 24 | doAssert patt(types.uint32).match("4294967295").ok 25 | doAssert not patt(types.uint32).match("4294967296").ok 26 | 27 | 28 | test "utf8 runes": 29 | doAssert patt(utf8.any[4] * !1).match("abcd").ok 30 | doAssert patt(utf8.any[4] * !1).match("abcd").ok 31 | doAssert patt(utf8.any[4] * !1).match("всех").ok 32 | doAssert patt(utf8.any[4] * !1).match("乪乫乬乭").ok 33 | 34 | test "utf8 character classes": 35 | doAssert patt(utf8.upper).match("Ɵ").ok 36 | doAssert not patt(utf8.upper).match("ë").ok 37 | doAssert not patt(utf8.lower).match("Ɵ").ok 38 | doAssert patt(utf8.lower).match("ë").ok 39 | -------------------------------------------------------------------------------- /tests/nimversion.nim: -------------------------------------------------------------------------------- 1 | 2 | import strutils 3 | import npeg 4 | 5 | type 6 | NimType = enum Nim, NimSkull 7 | 8 | Version = object 9 | maj, min, 
rev: int 10 | extra: string 11 | 12 | NimVersion = object 13 | typ: NimType 14 | version: Version 15 | os: string 16 | cpu: string 17 | date: string 18 | git: string 19 | boot_switches: seq[string] 20 | 21 | 22 | let p = peg("nimversion", nv: NimVersion): 23 | 24 | S <- *{' ','\t','\n','\r'} 25 | nimversion <- oldnim_version | nimskull_version 26 | 27 | oldnim_version <- header * S * 28 | "Compiled at " * date * S * 29 | "Copyright (c) " * +Graph * " by Andreas Rumpf" * S * 30 | "git hash:" * S * git * S * 31 | "active boot switches:" * S * boot_switches 32 | 33 | nimskull_version <- header * S * 34 | "Source hash: " * git * S * 35 | "Source date: " * date 36 | 37 | header <- typ * S * "Compiler Version" * S * version * S * "[" * os * ":" * S * cpu * "]" * S 38 | 39 | typ <- typ_nimskull | typ_nim 40 | typ_nim <- "Nim": nv.typ = NimType.Nim 41 | typ_nimskull <- "Nimskull": nv.typ = NimType.NimSkull 42 | 43 | int <- +{'0'..'9'} 44 | os <- >+Alnum: nv.os = $1 45 | cpu <- >+Alnum: nv.cpu = $1 46 | git <- >+{'0'..'9','a'..'f'}: nv.git = $1 47 | boot_switches <- *(boot_switch * S) 48 | boot_switch <- >+Graph: nv.boot_switches.add($1) 49 | date <- >+{'0'..'9','-'}: nv.date = $1 50 | version <- >int * "." * >int * "." 
* >int * ?"-" * >*Graph: 51 | nv.version.maj = parseInt($1) 52 | nv.version.min = parseInt($2) 53 | nv.version.rev = parseInt($3) 54 | nv.version.extra = $4 55 | 56 | 57 | let vnim = """Nim Compiler Version 2.1.1 [Linux: amd64] 58 | Compiled at 2024-03-01 59 | Copyright (c) 2006-2024 by Andreas Rumpf 60 | 61 | git hash: 1e7ca2dc789eafccdb44304f7e42206c3702fc13 62 | active boot switches: -d:release -d:danger 63 | """ 64 | 65 | let vskull = """Nimskull Compiler Version 0.1.0-dev.21234 [linux: amd64] 66 | 67 | Source hash: 4948ae809f7d84ef6d765111a7cd0c7cf2ae77d2 68 | Source date: 2024-02-18 69 | """ 70 | 71 | var nv: NimVersion 72 | 73 | block: 74 | let r = p.match(vnim, nv) 75 | if r.ok: 76 | echo nv.repr 77 | 78 | block: 79 | let r = p.match(vskull, nv) 80 | if r.ok: 81 | echo nv.repr 82 | 83 | -------------------------------------------------------------------------------- /tests/performance.nim: -------------------------------------------------------------------------------- 1 | 2 | import npeg 3 | import os 4 | import streams 5 | import strutils 6 | import tables 7 | import json 8 | import times 9 | #import packedjson 10 | import osproc 11 | 12 | let js = execProcess("bzip2 -d < tests/json-32M.bzip2").string 13 | 14 | let hostname = readFile("/etc/hostname").strip() 15 | 16 | let expectTime = { 17 | "platdoos": { 18 | "json": 0.651, 19 | "parsejson": 3.962, 20 | "words": 0.920, 21 | "search": 0.057, 22 | "search1": 0.231, 23 | "search2": 1.419, 24 | "search3": 0.292, 25 | }.toTable(), 26 | "fe2": { 27 | "json": 3.975, 28 | "parsejson": 8.739, 29 | "words": 2.391, 30 | "search": 0.373, 31 | "search1": 2.014, 32 | "search2": 2.871, 33 | "search3": 0.771, 34 | }.toTable(), 35 | }.toTable() 36 | 37 | 38 | # Wake up the governor a bit 39 | 40 | var v = 0 41 | for i in 1..100000: 42 | for j in 1..1000000: 43 | inc v 44 | 45 | 46 | template measureTime*(what: string, code: untyped) = 47 | 48 | var expect = 0.0 49 | if hostname in expectTime: 50 | if what in 
expectTime[hostname]: 51 | expect = expectTime[hostname][what] 52 | 53 | let start = cpuTime() 54 | block: 55 | code 56 | let duration = cpuTime() - start 57 | let perc = 100.0 * duration / expect 58 | echo what & ": ", duration.formatFloat(ffDecimal, 3), "s ", perc.formatFloat(ffDecimal, 1), "%" 59 | 60 | 61 | measureTime "json": 62 | 63 | ## Json parsing with npeg 64 | 65 | let p = peg JSON: 66 | S <- *{' ','\t','\r','\n'} 67 | True <- "true" 68 | False <- "false" 69 | Null <- "null" 70 | 71 | UnicodeEscape <- 'u' * Xdigit[4] 72 | Escape <- '\\' * ({ '"', '\\', '/', 'b', 'f', 'n', 'r', 't' } | UnicodeEscape) 73 | StringBody <- *Escape * *( +( {'\x20'..'\xff'} - {'"'} - {'\\'}) * *Escape) 74 | String <- '"' * StringBody * '"': 75 | discard 76 | 77 | Minus <- '-' 78 | IntPart <- '0' | {'1'..'9'} * *{'0'..'9'} 79 | FractPart <- "." * +{'0'..'9'} 80 | ExpPart <- ( 'e' | 'E' ) * ?( '+' | '-' ) * +{'0'..'9'} 81 | Number <- ?Minus * IntPart * ?FractPart * ?ExpPart: 82 | discard 83 | 84 | DOC <- Value * !1 85 | ObjPair <- S * String * S * ":" * Value 86 | Object <- '{' * ( ObjPair * *( "," * ObjPair ) | S ) * "}" 87 | Array <- "[" * ( Value * *( "," * Value ) | S ) * "]" 88 | Value <- S * ( Number | String | Object | Array | True | False | Null ) * S 89 | 90 | JSON <- Value * !1 91 | 92 | for i in 1..10: 93 | doAssert p.match(js).ok 94 | 95 | 96 | let s = newStringStream(js) 97 | measureTime "parsejson": 98 | # JSon parsing with nims 'parsejson' module. 
99 | for i in 1..10: 100 | s.setPosition(0) 101 | var p: JsonParser 102 | open(p, s, "json") 103 | while true: 104 | p.next() 105 | if p.kind == jsonError or p.kind == jsonEof: 106 | break 107 | 108 | 109 | measureTime "words": 110 | 111 | var v = 0 112 | let p = peg foo: 113 | foo <- +word 114 | word <- @>+Alpha: 115 | inc v 116 | discard p.match(js).ok 117 | 118 | 119 | measureTime "search": 120 | # Search using the built-in search operator `@` 121 | var v = 0 122 | let p = peg search: 123 | search <- @"CALIFORNIA": 124 | inc v 125 | for i in 1..10: 126 | discard p.match(js).ok 127 | 128 | 129 | measureTime "search1": 130 | # Search using tail recursion: match the literal, or consume one character and retry. 131 | let p = peg SS: 132 | SS <- +S 133 | S <- "CALIFORNIA" | 1 * S 134 | for i in 1..10: 135 | discard p.match(js).ok 136 | 137 | measureTime "search2": 138 | # Search using an explicit loop: consume characters while the literal does not match (`!` lookahead), then match it. 139 | let p = peg SS: 140 | SS <- +S 141 | S <- *( !"CALIFORNIA" * 1) * "CALIFORNIA" 142 | for i in 1..10: 143 | discard p.match(js).ok 144 | 145 | measureTime "search3": 146 | # Tail recursion with an optimization: after a miss, skip every character that is not 'C' to avoid false starts. 147 | let p = peg SS: 148 | SS <- +S 149 | S <- "CALIFORNIA" | 1 * *(1-'C') * S 150 | for i in 1..10: 151 | discard p.match(js).ok 152 | 153 | -------------------------------------------------------------------------------- /tests/precedence.nim: -------------------------------------------------------------------------------- 1 | import unittest 2 | import strutils 3 | import math 4 | import tables 5 | import npeg 6 | 7 | {.push warning[Spacing]: off.} 8 | 9 | 10 | suite "precedence operator": 11 | 12 | # The PEG below implements a Pratt parser. The ^ and ^^ operators are used to 13 | # implement precedence climbing; this allows rules to be left recursive while 14 | # still avoiding unbounded recursion. 
15 | # 16 | # The parser local state `seq[int]` is used as a stack to store captures and 17 | # intermediate results while parsing; the end result of the expression will 18 | # be available in element 0 when the parser finishes. 19 | 20 | test "expr evaluator": 21 | 22 | # Table of binary operators - this maps the operator string to a proc 23 | # performing the operation: 24 | 25 | template map(op: untyped): untyped = (proc(a, b: int): int = op(a, b)) 26 | 27 | var binOps = { 28 | "+": map(`+`), 29 | "-": map(`-`), 30 | "*": map(`*`), 31 | "/": map(`/%`), 32 | "^": map(`^`), 33 | }.toTable() 34 | 35 | let p = peg(exp, st: seq[int]): 36 | 37 | S <- *Space 38 | 39 | # Capture a number and put it on the stack 40 | 41 | number <- >+Digit * S: 42 | st.add parseInt($1) 43 | 44 | # Reset the precedence level to 0 when parsing sub-expressions 45 | # in parentheses 46 | 47 | parenExp <- ( "(" * exp * ")" ) ^ 0 48 | 49 | # Unary minus: take last element of the stack, negate and push back 50 | 51 | uniMinus <- '-' * exp: 52 | st.add(-st.pop) 53 | 54 | # The prefix is a number, a sub expression in parentheses or the unary 55 | # `-` operator. 56 | 57 | prefix <- number | parenExp | uniMinus 58 | 59 | # Parse an infix operator. Bounded by the precedence operator that makes 60 | # sure `exp` is only parsed if the current precedence is lower than the 61 | # given precedence. Note that the power operator has right associativity. 
62 | 63 | infix <- >{'+','-'} * exp ^ 1 | 64 | >{'*','/'} * exp ^ 2 | 65 | >{'^'} * exp ^^ 3 : 66 | 67 | # Take two results off the stack, apply the operator and push 68 | # back the result 69 | 70 | let (f2, f1) = (st.pop, st.pop) 71 | st.add binOps[$1](f1, f2) 72 | 73 | # An expression consists of a prefix followed by zero or more infix 74 | # operators 75 | 76 | exp <- S * prefix * *infix 77 | 78 | 79 | # Evaluate the given expression 80 | 81 | proc eval(expr: string): int = 82 | var st: seq[int] 83 | doAssert p.match(expr, st).ok 84 | st[0] 85 | 86 | 87 | # Test cases 88 | 89 | doAssert eval("2+1") == 2+1 90 | doAssert eval("(((2+(1))))") == 2+1 91 | doAssert eval("3+2") == 3+2 92 | 93 | doAssert eval("3+2+4") == 3+2+4 94 | doAssert eval("(3+2)+4") == 3+2+4 95 | doAssert eval("3+(2+4)") == 3+2+4 96 | doAssert eval("(3+2+4)") == 3+2+4 97 | 98 | doAssert eval("3*2*4") == 3*2*4 99 | doAssert eval("(3*2)*4") == 3*2*4 100 | doAssert eval("3*(2*4)") == 3*2*4 101 | doAssert eval("(3*2*4)") == 3*2*4 102 | 103 | doAssert eval("3-2-4") == 3-2-4 104 | doAssert eval("(3-2)-4") == (3-2)-4 105 | doAssert eval("3-(2-4)") == 3-(2-4) 106 | doAssert eval("(3-2-4)") == 3-2-4 107 | 108 | doAssert eval("3/8/4") == 3/%8/%4 109 | doAssert eval("(3/8)/4") == (3/%8)/%4 110 | doAssert eval("3/(8/4)") == 3/%(8/%4) 111 | doAssert eval("(3/8/4)") == 3/%8/%4 112 | 113 | doAssert eval("(3*8/4)") == 3*8/%4 114 | doAssert eval("(3/8*4)") == 3/%8*4 115 | doAssert eval("3*(8/4)") == 3*(8/%4) 116 | 117 | doAssert eval("4^3^2") == 4^3^2 118 | doAssert eval("(4^3)^2") == (4^3)^2 119 | doAssert eval("4^(3^2)") == 4^(3^2) 120 | 121 | -------------------------------------------------------------------------------- /tests/testdata: -------------------------------------------------------------------------------- 1 | one=1,two=2,three=3,four=4 2 | -------------------------------------------------------------------------------- /tests/tests.nim: 
-------------------------------------------------------------------------------- 1 | include "basics.nim" 2 | include "examples.nim" 3 | include "captures.nim" 4 | include "precedence.nim" 5 | include "lib.nim" 6 | include "lexparse.nim" 7 | 8 | --------------------------------------------------------------------------------