├── .gitattributes ├── .github ├── FUNDING.yml ├── pull_request_template.md └── workflows │ └── ci.yml ├── .gitignore ├── LICENSE ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── build.zig └── src ├── combn ├── README.md ├── combinator │ ├── always.zig │ ├── combinator.zig │ ├── mapto.zig │ ├── oneof.zig │ ├── oneof_ambiguous.zig │ ├── optional.zig │ ├── reentrant.zig │ ├── repeated.zig │ ├── repeated_ambiguous.zig │ ├── sequence.zig │ └── sequence_ambiguous.zig ├── combn.zig ├── gllparser │ ├── ParserPath.zig │ ├── gllparser.zig │ ├── parser.zig │ └── result_stream.zig ├── parser │ ├── byte_range.zig │ ├── end.zig │ ├── literal.zig │ └── parser.zig └── test_complex.zig ├── dsl ├── Compilation.zig ├── CompilerContext.zig ├── Node.zig ├── Program.zig ├── String.zig ├── compiler.zig ├── dsl.zig ├── grammar.zig ├── identifier.zig └── pattern_grammar.zig └── zorex.zig /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto eol=lf 2 | 3 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: slimsag 2 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | - [ ] By selecting this checkbox, I agree to license my contributions to this project under the license(s) described in the LICENSE file, and I have the right to do so or have received permission to do so by an employer or client I am producing work for whom has this right. -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | - push 4 | - pull_request 5 | jobs: 6 | test: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - name: Checkout 10 | uses: actions/checkout@v2 11 | - name: Setup Zig 12 | run: | 13 | sudo apt install xz-utils 14 | sudo sh -c 'wget -c https://ziglang.org/builds/zig-linux-x86_64-0.10.0-dev.36+6fdf7ce0a.tar.xz -O - | tar -xJ --strip-components=1 -C /usr/local/bin' 15 | - name: test 16 | run: zig build test 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # This file is for zig-specific build artifacts. 2 | # If you have OS-specific or editor-specific files to ignore, 3 | # such as *.swp or .DS_Store, put those in your global 4 | # ~/.gitignore and put this in your ~/.gitconfig: 5 | # 6 | # [core] 7 | # excludesfile = ~/.gitignore 8 | # 9 | # Cheers! 10 | # -andrewrk 11 | 12 | zig-cache/ 13 | /release/ 14 | /debug/ 15 | /build/ 16 | /build-*/ 17 | /docgen_tmp/ 18 | 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2021, Hexops Contributors (given via the Git commit history). 2 | 3 | Licensed under the Apache License, Version 2.0 (see LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0) 4 | or the MIT license (see LICENSE-MIT or http://opensource.org/licenses/MIT), at 5 | your option. All files in the project without exclusions may not be copied, 6 | modified, or distributed except according to those terms. 
7 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 
62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 
180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2021 Hexops Contributors (given via the Git commit history). 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Zorex: the omnipotent regex engine 2 | 3 | [![CI](https://github.com/hexops/zorex/workflows/CI/badge.svg)](https://github.com/hexops/zorex/actions) 4 | 5 | Zorex blurs the line between a regex engine and the advanced parsing algorithms used to parse programming languages. 6 | 7 | Even with the most powerful regex engines today, you [can't parse HTML](https://stackoverflow.com/questions/6751105/why-its-not-possible-to-use-regex-to-parse-html-xml-a-formal-explanation-in-la) (a context-free language) [or XML](https://stackoverflow.com/a/8578999) ([a context-sensitive language](https://softwareengineering.stackexchange.com/a/205725)), but _you can_ with Zorex. 8 | 9 | ## ⚠️ Project status: in-development ⚠️ 10 | 11 | Zorex is under heavy development and is not yet ready for use.
[Follow me on Twitter](https://twitter.com/slimsag) for updates. 12 | 13 | ## How does it work? 14 | 15 | Behind the scenes, Zorex parses a small DSL (the "zorex syntax", a regex-like syntax that enables opt-in [EBNF-like syntax](https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form)) and then [at runtime builds a parser specifically for your input grammar](https://devlog.hexops.com/2021/zig-parser-combinators-and-why-theyre-awesome). 16 | 17 | It's a bit like a traditional parser generator, but done at runtime (instead of through code generation) and with a deep level of syntactic compatibility with traditional regex engines. 18 | 19 | It uses [an optimized GLL parser combinator framework called Combn](./src/combn/README.md) to quickly parse some of the most complex languages, including left- and right-recursive context-free languages and some context-sensitive languages. 20 | 21 | ## A quick note about academic terminology 22 | 23 | Technically, Zorex is "an advanced pattern matching engine", and it is arguably incorrect to call it a _regular expression engine_ because regular expressions by nature cannot parse non-regular languages (such as HTML). 24 | 25 | Any regex engine that supports backtracking, however, is _also_ "not a regular expression engine", as Larry Wall, the author of Perl's regex engine, [puts it](https://raku.org/archive/doc/design/apo/A05.html): 26 | 27 | > “Regular expressions” […] are only marginally related to real regular expressions. Nevertheless, the term has grown with the capabilities of our pattern matching engines, so I’m not going to try to fight linguistic necessity here. I will, however, generally call them “regexes” (or “regexen”, when I’m in an Anglo-Saxon mood). 28 | 29 | Since the aim of Zorex is to maintain a deep level of syntactic compatibility with other regex engines people are familiar with, and to _further extend that_ to support parsing more complex non-regular languages, we call Zorex a regex engine. 30 | -------------------------------------------------------------------------------- /build.zig: -------------------------------------------------------------------------------- 1 | const Builder = @import("std").build.Builder; 2 | 3 | pub fn build(b: *Builder) void { 4 | const mode = b.standardReleaseOptions(); 5 | const lib = b.addStaticLibrary("template", "src/zorex.zig"); 6 | lib.setBuildMode(mode); 7 | lib.install(); 8 | 9 | var main_tests = b.addTest("src/zorex.zig"); 10 | main_tests.test_evented_io = true; 11 | main_tests.setBuildMode(mode); 12 | main_tests.setMainPkgPath("src/"); 13 | 14 | const test_step = b.step("test", "Run library tests"); 15 | test_step.dependOn(&main_tests.step); 16 | } 17 | -------------------------------------------------------------------------------- /src/combn/README.md: -------------------------------------------------------------------------------- 1 | # combn: runtime GLL parser combinators for Zig 2 | 3 | Combn is the core parsing technique behind Zorex. 4 | 5 | ## Runtime composition 6 | 7 | It is a runtime-composed [parser combinator](https://en.wikipedia.org/wiki/Parser_combinator) framework, which enables one to build parsers _at runtime_ (compared to traditional parser combinators composed through code generation) as described in ["Zig, Parser Combinators - and Why They're Awesome"](https://devlog.hexops.com/2021/zig-parser-combinators-and-why-theyre-awesome). 8 | 9 | Being runtime-composed means that you can, for example,
define a DSL using parser combinators (such as a regex-like syntax), and then produce a new parser at runtime that actually parses inputs in that syntax. 10 | 11 | ## Generalized LL parser (GLL) 12 | 13 | Behind the scenes, combn uses a generalized LL (GLL) parsing algorithm described in: 14 | 15 | > "Generalized Parser Combinators", Daniel Spiewak, University of Wisconsin, 2010. Implemented as the [gll-combinators Scala library](https://github.com/djspiewak/gll-combinators), using continuation-passing style and trampolined dispatch. 16 | 17 | This enables combn to parse some of the most complex language grammars, including left- and right-recursive context-free grammars, as well as some context-sensitive grammars. 18 | 19 | You can read more about GLL parsing in this great article: ["General Parser Combinators in Racket" by Vegard Øye](https://epsil.github.io/gll/). 20 | 21 | ## Zig ≈ performance 22 | 23 | Most (almost all?) GLL parser implementations are in higher-level languages (Haskell, Scala, OCaml, etc.), as their type systems (and especially Haskell's lazy evaluation) lend themselves very well to functional parsing approaches. 24 | 25 | As far as we know, combn is among the first (possibly the first) optimized GLL parser implementations in a low-level language like Zig. 26 | 27 | One reason this was relatively straightforward to implement in Zig is its async support: async Zig functions let us effectively provide "lazy evaluation" and "sleep" dependent parse paths, as is done in e.g. the Haskell implementations. 28 | 29 | ## Advantages over other GLL parsers 30 | 31 | Combn has a few advantages over other GLL parsers: 32 | 33 | ### Optimized parse-node-localized memoization 34 | 35 | The original GLL parsing algorithm is O(n^3) worst-case, better than GLR which is O(n^4) worst-case. 36 | 37 | Combn uses an even more optimized GLL parsing algorithm than the original, with parse-node-localized memoization, approximately the same as described in: 38 | 39 | > "Faster, Practical GLL Parsing", Ali Afroozeh and Anastasia Izmaylova, Centrum Wiskunde & Informatica, 1098 XG Amsterdam, The Netherlands, 2015. 40 | 41 | ### Support for same-position reentrant parsers 42 | 43 | Some left-recursive parsers require same-position reentrancy, e.g.: 44 | 45 | ```ebnf 46 | Expr = Expr?, "abc" ; 47 | Grammar = Expr ; 48 | ``` 49 | 50 | Here an input string "abcabcabc" requires that `Expr` be parsed at offset=0 in the input string multiple times (in order to "greedily consume" the entire input). 51 | 52 | Many GLL parser implementations have differing behavior depending on whether the above grammar is defined as `Grammar = Expr ;` or `Grammar = Expr, EOF ;` - matching only a single "abc" without EOF, and matching greedily otherwise. Some implementations instead use a global rollback system if the entire parse fails, requiring re-parsing starting at the root of the parse tree. 53 | 54 | Combn uses parse-node-localized retries in the case of same-position reentrant grammars, which provides both better theoretical performance and consistent results regardless of what comes next in the grammar.
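To make this concrete, the sketch below wraps a parser for such a grammar in the `Reentrant` combinator from this repository. The `Payload`, `Value`, `expr`, `allocator`, and `ctx` names are assumed placeholders, not part of the real API; see `combinator/reentrant.zig` for the actual interface.

```zig
const combinator = @import("combinator/combinator.zig");

// Sketch: `expr` is a previously constructed *Parser(Payload, Value) for the
// left-recursive `Expr` rule above.
const reentrant = try combinator.Reentrant(Payload, Value).init(allocator, expr);
defer reentrant.deinit(allocator, null);

// Reentrant re-invokes `expr` at the same input offset until no further
// progress is made, yielding the greedy `(((null,abc),abc),abc)` shape.
try reentrant.parse(&ctx);
```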
55 | 56 | ### Enumeration of all possible parse paths for ambiguous grammars 57 | 58 | Many parser combinator frameworks opt to only enable navigating down one possible "committed" path of ambiguous grammars; this makes dealing with the resulting data types easier, but it means it is not possible to enumerate all possible ways an ambiguous grammar could have been parsed. 59 | 60 | Combn uses fully generic type parameters, which does make it slightly more complex than other parser libraries but also enables enumerating all possible parse paths. 61 | 62 | ## How do I use it? 63 | 64 | You can look at [`test_complex.zig`](test_complex.zig) for some ideas, but note a few things: 65 | 66 | 1. **The usage is quite complex** 67 | - Due to being type-agnostic (you can define your own "AST node" result value, or compute and return results directly from within parsers) AND due to supporting full enumeration of ambiguous grammars, there is a lot of type munging required. 68 | 2. **You probably don't want to use this API directly** 69 | - I am working on an EBNF-like DSL grammar on top of this API, which will let you quickly define a language in EBNF form and get a parser for it (at runtime); that will be a far more reasonable interface. 70 | 3. Some parts of the API are still in motion / can be simplified. 71 | 72 | [Follow me on Twitter](https://twitter.com/slimsag) for updates. 73 | -------------------------------------------------------------------------------- /src/combn/combinator/always.zig: -------------------------------------------------------------------------------- 1 | const gllparser = @import("../gllparser/gllparser.zig"); 2 | const Error = gllparser.Error; 3 | const Parser = gllparser.Parser; 4 | const ParserContext = gllparser.Context; 5 | const Result = gllparser.Result; 6 | const NodeName = gllparser.NodeName; 7 | 8 | const std = @import("std"); 9 | const testing = std.testing; 10 | const mem = std.mem; 11 | 12 | pub const Void = struct { 13 | pub fn deinit(self: *const @This(), allocator: mem.Allocator) void { 14 | _ = self; 15 | _ = allocator; 16 | } 17 | }; 18 | 19 | /// If the result is not `null`, its `.offset` value will be updated to reflect the current parse 20 | /// position before Always returns it. 21 | pub fn Context(comptime Value: type) type { 22 | return ?Result(Value); 23 | } 24 | 25 | /// Always yields the input value (once/unambiguously), or no value (if the input value is null). 26 | /// 27 | /// The parser takes ownership of the `input` value; it is deinitialized when the parser is.
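///
/// For example (a sketch using the names from the test at the bottom of this
/// file): `Always(Payload, Void).init(allocator, null)` yields no values at all,
/// while constructing it with a non-null `Result(Value)` yields exactly that
/// value once.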
28 | pub fn Always(comptime Payload: type, comptime Value: type) type { 29 | return struct { 30 | parser: Parser(Payload, Value) = Parser(Payload, Value).init(parse, nodeName, deinit, null), 31 | input: Context(Value), 32 | 33 | const Self = @This(); 34 | 35 | pub fn init(allocator: mem.Allocator, input: Context(Value)) !*Parser(Payload, Value) { 36 | const self = Self{ .input = input }; 37 | return try self.parser.heapAlloc(allocator, self); 38 | } 39 | 40 | pub fn initStack(input: Context(Value)) Self { 41 | return Self{ .input = input }; 42 | } 43 | 44 | pub fn deinit(parser: *Parser(Payload, Value), allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void { 45 | _ = freed; 46 | const self = @fieldParentPtr(Self, "parser", parser); 47 | if (self.input) |input| input.deinit(allocator); 48 | } 49 | 50 | pub fn nodeName(parser: *const Parser(Payload, Value), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 { 51 | _ = node_name_cache; 52 | const self = @fieldParentPtr(Self, "parser", parser); 53 | 54 | var v = std.hash_map.hashString("Always"); 55 | v +%= std.hash_map.getAutoHashFn(?Result(Value), void)({}, self.input); 56 | return v; 57 | } 58 | 59 | pub fn parse(parser: *const Parser(Payload, Value), in_ctx: *const ParserContext(Payload, Value)) callconv(.Async) Error!void { 60 | const self = @fieldParentPtr(Self, "parser", parser); 61 | var ctx = in_ctx.with(self.input); 62 | defer ctx.results.close(); 63 | 64 | if (self.input) |input| { 65 | var tmp = input.toUnowned(); 66 | tmp.offset = ctx.offset; 67 | try ctx.results.add(tmp); 68 | } 69 | } 70 | }; 71 | } 72 | 73 | test "always" { 74 | nosuspend { 75 | const allocator = testing.allocator; 76 | 77 | const Payload = void; 78 | const ctx = try ParserContext(Payload, Void).init(allocator, "hello world", {}); 79 | defer ctx.deinit(); 80 | 81 | const noop = try Always(Payload, Void).init(allocator, null); 82 | defer noop.deinit(allocator, null); 83 | 84 | try noop.parse(&ctx); 85 | 86 | var sub = ctx.subscribe(); 87 | try testing.expect(sub.next() == null); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/combn/combinator/combinator.zig: -------------------------------------------------------------------------------- 1 | pub const always = @import("always.zig"); 2 | pub const Always = always.Always; 3 | 4 | pub const mapto = @import("mapto.zig"); 5 | pub const MapTo = mapto.MapTo; 6 | 7 | pub const oneof_ambiguous = @import("oneof_ambiguous.zig"); 8 | pub const OneOfAmbiguous = oneof_ambiguous.OneOfAmbiguous; 9 | 10 | pub const oneof = @import("oneof.zig"); 11 | pub const OneOf = oneof.OneOf; 12 | 13 | pub const optional = @import("optional.zig"); 14 | pub const Optional = optional.Optional; 15 | 16 | pub const reentrant = @import("reentrant.zig"); 17 | pub const Reentrant = reentrant.Reentrant; 18 | 19 | pub const repeated_ambiguous = @import("repeated_ambiguous.zig"); 20 | pub const RepeatedAmbiguous = repeated_ambiguous.RepeatedAmbiguous; 21 | 22 | pub const repeated = @import("repeated.zig"); 23 | pub const Repeated = repeated.Repeated; 24 | 25 | pub const sequence_ambiguous = @import("sequence_ambiguous.zig"); 26 | pub const SequenceAmbiguous = sequence_ambiguous.SequenceAmbiguous; 27 | 28 | pub const sequence = @import("sequence.zig"); 29 | pub const Sequence = sequence.Sequence; 30 | 31 | test "include" { 32 | _ = OneOfAmbiguous; 33 | _ = RepeatedAmbiguous; 34 | } 35 | -------------------------------------------------------------------------------- 
/src/combn/combinator/mapto.zig: -------------------------------------------------------------------------------- 1 | const gllparser = @import("../gllparser/gllparser.zig"); 2 | const Error = gllparser.Error; 3 | const Parser = gllparser.Parser; 4 | const ParserContext = gllparser.Context; 5 | const Result = gllparser.Result; 6 | const NodeName = gllparser.NodeName; 7 | const PosKey = gllparser.PosKey; 8 | const ParserPath = gllparser.ParserPath; 9 | 10 | const Literal = @import("../parser/literal.zig").Literal; 11 | const LiteralValue = @import("../parser/literal.zig").Value; 12 | 13 | const std = @import("std"); 14 | const testing = std.testing; 15 | const mem = std.mem; 16 | 17 | pub fn Context(comptime Payload: type, comptime Value: type, comptime Target: type) type { 18 | return struct { 19 | parser: *Parser(Payload, Value), 20 | mapTo: fn (in: Result(Value), payload: Payload, allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(Target), 21 | }; 22 | } 23 | 24 | /// Wraps the `input.parser`, mapping its value to the `Target` type. 25 | /// 26 | /// The `input.parser` must remain alive for as long as the `MapTo` parser will be used. 27 | pub fn MapTo(comptime Payload: type, comptime Value: type, comptime Target: type) type { 28 | return struct { 29 | parser: Parser(Payload, Target) = Parser(Payload, Target).init(parse, nodeName, deinit, countReferencesTo), 30 | input: Context(Payload, Value, Target), 31 | 32 | const Self = @This(); 33 | 34 | pub fn init(allocator: mem.Allocator, input: Context(Payload, Value, Target)) !*Parser(Payload, Target) { 35 | const self = Self{ .input = input }; 36 | return try self.parser.heapAlloc(allocator, self); 37 | } 38 | 39 | pub fn initStack(input: Context(Payload, Value, Target)) Self { 40 | return Self{ .input = input }; 41 | } 42 | 43 | pub fn deinit(parser: *Parser(Payload, Target), allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void { 44 | const self = @fieldParentPtr(Self, "parser", parser); 45 | self.input.parser.deinit(allocator, freed); 46 | } 47 | 48 | pub fn countReferencesTo(parser: *const Parser(Payload, Target), other: usize, freed: *std.AutoHashMap(usize, void)) usize { 49 | const self = @fieldParentPtr(Self, "parser", parser); 50 | if (@ptrToInt(parser) == other) return 1; 51 | return self.input.parser.countReferencesTo(other, freed); 52 | } 53 | 54 | pub fn nodeName(parser: *const Parser(Payload, Target), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 { 55 | const self = @fieldParentPtr(Self, "parser", parser); 56 | 57 | var v = std.hash_map.hashString("MapTo"); 58 | v +%= try self.input.parser.nodeName(node_name_cache); 59 | v +%= @ptrToInt(self.input.mapTo); 60 | return v; 61 | } 62 | 63 | pub fn parse(parser: *const Parser(Payload, Target), in_ctx: *const ParserContext(Payload, Target)) callconv(.Async) !void { 64 | const self = @fieldParentPtr(Self, "parser", parser); 65 | var ctx = in_ctx.with(self.input); 66 | defer ctx.results.close(); 67 | 68 | const child_node_name = try ctx.input.parser.nodeName(&in_ctx.memoizer.node_name_cache); 69 | const child_ctx = try in_ctx.initChild(Value, child_node_name, ctx.offset); 70 | defer child_ctx.deinitChild(); 71 | if (!child_ctx.existing_results) try ctx.input.parser.parse(&child_ctx); 72 | 73 | var sub = child_ctx.subscribe(); 74 | var closed = false; 75 | while (sub.next()) |next| { 76 | if (closed) { 77 | continue; 78 | } 79 | var frame = try std.heap.page_allocator.allocAdvanced(u8, 16, @frameSize(self.input.mapTo),
std.mem.Allocator.Exact.at_least); 80 | defer std.heap.page_allocator.free(frame); 81 | const mapped = try await @asyncCall(frame, {}, self.input.mapTo, .{ next, in_ctx.input, ctx.allocator, ctx.key, ctx.path }); 82 | if (mapped == null) { 83 | closed = true; 84 | continue; 85 | } 86 | try ctx.results.add(mapped.?); 87 | } 88 | } 89 | }; 90 | } 91 | 92 | test "mapto" { 93 | nosuspend { 94 | const allocator = testing.allocator; 95 | 96 | const String = struct { 97 | value: []const u8, 98 | 99 | pub fn init(value: []const u8) @This() { 100 | return .{ .value = value }; 101 | } 102 | 103 | pub fn deinit(self: *const @This(), _allocator: mem.Allocator) void { 104 | _ = self; 105 | _ = _allocator; 106 | } 107 | }; 108 | 109 | const Payload = void; 110 | const ctx = try ParserContext(Payload, String).init(allocator, "hello world", {}); 111 | defer ctx.deinit(); 112 | 113 | const mapTo = try MapTo(Payload, LiteralValue, String).init(allocator, .{ 114 | .parser = (try Literal(Payload).init(allocator, "hello")).ref(), 115 | .mapTo = struct { 116 | fn mapTo(in: Result(LiteralValue), payload: Payload, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(String) { 117 | _ = payload; 118 | _ = _allocator; 119 | _ = key; 120 | _ = path; 121 | switch (in.result) { 122 | .err => return Result(String).initError(in.offset, in.result.err), 123 | else => return Result(String).init(in.offset, String.init("hello")), 124 | } 125 | } 126 | }.mapTo, 127 | }); 128 | defer mapTo.deinit(allocator, null); 129 | 130 | try mapTo.parse(&ctx); 131 | 132 | var sub = ctx.subscribe(); 133 | var first = sub.next().?; 134 | try testing.expectEqual(Result(String).init(5, String.init("hello")), first); 135 | try testing.expect(sub.next() == null); 136 | } 137 | } 138 | -------------------------------------------------------------------------------- /src/combn/combinator/oneof.zig: -------------------------------------------------------------------------------- 1 | const gllparser = @import("../gllparser/gllparser.zig"); 2 | const Error = gllparser.Error; 3 | const Parser = gllparser.Parser; 4 | const ParserContext = gllparser.Context; 5 | const Result = gllparser.Result; 6 | const NodeName = gllparser.NodeName; 7 | const ResultStream = gllparser.ResultStream; 8 | 9 | const Literal = @import("../parser/literal.zig").Literal; 10 | const LiteralValue = @import("../parser/literal.zig").Value; 11 | 12 | const std = @import("std"); 13 | const testing = std.testing; 14 | const mem = std.mem; 15 | 16 | pub fn Context(comptime Payload: type, comptime Value: type) type { 17 | return []const *Parser(Payload, Value); 18 | } 19 | 20 | pub const Ownership = enum { 21 | borrowed, 22 | owned, 23 | copy, 24 | }; 25 | 26 | /// Matches one of the given `input` parsers, matching the first parse path. If ambiguous grammar 27 | /// matching is desired, see `OneOfAmbiguous`. 28 | /// 29 | /// The `input` parsers must remain alive for as long as the `OneOf` parser will be used. 30 | /// 31 | /// In the case of a non-ambiguous `OneOf` grammar of `Parser1 | Parser2`, the combinator will 32 | /// yield: 33 | /// 34 | /// ``` 35 | /// stream(Parser1Value) 36 | /// ``` 37 | /// 38 | /// Or: 39 | /// 40 | /// ``` 41 | /// stream(Parser2Value) 42 | /// ``` 43 | /// 44 | /// In the case of an ambiguous grammar `Parser1 | Parser2` where either parser can produce three 45 | /// different parse paths, it will always yield the first successful path. 
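///
/// For example (mirroring the tests at the bottom of this file): matching the
/// input "elloworld" against `Literal("ello") | Literal("elloworld")` yields
/// only the first successful path ("ello"); use `OneOfAmbiguous` to enumerate
/// both possibilities.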
46 | pub fn OneOf(comptime Payload: type, comptime Value: type) type { 47 | return struct { 48 | parser: Parser(Payload, Value) = Parser(Payload, Value).init(parse, nodeName, deinit, countReferencesTo), 49 | input: Context(Payload, Value), 50 | ownership: Ownership, 51 | 52 | const Self = @This(); 53 | 54 | pub fn init(allocator: mem.Allocator, input: Context(Payload, Value), ownership: Ownership) !*Parser(Payload, Value) { 55 | var self = Self{ .input = input, .ownership = ownership }; 56 | if (ownership == .copy) { 57 | const Elem = std.meta.Elem(@TypeOf(input)); 58 | var copy = try allocator.alloc(Elem, input.len); 59 | std.mem.copy(Elem, copy, input); 60 | self.input = copy; 61 | self.ownership = .owned; 62 | } 63 | return try self.parser.heapAlloc(allocator, self); 64 | } 65 | 66 | pub fn initStack(input: Context(Payload, Value), ownership: Ownership) Self { 67 | if (ownership == Ownership.copy) unreachable; 68 | return Self{ .input = input, .ownership = ownership }; 69 | } 70 | 71 | pub fn deinit(parser: *Parser(Payload, Value), allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void { 72 | const self = @fieldParentPtr(Self, "parser", parser); 73 | for (self.input) |in_parser| { 74 | in_parser.deinit(allocator, freed); 75 | } 76 | if (self.ownership == .owned) allocator.free(self.input); 77 | } 78 | 79 | pub fn countReferencesTo(parser: *const Parser(Payload, Value), other: usize, freed: *std.AutoHashMap(usize, void)) usize { 80 | const self = @fieldParentPtr(Self, "parser", parser); 81 | if (@ptrToInt(parser) == other) return 1; 82 | var count: usize = 0; 83 | for (self.input) |in_parser| { 84 | count += in_parser.countReferencesTo(other, freed); 85 | } 86 | return count; 87 | } 88 | 89 | pub fn nodeName(parser: *const Parser(Payload, Value), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 { 90 | const self = @fieldParentPtr(Self, "parser", parser); 91 | 92 | var v = std.hash_map.hashString("OneOf"); 93 | for (self.input) |in_parser| { 94 | v +%= try in_parser.nodeName(node_name_cache); 95 | } 96 | return v; 97 | } 98 | 99 | pub fn parse(parser: *const Parser(Payload, Value), in_ctx: *const ParserContext(Payload, Value)) callconv(.Async) !void { 100 | const self = @fieldParentPtr(Self, "parser", parser); 101 | var ctx = in_ctx.with(self.input); 102 | defer ctx.results.close(); 103 | 104 | var gotValues: usize = 0; 105 | for (self.input) |in_parser| { 106 | const child_node_name = try in_parser.nodeName(&in_ctx.memoizer.node_name_cache); 107 | var child_ctx = try in_ctx.initChild(Value, child_node_name, ctx.offset); 108 | defer child_ctx.deinitChild(); 109 | if (!child_ctx.existing_results) try in_parser.parse(&child_ctx); 110 | var sub = child_ctx.subscribe(); 111 | while (sub.next()) |next| { 112 | switch (next.result) { 113 | .err => {}, 114 | else => { 115 | // TODO(slimsag): need path committal functionality 116 | if (gotValues == 0) try ctx.results.add(next.toUnowned()); 117 | gotValues += 1; 118 | }, 119 | } 120 | } 121 | } 122 | if (gotValues == 0) { 123 | // All parse paths failed, so return a nice error. 124 | // 125 | // TODO(slimsag): include names of expected input parsers 126 | // 127 | // TODO(slimsag): collect and return the furthest error if a parse path made 128 | // progress and failed. 
129 | try ctx.results.add(Result(Value).initError(ctx.offset, "expected OneOf")); 130 | } 131 | } 132 | }; 133 | } 134 | 135 | // Confirms that the following grammar works as expected: 136 | // 137 | // ```ebnf 138 | // Grammar = "ello" | "world" ; 139 | // ``` 140 | // 141 | test "oneof" { 142 | nosuspend { 143 | const allocator = testing.allocator; 144 | 145 | const Payload = void; 146 | const ctx = try ParserContext(Payload, LiteralValue).init(allocator, "elloworld", {}); 147 | defer ctx.deinit(); 148 | 149 | const parsers: []*Parser(Payload, LiteralValue) = &.{ 150 | (try Literal(Payload).init(allocator, "ello")).ref(), 151 | (try Literal(Payload).init(allocator, "world")).ref(), 152 | }; 153 | var helloOrWorld = try OneOf(Payload, LiteralValue).init(allocator, parsers, .borrowed); 154 | defer helloOrWorld.deinit(allocator, null); 155 | try helloOrWorld.parse(&ctx); 156 | 157 | var sub = ctx.subscribe(); 158 | var r1 = sub.next().?; 159 | try testing.expectEqual(@as(usize, 4), r1.offset); 160 | try testing.expectEqualStrings("ello", r1.result.value.value); 161 | try testing.expect(sub.next() == null); // stream closed 162 | } 163 | } 164 | 165 | // Confirms behavior of the following grammar, which is ambiguous and should use OneOfAmbiguous 166 | // instead of OneOf if ambiguity needs to be enumerated: 167 | // 168 | // ```ebnf 169 | // Grammar = "ello" | "elloworld" ; 170 | // ``` 171 | // 172 | test "oneof_ambiguous_first" { 173 | nosuspend { 174 | const allocator = testing.allocator; 175 | 176 | const Payload = void; 177 | const ctx = try ParserContext(Payload, LiteralValue).init(allocator, "elloworld", {}); 178 | defer ctx.deinit(); 179 | 180 | const parsers: []*Parser(Payload, LiteralValue) = &.{ 181 | (try Literal(Payload).init(allocator, "ello")).ref(), 182 | (try Literal(Payload).init(allocator, "elloworld")).ref(), 183 | }; 184 | var helloOrWorld = try OneOf(Payload, LiteralValue).init(allocator, parsers, .borrowed); 185 | defer helloOrWorld.deinit(allocator, null); 186 | try helloOrWorld.parse(&ctx); 187 | 188 | var sub = ctx.subscribe(); 189 | var r1 = sub.next().?; 190 | try testing.expectEqual(@as(usize, 4), r1.offset); 191 | try testing.expectEqualStrings("ello", r1.result.value.value); 192 | try testing.expect(sub.next() == null); // stream closed 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /src/combn/combinator/oneof_ambiguous.zig: -------------------------------------------------------------------------------- 1 | const gllparser = @import("../gllparser/gllparser.zig"); 2 | const Error = gllparser.Error; 3 | const Parser = gllparser.Parser; 4 | const ParserContext = gllparser.Context; 5 | const Result = gllparser.Result; 6 | const NodeName = gllparser.NodeName; 7 | const ResultStream = gllparser.ResultStream; 8 | 9 | const Literal = @import("../parser/literal.zig").Literal; 10 | const LiteralValue = @import("../parser/literal.zig").Value; 11 | 12 | const std = @import("std"); 13 | const testing = std.testing; 14 | const mem = std.mem; 15 | 16 | pub fn Context(comptime Payload: type, comptime Value: type) type { 17 | return []const *Parser(Payload, Value); 18 | } 19 | 20 | pub const Ownership = enum { 21 | borrowed, 22 | owned, 23 | copy, 24 | }; 25 | 26 | /// Matches one of the given `input` parsers, supporting ambiguous and unambiguous grammars. 27 | /// 28 | /// The `input` parsers must remain alive for as long as the `OneOfAmbiguous` parser will be used. 
29 | /// 30 | /// In the case of a non-ambiguous `OneOfAmbiguous` grammar of `Parser1 | Parser2`, the combinator will 31 | /// yield: 32 | /// 33 | /// ``` 34 | /// stream(Parser1Value) 35 | /// ``` 36 | /// 37 | /// Or: 38 | /// 39 | /// ``` 40 | /// stream(Parser2Value) 41 | /// ``` 42 | /// 43 | /// In the case of an ambiguous grammar `Parser1 | Parser2` where either parser can produce three 44 | /// different parse paths, it will yield: 45 | /// 46 | /// ``` 47 | /// stream( 48 | /// Parser1Value1, 49 | /// Parser1Value2, 50 | /// Parser1Value3, 51 | /// Parser2Value1, 52 | /// Parser2Value2, 53 | /// Parser2Value3, 54 | /// ) 55 | /// ``` 56 | /// 57 | pub fn OneOfAmbiguous(comptime Payload: type, comptime Value: type) type { 58 | return struct { 59 | parser: Parser(Payload, Value) = Parser(Payload, Value).init(parse, nodeName, deinit, countReferencesTo), 60 | input: Context(Payload, Value), 61 | ownership: Ownership, 62 | 63 | const Self = @This(); 64 | 65 | pub fn init(allocator: mem.Allocator, input: Context(Payload, Value), ownership: Ownership) !*Parser(Payload, Value) { 66 | var self = Self{ .input = input, .ownership = ownership }; 67 | if (ownership == .copy) { 68 | const Elem = std.meta.Elem(@TypeOf(input)); 69 | var copy = try allocator.alloc(Elem, input.len); 70 | std.mem.copy(Elem, copy, input); 71 | self.input = copy; 72 | self.ownership = .owned; 73 | } 74 | return try self.parser.heapAlloc(allocator, self); 75 | } 76 | 77 | pub fn initStack(input: Context(Payload, Value), ownership: Ownership) Self { 78 | if (ownership == Ownership.copy) unreachable; 79 | return Self{ .input = input, .ownership = ownership }; 80 | } 81 | 82 | pub fn deinit(parser: *Parser(Payload, Value), allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void { 83 | const self = @fieldParentPtr(Self, "parser", parser); 84 | for (self.input) |in_parser| { 85 | in_parser.deinit(allocator, freed); 86 | } 87 | if (self.ownership == .owned) allocator.free(self.input); 88 | } 89 | 90 | pub fn countReferencesTo(parser: *const Parser(Payload, Value), other: usize, freed: *std.AutoHashMap(usize, void)) usize { 91 | const self = @fieldParentPtr(Self, "parser", parser); 92 | if (@ptrToInt(parser) == other) return 1; 93 | var count: usize = 0; 94 | for (self.input) |in_parser| { 95 | count += in_parser.countReferencesTo(other, freed); 96 | } 97 | return count; 98 | } 99 | 100 | pub fn nodeName(parser: *const Parser(Payload, Value), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 { 101 | const self = @fieldParentPtr(Self, "parser", parser); 102 | 103 | var v = std.hash_map.hashString("OneOfAmbiguous"); 104 | for (self.input) |in_parser| { 105 | v +%= try in_parser.nodeName(node_name_cache); 106 | } 107 | return v; 108 | } 109 | 110 | pub fn parse(parser: *const Parser(Payload, Value), in_ctx: *const ParserContext(Payload, Value)) callconv(.Async) !void { 111 | const self = @fieldParentPtr(Self, "parser", parser); 112 | var ctx = in_ctx.with(self.input); 113 | defer ctx.results.close(); 114 | 115 | var buffer = try ResultStream(Result(Value)).init(ctx.allocator, ctx.key); 116 | defer buffer.deinit(); 117 | for (self.input) |in_parser| { 118 | const child_node_name = try in_parser.nodeName(&in_ctx.memoizer.node_name_cache); 119 | var child_ctx = try in_ctx.initChild(Value, child_node_name, ctx.offset); 120 | defer child_ctx.deinitChild(); 121 | if (!child_ctx.existing_results) try in_parser.parse(&child_ctx); 122 | var sub = child_ctx.subscribe(); 123 | while (sub.next()) |next| { 124 | 
try buffer.add(next.toUnowned()); 125 | } 126 | } 127 | buffer.close(); 128 | 129 | var gotValues: usize = 0; 130 | var gotErrors: usize = 0; 131 | var sub = buffer.subscribe(ctx.key, ctx.path, Result(Value).initError(ctx.offset, "matches only the empty language")); 132 | while (sub.next()) |next| { 133 | switch (next.result) { 134 | .err => gotErrors += 1, 135 | else => gotValues += 1, 136 | } 137 | } 138 | if (gotValues > 0) { 139 | // At least one parse path succeeded, so discard all error'd parse paths. 140 | // 141 | // TODO(slimsag): would the client not want to enumerate error'd paths that made some 142 | // progress? 143 | var sub2 = buffer.subscribe(ctx.key, ctx.path, Result(Value).initError(ctx.offset, "matches only the empty language")); 144 | while (sub2.next()) |next| { 145 | switch (next.result) { 146 | .err => {}, 147 | else => try ctx.results.add(next), 148 | } 149 | } 150 | return; 151 | } 152 | // All parse paths failed, so return a nice error. 153 | // 154 | // TODO(slimsag): include names of expected input parsers 155 | // 156 | // TODO(slimsag): collect and return the furthest error if a parse path made 157 | // progress and failed. 158 | try ctx.results.add(Result(Value).initError(ctx.offset, "expected OneOfAmbiguous")); 159 | } 160 | }; 161 | } 162 | 163 | // Confirms that the following grammar works as expected: 164 | // 165 | // ```ebnf 166 | // Grammar = "ello" | "world" ; 167 | // ``` 168 | // 169 | test "oneof" { 170 | nosuspend { 171 | const allocator = testing.allocator; 172 | 173 | const Payload = void; 174 | const ctx = try ParserContext(Payload, LiteralValue).init(allocator, "elloworld", {}); 175 | defer ctx.deinit(); 176 | 177 | const parsers: []*Parser(Payload, LiteralValue) = &.{ 178 | (try Literal(Payload).init(allocator, "ello")).ref(), 179 | (try Literal(Payload).init(allocator, "world")).ref(), 180 | }; 181 | var helloOrWorld = try OneOfAmbiguous(Payload, LiteralValue).init(allocator, parsers, .borrowed); 182 | defer helloOrWorld.deinit(allocator, null); 183 | try helloOrWorld.parse(&ctx); 184 | 185 | var sub = ctx.subscribe(); 186 | var first = sub.next().?; 187 | try testing.expectEqual(Result(LiteralValue).init(4, .{ .value = "ello" }).toUnowned(), first); 188 | try testing.expect(sub.next() == null); // stream closed 189 | } 190 | } 191 | 192 | // Confirms that the following grammar works as expected: 193 | // 194 | // ```ebnf 195 | // Grammar = "ello" | "elloworld" ; 196 | // ``` 197 | // 198 | test "oneof_ambiguous" { 199 | nosuspend { 200 | const allocator = testing.allocator; 201 | 202 | const Payload = void; 203 | const ctx = try ParserContext(Payload, LiteralValue).init(allocator, "elloworld", {}); 204 | defer ctx.deinit(); 205 | 206 | const parsers: []*Parser(Payload, LiteralValue) = &.{ (try Literal(Payload).init(allocator, "ello")).ref(), (try Literal(Payload).init(allocator, "elloworld")).ref() }; 207 | var helloOrWorld = try OneOfAmbiguous(Payload, LiteralValue).init(allocator, parsers, .borrowed); 208 | defer helloOrWorld.deinit(allocator, null); 209 | try helloOrWorld.parse(&ctx); 210 | 211 | var sub = ctx.subscribe(); 212 | var r1 = sub.next().?; 213 | try testing.expectEqual(@as(usize, 4), r1.offset); 214 | try testing.expectEqualStrings("ello", r1.result.value.value); 215 | var r2 = sub.next().?; 216 | try testing.expectEqual(@as(usize, 9), r2.offset); 217 | try testing.expectEqualStrings("elloworld", r2.result.value.value); 218 | try testing.expect(sub.next() == null); // stream closed 219 | } 220 | } 221 | 
-------------------------------------------------------------------------------- /src/combn/combinator/optional.zig: -------------------------------------------------------------------------------- 1 | const gllparser = @import("../gllparser/gllparser.zig"); 2 | const Error = gllparser.Error; 3 | const Parser = gllparser.Parser; 4 | const ParserContext = gllparser.Context; 5 | const Result = gllparser.Result; 6 | const NodeName = gllparser.NodeName; 7 | 8 | const Literal = @import("../parser/literal.zig").Literal; 9 | const LiteralValue = @import("../parser/literal.zig").Value; 10 | 11 | const std = @import("std"); 12 | const testing = std.testing; 13 | const mem = std.mem; 14 | 15 | pub fn Context(comptime Payload: type, comptime Value: type) type { 16 | return *Parser(Payload, Value); 17 | } 18 | 19 | /// Wraps the `input.parser`, making it an optional parser producing an optional value. 20 | /// 21 | /// The `input.parser` must remain alive for as long as the `Optional` parser will be used. 22 | pub fn Optional(comptime Payload: type, comptime Value: type) type { 23 | return struct { 24 | parser: Parser(Payload, ?Value) = Parser(Payload, ?Value).init(parse, nodeName, deinit, countReferencesTo), 25 | input: Context(Payload, Value), 26 | 27 | const Self = @This(); 28 | 29 | pub fn init(allocator: mem.Allocator, input: Context(Payload, Value)) !*Parser(Payload, ?Value) { 30 | const self = Self{ .input = input }; 31 | return try self.parser.heapAlloc(allocator, self); 32 | } 33 | 34 | pub fn initStack(input: Context(Payload, Value)) Self { 35 | return Self{ .input = input }; 36 | } 37 | 38 | pub fn deinit(parser: *Parser(Payload, ?Value), allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void { 39 | const self = @fieldParentPtr(Self, "parser", parser); 40 | self.input.deinit(allocator, freed); 41 | } 42 | 43 | pub fn countReferencesTo(parser: *const Parser(Payload, ?Value), other: usize, freed: *std.AutoHashMap(usize, void)) usize { 44 | const self = @fieldParentPtr(Self, "parser", parser); 45 | if (@ptrToInt(parser) == other) return 1; 46 | return self.input.countReferencesTo(other, freed); 47 | } 48 | 49 | pub fn nodeName(parser: *const Parser(Payload, ?Value), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 { 50 | const self = @fieldParentPtr(Self, "parser", parser); 51 | 52 | var v = std.hash_map.hashString("Optional"); 53 | v +%= try self.input.nodeName(node_name_cache); 54 | return v; 55 | } 56 | 57 | pub fn parse(parser: *const Parser(Payload, ?Value), in_ctx: *const ParserContext(Payload, ?Value)) callconv(.Async) Error!void { 58 | const self = @fieldParentPtr(Self, "parser", parser); 59 | var ctx = in_ctx.with(self.input); 60 | defer ctx.results.close(); 61 | 62 | const child_node_name = try ctx.input.nodeName(&in_ctx.memoizer.node_name_cache); 63 | const child_ctx = try in_ctx.initChild(Value, child_node_name, ctx.offset); 64 | defer child_ctx.deinitChild(); 65 | if (!child_ctx.existing_results) try ctx.input.parse(&child_ctx); 66 | 67 | var sub = child_ctx.subscribe(); 68 | while (sub.next()) |next| { 69 | switch (next.result) { 70 | .err => try ctx.results.add(Result(?Value).init(ctx.offset, null)), 71 | else => try ctx.results.add(Result(?Value).init(next.offset, next.result.value).toUnowned()), 72 | } 73 | } 74 | return; 75 | } 76 | }; 77 | } 78 | 79 | test "optional_some" { 80 | nosuspend { 81 | const allocator = testing.allocator; 82 | 83 | const Payload = void; 84 | const ctx = try ParserContext(Payload, ?LiteralValue).init(allocator, "hello 
world", {}); 85 | defer ctx.deinit(); 86 | 87 | const optional = try Optional(Payload, LiteralValue).init(allocator, (try Literal(Payload).init(allocator, "hello")).ref()); 88 | defer optional.deinit(allocator, null); 89 | 90 | try optional.parse(&ctx); 91 | 92 | var sub = ctx.subscribe(); 93 | var r1 = sub.next().?; 94 | try testing.expectEqual(@as(usize, 5), r1.offset); 95 | try testing.expectEqualStrings("hello", r1.result.value.?.value); 96 | try testing.expectEqual(@as(?Result(?LiteralValue), null), sub.next()); 97 | } 98 | } 99 | 100 | test "optional_none" { 101 | nosuspend { 102 | const allocator = testing.allocator; 103 | 104 | const Payload = void; 105 | const ctx = try ParserContext(Payload, ?LiteralValue).init(allocator, "hello world", {}); 106 | defer ctx.deinit(); 107 | 108 | const optional = try Optional(Payload, LiteralValue).init(allocator, (try Literal(Payload).init(allocator, "world")).ref()); 109 | defer optional.deinit(allocator, null); 110 | 111 | try optional.parse(&ctx); 112 | 113 | var sub = ctx.subscribe(); 114 | var first = sub.next().?; 115 | try testing.expectEqual(Result(?LiteralValue).init(0, null), first); 116 | try testing.expect(sub.next() == null); 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/combn/combinator/reentrant.zig: -------------------------------------------------------------------------------- 1 | const gllparser = @import("../gllparser/gllparser.zig"); 2 | const Error = gllparser.Error; 3 | const Parser = gllparser.Parser; 4 | const ParserContext = gllparser.Context; 5 | const Result = gllparser.Result; 6 | const NodeName = gllparser.NodeName; 7 | const ResultStream = gllparser.ResultStream; 8 | 9 | const std = @import("std"); 10 | const testing = std.testing; 11 | const mem = std.mem; 12 | 13 | pub fn Context(comptime Payload: type, comptime Value: type) type { 14 | return *Parser(Payload, Value); 15 | } 16 | 17 | /// Wraps the `input.parser`, allowing it to be reentrant (such as in the case of a left recursive 18 | /// grammar.) 19 | /// 20 | /// This has relatively small overhead (so you may use it to wrap any reentrant parser), but is 21 | /// only strictly required for reentrant parsers where invoking the parser multiple times at the 22 | /// same exact position in the input string is required to emit a different result. For example: 23 | /// 24 | /// ```ebnf 25 | /// Expr = Expr?, "abc" ; 26 | /// Grammar = Expr ; 27 | /// ``` 28 | /// 29 | /// Without a Reentrant wrapper, parsing the above Grammar would match only a singular 30 | /// `(null, abc)` match, because `Expr` is not invoked recursively. However, with a reentrant 31 | /// wrapper it would match `(((null,abc),abc),abc)` instead. 32 | /// 33 | /// The `input.parser` must remain alive for as long as the `Reentrant` parser will be used. 
34 | pub fn Reentrant(comptime Payload: type, comptime Value: type) type { 35 | return struct { 36 | parser: Parser(Payload, Value) = Parser(Payload, Value).init(parse, nodeName, deinit, countReferencesTo), 37 | input: Context(Payload, Value), 38 | 39 | const Self = @This(); 40 | 41 | pub fn init(allocator: mem.Allocator, input: Context(Payload, Value)) !*Parser(Payload, Value) { 42 | const self = Self{ .input = input }; 43 | return try self.parser.heapAlloc(allocator, self); 44 | } 45 | 46 | pub fn initStack(input: Context(Payload, Value)) Self { 47 | return Self{ .input = input }; 48 | } 49 | 50 | pub fn deinit(parser: *Parser(Payload, Value), allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void { 51 | const self = @fieldParentPtr(Self, "parser", parser); 52 | self.input.deinit(allocator, freed); 53 | } 54 | 55 | pub fn countReferencesTo(parser: *const Parser(Payload, Value), other: usize, freed: *std.AutoHashMap(usize, void)) usize { 56 | const self = @fieldParentPtr(Self, "parser", parser); 57 | if (@ptrToInt(parser) == other) return 1; 58 | return self.input.countReferencesTo(other, freed); 59 | } 60 | 61 | pub fn nodeName(parser: *const Parser(Payload, Value), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 { 62 | const self = @fieldParentPtr(Self, "parser", parser); 63 | 64 | var v = std.hash_map.hashString("Reentrant"); 65 | v +%= try self.input.nodeName(node_name_cache); 66 | return v; 67 | } 68 | 69 | pub fn parse(parser: *const Parser(Payload, Value), in_ctx: *const ParserContext(Payload, Value)) callconv(.Async) !void { 70 | const self = @fieldParentPtr(Self, "parser", parser); 71 | var ctx = in_ctx.with(self.input); 72 | defer ctx.results.close(); 73 | 74 | // See gll_parser.zig:Memoizer.get for details on what this is doing and why. 75 | var retrying = false; 76 | var retrying_max_depth: ?usize = null; 77 | while (true) { 78 | const child_node_name = try ctx.input.nodeName(&in_ctx.memoizer.node_name_cache); 79 | const child_ctx = try in_ctx.initChildRetry(Value, child_node_name, ctx.offset, retrying_max_depth); 80 | defer child_ctx.deinitChild(); 81 | if (!child_ctx.existing_results) try ctx.input.parse(&child_ctx); 82 | 83 | var buf = try ctx.allocator.create(ResultStream(Result(Value))); 84 | defer ctx.allocator.destroy(buf); 85 | buf.* = try ResultStream(Result(Value)).init(ctx.allocator, ctx.key); 86 | defer buf.deinit(); 87 | var sub = child_ctx.subscribe(); 88 | while (sub.next()) |next| { 89 | try buf.add(next.toUnowned()); 90 | } 91 | buf.close(); 92 | 93 | if ((sub.cyclic_closed or retrying) and !child_ctx.isRetrying(child_node_name, ctx.offset)) { 94 | if (retrying and sub.cyclic_closed) { 95 | if (retrying_max_depth.? > 0) retrying_max_depth.? -= 1; 96 | retrying = false; 97 | continue; 98 | } 99 | retrying = true; 100 | if (retrying_max_depth == null) { 101 | retrying_max_depth = 0; 102 | } 103 | retrying_max_depth.? 
+= 1;
104 |                     continue;
105 |                 } else {
106 |                     var sub2 = buf.subscribe(ctx.key, ctx.path, Result(Value).initError(ctx.offset, "matches only the empty language"));
107 |                     while (sub2.next()) |next| {
108 |                         try ctx.results.add(next);
109 |                     }
110 |                     break;
111 |                 }
112 |             }
113 |         }
114 |     };
115 | }
116 | 
--------------------------------------------------------------------------------
/src/combn/combinator/repeated.zig:
--------------------------------------------------------------------------------
1 | const gllparser = @import("../gllparser/gllparser.zig");
2 | const Error = gllparser.Error;
3 | const Parser = gllparser.Parser;
4 | const ParserContext = gllparser.Context;
5 | const Result = gllparser.Result;
6 | const NodeName = gllparser.NodeName;
7 | const ResultStream = gllparser.ResultStream;
8 | 
9 | const Literal = @import("../parser/literal.zig").Literal;
10 | const LiteralValue = @import("../parser/literal.zig").Value;
11 | 
12 | const std = @import("std");
13 | const testing = std.testing;
14 | const mem = std.mem;
15 | 
16 | pub fn Context(comptime Payload: type, comptime V: type) type {
17 |     return struct {
18 |         /// The parser which should be repeatedly parsed.
19 |         parser: *Parser(Payload, V),
20 | 
21 |         /// The minimum number of times the parser must successfully match.
22 |         min: usize,
23 | 
24 |         /// The maximum number of times the parser can match, or -1 for unlimited.
25 |         max: isize,
26 |     };
27 | }
28 | 
29 | /// Represents a single value in the stream of repeated values.
30 | ///
31 | /// In the case of a non-ambiguous grammar, a `Repeated` combinator will yield:
32 | ///
33 | /// ```
34 | /// stream(value1, value2)
35 | /// ```
36 | ///
37 | /// In the case of an ambiguous grammar, it would yield a stream with only the first parse path.
38 | /// Use RepeatedAmbiguous if ambiguous parse paths are desirable.
39 | pub fn Value(comptime V: type) type {
40 |     return struct {
41 |         results: *ResultStream(Result(V)),
42 | 
43 |         pub fn deinit(self: *const @This(), allocator: mem.Allocator) void {
44 |             self.results.deinit();
45 |             allocator.destroy(self.results);
46 |         }
47 |     };
48 | }
49 | 
50 | /// Matches the `input` repeatedly, between `[min, max]` times (inclusive.) If ambiguous parse paths
51 | /// are desirable, use RepeatedAmbiguous.
52 | ///
53 | /// The `input.parser` must remain alive for as long as the `Repeated` parser will be used.
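///
/// For example, matching zero-or-more `"abc"` literals (a sketch mirroring the
/// `test "repeated"` block at the bottom of this file):
///
/// ```
/// var abcs = try Repeated(Payload, LiteralValue).init(allocator, .{
///     .parser = (try Literal(Payload).init(allocator, "abc")).ref(),
///     .min = 0,
///     .max = -1, // unlimited
/// });
/// defer abcs.deinit(allocator, null);
/// try abcs.parse(&ctx);
/// ```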
54 | pub fn Repeated(comptime Payload: type, comptime V: type) type { 55 | return struct { 56 | parser: Parser(Payload, Value(V)) = Parser(Payload, Value(V)).init(parse, nodeName, deinit, countReferencesTo), 57 | input: Context(Payload, V), 58 | 59 | const Self = @This(); 60 | 61 | pub fn init(allocator: mem.Allocator, input: Context(Payload, V)) !*Parser(Payload, Value(V)) { 62 | const self = Self{ .input = input }; 63 | return try self.parser.heapAlloc(allocator, self); 64 | } 65 | 66 | pub fn initStack(input: Context(Payload, V)) Self { 67 | return Self{ .input = input }; 68 | } 69 | 70 | pub fn deinit(parser: *Parser(Payload, Value(V)), allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void { 71 | const self = @fieldParentPtr(Self, "parser", parser); 72 | self.input.parser.deinit(allocator, freed); 73 | } 74 | 75 | pub fn countReferencesTo(parser: *const Parser(Payload, Value(V)), other: usize, freed: *std.AutoHashMap(usize, void)) usize { 76 | const self = @fieldParentPtr(Self, "parser", parser); 77 | if (@ptrToInt(parser) == other) return 1; 78 | return self.input.parser.countReferencesTo(other, freed); 79 | } 80 | 81 | pub fn nodeName(parser: *const Parser(Payload, Value(V)), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 { 82 | const self = @fieldParentPtr(Self, "parser", parser); 83 | 84 | var v = std.hash_map.hashString("Repeated"); 85 | v +%= try self.input.parser.nodeName(node_name_cache); 86 | v +%= std.hash_map.getAutoHashFn(usize, void)({}, self.input.min); 87 | v +%= std.hash_map.getAutoHashFn(isize, void)({}, self.input.max); 88 | return v; 89 | } 90 | 91 | pub fn parse(parser: *const Parser(Payload, Value(V)), in_ctx: *const ParserContext(Payload, Value(V))) callconv(.Async) Error!void { 92 | const self = @fieldParentPtr(Self, "parser", parser); 93 | var ctx = in_ctx.with(self.input); 94 | defer ctx.results.close(); 95 | 96 | // Invoke the child parser repeatedly to produce each of our results. Each time we ask 97 | // the child parser to parse, it can produce a set of results (its result stream) which 98 | // are varying parse paths / interpretations, we take the first successful one. 99 | 100 | // Return early if we're not trying to parse anything (stream close signals to the 101 | // consumer there were no matches). 
102 | if (ctx.input.max == 0) { 103 | return; 104 | } 105 | 106 | var buffer = try ctx.allocator.create(ResultStream(Result(V))); 107 | errdefer ctx.allocator.destroy(buffer); 108 | errdefer buffer.deinit(); 109 | buffer.* = try ResultStream(Result(V)).init(ctx.allocator, ctx.key); 110 | 111 | var num_values: usize = 0; 112 | var offset: usize = ctx.offset; 113 | while (true) { 114 | const child_node_name = try self.input.parser.nodeName(&in_ctx.memoizer.node_name_cache); 115 | var child_ctx = try in_ctx.initChild(V, child_node_name, offset); 116 | defer child_ctx.deinitChild(); 117 | if (!child_ctx.existing_results) try self.input.parser.parse(&child_ctx); 118 | 119 | var num_local_values: usize = 0; 120 | var sub = child_ctx.subscribe(); 121 | while (sub.next()) |next| { 122 | switch (next.result) { 123 | .err => { 124 | offset = next.offset; 125 | if (num_values < ctx.input.min) { 126 | buffer.close(); 127 | buffer.deinit(); 128 | ctx.allocator.destroy(buffer); 129 | try ctx.results.add(Result(Value(V)).initError(next.offset, next.result.err)); 130 | return; 131 | } 132 | buffer.close(); 133 | try ctx.results.add(Result(Value(V)).init(offset, .{ .results = buffer })); 134 | return; 135 | }, 136 | else => { 137 | // TODO(slimsag): need path committal functionality 138 | if (num_local_values == 0) { 139 | offset = next.offset; 140 | // TODO(slimsag): if no consumption, could get stuck forever! 141 | try buffer.add(next.toUnowned()); 142 | } 143 | num_local_values += 1; 144 | }, 145 | } 146 | } 147 | 148 | num_values += 1; 149 | if (num_values >= ctx.input.max and ctx.input.max != -1) break; 150 | } 151 | buffer.close(); 152 | try ctx.results.add(Result(Value(V)).init(offset, .{ .results = buffer })); 153 | } 154 | }; 155 | } 156 | 157 | test "repeated" { 158 | nosuspend { 159 | const allocator = testing.allocator; 160 | 161 | const Payload = void; 162 | const ctx = try ParserContext(Payload, Value(LiteralValue)).init(allocator, "abcabcabc123abc", {}); 163 | defer ctx.deinit(); 164 | 165 | var abcInfinity = try Repeated(Payload, LiteralValue).init(allocator, .{ 166 | .parser = (try Literal(Payload).init(allocator, "abc")).ref(), 167 | .min = 0, 168 | .max = -1, 169 | }); 170 | defer abcInfinity.deinit(allocator, null); 171 | try abcInfinity.parse(&ctx); 172 | 173 | var sub = ctx.subscribe(); 174 | var repeated = sub.next().?.result.value; 175 | try testing.expect(sub.next() == null); // stream closed 176 | 177 | var repeatedSub = repeated.results.subscribe(ctx.key, ctx.path, Result(LiteralValue).initError(ctx.offset, "matches only the empty language")); 178 | try testing.expectEqual(@as(usize, 3), repeatedSub.next().?.offset); 179 | try testing.expectEqual(@as(usize, 6), repeatedSub.next().?.offset); 180 | try testing.expectEqual(@as(usize, 9), repeatedSub.next().?.offset); 181 | try testing.expect(repeatedSub.next() == null); // stream closed 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /src/combn/combinator/repeated_ambiguous.zig: -------------------------------------------------------------------------------- 1 | const gllparser = @import("../gllparser/gllparser.zig"); 2 | const Error = gllparser.Error; 3 | const Parser = gllparser.Parser; 4 | const ParserContext = gllparser.Context; 5 | const Result = gllparser.Result; 6 | const NodeName = gllparser.NodeName; 7 | const ResultStream = gllparser.ResultStream; 8 | const PosKey = gllparser.PosKey; 9 | const ParserPath = gllparser.ParserPath; 10 | 11 | const Literal = 
@import("../parser/literal.zig").Literal; 12 | const LiteralValue = @import("../parser/literal.zig").Value; 13 | 14 | const std = @import("std"); 15 | const testing = std.testing; 16 | const mem = std.mem; 17 | 18 | pub fn Context(comptime Payload: type, comptime V: type) type { 19 | return struct { 20 | /// The parser which should be repeatedly parsed. 21 | parser: *Parser(Payload, V), 22 | 23 | /// The minimum number of times the parser must successfully match. 24 | min: usize, 25 | 26 | /// The maximum number of times the parser can match, or -1 for unlimited. 27 | max: isize, 28 | }; 29 | } 30 | 31 | /// Represents a single value in the stream of repeated values. 32 | /// 33 | /// In the case of a non-ambiguous grammar, a `RepeatedAmbiguous` combinator will yield: 34 | /// 35 | /// ``` 36 | /// Value{ 37 | /// node: value1, 38 | /// next: ResultStream(Value{ 39 | /// node: value2, 40 | /// next: ..., 41 | /// }) 42 | /// } 43 | /// ``` 44 | /// 45 | /// In the case of an ambiguous grammar, it would yield streams with potentially multiple values 46 | /// (each representing one possible parse path / interpretation of the grammar): 47 | /// 48 | /// ``` 49 | /// Value{ 50 | /// node: value1, 51 | /// next: ResultStream( 52 | /// Value{ 53 | /// node: value2variant1, 54 | /// next: ..., 55 | /// }, 56 | /// Value{ 57 | /// node: value2variant2, 58 | /// next: ..., 59 | /// }, 60 | /// ) 61 | /// } 62 | /// ``` 63 | /// 64 | pub fn Value(comptime V: type) type { 65 | return struct { 66 | node: Result(V), 67 | next: *ResultStream(Result(@This())), 68 | 69 | pub fn deinit(self: *const @This(), allocator: mem.Allocator) void { 70 | self.next.deinit(); 71 | self.node.deinit(allocator); 72 | allocator.destroy(self.next); 73 | } 74 | 75 | pub fn flatten(self: *const @This(), allocator: mem.Allocator, subscriber: PosKey, path: ParserPath) Error!ResultStream(Result(V)) { 76 | var dst = try ResultStream(Result(V)).init(allocator, subscriber); 77 | try self.flatten_into(&dst, allocator, subscriber, path); 78 | dst.close(); // TODO(slimsag): why does deferring this not work? 79 | return dst; 80 | } 81 | 82 | pub fn flatten_into(self: *const @This(), dst: *ResultStream(Result(V)), allocator: mem.Allocator, subscriber: PosKey, path: ParserPath) Error!void { 83 | try dst.add(self.node.toUnowned()); 84 | 85 | var sub = self.next.subscribe(subscriber, path, Result(Value(V)).initError(0, "matches only the empty language")); 86 | nosuspend { 87 | while (sub.next()) |next_path| { 88 | switch (next_path.result) { 89 | .err => try dst.add(Result(V).initError(next_path.offset, next_path.result.err)), 90 | else => try next_path.result.value.flatten_into(dst, allocator, subscriber, path), 91 | } 92 | } 93 | } 94 | } 95 | }; 96 | } 97 | 98 | /// Matches the `input` repeatedly, between `[min, max]` times (inclusive.) 99 | /// 100 | /// The `input` parsers must remain alive for as long as the `RepeatedAmbiguous` parser will be used. 
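///
/// For example (a sketch mirroring the `test "repeated"` block at the bottom of this
/// file, where `list` stands for the first result yielded by `ctx.subscribe()`):
///
/// ```
/// var abcs = try RepeatedAmbiguous(Payload, LiteralValue).init(allocator, .{
///     .parser = (try Literal(Payload).init(allocator, "abc")).ref(),
///     .min = 0,
///     .max = -1, // unlimited
/// });
/// defer abcs.deinit(allocator, null);
/// try abcs.parse(&ctx);
///
/// // If the grammar is known to be unambiguous, the single parse path can be
/// // flattened into an ordinary stream of values:
/// var flattened = try list.?.result.value.flatten(allocator, ctx.key, ctx.path);
/// defer flattened.deinit();
/// ```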
101 | pub fn RepeatedAmbiguous(comptime Payload: type, comptime V: type) type { 102 | return struct { 103 | parser: Parser(Payload, Value(V)) = Parser(Payload, Value(V)).init(parse, nodeName, deinit, countReferencesTo), 104 | input: Context(Payload, V), 105 | 106 | const Self = @This(); 107 | 108 | pub fn init(allocator: mem.Allocator, input: Context(Payload, V)) !*Parser(Payload, Value(V)) { 109 | const self = Self{ .input = input }; 110 | return try self.parser.heapAlloc(allocator, self); 111 | } 112 | 113 | pub fn initStack(input: Context(Payload, V)) Self { 114 | return Self{ .input = input }; 115 | } 116 | 117 | pub fn deinit(parser: *Parser(Payload, Value(V)), allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void { 118 | const self = @fieldParentPtr(Self, "parser", parser); 119 | self.input.parser.deinit(allocator, freed); 120 | } 121 | 122 | pub fn countReferencesTo(parser: *const Parser(Payload, Value(V)), other: usize, freed: *std.AutoHashMap(usize, void)) usize { 123 | const self = @fieldParentPtr(Self, "parser", parser); 124 | if (@ptrToInt(parser) == other) return 1; 125 | return self.input.parser.countReferencesTo(other, freed); 126 | } 127 | 128 | pub fn nodeName(parser: *const Parser(Payload, Value(V)), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 { 129 | const self = @fieldParentPtr(Self, "parser", parser); 130 | 131 | var v = std.hash_map.hashString("RepeatedAmbiguous"); 132 | v +%= try self.input.parser.nodeName(node_name_cache); 133 | v +%= std.hash_map.getAutoHashFn(usize, void)({}, self.input.min); 134 | v +%= std.hash_map.getAutoHashFn(isize, void)({}, self.input.max); 135 | return v; 136 | } 137 | 138 | pub fn parse(parser: *const Parser(Payload, Value(V)), in_ctx: *const ParserContext(Payload, Value(V))) callconv(.Async) Error!void { 139 | const self = @fieldParentPtr(Self, "parser", parser); 140 | var ctx = in_ctx.with(self.input); 141 | defer ctx.results.close(); 142 | 143 | // Invoke the child parser repeatedly to produce each of our results. Each time we ask 144 | // the child parser to parse, it can produce a set of results (its result stream) which 145 | // are varying parse paths / interpretations. Our set of results (our result stream) 146 | // will contain many more possible paths, for example consider a parser: 147 | // 148 | // S -> [A, B] 149 | // 150 | // Matched once, it can produce one or two separate parse paths / interpretations (A, B, 151 | // or A and B), and we may commit to producing certain ones. But match twice, and it 152 | // could produce: 153 | // 154 | // S -> [AB, BA, AA, BB] 155 | // 156 | // There is an exponential number of repetitive parse paths to follow. Thus, we simply 157 | // follow each path in order, trying one at a time until we commit or reject the 158 | // unwanted paths. We also have two options in how we follow the paths - depth-first in 159 | // order: 160 | // 161 | // AA, AB, BA, BB 162 | // 163 | // Or breadth-first in order: 164 | // 165 | // AA, BA, AB, BB 166 | // 167 | // Depth-first vs. breadth-first could impact the performance of some grammars by 168 | // making it harder to bail out of a given parse path quicker. 
Similarly, iteration 169 | // order could be more expensive depending on the order of operations, this will be 170 | // slower: 171 | // 172 | // Iteration 0: Try A 173 | // Iteration 0: Try B -> Commit to B 174 | // Iteration 1: Try A 175 | // Iteration 1: Try B -> Commit to B 176 | // Iteration 2: Try A 177 | // Iteration 2: Try B -> Commit to B 178 | // 179 | // Than this: 180 | // 181 | // Iteration 0: Try B -> Commit to B 182 | // Iteration 1: Try B -> Commit to B 183 | // Iteration 2: Try B -> Commit to B 184 | // 185 | // However, the most optimal order is not known ahead of time. Likely the best approach 186 | // would be to assume the next path will be the same as the past path, but in practice 187 | // this would involve more book-keeping and still be a guess. Instead, we just focus on 188 | // exploring all potential paths as quickly as possible (and future efforts will be 189 | // better spent on parallelization of exploring these paths.) 190 | 191 | // Return early if we're not trying to parse anything (stream close signals to the 192 | // consumer there were no matches). 193 | if (ctx.input.max == 0) { 194 | return; 195 | } 196 | 197 | // First we need to actually invoke the child parser. This will give us [A, B, C] and 198 | // we then invoke RepeatedAmbiguous(child) on the proceeding states to get the associated stream: 199 | // 200 | // stream( 201 | // (A, stream( 202 | // (A, stream(...), 203 | // (B, stream(...), 204 | // (C, stream(...), 205 | // ), 206 | // (B, stream( 207 | // (A, stream(...), 208 | // (B, stream(...), 209 | // (C, stream(...), 210 | // ), 211 | // (C, stream( 212 | // (A, stream(...), 213 | // (B, stream(...), 214 | // (C, stream(...), 215 | // ), 216 | // ) 217 | // 218 | const child_node_name = try self.input.parser.nodeName(&in_ctx.memoizer.node_name_cache); 219 | var child_ctx = try in_ctx.initChild(V, child_node_name, ctx.offset); 220 | defer child_ctx.deinitChild(); 221 | if (!child_ctx.existing_results) try self.input.parser.parse(&child_ctx); 222 | 223 | // For every top-level value (A, B, C in our example above.) 224 | var num_values: usize = 0; 225 | var sub = child_ctx.subscribe(); 226 | var offset: usize = ctx.offset; 227 | while (sub.next()) |top_level| { 228 | if (num_values >= ctx.input.max and ctx.input.max != -1) break; 229 | num_values += 1; 230 | switch (top_level.result) { 231 | .err => { 232 | // Going down the path of this top-level value terminated with an error. 233 | if (num_values < 1 or num_values < ctx.input.min) { 234 | try ctx.results.add(Result(Value(V)).initError(top_level.offset, top_level.result.err)); 235 | } 236 | continue; 237 | }, 238 | else => { 239 | // We got a non-error top-level value (e.g. A, B, C). 240 | // TODO(slimsag): if no consumption, could get stuck forever! 241 | offset = top_level.offset; 242 | 243 | // Now get the stream that continues down this path (i.e. the stream 244 | // associated with A, B, C.) 
245 | var path_results = try ctx.allocator.create(ResultStream(Result(Value(V)))); 246 | path_results.* = try ResultStream(Result(Value(V))).init(ctx.allocator, ctx.key); 247 | var path = RepeatedAmbiguous(Payload, V).initStack(.{ 248 | .parser = self.input.parser, 249 | .min = self.input.min, 250 | .max = if (self.input.max == -1) -1 else self.input.max - 1, 251 | }); 252 | const path_node_name = try path.parser.nodeName(&in_ctx.memoizer.node_name_cache); 253 | var path_ctx = try in_ctx.initChild(Value(V), path_node_name, top_level.offset); 254 | defer path_ctx.deinitChild(); 255 | if (!path_ctx.existing_results) try path.parser.parse(&path_ctx); 256 | var path_results_sub = path_ctx.subscribe(); 257 | while (path_results_sub.next()) |next| { 258 | try path_results.add(next.toUnowned()); 259 | } 260 | path_results.close(); 261 | 262 | // Emit our top-level value tuple (e.g. (A, stream(...)) 263 | try ctx.results.add(Result(Value(V)).init(top_level.offset, .{ 264 | .node = top_level.toUnowned(), 265 | .next = path_results, 266 | })); 267 | }, 268 | } 269 | } 270 | if (num_values < ctx.input.min) { 271 | // TODO(slimsag): include number of expected/found matches 272 | try ctx.results.add(Result(Value(V)).initError(offset, "expected more")); 273 | return; 274 | } 275 | return; 276 | } 277 | }; 278 | } 279 | 280 | test "repeated" { 281 | nosuspend { 282 | const allocator = testing.allocator; 283 | 284 | const Payload = void; 285 | const ctx = try ParserContext(Payload, Value(LiteralValue)).init(allocator, "abcabcabc123abc", {}); 286 | defer ctx.deinit(); 287 | 288 | var abcInfinity = try RepeatedAmbiguous(Payload, LiteralValue).init(allocator, .{ 289 | .parser = (try Literal(Payload).init(allocator, "abc")).ref(), 290 | .min = 0, 291 | .max = -1, 292 | }); 293 | defer abcInfinity.deinit(allocator, null); 294 | try abcInfinity.parse(&ctx); 295 | 296 | var sub = ctx.subscribe(); 297 | var list = sub.next(); 298 | try testing.expect(sub.next() == null); // stream closed 299 | 300 | // first element 301 | try testing.expectEqual(@as(usize, 3), list.?.offset); 302 | try testing.expectEqual(@as(usize, 3), list.?.result.value.node.offset); 303 | 304 | // flatten the nested multi-dimensional array, since our grammar above is not ambiguous 305 | // this is fine to do and makes testing far easier. 
306 |         var flattened = try list.?.result.value.flatten(allocator, ctx.key, ctx.path);
307 |         defer flattened.deinit();
308 |         var flat = flattened.subscribe(ctx.key, ctx.path, Result(LiteralValue).initError(ctx.offset, "matches only the empty language"));
309 |         try testing.expectEqual(@as(usize, 3), flat.next().?.offset);
310 |         try testing.expectEqual(@as(usize, 6), flat.next().?.offset);
311 |         try testing.expectEqual(@as(usize, 9), flat.next().?.offset);
312 |         try testing.expect(flat.next() == null); // stream closed
313 |     }
314 | }
315 | 
--------------------------------------------------------------------------------
/src/combn/combinator/sequence.zig:
--------------------------------------------------------------------------------
1 | const gllparser = @import("../gllparser/gllparser.zig");
2 | const Error = gllparser.Error;
3 | const Parser = gllparser.Parser;
4 | const ParserContext = gllparser.Context;
5 | const Result = gllparser.Result;
6 | const NodeName = gllparser.NodeName;
7 | const ResultStream = gllparser.ResultStream;
8 | 
9 | const Literal = @import("../parser/literal.zig").Literal;
10 | const LiteralValue = @import("../parser/literal.zig").Value;
11 | const MapTo = @import("mapto.zig").MapTo;
12 | 
13 | const std = @import("std");
14 | const testing = std.testing;
15 | const mem = std.mem;
16 | 
17 | pub fn Context(comptime Payload: type, comptime V: type) type {
18 |     return []const *Parser(Payload, V);
19 | }
20 | 
21 | /// Represents a sequence of parsed values.
22 | ///
23 | /// In the case of a non-ambiguous grammar, a `Sequence` combinator will yield:
24 | ///
25 | /// ```
26 | /// stream(value1, value2)
27 | /// ```
28 | ///
29 | /// In the case of an ambiguous grammar, it would yield a stream with only the first parse path.
30 | /// Use SequenceAmbiguous if ambiguous parse paths are desirable.
31 | pub fn Value(comptime V: type) type {
32 |     return struct {
33 |         results: *ResultStream(Result(V)),
34 | 
35 |         pub fn deinit(self: *const @This(), allocator: mem.Allocator) void {
36 |             self.results.deinit();
37 |             allocator.destroy(self.results);
38 |         }
39 |     };
40 | }
41 | 
42 | pub const Ownership = enum {
43 |     borrowed,
44 |     owned,
45 |     copy,
46 | };
47 | 
48 | /// Matches the `input` parsers sequentially. The parsers must produce the same data type (use
49 | /// MapTo, if needed.) If ambiguous parse paths are desirable, use SequenceAmbiguous.
50 | ///
51 | /// The `input` parsers must remain alive for as long as the `Sequence` parser will be used.
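///
/// For example, matching `"abc"` followed by `"123"` (a sketch following the
/// `test "sequence"` block at the bottom of this file; `.borrowed` indicates the
/// input slice remains owned by the caller):
///
/// ```
/// var seq = try Sequence(Payload, LiteralValue).init(allocator, &.{
///     (try Literal(Payload).init(allocator, "abc")).ref(),
///     (try Literal(Payload).init(allocator, "123")).ref(),
/// }, .borrowed);
/// defer seq.deinit(allocator, null);
/// try seq.parse(&ctx);
/// ```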
52 | pub fn Sequence(comptime Payload: type, comptime V: type) type {
53 |     return struct {
54 |         parser: Parser(Payload, Value(V)) = Parser(Payload, Value(V)).init(parse, nodeName, deinit, countReferencesTo),
55 |         input: Context(Payload, V),
56 |         ownership: Ownership,
57 | 
58 |         const Self = @This();
59 | 
60 |         pub fn init(allocator: mem.Allocator, input: Context(Payload, V), ownership: Ownership) !*Parser(Payload, Value(V)) {
61 |             var self = Self{ .input = input, .ownership = ownership };
62 |             if (ownership == .copy) {
63 |                 const Elem = std.meta.Elem(@TypeOf(input));
64 |                 var copy = try allocator.alloc(Elem, input.len);
65 |                 std.mem.copy(Elem, copy, input);
66 |                 self.input = copy;
67 |                 self.ownership = .owned;
68 |             }
69 |             return try self.parser.heapAlloc(allocator, self);
70 |         }
71 | 
72 |         pub fn initStack(input: Context(Payload, V), ownership: Ownership) Self {
73 |             if (ownership == Ownership.copy) unreachable;
74 |             return Self{ .input = input, .ownership = ownership };
75 |         }
76 | 
77 |         pub fn deinit(parser: *Parser(Payload, Value(V)), allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void {
78 |             const self = @fieldParentPtr(Self, "parser", parser);
79 |             for (self.input) |child_parser| {
80 |                 child_parser.deinit(allocator, freed);
81 |             }
82 |             if (self.ownership == .owned) allocator.free(self.input);
83 |         }
84 | 
85 |         pub fn countReferencesTo(parser: *const Parser(Payload, Value(V)), other: usize, freed: *std.AutoHashMap(usize, void)) usize {
86 |             const self = @fieldParentPtr(Self, "parser", parser);
87 |             if (@ptrToInt(parser) == other) return 1;
88 |             var count: usize = 0;
89 |             for (self.input) |in_parser| {
90 |                 count += in_parser.countReferencesTo(other, freed);
91 |             }
92 |             return count;
93 |         }
94 | 
95 |         pub fn nodeName(parser: *const Parser(Payload, Value(V)), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 {
96 |             const self = @fieldParentPtr(Self, "parser", parser);
97 | 
98 |             var v = std.hash_map.hashString("Sequence");
99 |             for (self.input) |in_parser| {
100 |                 v +%= try in_parser.nodeName(node_name_cache);
101 |             }
102 |             return v;
103 |         }
104 | 
105 |         pub fn parse(parser: *const Parser(Payload, Value(V)), in_ctx: *const ParserContext(Payload, Value(V))) callconv(.Async) Error!void {
106 |             const self = @fieldParentPtr(Self, "parser", parser);
107 |             var ctx = in_ctx.with(self.input);
108 |             defer ctx.results.close();
109 | 
110 |             // Invoke each child parser to produce each of our results. Each time we ask a child
111 |             // parser to parse, it can produce a set of results (its result stream) which are
112 |             // varying parse paths / interpretations; we take the first successful one.
113 | 
114 |             // Return early if we're not trying to parse anything (stream close signals to the
115 |             // consumer there were no matches).
116 | if (self.input.len == 0) { 117 | return; 118 | } 119 | 120 | var buffer = try ctx.allocator.create(ResultStream(Result(V))); 121 | errdefer ctx.allocator.destroy(buffer); 122 | errdefer buffer.deinit(); 123 | buffer.* = try ResultStream(Result(V)).init(ctx.allocator, ctx.key); 124 | 125 | var offset: usize = ctx.offset; 126 | for (self.input) |child_parser| { 127 | const child_node_name = try child_parser.nodeName(&in_ctx.memoizer.node_name_cache); 128 | var child_ctx = try in_ctx.initChild(V, child_node_name, offset); 129 | defer child_ctx.deinitChild(); 130 | if (!child_ctx.existing_results) try child_parser.parse(&child_ctx); 131 | 132 | var num_local_values: usize = 0; 133 | var sub = child_ctx.subscribe(); 134 | while (sub.next()) |next| { 135 | switch (next.result) { 136 | .err => { 137 | buffer.close(); 138 | buffer.deinit(); 139 | ctx.allocator.destroy(buffer); 140 | try ctx.results.add(Result(Value(V)).initError(next.offset, next.result.err)); 141 | return; 142 | }, 143 | else => { 144 | // TODO(slimsag): need path committal functionality 145 | if (num_local_values == 0) { 146 | // TODO(slimsag): if no consumption, could get stuck forever! 147 | offset = next.offset; 148 | try buffer.add(next.toUnowned()); 149 | } 150 | num_local_values += 1; 151 | }, 152 | } 153 | } 154 | } 155 | buffer.close(); 156 | try ctx.results.add(Result(Value(V)).init(offset, .{ .results = buffer })); 157 | } 158 | }; 159 | } 160 | 161 | test "sequence" { 162 | nosuspend { 163 | const allocator = testing.allocator; 164 | 165 | const Payload = void; 166 | const ctx = try ParserContext(Payload, Value(LiteralValue)).init(allocator, "abc123abc456_123abc", {}); 167 | defer ctx.deinit(); 168 | 169 | var seq = try Sequence(Payload, LiteralValue).init(allocator, &.{ 170 | (try Literal(Payload).init(allocator, "abc")).ref(), 171 | (try Literal(Payload).init(allocator, "123ab")).ref(), 172 | (try Literal(Payload).init(allocator, "c45")).ref(), 173 | (try Literal(Payload).init(allocator, "6")).ref(), 174 | }, .borrowed); 175 | defer seq.deinit(allocator, null); 176 | try seq.parse(&ctx); 177 | 178 | var sub = ctx.subscribe(); 179 | var sequence = sub.next().?.result.value; 180 | try testing.expect(sub.next() == null); // stream closed 181 | 182 | var sequenceSub = sequence.results.subscribe(ctx.key, ctx.path, Result(LiteralValue).initError(ctx.offset, "matches only the empty language")); 183 | try testing.expectEqual(@as(usize, 3), sequenceSub.next().?.offset); 184 | try testing.expectEqual(@as(usize, 8), sequenceSub.next().?.offset); 185 | try testing.expectEqual(@as(usize, 11), sequenceSub.next().?.offset); 186 | try testing.expectEqual(@as(usize, 12), sequenceSub.next().?.offset); 187 | try testing.expect(sequenceSub.next() == null); // stream closed 188 | } 189 | } 190 | -------------------------------------------------------------------------------- /src/combn/combinator/sequence_ambiguous.zig: -------------------------------------------------------------------------------- 1 | const gllparser = @import("../gllparser/gllparser.zig"); 2 | const Error = gllparser.Error; 3 | const Parser = gllparser.Parser; 4 | const ParserContext = gllparser.Context; 5 | const Result = gllparser.Result; 6 | const NodeName = gllparser.NodeName; 7 | const ResultStream = gllparser.ResultStream; 8 | const PosKey = gllparser.PosKey; 9 | const ParserPath = gllparser.ParserPath; 10 | 11 | const Literal = @import("../parser/literal.zig").Literal; 12 | const LiteralValue = @import("../parser/literal.zig").Value; 13 | const MapTo = 
@import("mapto.zig").MapTo; 14 | 15 | const std = @import("std"); 16 | const testing = std.testing; 17 | const mem = std.mem; 18 | 19 | pub fn Context(comptime Payload: type, comptime V: type) type { 20 | return []const *Parser(Payload, V); 21 | } 22 | 23 | /// Represents a sequence of parsed values. 24 | /// 25 | /// In the case of a non-ambiguous grammar, a `SequenceAmbiguous` combinator will yield: 26 | /// 27 | /// ``` 28 | /// Value{ 29 | /// node: value1, 30 | /// next: ResultStream(Value{ 31 | /// node: value2, 32 | /// next: ..., 33 | /// }) 34 | /// } 35 | /// ``` 36 | /// 37 | /// In the case of an ambiguous grammar, it would yield streams with potentially multiple values 38 | /// (each representing one possible parse path / interpretation of the grammar): 39 | /// 40 | /// ``` 41 | /// Value{ 42 | /// node: value1, 43 | /// next: ResultStream( 44 | /// Value{ 45 | /// node: value2variant1, 46 | /// next: ..., 47 | /// }, 48 | /// Value{ 49 | /// node: value2variant2, 50 | /// next: ..., 51 | /// }, 52 | /// ) 53 | /// } 54 | /// ``` 55 | /// 56 | pub fn Value(comptime V: type) type { 57 | return struct { 58 | node: Result(V), 59 | next: *ResultStream(Result(@This())), 60 | 61 | pub fn deinit(self: *const @This(), allocator: mem.Allocator) void { 62 | self.next.deinit(); 63 | self.node.deinit(allocator); 64 | allocator.destroy(self.next); 65 | } 66 | 67 | pub fn flatten(self: *const @This(), allocator: mem.Allocator, subscriber: PosKey, path: ParserPath) Error!ResultStream(Result(V)) { 68 | var dst = try ResultStream(Result(V)).init(allocator, subscriber); 69 | try self.flatten_into(&dst, allocator, subscriber, path); 70 | dst.close(); // TODO(slimsag): why does deferring this not work? 71 | return dst; 72 | } 73 | 74 | pub fn flatten_into(self: *const @This(), dst: *ResultStream(Result(V)), allocator: mem.Allocator, subscriber: PosKey, path: ParserPath) Error!void { 75 | try dst.add(self.node.toUnowned()); 76 | 77 | var sub = self.next.subscribe(subscriber, path, Result(Value(V)).initError(0, "matches only the empty language")); 78 | nosuspend { 79 | while (sub.next()) |next_path| { 80 | switch (next_path.result) { 81 | .err => try dst.add(Result(V).initError(next_path.offset, next_path.result.err)), 82 | else => try next_path.result.value.flatten_into(dst, allocator, subscriber, path), 83 | } 84 | } 85 | } 86 | } 87 | }; 88 | } 89 | 90 | pub const Ownership = enum { 91 | borrowed, 92 | owned, 93 | copy, 94 | }; 95 | 96 | /// Matches the `input` parsers sequentially. The parsers must produce the same data type (use 97 | /// MapTo, if needed.) 98 | /// 99 | /// The `input` parsers must remain alive for as long as the `SequenceAmbiguous` parser will be used. 
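///
/// For example (a sketch following the `test "sequence"` block at the bottom of this
/// file; as with `Sequence`, `.borrowed` leaves ownership of the input slice with the
/// caller):
///
/// ```
/// var seq = try SequenceAmbiguous(Payload, LiteralValue).init(allocator, &.{
///     (try Literal(Payload).init(allocator, "abc")).ref(),
///     (try Literal(Payload).init(allocator, "123")).ref(),
/// }, .borrowed);
/// defer seq.deinit(allocator, null);
/// try seq.parse(&ctx);
/// ```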
100 | pub fn SequenceAmbiguous(comptime Payload: type, comptime V: type) type { 101 | return struct { 102 | parser: Parser(Payload, Value(V)) = Parser(Payload, Value(V)).init(parse, nodeName, deinit, countReferencesTo), 103 | input: Context(Payload, V), 104 | ownership: Ownership, 105 | 106 | const Self = @This(); 107 | 108 | pub fn init(allocator: mem.Allocator, input: Context(Payload, V), ownership: Ownership) !*Parser(Payload, Value(V)) { 109 | var self = Self{ .input = input, .ownership = ownership }; 110 | if (ownership == .copy) { 111 | const Elem = std.meta.Elem(@TypeOf(input)); 112 | var copy = try allocator.alloc(Elem, input.len); 113 | std.mem.copy(Elem, copy, input); 114 | self.input = copy; 115 | self.ownership = .owned; 116 | } 117 | return try self.parser.heapAlloc(allocator, self); 118 | } 119 | 120 | pub fn initStack(input: Context(Payload, V), ownership: Ownership) Self { 121 | if (ownership == Ownership.copy) unreachable; 122 | return Self{ .input = input, .ownership = ownership }; 123 | } 124 | 125 | pub fn deinit(parser: *Parser(Payload, Value(V)), allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void { 126 | const self = @fieldParentPtr(Self, "parser", parser); 127 | for (self.input) |child_parser| { 128 | child_parser.deinit(allocator, freed); 129 | } 130 | if (self.ownership == .owned) allocator.free(self.input); 131 | } 132 | 133 | pub fn countReferencesTo(parser: *const Parser(Payload, Value(V)), other: usize, freed: *std.AutoHashMap(usize, void)) usize { 134 | const self = @fieldParentPtr(Self, "parser", parser); 135 | if (@ptrToInt(parser) == other) return 1; 136 | var count: usize = 0; 137 | for (self.input) |in_parser| { 138 | count += in_parser.countReferencesTo(other, freed); 139 | } 140 | return count; 141 | } 142 | 143 | pub fn nodeName(parser: *const Parser(Payload, Value(V)), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 { 144 | const self = @fieldParentPtr(Self, "parser", parser); 145 | 146 | var v = std.hash_map.hashString("SequenceAmbiguous"); 147 | for (self.input) |in_parser| { 148 | v +%= try in_parser.nodeName(node_name_cache); 149 | } 150 | return v; 151 | } 152 | 153 | pub fn parse(parser: *const Parser(Payload, Value(V)), in_ctx: *const ParserContext(Payload, Value(V))) callconv(.Async) Error!void { 154 | const self = @fieldParentPtr(Self, "parser", parser); 155 | var ctx = in_ctx.with(self.input); 156 | defer ctx.results.close(); 157 | 158 | if (self.input.len == 0) { 159 | return; 160 | } 161 | 162 | // For a sequence of input parsers [A, B, C], each one may produce multiple different 163 | // possible parser paths (valid interpretations of the same input state) in the case of 164 | // an ambiguous grammar. For example, the sequence of parsers [A, B, C] where each 165 | // produces 2 possible parser paths (e.g. A1, A2) we need to emit: 166 | // 167 | // stream( 168 | // (A1, stream( 169 | // (B1, stream( 170 | // (C1, None), 171 | // (C2, None), 172 | // )), 173 | // (B2, stream( 174 | // (C1, None), 175 | // (C2, None), 176 | // )), 177 | // )), 178 | // (A2, stream( 179 | // (B1, stream( 180 | // (C1, None), 181 | // (C2, None), 182 | // )), 183 | // (B2, stream( 184 | // (C1, None), 185 | // (C2, None), 186 | // )), 187 | // )), 188 | // ) 189 | // 190 | // This call to `SequenceAmbiguous.parse` is only responsible for emitting the top level 191 | // (A1, A2) and invoking SequenceAmbiguous(next) to produce the associated `stream()` for those 192 | // parse states. 
193 | const child_node_name = try self.input[0].nodeName(&in_ctx.memoizer.node_name_cache); 194 | var child_ctx = try in_ctx.initChild(V, child_node_name, ctx.offset); 195 | defer child_ctx.deinitChild(); 196 | if (!child_ctx.existing_results) try self.input[0].parse(&child_ctx); 197 | 198 | // For every top-level value (A1, A2 in our example above.) 199 | var sub = child_ctx.subscribe(); 200 | while (sub.next()) |top_level| { 201 | switch (top_level.result) { 202 | .err => { 203 | try ctx.results.add(Result(Value(V)).initError(top_level.offset, top_level.result.err)); 204 | continue; 205 | }, 206 | else => { 207 | // We got a non-error top-level value (e.g. A1, A2). 208 | 209 | // Now get the stream that continues down this path (i.e. the stream 210 | // associated with A1, A2.) 211 | var path_results = try ctx.allocator.create(ResultStream(Result(Value(V)))); 212 | path_results.* = try ResultStream(Result(Value(V))).init(ctx.allocator, ctx.key); 213 | var path = SequenceAmbiguous(Payload, V).initStack(self.input[1..], .borrowed); 214 | const path_node_name = try path.parser.nodeName(&in_ctx.memoizer.node_name_cache); 215 | var path_ctx = try in_ctx.initChild(Value(V), path_node_name, top_level.offset); 216 | defer path_ctx.deinitChild(); 217 | if (!path_ctx.existing_results) try path.parser.parse(&path_ctx); 218 | var path_results_sub = path_ctx.subscribe(); 219 | while (path_results_sub.next()) |next| { 220 | try path_results.add(next.toUnowned()); 221 | } 222 | path_results.close(); 223 | 224 | // Emit our top-level value tuple (e.g. (A1, stream(...)) 225 | try ctx.results.add(Result(Value(V)).init(top_level.offset, .{ 226 | .node = top_level.toUnowned(), 227 | .next = path_results, 228 | })); 229 | }, 230 | } 231 | } 232 | } 233 | }; 234 | } 235 | 236 | test "sequence" { 237 | nosuspend { 238 | const allocator = testing.allocator; 239 | 240 | const Payload = void; 241 | const ctx = try ParserContext(Payload, Value(LiteralValue)).init(allocator, "abc123abc456_123abc", {}); 242 | defer ctx.deinit(); 243 | 244 | var seq = try SequenceAmbiguous(Payload, LiteralValue).init(allocator, &.{ 245 | (try Literal(Payload).init(allocator, "abc")).ref(), 246 | (try Literal(Payload).init(allocator, "123ab")).ref(), 247 | (try Literal(Payload).init(allocator, "c45")).ref(), 248 | (try Literal(Payload).init(allocator, "6")).ref(), 249 | }, .borrowed); 250 | defer seq.deinit(allocator, null); 251 | try seq.parse(&ctx); 252 | 253 | var sub = ctx.subscribe(); 254 | var list = sub.next(); 255 | try testing.expect(sub.next() == null); // stream closed 256 | 257 | // first element 258 | try testing.expectEqual(@as(usize, 3), list.?.offset); 259 | try testing.expectEqual(@as(usize, 3), list.?.result.value.node.offset); 260 | 261 | // flatten the nested multi-dimensional array, since our grammar above is not ambiguous 262 | // this is fine to do and makes testing far easier. 
263 | var flattened = try list.?.result.value.flatten(allocator, ctx.key, ctx.path); 264 | defer flattened.deinit(); 265 | var flat = flattened.subscribe(ctx.key, ctx.path, Result(LiteralValue).initError(ctx.offset, "matches only the empty language")); 266 | try testing.expectEqual(@as(usize, 3), flat.next().?.offset); 267 | try testing.expectEqual(@as(usize, 8), flat.next().?.offset); 268 | try testing.expectEqual(@as(usize, 11), flat.next().?.offset); 269 | try testing.expectEqual(@as(usize, 12), flat.next().?.offset); 270 | try testing.expect(flat.next() == null); // stream closed 271 | } 272 | } 273 | -------------------------------------------------------------------------------- /src/combn/combn.zig: -------------------------------------------------------------------------------- 1 | pub const combinator = @import("combinator/combinator.zig"); 2 | pub const gllparser = @import("gllparser/gllparser.zig"); 3 | pub const parser = @import("parser/parser.zig"); 4 | 5 | usingnamespace @import("test_complex.zig"); 6 | -------------------------------------------------------------------------------- /src/combn/gllparser/ParserPath.zig: -------------------------------------------------------------------------------- 1 | //! Maintains the path which a parser took, i.e. which parser states were taken 2 | //! and in which order by maintaining a stack of parser position keys. 3 | 4 | stack: std.atomic.Stack(PosKey), 5 | 6 | const std = @import("std"); 7 | const mem = std.mem; 8 | const PosKey = @import("parser.zig").PosKey; 9 | const ParserPath = @This(); 10 | 11 | pub fn init() ParserPath { 12 | return .{ 13 | .stack = std.atomic.Stack(PosKey).init(), 14 | }; 15 | } 16 | 17 | pub fn deinit(self: ParserPath, allocator: mem.Allocator) void { 18 | var next = self.stack.root; 19 | while (next != null) { 20 | const tmp = next.?.next; 21 | allocator.destroy(next.?); 22 | next = tmp; 23 | } 24 | } 25 | 26 | pub fn push(self: *ParserPath, key: PosKey, allocator: mem.Allocator) !void { 27 | const Node = std.atomic.Stack(PosKey).Node; 28 | const pathNode = try allocator.create(Node); 29 | pathNode.* = .{ 30 | .next = undefined, 31 | .data = key, 32 | }; 33 | self.stack.push(pathNode); 34 | } 35 | 36 | pub fn clone(self: ParserPath, allocator: mem.Allocator) !ParserPath { 37 | var new = ParserPath.init(); 38 | var next = self.stack.root; 39 | while (next != null) : (next = next.?.next) { 40 | try new.push(next.?.data, allocator); 41 | } 42 | return new; 43 | } 44 | 45 | pub fn contains(self: ParserPath, key: PosKey) bool { 46 | var next = self.stack.root; 47 | const eql = std.hash_map.getAutoEqlFn(PosKey, void); 48 | while (next != null) : (next = next.?.next) { 49 | if (eql({}, next.?.data, key)) return true; 50 | } 51 | return false; 52 | } 53 | 54 | pub fn print(self: ParserPath) void { 55 | var next = self.stack.root; 56 | std.debug.print("PATH", .{}); 57 | while (next != null) : (next = next.?.next) { 58 | std.debug.print(" -> {}", .{next.?.data}); 59 | } 60 | std.debug.print("\n", .{}); 61 | } 62 | -------------------------------------------------------------------------------- /src/combn/gllparser/gllparser.zig: -------------------------------------------------------------------------------- 1 | pub const ParserPath = @import("ParserPath.zig"); 2 | 3 | pub const parser = @import("parser.zig"); 4 | pub const Error = parser.Error; 5 | pub const ResultTag = parser.ResultTag; 6 | pub const Result = parser.Result; 7 | pub const PosKey = parser.PosKey; 8 | pub const NodeName = parser.NodeName; 9 | pub 
const Context = parser.Context;
10 | pub const Parser = parser.Parser;
11 | 
12 | pub const result_stream = @import("result_stream.zig");
13 | pub const ResultStream = result_stream.ResultStream;
14 | 
--------------------------------------------------------------------------------
/src/combn/gllparser/parser.zig:
--------------------------------------------------------------------------------
1 | const ResultStream = @import("result_stream.zig").ResultStream;
2 | const Iterator = @import("result_stream.zig").Iterator;
3 | const ParserPath = @import("ParserPath.zig");
4 | 
5 | const std = @import("std");
6 | const testing = std.testing;
7 | const mem = std.mem;
8 | 
9 | pub const Error = error{OutOfMemory};
10 | 
11 | pub const ResultTag = enum {
12 |     value,
13 |     err,
14 | };
15 | 
16 | /// deinitOptional invokes value.deinit(allocator), taking into account that it may be an optional
17 | /// `?Value`, `??Value`, etc.
18 | pub inline fn deinitOptional(value: anytype, allocator: mem.Allocator) void {
19 |     switch (@typeInfo(@TypeOf(value))) {
20 |         .Optional => if (value) |v| return deinitOptional(v, allocator),
21 |         else => value.deinit(allocator),
22 |     }
23 | }
24 | 
25 | /// A parser result, one of:
26 | ///
27 | /// 1. A `value` and new `offset` into the input `src`.
28 | /// 2. An `err` and new `offset` into the input `src` (i.e. the position of the error).
29 | ///
30 | /// A Result always knows how to `deinit` itself.
31 | pub fn Result(comptime Value: type) type {
32 |     return struct {
33 |         offset: usize,
34 |         result: union(ResultTag) {
35 |             value: Value,
36 |             err: []const u8,
37 |         },
38 |         owned: bool,
39 | 
40 |         pub fn init(offset: usize, value: Value) @This() {
41 |             return .{
42 |                 .offset = offset,
43 |                 .result = .{ .value = value },
44 |                 .owned = true,
45 |             };
46 |         }
47 | 
48 |         pub fn deinit(self: @This(), allocator: mem.Allocator) void {
49 |             if (!self.owned) return;
50 |             switch (self.result) {
51 |                 .value => |value| {
52 |                     deinitOptional(value, allocator);
53 |                 },
54 |                 else => {},
55 |             }
56 |         }
57 | 
58 |         pub fn toUnowned(self: @This()) @This() {
59 |             var tmp = self;
60 |             tmp.owned = false;
61 |             return tmp;
62 |         }
63 | 
64 |         pub fn initError(offset: usize, err: []const u8) @This() {
65 |             return .{
66 |                 .offset = offset,
67 |                 .result = .{ .err = err },
68 |                 .owned = false,
69 |             };
70 |         }
71 |     };
72 | }
73 | 
74 | const MemoizeValue = struct {
75 |     results: usize, // untyped pointer *ResultStream(Result(Value))
76 |     deinit: fn (results: usize, allocator: mem.Allocator) void,
77 | };
78 | 
79 | fn MemoizedResult(comptime Value: type) type {
80 |     return struct {
81 |         results: *ResultStream(Result(Value)),
82 |         was_cached: bool,
83 |     };
84 | }
85 | 
86 | /// A key describing a parser node at a specific position in an input string, as well as the number
87 | /// of times it reentrantly called itself at that exact position.
88 | const ParserPosDepthKey = struct {
89 |     pos_key: PosKey,
90 |     reentrant_depth: usize,
91 | };
92 | 
93 | /// Describes the exact string and offset into it that a parser node is parsing.
94 | pub const PosKey = struct {
95 |     node_name: NodeName,
96 |     src_ptr: usize,
97 |     offset: usize,
98 | };
99 | 
100 | /// The name of a parser node. This includes hashes of:
101 | ///
102 | /// * The parser's type name (e.g. "MapTo", "Sequence", etc.)
103 | /// * The actual parser inputs (e.g. the list of parsers to match in a Sequence parser, or for a
104 | ///   MapTo parser the input parser to match and the actual function that does mapping.)
105 | ///
106 | /// It is enough to distinctly represent a _single node in the parser graph._ Note that it is NOT
107 | /// the same as:
108 | ///
109 | /// * Identifying a singular parser instance (two parser instances with the same inputs will be
110 | ///   "deduplicated" and have the same parser node name.)
111 | /// * Identifying a parser node at a particular position: the parser `offset` position and `src`
112 | ///   string to parse are NOT part of a parser node name; for that, see `PosKey`.
113 | ///
114 | pub const NodeName = u64;
115 | 
116 | /// Records a single recursion retry for a parser.
117 | const RecursionRetry = struct {
118 |     /// The current reentrant depth of the parser.
119 |     depth: usize,
120 | 
121 |     /// The maximum reentrant depth before this retry attempt will be stopped.
122 |     max_depth: usize,
123 | };
124 | 
125 | const Memoizer = struct {
126 |     /// Parser position & reentrant depth key -> memoized results
127 |     memoized: std.AutoHashMap(ParserPosDepthKey, MemoizeValue),
128 | 
129 |     /// *Parser(T, P) -> computed parser node name.
130 |     node_name_cache: std.AutoHashMap(usize, NodeName),
131 | 
132 |     /// Maps position key -> the currently active recursion retry attempt, if any.
133 |     recursion: std.AutoHashMap(PosKey, RecursionRetry),
134 | 
135 |     /// Memoized values to clean up later, because freeing them inside a reentrant parser
136 |     /// invocation is not possible as the parent still intends to use them.
137 |     ///
138 |     /// TODO(slimsag): consider something like reference counting here to reduce memory
139 |     /// footprint.
140 |     deferred_cleanups: std.ArrayList(MemoizeValue),
141 | 
142 |     /// Tells if the given parser node is currently being retried at different maximum reentrant
143 |     /// depths as part of a Reentrant combinator.
144 |     pub fn isRetrying(self: *@This(), key: PosKey) bool {
145 |         const recursion = self.recursion.get(key);
146 |         if (recursion == null) return false;
147 |         return true;
148 |     }
149 | 
150 |     fn clearPastRecursions(self: *@This(), parser: PosKey, new_max_depth: usize) !void {
151 |         var i: usize = 0;
152 |         while (i <= new_max_depth) : (i += 1) {
153 |             const k = ParserPosDepthKey{
154 |                 .pos_key = parser,
155 |                 .reentrant_depth = i,
156 |             };
157 |             if (self.memoized.get(k)) |memoized| try self.deferred_cleanups.append(memoized);
158 |             _ = self.memoized.remove(k);
159 |         }
160 |     }
161 | 
162 |     pub fn get(self: *@This(), comptime Value: type, allocator: mem.Allocator, parser_path: ParserPath, parser: PosKey, new_max_depth: ?usize) !MemoizedResult(Value) {
163 |         // We memoize results for each unique ParserPosDepthKey, meaning that a parser node can be
164 |         // invoked to parse a specific input string at a specific offset recursively in a reentrant
165 |         // way up to a maximum depth (new_max_depth). This enables our GLL parser to handle grammars
166 |         // that are left-recursive, such as:
167 |         //
168 |         // ```ebnf
169 |         // Expr = Expr?, "abc" ;
170 |         // Grammar = Expr ;
171 |         // ```
172 |         //
173 |         // Where an input string "abcabcabc" would require `Expr` be parsed at offset=0 in the
174 |         // input string multiple times. How many times? We start out with a maximum reentry depth
175 |         // of zero, and if we determine that the parsing is cyclic (a ResultStream subscriber is in
176 |         // fact itself the source) we consider that parse path as failed (it matches only the empty
177 |         // language) and retry the whole parse path with a new_max_depth of N+1,
178 |         // repeating this process until eventually we find the parsing is not cyclic.
179 | // 180 | // It is important to note that this is for handling reentrant parsing _at the same exact 181 | // offset position in the input string_, the GLL parsing algorithm itself handles left 182 | // recursive and right recursive parsing fine on its own, as long as the parse position is 183 | // changing, but many implementations cannot handle reentrant parsing at the same exact 184 | // offset position in the input string (I am unsure if this is by design, or a limitation 185 | // of the implementations themselves). Packrattle[1] which uses an "optimized" GLL parsing 186 | // algorithm (memoization is localized to parse nodes) is the closest to our algorithm, and 187 | // can handle this type of same-position left recursion in some instances such as with: 188 | // 189 | // ```ebnf 190 | // Expr = Expr?, "abc" ; 191 | // Grammar = Expr, EOF ; 192 | // ``` 193 | // 194 | // However, it does so using a _globalized_ retry mechanism[2] which in this event resets 195 | // the entire parser back to an earlier point in time, only if the overall parse failed. 196 | // This also coincidently means that if the `EOF` matcher is removed (`Grammar = Expr ;`) 197 | // then `Expr` matching becomes "non-greedy" matching just one "abc" value instead of all 198 | // three as when the EOF matcher is in place. 199 | // 200 | // Our implementation here uses node-localized retries, which makes us not subject to the 201 | // same bug as packrattle and more optimized (the entire parse need not fail for us to 202 | // detect and retry in this case, we do so exactly at the reentrant parser node itself.) 203 | // 204 | // [1] https://github.com/robey/packrattle 205 | // [2] https://github.com/robey/packrattle/blob/3db99f2d87abdddb9d29a0d0cf86e272c59d4ddb/src/packrattle/engine.js#L137-L177 206 | // 207 | var reentrant_depth: usize = 0; 208 | const recursionEntry = self.recursion.get(parser); 209 | if (recursionEntry) |entry| { 210 | if (new_max_depth != null) { 211 | // Existing entry, but we want to retry with a new_max_depth; 212 | reentrant_depth = new_max_depth.?; 213 | try self.recursion.put(parser, .{ .depth = new_max_depth.?, .max_depth = new_max_depth.? }); 214 | try self.clearPastRecursions(parser, new_max_depth.?); 215 | } else { 216 | // Existing entry, so increment the depth and continue. 217 | var depth = entry.depth; 218 | if (depth > 0) { 219 | depth -= 1; 220 | } 221 | try self.recursion.put(parser, .{ .depth = depth, .max_depth = entry.max_depth }); 222 | reentrant_depth = depth; 223 | } 224 | } else if (new_max_depth != null) { 225 | // No existing entry, want to retry with new_max_depth. 226 | reentrant_depth = new_max_depth.?; 227 | try self.recursion.put(parser, .{ .depth = new_max_depth.?, .max_depth = new_max_depth.? }); 228 | try self.clearPastRecursions(parser, new_max_depth.?); 229 | } else { 230 | // No existing entry, but a distant parent parser may be retrying with a max depth that 231 | // we should respect. 232 | var next_node = parser_path.stack.root; 233 | while (next_node) |next| { 234 | const parentRecursionEntry = self.recursion.get(next.data); 235 | if (parentRecursionEntry) |parent_entry| { 236 | reentrant_depth = parent_entry.depth; 237 | try self.clearPastRecursions(parser, parent_entry.max_depth); 238 | break; 239 | } 240 | next_node = next.next; 241 | } 242 | } 243 | 244 | // Do we have an existing result stream for this key? 
245 | const m = try self.memoized.getOrPut(ParserPosDepthKey{ 246 | .pos_key = parser, 247 | .reentrant_depth = reentrant_depth, 248 | }); 249 | if (!m.found_existing) { 250 | // Create a new result stream for this key. 251 | var results = try allocator.create(ResultStream(Result(Value))); 252 | results.* = try ResultStream(Result(Value)).init(allocator, parser); 253 | m.value_ptr.* = MemoizeValue{ 254 | .results = @ptrToInt(results), 255 | .deinit = struct { 256 | fn deinit(_resultsPtr: usize, _allocator: mem.Allocator) void { 257 | var _results = @intToPtr(*ResultStream(Result(Value)), _resultsPtr); 258 | _results.deinit(); 259 | _allocator.destroy(_results); 260 | } 261 | }.deinit, 262 | }; 263 | } 264 | return MemoizedResult(Value){ 265 | .results = @intToPtr(*ResultStream(Result(Value)), m.value_ptr.results), 266 | .was_cached = m.found_existing, 267 | }; 268 | } 269 | 270 | pub fn init(allocator: mem.Allocator) !*@This() { 271 | var self = try allocator.create(@This()); 272 | self.* = .{ 273 | .memoized = std.AutoHashMap(ParserPosDepthKey, MemoizeValue).init(allocator), 274 | .node_name_cache = std.AutoHashMap(usize, NodeName).init(allocator), 275 | .recursion = std.AutoHashMap(PosKey, RecursionRetry).init(allocator), 276 | .deferred_cleanups = std.ArrayList(MemoizeValue).init(allocator), 277 | }; 278 | return self; 279 | } 280 | 281 | pub fn deinit(self: *@This(), allocator: mem.Allocator) void { 282 | var iter = self.memoized.iterator(); 283 | while (iter.next()) |memoized| { 284 | memoized.value_ptr.deinit(memoized.value_ptr.results, allocator); 285 | } 286 | self.memoized.deinit(); 287 | self.node_name_cache.deinit(); 288 | self.recursion.deinit(); 289 | for (self.deferred_cleanups.items) |item| { 290 | item.deinit(item.results, allocator); 291 | } 292 | self.deferred_cleanups.deinit(); 293 | allocator.destroy(self); 294 | } 295 | }; 296 | 297 | /// Describes context to be given to a `Parser`, such as `input` parameters, an `allocator`, and 298 | /// the actual `src` to parse. 299 | pub fn Context(comptime Input: type, comptime Value: type) type { 300 | return struct { 301 | input: Input, 302 | allocator: mem.Allocator, 303 | src: []const u8, 304 | offset: usize, 305 | results: *ResultStream(Result(Value)), 306 | existing_results: bool, 307 | memoizer: *Memoizer, 308 | key: PosKey, 309 | path: ParserPath, 310 | 311 | pub fn init(allocator: mem.Allocator, src: []const u8, input: Input) !@This() { 312 | var src_ptr: usize = 0; 313 | if (src.len > 0) { 314 | src_ptr = @ptrToInt(&src[0]); 315 | } 316 | const key = .{ 317 | .node_name = 0, 318 | .src_ptr = src_ptr, 319 | .offset = 0, 320 | }; 321 | 322 | var results = try allocator.create(ResultStream(Result(Value))); 323 | results.* = try ResultStream(Result(Value)).init(allocator, key); 324 | return @This(){ 325 | .input = input, 326 | .allocator = allocator, 327 | .src = src, 328 | .offset = 0, 329 | .results = results, 330 | .existing_results = false, 331 | .memoizer = try Memoizer.init(allocator), 332 | .key = key, 333 | .path = ParserPath.init(), 334 | }; 335 | } 336 | 337 | pub fn initChild(self: @This(), comptime NewValue: type, node_name: NodeName, offset: usize) !Context(Input, NewValue) { 338 | return self.initChildRetry(NewValue, node_name, offset, null); 339 | } 340 | 341 | /// initChildRetry initializes a child context to be used as a single retry attempt with a 342 | /// new maximum depth of reentrant parser invocations for the child and all of its 343 | /// children. 
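/// (A sketch of the relationship, per the definition of `initChild` above:
/// `initChild(NewValue, node_name, offset)` is simply
/// `initChildRetry(NewValue, node_name, offset, null)`, i.e. a child context that does not
/// begin a new retry attempt.)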
344 | pub fn initChildRetry(self: @This(), comptime NewValue: type, node_name: NodeName, offset: usize, max_depth: ?usize) !Context(Input, NewValue) { 345 | var src_ptr: usize = 0; 346 | if (self.src.len > 0) { 347 | src_ptr = @ptrToInt(&self.src[0]); 348 | } 349 | const key = PosKey{ 350 | .node_name = node_name, 351 | .src_ptr = src_ptr, 352 | .offset = offset, 353 | }; 354 | var child_ctx = Context(Input, NewValue){ 355 | .input = self.input, 356 | .allocator = self.allocator, 357 | .src = self.src, 358 | .offset = offset, 359 | .results = undefined, 360 | .existing_results = false, 361 | .memoizer = self.memoizer, 362 | .key = key, 363 | .path = try self.path.clone(self.allocator), 364 | }; 365 | try child_ctx.path.push(child_ctx.key, self.allocator); 366 | 367 | var memoized = try self.memoizer.get(NewValue, self.allocator, child_ctx.path, key, max_depth); 368 | child_ctx.results = memoized.results; 369 | if (memoized.was_cached) { 370 | child_ctx.existing_results = true; 371 | } 372 | return child_ctx; 373 | } 374 | 375 | /// isRetrying tells if this context represents a retry initiated previously via 376 | /// initChildRetry, potentially by a distant parent recursive call, indicating that a new 377 | /// reentrant retry should not be attempted. 378 | pub fn isRetrying(self: @This(), node_name: NodeName, offset: usize) bool { 379 | var src_ptr: usize = 0; 380 | if (self.src.len > 0) { 381 | src_ptr = @ptrToInt(&self.src[0]); 382 | } 383 | return self.memoizer.isRetrying(PosKey{ 384 | .node_name = node_name, 385 | .src_ptr = src_ptr, 386 | .offset = offset, 387 | }); 388 | } 389 | 390 | /// Subscribe to the results from this context. The caller owns the values and is 391 | /// responsible for calling `deinit` on each. 392 | pub fn subscribe(self: @This()) Iterator(Result(Value)) { 393 | return self.results.subscribe( 394 | self.key, 395 | self.path, 396 | Result(Value).initError(self.offset, "matches only the empty language"), 397 | ); 398 | } 399 | 400 | pub fn with(self: @This(), new_input: anytype) Context(@TypeOf(new_input), Value) { 401 | return Context(@TypeOf(new_input), Value){ 402 | .input = new_input, 403 | .allocator = self.allocator, 404 | .src = self.src, 405 | .offset = self.offset, 406 | .results = self.results, 407 | .existing_results = self.existing_results, 408 | .memoizer = self.memoizer, 409 | .key = self.key, 410 | .path = self.path, 411 | }; 412 | } 413 | 414 | pub fn deinit(self: *const @This()) void { 415 | self.results.deinit(); 416 | self.allocator.destroy(self.results); 417 | self.memoizer.deinit(self.allocator); 418 | self.path.deinit(self.allocator); 419 | return; 420 | } 421 | 422 | pub fn deinitChild(self: @This()) void { 423 | self.path.deinit(self.allocator); 424 | return; 425 | } 426 | }; 427 | } 428 | 429 | /// An interface whose implementation can be swapped out at runtime. It carries an arbitrary 430 | /// `Context` to make the type signature generic, and produces a `Value` of the given type which 431 | /// may vary from parser to parser. 432 | /// 433 | /// The `Payload` type is used to denote a payload of a single type which is typically passed 434 | /// through all parsers in a grammar. Parser and parser combinator implementations should always 435 | /// allow the user to specify this type, and should generally avoid changing the type or using it 436 | /// for their own purposes unless they are e.g. deferring parsing to another language grammar 437 | /// entirely. 
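///
/// Implementations embed a `Parser` field and hand their functions to `Parser.init`; a
/// minimal sketch (see `Literal`, `End`, and `ByteRange` in src/combn/parser for complete
/// implementations):
///
///     pub fn MyParser(comptime Payload: type) type {
///         return struct {
///             parser: Parser(Payload, Value) = Parser(Payload, Value).init(parse, nodeName, null, null),
///             // ... `parse` and `nodeName` implementations ...
///         };
///     }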
438 | pub fn Parser(comptime Payload: type, comptime Value: type) type {
439 | return struct {
440 | const Self = @This();
441 | _parse: fn (self: *const Self, ctx: *const Context(Payload, Value)) callconv(.Async) Error!void,
442 | _nodeName: fn (self: *const Self, node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64,
443 | _deinit: ?fn (self: *Self, allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void,
444 | _countReferencesTo: ?fn (self: *const Self, other: usize, freed: *std.AutoHashMap(usize, void)) usize,
445 | _heap_storage: ?[]u8,
446 | _refs: usize,
447 | 
448 | pub fn init(
449 | parseImpl: fn (self: *const Self, ctx: *const Context(Payload, Value)) callconv(.Async) Error!void,
450 | nodeNameImpl: fn (self: *const Self, node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64,
451 | deinitImpl: ?fn (self: *Self, allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void,
452 | countReferencesToImpl: ?fn (self: *const Self, other: usize, freed: *std.AutoHashMap(usize, void)) usize,
453 | ) Self {
454 | return .{
455 | ._parse = parseImpl,
456 | ._nodeName = nodeNameImpl,
457 | ._deinit = deinitImpl,
458 | ._countReferencesTo = countReferencesToImpl,
459 | ._heap_storage = null,
460 | ._refs = 0,
461 | };
462 | }
463 | 
464 | /// Allocates and stores the `parent` value (e.g. `Literal(...).init(...)`) on the heap,
465 | /// turning this `Parser` into a heap-allocated one. Returned is a pointer to the
466 | /// heap-allocated `&parent.parser`.
467 | pub fn heapAlloc(self: *const Self, allocator: mem.Allocator, parent: anytype) !*Self {
468 | _ = self;
469 | const Parent = @TypeOf(parent);
470 | var memory = try allocator.allocAdvanced(u8, @alignOf(Parent), @sizeOf(Parent), mem.Allocator.Exact.at_least);
471 | var parent_ptr = @ptrCast(*Parent, &memory[0]);
472 | parent_ptr.* = parent;
473 | parent_ptr.parser._heap_storage = memory;
474 | return &parent_ptr.parser;
475 | }
476 | 
477 | pub fn ref(self: *Self) *Self {
478 | self._refs += 1;
479 | return self;
480 | }
481 | 
482 | pub fn countReferencesTo(self: *Self, other: usize, freed: *std.AutoHashMap(usize, void)) usize {
483 | if (freed.contains(@ptrToInt(self))) return 0;
484 | return if (self._countReferencesTo) |countRefs| countRefs(self, other, freed) else 0;
485 | }
486 | 
487 | pub fn deinit(self: *Self, allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void {
488 | var freed_parsers = if (freed) |f| f else &std.AutoHashMap(usize, void).init(allocator);
489 | if (freed_parsers.contains(@ptrToInt(self))) {
490 | if (freed == null) {
491 | freed_parsers.deinit();
492 | }
493 | return;
494 | }
495 | if (self._refs > 0) self._refs -= 1;
496 | if (self._refs == 0 or self._refs == self.countReferencesTo(@ptrToInt(self), freed_parsers)) {
497 | freed_parsers.put(@ptrToInt(self), .{}) catch unreachable;
498 | self._refs = 0;
499 | if (self._deinit) |dfn| {
500 | dfn(self, allocator, freed_parsers);
501 | }
502 | if (self._heap_storage) |s| {
503 | allocator.free(s);
504 | }
505 | }
506 | if (freed == null) {
507 | freed_parsers.deinit();
508 | }
509 | }
510 | 
511 | pub fn parse(self: *const Self, ctx: *const Context(Payload, Value)) callconv(.Async) Error!void {
512 | var frame = try std.heap.page_allocator.allocAdvanced(u8, 16, @frameSize(self._parse), std.mem.Allocator.Exact.at_least);
513 | defer std.heap.page_allocator.free(frame);
514 | return try await @asyncCall(frame, {}, self._parse, .{ self, ctx });
515 | }
516 | 
517 | pub fn nodeName(self: *const Self, node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 {
518 | var v = try node_name_cache.getOrPut(@ptrToInt(self));
519 | if (!v.found_existing) {
520 | v.value_ptr.* = 1337; // "currently calculating" sentinel value
521 | const calculated = try self._nodeName(self, node_name_cache);
522 | 
523 | // If self._nodeName added more entries to node_name_cache, our pointer may have been invalidated.
524 | var vv = node_name_cache.getEntry(@ptrToInt(self));
525 | vv.?.value_ptr.* = calculated;
526 | return calculated;
527 | }
528 | if (v.value_ptr.* == 1337) {
529 | return 0; // reentrant, don't bother trying to calculate any more recursively
530 | }
531 | return v.value_ptr.*;
532 | }
533 | };
534 | }
535 | 
536 | test "syntax" {
537 | _ = Parser(void, []u8);
538 | }
539 | 
540 | test "heap_parser" {
541 | nosuspend {
542 | const Literal = @import("../parser/literal.zig").Literal;
543 | const LiteralValue = @import("../parser/literal.zig").Value;
544 | 
545 | const allocator = testing.allocator;
546 | 
547 | const Payload = void;
548 | var ctx = try Context(Payload, LiteralValue).init(allocator, "hello world", {});
549 | defer ctx.deinit();
550 | 
551 | // The parser we'll store on the heap.
552 | var want = "hello";
553 | var literal_parser = Literal(Payload).initStack(want);
554 | 
555 | // Move to heap.
556 | var heap_parser = try literal_parser.parser.heapAlloc(allocator, literal_parser);
557 | defer heap_parser.deinit(allocator, null);
558 | 
559 | // Use it.
560 | try heap_parser.parse(&ctx);
561 | 
562 | var sub = ctx.subscribe();
563 | var first = sub.next().?;
564 | defer first.deinit(ctx.allocator);
565 | try testing.expectEqual(Result(LiteralValue).init(want.len, .{ .value = "hello" }), first);
566 | try testing.expect(sub.next() == null);
567 | }
568 | }
569 | 
--------------------------------------------------------------------------------
/src/combn/gllparser/result_stream.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 | const testing = std.testing;
3 | const mem = std.mem;
4 | const ParserPath = @import("ParserPath.zig");
5 | const PosKey = @import("parser.zig").PosKey;
6 | const deinitOptional = @import("parser.zig").deinitOptional;
7 | 
8 | /// A ResultStream iterator.
9 | pub fn Iterator(comptime T: type) type {
10 | return struct {
11 | stream: *ResultStream(T),
12 | index: usize = 0,
13 | subscriber: PosKey,
14 | path: ParserPath,
15 | cyclic_closed: bool = false,
16 | cyclic_error: ?T,
17 | 
18 | const Self = @This();
19 | 
20 | /// Gets the next value, or null if the end of values has been reached.
21 | ///
22 | /// If the next value is not yet available, the frame is suspended and will be resumed once
23 | /// a new value is added.
24 | pub fn next(self: *Self) callconv(.Async) ?T {
25 | if (self.stream.past_values.items.len == 0 or self.index >= self.stream.past_values.items.len) {
26 | if (self.stream.closed or self.cyclic_closed or self.cyclic_error == null) {
27 | return null; // no more results
28 | }
29 | if (self.path.contains(self.subscriber)) {
30 | // The parser waiting on these results (self.subscriber) is itself a part of
31 | // a larger path of parsers which depend on this result in order to produce a
32 | // result. This indicates a cyclic grammar which parses the empty language,
33 | // e.g. in its simplest form:
34 | //
35 | // Expr = Expr;
36 | // Grammar = Expr;
37 | //
38 | // In practice it may take a more complex form, but regardless it means that
39 | // the subscriber should receive no results.
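// We hand back the provided cyclic error value exactly once; `cyclic_closed`
// then makes every subsequent call report the end of the stream.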
40 | self.cyclic_closed = true;
41 | return self.cyclic_error.?;
42 | }
43 | // set ourselves up to be resumed later:
44 | self.stream.listeners.append(@frame()) catch unreachable;
45 | suspend {} // wait for more results, or stream close
46 | if (self.stream.closed) {
47 | return null; // no more results
48 | }
49 | }
50 | // return the next result
51 | const v = self.stream.past_values.items[self.index];
52 | self.index += 1;
53 | return v;
54 | }
55 | };
56 | }
57 | 
58 | /// A stream of results from a parser.
59 | ///
60 | /// Listeners can be added at any time, and will receive all past values upon
61 | /// subscription.
62 | ///
63 | /// New values can be added at any time.
64 | pub fn ResultStream(comptime T: type) type {
65 | return struct {
66 | past_values: std.ArrayList(T),
67 | listeners: std.ArrayList(anyframe),
68 | closed: bool,
69 | source: PosKey,
70 | allocator: mem.Allocator,
71 | 
72 | const Self = @This();
73 | 
74 | pub fn init(allocator: mem.Allocator, source: PosKey) !Self {
75 | return Self{
76 | .past_values = std.ArrayList(T).init(allocator),
77 | .listeners = std.ArrayList(anyframe).init(allocator),
78 | .closed = false,
79 | .source = source,
80 | .allocator = allocator,
81 | };
82 | }
83 | 
84 | /// adds a value to the stream, resuming the frames of any pending listeners.
85 | ///
86 | /// Added values are owned by the result stream; subscribers borrow them, and they are valid
87 | /// until the result stream is deinitialized, at which point `deinit(allocator)` is called
88 | /// on all values.
89 | ///
90 | /// Returns only once all pending listeners' frames have been resumed.
91 | pub fn add(self: *Self, value: T) !void {
92 | try self.past_values.append(value);
93 | for (self.listeners.items) |listener| {
94 | resume listener;
95 | }
96 | self.listeners.shrinkRetainingCapacity(0);
97 | }
98 | 
99 | /// closes the stream, signaling the end and waiting for all pending listeners' frames to
100 | /// be resumed.
101 | pub fn close(self: *Self) void {
102 | self.closed = true;
103 | for (self.listeners.items) |listener| {
104 | resume listener;
105 | }
106 | self.listeners.shrinkRetainingCapacity(0);
107 | }
108 | 
109 | /// deinitializes the stream; all future calls to add, subscribe, and usage of iterators are
110 | /// forbidden.
111 | ///
112 | /// All values in this result stream are deinitialized via a call to `v.deinit(allocator)`.
113 | ///
114 | /// `close` must be called before deinit.
115 | pub fn deinit(self: *const Self) void {
116 | for (self.past_values.items) |v| deinitOptional(v, self.allocator);
117 | self.past_values.deinit();
118 | self.listeners.deinit();
119 | }
120 | 
121 | /// subscribes to all past and future values of the stream, producing an async iterator.
122 | ///
123 | /// Uses of the returned iterator are valid for as long as the result stream is not
124 | /// deinitialized.
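///
/// A minimal sketch (`subscriber_key`, `path`, and `err_value` are placeholder names; the
/// "result_stream" test below is a complete example):
///
///     var iter = stream.subscribe(subscriber_key, path, err_value);
///     while (iter.next()) |result| { ... }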
125 | pub fn subscribe(self: *Self, subscriber: PosKey, path: ParserPath, cyclic_error: T) Iterator(T) { 126 | const iter = Iterator(T){ 127 | .stream = self, 128 | .subscriber = subscriber, 129 | .path = path, 130 | .cyclic_error = cyclic_error, 131 | }; 132 | return iter; 133 | } 134 | }; 135 | } 136 | 137 | test "result_stream" { 138 | nosuspend { 139 | const value = struct { 140 | value: i32, 141 | 142 | pub fn deinit(self: *const @This(), allocator: mem.Allocator) void { 143 | _ = self; 144 | _ = allocator; 145 | } 146 | }; 147 | const subscriber = PosKey{ 148 | .node_name = 0, 149 | .src_ptr = 0, 150 | .offset = 0, 151 | }; 152 | const source = subscriber; 153 | const path = ParserPath.init(); 154 | var stream = try ResultStream(value).init(testing.allocator, source); 155 | defer stream.deinit(); 156 | 157 | // Subscribe and begin to query a value (next() will suspend) before any values have been added 158 | // to the stream. 159 | var sub1 = stream.subscribe(subscriber, path, .{ .value = -1 }); 160 | var sub1first = async sub1.next(); 161 | 162 | // Add a value to the stream, our first subscription will get it. 163 | try stream.add(.{ .value = 1 }); 164 | try testing.expectEqual(@as(i32, 1), (await sub1first).?.value); 165 | 166 | // Query the next value (next() will suspend again), then add a value and close the stream for 167 | // good. 168 | var sub1second = async sub1.next(); 169 | try stream.add(.{ .value = 2 }); 170 | stream.close(); 171 | 172 | // Confirm we get the remaining values, and the null terminator forever after that. 173 | try testing.expectEqual(@as(i32, 2), (await sub1second).?.value); 174 | try testing.expectEqual(@as(?value, null), sub1.next()); 175 | try testing.expectEqual(@as(?value, null), sub1.next()); 176 | 177 | // Now that the stream is closed, add a new subscription and confirm we get all prior values. 178 | var sub2 = stream.subscribe(subscriber, path, .{ .value = -1 }); 179 | try testing.expectEqual(@as(i32, 1), sub2.next().?.value); 180 | try testing.expectEqual(@as(i32, 2), sub2.next().?.value); 181 | try testing.expectEqual(@as(?value, null), sub2.next()); 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /src/combn/parser/byte_range.zig: -------------------------------------------------------------------------------- 1 | const gllparser = @import("../gllparser/gllparser.zig"); 2 | const Error = gllparser.Error; 3 | const Parser = gllparser.Parser; 4 | const ParserContext = gllparser.Context; 5 | const Result = gllparser.Result; 6 | const NodeName = gllparser.NodeName; 7 | 8 | const std = @import("std"); 9 | const testing = std.testing; 10 | const mem = std.mem; 11 | 12 | pub const Context = struct { 13 | // from byte (inclusive) 14 | from: u8, 15 | 16 | // to byte (inclusive) 17 | to: u8, 18 | }; 19 | 20 | pub const Value = struct { 21 | value: u8, 22 | 23 | pub fn deinit(self: *const @This(), allocator: mem.Allocator) void { 24 | _ = self; 25 | _ = allocator; 26 | } 27 | }; 28 | 29 | /// Matches any single byte in the specified range. 
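///
/// For example, `.{ .from = '0', .to = '9' }` would match any ASCII digit, while
/// `.{ .from = 0, .to = 255 }` matches any single byte (as in the test below).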
30 | pub fn ByteRange(comptime Payload: type) type { 31 | return struct { 32 | parser: Parser(Payload, Value) = Parser(Payload, Value).init(parse, nodeName, null, null), 33 | input: Context, 34 | 35 | const Self = @This(); 36 | 37 | pub fn init(allocator: mem.Allocator, input: Context) !*Parser(Payload, Value) { 38 | const self = Self{ .input = input }; 39 | return try self.parser.heapAlloc(allocator, self); 40 | } 41 | 42 | pub fn initStack(input: Context) Self { 43 | return Self{ .input = input }; 44 | } 45 | 46 | pub fn nodeName(parser: *const Parser(Payload, Value), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 { 47 | _ = node_name_cache; 48 | const self = @fieldParentPtr(Self, "parser", parser); 49 | 50 | var v = std.hash_map.hashString("ByteRange"); 51 | v +%= self.input.from; 52 | v +%= self.input.to; 53 | return v; 54 | } 55 | 56 | pub fn parse(parser: *const Parser(Payload, Value), in_ctx: *const ParserContext(Payload, Value)) callconv(.Async) !void { 57 | const self = @fieldParentPtr(Self, "parser", parser); 58 | var ctx = in_ctx.with(self.input); 59 | defer ctx.results.close(); 60 | 61 | const src = ctx.src[ctx.offset..]; 62 | if (src.len == 0 or src[0] < self.input.from or src[0] > self.input.to) { 63 | // TODO(slimsag): include in error message the expected range (or "any byte" if full range) 64 | try ctx.results.add(Result(Value).initError(ctx.offset + 1, "expected byte range")); 65 | return; 66 | } 67 | try ctx.results.add(Result(Value).init(ctx.offset + 1, .{ .value = src[0] })); 68 | return; 69 | } 70 | }; 71 | } 72 | 73 | test "byte_range" { 74 | nosuspend { 75 | const allocator = testing.allocator; 76 | 77 | const Payload = void; 78 | var ctx = try ParserContext(Payload, Value).init(allocator, "hello world", {}); 79 | defer ctx.deinit(); 80 | 81 | var any_byte = try ByteRange(Payload).init(allocator, .{ .from = 0, .to = 255 }); 82 | defer any_byte.deinit(allocator, null); 83 | try any_byte.parse(&ctx); 84 | 85 | var sub = ctx.subscribe(); 86 | var first = sub.next().?; 87 | defer first.deinit(ctx.allocator); 88 | try testing.expectEqual(Result(Value).init(1, .{ .value = 'h' }), first); 89 | try testing.expect(sub.next() == null); 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/combn/parser/end.zig: -------------------------------------------------------------------------------- 1 | const gllparser = @import("../gllparser/gllparser.zig"); 2 | const Error = gllparser.Error; 3 | const Parser = gllparser.Parser; 4 | const ParserContext = gllparser.Context; 5 | const Result = gllparser.Result; 6 | const NodeName = gllparser.NodeName; 7 | 8 | const std = @import("std"); 9 | const testing = std.testing; 10 | const mem = std.mem; 11 | 12 | pub const Value = struct { 13 | pub fn deinit(self: *const @This(), allocator: mem.Allocator) void { 14 | _ = self; 15 | _ = allocator; 16 | } 17 | }; 18 | 19 | /// Matches the end of the `input` string. 
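///
/// Typically used as the final element of a sequence (e.g. `Grammar = Expr, EOF ;`) to
/// require that the entire input is consumed.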
20 | pub fn End(comptime Payload: type) type { 21 | return struct { 22 | parser: Parser(Payload, Value) = Parser(Payload, Value).init(parse, nodeName, null, null), 23 | 24 | const Self = @This(); 25 | 26 | pub fn init(allocator: mem.Allocator) !*Parser(Payload, Value) { 27 | const self = Self{}; 28 | return try self.parser.heapAlloc(allocator, self); 29 | } 30 | 31 | pub fn initStack() Self { 32 | return Self{}; 33 | } 34 | 35 | pub fn nodeName(parser: *const Parser(Payload, Value), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 { 36 | _ = parser; 37 | _ = node_name_cache; 38 | return std.hash_map.hashString("End"); 39 | } 40 | 41 | pub fn parse(parser: *const Parser(Payload, Value), in_ctx: *const ParserContext(Payload, Value)) callconv(.Async) !void { 42 | _ = parser; 43 | var ctx = in_ctx.with({}); 44 | defer ctx.results.close(); 45 | 46 | if (ctx.offset != ctx.src.len) { 47 | try ctx.results.add(Result(Value).initError(ctx.offset + 1, "expected end of input")); 48 | return; 49 | } 50 | try ctx.results.add(Result(Value).init(ctx.offset, .{})); 51 | return; 52 | } 53 | }; 54 | } 55 | 56 | test "end" { 57 | nosuspend { 58 | const allocator = testing.allocator; 59 | 60 | const Payload = void; 61 | var ctx = try ParserContext(Payload, Value).init(allocator, "", {}); 62 | defer ctx.deinit(); 63 | 64 | var e = try End(Payload).init(allocator); 65 | defer e.deinit(allocator, null); 66 | try e.parse(&ctx); 67 | 68 | var sub = ctx.subscribe(); 69 | var first = sub.next().?; 70 | defer first.deinit(ctx.allocator); 71 | try testing.expectEqual(Result(Value).init(0, .{}), first); 72 | try testing.expect(sub.next() == null); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/combn/parser/literal.zig: -------------------------------------------------------------------------------- 1 | const gllparser = @import("../gllparser/gllparser.zig"); 2 | const Error = gllparser.Error; 3 | const Parser = gllparser.Parser; 4 | const ParserContext = gllparser.Context; 5 | const Result = gllparser.Result; 6 | const NodeName = gllparser.NodeName; 7 | 8 | const std = @import("std"); 9 | const testing = std.testing; 10 | const mem = std.mem; 11 | 12 | pub const Context = []const u8; 13 | 14 | pub const Value = struct { 15 | /// The `input` string itself. 16 | value: []const u8, 17 | 18 | pub fn deinit(self: *const @This(), allocator: mem.Allocator) void { 19 | _ = self; 20 | _ = allocator; 21 | } 22 | }; 23 | 24 | /// Matches the literal `input` string. 25 | /// 26 | /// The `input` string must remain alive for as long as the `Literal` parser will be used. 
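///
/// A minimal usage sketch (the test below is a complete example):
///
///     var hello = try Literal(Payload).init(allocator, "hello");
///     defer hello.deinit(allocator, null);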
27 | pub fn Literal(comptime Payload: type) type { 28 | return struct { 29 | parser: Parser(Payload, Value) = Parser(Payload, Value).init(parse, nodeName, null, null), 30 | input: Context, 31 | 32 | const Self = @This(); 33 | 34 | pub fn init(allocator: mem.Allocator, input: Context) !*Parser(Payload, Value) { 35 | const self = Self{ .input = input }; 36 | return try self.parser.heapAlloc(allocator, self); 37 | } 38 | 39 | pub fn initStack(input: Context) Self { 40 | return Self{ .input = input }; 41 | } 42 | 43 | pub fn nodeName(parser: *const Parser(Payload, Value), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 { 44 | _ = node_name_cache; 45 | const self = @fieldParentPtr(Self, "parser", parser); 46 | 47 | var v = std.hash_map.hashString("Literal"); 48 | v +%= std.hash_map.hashString(self.input); 49 | return v; 50 | } 51 | 52 | pub fn parse(parser: *const Parser(Payload, Value), in_ctx: *const ParserContext(Payload, Value)) callconv(.Async) !void { 53 | const self = @fieldParentPtr(Self, "parser", parser); 54 | var ctx = in_ctx.with(self.input); 55 | defer ctx.results.close(); 56 | 57 | if (ctx.offset >= ctx.src.len or !mem.startsWith(u8, ctx.src[ctx.offset..], ctx.input)) { 58 | // TODO(slimsag): include what literal was expected 59 | try ctx.results.add(Result(Value).initError(ctx.offset + 1, "expected literal")); 60 | return; 61 | } 62 | try ctx.results.add(Result(Value).init(ctx.offset + ctx.input.len, .{ .value = self.input })); 63 | return; 64 | } 65 | }; 66 | } 67 | 68 | test "literal" { 69 | nosuspend { 70 | const allocator = testing.allocator; 71 | 72 | const Payload = void; 73 | var ctx = try ParserContext(Payload, Value).init(allocator, "hello world", {}); 74 | defer ctx.deinit(); 75 | 76 | var want = "hello"; 77 | var l = try Literal(Payload).init(allocator, want); 78 | defer l.deinit(allocator, null); 79 | try l.parse(&ctx); 80 | 81 | var sub = ctx.subscribe(); 82 | var first = sub.next().?; 83 | defer first.deinit(ctx.allocator); 84 | try testing.expectEqual(Result(Value).init(want.len, .{ .value = "hello" }), first); 85 | try testing.expect(sub.next() == null); 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /src/combn/parser/parser.zig: -------------------------------------------------------------------------------- 1 | pub const byte_range = @import("byte_range.zig"); 2 | pub const ByteRange = byte_range.ByteRange; 3 | 4 | pub const end = @import("end.zig"); 5 | pub const End = end.End; 6 | 7 | pub const literal = @import("literal.zig"); 8 | pub const Literal = literal.Literal; 9 | 10 | test "include" { 11 | _ = ByteRange; 12 | _ = End; 13 | _ = Literal; 14 | } 15 | -------------------------------------------------------------------------------- /src/combn/test_complex.zig: -------------------------------------------------------------------------------- 1 | const combn = @import("combn.zig"); 2 | const Result = combn.gllparser.Result; 3 | const Parser = combn.gllparser.Parser; 4 | const Error = combn.gllparser.Error; 5 | const Context = combn.gllparser.Context; 6 | const PosKey = combn.gllparser.PosKey; 7 | const ParserPath = combn.gllparser.ParserPath; 8 | const Literal = combn.parser.Literal; 9 | const LiteralValue = combn.parser.literal.Value; 10 | const MapTo = combn.combinator.MapTo; 11 | const Optional = combn.combinator.Optional; 12 | const Reentrant = combn.combinator.Reentrant; 13 | const SequenceAmbiguous = combn.combinator.SequenceAmbiguous; 14 | const SequenceAmbiguousValue = 
combn.combinator.sequence_ambiguous.Value; 15 | 16 | const std = @import("std"); 17 | const mem = std.mem; 18 | const testing = std.testing; 19 | 20 | // Confirms that a direct left-recursive grammar for an empty language actually rejects 21 | // all input strings, and does not just hang indefinitely: 22 | // 23 | // ```ebnf 24 | // Expr = Expr ; 25 | // Grammar = Expr ; 26 | // ``` 27 | // 28 | // See https://cs.stackexchange.com/q/138447/134837 29 | test "direct_left_recursion_empty_language" { 30 | nosuspend { 31 | const allocator = testing.allocator; 32 | 33 | const node = struct { 34 | name: []const u8, 35 | 36 | pub fn deinit(self: *const @This(), _allocator: mem.Allocator) void { 37 | _ = self; 38 | _ = _allocator; 39 | } 40 | }; 41 | 42 | const Payload = void; 43 | const ctx = try Context(Payload, node).init(allocator, "abcabcabc123abc", {}); 44 | defer ctx.deinit(); 45 | 46 | var parsers = [_]*Parser(Payload, node){ 47 | undefined, // placeholder for left-recursive Expr itself 48 | }; 49 | var expr = try MapTo(Payload, SequenceAmbiguousValue(node), node).init(allocator, .{ 50 | .parser = (try SequenceAmbiguous(Payload, node).init(allocator, &parsers, .borrowed)).ref(), 51 | .mapTo = struct { 52 | fn mapTo(in: Result(SequenceAmbiguousValue(node)), payload: Payload, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(node) { 53 | _ = payload; 54 | switch (in.result) { 55 | .err => return Result(node).initError(in.offset, in.result.err), 56 | else => { 57 | var flattened = try in.result.value.flatten(_allocator, key, path); 58 | defer flattened.deinit(); 59 | return Result(node).init(in.offset, node{ .name = "Expr" }); 60 | }, 61 | } 62 | } 63 | }.mapTo, 64 | }); 65 | defer expr.deinit(allocator, null); 66 | parsers[0] = expr.ref(); 67 | try expr.parse(&ctx); 68 | 69 | var sub = ctx.subscribe(); 70 | var first = sub.next().?; 71 | try testing.expect(sub.next() == null); // stream closed 72 | 73 | // TODO(slimsag): perhaps better if it's not an error? 
74 | try testing.expectEqual(@as(usize, 0), first.offset);
75 | try testing.expectEqualStrings("matches only the empty language", first.result.err);
76 | }
77 | }
78 | 
79 | // Confirms that a direct left-recursive grammar for a valid language works:
80 | //
81 | // ```ebnf
82 | // Expr = Expr?, "abc" ;
83 | // Grammar = Expr ;
84 | // ```
85 | //
86 | test "direct_left_recursion" {
87 | const allocator = testing.allocator;
88 | 
89 | const node = struct {
90 | name: std.ArrayList(u8),
91 | 
92 | pub fn deinit(self: *const @This(), _allocator: mem.Allocator) void {
93 | _ = _allocator;
94 | self.name.deinit();
95 | }
96 | };
97 | 
98 | const Payload = void;
99 | const ctx = try Context(Payload, node).init(allocator, "abcabcabc123abc", {});
100 | defer ctx.deinit();
101 | 
102 | var abcAsNode = try MapTo(Payload, LiteralValue, node).init(allocator, .{
103 | .parser = (try Literal(Payload).init(allocator, "abc")).ref(),
104 | .mapTo = struct {
105 | fn mapTo(in: Result(LiteralValue), payload: Payload, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(node) {
106 | _ = _allocator;
107 | _ = payload;
108 | _ = key;
109 | _ = path;
110 | switch (in.result) {
111 | .err => return Result(node).initError(in.offset, in.result.err),
112 | else => {
113 | var name = std.ArrayList(u8).init(_allocator);
114 | try name.appendSlice("abc");
115 | return Result(node).init(in.offset, node{ .name = name });
116 | },
117 | }
118 | }
119 | }.mapTo,
120 | });
121 | 
122 | var parsers = [_]*Parser(Payload, node){
123 | undefined, // placeholder for left-recursive Expr itself
124 | abcAsNode.ref(),
125 | };
126 | var expr = try Reentrant(Payload, node).init(
127 | allocator,
128 | try MapTo(Payload, SequenceAmbiguousValue(node), node).init(allocator, .{
129 | .parser = (try SequenceAmbiguous(Payload, node).init(allocator, &parsers, .borrowed)).ref(),
130 | .mapTo = struct {
131 | fn mapTo(in: Result(SequenceAmbiguousValue(node)), payload: Payload, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(node) {
132 | _ = payload;
133 | switch (in.result) {
134 | .err => return Result(node).initError(in.offset, in.result.err),
135 | else => {
136 | var name = std.ArrayList(u8).init(_allocator);
137 | 
138 | var flattened = try in.result.value.flatten(_allocator, key, path);
139 | defer flattened.deinit();
140 | var sub = flattened.subscribe(key, path, Result(node).initError(0, "matches only the empty language"));
141 | try name.appendSlice("(");
142 | var prev = false;
143 | while (sub.next()) |next| {
144 | if (prev) {
145 | try name.appendSlice(",");
146 | }
147 | prev = true;
148 | try name.appendSlice(next.result.value.name.items);
149 | }
150 | try name.appendSlice(")");
151 | return Result(node).init(in.offset, node{ .name = name });
152 | },
153 | }
154 | }
155 | }.mapTo,
156 | }),
157 | );
158 | var optionalExpr = try MapTo(Payload, ?node, node).init(allocator, .{
159 | .parser = (try Optional(Payload, node).init(allocator, expr.ref())).ref(),
160 | .mapTo = struct {
161 | fn mapTo(in: Result(?node), payload: Payload, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(node) {
162 | _ = payload;
163 | _ = key;
164 | _ = path;
165 | switch (in.result) {
166 | .err => return Result(node).initError(in.offset, in.result.err),
167 | else => {
168 | if (in.result.value == null) {
169 | var name = std.ArrayList(u8).init(_allocator);
170 | try name.appendSlice("null");
171 | return Result(node).init(in.offset, node{ .name = name });
172 | }
173 | 
174 | var name = std.ArrayList(u8).init(_allocator);
175 | try name.appendSlice(in.result.value.?.name.items);
176 | return Result(node).init(in.offset, node{ .name = name });
177 | },
178 | }
179 | }
180 | }.mapTo,
181 | });
182 | parsers[0] = optionalExpr.ref();
183 | defer expr.deinit(allocator, null);
184 | try expr.parse(&ctx);
185 | 
186 | var sub = ctx.subscribe();
187 | var first = sub.next().?;
188 | try testing.expect(sub.next() == null); // stream closed
189 | 
190 | try testing.expectEqual(@as(usize, 0), first.offset);
191 | try testing.expectEqualStrings("(((null,abc),abc),abc)", first.result.value.name.items);
192 | }
193 | 
--------------------------------------------------------------------------------
/src/dsl/Compilation.zig:
--------------------------------------------------------------------------------
1 | //! A Compilation is the result of parsing Zorex's DSL syntax. That is, the Zorex DSL is parsed to
2 | //! produce a Compilation (not e.g. an AST), which is itself a parser that, when invoked, parses
3 | //! the syntax described by the DSL.
4 | 
5 | const combn = @import("../combn/combn.zig");
6 | const Parser = combn.gllparser.Parser;
7 | 
8 | const String = @import("String.zig");
9 | const Node = @import("Node.zig");
10 | const CompilerContext = @import("CompilerContext.zig");
11 | 
12 | const std = @import("std");
13 | const mem = std.mem;
14 | 
15 | const Compilation = @This();
16 | 
17 | value: union(ValueTag) {
18 | parser: CompiledParser,
19 | identifier: String,
20 | },
21 | 
22 | pub const CompiledParser = struct {
23 | ptr: *Parser(void, *Node),
24 | slice: ?[]*const Parser(void, *Node),
25 | 
26 | pub fn deinit(self: @This(), allocator: mem.Allocator) void {
27 | self.ptr.deinit(allocator, null);
28 | if (self.slice) |slice| {
29 | allocator.free(slice);
30 | }
31 | }
32 | };
33 | 
34 | pub const ValueTag = enum {
35 | parser,
36 | identifier,
37 | };
38 | 
39 | pub fn initParser(parser: CompiledParser) Compilation {
40 | return .{ .value = .{ .parser = parser } };
41 | }
42 | 
43 | pub fn initIdentifier(identifier: String) Compilation {
44 | return .{ .value = .{ .identifier = identifier } };
45 | }
46 | 
47 | pub fn deinit(self: *const Compilation, allocator: mem.Allocator) void {
48 | switch (self.value) {
49 | .parser => |v| v.deinit(allocator),
50 | .identifier => |v| v.deinit(allocator),
51 | }
52 | }
53 | 
54 | const HashContext = struct {
55 | pub fn hash(self: @This(), key: Compilation) u64 {
56 | _ = self;
57 | return switch (key.value) {
58 | .parser => |p| @ptrToInt(p.ptr),
59 | .identifier => |ident| std.hash_map.hashString(ident.value),
60 | };
61 | }
62 | 
63 | pub fn eql(self: @This(), a: Compilation, b: Compilation) bool {
64 | _ = self;
65 | return switch (a.value) {
66 | .parser => |aa| switch (b.value) {
67 | .parser => |bb| aa.ptr == bb.ptr,
68 | .identifier => false,
69 | },
70 | .identifier => |aa| switch (b.value) {
71 | .parser => false,
72 | .identifier => |bb| std.mem.eql(u8, aa.value, bb.value),
73 | },
74 | };
75 | }
76 | };
77 | 
78 | pub const HashMap = std.HashMap(Compilation, Compilation, HashContext, std.hash_map.default_max_load_percentage);
79 | 
--------------------------------------------------------------------------------
/src/dsl/CompilerContext.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 | const mem = std.mem;
3 | 
4 | const Compilation = @import("Compilation.zig");
5 | 
6 | const CompilerContext = @This();
7 | 
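/// Identifier definitions seen so far: maps an identifier Compilation to the Compilation
/// it was defined as (looked up when an identifier is referenced; see grammar.zig).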
8 | identifiers: Compilation.HashMap,
9 | 
10 | pub fn init(allocator: mem.Allocator) !*CompilerContext {
11 | const compilerContext = try allocator.create(CompilerContext);
12 | compilerContext.* = CompilerContext{
13 | .identifiers = Compilation.HashMap.init(allocator),
14 | };
15 | return compilerContext;
16 | }
17 | 
18 | pub fn deinit(self: *CompilerContext, allocator: mem.Allocator) void {
19 | self.identifiers.deinit();
20 | allocator.destroy(self);
21 | }
22 | 
--------------------------------------------------------------------------------
/src/dsl/Node.zig:
--------------------------------------------------------------------------------
1 | //! The result of executing a Compilation is a tree of nodes describing the parsed language.
2 | 
3 | const String = @import("String.zig");
4 | 
5 | const std = @import("std");
6 | const mem = std.mem;
7 | 
8 | const Node = @This();
9 | 
10 | name: String,
11 | value: ?String,
12 | refs: usize,
13 | children: ?[]*Node,
14 | 
15 | pub const Error = error{OutOfMemory};
16 | 
17 | pub fn init(allocator: mem.Allocator, name: String, value: ?String) !*Node {
18 | var self = try allocator.create(Node);
19 | self.* = .{
20 | .name = name,
21 | .value = value,
22 | .refs = 1,
23 | .children = null,
24 | };
25 | return self;
26 | }
27 | 
28 | pub fn ref(self: *Node) *Node {
29 | self.refs += 1;
30 | return self;
31 | }
32 | 
33 | pub fn deinit(self: *Node, allocator: mem.Allocator) void {
34 | if (self.refs == 0) unreachable; // unbalanced deinit: refs is a usize and must not underflow
35 | self.refs -= 1;
36 | if (self.refs == 0) {
37 | self.name.deinit(allocator);
38 | if (self.value) |v| v.deinit(allocator);
39 | if (self.children) |children| {
40 | for (children) |child| child.deinit(allocator);
41 | allocator.free(children);
42 | }
43 | allocator.destroy(self);
44 | }
45 | }
46 | 
47 | pub fn writeJSON(self: *const Node, allocator: mem.Allocator, out_stream: anytype) Error!void {
48 | var w = std.json.WriteStream(@TypeOf(out_stream), 5).init(out_stream);
49 | 
50 | var ptrToID = std.AutoHashMap(*const Node, i32).init(allocator);
51 | defer ptrToID.deinit();
52 | 
53 | try w.beginArray();
54 | try self._writeJSON(&w, &ptrToID);
55 | try w.endArray();
56 | }
57 | 
58 | fn _writeJSON(self: *const Node, w: anytype, ptrToID: *std.AutoHashMap(*const Node, i32)) Error!void {
59 | if (self.children) |children| for (children) |child| try child._writeJSON(w, ptrToID);
60 | 
61 | var v = try ptrToID.getOrPut(self);
62 | if (v.found_existing) return; // visited already
63 | 
64 | v.value_ptr.* = @intCast(i32, ptrToID.count() - 1);
65 | try w.arrayElem();
66 | try w.beginObject();
67 | try w.objectField("name");
68 | try w.emitString(self.name.value);
69 | if (self.value) |value| {
70 | try w.objectField("value");
71 | try w.emitString(value.value);
72 | }
73 | if (self.children) |children| {
74 | try w.objectField("children");
75 | try w.beginArray();
76 | for (children) |child| {
77 | try w.arrayElem();
78 | try w.emitNumber(ptrToID.get(child).?);
79 | }
80 | try w.endArray();
81 | }
82 | try w.endObject();
83 | }
84 | 
--------------------------------------------------------------------------------
/src/dsl/Program.zig:
--------------------------------------------------------------------------------
1 | //! The public interface for compiling and running Zorex programs.
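//!
//! A typical flow (a sketch; the tests at the bottom of this file are complete examples):
//!
//!     var program = Program.init(allocator, "Date = /a/; Date");
//!     defer program.deinit();
//!     try program.compile();
//!     const node = try program.execute("some input");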
2 | 
3 | const compiler = @import("compiler.zig");
4 | const CompilerResult = @import("compiler.zig").CompilerResult;
5 | const Compilation = @import("Compilation.zig");
6 | const Node = @import("Node.zig");
7 | const CompilerContext = @import("CompilerContext.zig");
8 | 
9 | const combn = @import("../combn/combn.zig");
10 | const Context = combn.gllparser.Context;
11 | const Result = combn.gllparser.Result;
12 | 
13 | const std = @import("std");
14 | const testing = std.testing;
15 | const mem = std.mem;
16 | const assert = std.debug.assert;
17 | 
18 | const Program = @This();
19 | 
20 | /// If compile() fails, this error message and offset explain why and where.
21 | error_message: ?[]const u8,
22 | error_offset: usize,
23 | 
24 | /// The source of the program; null after successful compilation.
25 | src: ?[]const u8,
26 | 
27 | /// The compiled program.
28 | program: ?CompilerResult,
29 | 
30 | /// Context for the program.
31 | context: ?Context(void, *Node),
32 | 
33 | allocator: mem.Allocator,
34 | 
35 | pub const Error = error{
36 | OutOfMemory,
37 | CompilationFailed,
38 | };
39 | 
40 | /// Initializes a new program with the given source, which is borrowed until compile() is called
41 | /// and returns.
42 | pub fn init(allocator: mem.Allocator, src: []const u8) Program {
43 | return Program{
44 | .error_message = null,
45 | .error_offset = 0,
46 | .src = src,
47 | .program = null,
48 | .context = null,
49 | .allocator = allocator,
50 | };
51 | }
52 | 
53 | /// Compiles the program, returning an error if compilation fails.
54 | pub fn compile(self: *Program) !void {
55 | // Compile the syntax.
56 | var compilerResult = try compiler.compile(self.allocator, self.src.?);
57 | switch (compilerResult.compilation.result) {
58 | .err => |e| {
59 | self.error_message = e;
60 | self.error_offset = compilerResult.compilation.offset;
61 | compilerResult.deinit(self.allocator);
62 | return Error.CompilationFailed;
63 | },
64 | .value => {},
65 | }
66 | self.program = compilerResult;
67 | self.src = null;
68 | }
69 | 
70 | /// Executes the program with the given input.
71 | pub fn execute(self: *Program, input: []const u8) !*Node {
72 | nosuspend {
73 | self.context = try Context(void, *Node).init(self.allocator, input, {});
74 | 
75 | const compilation = self.program.?.compilation.result.value;
76 | try compilation.value.parser.ptr.parse(&self.context.?);
77 | 
78 | var sub = self.context.?.subscribe();
79 | var first = sub.next().?;
80 | assert(sub.next() == null); // no ambiguous parse paths here
81 | return first.result.value;
82 | }
83 | }
84 | 
85 | pub fn deinit(self: *const Program) void {
86 | if (self.program) |prog| {
87 | self.context.?.deinit();
88 | prog.deinit(self.allocator);
89 | }
90 | }
91 | 
92 | test "example_regex" {
93 | const allocator = testing.allocator;
94 | 
95 | // Compile the regex.
96 | var program = Program.init(allocator, "/a/");
97 | defer program.deinit();
98 | program.compile() catch |err| switch (err) {
99 | Error.CompilationFailed => @panic(program.error_message.?),
100 | else => unreachable,
101 | };
102 | 
103 | // Execute the regex.
104 | const input = "hmmm";
105 | const result = try program.execute(input);
106 | 
107 | // Serialize to JSON.
108 | var buffer = std.ArrayList(u8).init(allocator);
109 | defer buffer.deinit();
110 | try result.writeJSON(allocator, buffer.writer());
111 | 
112 | // Confirm the results.
113 | try testing.expectEqualStrings(
114 | \\[
115 | \\ {
116 | \\ "name": "TODO(slimsag): value from parsing regexp!"
117 | \\ }
118 | \\]
119 | , buffer.items);
120 | }
121 | 
122 | test "example_zorex" {
123 | const allocator = testing.allocator;
124 | 
125 | // Compile the zorex.
126 | var program = Program.init(allocator, "Date = /a/; Date");
127 | defer program.deinit();
128 | program.compile() catch |err| switch (err) {
129 | Error.CompilationFailed => @panic(program.error_message.?),
130 | else => unreachable,
131 | };
132 | 
133 | // Execute the zorex.
134 | const input = "hmmm";
135 | const result = try program.execute(input);
136 | 
137 | // Serialize to JSON.
138 | var buffer = std.ArrayList(u8).init(allocator);
139 | defer buffer.deinit();
140 | try result.writeJSON(allocator, buffer.writer());
141 | 
142 | // Confirm the results.
143 | try testing.expectEqualStrings(
144 | \\[
145 | \\ {
146 | \\ "name": "TODO(slimsag): value from parsing regexp!"
147 | \\ },
148 | \\ {
149 | \\ "name": "unknown",
150 | \\ "children": [
151 | \\ 0
152 | \\ ]
153 | \\ }
154 | \\]
155 | , buffer.items);
156 | }
157 | 
--------------------------------------------------------------------------------
/src/dsl/String.zig:
--------------------------------------------------------------------------------
1 | //! A string that is either unowned (e.g. a slice into another string) or owned, and able to deinit
2 | //! itself accordingly.
3 | 
4 | const std = @import("std");
5 | const mem = std.mem;
6 | 
7 | value: []const u8,
8 | owned: bool,
9 | 
10 | pub fn initOwned(value: []const u8) !@This() {
11 | return .{ .value = value, .owned = true }; // takes ownership; deinit will free the value
12 | }
13 | 
14 | pub fn init(value: []const u8) @This() {
15 | return .{ .value = value, .owned = false };
16 | }
17 | 
18 | pub fn deinit(self: *const @This(), allocator: mem.Allocator) void {
19 | if (self.owned) allocator.free(self.value);
20 | }
21 | 
--------------------------------------------------------------------------------
/src/dsl/compiler.zig:
--------------------------------------------------------------------------------
1 | const combn = @import("../combn/combn.zig");
2 | const Result = combn.gllparser.Result;
3 | const Parser = combn.gllparser.Parser;
4 | const Error = combn.gllparser.Error;
5 | const Context = combn.gllparser.Context;
6 | const PosKey = combn.gllparser.PosKey;
7 | const ParserPath = combn.gllparser.ParserPath;
8 | const Sequence = combn.combinator.Sequence;
9 | const SequenceValue = combn.combinator.sequence.Value;
10 | const Repeated = combn.combinator.Repeated;
11 | const RepeatedValue = combn.combinator.repeated.Value;
12 | const Literal = combn.parser.Literal;
13 | const LiteralValue = combn.parser.literal.Value;
14 | const OneOf = combn.combinator.OneOf;
15 | const MapTo = combn.combinator.MapTo;
16 | const Optional = combn.combinator.Optional;
17 | 
18 | const String = @import("String.zig");
19 | const Node = @import("Node.zig");
20 | const Compilation = @import("Compilation.zig");
21 | const Identifier = @import("identifier.zig").Identifier;
22 | const CompilerContext = @import("CompilerContext.zig");
23 | 
24 | const grammar = @import("grammar.zig");
25 | 
26 | const std = @import("std");
27 | const testing = std.testing;
28 | const mem = std.mem;
29 | const assert = std.debug.assert;
30 | 
31 | pub const CompilerResult = struct {
32 | compilation: Result(Compilation),
33 | ctx: Context(*CompilerContext, Compilation),
34 | compilerContext: *CompilerContext,
35 | 
36 | pub fn deinit(self: *const @This(), allocator: mem.Allocator) void {
37 | self.ctx.deinit();
38 | self.compilerContext.deinit(allocator);
39 | }
40 | };
41 | 
42 | pub fn compile(allocator: 
mem.Allocator, syntax: []const u8) !CompilerResult { 43 | const dsl_parser = try grammar.init(allocator); 44 | defer dsl_parser.deinit(allocator, null); 45 | 46 | var compilerContext = try CompilerContext.init(allocator); 47 | var ctx = try Context(*CompilerContext, Compilation).init(allocator, syntax, compilerContext); 48 | try dsl_parser.parse(&ctx); 49 | 50 | var sub = ctx.subscribe(); 51 | var compilation = sub.next(); 52 | assert(sub.next() == null); // our grammar is never ambiguous 53 | if (compilation == null) { 54 | return CompilerResult{ 55 | .compilation = Result(Compilation).initError(ctx.offset, "failed to compile"), 56 | .compilerContext = compilerContext, 57 | .ctx = ctx, 58 | }; 59 | } 60 | return CompilerResult{ 61 | .compilation = compilation.?, 62 | .compilerContext = compilerContext, 63 | .ctx = ctx, 64 | }; 65 | } 66 | 67 | test "DSL" { 68 | nosuspend { 69 | const allocator = testing.allocator; 70 | 71 | // Compile the regexp. 72 | var compilerResult = try compile(allocator, "/a/"); 73 | defer compilerResult.deinit(allocator); 74 | switch (compilerResult.compilation.result) { 75 | .err => |e| @panic(e), 76 | .value => {}, 77 | } 78 | var program = compilerResult.compilation.result.value; 79 | 80 | // Run the regexp. 81 | var input = "//"; 82 | var ctx = try Context(void, *Node).init(allocator, input, {}); 83 | defer ctx.deinit(); 84 | 85 | try program.value.parser.ptr.parse(&ctx); 86 | 87 | var sub = ctx.subscribe(); 88 | var first = sub.next().?; 89 | try testing.expectEqualStrings("TODO(slimsag): value from parsing regexp!", first.result.value.name.value); 90 | try testing.expectEqual(@as(usize, 0), first.offset); 91 | try testing.expect(sub.next() == null); 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /src/dsl/dsl.zig: -------------------------------------------------------------------------------- 1 | pub const Compilation = @import("Compilation.zig"); 2 | pub const compiler = @import("compiler.zig"); 3 | pub const Identifier = @import("identifier.zig").Identifier; 4 | pub const Node = @import("Node.zig"); 5 | pub const Program = @import("Program.zig"); 6 | pub const String = @import("String.zig"); 7 | -------------------------------------------------------------------------------- /src/dsl/grammar.zig: -------------------------------------------------------------------------------- 1 | const combn = @import("../combn/combn.zig"); 2 | const Result = combn.gllparser.Result; 3 | const Parser = combn.gllparser.Parser; 4 | const Error = combn.gllparser.Error; 5 | const Context = combn.gllparser.Context; 6 | const PosKey = combn.gllparser.PosKey; 7 | const ParserPath = combn.gllparser.ParserPath; 8 | const Sequence = combn.combinator.Sequence; 9 | const SequenceValue = combn.combinator.sequence.Value; 10 | const Repeated = combn.combinator.Repeated; 11 | const RepeatedValue = combn.combinator.repeated.Value; 12 | const Literal = combn.parser.Literal; 13 | const LiteralValue = combn.parser.literal.Value; 14 | const OneOf = combn.combinator.OneOf; 15 | const MapTo = combn.combinator.MapTo; 16 | const Optional = combn.combinator.Optional; 17 | 18 | const String = @import("String.zig"); 19 | const Node = @import("Node.zig"); 20 | const Compilation = @import("Compilation.zig"); 21 | const Identifier = @import("identifier.zig").Identifier; 22 | const CompilerContext = @import("CompilerContext.zig"); 23 | const pattern_grammar = @import("pattern_grammar.zig"); 24 | 25 | const std = @import("std"); 26 | const mem = std.mem; 
27 | const testing = std.testing;
28 | const assert = std.debug.assert;
29 | 
30 | pub fn mapLiteralToNone(in: Result(LiteralValue), compiler_context: *CompilerContext, _allocator: mem.Allocator, key: PosKey, path: ParserPath) Error!?Result(?Compilation) {
31 | _ = compiler_context;
32 | _ = _allocator;
33 | _ = path;
34 | _ = key;
35 | return switch (in.result) {
36 | .err => Result(?Compilation).initError(in.offset, in.result.err),
37 | else => Result(?Compilation).init(in.offset, null),
38 | };
39 | }
40 | 
41 | /// Maps a SequenceValue(*Node) -> a single unnamed *Node whose children are the nodes of
42 | /// the sequence.
43 | fn mapNodeSequence(in: Result(SequenceValue(*Node)), program_context: void, _allocator: mem.Allocator, key: PosKey, path: ParserPath) Error!?Result(*Node) {
44 | _ = program_context;
45 | switch (in.result) {
46 | .err => return Result(*Node).initError(in.offset, in.result.err),
47 | else => {
48 | var sequence = in.result.value;
49 | 
50 | // Collect all the child nodes.
51 | var children = std.ArrayList(*Node).init(_allocator);
52 | errdefer children.deinit();
53 | var sub = sequence.results.subscribe(key, path, Result(*Node).initError(in.offset, "matches only the empty language"));
54 | var offset = in.offset;
55 | while (sub.next()) |next| {
56 | offset = next.offset;
57 | try children.append(next.result.value.ref());
58 | }
59 | 
60 | const node = try Node.init(_allocator, String.init("unknown"), null);
61 | node.children = children.toOwnedSlice();
62 | return Result(*Node).init(in.offset, node);
63 | },
64 | }
65 | }
66 | 
67 | /// Maps a SequenceValue(?Compilation) -> a single ?Compilation which parses all compilations in
68 | /// sequence, emitting a single unnamed Node with the parsed nodes as children.
69 | fn mapCompilationSequence(in: Result(SequenceValue(?Compilation)), compiler_context: *CompilerContext, _allocator: mem.Allocator, key: PosKey, path: ParserPath) Error!?Result(?Compilation) {
70 | _ = compiler_context;
71 | switch (in.result) {
72 | .err => return Result(?Compilation).initError(in.offset, in.result.err),
73 | else => {
74 | var sequence = in.result.value;
75 | 
76 | // Collect all the parser compilations.
77 | var parsers = std.ArrayList(*Parser(void, *Node)).init(_allocator);
78 | var sub = sequence.results.subscribe(key, path, Result(?Compilation).initError(in.offset, "matches only the empty language"));
79 | var offset = in.offset;
80 | while (sub.next()) |next| {
81 | offset = next.offset;
82 | const compilation = next.result.value;
83 | if (compilation) |c| {
84 | try parsers.append(c.value.parser.ptr.ref());
85 | }
86 | }
87 | var slice = parsers.toOwnedSlice();
88 | 
89 | // Build a parser which maps the many Parser(void, *Node) compilations into a
90 | // single Parser(void, *Node) which has each node as a child.
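// (The owned `slice` is carried along in the resulting Compilation so that it can be
// freed when the Compilation is deinitialized; see Compilation.CompiledParser.deinit.)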
91 | var mapped = try MapTo(void, SequenceValue(*Node), *Node).init(_allocator, .{ 92 | .parser = (try Sequence(void, *Node).init(_allocator, slice, .borrowed)).ref(), 93 | .mapTo = mapNodeSequence, 94 | }); 95 | 96 | var result_compilation = Compilation.initParser(Compilation.CompiledParser{ 97 | .ptr = mapped.ref(), 98 | .slice = slice, 99 | }); 100 | return Result(?Compilation).init(offset, result_compilation); 101 | }, 102 | } 103 | } 104 | 105 | pub fn whitespaceOneOrMore(allocator: mem.Allocator) !*Parser(*CompilerContext, ?Compilation) { 106 | const newline = try MapTo(*CompilerContext, LiteralValue, ?Compilation).init(allocator, .{ 107 | .parser = (try OneOf(*CompilerContext, LiteralValue).init(allocator, &.{ 108 | (try Literal(*CompilerContext).init(allocator, "\r\n")).ref(), 109 | (try Literal(*CompilerContext).init(allocator, "\r")).ref(), 110 | (try Literal(*CompilerContext).init(allocator, "\n")).ref(), 111 | }, .copy)).ref(), 112 | .mapTo = mapLiteralToNone, 113 | }); 114 | 115 | const space = try MapTo(*CompilerContext, LiteralValue, ?Compilation).init(allocator, .{ 116 | .parser = (try OneOf(*CompilerContext, LiteralValue).init(allocator, &.{ 117 | (try Literal(*CompilerContext).init(allocator, " ")).ref(), 118 | (try Literal(*CompilerContext).init(allocator, "\t")).ref(), 119 | }, .copy)).ref(), 120 | .mapTo = mapLiteralToNone, 121 | }); 122 | 123 | const whitespace = try OneOf(*CompilerContext, ?Compilation).init(allocator, &.{ 124 | newline.ref(), 125 | space.ref(), 126 | }, .copy); 127 | 128 | // Whitespace+ 129 | return try MapTo(*CompilerContext, RepeatedValue(?Compilation), ?Compilation).init(allocator, .{ 130 | .parser = (try Repeated(*CompilerContext, ?Compilation).init(allocator, .{ 131 | .parser = whitespace.ref(), 132 | .min = 1, 133 | .max = -1, 134 | })).ref(), 135 | .mapTo = struct { 136 | fn mapTo(in: Result(RepeatedValue(?Compilation)), compiler_context: *CompilerContext, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(?Compilation) { 137 | _ = compiler_context; 138 | _ = _allocator; 139 | _ = key; 140 | _ = path; 141 | switch (in.result) { 142 | .err => return Result(?Compilation).initError(in.offset, in.result.err), 143 | else => { 144 | // optimization: newline and space parsers produce no compilations, so no 145 | // need for us to pay any attention to repeated results. 146 | return Result(?Compilation).init(in.offset, null); 147 | }, 148 | } 149 | } 150 | }.mapTo, 151 | }); 152 | } 153 | 154 | pub fn init(allocator: mem.Allocator) !*Parser(*CompilerContext, Compilation) { 155 | // DSL grammar 156 | // 157 | // ```ebnf 158 | // Newline = "\r\n" | "\r" | "\n" ; 159 | // Space = " " | "\t" ; 160 | // Whitespace = Newline | Space ; 161 | // Assignment = "=" ; 162 | // Semicolon = ";" ; 163 | // Identifier = /[A-Z][[:alnum:]_]*/ ; 164 | // NestedPattern = "/", Pattern, "/" ; 165 | // Expr = NestedPattern | Identifier ; 166 | // ExprList = (ExprList, ",")? , Expr ; 167 | // Definition = Identifier , Whitespace+, Assignment, Whitespace+, ExprList, Semicolon ; 168 | // Grammar = (Definition | Expr | Whitespace+)+, EOF ; 169 | // ``` 170 | // 171 | // TODO(dsl): Expr logical OR / alternation 172 | // TODO(dsl): Expr optional 173 | // TODO(dsl): Expr zero-or-more 174 | // TODO(dsl): Expr one-or-more 175 | // TODO(dsl): Expr repetition {x,y} 176 | // TODO(dsl): Expr grouping (...) 177 | // TODO(dsl): terminal string literals 178 | // TODO(dsl): comments 179 | // TODO(dsl): exception? 
"-" 180 | // TODO(dsl): positive/negative lookahead? Python: & followed by a symbol, token or parenthesized group indicates a positive lookahead (i.e., is required to match but not consumed), while ! indicates a negative lookahead (i.e., is required _not_ to match). 181 | 182 | const whitespace_one_or_more = try whitespaceOneOrMore(allocator); 183 | 184 | var assignment = try MapTo(*CompilerContext, LiteralValue, ?Compilation).init(allocator, .{ 185 | .parser = (try Literal(*CompilerContext).init(allocator, "=")).ref(), 186 | .mapTo = mapLiteralToNone, 187 | }); 188 | var semicolon = try MapTo(*CompilerContext, LiteralValue, ?Compilation).init(allocator, .{ 189 | .parser = (try Literal(*CompilerContext).init(allocator, ";")).ref(), 190 | .mapTo = mapLiteralToNone, 191 | }); 192 | var forward_slash = try MapTo(*CompilerContext, LiteralValue, ?Compilation).init(allocator, .{ 193 | .parser = (try Literal(*CompilerContext).init(allocator, "/")).ref(), 194 | .mapTo = mapLiteralToNone, 195 | }); 196 | 197 | var nested_pattern = try MapTo(*CompilerContext, SequenceValue(?Compilation), ?Compilation).init(allocator, .{ 198 | .parser = (try Sequence(*CompilerContext, ?Compilation).init(allocator, &.{ 199 | forward_slash.ref(), 200 | (try pattern_grammar.init(allocator)).ref(), 201 | forward_slash.ref(), 202 | }, .copy)).ref(), 203 | .mapTo = struct { 204 | fn mapTo(in: Result(SequenceValue(?Compilation)), compiler_context: *CompilerContext, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(?Compilation) { 205 | _ = compiler_context; 206 | _ = key; 207 | _ = path; 208 | switch (in.result) { 209 | .err => return Result(?Compilation).initError(in.offset, in.result.err), 210 | else => { 211 | var sequence = in.result.value; 212 | _ = sequence; 213 | 214 | // TODO(slimsag): actually compose the compilation to parse this regexp! 215 | const node = try Node.init(_allocator, String.init("TODO(slimsag): value from parsing regexp!"), null); 216 | const success = Result(*Node).init(in.offset, node); 217 | var always_success = try combn.combinator.Always(void, *Node).init(_allocator, success); 218 | 219 | var result_compilation = Compilation.initParser(Compilation.CompiledParser{ 220 | .ptr = always_success.ref(), 221 | .slice = null, 222 | }); 223 | return Result(?Compilation).init(in.offset, result_compilation); 224 | }, 225 | } 226 | } 227 | }.mapTo, 228 | }); 229 | 230 | var identifier_expr = try MapTo(*CompilerContext, ?Compilation, ?Compilation).init(allocator, .{ 231 | .parser = (try Identifier.init(allocator)).ref(), 232 | .mapTo = struct { 233 | fn mapTo(in: Result(?Compilation), compiler_context: *CompilerContext, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(?Compilation) { 234 | _ = _allocator; 235 | _ = key; 236 | _ = path; 237 | switch (in.result) { 238 | .err => return Result(?Compilation).initError(in.offset, in.result.err), 239 | else => { 240 | // Lookup this identifier, which was previously defined. 241 | // TODO(slimsag): make it possible to reference future-definitions? 242 | var compilation = compiler_context.identifiers.get(in.result.value.?); 243 | if (compilation == null) { 244 | // TODO(slimsag): include name of definition that was not found in error. 
245 | return Result(?Compilation).initError(in.offset, "definition not found"); 246 | } 247 | return Result(?Compilation).init(in.offset, compilation.?).toUnowned(); 248 | }, 249 | } 250 | } 251 | }.mapTo, 252 | }); 253 | var expr = try OneOf(*CompilerContext, ?Compilation).init(allocator, &.{ 254 | nested_pattern.ref(), 255 | identifier_expr.ref(), 256 | }, .copy); 257 | 258 | // ExprList = (ExprList, ",")? , Expr ; 259 | var expr_list_parsers = try allocator.alloc(*Parser(*CompilerContext, ?Compilation), 2); 260 | expr_list_parsers[1] = expr.ref(); // position 0 will be for left-recursive `(ExprList, ",")?` set later 261 | var expr_list = try MapTo(*CompilerContext, SequenceValue(?Compilation), ?Compilation).init(allocator, .{ 262 | .parser = (try Sequence(*CompilerContext, ?Compilation).init(allocator, expr_list_parsers, .owned)).ref(), 263 | .mapTo = mapCompilationSequence, 264 | }); 265 | // (ExprList, ",") 266 | var comma = try MapTo(*CompilerContext, LiteralValue, ?Compilation).init(allocator, .{ 267 | .parser = (try Literal(*CompilerContext).init(allocator, ",")).ref(), 268 | .mapTo = mapLiteralToNone, 269 | }); 270 | var expr_list_inner_left = try MapTo(*CompilerContext, SequenceValue(?Compilation), ?Compilation).init(allocator, .{ 271 | .parser = (try Sequence(*CompilerContext, ?Compilation).init(allocator, &.{ 272 | expr_list.ref(), 273 | comma.ref(), 274 | }, .copy)).ref(), 275 | .mapTo = struct { 276 | fn mapTo(in: Result(SequenceValue(?Compilation)), compiler_context: *CompilerContext, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(?Compilation) { 277 | _ = compiler_context; 278 | _ = _allocator; 279 | switch (in.result) { 280 | .err => return Result(?Compilation).initError(in.offset, in.result.err), 281 | else => { 282 | var sequence = in.result.value; 283 | var sub = sequence.results.subscribe(key, path, Result(?Compilation).initError(in.offset, "matches only the empty language")); 284 | 285 | var _expr_list = sub.next().?; 286 | _ = sub.next().?; // non-capturing compilation for comma 287 | assert(sub.next() == null); 288 | return _expr_list.toUnowned(); 289 | }, 290 | } 291 | } 292 | }.mapTo, 293 | }); 294 | var optional_expr_list_inner_left = try MapTo(*CompilerContext, ??Compilation, ?Compilation).init(allocator, .{ 295 | .parser = (try Optional(*CompilerContext, ?Compilation).init(allocator, expr_list_inner_left.ref())).ref(), 296 | .mapTo = struct { 297 | fn mapTo(in: Result(??Compilation), compiler_context: *CompilerContext, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(?Compilation) { 298 | _ = compiler_context; 299 | _ = _allocator; 300 | _ = key; 301 | _ = path; 302 | switch (in.result) { 303 | .err => return Result(?Compilation).initError(in.offset, in.result.err), 304 | else => { 305 | if (in.result.value == null) { 306 | return Result(?Compilation).init(in.offset, null); 307 | } 308 | return Result(?Compilation).init(in.offset, in.result.value.?).toUnowned(); 309 | }, 310 | } 311 | } 312 | }.mapTo, 313 | }); 314 | expr_list_parsers[0] = optional_expr_list_inner_left.ref(); 315 | 316 | var definition = try MapTo(*CompilerContext, SequenceValue(?Compilation), ?Compilation).init(allocator, .{ 317 | .parser = (try Sequence(*CompilerContext, ?Compilation).init(allocator, &.{ 318 | (try Identifier.init(allocator)).ref(), 319 | whitespace_one_or_more.ref(), 320 | assignment.ref(), 321 | whitespace_one_or_more.ref(), 322 | expr_list.ref(), 323 | semicolon.ref(), 324 | }, .copy)).ref(), 
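// The mapTo below unpacks the six sequence results above in order
// (Identifier, Whitespace+, "=", Whitespace+, ExprList, ";") and registers
// the expression list under the identifier's name so that later identifier
// expressions can resolve to it.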
325 | .mapTo = struct { 326 | fn mapTo(in: Result(SequenceValue(?Compilation)), compiler_context: *CompilerContext, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(?Compilation) { 327 | _ = _allocator; 328 | switch (in.result) { 329 | .err => return Result(?Compilation).initError(in.offset, in.result.err), 330 | else => { 331 | var sequence = in.result.value; 332 | var sub = sequence.results.subscribe(key, path, Result(?Compilation).initError(in.offset, "matches only the empty language")); 333 | 334 | var identifier = sub.next().?; 335 | _ = sub.next().?; // non-capturing compilation for whitespace 336 | _ = sub.next().?; // non-capturing compilation for assignment `=` operator 337 | _ = sub.next().?; // non-capturing compilation for whitespace 338 | var _expr_list = sub.next().?; 339 | var last = sub.next().?; // non-capturing compilation for semicolon 340 | assert(sub.next() == null); 341 | 342 | // Set identifier = _expr_list, so that future identifier expressions can 343 | // lookup the resulting expression compilation for the identifier. 344 | const v = try compiler_context.identifiers.getOrPut(identifier.result.value.?); 345 | if (v.found_existing) { 346 | // TODO(slimsag): include name of definition in error message 347 | return Result(?Compilation).initError(last.offset, "definition redefined"); 348 | } 349 | v.value_ptr.* = _expr_list.result.value.?; 350 | 351 | // A definition assignment yields no nodes. 352 | return Result(?Compilation).init(in.offset, null); 353 | }, 354 | } 355 | } 356 | }.mapTo, 357 | }); 358 | 359 | var definition_or_expr_or_whitespace = try OneOf(*CompilerContext, ?Compilation).init(allocator, &.{ 360 | definition.ref(), 361 | expr.ref(), 362 | whitespace_one_or_more.ref(), 363 | }, .copy); 364 | 365 | const non_null_root_compilation = try MapTo(*CompilerContext, RepeatedValue(?Compilation), ?Compilation).init(allocator, .{ 366 | .parser = (try Repeated(*CompilerContext, ?Compilation).init(allocator, .{ 367 | .parser = definition_or_expr_or_whitespace.ref(), 368 | .min = 1, 369 | .max = -1, 370 | })).ref(), 371 | .mapTo = struct { 372 | fn mapTo(in: Result(RepeatedValue(?Compilation)), compiler_context: *CompilerContext, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(?Compilation) { 373 | _ = compiler_context; 374 | _ = _allocator; 375 | switch (in.result) { 376 | .err => return Result(?Compilation).initError(in.offset, in.result.err), 377 | else => { 378 | var repeated = in.result.value; 379 | var sub = repeated.results.subscribe(key, path, Result(?Compilation).initError(in.offset, "matches only the empty language")); 380 | 381 | var offset = in.offset; 382 | var compilation: ?Result(?Compilation) = null; 383 | while (sub.next()) |next| { 384 | offset = next.offset; 385 | switch (next.result) { 386 | .value => |v| { 387 | if (v != null) { 388 | if (compilation == null) { 389 | compilation = Result(?Compilation).init(next.offset, v.?); 390 | } else { 391 | // another parse path yielded a compilation, i.e. our grammar was ambiguous - 392 | // and it definitely shouldn't be! 
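// (Each successful parse path yields its own result here, so reaching
// this branch a second time would mean the same input parsed two
// different ways.)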
393 |                                         unreachable;
394 |                                     }
395 |                                 }
396 |                             },
397 |                             .err => |e| return Result(?Compilation).initError(offset, e),
398 |                         }
399 |                     }
400 |                     if (compilation == null) {
401 |                         // Grammar does not have a root expression
402 |                         return Result(?Compilation).initError(offset, "root expression missing");
403 |                     }
404 |                     return compilation.?.toUnowned();
405 |                 },
406 |             }
407 |         }
408 |     }.mapTo,
409 | });
410 | 
411 |     const end = try MapTo(*CompilerContext, combn.parser.end.Value, ?Compilation).init(allocator, .{
412 |         .parser = (try combn.parser.End(*CompilerContext).init(allocator)).ref(),
413 |         .mapTo = struct {
414 |             fn mapTo(in: Result(combn.parser.end.Value), compiler_context: *CompilerContext, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(?Compilation) {
415 |                 _ = compiler_context;
416 |                 _ = _allocator;
417 |                 _ = key;
418 |                 _ = path;
419 |                 switch (in.result) {
420 |                     .err => return Result(?Compilation).initError(in.offset, in.result.err),
421 |                     else => return Result(?Compilation).init(in.offset, null),
422 |                 }
423 |             }
424 |         }.mapTo,
425 |     });
426 | 
427 |     const grammar_then_end = try Sequence(*CompilerContext, ?Compilation).init(allocator, &.{
428 |         non_null_root_compilation.ref(),
429 |         end.ref(),
430 |     }, .copy);
431 | 
432 |     return try MapTo(*CompilerContext, SequenceValue(?Compilation), Compilation).init(allocator, .{
433 |         .parser = grammar_then_end.ref(),
434 |         .mapTo = struct {
435 |             fn mapTo(in: Result(SequenceValue(?Compilation)), compiler_context: *CompilerContext, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(Compilation) {
436 |                 _ = compiler_context;
437 |                 _ = _allocator;
438 |                 // note: key and path are not discarded here - they are used
439 |                 // below to subscribe to the sequence's result stream.
440 |                 switch (in.result) {
441 |                     .err => return Result(Compilation).initError(in.offset, in.result.err),
442 |                     else => {
443 |                         var sequence = in.result.value;
444 | 
445 |                         var sub = sequence.results.subscribe(key, path, Result(?Compilation).initError(in.offset, "matches only the empty language"));
446 |                         const root_compilation = sub.next();
447 |                         assert(root_compilation != null);
448 |                         const _end = sub.next();
449 |                         assert(_end != null);
450 |                         assert(sub.next() == null);
451 |                         return Result(Compilation).init(in.offset, root_compilation.?.result.value.?).toUnowned();
452 |                     },
453 |                 }
454 |             }
455 |         }.mapTo,
456 |     });
457 | }
458 | 
--------------------------------------------------------------------------------
/src/dsl/identifier.zig:
--------------------------------------------------------------------------------
1 | const combn = @import("../combn/combn.zig");
2 | const Result = combn.gllparser.Result;
3 | const Parser = combn.gllparser.Parser;
4 | const Error = combn.gllparser.Error;
5 | const Context = combn.gllparser.Context;
6 | const PosKey = combn.gllparser.PosKey;
7 | const ParserPath = combn.gllparser.ParserPath;
8 | const NodeName = combn.gllparser.NodeName;
9 | 
10 | const String = @import("String.zig");
11 | const Compilation = @import("Compilation.zig");
12 | const CompilerContext = @import("CompilerContext.zig");
13 | 
14 | const std = @import("std");
15 | const testing = std.testing;
16 | const mem = std.mem;
17 | 
18 | /// Matches an identifier: an ASCII letter followed by any number of
19 | /// letters, digits, or underscores (e.g. `Grammar2`).
20 | ///
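/// A minimal usage sketch (hypothetical `allocator` and `ctx`, mirroring the
/// test at the bottom of this file):
///
///   var p = try Identifier.init(allocator);
///   defer p.deinit(allocator, null);
///   try p.parse(&ctx);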
21 | pub const Identifier = struct { 22 | parser: Parser(*CompilerContext, ?Compilation) = Parser(*CompilerContext, ?Compilation).init(parse, nodeName, null, null), 23 | 24 | const Self = @This(); 25 | 26 | pub fn init(allocator: mem.Allocator) !*Parser(*CompilerContext, ?Compilation) { 27 | const self = Self{}; 28 | return try self.parser.heapAlloc(allocator, self); 29 | } 30 | 31 | pub fn initStack() Self { 32 | return Self{}; 33 | } 34 | 35 | pub fn nodeName(parser: *const Parser(*CompilerContext, ?Compilation), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 { 36 | _ = parser; 37 | _ = node_name_cache; 38 | var v = std.hash_map.hashString("Identifier"); 39 | return v; 40 | } 41 | 42 | pub fn parse(parser: *const Parser(*CompilerContext, ?Compilation), in_ctx: *const Context(*CompilerContext, ?Compilation)) callconv(.Async) !void { 43 | _ = parser; 44 | var ctx = in_ctx.with({}); 45 | defer ctx.results.close(); 46 | 47 | const src = ctx.src[ctx.offset..]; 48 | 49 | var offset: usize = 0; 50 | if (src.len == 0) { 51 | try ctx.results.add(Result(?Compilation).initError(ctx.offset, "expected Identifier")); 52 | return; 53 | } 54 | { 55 | var isUpper = src[offset] >= 'A' and src[offset] <= 'Z'; 56 | var isLower = src[offset] >= 'a' and src[offset] <= 'z'; 57 | if (!isUpper and !isLower) { 58 | try ctx.results.add(Result(?Compilation).initError(ctx.offset + 1, "Identifier must start with a-zA-Z")); 59 | return; 60 | } 61 | } 62 | while (offset < src.len) { 63 | var isDigit = src[offset] >= '0' and src[offset] <= '9'; 64 | var isUpper = src[offset] >= 'A' and src[offset] <= 'Z'; 65 | var isLower = src[offset] >= 'a' and src[offset] <= 'z'; 66 | if (!isDigit and !isUpper and !isLower and src[offset] != '_') { 67 | break; 68 | } 69 | offset += 1; 70 | } 71 | try ctx.results.add(Result(?Compilation).init(ctx.offset + offset, Compilation.initIdentifier(String.init(src[0..offset])))); 72 | } 73 | }; 74 | 75 | test "identifier" { 76 | nosuspend { 77 | const allocator = testing.allocator; 78 | 79 | var compilerContext = try CompilerContext.init(allocator); 80 | defer compilerContext.deinit(allocator); 81 | var ctx = try Context(*CompilerContext, ?Compilation).init(allocator, "Grammar2", compilerContext); 82 | defer ctx.deinit(); 83 | 84 | var l = try Identifier.init(allocator); 85 | defer l.deinit(allocator, null); 86 | try l.parse(&ctx); 87 | 88 | var sub = ctx.subscribe(); 89 | var r1 = sub.next().?; 90 | try testing.expectEqual(@as(usize, 8), r1.offset); 91 | try testing.expectEqualStrings("Grammar2", r1.result.value.?.value.identifier.value); 92 | try testing.expect(sub.next() == null); 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/dsl/pattern_grammar.zig: -------------------------------------------------------------------------------- 1 | const combn = @import("../combn/combn.zig"); 2 | const Result = combn.gllparser.Result; 3 | const Parser = combn.gllparser.Parser; 4 | const Error = combn.gllparser.Error; 5 | const Context = combn.gllparser.Context; 6 | const PosKey = combn.gllparser.PosKey; 7 | const ParserPath = combn.gllparser.ParserPath; 8 | const MapTo = combn.combinator.MapTo; 9 | const Repeated = combn.combinator.Repeated; 10 | const RepeatedValue = combn.combinator.repeated.Value; 11 | const ByteRange = combn.parser.ByteRange; 12 | const ByteRangeValue = combn.parser.byte_range.Value; 13 | 14 | const Compilation = @import("Compilation.zig"); 15 | const CompilerContext = @import("CompilerContext.zig"); 16 | 17 | 
const std = @import("std");
18 | const mem = std.mem;
19 | 
20 | pub fn init(allocator: mem.Allocator) !*Parser(*CompilerContext, ?Compilation) {
21 |     // Pattern matching grammar
22 |     //
23 |     // ```ebnf
24 |     // Pattern = TBD ;
25 |     // ```
26 |     //
27 | 
28 |     const any_byte = try ByteRange(*CompilerContext).init(allocator, .{ .from = 0, .to = 255 });
29 |     const any_bytes = try Repeated(*CompilerContext, ByteRangeValue).init(allocator, .{
30 |         .parser = any_byte.ref(),
31 |         .min = 0,
32 |         .max = 1, // TODO(slimsag): make this parse more byte literals
33 |     });
34 | 
35 |     const literal_any_bytes = try MapTo(*CompilerContext, RepeatedValue(ByteRangeValue), ?Compilation).init(allocator, .{
36 |         .parser = any_bytes.ref(),
37 |         .mapTo = struct {
38 |             fn mapTo(in: Result(RepeatedValue(ByteRangeValue)), compiler_context: *CompilerContext, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(?Compilation) {
39 |                 _ = compiler_context;
40 |                 _ = _allocator;
41 |                 _ = key;
42 |                 _ = path;
43 |                 switch (in.result) {
44 |                     .err => return Result(?Compilation).initError(in.offset, in.result.err),
45 |                     else => {
46 |                         // optimization: the placeholder byte-range parser produces no
47 |                         // compilations, so there is nothing to collect from the repeated results.
48 |                         return Result(?Compilation).init(in.offset, null);
49 |                     },
50 |                 }
51 |             }
52 |         }.mapTo,
53 |     });
54 |     return literal_any_bytes;
55 | }
56 | 
--------------------------------------------------------------------------------
/src/zorex.zig:
--------------------------------------------------------------------------------
1 | pub const combn = @import("combn/combn.zig");
2 | pub const dsl = @import("dsl/dsl.zig");
3 | 
4 | test "include" {
5 |     _ = dsl.Program;
6 | }
7 | 
--------------------------------------------------------------------------------