├── .gitattributes
├── .github
│   ├── FUNDING.yml
│   ├── pull_request_template.md
│   └── workflows
│       └── ci.yml
├── .gitignore
├── LICENSE
├── LICENSE-APACHE
├── LICENSE-MIT
├── README.md
├── build.zig
└── src
    ├── combn
    │   ├── README.md
    │   ├── combinator
    │   │   ├── always.zig
    │   │   ├── combinator.zig
    │   │   ├── mapto.zig
    │   │   ├── oneof.zig
    │   │   ├── oneof_ambiguous.zig
    │   │   ├── optional.zig
    │   │   ├── reentrant.zig
    │   │   ├── repeated.zig
    │   │   ├── repeated_ambiguous.zig
    │   │   ├── sequence.zig
    │   │   └── sequence_ambiguous.zig
    │   ├── combn.zig
    │   ├── gllparser
    │   │   ├── ParserPath.zig
    │   │   ├── gllparser.zig
    │   │   ├── parser.zig
    │   │   └── result_stream.zig
    │   ├── parser
    │   │   ├── byte_range.zig
    │   │   ├── end.zig
    │   │   ├── literal.zig
    │   │   └── parser.zig
    │   └── test_complex.zig
    ├── dsl
    │   ├── Compilation.zig
    │   ├── CompilerContext.zig
    │   ├── Node.zig
    │   ├── Program.zig
    │   ├── String.zig
    │   ├── compiler.zig
    │   ├── dsl.zig
    │   ├── grammar.zig
    │   ├── identifier.zig
    │   └── pattern_grammar.zig
    └── zorex.zig
/.gitattributes:
--------------------------------------------------------------------------------
1 | * text=auto eol=lf
2 |
3 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: slimsag
2 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | - [ ] By selecting this checkbox, I agree to license my contributions to this project under the license(s) described in the LICENSE file, and I have the right to do so, or have received permission to do so from an employer or client for whom I am producing this work, who has this right.
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 | on:
3 | - push
4 | - pull_request
5 | jobs:
6 | test:
7 | runs-on: ubuntu-latest
8 | steps:
9 | - name: Checkout
10 | uses: actions/checkout@v2
11 | - name: Setup Zig
12 | run: |
13 | sudo apt-get install -y xz-utils
14 | sudo sh -c 'wget -c https://ziglang.org/builds/zig-linux-x86_64-0.10.0-dev.36+6fdf7ce0a.tar.xz -O - | tar -xJ --strip-components=1 -C /usr/local/bin'
15 | - name: test
16 | run: zig build test
17 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # This file is for zig-specific build artifacts.
2 | # If you have OS-specific or editor-specific files to ignore,
3 | # such as *.swp or .DS_Store, put those in your global
4 | # ~/.gitignore and put this in your ~/.gitconfig:
5 | #
6 | # [core]
7 | # excludesfile = ~/.gitignore
8 | #
9 | # Cheers!
10 | # -andrewrk
11 |
12 | zig-cache/
13 | /release/
14 | /debug/
15 | /build/
16 | /build-*/
17 | /docgen_tmp/
18 |
19 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2021, Hexops Contributors (given via the Git commit history).
2 |
3 | Licensed under the Apache License, Version 2.0 (see LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0)
4 | or the MIT license (see LICENSE-MIT or http://opensource.org/licenses/MIT), at
5 | your option. All files in the project without exclusions may not be copied,
6 | modified, or distributed except according to those terms.
7 |
--------------------------------------------------------------------------------
/LICENSE-APACHE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | Copyright (c) 2021 Hexops Contributors (given via the Git commit history).
2 |
3 | Permission is hereby granted, free of charge, to any
4 | person obtaining a copy of this software and associated
5 | documentation files (the "Software"), to deal in the
6 | Software without restriction, including without
7 | limitation the rights to use, copy, modify, merge,
8 | publish, distribute, sublicense, and/or sell copies of
9 | the Software, and to permit persons to whom the Software
10 | is furnished to do so, subject to the following
11 | conditions:
12 |
13 | The above copyright notice and this permission notice
14 | shall be included in all copies or substantial portions
15 | of the Software.
16 |
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 | DEALINGS IN THE SOFTWARE.
26 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Zorex: the omnipotent regex engine
2 |
3 | [](https://github.com/hexops/zorex/actions)
4 |
5 | Zorex blurs the line between a regex engine and the advanced parsing algorithms used to parse programming languages.
6 |
7 | With the most powerful of regex engines today, you [can't parse HTML](https://stackoverflow.com/questions/6751105/why-its-not-possible-to-use-regex-to-parse-html-xml-a-formal-explanation-in-la) (a context-free language) [or XML](https://stackoverflow.com/a/8578999) ([a context-sensitive language](https://softwareengineering.stackexchange.com/a/205725)), but _you can_ with Zorex.
8 |
9 | ## ⚠️ Project status: in-development ⚠️
10 |
11 | Zorex is under heavy development and is not yet ready for use. [Follow me on Twitter](https://twitter.com/slimsag) for updates.
12 |
13 | ## How does it work?
14 |
15 | Behind the scenes, Zorex parses a small DSL (the "zorex syntax", a regex-like syntax that enables opt-in [EBNF-like syntax](https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form)) and then [at runtime builds a parser specifically for your input grammar](https://devlog.hexops.com/2021/zig-parser-combinators-and-why-theyre-awesome).
16 |
17 | It's a bit like a traditional parser generator, but done at runtime (instead of through code generation) and with a deep level of syntactic compatibility with traditional regex engines.
18 |
19 | It uses [an optimized GLL parser combinator framework called Combn](./src/combn/README.md) to efficiently parse some of the most complex languages, including left- and right-recursive context-free languages and some context-sensitive languages.
20 |
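To make that pipeline concrete, here is a purely hypothetical usage sketch. Zorex's public API is not finalized and is not shown in this repository, so `compile` and `match` below are placeholder names, not the real API:

```zig
const std = @import("std");
const zorex = @import("zorex"); // placeholder import name

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // Hypothetical: parse the zorex-syntax pattern itself, then build a
    // parser for it at runtime (the combn framework does the heavy lifting).
    var pattern = try zorex.compile(allocator, "a(b|c)*"); // placeholder API
    defer pattern.deinit();

    // Hypothetical: run the runtime-built parser over an input string.
    std.debug.print("matched: {}\n", .{try pattern.match("abcbc")});
}
```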
21 | ## A quick note about academic terminology
22 |
23 | Technically, Zorex is "an advanced pattern matching engine", and it is arguably incorrect to call it a _regular expression engine_ because regular expressions by nature cannot parse non-regular languages (such as HTML).
24 |
25 | Any regex engine that supports backtracking, however, is _also_ "not a regular expression engine", as Larry Wall, the author of Perl's regex engine, [puts it](https://raku.org/archive/doc/design/apo/A05.html):
26 |
27 | > “Regular expressions” […] are only marginally related to real regular expressions. Nevertheless, the term has grown with the capabilities of our pattern matching engines, so I’m not going to try to fight linguistic necessity here. I will, however, generally call them “regexes” (or “regexen”, when I’m in an Anglo-Saxon mood).
28 |
29 | Since Zorex aims to maintain a deep level of syntactic compatibility with the regex engines people are familiar with, and to _further extend that_ to support parsing more complex non-regular languages, we call Zorex a regex engine.
30 |
--------------------------------------------------------------------------------
/build.zig:
--------------------------------------------------------------------------------
1 | const Builder = @import("std").build.Builder;
2 |
3 | pub fn build(b: *Builder) void {
4 | const mode = b.standardReleaseOptions();
5 | const lib = b.addStaticLibrary("zorex", "src/zorex.zig");
6 | lib.setBuildMode(mode);
7 | lib.install();
8 |
9 | var main_tests = b.addTest("src/zorex.zig");
10 | main_tests.test_evented_io = true;
11 | main_tests.setBuildMode(mode);
12 | main_tests.setMainPkgPath("src/");
13 |
14 | const test_step = b.step("test", "Run library tests");
15 | test_step.dependOn(&main_tests.step);
16 | }
17 |
--------------------------------------------------------------------------------
/src/combn/README.md:
--------------------------------------------------------------------------------
1 | # combn: runtime GLL parser combinators for Zig
2 |
3 | Combn is the core parsing technique behind Zorex.
4 |
5 | ## Runtime composition
6 |
7 | It is a runtime-composed [parser combinator](https://en.wikipedia.org/wiki/Parser_combinator) framework, which enables one to build parsers _at runtime_ (in contrast to traditional parser generators, which rely on ahead-of-time code generation), as described in ["Zig, Parser Combinators - and Why They're Awesome"](https://devlog.hexops.com/2021/zig-parser-combinators-and-why-theyre-awesome).
8 |
9 | Being runtime-composed means that you can, for example, define a DSL using parser combinators (such as a regex-like syntax) and then produce a new parser at runtime that actually parses inputs written in that syntax.
10 |
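As a minimal sketch of what runtime composition looks like in practice (mirroring the API used by the tests in this directory; import paths assume the file lives in `src/combn/`), a `OneOf` of two `Literal` parsers is composed and run entirely at runtime, so the set of alternatives could just as well come from user input:

```zig
const std = @import("std");

const gllparser = @import("gllparser/gllparser.zig");
const Parser = gllparser.Parser;
const ParserContext = gllparser.Context;

const Literal = @import("parser/literal.zig").Literal;
const LiteralValue = @import("parser/literal.zig").Value;
const OneOf = @import("combinator/oneof.zig").OneOf;

test "runtime composition" {
    nosuspend {
        const allocator = std.testing.allocator;
        const Payload = void;

        const ctx = try ParserContext(Payload, LiteralValue).init(allocator, "elloworld", {});
        defer ctx.deinit();

        // These parsers are plain runtime values; nothing about the grammar
        // is fixed at compile time.
        const parsers: []*Parser(Payload, LiteralValue) = &.{
            (try Literal(Payload).init(allocator, "ello")).ref(),
            (try Literal(Payload).init(allocator, "world")).ref(),
        };
        var helloOrWorld = try OneOf(Payload, LiteralValue).init(allocator, parsers, .borrowed);
        defer helloOrWorld.deinit(allocator, null);

        try helloOrWorld.parse(&ctx);

        // Results arrive as a stream; here the single match is "ello" (offset 4).
        var sub = ctx.subscribe();
        while (sub.next()) |result| std.debug.print("matched up to offset {}\n", .{result.offset});
    }
}
```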
11 | ## Generalized LL parser (GLL)
12 |
13 | Behind the scenes, combn uses a generalized LL (GLL) parsing algorithm described in:
14 |
15 | > "Generalized Parser Combinators", Daniel Spiewak, University of Wisconsin, 2010. Implemented as the [gll-combinators Scala library](https://github.com/djspiewak/gll-combinators), using continuation-passing style and trampolined dispatch.
16 |
17 | This enables combn to parse some of the most complex language grammars out there, including left- and right-recursive context-free grammars, as well as some context-sensitive grammars.
18 |
19 | You can read more about GLL parsing in this great article: ["General Parser Combinators in Racket" by Vegard Øye](https://epsil.github.io/gll/).
20 |
21 | ## Zig ≈ performance
22 |
23 | Most (almost all?) GLL parser implementations are written in higher-level languages (Haskell, Scala, OCaml, etc.), whose type systems (and, in Haskell's case, lazy evaluation) lend themselves very well to functional parsing approaches.
24 |
25 | As far as we know, Combn is among the first (if not the first) optimized GLL parser implementations in a low-level language like Zig.
26 |
27 | One reason this is relatively straightforward in Zig is its async support: we can leverage async Zig functions to effectively provide "lazy evaluation" and to "sleep" dependent parse paths, as is done in e.g. the Haskell implementations.
28 |
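A minimal, self-contained sketch of that Zig mechanism in isolation (this is not combn's actual code): an async function suspends itself, "sleeping" until other code resumes it, which is exactly the shape needed to park a dependent parse path:

```zig
const std = @import("std");

var sleeping: anyframe = undefined;

// "Sleeps" (suspends) until another piece of code resumes it, analogous to a
// dependent parse path waiting for results to become available.
fn dependentPath() void {
    suspend {
        sleeping = @frame();
    }
}

test "suspend and resume" {
    var frame = async dependentPath(); // runs until the suspend point
    resume sleeping; // wake the "sleeping" path
    nosuspend await frame; // the frame has completed, so this cannot suspend
}
```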
29 | ## Advantages over other GLL parsers
30 |
31 | Combn has a few advantages over other GLL parsers:
32 |
33 | ### Optimized parse-node-localized memoization
34 |
35 | The original GLL parsing algorithm is O(n^3) worst-case, better than GLR which is O(n^4) worst-case.
36 |
37 | Combn uses an even more optimized GLL parsing algorithm than the original, with parse-node-localized memoization, approximately the same as described in:
38 |
39 | > "Faster, Practical GLL Parsing", Ali Afroozeh and Anastasia Izmaylova, Centrum Wiskunde & Informatica,1098 XG Amsterdam, The Netherlands,
40 |
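As an illustration of what "parse-node-localized" means (the types below are hypothetical, for the sketch only; combn's actual machinery lives in `gllparser/`), results are keyed on the pair of parser node and input offset, so re-invoking the same node at the same position is a cache hit rather than a re-parse:

```zig
const std = @import("std");

// Hypothetical memoization key: one entry per (parser node, input offset) pair.
const MemoKey = struct {
    node_name: u64, // hash identifying a parser node (compare `nodeName` in this repo)
    offset: usize, // position in the input where the node was invoked
};

test "parse-node-localized memoization" {
    var memo = std.AutoHashMap(MemoKey, usize).init(std.testing.allocator);
    defer memo.deinit();

    // First invocation of node 42 at offset 0: compute and cache a result.
    try memo.put(.{ .node_name = 42, .offset = 0 }, 4);

    // Re-invocation of the same node at the same offset: reuse the cached result.
    try std.testing.expectEqual(@as(?usize, 4), memo.get(.{ .node_name = 42, .offset = 0 }));
}
```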
41 | ### Support for same-position reentrant parsers
42 |
43 | Some left-recursive parsers require same-position reentrancy, e.g.:
44 |
45 | ```ebnf
46 | Expr = Expr?, "abc" ;
47 | Grammar = Expr ;
48 | ```
49 |
50 | Here an input string "abcabcabc" requires that `Expr` be parsed at offset=0 multiple times (in order to "greedily consume" the entire input).
51 |
52 | Many GLL parser implementations behave differently depending on whether the above grammar is defined as `Grammar = Expr ;` or `Grammar = Expr, EOF ;` - matching only a single "abc" without EOF, and matching greedily otherwise. Some implementations also handle this using a globalized rollback system when the entire parse fails, requiring re-parsing from the root of the parse tree.
53 |
54 | Combn uses parse-node-localized retries in the case of same-position reentrant grammars, which provides both better theoretical performance and consistent results regardless of what comes next in the grammar.
55 |
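The retries are implemented by the `Reentrant` combinator in `combinator/reentrant.zig`, and wrapping a parser is a single call. The sketch below (import paths assume `src/combn/`) wraps a plain `Literal` just to show the shape; the wrapper only pays off when the wrapped parser is genuinely left-recursive, which is elided here:

```zig
const std = @import("std");

const gllparser = @import("gllparser/gllparser.zig");
const ParserContext = gllparser.Context;

const Literal = @import("parser/literal.zig").Literal;
const LiteralValue = @import("parser/literal.zig").Value;
const Reentrant = @import("combinator/reentrant.zig").Reentrant;

test "reentrant wrapping" {
    nosuspend {
        const allocator = std.testing.allocator;
        const Payload = void;

        const ctx = try ParserContext(Payload, LiteralValue).init(allocator, "abc", {});
        defer ctx.deinit();

        // Wrap the inner parser so it may be re-entered at the same position.
        const expr = try Reentrant(Payload, LiteralValue).init(
            allocator,
            (try Literal(Payload).init(allocator, "abc")).ref(),
        );
        defer expr.deinit(allocator, null);

        try expr.parse(&ctx);

        var sub = ctx.subscribe();
        while (sub.next()) |result| std.debug.print("offset: {}\n", .{result.offset});
    }
}
```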
56 | ### Enumeration of all possible parse paths for ambiguous grammars
57 |
58 | Many parser combinator frameworks opt to only enable navigating down one possible "committed" path of ambiguous grammars; this makes dealing with the resulting data types easier, but it means it is not possible to enumerate all the ways an ambiguous grammar could have been parsed.
59 |
60 | Combn uses fully generic type parameters, which does make it slightly more complex than other parser libraries but also enables enumerating all possible parse paths.
61 |
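Concretely, mirroring the `oneof_ambiguous` test in this directory, the ambiguous grammar `"ello" | "elloworld"` yields both parses as separate results on the stream:

```zig
const std = @import("std");

const gllparser = @import("gllparser/gllparser.zig");
const Parser = gllparser.Parser;
const ParserContext = gllparser.Context;

const Literal = @import("parser/literal.zig").Literal;
const LiteralValue = @import("parser/literal.zig").Value;
const OneOfAmbiguous = @import("combinator/oneof_ambiguous.zig").OneOfAmbiguous;

test "enumerate ambiguous parses" {
    nosuspend {
        const allocator = std.testing.allocator;
        const Payload = void;

        const ctx = try ParserContext(Payload, LiteralValue).init(allocator, "elloworld", {});
        defer ctx.deinit();

        const parsers: []*Parser(Payload, LiteralValue) = &.{
            (try Literal(Payload).init(allocator, "ello")).ref(),
            (try Literal(Payload).init(allocator, "elloworld")).ref(),
        };
        var ambiguous = try OneOfAmbiguous(Payload, LiteralValue).init(allocator, parsers, .borrowed);
        defer ambiguous.deinit(allocator, null);

        try ambiguous.parse(&ctx);

        // Yields "ello" (offset 4) and then "elloworld" (offset 9) before the
        // stream closes -- both parse paths are enumerable.
        var sub = ctx.subscribe();
        while (sub.next()) |result| std.debug.print("parse path ends at offset {}\n", .{result.offset});
    }
}
```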
62 | ## How do I use it?
63 |
64 | You can look at [`test_complex.zig`](test_complex.zig) for some ideas, but note a few things:
65 |
66 | 1. **The usage is quite complex**
67 | - Due to being type-agnostic (you can define your own "AST node" result value, or compute and return results directly from within parsers) AND due to supporting full enumeration of ambiguous grammars, there is a lot of type munging required.
68 | 2. **You probably don't want to use this API directly**
69 | - I am working on an EBNF-like DSL grammar on top of this API, which will enable you to quickly define a language in EBNF form and get a parser for it (built at runtime) - a far more reasonable interface.
70 | 3. Some parts of the API are still in motion / can be simplified.
71 |
72 | [Follow me on Twitter](https://twitter.com/slimsag) for updates.
73 |
--------------------------------------------------------------------------------
/src/combn/combinator/always.zig:
--------------------------------------------------------------------------------
1 | const gllparser = @import("../gllparser/gllparser.zig");
2 | const Error = gllparser.Error;
3 | const Parser = gllparser.Parser;
4 | const ParserContext = gllparser.Context;
5 | const Result = gllparser.Result;
6 | const NodeName = gllparser.NodeName;
7 |
8 | const std = @import("std");
9 | const testing = std.testing;
10 | const mem = std.mem;
11 |
12 | pub const Void = struct {
13 | pub fn deinit(self: *const @This(), allocator: mem.Allocator) void {
14 | _ = self;
15 | _ = allocator;
16 | }
17 | };
18 |
19 | /// If the result is not `null`, its `.offset` value will be updated to reflect the current parse
20 | /// position before Always returns it.
21 | pub fn Context(comptime Value: type) type {
22 | return ?Result(Value);
23 | }
24 |
25 | /// Always yields the input value (once/unambiguously), or no value (if the input value is null).
26 | ///
27 | /// The `input` value is taken ownership of by the parser, and deinitialized once the parser is.
28 | pub fn Always(comptime Payload: type, comptime Value: type) type {
29 | return struct {
30 | parser: Parser(Payload, Value) = Parser(Payload, Value).init(parse, nodeName, deinit, null),
31 | input: Context(Value),
32 |
33 | const Self = @This();
34 |
35 | pub fn init(allocator: mem.Allocator, input: Context(Value)) !*Parser(Payload, Value) {
36 | const self = Self{ .input = input };
37 | return try self.parser.heapAlloc(allocator, self);
38 | }
39 |
40 | pub fn initStack(input: Context(Value)) Self {
41 | return Self{ .input = input };
42 | }
43 |
44 | pub fn deinit(parser: *Parser(Payload, Value), allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void {
45 | _ = freed;
46 | const self = @fieldParentPtr(Self, "parser", parser);
47 | if (self.input) |input| input.deinit(allocator);
48 | }
49 |
50 | pub fn nodeName(parser: *const Parser(Payload, Value), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 {
51 | _ = node_name_cache;
52 | const self = @fieldParentPtr(Self, "parser", parser);
53 |
54 | var v = std.hash_map.hashString("Always");
55 | v +%= std.hash_map.getAutoHashFn(?Result(Value), void)({}, self.input);
56 | return v;
57 | }
58 |
59 | pub fn parse(parser: *const Parser(Payload, Value), in_ctx: *const ParserContext(Payload, Value)) callconv(.Async) Error!void {
60 | const self = @fieldParentPtr(Self, "parser", parser);
61 | var ctx = in_ctx.with(self.input);
62 | defer ctx.results.close();
63 |
64 | if (self.input) |input| {
65 | var tmp = input.toUnowned();
66 | tmp.offset = ctx.offset;
67 | try ctx.results.add(tmp);
68 | }
69 | }
70 | };
71 | }
72 |
73 | test "always" {
74 | nosuspend {
75 | const allocator = testing.allocator;
76 |
77 | const Payload = void;
78 | const ctx = try ParserContext(Payload, Void).init(allocator, "hello world", {});
79 | defer ctx.deinit();
80 |
81 | const noop = try Always(Payload, Void).init(allocator, null);
82 | defer noop.deinit(allocator, null);
83 |
84 | try noop.parse(&ctx);
85 |
86 | var sub = ctx.subscribe();
87 | try testing.expect(sub.next() == null);
88 | }
89 | }
90 |
--------------------------------------------------------------------------------
/src/combn/combinator/combinator.zig:
--------------------------------------------------------------------------------
1 | pub const always = @import("always.zig");
2 | pub const Always = always.Always;
3 |
4 | pub const mapto = @import("mapto.zig");
5 | pub const MapTo = mapto.MapTo;
6 |
7 | pub const oneof_ambiguous = @import("oneof_ambiguous.zig");
8 | pub const OneOfAmbiguous = oneof_ambiguous.OneOfAmbiguous;
9 |
10 | pub const oneof = @import("oneof.zig");
11 | pub const OneOf = oneof.OneOf;
12 |
13 | pub const optional = @import("optional.zig");
14 | pub const Optional = optional.Optional;
15 |
16 | pub const reentrant = @import("reentrant.zig");
17 | pub const Reentrant = reentrant.Reentrant;
18 |
19 | pub const repeated_ambiguous = @import("repeated_ambiguous.zig");
20 | pub const RepeatedAmbiguous = repeated_ambiguous.RepeatedAmbiguous;
21 |
22 | pub const repeated = @import("repeated.zig");
23 | pub const Repeated = repeated.Repeated;
24 |
25 | pub const sequence_ambiguous = @import("sequence_ambiguous.zig");
26 | pub const SequenceAmbiguous = sequence_ambiguous.SequenceAmbiguous;
27 |
28 | pub const sequence = @import("sequence.zig");
29 | pub const Sequence = sequence.Sequence;
30 |
31 | test "include" {
32 | _ = OneOfAmbiguous;
33 | _ = RepeatedAmbiguous;
34 | }
35 |
--------------------------------------------------------------------------------
/src/combn/combinator/mapto.zig:
--------------------------------------------------------------------------------
1 | const gllparser = @import("../gllparser/gllparser.zig");
2 | const Error = gllparser.Error;
3 | const Parser = gllparser.Parser;
4 | const ParserContext = gllparser.Context;
5 | const Result = gllparser.Result;
6 | const NodeName = gllparser.NodeName;
7 | const PosKey = gllparser.PosKey;
8 | const ParserPath = gllparser.ParserPath;
9 |
10 | const Literal = @import("../parser/literal.zig").Literal;
11 | const LiteralValue = @import("../parser/literal.zig").Value;
12 |
13 | const std = @import("std");
14 | const testing = std.testing;
15 | const mem = std.mem;
16 |
17 | pub fn Context(comptime Payload: type, comptime Value: type, comptime Target: type) type {
18 | return struct {
19 | parser: *Parser(Payload, Value),
20 | mapTo: fn (in: Result(Value), payload: Payload, allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(Target),
21 | };
22 | }
23 |
24 | /// Wraps the `input.parser`, mapping its value to the `dst` type.
25 | ///
26 | /// The `input.parser` must remain alive for as long as the `MapTo` parser will be used.
27 | pub fn MapTo(comptime Payload: type, comptime Value: type, comptime Target: type) type {
28 | return struct {
29 | parser: Parser(Payload, Target) = Parser(Payload, Target).init(parse, nodeName, deinit, countReferencesTo),
30 | input: Context(Payload, Value, Target),
31 |
32 | const Self = @This();
33 |
34 | pub fn init(allocator: mem.Allocator, input: Context(Payload, Value, Target)) !*Parser(Payload, Target) {
35 | const self = Self{ .input = input };
36 | return try self.parser.heapAlloc(allocator, self);
37 | }
38 |
39 | pub fn initStack(input: Context(Payload, Value, Target)) Self {
40 | return Self{ .input = input };
41 | }
42 |
43 | pub fn deinit(parser: *Parser(Payload, Target), allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void {
44 | const self = @fieldParentPtr(Self, "parser", parser);
45 | self.input.parser.deinit(allocator, freed);
46 | }
47 |
48 | pub fn countReferencesTo(parser: *const Parser(Payload, Target), other: usize, freed: *std.AutoHashMap(usize, void)) usize {
49 | const self = @fieldParentPtr(Self, "parser", parser);
50 | if (@ptrToInt(parser) == other) return 1;
51 | return self.input.parser.countReferencesTo(other, freed);
52 | }
53 |
54 | pub fn nodeName(parser: *const Parser(Payload, Target), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 {
55 | const self = @fieldParentPtr(Self, "parser", parser);
56 |
57 | var v = std.hash_map.hashString("MapTo");
58 | v +%= try self.input.parser.nodeName(node_name_cache);
59 | v +%= @ptrToInt(self.input.mapTo);
60 | return v;
61 | }
62 |
63 | pub fn parse(parser: *const Parser(Payload, Target), in_ctx: *const ParserContext(Payload, Target)) callconv(.Async) !void {
64 | const self = @fieldParentPtr(Self, "parser", parser);
65 | var ctx = in_ctx.with(self.input);
66 | defer ctx.results.close();
67 |
68 | const child_node_name = try ctx.input.parser.nodeName(&in_ctx.memoizer.node_name_cache);
69 | const child_ctx = try in_ctx.initChild(Value, child_node_name, ctx.offset);
70 | defer child_ctx.deinitChild();
71 | if (!child_ctx.existing_results) try ctx.input.parser.parse(&child_ctx);
72 |
73 | var sub = child_ctx.subscribe();
74 | var closed = false;
75 | while (sub.next()) |next| {
76 | if (closed) {
77 | continue;
78 | }
79 | var frame = try std.heap.page_allocator.allocAdvanced(u8, 16, @frameSize(self.input.mapTo), std.mem.Allocator.Exact.at_least);
80 | defer std.heap.page_allocator.free(frame);
81 | const mapped = try await @asyncCall(frame, {}, self.input.mapTo, .{ next, in_ctx.input, ctx.allocator, ctx.key, ctx.path });
82 | if (mapped == null) {
83 | closed = true;
84 | continue;
85 | }
86 | try ctx.results.add(mapped.?);
87 | }
88 | }
89 | };
90 | }
91 |
92 | test "mapto" {
93 | nosuspend {
94 | const allocator = testing.allocator;
95 |
96 | const String = struct {
97 | value: []const u8,
98 |
99 | pub fn init(value: []const u8) @This() {
100 | return .{ .value = value };
101 | }
102 |
103 | pub fn deinit(self: *const @This(), _allocator: mem.Allocator) void {
104 | _ = self;
105 | _ = _allocator;
106 | }
107 | };
108 |
109 | const Payload = void;
110 | const ctx = try ParserContext(Payload, String).init(allocator, "hello world", {});
111 | defer ctx.deinit();
112 |
113 | const mapTo = try MapTo(Payload, LiteralValue, String).init(allocator, .{
114 | .parser = (try Literal(Payload).init(allocator, "hello")).ref(),
115 | .mapTo = struct {
116 | fn mapTo(in: Result(LiteralValue), payload: Payload, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(String) {
117 | _ = payload;
118 | _ = _allocator;
119 | _ = key;
120 | _ = path;
121 | switch (in.result) {
122 | .err => return Result(String).initError(in.offset, in.result.err),
123 | else => return Result(String).init(in.offset, String.init("hello")),
124 | }
125 | }
126 | }.mapTo,
127 | });
128 | defer mapTo.deinit(allocator, null);
129 |
130 | try mapTo.parse(&ctx);
131 |
132 | var sub = ctx.subscribe();
133 | var first = sub.next().?;
134 | try testing.expectEqual(Result(String).init(5, String.init("hello")), first);
135 | try testing.expect(sub.next() == null);
136 | }
137 | }
138 |
--------------------------------------------------------------------------------
/src/combn/combinator/oneof.zig:
--------------------------------------------------------------------------------
1 | const gllparser = @import("../gllparser/gllparser.zig");
2 | const Error = gllparser.Error;
3 | const Parser = gllparser.Parser;
4 | const ParserContext = gllparser.Context;
5 | const Result = gllparser.Result;
6 | const NodeName = gllparser.NodeName;
7 | const ResultStream = gllparser.ResultStream;
8 |
9 | const Literal = @import("../parser/literal.zig").Literal;
10 | const LiteralValue = @import("../parser/literal.zig").Value;
11 |
12 | const std = @import("std");
13 | const testing = std.testing;
14 | const mem = std.mem;
15 |
16 | pub fn Context(comptime Payload: type, comptime Value: type) type {
17 | return []const *Parser(Payload, Value);
18 | }
19 |
20 | pub const Ownership = enum {
21 | borrowed,
22 | owned,
23 | copy,
24 | };
25 |
26 | /// Matches one of the given `input` parsers, matching the first parse path. If ambiguous grammar
27 | /// matching is desired, see `OneOfAmbiguous`.
28 | ///
29 | /// The `input` parsers must remain alive for as long as the `OneOf` parser will be used.
30 | ///
31 | /// In the case of a non-ambiguous `OneOf` grammar of `Parser1 | Parser2`, the combinator will
32 | /// yield:
33 | ///
34 | /// ```
35 | /// stream(Parser1Value)
36 | /// ```
37 | ///
38 | /// Or:
39 | ///
40 | /// ```
41 | /// stream(Parser2Value)
42 | /// ```
43 | ///
44 | /// In the case of an ambiguous grammar `Parser1 | Parser2` where either parser can produce three
45 | /// different parse paths, it will always yield the first successful path.
46 | pub fn OneOf(comptime Payload: type, comptime Value: type) type {
47 | return struct {
48 | parser: Parser(Payload, Value) = Parser(Payload, Value).init(parse, nodeName, deinit, countReferencesTo),
49 | input: Context(Payload, Value),
50 | ownership: Ownership,
51 |
52 | const Self = @This();
53 |
54 | pub fn init(allocator: mem.Allocator, input: Context(Payload, Value), ownership: Ownership) !*Parser(Payload, Value) {
55 | var self = Self{ .input = input, .ownership = ownership };
56 | if (ownership == .copy) {
57 | const Elem = std.meta.Elem(@TypeOf(input));
58 | var copy = try allocator.alloc(Elem, input.len);
59 | std.mem.copy(Elem, copy, input);
60 | self.input = copy;
61 | self.ownership = .owned;
62 | }
63 | return try self.parser.heapAlloc(allocator, self);
64 | }
65 |
66 | pub fn initStack(input: Context(Payload, Value), ownership: Ownership) Self {
67 | if (ownership == Ownership.copy) unreachable;
68 | return Self{ .input = input, .ownership = ownership };
69 | }
70 |
71 | pub fn deinit(parser: *Parser(Payload, Value), allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void {
72 | const self = @fieldParentPtr(Self, "parser", parser);
73 | for (self.input) |in_parser| {
74 | in_parser.deinit(allocator, freed);
75 | }
76 | if (self.ownership == .owned) allocator.free(self.input);
77 | }
78 |
79 | pub fn countReferencesTo(parser: *const Parser(Payload, Value), other: usize, freed: *std.AutoHashMap(usize, void)) usize {
80 | const self = @fieldParentPtr(Self, "parser", parser);
81 | if (@ptrToInt(parser) == other) return 1;
82 | var count: usize = 0;
83 | for (self.input) |in_parser| {
84 | count += in_parser.countReferencesTo(other, freed);
85 | }
86 | return count;
87 | }
88 |
89 | pub fn nodeName(parser: *const Parser(Payload, Value), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 {
90 | const self = @fieldParentPtr(Self, "parser", parser);
91 |
92 | var v = std.hash_map.hashString("OneOf");
93 | for (self.input) |in_parser| {
94 | v +%= try in_parser.nodeName(node_name_cache);
95 | }
96 | return v;
97 | }
98 |
99 | pub fn parse(parser: *const Parser(Payload, Value), in_ctx: *const ParserContext(Payload, Value)) callconv(.Async) !void {
100 | const self = @fieldParentPtr(Self, "parser", parser);
101 | var ctx = in_ctx.with(self.input);
102 | defer ctx.results.close();
103 |
104 | var gotValues: usize = 0;
105 | for (self.input) |in_parser| {
106 | const child_node_name = try in_parser.nodeName(&in_ctx.memoizer.node_name_cache);
107 | var child_ctx = try in_ctx.initChild(Value, child_node_name, ctx.offset);
108 | defer child_ctx.deinitChild();
109 | if (!child_ctx.existing_results) try in_parser.parse(&child_ctx);
110 | var sub = child_ctx.subscribe();
111 | while (sub.next()) |next| {
112 | switch (next.result) {
113 | .err => {},
114 | else => {
115 | // TODO(slimsag): need path committal functionality
116 | if (gotValues == 0) try ctx.results.add(next.toUnowned());
117 | gotValues += 1;
118 | },
119 | }
120 | }
121 | }
122 | if (gotValues == 0) {
123 | // All parse paths failed, so return a nice error.
124 | //
125 | // TODO(slimsag): include names of expected input parsers
126 | //
127 | // TODO(slimsag): collect and return the furthest error if a parse path made
128 | // progress and failed.
129 | try ctx.results.add(Result(Value).initError(ctx.offset, "expected OneOf"));
130 | }
131 | }
132 | };
133 | }
134 |
135 | // Confirms that the following grammar works as expected:
136 | //
137 | // ```ebnf
138 | // Grammar = "ello" | "world" ;
139 | // ```
140 | //
141 | test "oneof" {
142 | nosuspend {
143 | const allocator = testing.allocator;
144 |
145 | const Payload = void;
146 | const ctx = try ParserContext(Payload, LiteralValue).init(allocator, "elloworld", {});
147 | defer ctx.deinit();
148 |
149 | const parsers: []*Parser(Payload, LiteralValue) = &.{
150 | (try Literal(Payload).init(allocator, "ello")).ref(),
151 | (try Literal(Payload).init(allocator, "world")).ref(),
152 | };
153 | var helloOrWorld = try OneOf(Payload, LiteralValue).init(allocator, parsers, .borrowed);
154 | defer helloOrWorld.deinit(allocator, null);
155 | try helloOrWorld.parse(&ctx);
156 |
157 | var sub = ctx.subscribe();
158 | var r1 = sub.next().?;
159 | try testing.expectEqual(@as(usize, 4), r1.offset);
160 | try testing.expectEqualStrings("ello", r1.result.value.value);
161 | try testing.expect(sub.next() == null); // stream closed
162 | }
163 | }
164 |
165 | // Confirms behavior of the following grammar, which is ambiguous and should use OneOfAmbiguous
166 | // instead of OneOf if ambiguity needs to be enumerated:
167 | //
168 | // ```ebnf
169 | // Grammar = "ello" | "elloworld" ;
170 | // ```
171 | //
172 | test "oneof_ambiguous_first" {
173 | nosuspend {
174 | const allocator = testing.allocator;
175 |
176 | const Payload = void;
177 | const ctx = try ParserContext(Payload, LiteralValue).init(allocator, "elloworld", {});
178 | defer ctx.deinit();
179 |
180 | const parsers: []*Parser(Payload, LiteralValue) = &.{
181 | (try Literal(Payload).init(allocator, "ello")).ref(),
182 | (try Literal(Payload).init(allocator, "elloworld")).ref(),
183 | };
184 | var helloOrWorld = try OneOf(Payload, LiteralValue).init(allocator, parsers, .borrowed);
185 | defer helloOrWorld.deinit(allocator, null);
186 | try helloOrWorld.parse(&ctx);
187 |
188 | var sub = ctx.subscribe();
189 | var r1 = sub.next().?;
190 | try testing.expectEqual(@as(usize, 4), r1.offset);
191 | try testing.expectEqualStrings("ello", r1.result.value.value);
192 | try testing.expect(sub.next() == null); // stream closed
193 | }
194 | }
195 |
--------------------------------------------------------------------------------
/src/combn/combinator/oneof_ambiguous.zig:
--------------------------------------------------------------------------------
1 | const gllparser = @import("../gllparser/gllparser.zig");
2 | const Error = gllparser.Error;
3 | const Parser = gllparser.Parser;
4 | const ParserContext = gllparser.Context;
5 | const Result = gllparser.Result;
6 | const NodeName = gllparser.NodeName;
7 | const ResultStream = gllparser.ResultStream;
8 |
9 | const Literal = @import("../parser/literal.zig").Literal;
10 | const LiteralValue = @import("../parser/literal.zig").Value;
11 |
12 | const std = @import("std");
13 | const testing = std.testing;
14 | const mem = std.mem;
15 |
16 | pub fn Context(comptime Payload: type, comptime Value: type) type {
17 | return []const *Parser(Payload, Value);
18 | }
19 |
20 | pub const Ownership = enum {
21 | borrowed,
22 | owned,
23 | copy,
24 | };
25 |
26 | /// Matches one of the given `input` parsers, supporting ambiguous and unambiguous grammars.
27 | ///
28 | /// The `input` parsers must remain alive for as long as the `OneOfAmbiguous` parser will be used.
29 | ///
30 | /// In the case of a non-ambiguous `OneOfAmbiguous` grammar of `Parser1 | Parser2`, the combinator will
31 | /// yield:
32 | ///
33 | /// ```
34 | /// stream(Parser1Value)
35 | /// ```
36 | ///
37 | /// Or:
38 | ///
39 | /// ```
40 | /// stream(Parser2Value)
41 | /// ```
42 | ///
43 | /// In the case of an ambiguous grammar `Parser1 | Parser2` where either parser can produce three
44 | /// different parse paths, it will yield:
45 | ///
46 | /// ```
47 | /// stream(
48 | /// Parser1Value1,
49 | /// Parser1Value2,
50 | /// Parser1Value3,
51 | /// Parser2Value1,
52 | /// Parser2Value2,
53 | /// Parser2Value3,
54 | /// )
55 | /// ```
56 | ///
57 | pub fn OneOfAmbiguous(comptime Payload: type, comptime Value: type) type {
58 | return struct {
59 | parser: Parser(Payload, Value) = Parser(Payload, Value).init(parse, nodeName, deinit, countReferencesTo),
60 | input: Context(Payload, Value),
61 | ownership: Ownership,
62 |
63 | const Self = @This();
64 |
65 | pub fn init(allocator: mem.Allocator, input: Context(Payload, Value), ownership: Ownership) !*Parser(Payload, Value) {
66 | var self = Self{ .input = input, .ownership = ownership };
67 | if (ownership == .copy) {
68 | const Elem = std.meta.Elem(@TypeOf(input));
69 | var copy = try allocator.alloc(Elem, input.len);
70 | std.mem.copy(Elem, copy, input);
71 | self.input = copy;
72 | self.ownership = .owned;
73 | }
74 | return try self.parser.heapAlloc(allocator, self);
75 | }
76 |
77 | pub fn initStack(input: Context(Payload, Value), ownership: Ownership) Self {
78 | if (ownership == Ownership.copy) unreachable;
79 | return Self{ .input = input, .ownership = ownership };
80 | }
81 |
82 | pub fn deinit(parser: *Parser(Payload, Value), allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void {
83 | const self = @fieldParentPtr(Self, "parser", parser);
84 | for (self.input) |in_parser| {
85 | in_parser.deinit(allocator, freed);
86 | }
87 | if (self.ownership == .owned) allocator.free(self.input);
88 | }
89 |
90 | pub fn countReferencesTo(parser: *const Parser(Payload, Value), other: usize, freed: *std.AutoHashMap(usize, void)) usize {
91 | const self = @fieldParentPtr(Self, "parser", parser);
92 | if (@ptrToInt(parser) == other) return 1;
93 | var count: usize = 0;
94 | for (self.input) |in_parser| {
95 | count += in_parser.countReferencesTo(other, freed);
96 | }
97 | return count;
98 | }
99 |
100 | pub fn nodeName(parser: *const Parser(Payload, Value), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 {
101 | const self = @fieldParentPtr(Self, "parser", parser);
102 |
103 | var v = std.hash_map.hashString("OneOfAmbiguous");
104 | for (self.input) |in_parser| {
105 | v +%= try in_parser.nodeName(node_name_cache);
106 | }
107 | return v;
108 | }
109 |
110 | pub fn parse(parser: *const Parser(Payload, Value), in_ctx: *const ParserContext(Payload, Value)) callconv(.Async) !void {
111 | const self = @fieldParentPtr(Self, "parser", parser);
112 | var ctx = in_ctx.with(self.input);
113 | defer ctx.results.close();
114 |
115 | var buffer = try ResultStream(Result(Value)).init(ctx.allocator, ctx.key);
116 | defer buffer.deinit();
117 | for (self.input) |in_parser| {
118 | const child_node_name = try in_parser.nodeName(&in_ctx.memoizer.node_name_cache);
119 | var child_ctx = try in_ctx.initChild(Value, child_node_name, ctx.offset);
120 | defer child_ctx.deinitChild();
121 | if (!child_ctx.existing_results) try in_parser.parse(&child_ctx);
122 | var sub = child_ctx.subscribe();
123 | while (sub.next()) |next| {
124 | try buffer.add(next.toUnowned());
125 | }
126 | }
127 | buffer.close();
128 |
129 | var gotValues: usize = 0;
130 | var gotErrors: usize = 0;
131 | var sub = buffer.subscribe(ctx.key, ctx.path, Result(Value).initError(ctx.offset, "matches only the empty language"));
132 | while (sub.next()) |next| {
133 | switch (next.result) {
134 | .err => gotErrors += 1,
135 | else => gotValues += 1,
136 | }
137 | }
138 | if (gotValues > 0) {
139 | // At least one parse path succeeded, so discard all error'd parse paths.
140 | //
141 | // TODO(slimsag): would the client not want to enumerate error'd paths that made some
142 | // progress?
143 | var sub2 = buffer.subscribe(ctx.key, ctx.path, Result(Value).initError(ctx.offset, "matches only the empty language"));
144 | while (sub2.next()) |next| {
145 | switch (next.result) {
146 | .err => {},
147 | else => try ctx.results.add(next),
148 | }
149 | }
150 | return;
151 | }
152 | // All parse paths failed, so return a nice error.
153 | //
154 | // TODO(slimsag): include names of expected input parsers
155 | //
156 | // TODO(slimsag): collect and return the furthest error if a parse path made
157 | // progress and failed.
158 | try ctx.results.add(Result(Value).initError(ctx.offset, "expected OneOfAmbiguous"));
159 | }
160 | };
161 | }
162 |
163 | // Confirms that the following grammar works as expected:
164 | //
165 | // ```ebnf
166 | // Grammar = "ello" | "world" ;
167 | // ```
168 | //
169 | test "oneof" {
170 | nosuspend {
171 | const allocator = testing.allocator;
172 |
173 | const Payload = void;
174 | const ctx = try ParserContext(Payload, LiteralValue).init(allocator, "elloworld", {});
175 | defer ctx.deinit();
176 |
177 | const parsers: []*Parser(Payload, LiteralValue) = &.{
178 | (try Literal(Payload).init(allocator, "ello")).ref(),
179 | (try Literal(Payload).init(allocator, "world")).ref(),
180 | };
181 | var helloOrWorld = try OneOfAmbiguous(Payload, LiteralValue).init(allocator, parsers, .borrowed);
182 | defer helloOrWorld.deinit(allocator, null);
183 | try helloOrWorld.parse(&ctx);
184 |
185 | var sub = ctx.subscribe();
186 | var first = sub.next().?;
187 | try testing.expectEqual(Result(LiteralValue).init(4, .{ .value = "ello" }).toUnowned(), first);
188 | try testing.expect(sub.next() == null); // stream closed
189 | }
190 | }
191 |
192 | // Confirms that the following grammar works as expected:
193 | //
194 | // ```ebnf
195 | // Grammar = "ello" | "elloworld" ;
196 | // ```
197 | //
198 | test "oneof_ambiguous" {
199 | nosuspend {
200 | const allocator = testing.allocator;
201 |
202 | const Payload = void;
203 | const ctx = try ParserContext(Payload, LiteralValue).init(allocator, "elloworld", {});
204 | defer ctx.deinit();
205 |
206 | const parsers: []*Parser(Payload, LiteralValue) = &.{ (try Literal(Payload).init(allocator, "ello")).ref(), (try Literal(Payload).init(allocator, "elloworld")).ref() };
207 | var helloOrWorld = try OneOfAmbiguous(Payload, LiteralValue).init(allocator, parsers, .borrowed);
208 | defer helloOrWorld.deinit(allocator, null);
209 | try helloOrWorld.parse(&ctx);
210 |
211 | var sub = ctx.subscribe();
212 | var r1 = sub.next().?;
213 | try testing.expectEqual(@as(usize, 4), r1.offset);
214 | try testing.expectEqualStrings("ello", r1.result.value.value);
215 | var r2 = sub.next().?;
216 | try testing.expectEqual(@as(usize, 9), r2.offset);
217 | try testing.expectEqualStrings("elloworld", r2.result.value.value);
218 | try testing.expect(sub.next() == null); // stream closed
219 | }
220 | }
221 |
--------------------------------------------------------------------------------
/src/combn/combinator/optional.zig:
--------------------------------------------------------------------------------
1 | const gllparser = @import("../gllparser/gllparser.zig");
2 | const Error = gllparser.Error;
3 | const Parser = gllparser.Parser;
4 | const ParserContext = gllparser.Context;
5 | const Result = gllparser.Result;
6 | const NodeName = gllparser.NodeName;
7 |
8 | const Literal = @import("../parser/literal.zig").Literal;
9 | const LiteralValue = @import("../parser/literal.zig").Value;
10 |
11 | const std = @import("std");
12 | const testing = std.testing;
13 | const mem = std.mem;
14 |
15 | pub fn Context(comptime Payload: type, comptime Value: type) type {
16 | return *Parser(Payload, Value);
17 | }
18 |
19 | /// Wraps the `input.parser`, making it an optional parser producing an optional value.
20 | ///
21 | /// The `input.parser` must remain alive for as long as the `Optional` parser will be used.
22 | pub fn Optional(comptime Payload: type, comptime Value: type) type {
23 | return struct {
24 | parser: Parser(Payload, ?Value) = Parser(Payload, ?Value).init(parse, nodeName, deinit, countReferencesTo),
25 | input: Context(Payload, Value),
26 |
27 | const Self = @This();
28 |
29 | pub fn init(allocator: mem.Allocator, input: Context(Payload, Value)) !*Parser(Payload, ?Value) {
30 | const self = Self{ .input = input };
31 | return try self.parser.heapAlloc(allocator, self);
32 | }
33 |
34 | pub fn initStack(input: Context(Payload, Value)) Self {
35 | return Self{ .input = input };
36 | }
37 |
38 | pub fn deinit(parser: *Parser(Payload, ?Value), allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void {
39 | const self = @fieldParentPtr(Self, "parser", parser);
40 | self.input.deinit(allocator, freed);
41 | }
42 |
43 | pub fn countReferencesTo(parser: *const Parser(Payload, ?Value), other: usize, freed: *std.AutoHashMap(usize, void)) usize {
44 | const self = @fieldParentPtr(Self, "parser", parser);
45 | if (@ptrToInt(parser) == other) return 1;
46 | return self.input.countReferencesTo(other, freed);
47 | }
48 |
49 | pub fn nodeName(parser: *const Parser(Payload, ?Value), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 {
50 | const self = @fieldParentPtr(Self, "parser", parser);
51 |
52 | var v = std.hash_map.hashString("Optional");
53 | v +%= try self.input.nodeName(node_name_cache);
54 | return v;
55 | }
56 |
57 | pub fn parse(parser: *const Parser(Payload, ?Value), in_ctx: *const ParserContext(Payload, ?Value)) callconv(.Async) Error!void {
58 | const self = @fieldParentPtr(Self, "parser", parser);
59 | var ctx = in_ctx.with(self.input);
60 | defer ctx.results.close();
61 |
62 | const child_node_name = try ctx.input.nodeName(&in_ctx.memoizer.node_name_cache);
63 | const child_ctx = try in_ctx.initChild(Value, child_node_name, ctx.offset);
64 | defer child_ctx.deinitChild();
65 | if (!child_ctx.existing_results) try ctx.input.parse(&child_ctx);
66 |
67 | var sub = child_ctx.subscribe();
68 | while (sub.next()) |next| {
69 | switch (next.result) {
70 | .err => try ctx.results.add(Result(?Value).init(ctx.offset, null)),
71 | else => try ctx.results.add(Result(?Value).init(next.offset, next.result.value).toUnowned()),
72 | }
73 | }
74 | return;
75 | }
76 | };
77 | }
78 |
79 | test "optional_some" {
80 | nosuspend {
81 | const allocator = testing.allocator;
82 |
83 | const Payload = void;
84 | const ctx = try ParserContext(Payload, ?LiteralValue).init(allocator, "hello world", {});
85 | defer ctx.deinit();
86 |
87 | const optional = try Optional(Payload, LiteralValue).init(allocator, (try Literal(Payload).init(allocator, "hello")).ref());
88 | defer optional.deinit(allocator, null);
89 |
90 | try optional.parse(&ctx);
91 |
92 | var sub = ctx.subscribe();
93 | var r1 = sub.next().?;
94 | try testing.expectEqual(@as(usize, 5), r1.offset);
95 | try testing.expectEqualStrings("hello", r1.result.value.?.value);
96 | try testing.expectEqual(@as(?Result(?LiteralValue), null), sub.next());
97 | }
98 | }
99 |
100 | test "optional_none" {
101 | nosuspend {
102 | const allocator = testing.allocator;
103 |
104 | const Payload = void;
105 | const ctx = try ParserContext(Payload, ?LiteralValue).init(allocator, "hello world", {});
106 | defer ctx.deinit();
107 |
108 | const optional = try Optional(Payload, LiteralValue).init(allocator, (try Literal(Payload).init(allocator, "world")).ref());
109 | defer optional.deinit(allocator, null);
110 |
111 | try optional.parse(&ctx);
112 |
113 | var sub = ctx.subscribe();
114 | var first = sub.next().?;
115 | try testing.expectEqual(Result(?LiteralValue).init(0, null), first);
116 | try testing.expect(sub.next() == null);
117 | }
118 | }
119 |
--------------------------------------------------------------------------------
/src/combn/combinator/reentrant.zig:
--------------------------------------------------------------------------------
1 | const gllparser = @import("../gllparser/gllparser.zig");
2 | const Error = gllparser.Error;
3 | const Parser = gllparser.Parser;
4 | const ParserContext = gllparser.Context;
5 | const Result = gllparser.Result;
6 | const NodeName = gllparser.NodeName;
7 | const ResultStream = gllparser.ResultStream;
8 |
9 | const std = @import("std");
10 | const testing = std.testing;
11 | const mem = std.mem;
12 |
13 | pub fn Context(comptime Payload: type, comptime Value: type) type {
14 | return *Parser(Payload, Value);
15 | }
16 |
17 | /// Wraps the `input.parser`, allowing it to be reentrant (such as in the case of a left recursive
18 | /// grammar.)
19 | ///
20 | /// This has relatively small overhead (so you may use it to wrap any reentrant parser), but is
21 | /// only strictly required for reentrant parsers where invoking the parser multiple times at the
22 | /// same exact position in the input string is required to emit a different result. For example:
23 | ///
24 | /// ```ebnf
25 | /// Expr = Expr?, "abc" ;
26 | /// Grammar = Expr ;
27 | /// ```
28 | ///
29 | /// Without a Reentrant wrapper, parsing the above Grammar would yield only a single
30 | /// `(null, abc)` match, because `Expr` is never invoked recursively. With a Reentrant
31 | /// wrapper, it would match `(((null,abc),abc),abc)` instead.
32 | ///
33 | /// The `input.parser` must remain alive for as long as the `Reentrant` parser will be used.
34 | pub fn Reentrant(comptime Payload: type, comptime Value: type) type {
35 | return struct {
36 | parser: Parser(Payload, Value) = Parser(Payload, Value).init(parse, nodeName, deinit, countReferencesTo),
37 | input: Context(Payload, Value),
38 |
39 | const Self = @This();
40 |
41 | pub fn init(allocator: mem.Allocator, input: Context(Payload, Value)) !*Parser(Payload, Value) {
42 | const self = Self{ .input = input };
43 | return try self.parser.heapAlloc(allocator, self);
44 | }
45 |
46 | pub fn initStack(input: Context(Payload, Value)) Self {
47 | return Self{ .input = input };
48 | }
49 |
50 | pub fn deinit(parser: *Parser(Payload, Value), allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void {
51 | const self = @fieldParentPtr(Self, "parser", parser);
52 | self.input.deinit(allocator, freed);
53 | }
54 |
55 | pub fn countReferencesTo(parser: *const Parser(Payload, Value), other: usize, freed: *std.AutoHashMap(usize, void)) usize {
56 | const self = @fieldParentPtr(Self, "parser", parser);
57 | if (@ptrToInt(parser) == other) return 1;
58 | return self.input.countReferencesTo(other, freed);
59 | }
60 |
61 | pub fn nodeName(parser: *const Parser(Payload, Value), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 {
62 | const self = @fieldParentPtr(Self, "parser", parser);
63 |
64 | var v = std.hash_map.hashString("Reentrant");
65 | v +%= try self.input.nodeName(node_name_cache);
66 | return v;
67 | }
68 |
69 | pub fn parse(parser: *const Parser(Payload, Value), in_ctx: *const ParserContext(Payload, Value)) callconv(.Async) !void {
70 | const self = @fieldParentPtr(Self, "parser", parser);
71 | var ctx = in_ctx.with(self.input);
72 | defer ctx.results.close();
73 |
74 | // See gllparser/parser.zig:Memoizer.get for details on what this is doing and why.
75 | var retrying = false;
76 | var retrying_max_depth: ?usize = null;
77 | while (true) {
78 | const child_node_name = try ctx.input.nodeName(&in_ctx.memoizer.node_name_cache);
79 | const child_ctx = try in_ctx.initChildRetry(Value, child_node_name, ctx.offset, retrying_max_depth);
80 | defer child_ctx.deinitChild();
81 | if (!child_ctx.existing_results) try ctx.input.parse(&child_ctx);
82 |
83 | var buf = try ctx.allocator.create(ResultStream(Result(Value)));
84 | defer ctx.allocator.destroy(buf);
85 | buf.* = try ResultStream(Result(Value)).init(ctx.allocator, ctx.key);
86 | defer buf.deinit();
87 | var sub = child_ctx.subscribe();
88 | while (sub.next()) |next| {
89 | try buf.add(next.toUnowned());
90 | }
91 | buf.close();
92 |
93 | if ((sub.cyclic_closed or retrying) and !child_ctx.isRetrying(child_node_name, ctx.offset)) {
94 | if (retrying and sub.cyclic_closed) {
95 | if (retrying_max_depth.? > 0) retrying_max_depth.? -= 1;
96 | retrying = false;
97 | continue;
98 | }
99 | retrying = true;
100 | if (retrying_max_depth == null) {
101 | retrying_max_depth = 0;
102 | }
103 | retrying_max_depth.? += 1;
104 | continue;
105 | } else {
106 | var sub2 = buf.subscribe(ctx.key, ctx.path, Result(Value).initError(ctx.offset, "matches only the empty language"));
107 | while (sub2.next()) |next| {
108 | try ctx.results.add(next);
109 | }
110 | break;
111 | }
112 | }
113 | }
114 | };
115 | }
116 |
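A minimal sketch of the call shape (not part of the original file; it assumes the `Literal`/`LiteralValue` imports used by the sibling combinator tests). Wrapping a `Literal` is not useful in practice, since a literal never reenters itself, but a real use would wrap a left recursive parser node such as the `Expr` rule described above:

```zig
const Literal = @import("../parser/literal.zig").Literal;
const LiteralValue = @import("../parser/literal.zig").Value;

test "reentrant_sketch" {
    nosuspend {
        const allocator = testing.allocator;
        const Payload = void;
        const ctx = try ParserContext(Payload, LiteralValue).init(allocator, "abc", {});
        defer ctx.deinit();

        // Reentrant wraps an existing *Parser(Payload, LiteralValue) and re-emits its results.
        const wrapped = (try Literal(Payload).init(allocator, "abc")).ref();
        var reentrant = try Reentrant(Payload, LiteralValue).init(allocator, wrapped);
        defer reentrant.deinit(allocator, null);
        try reentrant.parse(&ctx);

        var sub = ctx.subscribe();
        try testing.expectEqual(@as(usize, 3), sub.next().?.offset); // matched "abc"
        try testing.expect(sub.next() == null); // stream closed
    }
}
```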
--------------------------------------------------------------------------------
/src/combn/combinator/repeated.zig:
--------------------------------------------------------------------------------
1 | const gllparser = @import("../gllparser/gllparser.zig");
2 | const Error = gllparser.Error;
3 | const Parser = gllparser.Parser;
4 | const ParserContext = gllparser.Context;
5 | const Result = gllparser.Result;
6 | const NodeName = gllparser.NodeName;
7 | const ResultStream = gllparser.ResultStream;
8 |
9 | const Literal = @import("../parser/literal.zig").Literal;
10 | const LiteralValue = @import("../parser/literal.zig").Value;
11 |
12 | const std = @import("std");
13 | const testing = std.testing;
14 | const mem = std.mem;
15 |
16 | pub fn Context(comptime Payload: type, comptime V: type) type {
17 | return struct {
18 | /// The parser which should be repeatedly parsed.
19 | parser: *Parser(Payload, V),
20 |
21 | /// The minimum number of times the parser must successfully match.
22 | min: usize,
23 |
24 | /// The maximum number of times the parser can match, or -1 for unlimited.
25 | max: isize,
26 | };
27 | }
28 |
29 | /// Represents a single value in the stream of repeated values.
30 | ///
31 | /// In the case of a non-ambiguous grammar, a `Repeated` combinator will yield:
32 | ///
33 | /// ```
34 | /// stream(value1, value2)
35 | /// ```
36 | ///
37 | /// In the case of an ambiguous grammar, it would yield a stream with only the first parse path.
38 | /// Use RepeatedAmbiguous if ambiguous parse paths are desirable.
39 | pub fn Value(comptime V: type) type {
40 | return struct {
41 | results: *ResultStream(Result(V)),
42 |
43 | pub fn deinit(self: *const @This(), allocator: mem.Allocator) void {
44 | self.results.deinit();
45 | allocator.destroy(self.results);
46 | }
47 | };
48 | }
49 |
50 | /// Matches the `input` repeatedly, between `[min, max]` times (inclusive.) If ambiguous parse paths
51 | /// are desirable, use RepeatedAmbiguous.
52 | ///
53 | /// The `input` parsers must remain alive for as long as the `Repeated` parser will be used.
54 | pub fn Repeated(comptime Payload: type, comptime V: type) type {
55 | return struct {
56 | parser: Parser(Payload, Value(V)) = Parser(Payload, Value(V)).init(parse, nodeName, deinit, countReferencesTo),
57 | input: Context(Payload, V),
58 |
59 | const Self = @This();
60 |
61 | pub fn init(allocator: mem.Allocator, input: Context(Payload, V)) !*Parser(Payload, Value(V)) {
62 | const self = Self{ .input = input };
63 | return try self.parser.heapAlloc(allocator, self);
64 | }
65 |
66 | pub fn initStack(input: Context(Payload, V)) Self {
67 | return Self{ .input = input };
68 | }
69 |
70 | pub fn deinit(parser: *Parser(Payload, Value(V)), allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void {
71 | const self = @fieldParentPtr(Self, "parser", parser);
72 | self.input.parser.deinit(allocator, freed);
73 | }
74 |
75 | pub fn countReferencesTo(parser: *const Parser(Payload, Value(V)), other: usize, freed: *std.AutoHashMap(usize, void)) usize {
76 | const self = @fieldParentPtr(Self, "parser", parser);
77 | if (@ptrToInt(parser) == other) return 1;
78 | return self.input.parser.countReferencesTo(other, freed);
79 | }
80 |
81 | pub fn nodeName(parser: *const Parser(Payload, Value(V)), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 {
82 | const self = @fieldParentPtr(Self, "parser", parser);
83 |
84 | var v = std.hash_map.hashString("Repeated");
85 | v +%= try self.input.parser.nodeName(node_name_cache);
86 | v +%= std.hash_map.getAutoHashFn(usize, void)({}, self.input.min);
87 | v +%= std.hash_map.getAutoHashFn(isize, void)({}, self.input.max);
88 | return v;
89 | }
90 |
91 | pub fn parse(parser: *const Parser(Payload, Value(V)), in_ctx: *const ParserContext(Payload, Value(V))) callconv(.Async) Error!void {
92 | const self = @fieldParentPtr(Self, "parser", parser);
93 | var ctx = in_ctx.with(self.input);
94 | defer ctx.results.close();
95 |
96 | // Invoke the child parser repeatedly to produce each of our results. Each time we ask
97 | // the child parser to parse, it can produce a set of results (its result stream) which
98 | // are varying parse paths / interpretations; we take the first successful one.
99 |
100 | // Return early if we're not trying to parse anything (stream close signals to the
101 | // consumer there were no matches).
102 | if (ctx.input.max == 0) {
103 | return;
104 | }
105 |
106 | var buffer = try ctx.allocator.create(ResultStream(Result(V)));
107 | errdefer ctx.allocator.destroy(buffer);
108 | errdefer buffer.deinit();
109 | buffer.* = try ResultStream(Result(V)).init(ctx.allocator, ctx.key);
110 |
111 | var num_values: usize = 0;
112 | var offset: usize = ctx.offset;
113 | while (true) {
114 | const child_node_name = try self.input.parser.nodeName(&in_ctx.memoizer.node_name_cache);
115 | var child_ctx = try in_ctx.initChild(V, child_node_name, offset);
116 | defer child_ctx.deinitChild();
117 | if (!child_ctx.existing_results) try self.input.parser.parse(&child_ctx);
118 |
119 | var num_local_values: usize = 0;
120 | var sub = child_ctx.subscribe();
121 | while (sub.next()) |next| {
122 | switch (next.result) {
123 | .err => {
124 | offset = next.offset;
125 | if (num_values < ctx.input.min) {
126 | buffer.close();
127 | buffer.deinit();
128 | ctx.allocator.destroy(buffer);
129 | try ctx.results.add(Result(Value(V)).initError(next.offset, next.result.err));
130 | return;
131 | }
132 | buffer.close();
133 | try ctx.results.add(Result(Value(V)).init(offset, .{ .results = buffer }));
134 | return;
135 | },
136 | else => {
137 | // TODO(slimsag): need path committal functionality
138 | if (num_local_values == 0) {
139 | offset = next.offset;
140 | // TODO(slimsag): if no consumption, could get stuck forever!
141 | try buffer.add(next.toUnowned());
142 | }
143 | num_local_values += 1;
144 | },
145 | }
146 | }
147 |
148 | num_values += 1;
149 | if (num_values >= ctx.input.max and ctx.input.max != -1) break;
150 | }
151 | buffer.close();
152 | try ctx.results.add(Result(Value(V)).init(offset, .{ .results = buffer }));
153 | }
154 | };
155 | }
156 |
157 | test "repeated" {
158 | nosuspend {
159 | const allocator = testing.allocator;
160 |
161 | const Payload = void;
162 | const ctx = try ParserContext(Payload, Value(LiteralValue)).init(allocator, "abcabcabc123abc", {});
163 | defer ctx.deinit();
164 |
165 | var abcInfinity = try Repeated(Payload, LiteralValue).init(allocator, .{
166 | .parser = (try Literal(Payload).init(allocator, "abc")).ref(),
167 | .min = 0,
168 | .max = -1,
169 | });
170 | defer abcInfinity.deinit(allocator, null);
171 | try abcInfinity.parse(&ctx);
172 |
173 | var sub = ctx.subscribe();
174 | var repeated = sub.next().?.result.value;
175 | try testing.expect(sub.next() == null); // stream closed
176 |
177 | var repeatedSub = repeated.results.subscribe(ctx.key, ctx.path, Result(LiteralValue).initError(ctx.offset, "matches only the empty language"));
178 | try testing.expectEqual(@as(usize, 3), repeatedSub.next().?.offset);
179 | try testing.expectEqual(@as(usize, 6), repeatedSub.next().?.offset);
180 | try testing.expectEqual(@as(usize, 9), repeatedSub.next().?.offset);
181 | try testing.expect(repeatedSub.next() == null); // stream closed
182 | }
183 | }
184 |
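A complementary sketch (not part of the original file, using the same imports as the test above) of `min` enforcement: when the child parser matches fewer than `min` times, `Repeated` emits a single error result instead of a value stream:

```zig
test "repeated_min_not_met" {
    nosuspend {
        const allocator = testing.allocator;
        const Payload = void;
        const ctx = try ParserContext(Payload, Value(LiteralValue)).init(allocator, "abcabc123", {});
        defer ctx.deinit();

        // Require at least three "abc" matches; the input only provides two.
        var abcThrice = try Repeated(Payload, LiteralValue).init(allocator, .{
            .parser = (try Literal(Payload).init(allocator, "abc")).ref(),
            .min = 3,
            .max = -1,
        });
        defer abcThrice.deinit(allocator, null);
        try abcThrice.parse(&ctx);

        var sub = ctx.subscribe();
        const first = sub.next().?;
        try testing.expect(first.result == .err); // fewer than `min` matches
        try testing.expect(sub.next() == null); // stream closed
    }
}
```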
--------------------------------------------------------------------------------
/src/combn/combinator/repeated_ambiguous.zig:
--------------------------------------------------------------------------------
1 | const gllparser = @import("../gllparser/gllparser.zig");
2 | const Error = gllparser.Error;
3 | const Parser = gllparser.Parser;
4 | const ParserContext = gllparser.Context;
5 | const Result = gllparser.Result;
6 | const NodeName = gllparser.NodeName;
7 | const ResultStream = gllparser.ResultStream;
8 | const PosKey = gllparser.PosKey;
9 | const ParserPath = gllparser.ParserPath;
10 |
11 | const Literal = @import("../parser/literal.zig").Literal;
12 | const LiteralValue = @import("../parser/literal.zig").Value;
13 |
14 | const std = @import("std");
15 | const testing = std.testing;
16 | const mem = std.mem;
17 |
18 | pub fn Context(comptime Payload: type, comptime V: type) type {
19 | return struct {
20 | /// The parser which should be repeatedly parsed.
21 | parser: *Parser(Payload, V),
22 |
23 | /// The minimum number of times the parser must successfully match.
24 | min: usize,
25 |
26 | /// The maximum number of times the parser can match, or -1 for unlimited.
27 | max: isize,
28 | };
29 | }
30 |
31 | /// Represents a single value in the stream of repeated values.
32 | ///
33 | /// In the case of a non-ambiguous grammar, a `RepeatedAmbiguous` combinator will yield:
34 | ///
35 | /// ```
36 | /// Value{
37 | /// node: value1,
38 | /// next: ResultStream(Value{
39 | /// node: value2,
40 | /// next: ...,
41 | /// })
42 | /// }
43 | /// ```
44 | ///
45 | /// In the case of an ambiguous grammar, it would yield streams with potentially multiple values
46 | /// (each representing one possible parse path / interpretation of the grammar):
47 | ///
48 | /// ```
49 | /// Value{
50 | /// node: value1,
51 | /// next: ResultStream(
52 | /// Value{
53 | /// node: value2variant1,
54 | /// next: ...,
55 | /// },
56 | /// Value{
57 | /// node: value2variant2,
58 | /// next: ...,
59 | /// },
60 | /// )
61 | /// }
62 | /// ```
63 | ///
64 | pub fn Value(comptime V: type) type {
65 | return struct {
66 | node: Result(V),
67 | next: *ResultStream(Result(@This())),
68 |
69 | pub fn deinit(self: *const @This(), allocator: mem.Allocator) void {
70 | self.next.deinit();
71 | self.node.deinit(allocator);
72 | allocator.destroy(self.next);
73 | }
74 |
75 | pub fn flatten(self: *const @This(), allocator: mem.Allocator, subscriber: PosKey, path: ParserPath) Error!ResultStream(Result(V)) {
76 | var dst = try ResultStream(Result(V)).init(allocator, subscriber);
77 | try self.flatten_into(&dst, allocator, subscriber, path);
78 | dst.close(); // TODO(slimsag): why does deferring this not work?
79 | return dst;
80 | }
81 |
82 | pub fn flatten_into(self: *const @This(), dst: *ResultStream(Result(V)), allocator: mem.Allocator, subscriber: PosKey, path: ParserPath) Error!void {
83 | try dst.add(self.node.toUnowned());
84 |
85 | var sub = self.next.subscribe(subscriber, path, Result(Value(V)).initError(0, "matches only the empty language"));
86 | nosuspend {
87 | while (sub.next()) |next_path| {
88 | switch (next_path.result) {
89 | .err => try dst.add(Result(V).initError(next_path.offset, next_path.result.err)),
90 | else => try next_path.result.value.flatten_into(dst, allocator, subscriber, path),
91 | }
92 | }
93 | }
94 | }
95 | };
96 | }
97 |
98 | /// Matches the `input` repeatedly, between `[min, max]` times (inclusive.)
99 | ///
100 | /// The `input` parsers must remain alive for as long as the `RepeatedAmbiguous` parser will be used.
101 | pub fn RepeatedAmbiguous(comptime Payload: type, comptime V: type) type {
102 | return struct {
103 | parser: Parser(Payload, Value(V)) = Parser(Payload, Value(V)).init(parse, nodeName, deinit, countReferencesTo),
104 | input: Context(Payload, V),
105 |
106 | const Self = @This();
107 |
108 | pub fn init(allocator: mem.Allocator, input: Context(Payload, V)) !*Parser(Payload, Value(V)) {
109 | const self = Self{ .input = input };
110 | return try self.parser.heapAlloc(allocator, self);
111 | }
112 |
113 | pub fn initStack(input: Context(Payload, V)) Self {
114 | return Self{ .input = input };
115 | }
116 |
117 | pub fn deinit(parser: *Parser(Payload, Value(V)), allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void {
118 | const self = @fieldParentPtr(Self, "parser", parser);
119 | self.input.parser.deinit(allocator, freed);
120 | }
121 |
122 | pub fn countReferencesTo(parser: *const Parser(Payload, Value(V)), other: usize, freed: *std.AutoHashMap(usize, void)) usize {
123 | const self = @fieldParentPtr(Self, "parser", parser);
124 | if (@ptrToInt(parser) == other) return 1;
125 | return self.input.parser.countReferencesTo(other, freed);
126 | }
127 |
128 | pub fn nodeName(parser: *const Parser(Payload, Value(V)), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 {
129 | const self = @fieldParentPtr(Self, "parser", parser);
130 |
131 | var v = std.hash_map.hashString("RepeatedAmbiguous");
132 | v +%= try self.input.parser.nodeName(node_name_cache);
133 | v +%= std.hash_map.getAutoHashFn(usize, void)({}, self.input.min);
134 | v +%= std.hash_map.getAutoHashFn(isize, void)({}, self.input.max);
135 | return v;
136 | }
137 |
138 | pub fn parse(parser: *const Parser(Payload, Value(V)), in_ctx: *const ParserContext(Payload, Value(V))) callconv(.Async) Error!void {
139 | const self = @fieldParentPtr(Self, "parser", parser);
140 | var ctx = in_ctx.with(self.input);
141 | defer ctx.results.close();
142 |
143 | // Invoke the child parser repeatedly to produce each of our results. Each time we ask
144 | // the child parser to parse, it can produce a set of results (its result stream) which
145 | // are varying parse paths / interpretations. Our set of results (our result stream)
146 | // will contain many more possible paths, for example consider a parser:
147 | //
148 | // S -> [A, B]
149 | //
150 | // Matched once, it can produce one or two separate parse paths / interpretations (A, B,
151 | // or A and B), and we may commit to producing certain ones. But match twice, and it
152 | // could produce:
153 | //
154 | // S -> [AB, BA, AA, BB]
155 | //
156 | // There is an exponential number of repetitive parse paths to follow. Thus, we simply
157 | // follow each path in order, trying one at a time until we commit or reject the
158 | // unwanted paths. We also have two options in how we follow the paths - depth-first in
159 | // order:
160 | //
161 | // AA, AB, BA, BB
162 | //
163 | // Or breadth-first in order:
164 | //
165 | // AA, BA, AB, BB
166 | //
167 | // Depth-first vs. breadth-first could impact the performance of some grammars by
168 | // making it harder to bail out of a given parse path quickly. Similarly, iteration
169 | // order could be more expensive depending on the order of operations; this will be
170 | // slower:
171 | //
172 | // Iteration 0: Try A
173 | // Iteration 0: Try B -> Commit to B
174 | // Iteration 1: Try A
175 | // Iteration 1: Try B -> Commit to B
176 | // Iteration 2: Try A
177 | // Iteration 2: Try B -> Commit to B
178 | //
179 | // Than this:
180 | //
181 | // Iteration 0: Try B -> Commit to B
182 | // Iteration 1: Try B -> Commit to B
183 | // Iteration 2: Try B -> Commit to B
184 | //
185 | // However, the optimal order is not known ahead of time. Likely the best approach
186 | // would be to assume the next path will be the same as the previous path, but in practice
187 | // this would involve more book-keeping and still be a guess. Instead, we just focus on
188 | // exploring all potential paths as quickly as possible (and future efforts will be
189 | // better spent on parallelization of exploring these paths.)
190 |
191 | // Return early if we're not trying to parse anything (stream close signals to the
192 | // consumer there were no matches).
193 | if (ctx.input.max == 0) {
194 | return;
195 | }
196 |
197 | // First we need to actually invoke the child parser. This will give us [A, B, C] and
198 | // we then invoke RepeatedAmbiguous(child) on the subsequent states to get the associated stream:
199 | //
200 | // stream(
201 | // (A, stream(
202 | // (A, stream(...),
203 | // (B, stream(...),
204 | // (C, stream(...),
205 | // ),
206 | // (B, stream(
207 | // (A, stream(...),
208 | // (B, stream(...),
209 | // (C, stream(...),
210 | // ),
211 | // (C, stream(
212 | // (A, stream(...),
213 | // (B, stream(...),
214 | // (C, stream(...),
215 | // ),
216 | // )
217 | //
218 | const child_node_name = try self.input.parser.nodeName(&in_ctx.memoizer.node_name_cache);
219 | var child_ctx = try in_ctx.initChild(V, child_node_name, ctx.offset);
220 | defer child_ctx.deinitChild();
221 | if (!child_ctx.existing_results) try self.input.parser.parse(&child_ctx);
222 |
223 | // For every top-level value (A, B, C in our example above.)
224 | var num_values: usize = 0;
225 | var sub = child_ctx.subscribe();
226 | var offset: usize = ctx.offset;
227 | while (sub.next()) |top_level| {
228 | if (num_values >= ctx.input.max and ctx.input.max != -1) break;
229 | num_values += 1;
230 | switch (top_level.result) {
231 | .err => {
232 | // Going down the path of this top-level value terminated with an error.
233 | if (num_values < 1 or num_values < ctx.input.min) {
234 | try ctx.results.add(Result(Value(V)).initError(top_level.offset, top_level.result.err));
235 | }
236 | continue;
237 | },
238 | else => {
239 | // We got a non-error top-level value (e.g. A, B, C).
240 | // TODO(slimsag): if no consumption, could get stuck forever!
241 | offset = top_level.offset;
242 |
243 | // Now get the stream that continues down this path (i.e. the stream
244 | // associated with A, B, C.)
245 | var path_results = try ctx.allocator.create(ResultStream(Result(Value(V))));
246 | path_results.* = try ResultStream(Result(Value(V))).init(ctx.allocator, ctx.key);
247 | var path = RepeatedAmbiguous(Payload, V).initStack(.{
248 | .parser = self.input.parser,
249 | .min = self.input.min,
250 | .max = if (self.input.max == -1) -1 else self.input.max - 1,
251 | });
252 | const path_node_name = try path.parser.nodeName(&in_ctx.memoizer.node_name_cache);
253 | var path_ctx = try in_ctx.initChild(Value(V), path_node_name, top_level.offset);
254 | defer path_ctx.deinitChild();
255 | if (!path_ctx.existing_results) try path.parser.parse(&path_ctx);
256 | var path_results_sub = path_ctx.subscribe();
257 | while (path_results_sub.next()) |next| {
258 | try path_results.add(next.toUnowned());
259 | }
260 | path_results.close();
261 |
262 | // Emit our top-level value tuple (e.g. (A, stream(...))
263 | try ctx.results.add(Result(Value(V)).init(top_level.offset, .{
264 | .node = top_level.toUnowned(),
265 | .next = path_results,
266 | }));
267 | },
268 | }
269 | }
270 | if (num_values < ctx.input.min) {
271 | // TODO(slimsag): include number of expected/found matches
272 | try ctx.results.add(Result(Value(V)).initError(offset, "expected more"));
273 | return;
274 | }
275 | return;
276 | }
277 | };
278 | }
279 |
280 | test "repeated" {
281 | nosuspend {
282 | const allocator = testing.allocator;
283 |
284 | const Payload = void;
285 | const ctx = try ParserContext(Payload, Value(LiteralValue)).init(allocator, "abcabcabc123abc", {});
286 | defer ctx.deinit();
287 |
288 | var abcInfinity = try RepeatedAmbiguous(Payload, LiteralValue).init(allocator, .{
289 | .parser = (try Literal(Payload).init(allocator, "abc")).ref(),
290 | .min = 0,
291 | .max = -1,
292 | });
293 | defer abcInfinity.deinit(allocator, null);
294 | try abcInfinity.parse(&ctx);
295 |
296 | var sub = ctx.subscribe();
297 | var list = sub.next();
298 | try testing.expect(sub.next() == null); // stream closed
299 |
300 | // first element
301 | try testing.expectEqual(@as(usize, 3), list.?.offset);
302 | try testing.expectEqual(@as(usize, 3), list.?.result.value.node.offset);
303 |
304 | // flatten the nested multi-dimensional array; since our grammar above is not ambiguous,
305 | // this is fine to do and makes testing far easier.
306 | var flattened = try list.?.result.value.flatten(allocator, ctx.key, ctx.path);
307 | defer flattened.deinit();
308 | var flat = flattened.subscribe(ctx.key, ctx.path, Result(LiteralValue).initError(ctx.offset, "matches only the empty language"));
309 | try testing.expectEqual(@as(usize, 3), flat.next().?.offset);
310 | try testing.expectEqual(@as(usize, 6), flat.next().?.offset);
311 | try testing.expectEqual(@as(usize, 9), flat.next().?.offset);
312 | try testing.expect(flat.next() == null); // stream closed
313 | }
314 | }
315 |
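The test above flattens the result for convenience; the nested `node`/`next` shape can also be walked by hand. A hedged sketch (not part of the original file, reusing the `ctx` and `list` variables from the test above):

```zig
// Walk a single parse path: each step carries one matched element (`node`) and a
// stream of continuations (`next`). With an unambiguous grammar like the one above,
// each continuation stream holds at most one value.
var cur = list.?.result.value;
while (true) {
    _ = cur.node; // one matched element along this path
    var next_sub = cur.next.subscribe(ctx.key, ctx.path, Result(Value(LiteralValue)).initError(ctx.offset, "matches only the empty language"));
    const step = next_sub.next() orelse break;
    if (step.result == .err) break; // this parse path has ended
    cur = step.result.value;
}
```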
--------------------------------------------------------------------------------
/src/combn/combinator/sequence.zig:
--------------------------------------------------------------------------------
1 | const gllparser = @import("../gllparser/gllparser.zig");
2 | const Error = gllparser.Error;
3 | const Parser = gllparser.Parser;
4 | const ParserContext = gllparser.Context;
5 | const Result = gllparser.Result;
6 | const NodeName = gllparser.NodeName;
7 | const ResultStream = gllparser.ResultStream;
8 |
9 | const Literal = @import("../parser/literal.zig").Literal;
10 | const LiteralValue = @import("../parser/literal.zig").Value;
11 | const MapTo = @import("mapto.zig").MapTo;
12 |
13 | const std = @import("std");
14 | const testing = std.testing;
15 | const mem = std.mem;
16 |
17 | pub fn Context(comptime Payload: type, comptime V: type) type {
18 | return []const *Parser(Payload, V);
19 | }
20 |
21 | /// Represents a sequence of parsed values.
22 | ///
23 | /// In the case of a non-ambiguous grammar, a `Sequence` combinator will yield:
24 | ///
25 | /// ```
26 | /// stream(value1, value2)
27 | /// ```
28 | ///
29 | /// In the case of an ambiguous grammar, it would yield a stream with only the first parse path.
30 | /// Use SequenceAmbiguous if ambiguous parse paths are desirable.
31 | pub fn Value(comptime V: type) type {
32 | return struct {
33 | results: *ResultStream(Result(V)),
34 |
35 | pub fn deinit(self: *const @This(), allocator: mem.Allocator) void {
36 | self.results.deinit();
37 | allocator.destroy(self.results);
38 | }
39 | };
40 | }
41 |
42 | pub const Ownership = enum {
43 | borrowed,
44 | owned,
45 | copy,
46 | };
47 |
48 | /// Matches the `input` parsers sequentially. The parsers must produce the same data type (use
49 | /// MapTo, if needed.) If ambiguous parse paths are desirable, use SequenceAmbiguous.
50 | ///
51 | /// The `input` parsers must remain alive for as long as the `Sequence` parser will be used.
52 | pub fn Sequence(comptime Payload: type, comptime V: type) type {
53 | return struct {
54 | parser: Parser(Payload, Value(V)) = Parser(Payload, Value(V)).init(parse, nodeName, deinit, countReferencesTo),
55 | input: Context(Payload, V),
56 | ownership: Ownership,
57 |
58 | const Self = @This();
59 |
60 | pub fn init(allocator: mem.Allocator, input: Context(Payload, V), ownership: Ownership) !*Parser(Payload, Value(V)) {
61 | var self = Self{ .input = input, .ownership = ownership };
62 | if (ownership == .copy) {
63 | const Elem = std.meta.Elem(@TypeOf(input));
64 | var copy = try allocator.alloc(Elem, input.len);
65 | std.mem.copy(Elem, copy, input);
66 | self.input = copy;
67 | self.ownership = .owned;
68 | }
69 | return try self.parser.heapAlloc(allocator, self);
70 | }
71 |
72 | pub fn initStack(input: Context(Payload, V), ownership: Ownership) Self {
73 | if (ownership == Ownership.copy) unreachable;
74 | return Self{ .input = input, .ownership = ownership };
75 | }
76 |
77 | pub fn deinit(parser: *Parser(Payload, Value(V)), allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void {
78 | const self = @fieldParentPtr(Self, "parser", parser);
79 | for (self.input) |child_parser| {
80 | child_parser.deinit(allocator, freed);
81 | }
82 | if (self.ownership == .owned) allocator.free(self.input);
83 | }
84 |
85 | pub fn countReferencesTo(parser: *const Parser(Payload, Value(V)), other: usize, freed: *std.AutoHashMap(usize, void)) usize {
86 | const self = @fieldParentPtr(Self, "parser", parser);
87 | if (@ptrToInt(parser) == other) return 1;
88 | var count: usize = 0;
89 | for (self.input) |in_parser| {
90 | count += in_parser.countReferencesTo(other, freed);
91 | }
92 | return count;
93 | }
94 |
95 | pub fn nodeName(parser: *const Parser(Payload, Value(V)), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 {
96 | const self = @fieldParentPtr(Self, "parser", parser);
97 |
98 | var v = std.hash_map.hashString("Sequence");
99 | for (self.input) |in_parser| {
100 | v +%= try in_parser.nodeName(node_name_cache);
101 | }
102 | return v;
103 | }
104 |
105 | pub fn parse(parser: *const Parser(Payload, Value(V)), in_ctx: *const ParserContext(Payload, Value(V))) callconv(.Async) Error!void {
106 | const self = @fieldParentPtr(Self, "parser", parser);
107 | var ctx = in_ctx.with(self.input);
108 | defer ctx.results.close();
109 |
110 | // Invoke each child parser to produce each of our results. Each time we ask a child
111 | // parser to parse, it can produce a set of results (its result stream) which are
112 | // varying parse paths / interpretations; we take the first successful one.
113 |
114 | // Return early if we're not trying to parse anything (stream close signals to the
115 | // consumer there were no matches).
116 | if (self.input.len == 0) {
117 | return;
118 | }
119 |
120 | var buffer = try ctx.allocator.create(ResultStream(Result(V)));
121 | errdefer ctx.allocator.destroy(buffer);
122 | errdefer buffer.deinit();
123 | buffer.* = try ResultStream(Result(V)).init(ctx.allocator, ctx.key);
124 |
125 | var offset: usize = ctx.offset;
126 | for (self.input) |child_parser| {
127 | const child_node_name = try child_parser.nodeName(&in_ctx.memoizer.node_name_cache);
128 | var child_ctx = try in_ctx.initChild(V, child_node_name, offset);
129 | defer child_ctx.deinitChild();
130 | if (!child_ctx.existing_results) try child_parser.parse(&child_ctx);
131 |
132 | var num_local_values: usize = 0;
133 | var sub = child_ctx.subscribe();
134 | while (sub.next()) |next| {
135 | switch (next.result) {
136 | .err => {
137 | buffer.close();
138 | buffer.deinit();
139 | ctx.allocator.destroy(buffer);
140 | try ctx.results.add(Result(Value(V)).initError(next.offset, next.result.err));
141 | return;
142 | },
143 | else => {
144 | // TODO(slimsag): need path committal functionality
145 | if (num_local_values == 0) {
146 | // TODO(slimsag): if no consumption, could get stuck forever!
147 | offset = next.offset;
148 | try buffer.add(next.toUnowned());
149 | }
150 | num_local_values += 1;
151 | },
152 | }
153 | }
154 | }
155 | buffer.close();
156 | try ctx.results.add(Result(Value(V)).init(offset, .{ .results = buffer }));
157 | }
158 | };
159 | }
160 |
161 | test "sequence" {
162 | nosuspend {
163 | const allocator = testing.allocator;
164 |
165 | const Payload = void;
166 | const ctx = try ParserContext(Payload, Value(LiteralValue)).init(allocator, "abc123abc456_123abc", {});
167 | defer ctx.deinit();
168 |
169 | var seq = try Sequence(Payload, LiteralValue).init(allocator, &.{
170 | (try Literal(Payload).init(allocator, "abc")).ref(),
171 | (try Literal(Payload).init(allocator, "123ab")).ref(),
172 | (try Literal(Payload).init(allocator, "c45")).ref(),
173 | (try Literal(Payload).init(allocator, "6")).ref(),
174 | }, .borrowed);
175 | defer seq.deinit(allocator, null);
176 | try seq.parse(&ctx);
177 |
178 | var sub = ctx.subscribe();
179 | var sequence = sub.next().?.result.value;
180 | try testing.expect(sub.next() == null); // stream closed
181 |
182 | var sequenceSub = sequence.results.subscribe(ctx.key, ctx.path, Result(LiteralValue).initError(ctx.offset, "matches only the empty language"));
183 | try testing.expectEqual(@as(usize, 3), sequenceSub.next().?.offset);
184 | try testing.expectEqual(@as(usize, 8), sequenceSub.next().?.offset);
185 | try testing.expectEqual(@as(usize, 11), sequenceSub.next().?.offset);
186 | try testing.expectEqual(@as(usize, 12), sequenceSub.next().?.offset);
187 | try testing.expect(sequenceSub.next() == null); // stream closed
188 | }
189 | }
190 |
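A note on `Ownership`, with a hedged sketch (not part of the original file): `.borrowed` requires the caller to keep the slice of parsers alive, while `.copy` makes `init` duplicate a temporary slice, flip ownership to `.owned`, and free the copy in `deinit`:

```zig
test "sequence_copy_ownership" {
    nosuspend {
        const allocator = testing.allocator;
        const Payload = void;
        const ctx = try ParserContext(Payload, Value(LiteralValue)).init(allocator, "ab", {});
        defer ctx.deinit();

        // A stack-local array: pass .copy so Sequence duplicates the slice rather than
        // borrowing a pointer that would dangle once this frame returns.
        var parsers = [_]*Parser(Payload, LiteralValue){
            (try Literal(Payload).init(allocator, "a")).ref(),
            (try Literal(Payload).init(allocator, "b")).ref(),
        };
        var seq = try Sequence(Payload, LiteralValue).init(allocator, &parsers, .copy);
        defer seq.deinit(allocator, null);
        try seq.parse(&ctx);

        var sub = ctx.subscribe();
        var sequence = sub.next().?.result.value;
        try testing.expect(sub.next() == null); // stream closed

        var seqSub = sequence.results.subscribe(ctx.key, ctx.path, Result(LiteralValue).initError(ctx.offset, "matches only the empty language"));
        try testing.expectEqual(@as(usize, 1), seqSub.next().?.offset);
        try testing.expectEqual(@as(usize, 2), seqSub.next().?.offset);
        try testing.expect(seqSub.next() == null); // stream closed
    }
}
```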
--------------------------------------------------------------------------------
/src/combn/combinator/sequence_ambiguous.zig:
--------------------------------------------------------------------------------
1 | const gllparser = @import("../gllparser/gllparser.zig");
2 | const Error = gllparser.Error;
3 | const Parser = gllparser.Parser;
4 | const ParserContext = gllparser.Context;
5 | const Result = gllparser.Result;
6 | const NodeName = gllparser.NodeName;
7 | const ResultStream = gllparser.ResultStream;
8 | const PosKey = gllparser.PosKey;
9 | const ParserPath = gllparser.ParserPath;
10 |
11 | const Literal = @import("../parser/literal.zig").Literal;
12 | const LiteralValue = @import("../parser/literal.zig").Value;
13 | const MapTo = @import("mapto.zig").MapTo;
14 |
15 | const std = @import("std");
16 | const testing = std.testing;
17 | const mem = std.mem;
18 |
19 | pub fn Context(comptime Payload: type, comptime V: type) type {
20 | return []const *Parser(Payload, V);
21 | }
22 |
23 | /// Represents a sequence of parsed values.
24 | ///
25 | /// In the case of a non-ambiguous grammar, a `SequenceAmbiguous` combinator will yield:
26 | ///
27 | /// ```
28 | /// Value{
29 | /// node: value1,
30 | /// next: ResultStream(Value{
31 | /// node: value2,
32 | /// next: ...,
33 | /// })
34 | /// }
35 | /// ```
36 | ///
37 | /// In the case of an ambiguous grammar, it would yield streams with potentially multiple values
38 | /// (each representing one possible parse path / interpretation of the grammar):
39 | ///
40 | /// ```
41 | /// Value{
42 | /// node: value1,
43 | /// next: ResultStream(
44 | /// Value{
45 | /// node: value2variant1,
46 | /// next: ...,
47 | /// },
48 | /// Value{
49 | /// node: value2variant2,
50 | /// next: ...,
51 | /// },
52 | /// )
53 | /// }
54 | /// ```
55 | ///
56 | pub fn Value(comptime V: type) type {
57 | return struct {
58 | node: Result(V),
59 | next: *ResultStream(Result(@This())),
60 |
61 | pub fn deinit(self: *const @This(), allocator: mem.Allocator) void {
62 | self.next.deinit();
63 | self.node.deinit(allocator);
64 | allocator.destroy(self.next);
65 | }
66 |
67 | pub fn flatten(self: *const @This(), allocator: mem.Allocator, subscriber: PosKey, path: ParserPath) Error!ResultStream(Result(V)) {
68 | var dst = try ResultStream(Result(V)).init(allocator, subscriber);
69 | try self.flatten_into(&dst, allocator, subscriber, path);
70 | dst.close(); // TODO(slimsag): why does deferring this not work?
71 | return dst;
72 | }
73 |
74 | pub fn flatten_into(self: *const @This(), dst: *ResultStream(Result(V)), allocator: mem.Allocator, subscriber: PosKey, path: ParserPath) Error!void {
75 | try dst.add(self.node.toUnowned());
76 |
77 | var sub = self.next.subscribe(subscriber, path, Result(Value(V)).initError(0, "matches only the empty language"));
78 | nosuspend {
79 | while (sub.next()) |next_path| {
80 | switch (next_path.result) {
81 | .err => try dst.add(Result(V).initError(next_path.offset, next_path.result.err)),
82 | else => try next_path.result.value.flatten_into(dst, allocator, subscriber, path),
83 | }
84 | }
85 | }
86 | }
87 | };
88 | }
89 |
90 | pub const Ownership = enum {
91 | borrowed,
92 | owned,
93 | copy,
94 | };
95 |
96 | /// Matches the `input` parsers sequentially. The parsers must produce the same data type (use
97 | /// MapTo, if needed.)
98 | ///
99 | /// The `input` parsers must remain alive for as long as the `SequenceAmbiguous` parser will be used.
100 | pub fn SequenceAmbiguous(comptime Payload: type, comptime V: type) type {
101 | return struct {
102 | parser: Parser(Payload, Value(V)) = Parser(Payload, Value(V)).init(parse, nodeName, deinit, countReferencesTo),
103 | input: Context(Payload, V),
104 | ownership: Ownership,
105 |
106 | const Self = @This();
107 |
108 | pub fn init(allocator: mem.Allocator, input: Context(Payload, V), ownership: Ownership) !*Parser(Payload, Value(V)) {
109 | var self = Self{ .input = input, .ownership = ownership };
110 | if (ownership == .copy) {
111 | const Elem = std.meta.Elem(@TypeOf(input));
112 | var copy = try allocator.alloc(Elem, input.len);
113 | std.mem.copy(Elem, copy, input);
114 | self.input = copy;
115 | self.ownership = .owned;
116 | }
117 | return try self.parser.heapAlloc(allocator, self);
118 | }
119 |
120 | pub fn initStack(input: Context(Payload, V), ownership: Ownership) Self {
121 | if (ownership == Ownership.copy) unreachable;
122 | return Self{ .input = input, .ownership = ownership };
123 | }
124 |
125 | pub fn deinit(parser: *Parser(Payload, Value(V)), allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void {
126 | const self = @fieldParentPtr(Self, "parser", parser);
127 | for (self.input) |child_parser| {
128 | child_parser.deinit(allocator, freed);
129 | }
130 | if (self.ownership == .owned) allocator.free(self.input);
131 | }
132 |
133 | pub fn countReferencesTo(parser: *const Parser(Payload, Value(V)), other: usize, freed: *std.AutoHashMap(usize, void)) usize {
134 | const self = @fieldParentPtr(Self, "parser", parser);
135 | if (@ptrToInt(parser) == other) return 1;
136 | var count: usize = 0;
137 | for (self.input) |in_parser| {
138 | count += in_parser.countReferencesTo(other, freed);
139 | }
140 | return count;
141 | }
142 |
143 | pub fn nodeName(parser: *const Parser(Payload, Value(V)), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 {
144 | const self = @fieldParentPtr(Self, "parser", parser);
145 |
146 | var v = std.hash_map.hashString("SequenceAmbiguous");
147 | for (self.input) |in_parser| {
148 | v +%= try in_parser.nodeName(node_name_cache);
149 | }
150 | return v;
151 | }
152 |
153 | pub fn parse(parser: *const Parser(Payload, Value(V)), in_ctx: *const ParserContext(Payload, Value(V))) callconv(.Async) Error!void {
154 | const self = @fieldParentPtr(Self, "parser", parser);
155 | var ctx = in_ctx.with(self.input);
156 | defer ctx.results.close();
157 |
158 | if (self.input.len == 0) {
159 | return;
160 | }
161 |
162 | // For a sequence of input parsers [A, B, C], each one may produce multiple different
163 | // possible parser paths (valid interpretations of the same input state) in the case of
164 | // an ambiguous grammar. For example, the sequence of parsers [A, B, C] where each
165 | // produces 2 possible parser paths (e.g. A1, A2) we need to emit:
166 | //
167 | // stream(
168 | // (A1, stream(
169 | // (B1, stream(
170 | // (C1, None),
171 | // (C2, None),
172 | // )),
173 | // (B2, stream(
174 | // (C1, None),
175 | // (C2, None),
176 | // )),
177 | // )),
178 | // (A2, stream(
179 | // (B1, stream(
180 | // (C1, None),
181 | // (C2, None),
182 | // )),
183 | // (B2, stream(
184 | // (C1, None),
185 | // (C2, None),
186 | // )),
187 | // )),
188 | // )
189 | //
190 | // This call to `SequenceAmbiguous.parse` is only responsible for emitting the top level
191 | // (A1, A2) and invoking SequenceAmbiguous(next) to produce the associated `stream()` for those
192 | // parse states.
193 | const child_node_name = try self.input[0].nodeName(&in_ctx.memoizer.node_name_cache);
194 | var child_ctx = try in_ctx.initChild(V, child_node_name, ctx.offset);
195 | defer child_ctx.deinitChild();
196 | if (!child_ctx.existing_results) try self.input[0].parse(&child_ctx);
197 |
198 | // For every top-level value (A1, A2 in our example above.)
199 | var sub = child_ctx.subscribe();
200 | while (sub.next()) |top_level| {
201 | switch (top_level.result) {
202 | .err => {
203 | try ctx.results.add(Result(Value(V)).initError(top_level.offset, top_level.result.err));
204 | continue;
205 | },
206 | else => {
207 | // We got a non-error top-level value (e.g. A1, A2).
208 |
209 | // Now get the stream that continues down this path (i.e. the stream
210 | // associated with A1, A2.)
211 | var path_results = try ctx.allocator.create(ResultStream(Result(Value(V))));
212 | path_results.* = try ResultStream(Result(Value(V))).init(ctx.allocator, ctx.key);
213 | var path = SequenceAmbiguous(Payload, V).initStack(self.input[1..], .borrowed);
214 | const path_node_name = try path.parser.nodeName(&in_ctx.memoizer.node_name_cache);
215 | var path_ctx = try in_ctx.initChild(Value(V), path_node_name, top_level.offset);
216 | defer path_ctx.deinitChild();
217 | if (!path_ctx.existing_results) try path.parser.parse(&path_ctx);
218 | var path_results_sub = path_ctx.subscribe();
219 | while (path_results_sub.next()) |next| {
220 | try path_results.add(next.toUnowned());
221 | }
222 | path_results.close();
223 |
224 | // Emit our top-level value tuple (e.g. (A1, stream(...))
225 | try ctx.results.add(Result(Value(V)).init(top_level.offset, .{
226 | .node = top_level.toUnowned(),
227 | .next = path_results,
228 | }));
229 | },
230 | }
231 | }
232 | }
233 | };
234 | }
235 |
236 | test "sequence" {
237 | nosuspend {
238 | const allocator = testing.allocator;
239 |
240 | const Payload = void;
241 | const ctx = try ParserContext(Payload, Value(LiteralValue)).init(allocator, "abc123abc456_123abc", {});
242 | defer ctx.deinit();
243 |
244 | var seq = try SequenceAmbiguous(Payload, LiteralValue).init(allocator, &.{
245 | (try Literal(Payload).init(allocator, "abc")).ref(),
246 | (try Literal(Payload).init(allocator, "123ab")).ref(),
247 | (try Literal(Payload).init(allocator, "c45")).ref(),
248 | (try Literal(Payload).init(allocator, "6")).ref(),
249 | }, .borrowed);
250 | defer seq.deinit(allocator, null);
251 | try seq.parse(&ctx);
252 |
253 | var sub = ctx.subscribe();
254 | var list = sub.next();
255 | try testing.expect(sub.next() == null); // stream closed
256 |
257 | // first element
258 | try testing.expectEqual(@as(usize, 3), list.?.offset);
259 | try testing.expectEqual(@as(usize, 3), list.?.result.value.node.offset);
260 |
261 | // flatten the nested multi-dimensional array; since our grammar above is not ambiguous,
262 | // this is fine to do and makes testing far easier.
263 | var flattened = try list.?.result.value.flatten(allocator, ctx.key, ctx.path);
264 | defer flattened.deinit();
265 | var flat = flattened.subscribe(ctx.key, ctx.path, Result(LiteralValue).initError(ctx.offset, "matches only the empty language"));
266 | try testing.expectEqual(@as(usize, 3), flat.next().?.offset);
267 | try testing.expectEqual(@as(usize, 8), flat.next().?.offset);
268 | try testing.expectEqual(@as(usize, 11), flat.next().?.offset);
269 | try testing.expectEqual(@as(usize, 12), flat.next().?.offset);
270 | try testing.expect(flat.next() == null); // stream closed
271 | }
272 | }
273 |
--------------------------------------------------------------------------------
/src/combn/combn.zig:
--------------------------------------------------------------------------------
1 | pub const combinator = @import("combinator/combinator.zig");
2 | pub const gllparser = @import("gllparser/gllparser.zig");
3 | pub const parser = @import("parser/parser.zig");
4 |
5 | usingnamespace @import("test_complex.zig");
6 |
--------------------------------------------------------------------------------
/src/combn/gllparser/ParserPath.zig:
--------------------------------------------------------------------------------
1 | //! Tracks the path which a parser took, i.e. which parser states were taken
2 | //! and in which order, by maintaining a stack of parser position keys.
3 |
4 | stack: std.atomic.Stack(PosKey),
5 |
6 | const std = @import("std");
7 | const mem = std.mem;
8 | const PosKey = @import("parser.zig").PosKey;
9 | const ParserPath = @This();
10 |
11 | pub fn init() ParserPath {
12 | return .{
13 | .stack = std.atomic.Stack(PosKey).init(),
14 | };
15 | }
16 |
17 | pub fn deinit(self: ParserPath, allocator: mem.Allocator) void {
18 | var next = self.stack.root;
19 | while (next != null) {
20 | const tmp = next.?.next;
21 | allocator.destroy(next.?);
22 | next = tmp;
23 | }
24 | }
25 |
26 | pub fn push(self: *ParserPath, key: PosKey, allocator: mem.Allocator) !void {
27 | const Node = std.atomic.Stack(PosKey).Node;
28 | const pathNode = try allocator.create(Node);
29 | pathNode.* = .{
30 | .next = undefined,
31 | .data = key,
32 | };
33 | self.stack.push(pathNode);
34 | }
35 |
36 | pub fn clone(self: ParserPath, allocator: mem.Allocator) !ParserPath {
37 | var new = ParserPath.init();
38 | var next = self.stack.root;
39 | while (next != null) : (next = next.?.next) {
40 | try new.push(next.?.data, allocator);
41 | }
42 | return new;
43 | }
44 |
45 | pub fn contains(self: ParserPath, key: PosKey) bool {
46 | var next = self.stack.root;
47 | const eql = std.hash_map.getAutoEqlFn(PosKey, void);
48 | while (next != null) : (next = next.?.next) {
49 | if (eql({}, next.?.data, key)) return true;
50 | }
51 | return false;
52 | }
53 |
54 | pub fn print(self: ParserPath) void {
55 | var next = self.stack.root;
56 | std.debug.print("PATH", .{});
57 | while (next != null) : (next = next.?.next) {
58 | std.debug.print(" -> {}", .{next.?.data});
59 | }
60 | std.debug.print("\n", .{});
61 | }
62 |
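A hedged usage sketch (not part of the original file): the typical lifecycle is `init`, `push` a key as each parser state is entered, query with `contains` (e.g. for cycle detection), and release the stack nodes with `deinit`:

```zig
test "parser_path_sketch" {
    const allocator = std.testing.allocator;
    var path = ParserPath.init();
    defer path.deinit(allocator);

    const key = PosKey{ .node_name = 42, .src_ptr = 0, .offset = 0 };
    try path.push(key, allocator);
    try std.testing.expect(path.contains(key));
    try std.testing.expect(!path.contains(PosKey{ .node_name = 7, .src_ptr = 0, .offset = 0 }));
}
```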
--------------------------------------------------------------------------------
/src/combn/gllparser/gllparser.zig:
--------------------------------------------------------------------------------
1 | pub const ParserPath = @import("ParserPath.zig");
2 |
3 | pub const parser = @import("parser.zig");
4 | pub const Error = parser.Error;
5 | pub const ResultTag = parser.ResultTag;
6 | pub const Result = parser.Result;
7 | pub const PosKey = parser.PosKey;
8 | pub const NodeName = parser.NodeName;
9 | pub const Context = parser.Context;
10 | pub const Parser = parser.Parser;
11 |
12 | pub const result_stream = @import("result_stream.zig");
13 | pub const ResultStream = result_stream.ResultStream;
14 |
--------------------------------------------------------------------------------
/src/combn/gllparser/parser.zig:
--------------------------------------------------------------------------------
1 | const ResultStream = @import("result_stream.zig").ResultStream;
2 | const Iterator = @import("result_stream.zig").Iterator;
3 | const ParserPath = @import("ParserPath.zig");
4 |
5 | const std = @import("std");
6 | const testing = std.testing;
7 | const mem = std.mem;
8 |
9 | pub const Error = error{OutOfMemory};
10 |
11 | pub const ResultTag = enum {
12 | value,
13 | err,
14 | };
15 |
16 | /// deinitOptional invokes value.deinit(allocator), taking into account that the value may be an optional
17 | /// `?Value`, `??Value`, etc.
18 | pub inline fn deinitOptional(value: anytype, allocator: mem.Allocator) void {
19 | switch (@typeInfo(@TypeOf(value))) {
20 | .Optional => if (value) |v| return deinitOptional(v, allocator),
21 | else => value.deinit(allocator),
22 | }
23 | }
24 |
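// A hedged sketch (not part of the original file) of the unwrapping above: optional
// layers are peeled off recursively before `deinit` is called, and a null at any
// layer is a no-op. `Dummy` is a hypothetical value type defined only for this test.
test "deinitOptional" {
    const Dummy = struct {
        pub fn deinit(self: @This(), allocator: mem.Allocator) void {
            _ = self;
            _ = allocator;
        }
    };
    deinitOptional(@as(??Dummy, Dummy{}), testing.allocator); // unwraps twice, calls Dummy.deinit
    deinitOptional(@as(??Dummy, null), testing.allocator); // no-op
}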
25 | /// A parser result, one of:
26 | ///
27 | /// 1. A `value` and new `offset` into the input `src`.
28 | /// 2. An `err` and new `offset` into the input `src` (i.e. the position of the error).
29 | ///
30 | /// A Result always knows how to `deinit` itself.
31 | pub fn Result(comptime Value: type) type {
32 | return struct {
33 | offset: usize,
34 | result: union(ResultTag) {
35 | value: Value,
36 | err: []const u8,
37 | },
38 | owned: bool,
39 |
40 | pub fn init(offset: usize, value: Value) @This() {
41 | return .{
42 | .offset = offset,
43 | .result = .{ .value = value },
44 | .owned = true,
45 | };
46 | }
47 |
48 | pub fn deinit(self: @This(), allocator: mem.Allocator) void {
49 | if (!self.owned) return;
50 | switch (self.result) {
51 | .value => |value| {
52 | deinitOptional(value, allocator);
53 | },
54 | else => {},
55 | }
56 | }
57 |
58 | pub fn toUnowned(self: @This()) @This() {
59 | var tmp = self;
60 | tmp.owned = false;
61 | return tmp;
62 | }
63 |
64 | pub fn initError(offset: usize, err: []const u8) @This() {
65 | return .{
66 | .offset = offset,
67 | .result = .{ .err = err },
68 | .owned = false,
69 | };
70 | }
71 | };
72 | }
73 |
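// A hedged sketch (not part of the original file) of the ownership rules above:
// `toUnowned` makes a shallow copy whose `deinit` is a no-op, so one owned result can
// be shared across several result streams yet freed exactly once. `Dummy` is a
// hypothetical value type defined only for this test.
test "Result ownership" {
    const Dummy = struct {
        pub fn deinit(self: @This(), allocator: mem.Allocator) void {
            _ = self;
            _ = allocator;
        }
    };
    const owned = Result(Dummy).init(5, Dummy{}); // owned = true
    const shared = owned.toUnowned(); // same value and offset, owned = false
    shared.deinit(testing.allocator); // no-op: this copy is unowned
    owned.deinit(testing.allocator); // invokes deinitOptional(value, allocator)
    try testing.expectEqual(@as(usize, 5), shared.offset);
}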
74 | const MemoizeValue = struct {
75 | results: usize, // untyped pointer *ResultStream(Result(Value))
76 | deinit: fn (results: usize, allocator: mem.Allocator) void,
77 | };
78 |
79 | fn MemoizedResult(comptime Value: type) type {
80 | return struct {
81 | results: *ResultStream(Result(Value)),
82 | was_cached: bool,
83 | };
84 | }
85 |
86 | /// A key describing a parser node at a specific position in an input string, as well as the number
87 | /// of times it reentrantly called itself at that exact position.
88 | const ParserPosDepthKey = struct {
89 | pos_key: PosKey,
90 | reentrant_depth: usize,
91 | };
92 |
93 | /// Describes the exact string and offset into it that a parser node is parsing.
94 | pub const PosKey = struct {
95 | node_name: NodeName,
96 | src_ptr: usize,
97 | offset: usize,
98 | };
99 |
100 | /// The name of a parser node. This includes hashes of:
101 | ///
102 | /// * The parser's type name (e.g. "MapTo", "Sequence", etc.)
103 | /// * The actual parser inputs (e.g. the list of parsers to match in a Sequence parser, or for a
104 | /// MapTo parser the input parser to match and the actual function that does mapping.)
105 | ///
106 | /// It is enough to distinctly represent a _single node in the parser graph._ Note that it is NOT
107 | /// the same as:
108 | ///
109 | /// * Identifying a singular parser instance (two parser instances with the same inputs will be
110 | /// "deduplicated" and have the same parser node name.)
111 | /// * Identifying a parser node at a particular position: the parser `offset` position and `src`
112 | ///   string to parse are NOT part of a parser node name; for that, see `PosKey`.
113 | ///
114 | pub const NodeName = u64;
115 |
116 | /// Records a single recursion retry for a parser.
117 | const RecursionRetry = struct {
118 | /// The current reentrant depth of the parser.
119 | depth: usize,
120 |
121 | /// The maximum reentrant depth before this retry attempt will be stopped.
122 | max_depth: usize,
123 | };
124 |
125 | const Memoizer = struct {
126 | /// Parser position & reentrant depth key -> memoized results
127 | memoized: std.AutoHashMap(ParserPosDepthKey, MemoizeValue),
128 |
129 | /// *Parser(T, P) -> computed parser node name.
130 | node_name_cache: std.AutoHashMap(usize, NodeName),
131 |
132 | /// Maps position key -> the currently active recursion retry attempt, if any.
133 | recursion: std.AutoHashMap(PosKey, RecursionRetry),
134 |
135 | /// Memoized values to clean up later, because freeing them inside a reentrant parser
136 | /// invocation is not possible as the parent still intends to use them.
137 | ///
138 | /// TODO(slimsag): consider something like reference counting here to reduce memory
139 | /// footprint.
140 | deferred_cleanups: std.ArrayList(MemoizeValue),
141 |
142 | /// Tells if the given parser node is currently being retried at different maximum reentrant
143 | /// depths as part of a Reentrant combinator.
144 | pub fn isRetrying(self: *@This(), key: PosKey) bool {
145 | const recursion = self.recursion.get(key);
146 | if (recursion == null) return false;
147 | return true;
148 | }
149 |
150 | fn clearPastRecursions(self: *@This(), parser: PosKey, new_max_depth: usize) !void {
151 | var i: usize = 0;
152 | while (i <= new_max_depth) : (i += 1) {
153 | const k = ParserPosDepthKey{
154 | .pos_key = parser,
155 | .reentrant_depth = i,
156 | };
157 | if (self.memoized.get(k)) |memoized| try self.deferred_cleanups.append(memoized);
158 | _ = self.memoized.remove(k);
159 | }
160 | }
161 |
162 | pub fn get(self: *@This(), comptime Value: type, allocator: mem.Allocator, parser_path: ParserPath, parser: PosKey, new_max_depth: ?usize) !MemoizedResult(Value) {
163 | // We memoize results for each unique ParserPosDepthKey, meaning that a parser node can be
164 | // invoked to parse a specific input string at a specific offset recursively in a reentrant
165 | // way up to a maximum depth (new_max_depth). This enables our GLL parser to handle grammars
166 | // that are left-recursive, such as:
167 | //
168 | // ```ebnf
169 | // Expr = Expr?, "abc" ;
170 | // Grammar = Expr ;
171 | // ```
172 | //
173 | // Where an input string "abcabcabc" would require `Expr` be parsed at offset=0 in the
174 | // input string multiple times. How many times? We start out with a maximum reentry depth
175 | // of zero, and if we determine that the parsing is cyclic (a ResultStream subscriber is in
176 | // fact itself the source) we consider that parse path as failed (it matches only the empty
177 | // language) and retry the whole parse path with a new_max_depth of N+1,
178 | // repeating this process until eventually we find the parsing is not cyclic.
179 | //
180 | // It is important to note that this is for handling reentrant parsing _at the same exact
181 | // offset position in the input string_. The GLL parsing algorithm itself handles left
182 | // recursive and right recursive parsing fine on its own, as long as the parse position is
183 | // changing; however, many implementations cannot handle reentrant parsing at the same exact
184 | // offset position in the input string (I am unsure if this is by design, or a limitation
185 | // of the implementations themselves). Packrattle[1], which uses an "optimized" GLL parsing
186 | // algorithm (memoization is localized to parse nodes), is the closest to our algorithm, and
187 | // can handle this type of same-position left recursion in some instances such as with:
188 | //
189 | // ```ebnf
190 | // Expr = Expr?, "abc" ;
191 | // Grammar = Expr, EOF ;
192 | // ```
193 | //
194 | // However, it does so using a _globalized_ retry mechanism[2] which resets the entire
195 | // parser back to an earlier point in time, and only does so if the overall parse failed.
196 | // This also coincidentally means that if the `EOF` matcher is removed (`Grammar = Expr ;`)
197 | // then `Expr` matching becomes "non-greedy", matching just one "abc" value instead of all
198 | // three as when the EOF matcher is in place.
199 | //
200 | // Our implementation here uses node-localized retries, which makes us not subject to the
201 | // same bug as packrattle and more optimized (the entire parse need not fail for us to
202 | // detect and retry in this case; we do so exactly at the reentrant parser node itself.)
203 | //
204 | // [1] https://github.com/robey/packrattle
205 | // [2] https://github.com/robey/packrattle/blob/3db99f2d87abdddb9d29a0d0cf86e272c59d4ddb/src/packrattle/engine.js#L137-L177
206 | //
207 | var reentrant_depth: usize = 0;
208 | const recursionEntry = self.recursion.get(parser);
209 | if (recursionEntry) |entry| {
210 | if (new_max_depth != null) {
211 | // Existing entry, but we want to retry with a new_max_depth;
212 | reentrant_depth = new_max_depth.?;
213 | try self.recursion.put(parser, .{ .depth = new_max_depth.?, .max_depth = new_max_depth.? });
214 | try self.clearPastRecursions(parser, new_max_depth.?);
215 | } else {
216 | // Existing entry, so decrement the remaining depth and continue.
217 | var depth = entry.depth;
218 | if (depth > 0) {
219 | depth -= 1;
220 | }
221 | try self.recursion.put(parser, .{ .depth = depth, .max_depth = entry.max_depth });
222 | reentrant_depth = depth;
223 | }
224 | } else if (new_max_depth != null) {
225 | // No existing entry, want to retry with new_max_depth.
226 | reentrant_depth = new_max_depth.?;
227 | try self.recursion.put(parser, .{ .depth = new_max_depth.?, .max_depth = new_max_depth.? });
228 | try self.clearPastRecursions(parser, new_max_depth.?);
229 | } else {
230 | // No existing entry, but a distant parent parser may be retrying with a max depth that
231 | // we should respect.
232 | var next_node = parser_path.stack.root;
233 | while (next_node) |next| {
234 | const parentRecursionEntry = self.recursion.get(next.data);
235 | if (parentRecursionEntry) |parent_entry| {
236 | reentrant_depth = parent_entry.depth;
237 | try self.clearPastRecursions(parser, parent_entry.max_depth);
238 | break;
239 | }
240 | next_node = next.next;
241 | }
242 | }
243 |
244 | // Do we have an existing result stream for this key?
245 | const m = try self.memoized.getOrPut(ParserPosDepthKey{
246 | .pos_key = parser,
247 | .reentrant_depth = reentrant_depth,
248 | });
249 | if (!m.found_existing) {
250 | // Create a new result stream for this key.
251 | var results = try allocator.create(ResultStream(Result(Value)));
252 | results.* = try ResultStream(Result(Value)).init(allocator, parser);
253 | m.value_ptr.* = MemoizeValue{
254 | .results = @ptrToInt(results),
255 | .deinit = struct {
256 | fn deinit(_resultsPtr: usize, _allocator: mem.Allocator) void {
257 | var _results = @intToPtr(*ResultStream(Result(Value)), _resultsPtr);
258 | _results.deinit();
259 | _allocator.destroy(_results);
260 | }
261 | }.deinit,
262 | };
263 | }
264 | return MemoizedResult(Value){
265 | .results = @intToPtr(*ResultStream(Result(Value)), m.value_ptr.results),
266 | .was_cached = m.found_existing,
267 | };
268 | }
269 |
270 | pub fn init(allocator: mem.Allocator) !*@This() {
271 | var self = try allocator.create(@This());
272 | self.* = .{
273 | .memoized = std.AutoHashMap(ParserPosDepthKey, MemoizeValue).init(allocator),
274 | .node_name_cache = std.AutoHashMap(usize, NodeName).init(allocator),
275 | .recursion = std.AutoHashMap(PosKey, RecursionRetry).init(allocator),
276 | .deferred_cleanups = std.ArrayList(MemoizeValue).init(allocator),
277 | };
278 | return self;
279 | }
280 |
281 | pub fn deinit(self: *@This(), allocator: mem.Allocator) void {
282 | var iter = self.memoized.iterator();
283 | while (iter.next()) |memoized| {
284 | memoized.value_ptr.deinit(memoized.value_ptr.results, allocator);
285 | }
286 | self.memoized.deinit();
287 | self.node_name_cache.deinit();
288 | self.recursion.deinit();
289 | for (self.deferred_cleanups.items) |item| {
290 | item.deinit(item.results, allocator);
291 | }
292 | self.deferred_cleanups.deinit();
293 | allocator.destroy(self);
294 | }
295 | };
296 |
297 | /// Describes context to be given to a `Parser`, such as `input` parameters, an `allocator`, and
298 | /// the actual `src` to parse.
299 | pub fn Context(comptime Input: type, comptime Value: type) type {
300 | return struct {
301 | input: Input,
302 | allocator: mem.Allocator,
303 | src: []const u8,
304 | offset: usize,
305 | results: *ResultStream(Result(Value)),
306 | existing_results: bool,
307 | memoizer: *Memoizer,
308 | key: PosKey,
309 | path: ParserPath,
310 |
311 | pub fn init(allocator: mem.Allocator, src: []const u8, input: Input) !@This() {
312 | var src_ptr: usize = 0;
313 | if (src.len > 0) {
314 | src_ptr = @ptrToInt(&src[0]);
315 | }
316 | const key = .{
317 | .node_name = 0,
318 | .src_ptr = src_ptr,
319 | .offset = 0,
320 | };
321 |
322 | var results = try allocator.create(ResultStream(Result(Value)));
323 | results.* = try ResultStream(Result(Value)).init(allocator, key);
324 | return @This(){
325 | .input = input,
326 | .allocator = allocator,
327 | .src = src,
328 | .offset = 0,
329 | .results = results,
330 | .existing_results = false,
331 | .memoizer = try Memoizer.init(allocator),
332 | .key = key,
333 | .path = ParserPath.init(),
334 | };
335 | }
336 |
337 | pub fn initChild(self: @This(), comptime NewValue: type, node_name: NodeName, offset: usize) !Context(Input, NewValue) {
338 | return self.initChildRetry(NewValue, node_name, offset, null);
339 | }
340 |
341 | /// initChildRetry initializes a child context to be used as a single retry attempt with a
342 | /// new maximum depth of reentrant parser invocations for the child and all of its
343 | /// children.
344 | pub fn initChildRetry(self: @This(), comptime NewValue: type, node_name: NodeName, offset: usize, max_depth: ?usize) !Context(Input, NewValue) {
345 | var src_ptr: usize = 0;
346 | if (self.src.len > 0) {
347 | src_ptr = @ptrToInt(&self.src[0]);
348 | }
349 | const key = PosKey{
350 | .node_name = node_name,
351 | .src_ptr = src_ptr,
352 | .offset = offset,
353 | };
354 | var child_ctx = Context(Input, NewValue){
355 | .input = self.input,
356 | .allocator = self.allocator,
357 | .src = self.src,
358 | .offset = offset,
359 | .results = undefined,
360 | .existing_results = false,
361 | .memoizer = self.memoizer,
362 | .key = key,
363 | .path = try self.path.clone(self.allocator),
364 | };
365 | try child_ctx.path.push(child_ctx.key, self.allocator);
366 |
367 | var memoized = try self.memoizer.get(NewValue, self.allocator, child_ctx.path, key, max_depth);
368 | child_ctx.results = memoized.results;
369 | if (memoized.was_cached) {
370 | child_ctx.existing_results = true;
371 | }
372 | return child_ctx;
373 | }
374 |
375 | /// isRetrying reports whether this context represents a retry initiated previously via
376 | /// initChildRetry (potentially by a distant parent recursive call), indicating that a new
377 | /// reentrant retry should not be attempted.
378 | pub fn isRetrying(self: @This(), node_name: NodeName, offset: usize) bool {
379 | var src_ptr: usize = 0;
380 | if (self.src.len > 0) {
381 | src_ptr = @ptrToInt(&self.src[0]);
382 | }
383 | return self.memoizer.isRetrying(PosKey{
384 | .node_name = node_name,
385 | .src_ptr = src_ptr,
386 | .offset = offset,
387 | });
388 | }
389 |
390 | /// Subscribe to the results from this context. The caller owns the values and is
391 | /// responsible for calling `deinit` on each.
392 | pub fn subscribe(self: @This()) Iterator(Result(Value)) {
393 | return self.results.subscribe(
394 | self.key,
395 | self.path,
396 | Result(Value).initError(self.offset, "matches only the empty language"),
397 | );
398 | }
399 |
400 | pub fn with(self: @This(), new_input: anytype) Context(@TypeOf(new_input), Value) {
401 | return Context(@TypeOf(new_input), Value){
402 | .input = new_input,
403 | .allocator = self.allocator,
404 | .src = self.src,
405 | .offset = self.offset,
406 | .results = self.results,
407 | .existing_results = self.existing_results,
408 | .memoizer = self.memoizer,
409 | .key = self.key,
410 | .path = self.path,
411 | };
412 | }
413 |
414 | pub fn deinit(self: *const @This()) void {
415 | self.results.deinit();
416 | self.allocator.destroy(self.results);
417 | self.memoizer.deinit(self.allocator);
418 | self.path.deinit(self.allocator);
419 | return;
420 | }
421 |
422 | pub fn deinitChild(self: @This()) void {
423 | self.path.deinit(self.allocator);
424 | return;
425 | }
426 | };
427 | }
428 |
429 | /// An interface whose implementation can be swapped out at runtime. It carries an arbitrary
430 | /// `Context` to make the type signature generic, and produces a `Value` of the given type which
431 | /// may vary from parser to parser.
432 | ///
433 | /// The `Payload` type is used to denote a payload of a single type which is typically passed
434 | /// through all parsers in a grammar. Parser and parser combinator implementations should always
435 | /// allow the user to specify this type, and should generally avoid changing the type or using it
436 | /// for their own purposes unless they are e.g. deferring parsing to another language grammar
437 | /// entirely.
438 | pub fn Parser(comptime Payload: type, comptime Value: type) type {
439 | return struct {
440 | const Self = @This();
441 | _parse: fn (self: *const Self, ctx: *const Context(Payload, Value)) callconv(.Async) Error!void,
442 | _nodeName: fn (self: *const Self, node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64,
443 | _deinit: ?fn (self: *Self, allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void,
444 | _countReferencesTo: ?fn (self: *const Self, other: usize, freed: *std.AutoHashMap(usize, void)) usize,
445 | _heap_storage: ?[]u8,
446 | _refs: usize,
447 |
448 | pub fn init(
449 | parseImpl: fn (self: *const Self, ctx: *const Context(Payload, Value)) callconv(.Async) Error!void,
450 | nodeNameImpl: fn (self: *const Self, node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64,
451 | deinitImpl: ?fn (self: *Self, allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void,
452 | countReferencesToImpl: ?fn (self: *const Self, other: usize, freed: *std.AutoHashMap(usize, void)) usize,
453 | ) Self {
454 | return .{
455 | ._parse = parseImpl,
456 | ._nodeName = nodeNameImpl,
457 | ._deinit = deinitImpl,
458 | ._countReferencesTo = countReferencesToImpl,
459 | ._heap_storage = null,
460 | ._refs = 0,
461 | };
462 | }
463 |
464 | /// Allocates and stores the `parent` value (e.g. `Literal(...).init(...)`) on the heap,
465 | /// turning this `Parser` into a heap-allocated one. Returns a pointer to the
466 | /// heap-allocated `&parent.parser`.
467 | pub fn heapAlloc(self: *const Self, allocator: mem.Allocator, parent: anytype) !*Self {
468 | _ = self;
469 | const Parent = @TypeOf(parent);
470 | var memory = try allocator.allocAdvanced(u8, @alignOf(Parent), @sizeOf(Parent), mem.Allocator.Exact.at_least);
471 | var parent_ptr = @ptrCast(*Parent, &memory[0]);
472 | parent_ptr.* = parent;
473 | parent_ptr.parser._heap_storage = memory;
474 | return &parent_ptr.parser;
475 | }
476 |
477 | pub fn ref(self: *Self) *Self {
478 | self._refs += 1;
479 | return self;
480 | }
481 |
482 | pub fn countReferencesTo(self: *Self, other: usize, freed: *std.AutoHashMap(usize, void)) usize {
483 | if (freed.contains(@ptrToInt(self))) return 0;
484 | return if (self._countReferencesTo) |countRefs| countRefs(self, other, freed) else 0;
485 | }
486 |
487 | pub fn deinit(self: *Self, allocator: mem.Allocator, freed: ?*std.AutoHashMap(usize, void)) void {
488 | var freed_parsers = if (freed) |f| f else &std.AutoHashMap(usize, void).init(allocator);
489 | if (freed_parsers.contains(@ptrToInt(self))) {
490 | if (freed == null) {
491 | freed_parsers.deinit();
492 | }
493 | return;
494 | }
495 | if (self._refs > 0) self._refs -= 1;
496 | if (self._refs == 0 or self._refs == self.countReferencesTo(@ptrToInt(self), freed_parsers)) {
497 | freed_parsers.put(@ptrToInt(self), .{}) catch unreachable;
498 | self._refs = 0;
499 | if (self._deinit) |dfn| {
500 | dfn(self, allocator, freed_parsers);
501 | }
502 | if (self._heap_storage) |s| {
503 | allocator.free(s);
504 | }
505 | }
506 | if (freed == null) {
507 | freed_parsers.deinit();
508 | }
509 | }
510 |
511 | pub fn parse(self: *const Self, ctx: *const Context(Payload, Value)) callconv(.Async) Error!void {
512 | var frame = try std.heap.page_allocator.allocAdvanced(u8, 16, @frameSize(self._parse), std.mem.Allocator.Exact.at_least);
513 | defer std.heap.page_allocator.free(frame);
514 | return try await @asyncCall(frame, {}, self._parse, .{ self, ctx });
515 | }
516 |
517 | pub fn nodeName(self: *const Self, node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 {
518 | var v = try node_name_cache.getOrPut(@ptrToInt(self));
519 | if (!v.found_existing) {
520 | v.value_ptr.* = 1337; // sentinel: "currently calculating" (guards the reentrancy check below)
521 | const calculated = try self._nodeName(self, node_name_cache);
522 |
523 | // If self._nodeName added more entries to node_name_cache, ours is now potentially invalid.
524 | var vv = node_name_cache.getEntry(@ptrToInt(self));
525 | vv.?.value_ptr.* = calculated;
526 | return calculated;
527 | }
528 | if (v.value_ptr.* == 1337) {
529 | return 0; // reentrant, don't bother trying to calculate any more recursively
530 | }
531 | return v.value_ptr.*;
532 | }
533 | };
534 | }
535 |
536 | test "syntax" {
537 | _ = Parser(void, []u8);
538 | }
539 |
540 | test "heap_parser" {
541 | nosuspend {
542 | const Literal = @import("../parser/literal.zig").Literal;
543 | const LiteralValue = @import("../parser/literal.zig").Value;
544 |
545 | const allocator = testing.allocator;
546 |
547 | const Payload = void;
548 | var ctx = try Context(Payload, LiteralValue).init(allocator, "hello world", {});
549 | defer ctx.deinit();
550 |
551 | // The parser we'll store on the heap.
552 | var want = "hello";
553 | var literal_parser = Literal(Payload).initStack(want);
554 |
555 | // Move to heap.
556 | var heap_parser = try literal_parser.parser.heapAlloc(allocator, literal_parser);
557 | defer heap_parser.deinit(allocator, null);
558 |
559 | // Use it.
560 | try heap_parser.parse(&ctx);
561 |
562 | var sub = ctx.subscribe();
563 | var first = sub.next().?;
564 | defer first.deinit(ctx.allocator);
565 | try testing.expectEqual(Result(LiteralValue).init(want.len, .{ .value = "hello" }), first);
566 | try testing.expect(sub.next() == null);
567 | }
568 | }
569 |
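570 | // As a sketch of implementing this interface: a hypothetical `Epsilon` parser that always
571 | // matches without consuming any input, modeled closely on `End` in src/combn/parser/end.zig
572 | // (see `Literal` and `ByteRange` there for further reference implementations):
573 | //
574 | // ```zig
575 | // pub const Value = struct {
576 | //     pub fn deinit(self: *const @This(), allocator: mem.Allocator) void {
577 | //         _ = self;
578 | //         _ = allocator;
579 | //     }
580 | // };
581 | //
582 | // pub fn Epsilon(comptime Payload: type) type {
583 | //     return struct {
584 | //         parser: Parser(Payload, Value) = Parser(Payload, Value).init(parse, nodeName, null, null),
585 | //
586 | //         pub fn nodeName(parser: *const Parser(Payload, Value), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 {
587 | //             _ = parser;
588 | //             _ = node_name_cache;
589 | //             return std.hash_map.hashString("Epsilon");
590 | //         }
591 | //
592 | //         pub fn parse(parser: *const Parser(Payload, Value), in_ctx: *const Context(Payload, Value)) callconv(.Async) Error!void {
593 | //             _ = parser;
594 | //             var ctx = in_ctx.with({});
595 | //             defer ctx.results.close(); // always close the stream so subscribers unblock
596 | //             try ctx.results.add(Result(Value).init(ctx.offset, .{})); // match, consume nothing
597 | //         }
598 | //     };
599 | // }
600 | // ```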
--------------------------------------------------------------------------------
/src/combn/gllparser/result_stream.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 | const testing = std.testing;
3 | const mem = std.mem;
4 | const ParserPath = @import("ParserPath.zig");
5 | const PosKey = @import("parser.zig").PosKey;
6 | const deinitOptional = @import("parser.zig").deinitOptional;
7 |
8 | /// A ResultStream iterator.
9 | pub fn Iterator(comptime T: type) type {
10 | return struct {
11 | stream: *ResultStream(T),
12 | index: usize = 0,
13 | subscriber: PosKey,
14 | path: ParserPath,
15 | cyclic_closed: bool = false,
16 | cyclic_error: ?T,
17 |
18 | const Self = @This();
19 |
20 | /// Gets the next value, or null if the end of values has been reached.
21 | ///
22 | /// If the next value is not yet available, the frame is suspended and will be resumed once
23 | /// a new value is added.
24 | pub fn next(self: *Self) callconv(.Async) ?T {
25 | if (self.stream.past_values.items.len == 0 or self.index >= self.stream.past_values.items.len) {
26 | if (self.stream.closed or self.cyclic_closed or self.cyclic_error == null) {
27 | return null; // no more results
28 | }
29 | if (self.path.contains(self.subscriber)) {
30 | // The parser waiting on these results (self.subscriber) is itself a part of
31 | // a larger path of parsers which depend on this result in order to produce a
32 | // result. This indicates a cyclic grammar which parses the empty language,
33 | // e.g. in the most simple form:
34 | //
35 | // Expr = Expr;
36 | // Grammar = Expr;
37 | //
38 | // In practice it may take a more complex form, but regardless it means that
39 | // the subscriber should receive no results.
40 | self.cyclic_closed = true;
41 | return self.cyclic_error.?;
42 | }
43 | // set ourselves up to be resumed later:
44 | self.stream.listeners.append(@frame()) catch unreachable;
45 | suspend {} // wait for more results, or stream close
46 | if (self.stream.closed) {
47 | return null; // no more results
48 | }
49 | }
50 | // return the next result
51 | const v = self.stream.past_values.items[self.index];
52 | self.index += 1;
53 | return v;
54 | }
55 | };
56 | }
57 |
58 | /// A stream of results from a parser.
59 | ///
60 | /// Listeners can be added at any time, and will receive all past values upon
61 | /// subscription.
62 | ///
63 | /// New values can be added at any time.
64 | pub fn ResultStream(comptime T: type) type {
65 | return struct {
66 | past_values: std.ArrayList(T),
67 | listeners: std.ArrayList(anyframe),
68 | closed: bool,
69 | source: PosKey,
70 | allocator: mem.Allocator,
71 |
72 | const Self = @This();
73 |
74 | pub fn init(allocator: mem.Allocator, source: PosKey) !Self {
75 | return Self{
76 | .past_values = std.ArrayList(T).init(allocator),
77 | .listeners = std.ArrayList(anyframe).init(allocator),
78 | .closed = false,
79 | .source = source,
80 | .allocator = allocator,
81 | };
82 | }
83 |
84 | /// adds a value to the stream, resuming the frames of any pending listeners.
85 | ///
86 | /// Added values are owned by the result stream; subscribers borrow them, and they remain
87 | /// valid until the result stream is deinitialized - at which point `deinit(allocator)` is
88 | /// called on all values.
89 | ///
90 | /// Returns only once all pending listeners' frames have been resumed.
91 | pub fn add(self: *Self, value: T) !void {
92 | try self.past_values.append(value);
93 | for (self.listeners.items) |listener| {
94 | resume listener;
95 | }
96 | self.listeners.shrinkRetainingCapacity(0);
97 | }
98 |
99 | /// closes the stream, signaling the end and waiting for all pending listeners' frames to
100 | /// be resumed.
101 | pub fn close(self: *Self) void {
102 | self.closed = true;
103 | for (self.listeners.items) |listener| {
104 | resume listener;
105 | }
106 | self.listeners.shrinkRetainingCapacity(0);
107 | }
108 |
109 | /// deinitializes the stream; all future calls to add or subscribe, and any further use of
110 | /// iterators, are forbidden.
111 | ///
112 | /// All values in this result stream are deinitialized via a call to `v.deinit(allocator)`.
113 | ///
114 | /// `close` must be called before deinit.
115 | pub fn deinit(self: *const Self) void {
116 | for (self.past_values.items) |v| deinitOptional(v, self.allocator);
117 | self.past_values.deinit();
118 | self.listeners.deinit();
119 | }
120 |
121 | /// subscribes to all past and future values of the stream, producing an async iterator.
122 | ///
123 | /// Uses of the returned iterator are valid for as long as the result stream is not
124 | /// deinitialized.
125 | pub fn subscribe(self: *Self, subscriber: PosKey, path: ParserPath, cyclic_error: T) Iterator(T) {
126 | const iter = Iterator(T){
127 | .stream = self,
128 | .subscriber = subscriber,
129 | .path = path,
130 | .cyclic_error = cyclic_error,
131 | };
132 | return iter;
133 | }
134 | };
135 | }
136 |
137 | test "result_stream" {
138 | nosuspend {
139 | const value = struct {
140 | value: i32,
141 |
142 | pub fn deinit(self: *const @This(), allocator: mem.Allocator) void {
143 | _ = self;
144 | _ = allocator;
145 | }
146 | };
147 | const subscriber = PosKey{
148 | .node_name = 0,
149 | .src_ptr = 0,
150 | .offset = 0,
151 | };
152 | const source = subscriber;
153 | const path = ParserPath.init();
154 | var stream = try ResultStream(value).init(testing.allocator, source);
155 | defer stream.deinit();
156 |
157 | // Subscribe and begin to query a value (next() will suspend) before any values have been added
158 | // to the stream.
159 | var sub1 = stream.subscribe(subscriber, path, .{ .value = -1 });
160 | var sub1first = async sub1.next();
161 |
162 | // Add a value to the stream, our first subscription will get it.
163 | try stream.add(.{ .value = 1 });
164 | try testing.expectEqual(@as(i32, 1), (await sub1first).?.value);
165 |
166 | // Query the next value (next() will suspend again), then add a value and close the stream for
167 | // good.
168 | var sub1second = async sub1.next();
169 | try stream.add(.{ .value = 2 });
170 | stream.close();
171 |
172 | // Confirm we get the remaining values, and the null terminator forever after that.
173 | try testing.expectEqual(@as(i32, 2), (await sub1second).?.value);
174 | try testing.expectEqual(@as(?value, null), sub1.next());
175 | try testing.expectEqual(@as(?value, null), sub1.next());
176 |
177 | // Now that the stream is closed, add a new subscription and confirm we get all prior values.
178 | var sub2 = stream.subscribe(subscriber, path, .{ .value = -1 });
179 | try testing.expectEqual(@as(i32, 1), sub2.next().?.value);
180 | try testing.expectEqual(@as(i32, 2), sub2.next().?.value);
181 | try testing.expectEqual(@as(?value, null), sub2.next());
182 | }
183 | }
184 |
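185 | // Cyclic grammar behavior: when the subscribing parser's own key appears on the `path`
186 | // given to `subscribe`, `next()` does not suspend waiting on a value that can never
187 | // arrive; it yields the `cyclic_error` value exactly once and then null. A rough sketch,
188 | // inside a `nosuspend` block and reusing `value`, `subscriber`, and `source` from the
189 | // test above:
190 | //
191 | // ```zig
192 | // var stream2 = try ResultStream(value).init(testing.allocator, source);
193 | // var cyclic_path = ParserPath.init();
194 | // try cyclic_path.push(subscriber, testing.allocator); // the subscriber depends on itself
195 | // var sub3 = stream2.subscribe(subscriber, cyclic_path, .{ .value = -1 });
196 | // _ = sub3.next(); // yields the cyclic error value (.{ .value = -1 }) without suspending
197 | // _ = sub3.next(); // yields null thereafter
198 | // stream2.close();
199 | // stream2.deinit();
200 | // cyclic_path.deinit(testing.allocator);
201 | // ```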
--------------------------------------------------------------------------------
/src/combn/parser/byte_range.zig:
--------------------------------------------------------------------------------
1 | const gllparser = @import("../gllparser/gllparser.zig");
2 | const Error = gllparser.Error;
3 | const Parser = gllparser.Parser;
4 | const ParserContext = gllparser.Context;
5 | const Result = gllparser.Result;
6 | const NodeName = gllparser.NodeName;
7 |
8 | const std = @import("std");
9 | const testing = std.testing;
10 | const mem = std.mem;
11 |
12 | pub const Context = struct {
13 | // from byte (inclusive)
14 | from: u8,
15 |
16 | // to byte (inclusive)
17 | to: u8,
18 | };
19 |
20 | pub const Value = struct {
21 | value: u8,
22 |
23 | pub fn deinit(self: *const @This(), allocator: mem.Allocator) void {
24 | _ = self;
25 | _ = allocator;
26 | }
27 | };
28 |
29 | /// Matches any single byte in the specified range.
30 | pub fn ByteRange(comptime Payload: type) type {
31 | return struct {
32 | parser: Parser(Payload, Value) = Parser(Payload, Value).init(parse, nodeName, null, null),
33 | input: Context,
34 |
35 | const Self = @This();
36 |
37 | pub fn init(allocator: mem.Allocator, input: Context) !*Parser(Payload, Value) {
38 | const self = Self{ .input = input };
39 | return try self.parser.heapAlloc(allocator, self);
40 | }
41 |
42 | pub fn initStack(input: Context) Self {
43 | return Self{ .input = input };
44 | }
45 |
46 | pub fn nodeName(parser: *const Parser(Payload, Value), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 {
47 | _ = node_name_cache;
48 | const self = @fieldParentPtr(Self, "parser", parser);
49 |
50 | var v = std.hash_map.hashString("ByteRange");
51 | v +%= self.input.from;
52 | v +%= self.input.to;
53 | return v;
54 | }
55 |
56 | pub fn parse(parser: *const Parser(Payload, Value), in_ctx: *const ParserContext(Payload, Value)) callconv(.Async) !void {
57 | const self = @fieldParentPtr(Self, "parser", parser);
58 | var ctx = in_ctx.with(self.input);
59 | defer ctx.results.close();
60 |
61 | const src = ctx.src[ctx.offset..];
62 | if (src.len == 0 or src[0] < self.input.from or src[0] > self.input.to) {
63 | // TODO(slimsag): include in error message the expected range (or "any byte" if full range)
64 | try ctx.results.add(Result(Value).initError(ctx.offset + 1, "expected byte range"));
65 | return;
66 | }
67 | try ctx.results.add(Result(Value).init(ctx.offset + 1, .{ .value = src[0] }));
68 | return;
69 | }
70 | };
71 | }
72 |
73 | test "byte_range" {
74 | nosuspend {
75 | const allocator = testing.allocator;
76 |
77 | const Payload = void;
78 | var ctx = try ParserContext(Payload, Value).init(allocator, "hello world", {});
79 | defer ctx.deinit();
80 |
81 | var any_byte = try ByteRange(Payload).init(allocator, .{ .from = 0, .to = 255 });
82 | defer any_byte.deinit(allocator, null);
83 | try any_byte.parse(&ctx);
84 |
85 | var sub = ctx.subscribe();
86 | var first = sub.next().?;
87 | defer first.deinit(ctx.allocator);
88 | try testing.expectEqual(Result(Value).init(1, .{ .value = 'h' }), first);
89 | try testing.expect(sub.next() == null);
90 | }
91 | }
92 |
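93 | // For example, a decimal-digit matcher is constructed the same way. A sketch following the
94 | // test above, with a fresh `ctx` over "hello world":
95 | //
96 | // ```zig
97 | // var digit = try ByteRange(Payload).init(allocator, .{ .from = '0', .to = '9' });
98 | // defer digit.deinit(allocator, null);
99 | // try digit.parse(&ctx); // 'h' is outside '0'..'9': yields "expected byte range" at offset 1
100 | // ```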
--------------------------------------------------------------------------------
/src/combn/parser/end.zig:
--------------------------------------------------------------------------------
1 | const gllparser = @import("../gllparser/gllparser.zig");
2 | const Error = gllparser.Error;
3 | const Parser = gllparser.Parser;
4 | const ParserContext = gllparser.Context;
5 | const Result = gllparser.Result;
6 | const NodeName = gllparser.NodeName;
7 |
8 | const std = @import("std");
9 | const testing = std.testing;
10 | const mem = std.mem;
11 |
12 | pub const Value = struct {
13 | pub fn deinit(self: *const @This(), allocator: mem.Allocator) void {
14 | _ = self;
15 | _ = allocator;
16 | }
17 | };
18 |
19 | /// Matches the end of the `input` string.
20 | pub fn End(comptime Payload: type) type {
21 | return struct {
22 | parser: Parser(Payload, Value) = Parser(Payload, Value).init(parse, nodeName, null, null),
23 |
24 | const Self = @This();
25 |
26 | pub fn init(allocator: mem.Allocator) !*Parser(Payload, Value) {
27 | const self = Self{};
28 | return try self.parser.heapAlloc(allocator, self);
29 | }
30 |
31 | pub fn initStack() Self {
32 | return Self{};
33 | }
34 |
35 | pub fn nodeName(parser: *const Parser(Payload, Value), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 {
36 | _ = parser;
37 | _ = node_name_cache;
38 | return std.hash_map.hashString("End");
39 | }
40 |
41 | pub fn parse(parser: *const Parser(Payload, Value), in_ctx: *const ParserContext(Payload, Value)) callconv(.Async) !void {
42 | _ = parser;
43 | var ctx = in_ctx.with({});
44 | defer ctx.results.close();
45 |
46 | if (ctx.offset != ctx.src.len) {
47 | try ctx.results.add(Result(Value).initError(ctx.offset + 1, "expected end of input"));
48 | return;
49 | }
50 | try ctx.results.add(Result(Value).init(ctx.offset, .{}));
51 | return;
52 | }
53 | };
54 | }
55 |
56 | test "end" {
57 | nosuspend {
58 | const allocator = testing.allocator;
59 |
60 | const Payload = void;
61 | var ctx = try ParserContext(Payload, Value).init(allocator, "", {});
62 | defer ctx.deinit();
63 |
64 | var e = try End(Payload).init(allocator);
65 | defer e.deinit(allocator, null);
66 | try e.parse(&ctx);
67 |
68 | var sub = ctx.subscribe();
69 | var first = sub.next().?;
70 | defer first.deinit(ctx.allocator);
71 | try testing.expectEqual(Result(Value).init(0, .{}), first);
72 | try testing.expect(sub.next() == null);
73 | }
74 | }
75 |
--------------------------------------------------------------------------------
/src/combn/parser/literal.zig:
--------------------------------------------------------------------------------
1 | const gllparser = @import("../gllparser/gllparser.zig");
2 | const Error = gllparser.Error;
3 | const Parser = gllparser.Parser;
4 | const ParserContext = gllparser.Context;
5 | const Result = gllparser.Result;
6 | const NodeName = gllparser.NodeName;
7 |
8 | const std = @import("std");
9 | const testing = std.testing;
10 | const mem = std.mem;
11 |
12 | pub const Context = []const u8;
13 |
14 | pub const Value = struct {
15 | /// The `input` string itself.
16 | value: []const u8,
17 |
18 | pub fn deinit(self: *const @This(), allocator: mem.Allocator) void {
19 | _ = self;
20 | _ = allocator;
21 | }
22 | };
23 |
24 | /// Matches the literal `input` string.
25 | ///
26 | /// The `input` string must remain alive for as long as the `Literal` parser will be used.
27 | pub fn Literal(comptime Payload: type) type {
28 | return struct {
29 | parser: Parser(Payload, Value) = Parser(Payload, Value).init(parse, nodeName, null, null),
30 | input: Context,
31 |
32 | const Self = @This();
33 |
34 | pub fn init(allocator: mem.Allocator, input: Context) !*Parser(Payload, Value) {
35 | const self = Self{ .input = input };
36 | return try self.parser.heapAlloc(allocator, self);
37 | }
38 |
39 | pub fn initStack(input: Context) Self {
40 | return Self{ .input = input };
41 | }
42 |
43 | pub fn nodeName(parser: *const Parser(Payload, Value), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 {
44 | _ = node_name_cache;
45 | const self = @fieldParentPtr(Self, "parser", parser);
46 |
47 | var v = std.hash_map.hashString("Literal");
48 | v +%= std.hash_map.hashString(self.input);
49 | return v;
50 | }
51 |
52 | pub fn parse(parser: *const Parser(Payload, Value), in_ctx: *const ParserContext(Payload, Value)) callconv(.Async) !void {
53 | const self = @fieldParentPtr(Self, "parser", parser);
54 | var ctx = in_ctx.with(self.input);
55 | defer ctx.results.close();
56 |
57 | if (ctx.offset >= ctx.src.len or !mem.startsWith(u8, ctx.src[ctx.offset..], ctx.input)) {
58 | // TODO(slimsag): include what literal was expected
59 | try ctx.results.add(Result(Value).initError(ctx.offset + 1, "expected literal"));
60 | return;
61 | }
62 | try ctx.results.add(Result(Value).init(ctx.offset + ctx.input.len, .{ .value = self.input }));
63 | return;
64 | }
65 | };
66 | }
67 |
68 | test "literal" {
69 | nosuspend {
70 | const allocator = testing.allocator;
71 |
72 | const Payload = void;
73 | var ctx = try ParserContext(Payload, Value).init(allocator, "hello world", {});
74 | defer ctx.deinit();
75 |
76 | var want = "hello";
77 | var l = try Literal(Payload).init(allocator, want);
78 | defer l.deinit(allocator, null);
79 | try l.parse(&ctx);
80 |
81 | var sub = ctx.subscribe();
82 | var first = sub.next().?;
83 | defer first.deinit(ctx.allocator);
84 | try testing.expectEqual(Result(Value).init(want.len, .{ .value = "hello" }), first);
85 | try testing.expect(sub.next() == null);
86 | }
87 | }
88 |
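89 | // Note the offset conventions visible in `parse` above: a successful match reports
90 | // `ctx.offset + input.len` (5 for "hello"), while a failed match reports the error at
91 | // `ctx.offset + 1`. A sketch of the failure case, with a fresh `ctx` over "hello world"
92 | // and a hypothetical non-matching literal:
93 | //
94 | // ```zig
95 | // var nope = try Literal(Payload).init(allocator, "goodbye");
96 | // defer nope.deinit(allocator, null);
97 | // try nope.parse(&ctx); // yields "expected literal" at offset 1
98 | // ```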
--------------------------------------------------------------------------------
/src/combn/parser/parser.zig:
--------------------------------------------------------------------------------
1 | pub const byte_range = @import("byte_range.zig");
2 | pub const ByteRange = byte_range.ByteRange;
3 |
4 | pub const end = @import("end.zig");
5 | pub const End = end.End;
6 |
7 | pub const literal = @import("literal.zig");
8 | pub const Literal = literal.Literal;
9 |
10 | test "include" {
11 | _ = ByteRange;
12 | _ = End;
13 | _ = Literal;
14 | }
15 |
--------------------------------------------------------------------------------
/src/combn/test_complex.zig:
--------------------------------------------------------------------------------
1 | const combn = @import("combn.zig");
2 | const Result = combn.gllparser.Result;
3 | const Parser = combn.gllparser.Parser;
4 | const Error = combn.gllparser.Error;
5 | const Context = combn.gllparser.Context;
6 | const PosKey = combn.gllparser.PosKey;
7 | const ParserPath = combn.gllparser.ParserPath;
8 | const Literal = combn.parser.Literal;
9 | const LiteralValue = combn.parser.literal.Value;
10 | const MapTo = combn.combinator.MapTo;
11 | const Optional = combn.combinator.Optional;
12 | const Reentrant = combn.combinator.Reentrant;
13 | const SequenceAmbiguous = combn.combinator.SequenceAmbiguous;
14 | const SequenceAmbiguousValue = combn.combinator.sequence_ambiguous.Value;
15 |
16 | const std = @import("std");
17 | const mem = std.mem;
18 | const testing = std.testing;
19 |
20 | // Confirms that a direct left-recursive grammar for an empty language actually rejects
21 | // all input strings, and does not just hang indefinitely:
22 | //
23 | // ```ebnf
24 | // Expr = Expr ;
25 | // Grammar = Expr ;
26 | // ```
27 | //
28 | // See https://cs.stackexchange.com/q/138447/134837
29 | test "direct_left_recursion_empty_language" {
30 | nosuspend {
31 | const allocator = testing.allocator;
32 |
33 | const node = struct {
34 | name: []const u8,
35 |
36 | pub fn deinit(self: *const @This(), _allocator: mem.Allocator) void {
37 | _ = self;
38 | _ = _allocator;
39 | }
40 | };
41 |
42 | const Payload = void;
43 | const ctx = try Context(Payload, node).init(allocator, "abcabcabc123abc", {});
44 | defer ctx.deinit();
45 |
46 | var parsers = [_]*Parser(Payload, node){
47 | undefined, // placeholder for left-recursive Expr itself
48 | };
49 | var expr = try MapTo(Payload, SequenceAmbiguousValue(node), node).init(allocator, .{
50 | .parser = (try SequenceAmbiguous(Payload, node).init(allocator, &parsers, .borrowed)).ref(),
51 | .mapTo = struct {
52 | fn mapTo(in: Result(SequenceAmbiguousValue(node)), payload: Payload, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(node) {
53 | _ = payload;
54 | switch (in.result) {
55 | .err => return Result(node).initError(in.offset, in.result.err),
56 | else => {
57 | var flattened = try in.result.value.flatten(_allocator, key, path);
58 | defer flattened.deinit();
59 | return Result(node).init(in.offset, node{ .name = "Expr" });
60 | },
61 | }
62 | }
63 | }.mapTo,
64 | });
65 | defer expr.deinit(allocator, null);
66 | parsers[0] = expr.ref();
67 | try expr.parse(&ctx);
68 |
69 | var sub = ctx.subscribe();
70 | var first = sub.next().?;
71 | try testing.expect(sub.next() == null); // stream closed
72 |
73 | // TODO(slimsag): perhaps better if it's not an error?
74 | try testing.expectEqual(@as(usize, 0), first.offset);
75 | try testing.expectEqualStrings("matches only the empty language", first.result.err);
76 | }
77 | }
78 |
79 | // Confirms that a direct left-recursive grammar for a valid language works:
80 | //
81 | // ```ebnf
82 | // Expr = Expr?, "abc" ;
83 | // Grammar = Expr ;
84 | // ```
85 | //
86 | test "direct_left_recursion" {
87 | const allocator = testing.allocator;
88 |
89 | const node = struct {
90 | name: std.ArrayList(u8),
91 |
92 | pub fn deinit(self: *const @This(), _allocator: mem.Allocator) void {
93 | _ = _allocator;
94 | self.name.deinit();
95 | }
96 | };
97 |
98 | const Payload = void;
99 | const ctx = try Context(Payload, node).init(allocator, "abcabcabc123abc", {});
100 | defer ctx.deinit();
101 |
102 | var abcAsNode = try MapTo(Payload, LiteralValue, node).init(allocator, .{
103 | .parser = (try Literal(Payload).init(allocator, "abc")).ref(),
104 | .mapTo = struct {
105 | fn mapTo(in: Result(LiteralValue), payload: Payload, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(node) {
106 | _ = payload;
107 | _ = key;
108 | _ = path;
109 | // `_allocator` is not discarded: it is used below to allocate the node name.
110 | switch (in.result) {
111 | .err => return Result(node).initError(in.offset, in.result.err),
112 | else => {
113 | var name = std.ArrayList(u8).init(_allocator);
114 | try name.appendSlice("abc");
115 | return Result(node).init(in.offset, node{ .name = name });
116 | },
117 | }
118 | }
119 | }.mapTo,
120 | });
121 |
122 | var parsers = [_]*Parser(Payload, node){
123 | undefined, // placeholder for left-recursive Expr itself
124 | abcAsNode.ref(),
125 | };
126 | var expr = try Reentrant(Payload, node).init(
127 | allocator,
128 | try MapTo(Payload, SequenceAmbiguousValue(node), node).init(allocator, .{
129 | .parser = (try SequenceAmbiguous(Payload, node).init(allocator, &parsers, .borrowed)).ref(),
130 | .mapTo = struct {
131 | fn mapTo(in: Result(SequenceAmbiguousValue(node)), payload: Payload, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(node) {
132 | _ = payload;
133 | switch (in.result) {
134 | .err => return Result(node).initError(in.offset, in.result.err),
135 | else => {
136 | var name = std.ArrayList(u8).init(_allocator);
137 |
138 | var flattened = try in.result.value.flatten(_allocator, key, path);
139 | defer flattened.deinit();
140 | var sub = flattened.subscribe(key, path, Result(node).initError(0, "matches only the empty language"));
141 | try name.appendSlice("(");
142 | var prev = false;
143 | while (sub.next()) |next| {
144 | if (prev) {
145 | try name.appendSlice(",");
146 | }
147 | prev = true;
148 | try name.appendSlice(next.result.value.name.items);
149 | }
150 | try name.appendSlice(")");
151 | return Result(node).init(in.offset, node{ .name = name });
152 | },
153 | }
154 | }
155 | }.mapTo,
156 | }),
157 | );
158 | var optionalExpr = try MapTo(Payload, ?node, node).init(allocator, .{
159 | .parser = (try Optional(Payload, node).init(allocator, expr.ref())).ref(),
160 | .mapTo = struct {
161 | fn mapTo(in: Result(?node), payload: Payload, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(node) {
162 | _ = payload;
163 | _ = key;
164 | _ = path;
165 | switch (in.result) {
166 | .err => return Result(node).initError(in.offset, in.result.err),
167 | else => {
168 | if (in.result.value == null) {
169 | var name = std.ArrayList(u8).init(_allocator);
170 | try name.appendSlice("null");
171 | return Result(node).init(in.offset, node{ .name = name });
172 | }
173 |
174 | var name = std.ArrayList(u8).init(_allocator);
175 | try name.appendSlice(in.result.value.?.name.items);
176 | return Result(node).init(in.offset, node{ .name = name });
177 | },
178 | }
179 | }
180 | }.mapTo,
181 | });
182 | parsers[0] = optionalExpr.ref();
183 | defer expr.deinit(allocator, null);
184 | try expr.parse(&ctx);
185 |
186 | var sub = ctx.subscribe();
187 | var first = sub.next().?;
188 | try testing.expect(sub.next() == null); // stream closed
189 |
190 | try testing.expectEqual(@as(usize, 0), first.offset);
191 | try testing.expectEqualStrings("(((null,abc),abc),abc)", first.result.value.name.items);
192 | }
193 |
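194 | // The expected name "(((null,abc),abc),abc)" records the left-recursive derivation: the
195 | // innermost "null" is the optional Expr that matched nothing, and each successive "abc"
196 | // nests one more `Expr = Expr?, "abc"` application around it. The trailing "123abc" of the
197 | // input is not consumed, since the grammar cannot match it.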
--------------------------------------------------------------------------------
/src/dsl/Compilation.zig:
--------------------------------------------------------------------------------
1 | //! A Compilation is the result of parsing Zorex's DSL syntax. That is, the Zorex DSL is parsed
2 | //! to produce a Compilation (not e.g. an AST), which is itself a parser that, when invoked,
3 | //! parses the syntax described by the DSL.
4 |
5 | const combn = @import("../combn/combn.zig");
6 | const Parser = combn.gllparser.Parser;
7 |
8 | const String = @import("String.zig");
9 | const Node = @import("Node.zig");
10 | const CompilerContext = @import("CompilerContext.zig");
11 |
12 | const std = @import("std");
13 | const mem = std.mem;
14 |
15 | const Compilation = @This();
16 |
17 | value: union(ValueTag) {
18 | parser: CompiledParser,
19 | identifier: String,
20 | },
21 |
22 | pub const CompiledParser = struct {
23 | ptr: *Parser(void, *Node),
24 | slice: ?[]*const Parser(void, *Node),
25 |
26 | pub fn deinit(self: @This(), allocator: mem.Allocator) void {
27 | self.ptr.deinit(allocator, null);
28 | if (self.slice) |slice| {
29 | allocator.free(slice);
30 | }
31 | }
32 | };
33 |
34 | pub const ValueTag = enum {
35 | parser,
36 | identifier,
37 | };
38 |
39 | pub fn initParser(parser: CompiledParser) Compilation {
40 | return .{ .value = .{ .parser = parser } };
41 | }
42 |
43 | pub fn initIdentifier(identifier: String) Compilation {
44 | return .{ .value = .{ .identifier = identifier } };
45 | }
46 |
47 | pub fn deinit(self: *const Compilation, allocator: mem.Allocator) void {
48 | switch (self.value) {
49 | .parser => |v| v.deinit(allocator),
50 | .identifier => |v| v.deinit(allocator),
51 | }
52 | }
53 |
54 | const HashContext = struct {
55 | pub fn hash(self: @This(), key: Compilation) u64 {
56 | _ = self;
57 | return switch (key.value) {
58 | .parser => |p| @ptrToInt(p.ptr),
59 | .identifier => |ident| std.hash_map.hashString(ident.value),
60 | };
61 | }
62 |
63 | pub fn eql(self: @This(), a: Compilation, b: Compilation) bool {
64 | _ = self;
65 | return switch (a.value) {
66 | .parser => |aa| switch (b.value) {
67 | .parser => |bb| aa.ptr == bb.ptr,
68 | .identifier => false,
69 | },
70 | .identifier => |aa| switch (b.value) {
71 | .parser => false,
72 | .identifier => |bb| std.mem.eql(u8, aa.value, bb.value),
73 | },
74 | };
75 | }
76 | };
77 |
78 | pub const HashMap = std.HashMap(Compilation, Compilation, HashContext, std.hash_map.default_max_load_percentage);
79 |
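80 | // A Compilation wrapping a parser is used by invoking the compiled parser directly. A
81 | // sketch following the "DSL" test in src/dsl/compiler.zig, assuming an `allocator` and a
82 | // `ctx` of type Context(void, *Node) are in scope:
83 | //
84 | // ```zig
85 | // var compilerResult = try compiler.compile(allocator, "/a/");
86 | // defer compilerResult.deinit(allocator);
87 | // const program = compilerResult.compilation.result.value;
88 | // try program.value.parser.ptr.parse(&ctx);
89 | // ```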
--------------------------------------------------------------------------------
/src/dsl/CompilerContext.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 | const mem = std.mem;
3 |
4 | const Compilation = @import("Compilation.zig");
5 |
6 | const CompilerContext = @This();
7 |
8 | identifiers: Compilation.HashMap,
9 |
10 | pub fn init(allocator: mem.Allocator) !*CompilerContext {
11 | const compilerContext = try allocator.create(CompilerContext);
12 | compilerContext.* = CompilerContext{
13 | .identifiers = Compilation.HashMap.init(allocator),
14 | };
15 | return compilerContext;
16 | }
17 |
18 | pub fn deinit(self: *CompilerContext, allocator: mem.Allocator) void {
19 | self.identifiers.deinit();
20 | allocator.destroy(self);
21 | }
22 |
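23 | // `identifiers` maps identifier Compilations to the parser Compilations they were defined
24 | // as; it is presumably populated as `Definition`s are compiled, and it is queried when an
25 | // identifier expression is referenced (see `identifier_expr` in src/dsl/grammar.zig).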
--------------------------------------------------------------------------------
/src/dsl/Node.zig:
--------------------------------------------------------------------------------
1 | //! The result of executing a Compilation is a tree of nodes describing the parsed language.
2 |
3 | const String = @import("String.zig");
4 |
5 | const std = @import("std");
6 | const mem = std.mem;
7 |
8 | const Node = @This();
9 |
10 | name: String,
11 | value: ?String,
12 | refs: usize,
13 | children: ?[]*Node,
14 |
15 | pub const Error = error{OutOfMemory};
16 |
17 | pub fn init(allocator: mem.Allocator, name: String, value: ?String) !*Node {
18 | var self = try allocator.create(Node);
19 | self.* = .{
20 | .name = name,
21 | .value = value,
22 | .refs = 1,
23 | .children = null,
24 | };
25 | return self;
26 | }
27 |
28 | pub fn ref(self: *Node) *Node {
29 | self.refs += 1;
30 | return self;
31 | }
32 |
33 | pub fn deinit(self: *Node, allocator: mem.Allocator) void {
34 | std.debug.assert(self.refs > 0); // refs is unsigned, so guard against double-deinit before the decrement
35 | self.refs -= 1;
36 | if (self.refs == 0) {
37 | self.name.deinit(allocator);
38 | if (self.value) |v| v.deinit(allocator);
39 | if (self.children) |children| {
40 | for (children) |child| child.deinit(allocator);
41 | allocator.free(children);
42 | }
43 | allocator.destroy(self);
44 | }
45 | }
46 |
47 | pub fn writeJSON(self: *const Node, allocator: mem.Allocator, out_stream: anytype) Error!void {
48 | var w = std.json.WriteStream(@TypeOf(out_stream), 5).init(out_stream);
49 |
50 | var ptrToID = std.AutoHashMap(*const Node, i32).init(allocator);
51 | defer ptrToID.deinit();
52 |
53 | try w.beginArray();
54 | try self._writeJSON(&w, &ptrToID);
55 | try w.endArray();
56 | }
57 |
58 | fn _writeJSON(self: *const Node, w: anytype, ptrToID: *std.AutoHashMap(*const Node, i32)) Error!void {
59 | if (self.children) |children| for (children) |child| try child._writeJSON(w, ptrToID);
60 |
61 | var v = try ptrToID.getOrPut(self);
62 | if (v.found_existing) return; // visited already
63 |
64 | v.value_ptr.* = @intCast(i32, ptrToID.count() - 1);
65 | try w.arrayElem();
66 | try w.beginObject();
67 | try w.objectField("name");
68 | try w.emitString(self.name.value);
69 | if (self.value) |value| {
70 | try w.objectField("value");
71 | try w.emitString(value.value);
72 | }
73 | if (self.children) |children| {
74 | try w.objectField("children");
75 | try w.beginArray();
76 | for (children) |child| {
77 | try w.arrayElem();
78 | try w.emitNumber(ptrToID.get(child).?);
79 | }
80 | try w.endArray();
81 | }
82 | try w.endObject();
83 | }
84 |
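85 | // `writeJSON` emits a flat JSON array in child-before-parent order, with `children` given
86 | // as indices into that array; shared nodes are emitted only once and then referenced by
87 | // index. For example (names illustrative; compare the expectations in src/dsl/Program.zig),
88 | // an unnamed root with a single child serializes as:
89 | //
90 | // ```json
91 | // [
92 | //   { "name": "child" },
93 | //   { "name": "unknown", "children": [0] }
94 | // ]
95 | // ```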
--------------------------------------------------------------------------------
/src/dsl/Program.zig:
--------------------------------------------------------------------------------
1 | //! The public interface for compiling and running Zorex programs.
2 |
3 | const compiler = @import("compiler.zig");
4 | const CompilerResult = @import("compiler.zig").CompilerResult;
5 | const Compilation = @import("Compilation.zig");
6 | const Node = @import("Node.zig");
7 | const CompilerContext = @import("CompilerContext.zig");
8 |
9 | const combn = @import("../combn/combn.zig");
10 | const Context = combn.gllparser.Context;
11 | const Result = combn.gllparser.Result;
12 |
13 | const std = @import("std");
14 | const testing = std.testing;
15 | const mem = std.mem;
16 | const assert = std.debug.assert;
17 |
18 | const Program = @This();
19 |
20 | /// If compile() fails, this error message and offset explain why and where.
21 | error_message: ?[]const u8,
22 | error_offset: usize,
23 |
24 | /// The source of the program, null after successful compilation.
25 | src: ?[]const u8,
26 |
27 | /// The compiled program.
28 | program: ?CompilerResult,
29 |
30 | /// Context for the program.
31 | context: ?Context(void, *Node),
32 |
33 | allocator: mem.Allocator,
34 |
35 | pub const Error = error{
36 | OutOfMemory,
37 | CompilationFailed,
38 | };
39 |
40 | /// Initializes a new program with the given source, which is borrowed until compile() is called
41 | /// and returns.
42 | pub fn init(allocator: mem.Allocator, src: []const u8) Program {
43 | return Program{
44 | .error_message = null,
45 | .error_offset = 0,
46 | .src = src,
47 | .program = null,
48 | .context = null,
49 | .allocator = allocator,
50 | };
51 | }
52 |
53 | /// Compiles the program, returning an error if compilation fails.
54 | pub fn compile(self: *Program) !void {
55 | // Compile the syntax.
56 | var compilerResult = try compiler.compile(self.allocator, self.src.?);
57 | switch (compilerResult.compilation.result) {
58 | .err => |e| {
59 | self.error_message = e;
60 | self.error_offset = compilerResult.compilation.offset;
61 | compilerResult.deinit(self.allocator);
62 | return Error.CompilationFailed;
63 | },
64 | .value => {},
65 | }
66 | self.program = compilerResult;
67 | self.src = null;
68 | }
69 |
70 | /// Executes the program with the given input.
71 | pub fn execute(self: *Program, input: []const u8) !*Node {
72 | nosuspend {
73 | self.context = try Context(void, *Node).init(self.allocator, input, {});
74 |
75 | const compilation = self.program.?.compilation.result.value;
76 | try compilation.value.parser.ptr.parse(&self.context.?);
77 |
78 | var sub = self.context.?.subscribe();
79 | var first = sub.next().?;
80 | assert(sub.next() == null); // no ambiguous parse paths here
81 | return first.result.value;
82 | }
83 | }
84 |
85 | pub fn deinit(self: *const Program) void {
86 | if (self.program) |prog| {
87 | self.context.?.deinit();
88 | prog.deinit(self.allocator);
89 | }
90 | }
91 |
92 | test "example_regex" {
93 | const allocator = testing.allocator;
94 |
95 | // Compile the regex.
96 | var program = Program.init(allocator, "/a/");
97 | defer program.deinit();
98 | program.compile() catch |err| switch (err) {
99 | Error.CompilationFailed => @panic(program.error_message.?),
100 | else => unreachable,
101 | };
102 |
103 | // Execute the regex.
104 | const input = "hmmm";
105 | const result = try program.execute(input);
106 |
107 | // Serialize to JSON.
108 | var buffer = std.ArrayList(u8).init(allocator);
109 | defer buffer.deinit();
110 | try result.writeJSON(allocator, buffer.writer());
111 |
112 | // Confirm the results.
113 | try testing.expectEqualStrings(
114 | \\[
115 | \\ {
116 | \\ "name": "TODO(slimsag): value from parsing regexp!"
117 | \\ }
118 | \\]
119 | , buffer.items);
120 | }
121 |
122 | test "example_zorex" {
123 | const allocator = testing.allocator;
124 |
125 | // Compile the zorex.
126 | var program = Program.init(allocator, "Date = /a/; Date");
127 | defer program.deinit();
128 | program.compile() catch |err| switch (err) {
129 | Error.CompilationFailed => @panic(program.error_message.?),
130 | else => unreachable,
131 | };
132 |
133 | // Execute the zorex.
134 | const input = "hmmm";
135 | const result = try program.execute(input);
136 |
137 | // Serialize to JSON.
138 | var buffer = std.ArrayList(u8).init(allocator);
139 | defer buffer.deinit();
140 | try result.writeJSON(allocator, buffer.writer());
141 |
142 | // Confirm the results.
143 | try testing.expectEqualStrings(
144 | \\[
145 | \\ {
146 | \\ "name": "TODO(slimsag): value from parsing regexp!"
147 | \\ },
148 | \\ {
149 | \\ "name": "unknown",
150 | \\ "children": [
151 | \\ 0
152 | \\ ]
153 | \\ }
154 | \\]
155 | , buffer.items);
156 | }
157 |
--------------------------------------------------------------------------------
/src/dsl/String.zig:
--------------------------------------------------------------------------------
1 | //! A string that is either unowned (e.g. a slice into another string) or owned, and able to deinit
2 | //! itself accordingly.
3 |
4 | const std = @import("std");
5 | const mem = std.mem;
6 |
7 | value: []const u8,
8 | owned: bool,
9 |
10 | pub fn initOwned(value: []const u8) !@This() {
11 | return .{ .value = value, .owned = true };
12 | }
13 |
14 | pub fn init(value: []const u8) @This() {
15 | return .{ .value = value, .owned = false };
16 | }
17 |
18 | pub fn deinit(self: *const @This(), allocator: mem.Allocator) void {
19 | if (self.owned) allocator.free(self.value);
20 | }
21 |
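22 | // A usage sketch from a caller's perspective (with `String = @import("String.zig")` and an
23 | // `allocator` in scope): `init` borrows the bytes, while `initOwned` takes ownership and
24 | // frees them on deinit.
25 | //
26 | // ```zig
27 | // const borrowed = String.init("literal"); // no allocation; deinit is a no-op
28 | // const owned = try String.initOwned(try allocator.dupe(u8, "heap"));
29 | // defer owned.deinit(allocator); // frees the duplicated bytes
30 | // ```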
--------------------------------------------------------------------------------
/src/dsl/compiler.zig:
--------------------------------------------------------------------------------
1 | const combn = @import("../combn/combn.zig");
2 | const Result = combn.gllparser.Result;
3 | const Parser = combn.gllparser.Parser;
4 | const Error = combn.gllparser.Error;
5 | const Context = combn.gllparser.Context;
6 | const PosKey = combn.gllparser.PosKey;
7 | const ParserPath = combn.gllparser.ParserPath;
8 | const Sequence = combn.gllparser.Sequence;
9 | const SequenceValue = combn.combinator.sequence.Value;
10 | const Repeated = combn.combinator.Repeated;
11 | const RepeatedValue = combn.combinator.repeated.Value;
12 | const Literal = combn.parser.Literal;
13 | const LiteralValue = combn.parser.literal.Value;
14 | const OneOf = combn.combinator.OneOf;
15 | const MapTo = combn.combinator.MapTo;
16 | const Optional = combn.combinator.Optional;
17 |
18 | const String = @import("String.zig");
19 | const Node = @import("Node.zig");
20 | const Compilation = @import("Compilation.zig");
21 | const Identifier = @import("identifier.zig").Identifier;
22 | const CompilerContext = @import("CompilerContext.zig");
23 |
24 | const grammar = @import("grammar.zig");
25 |
26 | const std = @import("std");
27 | const testing = std.testing;
28 | const mem = std.mem;
29 | const assert = std.debug.assert;
30 |
31 | pub const CompilerResult = struct {
32 | compilation: Result(Compilation),
33 | ctx: Context(*CompilerContext, Compilation),
34 | compilerContext: *CompilerContext,
35 |
36 | pub fn deinit(self: *const @This(), allocator: mem.Allocator) void {
37 | self.ctx.deinit();
38 | self.compilerContext.deinit(allocator);
39 | }
40 | };
41 |
42 | pub fn compile(allocator: mem.Allocator, syntax: []const u8) !CompilerResult {
43 | const dsl_parser = try grammar.init(allocator);
44 | defer dsl_parser.deinit(allocator, null);
45 |
46 | var compilerContext = try CompilerContext.init(allocator);
47 | var ctx = try Context(*CompilerContext, Compilation).init(allocator, syntax, compilerContext);
48 | try dsl_parser.parse(&ctx);
49 |
50 | var sub = ctx.subscribe();
51 | var compilation = sub.next();
52 | assert(sub.next() == null); // our grammar is never ambiguous
53 | if (compilation == null) {
54 | return CompilerResult{
55 | .compilation = Result(Compilation).initError(ctx.offset, "failed to compile"),
56 | .compilerContext = compilerContext,
57 | .ctx = ctx,
58 | };
59 | }
60 | return CompilerResult{
61 | .compilation = compilation.?,
62 | .compilerContext = compilerContext,
63 | .ctx = ctx,
64 | };
65 | }
66 |
67 | test "DSL" {
68 | nosuspend {
69 | const allocator = testing.allocator;
70 |
71 | // Compile the regexp.
72 | var compilerResult = try compile(allocator, "/a/");
73 | defer compilerResult.deinit(allocator);
74 | switch (compilerResult.compilation.result) {
75 | .err => |e| @panic(e),
76 | .value => {},
77 | }
78 | var program = compilerResult.compilation.result.value;
79 |
80 | // Run the regexp.
81 | var input = "//";
82 | var ctx = try Context(void, *Node).init(allocator, input, {});
83 | defer ctx.deinit();
84 |
85 | try program.value.parser.ptr.parse(&ctx);
86 |
87 | var sub = ctx.subscribe();
88 | var first = sub.next().?;
89 | try testing.expectEqualStrings("TODO(slimsag): value from parsing regexp!", first.result.value.name.value);
90 | try testing.expectEqual(@as(usize, 0), first.offset);
91 | try testing.expect(sub.next() == null);
92 | }
93 | }
94 |
--------------------------------------------------------------------------------
/src/dsl/dsl.zig:
--------------------------------------------------------------------------------
1 | pub const Compilation = @import("Compilation.zig");
2 | pub const compiler = @import("compiler.zig");
3 | pub const Identifier = @import("identifier.zig").Identifier;
4 | pub const Node = @import("Node.zig");
5 | pub const Program = @import("Program.zig");
6 | pub const String = @import("String.zig");
7 |
--------------------------------------------------------------------------------
/src/dsl/grammar.zig:
--------------------------------------------------------------------------------
1 | const combn = @import("../combn/combn.zig");
2 | const Result = combn.gllparser.Result;
3 | const Parser = combn.gllparser.Parser;
4 | const Error = combn.gllparser.Error;
5 | const Context = combn.gllparser.Context;
6 | const PosKey = combn.gllparser.PosKey;
7 | const ParserPath = combn.gllparser.ParserPath;
8 | const Sequence = combn.combinator.Sequence;
9 | const SequenceValue = combn.combinator.sequence.Value;
10 | const Repeated = combn.combinator.Repeated;
11 | const RepeatedValue = combn.combinator.repeated.Value;
12 | const Literal = combn.parser.Literal;
13 | const LiteralValue = combn.parser.literal.Value;
14 | const OneOf = combn.combinator.OneOf;
15 | const MapTo = combn.combinator.MapTo;
16 | const Optional = combn.combinator.Optional;
17 |
18 | const String = @import("String.zig");
19 | const Node = @import("Node.zig");
20 | const Compilation = @import("Compilation.zig");
21 | const Identifier = @import("identifier.zig").Identifier;
22 | const CompilerContext = @import("CompilerContext.zig");
23 | const pattern_grammar = @import("pattern_grammar.zig");
24 |
25 | const std = @import("std");
26 | const mem = std.mem;
27 | const testing = std.testing;
28 | const assert = std.debug.assert;
29 |
30 | pub fn mapLiteralToNone(in: Result(LiteralValue), compiler_context: *CompilerContext, _allocator: mem.Allocator, key: PosKey, path: ParserPath) Error!?Result(?Compilation) {
31 | _ = compiler_context;
32 | _ = _allocator;
33 | _ = path;
34 | _ = key;
35 | return switch (in.result) {
36 | .err => Result(?Compilation).initError(in.offset, in.result.err),
37 | else => Result(?Compilation).init(in.offset, null),
38 | };
39 | }
40 |
41 | /// Maps a SequenceValue(*Node) -> a single unnamed *Node whose children are the nodes of
42 | /// the sequence.
43 | fn mapNodeSequence(in: Result(SequenceValue(*Node)), program_context: void, _allocator: mem.Allocator, key: PosKey, path: ParserPath) Error!?Result(*Node) {
44 | _ = program_context;
45 | switch (in.result) {
46 | .err => return Result(*Node).initError(in.offset, in.result.err),
47 | else => {
48 | var sequence = in.result.value;
49 |
50 | // Collect all the children nodes.
51 | var children = std.ArrayList(*Node).init(_allocator);
52 | errdefer children.deinit();
53 | var sub = sequence.results.subscribe(key, path, Result(*Node).initError(in.offset, "matches only the empty language"));
54 | var offset = in.offset;
55 | while (sub.next()) |next| {
56 | offset = next.offset;
57 | try children.append(next.result.value.ref());
58 | }
59 |
60 | const node = try Node.init(_allocator, String.init("unknown"), null);
61 | node.children = children.toOwnedSlice();
62 | return Result(*Node).init(in.offset, node);
63 | },
64 | }
65 | }
66 |
67 | /// Maps a SequenceValue(?Compilation) -> a single ?Compilation whose parser runs all the
68 | /// compiled parsers in sequence, emitting a single unnamed Node with their results as children.
69 | fn mapCompilationSequence(in: Result(SequenceValue(?Compilation)), compiler_context: *CompilerContext, _allocator: mem.Allocator, key: PosKey, path: ParserPath) Error!?Result(?Compilation) {
70 | _ = compiler_context;
71 | switch (in.result) {
72 | .err => return Result(?Compilation).initError(in.offset, in.result.err),
73 | else => {
74 | var sequence = in.result.value;
75 |
76 | // Collect all the parser compilations.
77 | var parsers = std.ArrayList(*Parser(void, *Node)).init(_allocator);
78 | var sub = sequence.results.subscribe(key, path, Result(?Compilation).initError(in.offset, "matches only the empty language"));
79 | var offset = in.offset;
80 | while (sub.next()) |next| {
81 | offset = next.offset;
82 | const compilation = next.result.value;
83 | if (compilation) |c| {
84 | try parsers.append(c.value.parser.ptr.ref());
85 | }
86 | }
87 | var slice = parsers.toOwnedSlice();
88 |
89 | // Build a parser which maps the many Parser(void, *Node) compilations into a
90 | // single Parser(void, *Node) which has each node as a child.
91 | var mapped = try MapTo(void, SequenceValue(*Node), *Node).init(_allocator, .{
92 | .parser = (try Sequence(void, *Node).init(_allocator, slice, .borrowed)).ref(),
93 | .mapTo = mapNodeSequence,
94 | });
95 |
96 | var result_compilation = Compilation.initParser(Compilation.CompiledParser{
97 | .ptr = mapped.ref(),
98 | .slice = slice,
99 | });
100 | return Result(?Compilation).init(offset, result_compilation);
101 | },
102 | }
103 | }
104 |
105 | pub fn whitespaceOneOrMore(allocator: mem.Allocator) !*Parser(*CompilerContext, ?Compilation) {
106 | const newline = try MapTo(*CompilerContext, LiteralValue, ?Compilation).init(allocator, .{
107 | .parser = (try OneOf(*CompilerContext, LiteralValue).init(allocator, &.{
108 | (try Literal(*CompilerContext).init(allocator, "\r\n")).ref(),
109 | (try Literal(*CompilerContext).init(allocator, "\r")).ref(),
110 | (try Literal(*CompilerContext).init(allocator, "\n")).ref(),
111 | }, .copy)).ref(),
112 | .mapTo = mapLiteralToNone,
113 | });
114 |
115 | const space = try MapTo(*CompilerContext, LiteralValue, ?Compilation).init(allocator, .{
116 | .parser = (try OneOf(*CompilerContext, LiteralValue).init(allocator, &.{
117 | (try Literal(*CompilerContext).init(allocator, " ")).ref(),
118 | (try Literal(*CompilerContext).init(allocator, "\t")).ref(),
119 | }, .copy)).ref(),
120 | .mapTo = mapLiteralToNone,
121 | });
122 |
123 | const whitespace = try OneOf(*CompilerContext, ?Compilation).init(allocator, &.{
124 | newline.ref(),
125 | space.ref(),
126 | }, .copy);
127 |
128 | // Whitespace+
129 | return try MapTo(*CompilerContext, RepeatedValue(?Compilation), ?Compilation).init(allocator, .{
130 | .parser = (try Repeated(*CompilerContext, ?Compilation).init(allocator, .{
131 | .parser = whitespace.ref(),
132 | .min = 1,
133 | .max = -1,
134 | })).ref(),
135 | .mapTo = struct {
136 | fn mapTo(in: Result(RepeatedValue(?Compilation)), compiler_context: *CompilerContext, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(?Compilation) {
137 | _ = compiler_context;
138 | _ = _allocator;
139 | _ = key;
140 | _ = path;
141 | switch (in.result) {
142 | .err => return Result(?Compilation).initError(in.offset, in.result.err),
143 | else => {
144 | // optimization: newline and space parsers produce no compilations, so no
145 | // need for us to pay any attention to repeated results.
146 | return Result(?Compilation).init(in.offset, null);
147 | },
148 | }
149 | }
150 | }.mapTo,
151 | });
152 | }
153 |
154 | pub fn init(allocator: mem.Allocator) !*Parser(*CompilerContext, Compilation) {
155 | // DSL grammar
156 | //
157 | // ```ebnf
158 | // Newline = "\r\n" | "\r" | "\n" ;
159 | // Space = " " | "\t" ;
160 | // Whitespace = Newline | Space ;
161 | // Assignment = "=" ;
162 | // Semicolon = ";" ;
163 | // Identifier = /[A-Z][[:alnum:]_]*/ ;
164 | // NestedPattern = "/", Pattern, "/" ;
165 | // Expr = NestedPattern | Identifier ;
166 | // ExprList = (ExprList, ",")? , Expr ;
167 | // Definition = Identifier , Whitespace+, Assignment, Whitespace+, ExprList, Semicolon ;
168 | // Grammar = (Definition | Expr | Whitespace+)+, EOF ;
169 | // ```
170 | //
171 | // TODO(dsl): Expr logical OR / alternation
172 | // TODO(dsl): Expr optional
173 | // TODO(dsl): Expr zero-or-more
174 | // TODO(dsl): Expr one-or-more
175 | // TODO(dsl): Expr repetition {x,y}
176 | // TODO(dsl): Expr grouping (...)
177 | // TODO(dsl): terminal string literals
178 | // TODO(dsl): comments
179 | // TODO(dsl): exception? "-"
180 | // TODO(dsl): positive/negative lookahead? Python: & followed by a symbol, token or parenthesized group indicates a positive lookahead (i.e., is required to match but not consumed), while ! indicates a negative lookahead (i.e., is required _not_ to match).
181 |
182 | const whitespace_one_or_more = try whitespaceOneOrMore(allocator);
183 |
184 | var assignment = try MapTo(*CompilerContext, LiteralValue, ?Compilation).init(allocator, .{
185 | .parser = (try Literal(*CompilerContext).init(allocator, "=")).ref(),
186 | .mapTo = mapLiteralToNone,
187 | });
188 | var semicolon = try MapTo(*CompilerContext, LiteralValue, ?Compilation).init(allocator, .{
189 | .parser = (try Literal(*CompilerContext).init(allocator, ";")).ref(),
190 | .mapTo = mapLiteralToNone,
191 | });
192 | var forward_slash = try MapTo(*CompilerContext, LiteralValue, ?Compilation).init(allocator, .{
193 | .parser = (try Literal(*CompilerContext).init(allocator, "/")).ref(),
194 | .mapTo = mapLiteralToNone,
195 | });
196 |
197 | var nested_pattern = try MapTo(*CompilerContext, SequenceValue(?Compilation), ?Compilation).init(allocator, .{
198 | .parser = (try Sequence(*CompilerContext, ?Compilation).init(allocator, &.{
199 | forward_slash.ref(),
200 | (try pattern_grammar.init(allocator)).ref(),
201 | forward_slash.ref(),
202 | }, .copy)).ref(),
203 | .mapTo = struct {
204 | fn mapTo(in: Result(SequenceValue(?Compilation)), compiler_context: *CompilerContext, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(?Compilation) {
205 | _ = compiler_context;
206 | _ = key;
207 | _ = path;
208 | switch (in.result) {
209 | .err => return Result(?Compilation).initError(in.offset, in.result.err),
210 | else => {
211 | var sequence = in.result.value;
212 | _ = sequence;
213 |
214 | // TODO(slimsag): actually compose the compilation to parse this regexp!
215 | const node = try Node.init(_allocator, String.init("TODO(slimsag): value from parsing regexp!"), null);
216 | const success = Result(*Node).init(in.offset, node);
217 | var always_success = try combn.combinator.Always(void, *Node).init(_allocator, success);
218 |
219 | var result_compilation = Compilation.initParser(Compilation.CompiledParser{
220 | .ptr = always_success.ref(),
221 | .slice = null,
222 | });
223 | return Result(?Compilation).init(in.offset, result_compilation);
224 | },
225 | }
226 | }
227 | }.mapTo,
228 | });
229 |
230 | var identifier_expr = try MapTo(*CompilerContext, ?Compilation, ?Compilation).init(allocator, .{
231 | .parser = (try Identifier.init(allocator)).ref(),
232 | .mapTo = struct {
233 | fn mapTo(in: Result(?Compilation), compiler_context: *CompilerContext, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(?Compilation) {
234 | _ = _allocator;
235 | _ = key;
236 | _ = path;
237 | switch (in.result) {
238 | .err => return Result(?Compilation).initError(in.offset, in.result.err),
239 | else => {
240 |                         // Look up this identifier, which must have been defined previously.
241 |                         // TODO(slimsag): make it possible to reference future definitions?
242 | var compilation = compiler_context.identifiers.get(in.result.value.?);
243 | if (compilation == null) {
244 | // TODO(slimsag): include name of definition that was not found in error.
245 | return Result(?Compilation).initError(in.offset, "definition not found");
246 | }
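    | // toUnowned: the compilation is shared with the identifiers map, so
    | // (presumably) this result must not claim ownership of it.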
247 | return Result(?Compilation).init(in.offset, compilation.?).toUnowned();
248 | },
249 | }
250 | }
251 | }.mapTo,
252 | });
253 | var expr = try OneOf(*CompilerContext, ?Compilation).init(allocator, &.{
254 | nested_pattern.ref(),
255 | identifier_expr.ref(),
256 | }, .copy);
257 |
258 | // ExprList = (ExprList, ",")? , Expr ;
259 | var expr_list_parsers = try allocator.alloc(*Parser(*CompilerContext, ?Compilation), 2);
260 |     expr_list_parsers[1] = expr.ref(); // position 0 is reserved for the left-recursive `(ExprList, ",")?` and is set later below
261 | var expr_list = try MapTo(*CompilerContext, SequenceValue(?Compilation), ?Compilation).init(allocator, .{
262 | .parser = (try Sequence(*CompilerContext, ?Compilation).init(allocator, expr_list_parsers, .owned)).ref(),
263 | .mapTo = mapCompilationSequence,
264 | });
265 | // (ExprList, ",")
266 | var comma = try MapTo(*CompilerContext, LiteralValue, ?Compilation).init(allocator, .{
267 | .parser = (try Literal(*CompilerContext).init(allocator, ",")).ref(),
268 | .mapTo = mapLiteralToNone,
269 | });
270 | var expr_list_inner_left = try MapTo(*CompilerContext, SequenceValue(?Compilation), ?Compilation).init(allocator, .{
271 | .parser = (try Sequence(*CompilerContext, ?Compilation).init(allocator, &.{
272 | expr_list.ref(),
273 | comma.ref(),
274 | }, .copy)).ref(),
275 | .mapTo = struct {
276 | fn mapTo(in: Result(SequenceValue(?Compilation)), compiler_context: *CompilerContext, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(?Compilation) {
277 | _ = compiler_context;
278 | _ = _allocator;
279 | switch (in.result) {
280 | .err => return Result(?Compilation).initError(in.offset, in.result.err),
281 | else => {
282 | var sequence = in.result.value;
283 | var sub = sequence.results.subscribe(key, path, Result(?Compilation).initError(in.offset, "matches only the empty language"));
284 |
285 | var _expr_list = sub.next().?;
286 | _ = sub.next().?; // non-capturing compilation for comma
287 | assert(sub.next() == null);
288 | return _expr_list.toUnowned();
289 | },
290 | }
291 | }
292 | }.mapTo,
293 | });
294 | var optional_expr_list_inner_left = try MapTo(*CompilerContext, ??Compilation, ?Compilation).init(allocator, .{
295 | .parser = (try Optional(*CompilerContext, ?Compilation).init(allocator, expr_list_inner_left.ref())).ref(),
296 | .mapTo = struct {
297 | fn mapTo(in: Result(??Compilation), compiler_context: *CompilerContext, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(?Compilation) {
298 | _ = compiler_context;
299 | _ = _allocator;
300 | _ = key;
301 | _ = path;
302 | switch (in.result) {
303 | .err => return Result(?Compilation).initError(in.offset, in.result.err),
304 | else => {
305 | if (in.result.value == null) {
306 | return Result(?Compilation).init(in.offset, null);
307 | }
308 | return Result(?Compilation).init(in.offset, in.result.value.?).toUnowned();
309 | },
310 | }
311 | }
312 | }.mapTo,
313 | });
314 | expr_list_parsers[0] = optional_expr_list_inner_left.ref();
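    | // The recursion is now tied off: ExprList refers to itself through the
    | // optional left branch above, so an input like `A,B` (illustrative)
    | // parses left-recursively as ((A, ","), B).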
315 |
316 | var definition = try MapTo(*CompilerContext, SequenceValue(?Compilation), ?Compilation).init(allocator, .{
317 | .parser = (try Sequence(*CompilerContext, ?Compilation).init(allocator, &.{
318 | (try Identifier.init(allocator)).ref(),
319 | whitespace_one_or_more.ref(),
320 | assignment.ref(),
321 | whitespace_one_or_more.ref(),
322 | expr_list.ref(),
323 | semicolon.ref(),
324 | }, .copy)).ref(),
325 | .mapTo = struct {
326 | fn mapTo(in: Result(SequenceValue(?Compilation)), compiler_context: *CompilerContext, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(?Compilation) {
327 | _ = _allocator;
328 | switch (in.result) {
329 | .err => return Result(?Compilation).initError(in.offset, in.result.err),
330 | else => {
331 | var sequence = in.result.value;
332 | var sub = sequence.results.subscribe(key, path, Result(?Compilation).initError(in.offset, "matches only the empty language"));
333 |
334 | var identifier = sub.next().?;
335 | _ = sub.next().?; // non-capturing compilation for whitespace
336 | _ = sub.next().?; // non-capturing compilation for assignment `=` operator
337 | _ = sub.next().?; // non-capturing compilation for whitespace
338 | var _expr_list = sub.next().?;
339 | var last = sub.next().?; // non-capturing compilation for semicolon
340 | assert(sub.next() == null);
341 |
342 |                         // Set identifier = _expr_list, so that future identifier expressions can
343 |                         // look up the resulting expression compilation for this identifier.
344 | const v = try compiler_context.identifiers.getOrPut(identifier.result.value.?);
345 | if (v.found_existing) {
346 | // TODO(slimsag): include name of definition in error message
347 | return Result(?Compilation).initError(last.offset, "definition redefined");
348 | }
349 | v.value_ptr.* = _expr_list.result.value.?;
350 |
351 | // A definition assignment yields no nodes.
352 | return Result(?Compilation).init(in.offset, null);
353 | },
354 | }
355 | }
356 | }.mapTo,
357 | });
358 |
359 | var definition_or_expr_or_whitespace = try OneOf(*CompilerContext, ?Compilation).init(allocator, &.{
360 | definition.ref(),
361 | expr.ref(),
362 | whitespace_one_or_more.ref(),
363 | }, .copy);
364 |
365 | const non_null_root_compilation = try MapTo(*CompilerContext, RepeatedValue(?Compilation), ?Compilation).init(allocator, .{
366 | .parser = (try Repeated(*CompilerContext, ?Compilation).init(allocator, .{
367 | .parser = definition_or_expr_or_whitespace.ref(),
368 | .min = 1,
369 | .max = -1,
370 | })).ref(),
371 | .mapTo = struct {
372 | fn mapTo(in: Result(RepeatedValue(?Compilation)), compiler_context: *CompilerContext, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(?Compilation) {
373 | _ = compiler_context;
374 | _ = _allocator;
375 | switch (in.result) {
376 | .err => return Result(?Compilation).initError(in.offset, in.result.err),
377 | else => {
378 | var repeated = in.result.value;
379 | var sub = repeated.results.subscribe(key, path, Result(?Compilation).initError(in.offset, "matches only the empty language"));
380 |
381 | var offset = in.offset;
382 | var compilation: ?Result(?Compilation) = null;
383 | while (sub.next()) |next| {
384 | offset = next.offset;
385 | switch (next.result) {
386 | .value => |v| {
387 | if (v != null) {
388 | if (compilation == null) {
389 | compilation = Result(?Compilation).init(next.offset, v.?);
390 | } else {
391 | // another parse path yielded a compilation, i.e. our grammar was ambiguous -
392 | // and it definitely shouldn't be!
393 | unreachable;
394 | }
395 | }
396 | },
397 | .err => |e| return Result(?Compilation).initError(offset, e),
398 | }
399 | }
400 | if (compilation == null) {
401 | // Grammar does not have a root expression
402 | return Result(?Compilation).initError(offset, "root expression missing");
403 | }
404 | return compilation.?.toUnowned();
405 | },
406 | }
407 | }
408 | }.mapTo,
409 | });
410 |
411 | const end = try MapTo(*CompilerContext, combn.parser.end.Value, ?Compilation).init(allocator, .{
412 | .parser = (try combn.parser.End(*CompilerContext).init(allocator)).ref(),
413 | .mapTo = struct {
414 | fn mapTo(in: Result(combn.parser.end.Value), compiler_context: *CompilerContext, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(?Compilation) {
415 | _ = compiler_context;
416 | _ = _allocator;
417 | _ = key;
418 | _ = path;
419 | switch (in.result) {
420 | .err => return Result(?Compilation).initError(in.offset, in.result.err),
421 | else => return Result(?Compilation).init(in.offset, null),
422 | }
423 | }
424 | }.mapTo,
425 | });
426 |
427 | const grammar_then_end = try Sequence(*CompilerContext, ?Compilation).init(allocator, &.{
428 | non_null_root_compilation.ref(),
429 | end.ref(),
430 | }, .copy);
431 |
432 | return try MapTo(*CompilerContext, SequenceValue(?Compilation), Compilation).init(allocator, .{
433 | .parser = grammar_then_end.ref(),
434 | .mapTo = struct {
435 | fn mapTo(in: Result(SequenceValue(?Compilation)), compiler_context: *CompilerContext, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(Compilation) {
436 | _ = compiler_context;
437 | _ = _allocator;
438 | _ = key;
439 | _ = path;
440 | switch (in.result) {
441 | .err => return Result(Compilation).initError(in.offset, in.result.err),
442 | else => {
443 | var sequence = in.result.value;
444 |
445 | var sub = sequence.results.subscribe(key, path, Result(?Compilation).initError(in.offset, "matches only the empty language"));
446 | const root_compilation = sub.next();
447 | assert(root_compilation != null);
448 | const _end = sub.next();
449 | assert(_end != null);
450 | assert(sub.next() == null);
451 | return Result(Compilation).init(in.offset, root_compilation.?.result.value.?).toUnowned();
452 | },
453 | }
454 | }
455 | }.mapTo,
456 | });
457 | }
458 |
--------------------------------------------------------------------------------
/src/dsl/identifier.zig:
--------------------------------------------------------------------------------
1 | const combn = @import("../combn/combn.zig");
2 | const Result = combn.gllparser.Result;
3 | const Parser = combn.gllparser.Parser;
4 | const Error = combn.gllparser.Error;
5 | const Context = combn.gllparser.Context;
6 | const PosKey = combn.gllparser.PosKey;
7 | const ParserPath = combn.gllparser.ParserPath;
8 | const NodeName = combn.gllparser.NodeName;
9 |
10 | const String = @import("String.zig");
11 | const Compilation = @import("Compilation.zig");
12 | const CompilerContext = @import("CompilerContext.zig");
13 |
14 | const std = @import("std");
15 | const testing = std.testing;
16 | const mem = std.mem;
17 |
18 | /// Matches an identifier: an ASCII letter followed by any number of ASCII
19 | /// letters, digits, or underscores.
20 | ///
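    | /// Examples (illustrative): `Grammar2`, `rule_1`, and `X` match, while
    | /// `2abc` and `_foo` do not, since the first byte must be a letter.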
21 | pub const Identifier = struct {
22 | parser: Parser(*CompilerContext, ?Compilation) = Parser(*CompilerContext, ?Compilation).init(parse, nodeName, null, null),
23 |
24 | const Self = @This();
25 |
26 | pub fn init(allocator: mem.Allocator) !*Parser(*CompilerContext, ?Compilation) {
27 | const self = Self{};
28 | return try self.parser.heapAlloc(allocator, self);
29 | }
30 |
31 | pub fn initStack() Self {
32 | return Self{};
33 | }
34 |
35 | pub fn nodeName(parser: *const Parser(*CompilerContext, ?Compilation), node_name_cache: *std.AutoHashMap(usize, NodeName)) Error!u64 {
36 | _ = parser;
37 | _ = node_name_cache;
38 | var v = std.hash_map.hashString("Identifier");
39 | return v;
40 | }
41 |
42 | pub fn parse(parser: *const Parser(*CompilerContext, ?Compilation), in_ctx: *const Context(*CompilerContext, ?Compilation)) callconv(.Async) !void {
43 | _ = parser;
44 | var ctx = in_ctx.with({});
45 | defer ctx.results.close();
46 |
47 | const src = ctx.src[ctx.offset..];
48 |
49 | var offset: usize = 0;
50 | if (src.len == 0) {
51 | try ctx.results.add(Result(?Compilation).initError(ctx.offset, "expected Identifier"));
52 | return;
53 | }
54 | {
55 | var isUpper = src[offset] >= 'A' and src[offset] <= 'Z';
56 | var isLower = src[offset] >= 'a' and src[offset] <= 'z';
57 | if (!isUpper and !isLower) {
58 | try ctx.results.add(Result(?Compilation).initError(ctx.offset + 1, "Identifier must start with a-zA-Z"));
59 | return;
60 | }
61 | }
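    | // Consume subsequent bytes while they are ASCII letters, digits, or '_'.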
62 | while (offset < src.len) {
63 | var isDigit = src[offset] >= '0' and src[offset] <= '9';
64 | var isUpper = src[offset] >= 'A' and src[offset] <= 'Z';
65 | var isLower = src[offset] >= 'a' and src[offset] <= 'z';
66 | if (!isDigit and !isUpper and !isLower and src[offset] != '_') {
67 | break;
68 | }
69 | offset += 1;
70 | }
71 | try ctx.results.add(Result(?Compilation).init(ctx.offset + offset, Compilation.initIdentifier(String.init(src[0..offset]))));
72 | }
73 | };
74 |
75 | test "identifier" {
76 | nosuspend {
77 | const allocator = testing.allocator;
78 |
79 | var compilerContext = try CompilerContext.init(allocator);
80 | defer compilerContext.deinit(allocator);
81 | var ctx = try Context(*CompilerContext, ?Compilation).init(allocator, "Grammar2", compilerContext);
82 | defer ctx.deinit();
83 |
84 | var l = try Identifier.init(allocator);
85 | defer l.deinit(allocator, null);
86 | try l.parse(&ctx);
87 |
88 | var sub = ctx.subscribe();
89 | var r1 = sub.next().?;
90 | try testing.expectEqual(@as(usize, 8), r1.offset);
91 | try testing.expectEqualStrings("Grammar2", r1.result.value.?.value.identifier.value);
92 | try testing.expect(sub.next() == null);
93 | }
94 | }
95 |
--------------------------------------------------------------------------------
/src/dsl/pattern_grammar.zig:
--------------------------------------------------------------------------------
1 | const combn = @import("../combn/combn.zig");
2 | const Result = combn.gllparser.Result;
3 | const Parser = combn.gllparser.Parser;
4 | const Error = combn.gllparser.Error;
5 | const Context = combn.gllparser.Context;
6 | const PosKey = combn.gllparser.PosKey;
7 | const ParserPath = combn.gllparser.ParserPath;
8 | const MapTo = combn.combinator.MapTo;
9 | const Repeated = combn.combinator.Repeated;
10 | const RepeatedValue = combn.combinator.repeated.Value;
11 | const ByteRange = combn.parser.ByteRange;
12 | const ByteRangeValue = combn.parser.byte_range.Value;
13 |
14 | const Compilation = @import("Compilation.zig");
15 | const CompilerContext = @import("CompilerContext.zig");
16 |
17 | const std = @import("std");
18 | const mem = std.mem;
19 |
20 | pub fn init(allocator: mem.Allocator) !*Parser(*CompilerContext, ?Compilation) {
21 | // Pattern matching grammar
22 | //
23 | // ```ebnf
24 | // Pattern = TBD ;
25 | // ```
26 | //
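    | // NOTE: as a placeholder, this grammar currently consumes at most one
    | // arbitrary byte (`any_bytes` below: min = 0, max = 1) and yields no
    | // compilation.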
27 |
28 |     const any_byte = try ByteRange(*CompilerContext).init(allocator, .{ .from = 0, .to = 255 });
29 | const any_bytes = try Repeated(*CompilerContext, ByteRangeValue).init(allocator, .{
30 | .parser = any_byte.ref(),
31 | .min = 0,
32 | .max = 1, // TODO(slimsag): make this parse more byte literals
33 | });
34 |
35 |     const literal_any_bytes = try MapTo(*CompilerContext, RepeatedValue(ByteRangeValue), ?Compilation).init(allocator, .{
36 | .parser = any_bytes.ref(),
37 | .mapTo = struct {
38 | fn mapTo(in: Result(RepeatedValue(ByteRangeValue)), compiler_context: *CompilerContext, _allocator: mem.Allocator, key: PosKey, path: ParserPath) callconv(.Async) Error!?Result(?Compilation) {
39 | _ = compiler_context;
40 | _ = _allocator;
41 | _ = key;
42 | _ = path;
43 | switch (in.result) {
44 | .err => return Result(?Compilation).initError(in.offset, in.result.err),
45 | else => {
46 |                         // placeholder: the repeated byte-range parser produces no compilation of
47 |                         // interest yet, so its results are discarded.
48 | return Result(?Compilation).init(in.offset, null);
49 | },
50 | }
51 | }
52 | }.mapTo,
53 | });
54 | return literal_any_bytes;
55 | }
56 |
--------------------------------------------------------------------------------
/src/zorex.zig:
--------------------------------------------------------------------------------
1 | pub const combn = @import("combn/combn.zig");
2 | pub const dsl = @import("dsl/dsl.zig");
3 |
4 | test "include" {
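    | // Referencing a public declaration forces Zig to semantically analyze
    | // the dsl module (and, through its imports, combn) under `zig build test`.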
5 | _ = dsl.Program;
6 | }
7 |
--------------------------------------------------------------------------------