├── src ├── test │ ├── 008_toplevel_var.zig │ ├── 001_fn_empty_block.zig │ ├── 007_param_without_type.zig │ ├── 033_char_basic.zig │ ├── 006_assignment.zig │ ├── 014_keyword_as_identifier.zig │ ├── 019_param_pointer.zig │ ├── 023_toplevel_pub_var.zig │ ├── 026_union_empty.zig │ ├── 027_enum_empty.zig │ ├── 031_string_basic.zig │ ├── 005_call_with_args.zig │ ├── 013_nested_call.zig │ ├── 016_addition.zig │ ├── 025_struct_empty.zig │ ├── 032_string_escapes.zig │ ├── 034_char_escape_quote.zig │ ├── 037_char_in_expr.zig │ ├── 039_param_error_union.zig │ ├── 002_return_semicolon.zig │ ├── 009_nested_blocks.zig │ ├── 015_keyword_prefix_allowed.zig │ ├── 017_mul_precedence.zig │ ├── 022_complex_type_prefixes.zig │ ├── 035_var_string_init.zig │ ├── 038_return_error_union.zig │ ├── 041_if_simple.zig │ ├── 045_for_simple.zig │ ├── 046_if_expr_value.zig │ ├── 018_nested_ops_calls.zig │ ├── 020_param_slice_array.zig │ ├── 021_return_optional_ptr.zig │ ├── 040_nested_error_union.zig │ ├── 043_while_simple.zig │ ├── 048_for_expr_else.zig │ ├── 054_for_assign_semicolon.zig │ ├── 055_if_payload_expr.zig │ ├── 010_two_functions.zig │ ├── 029_enum_fields.zig │ ├── 042_if_else.zig │ ├── 047_while_expr_else.zig │ ├── 057_for_payload_expr.zig │ ├── 004_pub_fn_params_ret.zig │ ├── 050_break_continue.zig │ ├── 052_if_assign_semicolon.zig │ ├── 028_struct_fields_simple.zig │ ├── 044_while_else.zig │ ├── 051_switch_stmt_no_semicolon.zig │ ├── 053_while_assign_semicolon.zig │ ├── 056_while_payload_else_expr.zig │ ├── 036_call_with_string_arg.zig │ ├── 049_switch_expr_minimal.zig │ ├── 058_switch_index_payload.zig │ ├── 003_var_const_and_call.zig │ ├── 024_mixed_toplevel.zig │ ├── 030_nested_containers.zig │ ├── 012_comments_between_decls.zig │ └── 011_line_comments.zig ├── root.zig ├── main.zig ├── trace_demo.zig ├── ansitty.zig ├── packrat_test.zig ├── ast.zig ├── zig.peg.txt ├── trace.zig ├── vm.zig ├── ziggrammar.zig └── zigparse.zig ├── test.zig ├── .gitignore ├── vm_loop_demo.zig ├── 
LICENSE ├── docs ├── io_writer.md └── vm-loop-llvm.md └── README.md /src/test/008_toplevel_var.zig: -------------------------------------------------------------------------------- 1 | const x = 1; 2 | -------------------------------------------------------------------------------- /src/test/001_fn_empty_block.zig: -------------------------------------------------------------------------------- 1 | fn main() void {} 2 | -------------------------------------------------------------------------------- /src/test/007_param_without_type.zig: -------------------------------------------------------------------------------- 1 | fn f(a) {} 2 | -------------------------------------------------------------------------------- /src/test/033_char_basic.zig: -------------------------------------------------------------------------------- 1 | const c = 'x'; 2 | 3 | -------------------------------------------------------------------------------- /src/test/006_assignment.zig: -------------------------------------------------------------------------------- 1 | fn f() void { x = 1; } 2 | -------------------------------------------------------------------------------- /src/test/014_keyword_as_identifier.zig: -------------------------------------------------------------------------------- 1 | fn fn() void {} 2 | -------------------------------------------------------------------------------- /src/test/019_param_pointer.zig: -------------------------------------------------------------------------------- 1 | fn f(p: *T) void {} 2 | -------------------------------------------------------------------------------- /src/test/023_toplevel_pub_var.zig: -------------------------------------------------------------------------------- 1 | pub var g = 1; 2 | 3 | -------------------------------------------------------------------------------- /src/test/026_union_empty.zig: -------------------------------------------------------------------------------- 1 | const U = union { }; 2 | 3 | 
-------------------------------------------------------------------------------- /src/test/027_enum_empty.zig: -------------------------------------------------------------------------------- 1 | const E = enum { }; 2 | 3 | -------------------------------------------------------------------------------- /src/test/031_string_basic.zig: -------------------------------------------------------------------------------- 1 | const s = "hello"; 2 | 3 | -------------------------------------------------------------------------------- /src/test/005_call_with_args.zig: -------------------------------------------------------------------------------- 1 | fn f() void { f(1, 2); } 2 | -------------------------------------------------------------------------------- /src/test/013_nested_call.zig: -------------------------------------------------------------------------------- 1 | fn f() void { g(h(1), 2); } 2 | -------------------------------------------------------------------------------- /src/test/016_addition.zig: -------------------------------------------------------------------------------- 1 | fn f() void { x = 1 + 2 + 3; } 2 | -------------------------------------------------------------------------------- /src/test/025_struct_empty.zig: -------------------------------------------------------------------------------- 1 | const S = struct { }; 2 | 3 | -------------------------------------------------------------------------------- /src/test/032_string_escapes.zig: -------------------------------------------------------------------------------- 1 | const s = "a\n\t\"\\"; 2 | 3 | -------------------------------------------------------------------------------- /src/test/034_char_escape_quote.zig: -------------------------------------------------------------------------------- 1 | const c = '\''; 2 | 3 | -------------------------------------------------------------------------------- /src/test/037_char_in_expr.zig: 
-------------------------------------------------------------------------------- 1 | fn f() void { x = 'a' + 1; } 2 | -------------------------------------------------------------------------------- /src/test/039_param_error_union.zig: -------------------------------------------------------------------------------- 1 | fn f(a: E!T) void {} 2 | -------------------------------------------------------------------------------- /src/test/002_return_semicolon.zig: -------------------------------------------------------------------------------- 1 | fn main() void { return; } 2 | -------------------------------------------------------------------------------- /src/test/009_nested_blocks.zig: -------------------------------------------------------------------------------- 1 | fn f() void { { var y = 7; } } 2 | -------------------------------------------------------------------------------- /src/test/015_keyword_prefix_allowed.zig: -------------------------------------------------------------------------------- 1 | fn returnValue() void {} 2 | -------------------------------------------------------------------------------- /src/test/017_mul_precedence.zig: -------------------------------------------------------------------------------- 1 | fn f() void { x = 1 + 2 * 3; } 2 | -------------------------------------------------------------------------------- /src/test/022_complex_type_prefixes.zig: -------------------------------------------------------------------------------- 1 | fn f(a: ?*[]*T) void {} 2 | -------------------------------------------------------------------------------- /src/test/035_var_string_init.zig: -------------------------------------------------------------------------------- 1 | fn f() void { var s = "hi"; } 2 | -------------------------------------------------------------------------------- /src/test/038_return_error_union.zig: -------------------------------------------------------------------------------- 1 | fn f() E!T { return; } 2 | 3 | 
-------------------------------------------------------------------------------- /src/test/041_if_simple.zig: -------------------------------------------------------------------------------- 1 | fn f() void { if (1) { return; } } 2 | -------------------------------------------------------------------------------- /src/test/045_for_simple.zig: -------------------------------------------------------------------------------- 1 | fn f() void { for (1) { x = 1; } } 2 | -------------------------------------------------------------------------------- /src/test/046_if_expr_value.zig: -------------------------------------------------------------------------------- 1 | const x = if (1) 2 else 3; 2 | 3 | -------------------------------------------------------------------------------- /src/test/018_nested_ops_calls.zig: -------------------------------------------------------------------------------- 1 | fn f() void { x = g(1+2, 3*4); } 2 | -------------------------------------------------------------------------------- /src/test/020_param_slice_array.zig: -------------------------------------------------------------------------------- 1 | fn f(a: []u8, b: [16]u8) void {} 2 | -------------------------------------------------------------------------------- /src/test/021_return_optional_ptr.zig: -------------------------------------------------------------------------------- 1 | fn f() ?*u8 { return; } 2 | 3 | -------------------------------------------------------------------------------- /src/test/040_nested_error_union.zig: -------------------------------------------------------------------------------- 1 | fn f() E!F!G { return; } 2 | 3 | -------------------------------------------------------------------------------- /src/test/043_while_simple.zig: -------------------------------------------------------------------------------- 1 | fn f() void { while (1) { x = 1; } } 2 | -------------------------------------------------------------------------------- 
/src/test/048_for_expr_else.zig: -------------------------------------------------------------------------------- 1 | const x = for (0) { } else 5; 2 | 3 | -------------------------------------------------------------------------------- /src/test/054_for_assign_semicolon.zig: -------------------------------------------------------------------------------- 1 | fn f() void { for (1) x = 1; } 2 | -------------------------------------------------------------------------------- /src/test/055_if_payload_expr.zig: -------------------------------------------------------------------------------- 1 | const x = if (1) |v| v else 0; 2 | 3 | -------------------------------------------------------------------------------- /src/test/010_two_functions.zig: -------------------------------------------------------------------------------- 1 | fn a() void {} 2 | fn b() void { return; } 3 | -------------------------------------------------------------------------------- /src/test/029_enum_fields.zig: -------------------------------------------------------------------------------- 1 | const Color = enum { red, green, blue }; 2 | 3 | -------------------------------------------------------------------------------- /src/test/042_if_else.zig: -------------------------------------------------------------------------------- 1 | fn f() void { if (1) { x = 1; } else { x = 2; } } 2 | -------------------------------------------------------------------------------- /src/test/047_while_expr_else.zig: -------------------------------------------------------------------------------- 1 | const x = while (0) { } else 42; 2 | 3 | -------------------------------------------------------------------------------- /src/test/057_for_payload_expr.zig: -------------------------------------------------------------------------------- 1 | const x = for (0) |i| { } else 5; 2 | 3 | -------------------------------------------------------------------------------- /src/test/004_pub_fn_params_ret.zig: 
-------------------------------------------------------------------------------- 1 | pub fn add(a: i32, b: i32) i32 { return a; } 2 | -------------------------------------------------------------------------------- /src/test/050_break_continue.zig: -------------------------------------------------------------------------------- 1 | fn f() void { while (1) { break; continue; } } 2 | -------------------------------------------------------------------------------- /src/test/052_if_assign_semicolon.zig: -------------------------------------------------------------------------------- 1 | fn f() void { if (1) x = 1 else y = 2; } 2 | -------------------------------------------------------------------------------- /src/test/028_struct_fields_simple.zig: -------------------------------------------------------------------------------- 1 | const Pair = struct { a: i32, b: u8 }; 2 | 3 | -------------------------------------------------------------------------------- /src/test/044_while_else.zig: -------------------------------------------------------------------------------- 1 | fn f() void { while (1) { x = 1; } else { x = 2; } } 2 | -------------------------------------------------------------------------------- /src/test/051_switch_stmt_no_semicolon.zig: -------------------------------------------------------------------------------- 1 | fn f() void { switch (0) { else => 0 } } 2 | -------------------------------------------------------------------------------- /src/test/053_while_assign_semicolon.zig: -------------------------------------------------------------------------------- 1 | fn f() void { while (1) x = 1 else y = 2; } 2 | -------------------------------------------------------------------------------- /src/test/056_while_payload_else_expr.zig: -------------------------------------------------------------------------------- 1 | const x = while (0) |*p| { } else 42; 2 | 3 | -------------------------------------------------------------------------------- 
/src/test/036_call_with_string_arg.zig: -------------------------------------------------------------------------------- 1 | fn f() void { g("x"); } 2 | fn g(a: i32) void {} 3 | -------------------------------------------------------------------------------- /src/test/049_switch_expr_minimal.zig: -------------------------------------------------------------------------------- 1 | const n = 1; 2 | const x = switch (n) { else => 0 }; 3 | 4 | -------------------------------------------------------------------------------- /src/test/058_switch_index_payload.zig: -------------------------------------------------------------------------------- 1 | const x = switch (1) { 1 => |i| i, else => 0 }; 2 | 3 | -------------------------------------------------------------------------------- /src/test/003_var_const_and_call.zig: -------------------------------------------------------------------------------- 1 | fn f() void { 2 | const x = 42; 3 | var y = 7; 4 | f(); 5 | } 6 | -------------------------------------------------------------------------------- /src/test/024_mixed_toplevel.zig: -------------------------------------------------------------------------------- 1 | const x = 1; 2 | fn a() void {} 3 | pub var y = 2; 4 | fn b() void { return; } 5 | -------------------------------------------------------------------------------- /src/test/030_nested_containers.zig: -------------------------------------------------------------------------------- 1 | const A = struct { 2 | inner: struct { x: i32 }, 3 | u: union { a: i8 }, 4 | }; 5 | 6 | -------------------------------------------------------------------------------- /src/test/012_comments_between_decls.zig: -------------------------------------------------------------------------------- 1 | fn a() void {} 2 | // spacing comment 3 | fn b() void { // inline 4 | // body comment 5 | } 6 | -------------------------------------------------------------------------------- /src/test/011_line_comments.zig: 
-------------------------------------------------------------------------------- 1 | // top comment 2 | fn f() void { // trailing comment 3 | // inner comment 4 | const x = 1; // after stmt 5 | // 6 | } 7 | -------------------------------------------------------------------------------- /test.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | 3 | pub fn main() !void { 4 | var x: i32 = 42; 5 | const y = 100; 6 | var sum = x + y; 7 | 8 | std.debug.print("Hello, world!\n", .{}); 9 | std.debug.print("Sum is: {}\n", .{sum}); 10 | } -------------------------------------------------------------------------------- /src/root.zig: -------------------------------------------------------------------------------- 1 | comptime { 2 | @setEvalBranchQuota(200000); 3 | _ = @import("peg.zig"); 4 | _ = @import("vm.zig"); 5 | _ = @import("packrat_test.zig"); 6 | } 7 | 8 | pub const ziglang = @import("ziggrammar.zig"); 9 | pub const peg = @import("peg.zig"); 10 | pub const vm = @import("vm.zig"); 11 | pub const trace = @import("trace.zig"); 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Zig build artifacts 2 | zig-out/ 3 | zig-cache/ 4 | .zig-cache/ 5 | 6 | # Debug and temporary files 7 | *.air 8 | *.asm 9 | *.ast 10 | *.c 11 | *.ir 12 | *.ll 13 | *.o 14 | *.s 15 | *.zir 16 | debug.log 17 | liveness.log 18 | *~ 19 | 20 | # Test files 21 | test_*.zig 22 | simple*.zig 23 | 24 | # PEG bytecode dumps 25 | tmp/ 26 | *.peg 27 | 28 | # VS Code 29 | .vscode/ 30 | 31 | # Executables 32 | /parse 33 | /bug 34 | /vm_loop_demo 35 | -------------------------------------------------------------------------------- /vm_loop_demo.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const peg = @import("src/peg.zig"); 3 | const 
/// Instantiates the VM for `peg.demoGrammar` on the fixed input "[[1] [2]]"
/// and runs it. All allocations come from an arena that is released on exit.
pub fn main() !void {
    var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena_state.deinit();
    const arena_allocator = arena_state.allocator();

    // Name the specialised VM type once so the call below stays short.
    const DemoVm = vm_mod.VM(peg.demoGrammar);

    var machine = try DemoVm.initAlloc("[[1] [2]]", arena_allocator, 64, 64, 512);
    defer machine.deinit(arena_allocator);

    try machine.run();
}
/// CLI entry point. Hands the single command-line argument to the VM built for
/// `ziglang.ZigGrammar`, then calls `trace.traceFrom`, `trace.dumpAst` and
/// `trace.dumpForest` with a buffered stdout writer.
///
/// With any argument count other than one, a usage line is printed to stderr
/// and nothing else happens.
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    const args = try std.process.argsAlloc(allocator);
    defer std.process.argsFree(allocator, args);

    if (args.len != 2) {
        // This case used to fall through silently, which was indistinguishable
        // from a successful run that produced no output.
        const program_name: []const u8 = if (args.len > 0) args[0] else "zisp";
        std.debug.print("usage: {s} <input>\n", .{program_name});
        return;
    }

    var stdout_buf: [4096]u8 = undefined;
    const stdout_file = std.fs.File.stdout();
    var stdout_writer = stdout_file.writer(&stdout_buf);
    const stdout = &stdout_writer.interface;
    // Deferred so buffered output is still flushed when a later step returns an
    // error. A defer cannot propagate, so a flush failure is logged instead of
    // being dropped.
    defer stdout.flush() catch |err|
        std.log.err("failed to flush stdout: {s}", .{@errorName(err)});

    const tty = std.Io.tty.detectConfig(stdout_file);

    var vm = try pegvm.VM(ziglang.ZigGrammar).initAlloc(args[1], allocator, 64, 64, 512);
    defer vm.deinit(allocator);

    try trace.traceFrom(&vm, stdout, tty, .Root);
    try trace.dumpAst(&vm, stdout, tty);
    try trace.dumpForest(&vm, stdout, tty, allocator, .FnDecl);
}
For example, to write to stdout: 11 | 12 | ```zig 13 | var stdout_buffer: [1024]u8 = undefined; 14 | var stdout_writer = std.fs.File.stdout().writer(&stdout_buffer); 15 | const stdout: *std.Io.Writer = &stdout_writer.interface; 16 | 17 | try stdout.print("hello world\n", .{}); 18 | try stdout.flush(); 19 | ``` 20 | 21 | For file I/O the pattern is similar: 22 | 23 | ```zig 24 | var buffer: [4096]u8 = undefined; 25 | var file_writer = file.writer(&buffer); 26 | try file_writer.interface.print("some data\n", .{}); 27 | try file_writer.interface.flush(); 28 | ``` 29 | 30 | ## Writing bytes 31 | 32 | `std.Io.Writer` provides several ways to send data: 33 | 34 | * `writeAll` writes a slice of bytes. 35 | * `print` performs formatted output. 36 | * `splatBytesAll` repeats a pattern without copying each byte. In 37 | `src/trace.zig` this draws the call stack: 38 | 39 | ```zig 40 | try writer.splatBytesAll("│", machine.calls.items.len + 1); 41 | ``` 42 | 43 | ## Advanced features 44 | 45 | Writers can propagate high level operations: 46 | 47 | * *Splatting* writes repeated patterns without allocating. 48 | * *sendFile* transfers data directly between file descriptors when available. 49 | * The buffer can be rebased to preserve unread bytes. 50 | * `fixed` constructs a writer that fails once the buffer is full, useful for 51 | writing into a fixed array. 52 | 53 | ## Don't forget to flush 54 | 55 | Buffered writers require an explicit `flush` to ensure all data reaches the 56 | underlying sink. This repository uses `flush` after dumping bytecode to files and 57 | on stdout to guarantee complete output. 58 | 59 | ```zig 60 | try stdout.flush(); 61 | ``` 62 | 63 | The concrete `std.Io.Writer` API encourages reusable stream code without 64 | leaking implementation details, while providing convenient methods and 65 | precise error reporting. 
66 | -------------------------------------------------------------------------------- /src/trace_demo.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const peg = @import("peg.zig"); 3 | const vm = @import("vm.zig"); 4 | const trace = @import("trace.zig"); 5 | 6 | // Simple backtracking grammar to demonstrate memoization 7 | const DemoGrammar = struct { 8 | const R = std.meta.DeclEnum(@This()); 9 | 10 | // S ::= A 'x' | A 'y' 11 | pub fn S( 12 | _: union(enum) { 13 | ax: struct { 14 | a: peg.Call(R.A), 15 | x: peg.CharSet("x"), 16 | }, 17 | ay: struct { 18 | a: peg.Call(R.A), 19 | y: peg.CharSet("y"), 20 | }, 21 | }, 22 | ) void {} 23 | 24 | // A ::= 'a' A | 'a' 25 | pub fn A( 26 | _: union(enum) { 27 | recursive: struct { 28 | a: peg.CharSet("a"), 29 | rest: peg.Call(R.A), 30 | }, 31 | base: peg.CharSet("a"), 32 | }, 33 | ) void {} 34 | }; 35 | 36 | pub fn main() !void { 37 | const TestVM = vm.VM(DemoGrammar); 38 | 39 | var gpa = std.heap.GeneralPurposeAllocator(.{}){}; 40 | defer _ = gpa.deinit(); 41 | const allocator = gpa.allocator(); 42 | 43 | var stdout_buf: [4096]u8 = undefined; 44 | const stdout_file = std.fs.File.stdout(); 45 | var stdout_writer = stdout_file.writer(&stdout_buf); 46 | const stdout = &stdout_writer.interface; 47 | const tty = std.Io.tty.detectConfig(stdout_file); 48 | 49 | const input = "aaay"; 50 | 51 | // Create VM and trace without memoization 52 | var machine1 = try TestVM.initAlloc(input, allocator, 16, 16, 256); 53 | defer machine1.deinit(allocator); 54 | try trace.trace(&machine1, stdout, tty); 55 | 56 | // Create VM with memoization and trace 57 | var machine2 = try TestVM.initAlloc(input, allocator, 16, 16, 256); 58 | defer machine2.deinit(allocator); 59 | var memo = TestVM.MemoTable.init(allocator); 60 | defer memo.deinit(); 61 | machine2.memo = &memo; 62 | try trace.trace(&machine2, stdout, tty); 63 | 64 | // Show the difference in step counts 65 | const 
/// A Select Graphic Rendition (SGR) request: an optional text attribute plus
/// optional foreground and background colours. `write` renders whichever
/// fields are set as a single `ESC [ ... m` sequence.
pub const SGR = struct {
    attribute: ?Attr = null,
    fgcolor: ?Color = null,
    /// Stored as a foreground code; `write` converts it with `Color.background`.
    bgcolor: ?Color = null,

    /// Text attributes. The integer value is the parameter that `write` emits.
    pub const Attr = enum(u8) {
        reset = 0,
        bold = 1,
        dim = 2,
        italic = 3,
        underline = 4,
        blink = 5,
        reverse = 7,
    };

    /// Foreground colour codes. Non-exhaustive so that the results of `bright`
    /// and `background`, which have no named tag, are representable.
    pub const Color = enum(u8) {
        black = 30,
        red = 31,
        green = 32,
        yellow = 33,
        blue = 34,
        magenta = 35,
        cyan = 36,
        white = 37,
        _,

        /// Returns the code 60 above this one (30..37 becomes 90..97).
        pub fn bright(self: Color) Color {
            return @enumFromInt(@intFromEnum(self) + 60);
        }

        /// Returns the code 10 above this one (30..37 becomes 40..47).
        pub fn background(self: Color) Color {
            return @enumFromInt(@intFromEnum(self) + 10);
        }
    };

    /// Starts a request that sets only the foreground colour.
    pub fn fg(color: Color) SGR {
        return .{ .fgcolor = color };
    }

    /// Returns a copy whose foreground colour, if any, is replaced by its
    /// bright variant. Attribute and background are left untouched.
    pub fn bright(sgr: SGR) SGR {
        var next = sgr;
        if (next.fgcolor) |color|
            next.fgcolor = color.bright();
        return next;
    }

    /// Returns a copy with the background set to `color` (given as a
    /// foreground code).
    pub fn on(sgr: SGR, color: Color) SGR {
        var next = sgr;
        next.bgcolor = color;
        return next;
    }

    /// Starts a request that sets only the attribute.
    pub fn attr(attribute: Attr) SGR {
        return .{ .attribute = attribute };
    }

    /// The request that emits attribute 0.
    pub fn reset() SGR {
        return attr(.reset);
    }

    /// Returns a copy with the attribute replaced by `.bold`.
    pub fn bold(sgr: SGR) SGR {
        var next = sgr;
        next.attribute = .bold;
        return next;
    }

    /// Returns a copy with the attribute replaced by `.dim`.
    pub fn dim(sgr: SGR) SGR {
        var next = sgr;
        next.attribute = .dim;
        return next;
    }

    /// Writes `ESC [`, then the set fields as decimal parameters separated by
    /// `;` in the order attribute, foreground, background, then `m`.
    /// With no field set the output is `ESC [ m`.
    pub fn write(self: SGR, writer: *std.Io.Writer) !void {
        var first = true;
        try writer.writeAll("\x1b[");
        if (self.attribute) |attribute_value| {
            // Convert explicitly so the numeric parameter is what gets printed,
            // independent of how the formatter treats enum values.
            try writer.print("{d}", .{@intFromEnum(attribute_value)});
            first = false;
        }
        if (self.fgcolor) |color| {
            if (!first) try writer.writeAll(";");
            try writer.print("{d}", .{@intFromEnum(color)});
            first = false;
        }
        if (self.bgcolor) |color| {
            if (!first) try writer.writeAll(";");
            try writer.print("{d}", .{@intFromEnum(color.background())});
        }
        try writer.writeAll("m");
    }
};

/// Returns a printer type that maps each tag of `StyleEnum` to an `SGR` through
/// a theme and wraps formatted output in that style.
pub fn ColorPrinter(comptime StyleEnum: type) type {
    return struct {
        const Self = @This();
        pub const Theme = std.EnumMap(StyleEnum, SGR);

        writer: *std.Io.Writer,
        tty: std.Io.tty.Config,
        theme: Theme,

        pub fn init(writer: *std.Io.Writer, tty: std.Io.tty.Config, theme: Theme) Self {
            return .{ .writer = writer, .tty = tty, .theme = theme };
        }

        /// Applies `style`, prints `fmt`/`args`, then resets the style.
        ///
        /// Any writer error is returned, including one raised while emitting
        /// the trailing reset. If the formatted print itself fails, a reset is
        /// still attempted before that error is returned.
        pub fn print(
            self: *Self,
            style: StyleEnum,
            comptime fmt: []const u8,
            args: anytype,
        ) !void {
            try self.setStyle(style);
            self.writer.print(fmt, args) catch |err| {
                // Best effort only: the print error is the one worth reporting.
                self.reset() catch {};
                return err;
            };
            try self.reset();
        }

        /// Emits the themed sequence for `style`. Does nothing unless the tty
        /// config is `.escape_codes` and the theme has an entry for `style`.
        pub fn setStyle(self: *Self, style: StyleEnum) !void {
            if (self.tty == .escape_codes) {
                if (self.theme.get(style)) |sgr| {
                    try sgr.write(self.writer);
                }
            }
        }

        /// Emits the reset sequence when the tty config is `.escape_codes`.
        pub fn reset(self: *Self) !void {
            if (self.tty == .escape_codes) {
                try SGR.reset().write(self.writer);
            }
        }
    };
}
/// Writes tree-drawing prefixes to a writer while tracking the current nesting
/// depth. Thin wrapper around `BlazingFastTreeSplat`.
pub const TreePrinter = struct {
    treesplat: BlazingFastTreeSplat = .empty,
    writer: *std.Io.Writer,

    pub fn init(writer: *std.Io.Writer) TreePrinter {
        return .{
            .writer = writer,
        };
    }

    /// Writes the prefix for one entry at the current depth. `is_last` picks
    /// the corner connector instead of the tee connector.
    pub fn printPrefix(self: *TreePrinter, is_last: bool) !void {
        try self.treesplat.show(self.writer, !is_last);
    }

    /// Enters one nesting level. `has_more` records whether a vertical bar is
    /// drawn in that level's column. Fails once the maximum depth is reached.
    pub fn push(self: *TreePrinter, has_more: bool) !void {
        try self.treesplat.push(has_more);
    }

    /// Leaves one nesting level; a no-op at depth 0.
    pub fn pop(self: *TreePrinter) void {
        self.treesplat.pop();
    }
};

/// Fixed-capacity stack of up to `N` nesting levels, one bit per level. A set
/// bit means that level's column shows a vertical bar; a clear bit means it
/// shows blank padding.
const BlazingFastTreeSplat = struct {
    levels: std.bit_set.IntegerBitSet(N) = std.bit_set.IntegerBitSet(N).initEmpty(),
    len: std.math.IntFittingRange(0, N) = 0,

    const N = 32;
    const Writer = std.Io.Writer;

    pub const empty = @This(){};

    // Each column is exactly four bytes: a three-byte UTF-8 code point plus a space.
    const pattern_a: [4]u8 = [4]u8{ 0xe2, 0x80, 0x80, 0x20 }; // "\xe2\x80\x80 " (U+2000 + space)
    const pattern_b: [4]u8 = [4]u8{ 0xe2, 0x94, 0x82, 0x20 }; // "│ "

    /// Writes `len` four-byte columns into `w`: `pattern_b` where the matching
    /// bit in `bits` is set, `pattern_a` otherwise. The space is reserved from
    /// the writer in one `writableSlice` call and filled in place.
    pub fn writeUtf8Prefix(
        w: *Writer,
        bits: std.bit_set.IntegerBitSet(N),
        len: std.math.IntFittingRange(0, N),
    ) !void {
        const n = @as(usize, @intCast(len)) * 4;
        const buffer = try w.writableSlice(n);
        for (0..len) |i| {
            const pattern = if (bits.isSet(i)) &pattern_b else &pattern_a;
            @memcpy(buffer[i * 4 ..][0..4], pattern);
        }
    }

    /// Writes the column prefix for the current depth and, when the depth is
    /// non-zero, a connector: "├─" if `more` is true, "└─" otherwise.
    pub fn show(self: @This(), writer: *Writer, more: bool) !void {
        try writeUtf8Prefix(writer, self.levels, self.len);
        if (self.len > 0) {
            try writer.writeAll(if (!more) "└─" else "├─");
        }
    }

    /// Pushes one level. Returns `error.OutOfMemory` when all `N` levels are
    /// already in use.
    pub fn push(self: *@This(), more: bool) !void {
        // The bit set has N slots (indices 0..N-1) and `len` can represent N,
        // so only a push at len == N has to be rejected. The previous check
        // (`len + 1 >= N`) refused the last usable slot.
        if (self.len >= N) return error.OutOfMemory;
        self.levels.setValue(self.len, more);
        self.len += 1;
    }

    /// Pops one level. At depth 0 this is a no-op rather than an integer
    /// underflow. Bits at or above `len` are left stale; they are never read
    /// and `push` overwrites them.
    pub fn pop(self: *@This()) void {
        self.len -|= 1;
    }

    test "hehe" {
        var buffer: [1024]u8 = undefined;
        var w = std.Io.Writer.fixed(&buffer);
        var bits = std.bit_set.IntegerBitSet(N).initEmpty();
        bits.set(0);
        bits.set(3);

        try writeUtf8Prefix(&w, bits, 4);
        try std.testing.expectEqualStrings("│ \xe2\x80\x80 \xe2\x80\x80 │ ", w.buffered());
    }

    test "hehe 2" {
        var buffer: [1024]u8 = undefined;
        var w = std.Io.Writer.fixed(&buffer);

        var tree = @This().empty;
        try tree.push(true);
        try tree.push(false);
        try tree.push(false);
        try tree.push(true);
        try tree.show(&w, true);

        try std.testing.expectEqualStrings("│ \xe2\x80\x80 \xe2\x80\x80 │ ├─", w.buffered());
    }
};
17 | - `src/vm.zig` – The bytecode interpreter/VM with loop-mode execution using labeled `switch` `continue`. 18 | - `src/main.zig` – CLI harness that exercises the parser and prints traces/ASTs. 19 | - `docs/vm-loop-llvm.md` – Walkthrough of how to force Zig/LLVM to emit the specialized loop for `demoGrammar`. 20 | - `vm_loop_demo.zig` – Minimal driver used by the docs to instantiate the VM in isolation. 21 | 22 | ## Getting started 23 | 24 | You need Zig 0.15.1 or newer (the build script uses the labeled-`continue` feature). The usual workflow: 25 | 26 | ```bash 27 | zig build run # build the CLI and run it 28 | zig build test # run the grammar + VM unit tests 29 | ``` 30 | 31 | The CLI parses a miniature Zig subset (`src/zigmini`). Today that grammar still rides on the older `pegvm.zig` backend simply because it hasn't been ported over yet, but the shape mirrors the new `peg.zig` + `vm.zig` pipeline. For a quick feel of the existing system, run `zig run src/peg.zig`—that’s the main entry point that prints the bytecode, step trace, and AST using the original VM. Try passing `--dump-pegcode` for a readable dump of the generated bytecode. 32 | 33 | #### Sample `zig run src/peg.zig` 34 | 35 | Running the grammar module directly prints the compiled bytecode, a step-by-step trace for a demo input, and the resulting typed forest: 36 | 37 | ``` 38 | $ zig run src/peg.zig 39 | 40 | &Value: 41 | 0 push ->3 42 | 1 call ->5 43 | 2 drop ->4 44 | 3 call ->15 45 | 4 done 46 | 47 | &Integer: 48 | 5 open 49 | 6 read 1..9 50 | 7 next 51 | 8 open 52 | 9 read 0..9* 53 | 10 shut 54 | ... 55 | 56 | Parsing: "[[1] [2]]" 57 | 58 | [ | 0000 push ->3 59 | | 0001 call ->5 60 | |-| 0005 open 61 | ... 
62 | 63 | ✓ (156 steps) 64 | Array [0..16) "[[1] [2] [4096]]" 65 | └─values: 3 items 66 | ├─[0] Value: .array -> Integer d='1' 67 | ├─[1] Value: .array -> Integer d='2' 68 | └─[2] Value: .array -> Integer d='4', ds="096" 69 | ``` 70 | 71 | ### Forest shape 72 | 73 | The VM builds a "typed forest": every grammar rule owns a dedicated growable array, and siblings for a rule end up stored contiguously. That layout makes it cheap to gather a rule’s results and to reinterpret slices as strongly-typed structs/unions when you walk the AST later. In the demo run the root rule is `Array`, whose `values` field is emitted as a `Kleene` list of `Value` nodes; each `Value` lowers to either an `Integer` or another `Array`, and you can see the nesting clearly in the forest dump: 74 | 75 | ``` 76 | Array: 77 | └─values: 3 items 78 | ├─[0] Value: .array 79 | │ └─Array: 80 | │ └─values: 1 items 81 | │ └─[0] Value: .integer 82 | │ └─Integer: 83 | │ ├─d: '1' [2] 84 | │ └─ds: (empty) 85 | ├─[1] Value: .array 86 | │ └─Array: 87 | │ └─values: 1 items 88 | │ └─[0] Value: .integer 89 | │ └─Integer: 90 | │ ├─d: '2' [6] 91 | │ └─ds: (empty) 92 | └─[2] Value: .array 93 | └─Array: 94 | └─values: 1 items 95 | └─[0] Value: .integer 96 | └─Integer: 97 | ├─d: '4' [10] 98 | └─ds: "096" [11..14) 99 | ``` 100 | 101 | The full trace (with detailed stack annotations and AST layout) is available any time you want to sanity-check how a grammar runs. 102 | 103 | ### Inspecting the generated code 104 | 105 | To look directly at the loop-mode codegen for the included `demoGrammar`, follow the steps in `docs/vm-loop-llvm.md`. The short version: 106 | 107 | ```bash 108 | zig build-exe vm_loop_demo.zig \ 109 | -O ReleaseFast -fllvm \ 110 | -femit-llvm-ir=zig-out/vm_loop_demo.ll \ 111 | -femit-asm=zig-out/vm_loop_demo.s 112 | ``` 113 | 114 | The emitted `.ll` and `.s` highlight how the interpreter turns into a computed-goto state machine with literal bitsets for character classes. 
115 | 116 | ### How specialization actually looks 117 | 118 | Because the VM bytecode is baked during `comptime`, the “interpreter” that ships in the binary already knows the exact instruction stream. `VM(G).next` gets monomorphized for the grammar, the opcode array becomes a constant, and the main loop lowers to one giant `switch`/jump-table keyed on the instruction pointer. In other words we don’t even switch on an opcode enum at runtime; we switch on the literal IP and jump straight to the inlined code for that specific instruction. A toy sketch of the shape you get looks like this: 119 | 120 | ```zig 121 | // Pseudocode, but this is the flavour LLVM ends up with. 122 | vm: switch (ip) { 123 | 0 => { // read '[' 124 | if (self.text[self.sp] != '[') return error.ParseFailed; 125 | self.sp += 1; 126 | continue :vm 1; 127 | }, 128 | 1 => { // call Skip rule 129 | try self.calls.append(.{ .return_ip = 2, .target_ip = 31, ... }); 130 | continue :vm 31; 131 | }, 132 | 2 => { // next field, etc. 133 | ...; 134 | continue :vm 3; 135 | }, 136 | else => return, 137 | } 138 | ``` 139 | 140 | Every case carries the rule metadata, call targets, character sets, and struct bookkeeping as compile-time constants. In release builds the control flow resembles a hand-written threaded interpreter in assembly for a program that was known when you built the binary. The deep dive in `docs/vm-loop-llvm.md` shows the LLVM view, but even at the Zig level you can reason about the VM as a tightly unrolled state machine specialized to the grammar you compiled. 141 | 142 | ## Project status 143 | 144 | This is intentionally exploratory code. Expect breakage, rapid refactors, and plenty of TODOs around: 145 | 146 | - Enriching the grammar DSL with more PEG operators. 147 | - Experimenting with alternative backends (direct threaded code vs VM bytecode). 148 | - Measuring performance against other PEG implementations. 149 | - Refining the AST representation to reduce allocations. 
150 | 151 | If you're curious about a specific angle—memoization strategies, labelled-switch ergonomics, or further `comptime` tricks—open an issue or hack on a branch. The more weird experiments, the better. 152 | 153 | ## License 154 | 155 | MIT. See `LICENSE` for details. 156 | -------------------------------------------------------------------------------- /src/packrat_test.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const peg = @import("peg.zig"); 3 | const vm = @import("vm.zig"); 4 | 5 | test "memoization saves steps on backtracking" { 6 | // Grammar: S ::= A 'x' | A 'y' 7 | // When parsing "aaa...y", first alternative parses A then fails on 'x', 8 | // second alternative re-parses A at THE SAME POSITION (0) 9 | // This is the CLASSIC memoization benefit case 10 | const BacktrackGrammar = struct { 11 | const R = std.meta.DeclEnum(@This()); 12 | 13 | pub const S = peg.Match(union(enum) { 14 | ax: peg.Call(.ax), 15 | ay: peg.Call(.ay), 16 | }); 17 | 18 | pub const ax = peg.Match(struct { 19 | a: peg.Call(R.A), 20 | x: peg.CharSet("x", .one), 21 | }); 22 | 23 | pub const ay = peg.Match(struct { 24 | a: peg.Call(R.A), 25 | y: peg.CharSet("y", .one), 26 | }); 27 | 28 | pub const A = peg.Match(union(enum) { 29 | recursive: peg.Call(.recursive), 30 | base: peg.Call(.base), 31 | }); 32 | 33 | pub const recursive = peg.Match(struct { 34 | a: peg.CharSet("a", .one), 35 | rest: peg.Call(R.A), 36 | }); 37 | 38 | pub const base = peg.CharSet("a", .one); 39 | }; 40 | 41 | const TestVM = vm.VM(BacktrackGrammar); 42 | 43 | // Test shows linear growth in savings as input grows 44 | const test_cases = [_]struct { input: [:0]const u8, no_memo: u32, with_memo: u32, saved: u32 }{ 45 | .{ .input = "ay", .no_memo = 46, .with_memo = 30, .saved = 16 }, 46 | .{ .input = "aay", .no_memo = 66, .with_memo = 40, .saved = 26 }, 47 | .{ .input = "aaay", .no_memo = 86, .with_memo = 50, .saved = 36 }, 48 | .{ .input 
= "aaaay", .no_memo = 106, .with_memo = 60, .saved = 46 }, 49 | }; 50 | 51 | for (test_cases) |tc| { 52 | const steps = try TestVM.countSteps(tc.input, std.testing.allocator); 53 | try std.testing.expectEqual(tc.no_memo, steps); 54 | 55 | const stats = try TestVM.countStepsWithMemo(tc.input, std.testing.allocator); 56 | try std.testing.expectEqual(tc.with_memo, stats.steps); 57 | try std.testing.expectEqual(@as(u32, 1), stats.hits); // Exactly one cache hit at position 0 58 | try std.testing.expectEqual(tc.saved, tc.no_memo - stats.steps); 59 | } 60 | } 61 | 62 | test "memoization caches both success and failure" { 63 | // Test that we cache both successful and failed parse attempts 64 | const CacheTestGrammar = struct { 65 | const R = std.meta.DeclEnum(@This()); 66 | 67 | pub const start = peg.Match(union(enum) { 68 | first: peg.Call(.first), 69 | second: peg.Call(.second), 70 | }); 71 | 72 | pub const first = peg.Match(struct { 73 | exp: peg.Call(R.expensive), 74 | x: peg.CharSet("x", .one), 75 | }); 76 | 77 | pub const second = peg.Match(struct { 78 | exp: peg.Call(R.expensive), 79 | y: peg.CharSet("y", .one), 80 | }); 81 | 82 | // Make it expensive so the benefit is clear 83 | pub const expensive = peg.Match(struct { 84 | a: peg.CharSet("a", .one), 85 | b: peg.CharSet("b", .one), 86 | c: peg.CharSet("c", .one), 87 | }); 88 | }; 89 | 90 | const TestVM = vm.VM(CacheTestGrammar); 91 | 92 | // "abcx" succeeds on first try, no cache benefit 93 | { 94 | const input = "abcx"; 95 | const steps = try TestVM.countSteps(input, std.testing.allocator); 96 | const stats = try TestVM.countStepsWithMemo(input, std.testing.allocator); 97 | 98 | try std.testing.expectEqual(@as(u32, 18), steps); 99 | try std.testing.expectEqual(@as(u32, 18), stats.steps); 100 | try std.testing.expectEqual(@as(u32, 0), stats.hits); // No backtracking, no cache hit 101 | } 102 | 103 | // "abcy" backtracks and re-uses cached parse of expensive() at position 0 104 | { 105 | const input = "abcy"; 106 
| const steps = try TestVM.countSteps(input, std.testing.allocator); 107 | const stats = try TestVM.countStepsWithMemo(input, std.testing.allocator); 108 | 109 | try std.testing.expectEqual(@as(u32, 30), steps); 110 | try std.testing.expectEqual(@as(u32, 22), stats.steps); // Saved 8 steps (30 - 22) 111 | try std.testing.expectEqual(@as(u32, 1), stats.hits); // One cache hit 112 | } 113 | } 114 | 115 | test "step count stability check" { 116 | // This test ensures we notice when VM or compiler changes affect performance 117 | // Update these values when making intentional changes 118 | 119 | const SimpleGrammar = struct { 120 | pub const start = peg.Match(struct { 121 | a: peg.CharSet("a", .one), 122 | b: peg.CharSet("b", .one), 123 | c: peg.CharSet("c", .one), 124 | }); 125 | }; 126 | 127 | const TestVM = vm.VM(SimpleGrammar); 128 | 129 | const steps = try TestVM.countSteps("abc", std.testing.allocator); 130 | try std.testing.expectEqual(@as(u32, 8), steps); 131 | } 132 | 133 | test "memoization works with nested rules" { 134 | // More complex grammar with nested rule calls that benefit from memoization 135 | const NestedGrammar = struct { 136 | const R = std.meta.DeclEnum(@This()); 137 | 138 | // start ::= (expr '+') | (expr '-') 139 | pub const start = peg.Match(union(enum) { 140 | plus: peg.Call(.plus), 141 | minus: peg.Call(.minus), 142 | }); 143 | 144 | pub const plus = peg.Match(struct { 145 | e: peg.Call(R.expr), 146 | op: peg.CharSet("+", .one), 147 | }); 148 | 149 | pub const minus = peg.Match(struct { 150 | e: peg.Call(R.expr), 151 | op: peg.CharSet("-", .one), 152 | }); 153 | 154 | // expr ::= term term* 155 | pub const expr = peg.Match(struct { 156 | first: peg.Call(R.term), 157 | rest: peg.Kleene(R.term), 158 | }); 159 | 160 | // term ::= 'a' | 'b' 161 | pub const term = peg.Match(union(enum) { 162 | a: peg.Call(.term_a), 163 | b: peg.Call(.term_b), 164 | }); 165 | 166 | pub const term_a = peg.CharSet("a", .one); 167 | pub const term_b = peg.CharSet("b", .one); 
168 | }; 169 | 170 | const TestVM = vm.VM(NestedGrammar); 171 | 172 | // Input "ab-" will: 173 | // 1. Try expr '+' (parse expr successfully, fail on '+') 174 | // 2. Backtrack and try expr '-' (should use cached expr result) 175 | { 176 | const input = "ab-"; 177 | const steps = try TestVM.countSteps(input, std.testing.allocator); 178 | const stats = try TestVM.countStepsWithMemo(input, std.testing.allocator); 179 | 180 | // Verify memoization saves steps 181 | try std.testing.expect(stats.steps < steps); 182 | try std.testing.expect(stats.hits > 0); 183 | } 184 | } 185 | 186 | // REMOVED: The pathological test with kleene star was too complex. 187 | // The interaction between kleene star backtracking and memoization 188 | // makes the step counts hard to predict and maintain. 189 | 190 | test "memoization disabled by default" { 191 | // Verify that memoization is opt-in, not automatic 192 | const SimpleGrammar = struct { 193 | pub const start = peg.CharSet("a", .one); 194 | }; 195 | 196 | const TestVM = vm.VM(SimpleGrammar); 197 | 198 | // parse() should not use memoization 199 | try TestVM.parse("a", std.testing.allocator); 200 | 201 | // parseWithMemo() should use memoization 202 | try TestVM.parseWithMemo("a", std.testing.allocator); 203 | 204 | // Both should succeed, memoization is just an optimization 205 | } 206 | -------------------------------------------------------------------------------- /docs/vm-loop-llvm.md: -------------------------------------------------------------------------------- 1 | # Inspecting the `demoGrammar` VM Loop in LLVM IR 2 | 3 | This note shows how to force Zig to emit LLVM IR for the `VM(.Loop)` interpreter that `src/vm.zig` builds around the `demoGrammar` rules, and what the optimized lowering looks like once LLVM sees the fully monomorphized code. The snapshots below are abridged for readability, but every symbol referenced comes straight out of the real IR generated in `ReleaseFast` mode. 
4 | 5 | ## Build the sample once 6 | 7 | 1. Keep the standalone driver that instantiates the VM with the demo grammar (`vm_loop_demo.zig`): 8 | ```zig 9 | const std = @import("std"); 10 | const peg = @import("src/peg.zig"); 11 | const vm_mod = @import("src/vm.zig"); 12 | 13 | pub fn main() !void { 14 | const VM = vm_mod.VM(peg.demoGrammar); 15 | var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); 16 | defer arena.deinit(); 17 | const allocator = arena.allocator(); 18 | 19 | var parser = try VM.initAlloc("[[1] [2]]", allocator, 64, 64, 512); 20 | defer parser.deinit(allocator); 21 | try parser.run(); 22 | } 23 | ``` 24 | 25 | 2. Compile with LLVM output enabled: 26 | ```bash 27 | zig build-exe vm_loop_demo.zig \ 28 | -O ReleaseFast \ 29 | -fllvm \ 30 | -femit-llvm-ir=zig-out/vm_loop_demo.ll \ 31 | -femit-asm=zig-out/vm_loop_demo.s 32 | ``` 33 | 34 | The `.ll` file is optimized IR. `ReleaseFast` keeps the tight loop, but runs the usual instcombine/vectorize passes, so you are looking at realistic codegen rather than SSA straight from the front end. 35 | 36 | ## Monomorphized VM state 37 | 38 | The first few lines already show that the interpreter has been fully specialized to the grammar. There are no generics left—every type is spelling out `src.vm.VM(src.peg.demoGrammar)` and the layout matches the fields in `src/vm.zig`: 39 | 40 | ```llvm 41 | %"src.vm.VM(src.peg.demoGrammar)" = type { 42 | { ptr, i64 }, ; text slice 43 | %"array_list.Aligned(src.vm.VM(src.peg.demoGrammar).SaveFrame,null)", 44 | %"array_list.Aligned(src.vm.VM(src.peg.demoGrammar).CallFrame,null)", 45 | %"array_list.Aligned(src.ast.NodeType,null)", 46 | %"array_list.Aligned(u32,null)", ; child stack 47 | %"array_list.Aligned(src.vm.VM(src.peg.demoGrammar).StructuralFrame,null)", 48 | ptr, ; optional memo table 49 | i32, { i32, i8, [3 x i8] }, [4 x i8] 50 | } 51 | ``` 52 | 53 | Every support type follows suit. 
`SaveFrame`, `CallFrame`, `StructuralFrame`, and all helper `ArrayList` instantiations are concrete, so LLVM optimizes under fixed offsets and sizes instead of opaque pointers. 54 | 55 | ## Dispatch becomes a computed goto 56 | 57 | The VM loop body (`VM.next`) lowers to a single state-machine function with a dense jump table. The real symbol name is `"src.vm.VM(src.peg.demoGrammar).next__anon_3053"`, and the entry block pulls the next opcode by indexing into a static array of blockaddresses: 58 | 59 | ```llvm 60 | @__jmptab_4670 = internal unnamed_addr constant [35 x ptr] [ 61 | ptr blockaddress(@"...next__anon_3053", %Case), 62 | ptr blockaddress(@"...next__anon_3053", %Case1), 63 | ... 64 | ] 65 | 66 | Entry: 67 | %ip_ok = icmp ult i32 %ip, 35 68 | br i1 %ip_ok, label %dispatch, label %parse_fail 69 | 70 | dispatch: 71 | %slot = getelementptr inbounds [35 x ptr], ptr @__jmptab_4670, i64 %ip 72 | %dest = load ptr, ptr %slot 73 | indirectbr ptr %dest, 74 | [label %Case, label %Case1, label %Case2, ... label %Case34] 75 | ``` 76 | 77 | What used to be a giant Zig `switch (OP)` is now LLVM’s `indirectbr`. Each `CaseN` recipe implements an opcode from `src/vm.zig:234` with loop-mode rewrites (no returning IP, just `continue :vm`). 78 | 79 | ## Character sets collapse to bit masks 80 | 81 | `demoGrammar` only uses five character classes: `[1-9]`, `[0-9]`, `'['`, `']'`, and whitespace. 
LLVM turns those into constant bitsets (four 64-bit words) that live next to the jump table: 82 | 83 | ```llvm 84 | @1 = private unnamed_addr constant [4 x i64] [ ; "1".."9" 85 | 287667426198290432, 0, 0, 0 86 | ] 87 | @2 = private unnamed_addr constant [4 x i64] [ ; "0".."9" 88 | 287948901175001088, 0, 0, 0 89 | ] 90 | @3 = private unnamed_addr constant [4 x i64] [ ; "[" 91 | 0, 134217728, 0, 0 92 | ] 93 | @4 = private unnamed_addr constant [4 x i64] [ ; "]" 94 | 0, 536870912, 0, 0 95 | ] 96 | @5 = private unnamed_addr constant [4 x i64] [ ; whitespace set 97 | 4294977024, 0, 0, 0 98 | ] 99 | ``` 100 | 101 | A typical opcode, the `CharRange('1','9', .one)` check inside `Integer`, expands to a mask lookup and branch: 102 | 103 | ```llvm 104 | Case6: ; read digit 1-9 105 | %byte = load i8, ptr %text_ptr 106 | %word_idx = lshr i64 %byte, 6 107 | %mask_word = load i64, ptr getelementptr([4 x i64], ptr @1, i64 0, i64 %word_idx) 108 | %bit_idx = and i64 %byte, 63 109 | %probe_bit = shl nuw i64 1, %bit_idx 110 | %is_match = icmp ne i64 (and i64 %probe_bit, %mask_word), 0 111 | br i1 %is_match, label %consume_digit, label %fail_digit 112 | ``` 113 | 114 | No helper calls needed—the compile-time predicates became raw bitmath. 115 | 116 | ## Rule calls and AST nodes write constant payloads 117 | 118 | When an opcode finishes a rule (`.done` in Zig), LLVM emits direct stores into the call-frame arrays. 
The value `3` below is the enum tag for `.Skip`, and all offsets line up with the `StructFrame`/`SaveFrame` layouts from the VM definition: 119 | 120 | ```llvm 121 | Case: ; OP.done 122 | %save_len = load i64, ptr %saves.len_ptr 123 | %save_slot = getelementptr %"...SaveFrame", ptr %saves.buffer, i64 %save_len 124 | store i64 %start_sp, ptr %save_slot ; frame.start_sp 125 | store i64 %return_ip, ptr (%save_slot + 8) 126 | store i64 %node_len, ptr (%save_slot + 16) 127 | store i32 3, ptr (%save_slot + 24) ; rule enum (Skip) 128 | store i32 %struct_lo, ptr (%save_slot + 28) 129 | store i32 %child_lo, ptr (%save_slot + 32) 130 | br label %Case1 ; continue with next opcode 131 | ``` 132 | 133 | The structural helpers (`open`, `next`, `shut`) similarly spill child slices into preallocated arrays, with their field tags baked in (`store i8 5` corresponds to `.field` nodes for struct fields). Because all of these numbers originate from `demoGrammar`, nothing is indirect. 134 | 135 | ## Memoization path is concrete too 136 | 137 | Even optional features like memo tables collapse to concrete hash-map calls: 138 | 139 | ```llvm 140 | Case17: ; memo lookup 141 | %memo_ptr = load ptr, ptr (%self + memo_offset) 142 | br i1 (icmp eq ptr %memo_ptr, null), label %skip_lookup, label %lookup 143 | 144 | lookup: 145 | %key_hash = tail call i64 @llvm.fshl.i64(i64 5, i64 %key_ip, i64 32) 146 | %tbl_slot = getelementptr [80 x %MemoEntry], ptr %memo_ptr, i64 %key_hash 147 | %entry = load %MemoEntry, ptr %tbl_slot 148 | ... 149 | ``` 150 | 151 | All the helper symbols (`hash_map.HashMapUnmanaged(src.vm.VM(src.peg.demoGrammar).MemoKey, ...)`) are grammar-qualified, so you can drop into them with `llvm-objdump` and correlate the fast path vs miss path exactly. 152 | 153 | ## Where to look next 154 | 155 | * `zig-out/vm_loop_demo.ll` — full optimized IR. Search for `next__anon` to jump straight into the VM loop, and use `CaseN` labels to navigate opcodes. 
156 | * `zig-out/vm_loop_demo.s` — target-assembly view. On AArch64 the indirect jump turns into a `br xN` against the jump table, and the character-set probes become a couple of shifts and `tst`s. 157 | * `zig-out/vm_loop_demo` — the executable. `llvm-objdump -d zig-out/vm_loop_demo` mirrors what you see in `.s` but keeps symbol references intact if you prefer working from the binary. 158 | 159 | Because we compiled with `-fllvm`, any other LLVM tooling (e.g. `opt -analyze`, `llvm-mca`) can be aimed at the `.ll` file without replaying the Zig build. That makes it practical to iterate on VM changes while confirming what the interpreter looks like under real release builds. 160 | -------------------------------------------------------------------------------- /src/ast.zig: -------------------------------------------------------------------------------- 1 | // AST building infrastructure for PEG parsers 2 | // 3 | // This module provides: 4 | // - NodeType: Raw parse tree nodes (from VM) 5 | // - NodeState: Cursor for traversing nodes during AST construction 6 | // - Forest: Typed storage for built AST 7 | // - Helpers for building typed values from parse trees 8 | 9 | comptime { 10 | @setEvalBranchQuota(500000); 11 | } 12 | 13 | const std = @import("std"); 14 | 15 | // ============================================================================ 16 | // PARSE TREE NODE TYPE 17 | // ============================================================================ 18 | 19 | // NodeKind is recorded on every VM node so later stages can tell “real” rule calls 20 | // from the helper nodes created by structural opcodes. 
21 | pub const NodeKind = enum(u8) { 22 | rule, // a grammar rule body produced by a call/done pair 23 | @"struct", // wrapper emitted by Struct.compile to group named fields 24 | field, // individual field wrapper inside the struct helper node 25 | maybe, // wrapper produced by Maybe.compile (optional payload) 26 | kleene, // wrapper produced by Kleene.compile (iterable payload) 27 | char_slice, // helper around a slice produced by kleene character ranges/sets 28 | }; 29 | 30 | /// Parse-tree node produced by the VM. 31 | /// 32 | /// Besides the explicit sibling pointers we capture the byte span and (when 33 | /// `kind == .rule`) which grammar rule produced the node. The `prev_sibling` 34 | /// field mirrors `next_sibling`, letting the VM unlink freshly-created nodes in 35 | /// O(1) when a backtrack rolls them back out of existence. 36 | pub const NodeType = struct { 37 | kind: NodeKind, 38 | rule_index: u32, 39 | start: u32, 40 | end: u32, 41 | first_child: ?u32, 42 | next_sibling: ?u32, 43 | prev_sibling: ?u32, 44 | parent: ?u32, 45 | }; 46 | 47 | // ============================================================================ 48 | // AST BUILDING TYPES 49 | // ============================================================================ 50 | 51 | pub const BuildContext = struct { 52 | text: []const u8, 53 | nodes: []const NodeType, 54 | positions: []const usize, 55 | }; 56 | 57 | pub const BuildError = error{ InvalidAst, UnsupportedPattern, OutOfMemory }; 58 | 59 | pub const NodeSlice = struct { offset: u32, len: u32 }; 60 | 61 | // ============================================================================ 62 | // NODE STATE - Cursor for traversing parse tree 63 | // ============================================================================ 64 | 65 | pub const NodeState = struct { 66 | pos: usize, 67 | end: usize, 68 | next_child: ?usize, 69 | 70 | pub fn init(node: NodeType) @This() { 71 | return .{ 72 | .pos = @intCast(node.start), 73 | .end = 
@intCast(node.end), 74 | .next_child = if (node.first_child) |fc| @intCast(fc) else null, 75 | }; 76 | } 77 | 78 | pub fn expectCall( 79 | self: *@This(), 80 | ctx: *const BuildContext, 81 | rule_index: u32, 82 | ) BuildError!u32 { 83 | const child_idx = self.next_child orelse return error.InvalidAst; 84 | const child = ctx.nodes[child_idx]; 85 | if (child.kind != .rule or child.rule_index != rule_index) { 86 | return error.InvalidAst; 87 | } 88 | 89 | const start_pos: usize = @intCast(child.start); 90 | const end_pos: usize = @intCast(child.end); 91 | 92 | if (self.pos != start_pos) { 93 | return error.InvalidAst; 94 | } 95 | 96 | self.pos = end_pos; 97 | self.next_child = childToIndex(child.next_sibling); 98 | 99 | const index = ctx.positions[child_idx]; 100 | return @intCast(index); 101 | } 102 | 103 | pub fn gatherNodeSlice( 104 | self: *@This(), 105 | ctx: *const BuildContext, 106 | rule_index: u32, 107 | ) BuildError!NodeSlice { 108 | var count: usize = 0; 109 | var first_index: usize = 0; 110 | var first = true; 111 | 112 | while (self.next_child) |child_idx| { 113 | const child = ctx.nodes[child_idx]; 114 | if (child.kind != .rule or child.rule_index != rule_index) { 115 | break; 116 | } 117 | 118 | const start_pos: usize = @intCast(child.start); 119 | const end_pos: usize = @intCast(child.end); 120 | if (self.pos != start_pos) { 121 | return error.InvalidAst; 122 | } 123 | 124 | const index = ctx.positions[child_idx]; 125 | if (first) { 126 | first_index = index; 127 | first = false; 128 | } else if (index != first_index + count) { 129 | return error.InvalidAst; 130 | } 131 | 132 | count += 1; 133 | self.pos = end_pos; 134 | self.next_child = childToIndex(child.next_sibling); 135 | } 136 | 137 | return .{ .offset = @intCast(first_index), .len = @intCast(count) }; 138 | } 139 | 140 | pub fn expectKind( 141 | self: *@This(), 142 | ctx: *const BuildContext, 143 | kind: NodeKind, 144 | ) BuildError!NodeState { 145 | const child_idx = self.next_child orelse 
return error.InvalidAst; 146 | const child = ctx.nodes[child_idx]; 147 | if (child.kind != kind) { 148 | return error.InvalidAst; 149 | } 150 | 151 | const start_pos: usize = @intCast(child.start); 152 | const end_pos: usize = @intCast(child.end); 153 | if (self.pos != start_pos) { 154 | return error.InvalidAst; 155 | } 156 | 157 | self.pos = end_pos; 158 | self.next_child = childToIndex(child.next_sibling); 159 | return NodeState.init(child); 160 | } 161 | }; 162 | 163 | // ============================================================================ 164 | // HELPER FUNCTIONS 165 | // ============================================================================ 166 | 167 | pub fn childToIndex(child: ?u32) ?usize { 168 | return if (child) |c| @intCast(c) else null; 169 | } 170 | 171 | pub fn stateNextBoundary( 172 | ctx: *const BuildContext, 173 | state: NodeState, 174 | ) BuildError!usize { 175 | if (state.next_child) |child_idx| { 176 | const start: usize = @intCast(ctx.nodes[child_idx].start); 177 | if (start > ctx.text.len) { 178 | return error.InvalidAst; 179 | } 180 | return start; 181 | } 182 | if (state.end > ctx.text.len) { 183 | return error.InvalidAst; 184 | } 185 | return state.end; 186 | } 187 | 188 | // Sort node indices for a rule so siblings (same parent) stay together. 
189 | pub fn sortRuleGroup( 190 | nodes: []const NodeType, 191 | items: []usize, 192 | ) void { 193 | if (items.len <= 1) return; 194 | const Context = struct { 195 | nodes: []const NodeType, 196 | 197 | fn lessThan(self: @This(), lhs: usize, rhs: usize) bool { 198 | const na = self.nodes[lhs]; 199 | const nb = self.nodes[rhs]; 200 | 201 | const pa = na.parent; 202 | const pb = nb.parent; 203 | 204 | if (pa) |a| { 205 | if (pb) |b| { 206 | if (a == b) { 207 | // Same parent: sort by position 208 | if (na.start == nb.start) return lhs < rhs; 209 | return na.start < nb.start; 210 | } 211 | return a < b; 212 | } 213 | return false; 214 | } 215 | if (pb != null) return true; 216 | 217 | // No parents: sort by position 218 | if (na.start == nb.start) return lhs < rhs; 219 | return na.start < nb.start; 220 | } 221 | }; 222 | 223 | std.sort.block(usize, items, Context{ .nodes = nodes }, Context.lessThan); 224 | } 225 | 226 | // ============================================================================ 227 | // FOREST - Typed AST Storage 228 | // ============================================================================ 229 | 230 | pub fn Forest(comptime rules: type) type { 231 | const RuleEnum = std.meta.DeclEnum(rules); 232 | 233 | const Helpers = struct { 234 | fn RuleValueType(comptime rule: RuleEnum) type { 235 | const pattern = @field(rules, @tagName(rule)); 236 | return pattern; 237 | } 238 | 239 | fn NodeListType(comptime rule: RuleEnum) type { 240 | const ValueType = RuleValueType(rule); 241 | if (@sizeOf(ValueType) == 0) { 242 | return struct { 243 | items: []ValueType = &.{}, 244 | pub const empty = @This(){}; 245 | pub fn append(_: *@This(), _: std.mem.Allocator, _: ValueType) !void { 246 | return; 247 | } 248 | pub fn deinit(_: *@This(), _: std.mem.Allocator) void {} 249 | }; 250 | } 251 | return std.ArrayList(ValueType); 252 | } 253 | }; 254 | 255 | const ListsStruct = blk: { 256 | const rule_count = std.meta.tags(RuleEnum).len; 257 | var fields: 
[rule_count]std.builtin.Type.StructField = undefined; 258 | @setEvalBranchQuota(2000); 259 | inline for (std.meta.tags(RuleEnum), 0..) |rule_tag, i| { 260 | const ListType = Helpers.NodeListType(rule_tag); 261 | fields[i] = .{ 262 | .name = @tagName(rule_tag), 263 | .type = ListType, 264 | .default_value_ptr = null, 265 | .is_comptime = false, 266 | .alignment = @alignOf(ListType), 267 | }; 268 | } 269 | break :blk @Type(.{ .@"struct" = .{ 270 | .layout = .auto, 271 | .backing_integer = null, 272 | .fields = &fields, 273 | .decls = &.{}, 274 | .is_tuple = false, 275 | } }); 276 | }; 277 | 278 | return struct { 279 | const Self = @This(); 280 | 281 | lists: ListsStruct, 282 | 283 | pub fn init() Self { 284 | @setEvalBranchQuota(2000); 285 | var lists: ListsStruct = undefined; 286 | inline for (comptime std.meta.tags(RuleEnum)) |rule_tag| { 287 | @field(lists, @tagName(rule_tag)) = Helpers.NodeListType(rule_tag).empty; 288 | } 289 | return .{ .lists = lists }; 290 | } 291 | 292 | pub fn deinit(self: *Self, allocator: std.mem.Allocator) void { 293 | inline for (comptime std.meta.tags(RuleEnum)) |rule_tag| { 294 | @field(self.lists, @tagName(rule_tag)).deinit(allocator); 295 | } 296 | } 297 | 298 | pub fn append( 299 | self: *Self, 300 | allocator: std.mem.Allocator, 301 | comptime rule: RuleEnum, 302 | value: Helpers.RuleValueType(rule), 303 | ) !u32 { 304 | if (comptime @sizeOf(Helpers.RuleValueType(rule)) == 0) { 305 | @compileError("Cannot append nodes for non-capturing rules"); 306 | } 307 | var list = &@field(self.lists, @tagName(rule)); 308 | try list.append(allocator, value); 309 | return @intCast(list.items.len - 1); 310 | } 311 | 312 | pub fn get( 313 | self: *const Self, 314 | comptime rule: RuleEnum, 315 | index: u32, 316 | ) *const Helpers.RuleValueType(rule) { 317 | return &@field(self.lists, @tagName(rule)).items[index]; 318 | } 319 | 320 | pub fn RuleValueType(comptime rule: RuleEnum) type { 321 | return Helpers.RuleValueType(rule); 322 | } 323 | }; 324 
| } 325 | -------------------------------------------------------------------------------- /src/zig.peg.txt: -------------------------------------------------------------------------------- 1 | Root <- skip container_doc_comment? ContainerMembers eof 2 | 3 | # *** Top level *** 4 | ContainerMembers <- ContainerDeclaration* (ContainerField COMMA)* (ContainerField / ContainerDeclaration*) 5 | 6 | ContainerDeclaration <- TestDecl / ComptimeDecl / doc_comment? KEYWORD_pub? Decl 7 | 8 | TestDecl <- KEYWORD_test (STRINGLITERALSINGLE / IDENTIFIER)? Block 9 | 10 | ComptimeDecl <- KEYWORD_comptime Block 11 | 12 | Decl 13 | <- (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE? / KEYWORD_inline / KEYWORD_noinline)? FnProto (SEMICOLON / Block) 14 | / (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE?)? KEYWORD_threadlocal? GlobalVarDecl 15 | 16 | FnProto <- KEYWORD_fn IDENTIFIER? LPAREN ParamDeclList RPAREN ByteAlign? AddrSpace? LinkSection? CallConv? EXCLAMATIONMARK? TypeExpr 17 | 18 | VarDeclProto <- (KEYWORD_const / KEYWORD_var) IDENTIFIER (COLON TypeExpr)? ByteAlign? AddrSpace? LinkSection? 19 | 20 | GlobalVarDecl <- VarDeclProto (EQUAL Expr)? SEMICOLON 21 | 22 | ContainerField <- doc_comment? KEYWORD_comptime? !KEYWORD_fn (IDENTIFIER COLON)? TypeExpr ByteAlign? (EQUAL Expr)? 23 | 24 | # *** Block Level *** 25 | Statement 26 | <- KEYWORD_comptime ComptimeStatement 27 | / KEYWORD_nosuspend BlockExprStatement 28 | / KEYWORD_suspend BlockExprStatement 29 | / KEYWORD_defer BlockExprStatement 30 | / KEYWORD_errdefer Payload? BlockExprStatement 31 | / IfStatement 32 | / LabeledStatement 33 | / SwitchExpr 34 | / VarDeclExprStatement 35 | 36 | ComptimeStatement 37 | <- BlockExpr 38 | / VarDeclExprStatement 39 | 40 | IfStatement 41 | <- IfPrefix BlockExpr ( KEYWORD_else Payload? Statement )? 42 | / IfPrefix AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement ) 43 | 44 | LabeledStatement <- BlockLabel? (Block / LoopStatement) 45 | 46 | LoopStatement <- KEYWORD_inline? 
(ForStatement / WhileStatement) 47 | 48 | ForStatement 49 | <- ForPrefix BlockExpr ( KEYWORD_else Statement )? 50 | / ForPrefix AssignExpr ( SEMICOLON / KEYWORD_else Statement ) 51 | 52 | WhileStatement 53 | <- WhilePrefix BlockExpr ( KEYWORD_else Payload? Statement )? 54 | / WhilePrefix AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement ) 55 | 56 | BlockExprStatement 57 | <- BlockExpr 58 | / AssignExpr SEMICOLON 59 | 60 | BlockExpr <- BlockLabel? Block 61 | 62 | # An expression, assignment, or any destructure, as a statement. 63 | VarDeclExprStatement 64 | <- VarDeclProto (COMMA (VarDeclProto / Expr))* EQUAL Expr SEMICOLON 65 | / Expr (AssignOp Expr / (COMMA (VarDeclProto / Expr))+ EQUAL Expr)? SEMICOLON 66 | 67 | # *** Expression Level *** 68 | 69 | # An assignment or a destructure whose LHS are all lvalue expressions. 70 | AssignExpr <- Expr (AssignOp Expr / (COMMA Expr)+ EQUAL Expr)? 71 | 72 | SingleAssignExpr <- Expr (AssignOp Expr)? 73 | 74 | Expr <- BoolOrExpr 75 | 76 | BoolOrExpr <- BoolAndExpr (KEYWORD_or BoolAndExpr)* 77 | 78 | BoolAndExpr <- CompareExpr (KEYWORD_and CompareExpr)* 79 | 80 | CompareExpr <- BitwiseExpr (CompareOp BitwiseExpr)? 81 | 82 | BitwiseExpr <- BitShiftExpr (BitwiseOp BitShiftExpr)* 83 | 84 | BitShiftExpr <- AdditionExpr (BitShiftOp AdditionExpr)* 85 | 86 | AdditionExpr <- MultiplyExpr (AdditionOp MultiplyExpr)* 87 | 88 | MultiplyExpr <- PrefixExpr (MultiplyOp PrefixExpr)* 89 | 90 | PrefixExpr <- PrefixOp* PrimaryExpr 91 | 92 | PrimaryExpr 93 | <- AsmExpr 94 | / IfExpr 95 | / KEYWORD_break BreakLabel? Expr? 96 | / KEYWORD_comptime Expr 97 | / KEYWORD_nosuspend Expr 98 | / KEYWORD_continue BreakLabel? 99 | / KEYWORD_resume Expr 100 | / KEYWORD_return Expr? 101 | / BlockLabel? LoopExpr 102 | / Block 103 | / CurlySuffixExpr 104 | 105 | IfExpr <- IfPrefix Expr (KEYWORD_else Payload? Expr)? 106 | 107 | Block <- LBRACE Statement* RBRACE 108 | 109 | LoopExpr <- KEYWORD_inline? 
(ForExpr / WhileExpr) 110 | 111 | ForExpr <- ForPrefix Expr (KEYWORD_else Expr)? 112 | 113 | WhileExpr <- WhilePrefix Expr (KEYWORD_else Payload? Expr)? 114 | 115 | CurlySuffixExpr <- TypeExpr InitList? 116 | 117 | InitList 118 | <- LBRACE FieldInit (COMMA FieldInit)* COMMA? RBRACE 119 | / LBRACE Expr (COMMA Expr)* COMMA? RBRACE 120 | / LBRACE RBRACE 121 | 122 | TypeExpr <- PrefixTypeOp* ErrorUnionExpr 123 | 124 | ErrorUnionExpr <- SuffixExpr (EXCLAMATIONMARK TypeExpr)? 125 | 126 | SuffixExpr 127 | <- PrimaryTypeExpr (SuffixOp / FnCallArguments)* 128 | 129 | PrimaryTypeExpr 130 | <- BUILTINIDENTIFIER FnCallArguments 131 | / CHAR_LITERAL 132 | / ContainerDecl 133 | / DOT IDENTIFIER 134 | / DOT InitList 135 | / ErrorSetDecl 136 | / FLOAT 137 | / FnProto 138 | / GroupedExpr 139 | / LabeledTypeExpr 140 | / IDENTIFIER 141 | / IfTypeExpr 142 | / INTEGER 143 | / KEYWORD_comptime TypeExpr 144 | / KEYWORD_error DOT IDENTIFIER 145 | / KEYWORD_anyframe 146 | / KEYWORD_unreachable 147 | / STRINGLITERAL 148 | / SwitchExpr 149 | 150 | ContainerDecl <- (KEYWORD_extern / KEYWORD_packed)? ContainerDeclAuto 151 | 152 | ErrorSetDecl <- KEYWORD_error LBRACE IdentifierList RBRACE 153 | 154 | GroupedExpr <- LPAREN Expr RPAREN 155 | 156 | IfTypeExpr <- IfPrefix TypeExpr (KEYWORD_else Payload? TypeExpr)? 157 | 158 | LabeledTypeExpr 159 | <- BlockLabel Block 160 | / BlockLabel? LoopTypeExpr 161 | 162 | LoopTypeExpr <- KEYWORD_inline? (ForTypeExpr / WhileTypeExpr) 163 | 164 | ForTypeExpr <- ForPrefix TypeExpr (KEYWORD_else TypeExpr)? 165 | 166 | WhileTypeExpr <- WhilePrefix TypeExpr (KEYWORD_else Payload? TypeExpr)? 167 | 168 | SwitchExpr <- KEYWORD_switch LPAREN Expr RPAREN LBRACE SwitchProngList RBRACE 169 | 170 | # *** Assembly *** 171 | AsmExpr <- KEYWORD_asm KEYWORD_volatile? LPAREN Expr AsmOutput? RPAREN 172 | 173 | AsmOutput <- COLON AsmOutputList AsmInput? 
174 | 175 | AsmOutputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERAL LPAREN (MINUSRARROW TypeExpr / IDENTIFIER) RPAREN 176 | 177 | AsmInput <- COLON AsmInputList AsmClobbers? 178 | 179 | AsmInputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERAL LPAREN Expr RPAREN 180 | 181 | AsmClobbers <- COLON Expr 182 | 183 | # *** Helper grammar *** 184 | BreakLabel <- COLON IDENTIFIER 185 | 186 | BlockLabel <- IDENTIFIER COLON 187 | 188 | FieldInit <- DOT IDENTIFIER EQUAL Expr 189 | 190 | WhileContinueExpr <- COLON LPAREN AssignExpr RPAREN 191 | 192 | LinkSection <- KEYWORD_linksection LPAREN Expr RPAREN 193 | 194 | AddrSpace <- KEYWORD_addrspace LPAREN Expr RPAREN 195 | 196 | # Fn specific 197 | CallConv <- KEYWORD_callconv LPAREN Expr RPAREN 198 | 199 | ParamDecl 200 | <- doc_comment? (KEYWORD_noalias / KEYWORD_comptime)? (IDENTIFIER COLON)? ParamType 201 | / DOT3 202 | 203 | ParamType 204 | <- KEYWORD_anytype 205 | / TypeExpr 206 | 207 | # Control flow prefixes 208 | IfPrefix <- KEYWORD_if LPAREN Expr RPAREN PtrPayload? 209 | 210 | WhilePrefix <- KEYWORD_while LPAREN Expr RPAREN PtrPayload? WhileContinueExpr? 211 | 212 | ForPrefix <- KEYWORD_for LPAREN ForArgumentsList RPAREN PtrListPayload 213 | 214 | # Payloads 215 | Payload <- PIPE IDENTIFIER PIPE 216 | 217 | PtrPayload <- PIPE ASTERISK? IDENTIFIER PIPE 218 | 219 | PtrIndexPayload <- PIPE ASTERISK? IDENTIFIER (COMMA IDENTIFIER)? PIPE 220 | 221 | PtrListPayload <- PIPE ASTERISK? IDENTIFIER (COMMA ASTERISK? IDENTIFIER)* COMMA? PIPE 222 | 223 | # Switch specific 224 | SwitchProng <- KEYWORD_inline? SwitchCase EQUALRARROW PtrIndexPayload? SingleAssignExpr 225 | 226 | SwitchCase 227 | <- SwitchItem (COMMA SwitchItem)* COMMA? 228 | / KEYWORD_else 229 | 230 | SwitchItem <- Expr (DOT3 Expr)? 231 | 232 | # For specific 233 | ForArgumentsList <- ForItem (COMMA ForItem)* COMMA? 234 | 235 | ForItem <- Expr (DOT2 Expr?)? 
236 | 237 | # Operators 238 | AssignOp 239 | <- ASTERISKEQUAL 240 | / ASTERISKPIPEEQUAL 241 | / SLASHEQUAL 242 | / PERCENTEQUAL 243 | / PLUSEQUAL 244 | / PLUSPIPEEQUAL 245 | / MINUSEQUAL 246 | / MINUSPIPEEQUAL 247 | / LARROW2EQUAL 248 | / LARROW2PIPEEQUAL 249 | / RARROW2EQUAL 250 | / AMPERSANDEQUAL 251 | / CARETEQUAL 252 | / PIPEEQUAL 253 | / ASTERISKPERCENTEQUAL 254 | / PLUSPERCENTEQUAL 255 | / MINUSPERCENTEQUAL 256 | / EQUAL 257 | 258 | CompareOp 259 | <- EQUALEQUAL 260 | / EXCLAMATIONMARKEQUAL 261 | / LARROW 262 | / RARROW 263 | / LARROWEQUAL 264 | / RARROWEQUAL 265 | 266 | BitwiseOp 267 | <- AMPERSAND 268 | / CARET 269 | / PIPE 270 | / KEYWORD_orelse 271 | / KEYWORD_catch Payload? 272 | 273 | BitShiftOp 274 | <- LARROW2 275 | / RARROW2 276 | / LARROW2PIPE 277 | 278 | AdditionOp 279 | <- PLUS 280 | / MINUS 281 | / PLUS2 282 | / PLUSPERCENT 283 | / MINUSPERCENT 284 | / PLUSPIPE 285 | / MINUSPIPE 286 | 287 | MultiplyOp 288 | <- PIPE2 289 | / ASTERISK 290 | / SLASH 291 | / PERCENT 292 | / ASTERISK2 293 | / ASTERISKPERCENT 294 | / ASTERISKPIPE 295 | 296 | PrefixOp 297 | <- EXCLAMATIONMARK 298 | / MINUS 299 | / TILDE 300 | / MINUSPERCENT 301 | / AMPERSAND 302 | / KEYWORD_try 303 | 304 | PrefixTypeOp 305 | <- QUESTIONMARK 306 | / KEYWORD_anyframe MINUSRARROW 307 | / SliceTypeStart (ByteAlign / AddrSpace / KEYWORD_const / KEYWORD_volatile / KEYWORD_allowzero)* 308 | / PtrTypeStart (AddrSpace / KEYWORD_align LPAREN Expr (COLON Expr COLON Expr)? RPAREN / KEYWORD_const / KEYWORD_volatile / KEYWORD_allowzero)* 309 | / ArrayTypeStart 310 | 311 | SuffixOp 312 | <- LBRACKET Expr (DOT2 (Expr? (COLON Expr)?)?)? RBRACKET 313 | / DOT IDENTIFIER 314 | / DOTASTERISK 315 | / DOTQUESTIONMARK 316 | 317 | FnCallArguments <- LPAREN ExprList RPAREN 318 | 319 | # Ptr specific 320 | SliceTypeStart <- LBRACKET (COLON Expr)? RBRACKET 321 | 322 | PtrTypeStart 323 | <- ASTERISK 324 | / ASTERISK2 325 | / LBRACKET ASTERISK (LETTERC / COLON Expr)? 
RBRACKET 326 | 327 | ArrayTypeStart <- LBRACKET Expr (COLON Expr)? RBRACKET 328 | 329 | # ContainerDecl specific 330 | ContainerDeclAuto <- ContainerDeclType LBRACE container_doc_comment? ContainerMembers RBRACE 331 | 332 | ContainerDeclType 333 | <- KEYWORD_struct (LPAREN Expr RPAREN)? 334 | / KEYWORD_opaque 335 | / KEYWORD_enum (LPAREN Expr RPAREN)? 336 | / KEYWORD_union (LPAREN (KEYWORD_enum (LPAREN Expr RPAREN)? / Expr) RPAREN)? 337 | 338 | # Alignment 339 | ByteAlign <- KEYWORD_align LPAREN Expr RPAREN 340 | 341 | # Lists 342 | IdentifierList <- (doc_comment? IDENTIFIER COMMA)* (doc_comment? IDENTIFIER)? 343 | 344 | SwitchProngList <- (SwitchProng COMMA)* SwitchProng? 345 | 346 | AsmOutputList <- (AsmOutputItem COMMA)* AsmOutputItem? 347 | 348 | AsmInputList <- (AsmInputItem COMMA)* AsmInputItem? 349 | 350 | StringList <- (STRINGLITERAL COMMA)* STRINGLITERAL? 351 | 352 | ParamDeclList <- (ParamDecl COMMA)* ParamDecl? 353 | 354 | ExprList <- (Expr COMMA)* Expr? 355 | 356 | # *** Tokens *** 357 | eof <- !. 358 | bin <- [01] 359 | bin_ <- '_'? bin 360 | oct <- [0-7] 361 | oct_ <- '_'? oct 362 | hex <- [0-9a-fA-F] 363 | hex_ <- '_'? hex 364 | dec <- [0-9] 365 | dec_ <- '_'? 
dec 366 | 367 | bin_int <- bin bin_* 368 | oct_int <- oct oct_* 369 | dec_int <- dec dec_* 370 | hex_int <- hex hex_* 371 | 372 | ox80_oxBF <- [\200-\277] 373 | oxF4 <- '\364' 374 | ox80_ox8F <- [\200-\217] 375 | oxF1_oxF3 <- [\361-\363] 376 | oxF0 <- '\360' 377 | ox90_0xBF <- [\220-\277] 378 | oxEE_oxEF <- [\356-\357] 379 | oxED <- '\355' 380 | ox80_ox9F <- [\200-\237] 381 | oxE1_oxEC <- [\341-\354] 382 | oxE0 <- '\340' 383 | oxA0_oxBF <- [\240-\277] 384 | oxC2_oxDF <- [\302-\337] 385 | 386 | # From https://lemire.me/blog/2018/05/09/how-quickly-can-you-check-that-a-string-is-valid-unicode-utf-8/ 387 | # First Byte Second Byte Third Byte Fourth Byte 388 | # [0x00,0x7F] 389 | # [0xC2,0xDF] [0x80,0xBF] 390 | # 0xE0 [0xA0,0xBF] [0x80,0xBF] 391 | # [0xE1,0xEC] [0x80,0xBF] [0x80,0xBF] 392 | # 0xED [0x80,0x9F] [0x80,0xBF] 393 | # [0xEE,0xEF] [0x80,0xBF] [0x80,0xBF] 394 | # 0xF0 [0x90,0xBF] [0x80,0xBF] [0x80,0xBF] 395 | # [0xF1,0xF3] [0x80,0xBF] [0x80,0xBF] [0x80,0xBF] 396 | # 0xF4 [0x80,0x8F] [0x80,0xBF] [0x80,0xBF] 397 | 398 | mb_utf8_literal <- 399 | oxF4 ox80_ox8F ox80_oxBF ox80_oxBF 400 | / oxF1_oxF3 ox80_oxBF ox80_oxBF ox80_oxBF 401 | / oxF0 ox90_0xBF ox80_oxBF ox80_oxBF 402 | / oxEE_oxEF ox80_oxBF ox80_oxBF 403 | / oxED ox80_ox9F ox80_oxBF 404 | / oxE1_oxEC ox80_oxBF ox80_oxBF 405 | / oxE0 oxA0_oxBF ox80_oxBF 406 | / oxC2_oxDF ox80_oxBF 407 | 408 | ascii_char_not_nl_slash_squote <- [\000-\011\013-\046\050-\133\135-\177] 409 | 410 | char_escape 411 | <- "\\x" hex hex 412 | / "\\u{" hex+ "}" 413 | / "\\" [nr\\t'"] 414 | char_char 415 | <- mb_utf8_literal 416 | / char_escape 417 | / ascii_char_not_nl_slash_squote 418 | 419 | string_char 420 | <- char_escape 421 | / [^\\"\n] 422 | 423 | container_doc_comment <- ('//!' 
[^\n]* [ \n]* skip)+ 424 | doc_comment <- ('///' [^\n]* [ \n]* skip)+ 425 | line_comment <- '//' ![!/][^\n]* / '////' [^\n]* 426 | line_string <- ("\\\\" [^\n]* [ \n]*)+ 427 | skip <- ([ \n] / line_comment)* 428 | 429 | CHAR_LITERAL <- "'" char_char "'" skip 430 | FLOAT 431 | <- "0x" hex_int "." hex_int ([pP] [-+]? dec_int)? skip 432 | / dec_int "." dec_int ([eE] [-+]? dec_int)? skip 433 | / "0x" hex_int [pP] [-+]? dec_int skip 434 | / dec_int [eE] [-+]? dec_int skip 435 | INTEGER 436 | <- "0b" bin_int skip 437 | / "0o" oct_int skip 438 | / "0x" hex_int skip 439 | / dec_int skip 440 | STRINGLITERALSINGLE <- "\"" string_char* "\"" skip 441 | STRINGLITERAL 442 | <- STRINGLITERALSINGLE 443 | / (line_string skip)+ 444 | IDENTIFIER 445 | <- !keyword [A-Za-z_] [A-Za-z0-9_]* skip 446 | / "@" STRINGLITERALSINGLE 447 | BUILTINIDENTIFIER <- "@"[A-Za-z_][A-Za-z0-9_]* skip 448 | 449 | 450 | AMPERSAND <- '&' ![=] skip 451 | AMPERSANDEQUAL <- '&=' skip 452 | ASTERISK <- '*' ![*%=|] skip 453 | ASTERISK2 <- '**' skip 454 | ASTERISKEQUAL <- '*=' skip 455 | ASTERISKPERCENT <- '*%' ![=] skip 456 | ASTERISKPERCENTEQUAL <- '*%=' skip 457 | ASTERISKPIPE <- '*|' ![=] skip 458 | ASTERISKPIPEEQUAL <- '*|=' skip 459 | CARET <- '^' ![=] skip 460 | CARETEQUAL <- '^=' skip 461 | COLON <- ':' skip 462 | COMMA <- ',' skip 463 | DOT <- '.' ![*.?] skip 464 | DOT2 <- '..' ![.] skip 465 | DOT3 <- '...' skip 466 | DOTASTERISK <- '.*' skip 467 | DOTQUESTIONMARK <- '.?' skip 468 | EQUAL <- '=' ![>=] skip 469 | EQUALEQUAL <- '==' skip 470 | EQUALRARROW <- '=>' skip 471 | EXCLAMATIONMARK <- '!' 
![=] skip 472 | EXCLAMATIONMARKEQUAL <- '!=' skip 473 | LARROW <- '<' ![<=] skip 474 | LARROW2 <- '<<' ![=|] skip 475 | LARROW2EQUAL <- '<<=' skip 476 | LARROW2PIPE <- '<<|' ![=] skip 477 | LARROW2PIPEEQUAL <- '<<|=' skip 478 | LARROWEQUAL <- '<=' skip 479 | LBRACE <- '{' skip 480 | LBRACKET <- '[' skip 481 | LPAREN <- '(' skip 482 | MINUS <- '-' ![%=>|] skip 483 | MINUSEQUAL <- '-=' skip 484 | MINUSPERCENT <- '-%' ![=] skip 485 | MINUSPERCENTEQUAL <- '-%=' skip 486 | MINUSPIPE <- '-|' ![=] skip 487 | MINUSPIPEEQUAL <- '-|=' skip 488 | MINUSRARROW <- '->' skip 489 | PERCENT <- '%' ![=] skip 490 | PERCENTEQUAL <- '%=' skip 491 | PIPE <- '|' ![|=] skip 492 | PIPE2 <- '||' skip 493 | PIPEEQUAL <- '|=' skip 494 | PLUS <- '+' ![%+=|] skip 495 | PLUS2 <- '++' skip 496 | PLUSEQUAL <- '+=' skip 497 | PLUSPERCENT <- '+%' ![=] skip 498 | PLUSPERCENTEQUAL <- '+%=' skip 499 | PLUSPIPE <- '+|' ![=] skip 500 | PLUSPIPEEQUAL <- '+|=' skip 501 | LETTERC <- 'c' skip 502 | QUESTIONMARK <- '?' skip 503 | RARROW <- '>' ![>=] skip 504 | RARROW2 <- '>>' ![=] skip 505 | RARROW2EQUAL <- '>>=' skip 506 | RARROWEQUAL <- '>=' skip 507 | RBRACE <- '}' skip 508 | RBRACKET <- ']' skip 509 | RPAREN <- ')' skip 510 | SEMICOLON <- ';' skip 511 | SLASH <- '/' ![=] skip 512 | SLASHEQUAL <- '/=' skip 513 | TILDE <- '~' skip 514 | 515 | end_of_word <- ![a-zA-Z0-9_] skip 516 | KEYWORD_addrspace <- 'addrspace' end_of_word 517 | KEYWORD_align <- 'align' end_of_word 518 | KEYWORD_allowzero <- 'allowzero' end_of_word 519 | KEYWORD_and <- 'and' end_of_word 520 | KEYWORD_anyframe <- 'anyframe' end_of_word 521 | KEYWORD_anytype <- 'anytype' end_of_word 522 | KEYWORD_asm <- 'asm' end_of_word 523 | KEYWORD_break <- 'break' end_of_word 524 | KEYWORD_callconv <- 'callconv' end_of_word 525 | KEYWORD_catch <- 'catch' end_of_word 526 | KEYWORD_comptime <- 'comptime' end_of_word 527 | KEYWORD_const <- 'const' end_of_word 528 | KEYWORD_continue <- 'continue' end_of_word 529 | KEYWORD_defer <- 'defer' end_of_word 530 | 
KEYWORD_else <- 'else' end_of_word 531 | KEYWORD_enum <- 'enum' end_of_word 532 | KEYWORD_errdefer <- 'errdefer' end_of_word 533 | KEYWORD_error <- 'error' end_of_word 534 | KEYWORD_export <- 'export' end_of_word 535 | KEYWORD_extern <- 'extern' end_of_word 536 | KEYWORD_fn <- 'fn' end_of_word 537 | KEYWORD_for <- 'for' end_of_word 538 | KEYWORD_if <- 'if' end_of_word 539 | KEYWORD_inline <- 'inline' end_of_word 540 | KEYWORD_noalias <- 'noalias' end_of_word 541 | KEYWORD_nosuspend <- 'nosuspend' end_of_word 542 | KEYWORD_noinline <- 'noinline' end_of_word 543 | KEYWORD_opaque <- 'opaque' end_of_word 544 | KEYWORD_or <- 'or' end_of_word 545 | KEYWORD_orelse <- 'orelse' end_of_word 546 | KEYWORD_packed <- 'packed' end_of_word 547 | KEYWORD_pub <- 'pub' end_of_word 548 | KEYWORD_resume <- 'resume' end_of_word 549 | KEYWORD_return <- 'return' end_of_word 550 | KEYWORD_linksection <- 'linksection' end_of_word 551 | KEYWORD_struct <- 'struct' end_of_word 552 | KEYWORD_suspend <- 'suspend' end_of_word 553 | KEYWORD_switch <- 'switch' end_of_word 554 | KEYWORD_test <- 'test' end_of_word 555 | KEYWORD_threadlocal <- 'threadlocal' end_of_word 556 | KEYWORD_try <- 'try' end_of_word 557 | KEYWORD_union <- 'union' end_of_word 558 | KEYWORD_unreachable <- 'unreachable' end_of_word 559 | KEYWORD_var <- 'var' end_of_word 560 | KEYWORD_volatile <- 'volatile' end_of_word 561 | KEYWORD_while <- 'while' end_of_word 562 | 563 | keyword <- KEYWORD_addrspace / KEYWORD_align / KEYWORD_allowzero / KEYWORD_and 564 | / KEYWORD_anyframe / KEYWORD_anytype / KEYWORD_asm 565 | / KEYWORD_break / KEYWORD_callconv / KEYWORD_catch 566 | / KEYWORD_comptime / KEYWORD_const / KEYWORD_continue / KEYWORD_defer 567 | / KEYWORD_else / KEYWORD_enum / KEYWORD_errdefer / KEYWORD_error / KEYWORD_export 568 | / KEYWORD_extern / KEYWORD_fn / KEYWORD_for / KEYWORD_if 569 | / KEYWORD_inline / KEYWORD_noalias / KEYWORD_nosuspend / KEYWORD_noinline 570 | / KEYWORD_opaque / KEYWORD_or / KEYWORD_orelse / 
KEYWORD_packed 571 | / KEYWORD_pub / KEYWORD_resume / KEYWORD_return / KEYWORD_linksection 572 | / KEYWORD_struct / KEYWORD_suspend / KEYWORD_switch / KEYWORD_test 573 | / KEYWORD_threadlocal / KEYWORD_try / KEYWORD_union / KEYWORD_unreachable 574 | / KEYWORD_var / KEYWORD_volatile / KEYWORD_while -------------------------------------------------------------------------------- /src/trace.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const peg = @import("peg.zig"); 3 | const ansi = @import("ansitty.zig"); 4 | 5 | const SGR = ansi.SGR; 6 | const ColorPrinter = ansi.ColorPrinter; 7 | const TreePrinter = ansi.TreePrinter; 8 | 9 | const ctrls = [_][]const u8{ 10 | "␀", "␁", "␂", "␃", "␄", "␅", "␆", "␇", 11 | "␈", "␉", "␤", "␋", "␌", "␍", "␎", "␏", 12 | "␐", "␑", "␒", "␓", "␔", "␕", "␖", "␗", 13 | "␘", "␙", "␚", "␛", "␜", "␝", "␞", "␟", 14 | "␠", 15 | }; 16 | 17 | pub const TraceStyle = enum { 18 | control_char, 19 | literal_char, 20 | escape_char, 21 | rule_name, 22 | variant_name, 23 | absolute_ip, 24 | relative_ip, 25 | call_ip, 26 | call_name, 27 | range_ellipsis, 28 | cursor, 29 | end_marker, 30 | stack_depth, 31 | opcode_ip, 32 | cache_hit, 33 | success, 34 | failure, 35 | quantifier, 36 | }; 37 | 38 | const TracePrinter = ColorPrinter(TraceStyle); 39 | 40 | const default_trace_theme = TracePrinter.Theme.init(.{ 41 | .control_char = SGR.fg(.magenta), 42 | .literal_char = SGR.fg(.yellow), 43 | .escape_char = SGR.fg(.yellow), 44 | .rule_name = SGR.fg(.green).bold(), 45 | .variant_name = SGR.fg(.green).bright(), 46 | .absolute_ip = SGR.fg(.cyan), 47 | .relative_ip = SGR.fg(.cyan), 48 | .call_ip = SGR.fg(.cyan), 49 | .call_name = SGR.fg(.blue), 50 | .range_ellipsis = SGR.attr(.dim), 51 | .cursor = SGR.attr(.bold), 52 | .end_marker = SGR.fg(.green).bright(), 53 | .stack_depth = SGR.attr(.dim), 54 | .opcode_ip = SGR.fg(.cyan), 55 | .quantifier = SGR.fg(.magenta).bright(), 56 | .cache_hit = 
SGR.fg(.green),
    .success = SGR.fg(.green).bright(),
    .failure = SGR.fg(.red),
});

const MaxAstDepth: usize = 256;

/// Prints one byte in trace notation.
///
/// Bytes that index into `ctrls` are shown as their picture glyph, bytes in
/// 33..126 other than a backslash are shown literally, and everything else is
/// written as a `\xNN` hex escape.
pub fn printChar(printer: *TracePrinter, c: u8) !void {
    if (c < ctrls.len) {
        return printer.print(.control_char, "{s}", .{ctrls[c]});
    }

    const is_plain = c > 32 and c <= 126 and c != '\\';
    if (is_plain) {
        return printer.print(.literal_char, "{c}", .{c});
    }

    return printer.print(.escape_char, "\\x{x:0>2}", .{c});
}

/// Prints a single opcode followed by a newline.
///
/// The mnemonic is written first, then an operand that depends on the opcode:
/// a jump target for `frob`, a call target for `call`, and the accepted byte
/// set (with an optional `*`) for `read`. The last parameter is unused.
pub fn dumpOp(
    comptime rel: bool,
    op: peg.OpG(rel),
    printer: *TracePrinter,
    _: u32,
) !void {
    const writer = printer.writer;

    // A `frob` op is labelled by its `fx` field; every other op by its own tag.
    const mnemonic = switch (op) {
        .frob => |x| @tagName(x.fx),
        inline else => @tagName(op),
    };
    try writer.print("{s} ", .{mnemonic});

    switch (op) {
        .frob => |ctrl| {
            if (!rel) {
                try printer.print(.absolute_ip, "→{d}", .{ctrl.ip});
            } else if (ctrl.ip > 0) {
                // Positive relative offsets get an explicit sign.
                try printer.print(.relative_ip, "+{d}", .{ctrl.ip});
            } else {
                try printer.print(.relative_ip, "{d}", .{ctrl.ip});
            }
        },
        .call => |target| {
            if (@TypeOf(target) != u32) {
                try printer.print(.call_name, "&{s}", .{@tagName(target)});
            } else {
                try printer.print(.call_ip, "→{d}", .{target});
            }
        },
        .read => |read_op| {
            var first: u32 = 0;
            while (first < 256) : (first += 1) {
                if (!read_op.set.isSet(first)) continue;

                // Extend to the end of the run of consecutive set bytes.
                var last = first;
                while (last + 1 < 256 and read_op.set.isSet(last + 1)) last += 1;

                try printChar(printer, @intCast(first));
                if (last > first) {
                    // Runs of three or more are abbreviated; a pair is printed as two bytes.
                    if (last - first >= 2) {
                        try printer.print(.range_ellipsis, "{s}", .{"⋯"});
                    }
                    try printChar(printer, @intCast(last));
                }
                first = last;
            }

            if (read_op.repeat == .kleene) {
                try printer.print(.quantifier, "*", .{});
            }
        },

        inline else => {},
    }

    try printer.reset();
    try writer.writeAll("\n");
}

/// Prints the compiled program for grammar `T`, one opcode per line with its
/// index, emitting a `&rule:` header wherever a rule starts, then flushes.
pub fn dumpCode(comptime T: type, writer: *std.Io.Writer, tty: std.Io.tty.Config) !void {
    const G = comptime peg.Grammar(T);
    const ops = comptime G.compile(false);

    var printer = TracePrinter.init(writer, tty, default_trace_theme);

    inline for (ops, 0..) |op, i| {
        if (G.isStartOfRule(i)) |rule| {
            try printer.print(.rule_name, "\n&{t}:\n", .{rule});
        }

        try writer.print("{d: >4} ", .{i});
        try dumpOp(false, op, &printer, i);
    }

    try writer.flush();
}

/// Prints one trace line: the input byte under the cursor (only when the
/// cursor moved since the previous line), a bar per call-stack entry, the
/// instruction pointer, and the opcode at `ip` if `ip` is inside the program.
fn traceStep(
    machine: anytype,
    ip: u32,
    last_sp: *?u32,
    printer: *TracePrinter,
    cache_hit: bool,
) !void {
    const Program = @TypeOf(machine.*).Ops;
    const writer = printer.writer;

    // Cursor column.
    if (machine.sp != last_sp.*) {
        try printer.setStyle(.cursor);
        if (machine.sp >= machine.text.len) {
            try printer.print(.end_marker, "⌀", .{});
        } else {
            try printChar(printer, machine.text[machine.sp]);
        }
        last_sp.* = machine.sp;
    } else {
        try writer.writeAll(" ");
    }
    try printer.reset();
    try writer.writeAll(" ");

    // Call stack depth.
    try printer.setStyle(.stack_depth);
    try writer.splatBytesAll("│", machine.calls.items.len + 1);
    try writer.writeAll(" ");
    try printer.reset();

    // Instruction.
    try printer.print(.opcode_ip, "{d:0>4} ", .{ip});

    if (ip >= Program.len) return;

    if (cache_hit) {
        try printer.print(.cache_hit, "{s}", .{"⚡ "});
    }
    try dumpOp(false, Program[ip], printer, ip);
}

/// Traces a parse from instruction 0; equivalent to `traceFrom` with no start rule.
pub fn trace(
    machine: anytype,
    writer: *std.Io.Writer,
    tty: std.Io.tty.Config,
) !void {
    return traceFrom(machine, writer, tty, null);
}

/// Single-steps `machine`, printing one trace line per step and a summary at
/// the end. Starts at `start_rule` when given, otherwise at instruction 0.
/// A step error is reported and then returned to the caller.
pub fn traceFrom(
    machine: anytype,
    writer: *std.Io.Writer,
    tty: std.Io.tty.Config,
    comptime start_rule: ?@TypeOf(machine.*).Grammar.RuleEnum,
) !void {
    const VMType = @TypeOf(machine.*);
    const Program = VMType.Ops;
    const has_memo = machine.memo != null;

    var printer = TracePrinter.init(writer, tty, default_trace_theme);

    if (!has_memo) {
        try writer.print("\nParsing: \"{s}\"\n\n", .{machine.text});
    } else {
        try writer.print("\nParsing with memoization: \"{s}\"\n\n", .{machine.text});
    }

    var ip: u32 = if (start_rule) |rule| VMType.Grammar.ruleStartIp(rule) else 0;
    var last_sp: ?u32 = null;
    var step_count: u32 = 0;
    var cache_hits: u32 = 0;

    while (true) : (step_count += 1) {
        // A call whose (target, position) key is already in the memo table counts as a hit.
        var is_cache_hit = false;
        if (has_memo and ip < Program.len and Program[ip] == .call) {
            const key = VMType.MemoKey{ .ip = Program[ip].call, .sp = machine.sp };
            if (machine.memo.?.contains(key)) {
                cache_hits += 1;
                is_cache_hit = true;
            }
        }

        try traceStep(machine, ip, &last_sp, &printer, is_cache_hit);
        try writer.flush();

        // Execute one step.
        const outcome = machine.next(ip, .Step) catch |err| {
            try printer.print(.failure, "\n✕ {t} at step {d}\n", .{ err, step_count + 1 });
            return err;
        };

        if (outcome) |next_ip| {
            ip = next_ip;
            continue;
        }

        // No next instruction: the parse finished successfully.
        if (has_memo) {
            try printer.print(.success, "\n✓ ({d} steps, {d} hits)\n", .{ step_count + 1, cache_hits });
        } else {
            try printer.print(.success, "\n✓ ({d} steps)\n", .{step_count + 1});
        }
        break;
    }
}

/// Prints the machine's node tree starting at `root_node`, or a lone newline
/// when there is no root.
pub fn dumpAst(
    machine: anytype,
    writer: *std.Io.Writer,
    tty: std.Io.tty.Config,
) !void {
    const VMType = @TypeOf(machine.*);
    const root = machine.root_node orelse {
        try writer.writeAll("\n");
        return;
    };

    var printer = TracePrinter.init(writer, tty, default_trace_theme);
    var tree = TreePrinter.init(writer);
    try printAstNode(VMType, machine, &printer, root, true, &tree);
}

fn printAstNode(
    comptime VMType: type,
    machine: *const VMType,
    printer: *TracePrinter,
    index: u32,
    is_last: bool,
    tree: *TreePrinter,
) !void {
    const node = machine.nodes.items[index];
    const writer = printer.writer;

    try tree.printPrefix(is_last);

    var label_style: TraceStyle = .variant_name;
    var label_text: []const u8 = undefined;

    switch (node.kind) {
        .rule => {
            const rule: VMType.RuleEnum = @enumFromInt(node.rule_index);
            label_style = .rule_name;
            label_text = @tagName(rule);
        },
        else => {
            label_style = .variant_name;
            label_text = @tagName(node.kind);
        },
    }

    const span = machine.text[node.start..node.end];
    try printer.print(label_style, "{s}", .{label_text});
    try writer.writeAll(" [");
    try printer.print(.absolute_ip, "{d}", .{node.start});
    try printer.print(.range_ellipsis, "…", .{});
    try printer.print(.absolute_ip, "{d}", .{node.end});
    try writer.writeAll(") ");
    try writer.writeAll("\"");
    for (span) |ch| {
        try printChar(printer, ch);
    }
    try writer.writeAll("\"\n");

    if (node.first_child) |first| {
        try
tree.push(!is_last); 328 | var current = first; 329 | while (true) { 330 | const next = machine.nodes.items[current].next_sibling; 331 | try printAstNode(VMType, machine, printer, current, next == null, tree); 332 | if (next) |n| { 333 | current = n; 334 | } else break; 335 | } 336 | tree.pop(); 337 | } 338 | } 339 | 340 | fn writeIndent(writer: *std.Io.Writer, depth: usize) !void { 341 | var i: usize = 0; 342 | while (i < depth) : (i += 1) { 343 | try writer.writeAll(" "); 344 | } 345 | } 346 | 347 | fn dumpForestValue( 348 | comptime VMType: type, 349 | forest: *const VMType.Grammar.Forest, 350 | printer: *TracePrinter, 351 | tree: *TreePrinter, 352 | text: []const u8, 353 | value: anytype, 354 | is_last: bool, 355 | ) anyerror!void { 356 | const Grammar = VMType.Grammar; 357 | const writer = printer.writer; 358 | const T = @TypeOf(value); 359 | 360 | if (T == void) { 361 | // Void values are hidden fields, skip them 362 | return; 363 | } 364 | 365 | // Check if the type has a Kind declaration 366 | const has_kind = comptime @hasDecl(T, "Kind"); 367 | if (has_kind) { 368 | const kind = T.Kind; 369 | switch (kind) { 370 | .@"struct" => { 371 | // Struct wrapper - directly show the inner value without the wrapper 372 | try dumpForestValue(VMType, forest, printer, tree, text, value.value, is_last); 373 | return; 374 | }, 375 | .@"union" => { 376 | // Union wrapper - directly show the inner value without the wrapper 377 | try dumpForestValue(VMType, forest, printer, tree, text, value.value, is_last); 378 | return; 379 | }, 380 | .call => { 381 | // Get the rule tag from the TargetName 382 | const rule_tag = comptime blk: { 383 | const name = T.TargetName; 384 | for (std.meta.tags(Grammar.RuleEnum)) |tag| { 385 | if (std.mem.eql(u8, @tagName(tag), name)) { 386 | break :blk tag; 387 | } 388 | } 389 | @compileError("Unknown rule: " ++ T.TargetName); 390 | }; 391 | try dumpForestNode(VMType, forest, printer, tree, text, rule_tag, value.index, is_last); 392 | return; 393 
| }, 394 | .kleene => { 395 | try printer.print(.quantifier, "{d} items\n", .{value.len}); 396 | if (value.len > 0) { 397 | try tree.push(!is_last); 398 | var i: usize = 0; 399 | while (i < value.len) : (i += 1) { 400 | try tree.printPrefix(i == value.len - 1); 401 | try writer.writeAll("["); 402 | try printer.print(.absolute_ip, "{d}", .{i}); 403 | try writer.writeAll("] "); 404 | const rule_tag = T.RuleTag; 405 | const element_index: u32 = @intCast(value.offset + i); 406 | const is_last_item = i == value.len - 1; 407 | try dumpForestNode(VMType, forest, printer, tree, text, rule_tag, element_index, is_last_item); 408 | } 409 | tree.pop(); 410 | } 411 | return; 412 | }, 413 | .char => { 414 | // Single character 415 | const offset = value.offset; 416 | try writer.writeAll("'"); 417 | try printChar(printer, text[offset]); 418 | try writer.writeAll("' "); 419 | try printer.print(.range_ellipsis, "[{d}]", .{offset}); 420 | try writer.writeAll("\n"); 421 | return; 422 | }, 423 | .char_slice => { 424 | // Character slice 425 | const offset = value.offset; 426 | const len = value.len; 427 | if (len > 0) { 428 | try writer.writeAll("\""); 429 | var i: usize = 0; 430 | while (i < len) : (i += 1) { 431 | if (offset + i < text.len) { 432 | try printChar(printer, text[offset + i]); 433 | } 434 | } 435 | try writer.print("\" ", .{}); 436 | try printer.print(.range_ellipsis, "[{d}…{d})", .{ offset, offset + len }); 437 | } else { 438 | try printer.print(.failure, "(empty)", .{}); 439 | } 440 | try writer.writeAll("\n"); 441 | return; 442 | }, 443 | .hidden => { 444 | // Hidden values should not be displayed 445 | return; 446 | }, 447 | .maybe => { 448 | if (value.value == null) { 449 | try printer.print(.failure, "(null)\n", .{}); 450 | } else { 451 | try dumpForestValue(VMType, forest, printer, tree, text, value.value.?, is_last); 452 | } 453 | return; 454 | }, 455 | else => { 456 | // Unknown kind, fall through to default handling 457 | }, 458 | } 459 | } 460 | 461 | switch 
(@typeInfo(T)) { 462 | .optional => { 463 | if (value) |payload| { 464 | try writer.writeAll("(some)\n"); 465 | try tree.push(!is_last); 466 | try tree.printPrefix(true); 467 | try dumpForestValue(VMType, forest, printer, tree, text, payload, true); 468 | tree.pop(); 469 | } else { 470 | try printer.print(.failure, "(none)", .{}); 471 | try writer.writeAll("\n"); 472 | } 473 | return; 474 | }, 475 | .@"union" => |info| { 476 | const Tag = info.tag_type.?; 477 | const tag = std.meta.activeTag(value); 478 | try printer.print(.variant_name, ".{s}", .{@tagName(tag)}); 479 | inline for (info.fields) |field| { 480 | if (tag == @field(Tag, field.name)) { 481 | const payload = @field(value, field.name); 482 | if (@TypeOf(payload) != void) { 483 | try writer.writeAll("\n"); 484 | try tree.push(!is_last); 485 | try tree.printPrefix(true); 486 | try dumpForestValue(VMType, forest, printer, tree, text, payload, true); 487 | tree.pop(); 488 | } else { 489 | try writer.writeAll("\n"); 490 | } 491 | return; 492 | } 493 | } 494 | try writer.writeAll("\n"); 495 | return; 496 | }, 497 | .@"struct" => |info| { 498 | // Empty struct - just skip it entirely 499 | if (info.fields.len == 0) { 500 | return; 501 | } 502 | 503 | // Count non-void, non-empty fields 504 | const non_void_fields = comptime blk: { 505 | var count: usize = 0; 506 | for (info.fields) |field| { 507 | if (field.type == void) continue; 508 | const field_info = @typeInfo(field.type); 509 | if (field_info == .@"struct" and field_info.@"struct".fields.len == 0) continue; 510 | count += 1; 511 | } 512 | break :blk count; 513 | }; 514 | 515 | if (non_void_fields == 0) { 516 | return; 517 | } 518 | 519 | try writer.writeAll("\n"); 520 | try tree.push(!is_last); 521 | 522 | var field_idx: usize = 0; 523 | inline for (info.fields) |field| { 524 | if (comptime field.type == void) continue; 525 | 526 | // Skip empty struct fields 527 | const field_info = @typeInfo(field.type); 528 | if (field_info == .@"struct" and 
/// Print one rule node of the typed forest.
///
/// Writes the rule name; for rules whose value type is not `void` it then
/// writes ": " and renders the stored value through `dumpForestValue`.
/// `rule` and `index` select the forest entry, and `is_last` is forwarded
/// unchanged to `dumpForestValue`.
fn dumpForestNode(
    comptime VMType: type,
    forest: *const VMType.Grammar.Forest,
    printer: *TracePrinter,
    tree: *TreePrinter,
    text: []const u8,
    comptime rule: VMType.RuleEnum,
    index: u32,
    is_last: bool,
) anyerror!void {
    const out = printer.writer;
    try printer.print(.rule_name, "{s}", .{@tagName(rule)});

    if (comptime VMType.Grammar.RuleValueType(rule) == void) {
        // Void rules carry no value to print: the name is the whole line.
        try out.writeAll("\n");
    } else {
        const payload = forest.get(rule, index).*;
        try out.writeAll(": ");
        try dumpForestValue(VMType, forest, printer, tree, text, payload, is_last);
    }
}

/// Build the typed forest for `root_rule` from `machine` and pretty-print it
/// to `writer` under a "Typed Forest:" heading.
///
/// The forest is built with `allocator` and released again before returning.
/// Errors from `machine.buildForest` and from the writer are propagated.
pub fn dumpForest(
    machine: anytype,
    writer: *std.Io.Writer,
    tty: std.Io.tty.Config,
    allocator: std.mem.Allocator,
    comptime root_rule: @TypeOf(machine.*).RuleEnum,
) !void {
    const Machine = @TypeOf(machine.*);

    var result = try machine.buildForest(allocator, root_rule);
    defer result.forest.deinit(allocator);

    try writer.writeAll("\nTyped Forest:\n");

    var trace_printer = TracePrinter.init(writer, tty, default_trace_theme);
    var tree_printer = TreePrinter.init(writer);
    const source = machine.text[0..machine.text.len];

    // The root is the only node at its level, hence `is_last = true`.
    try dumpForestNode(
        Machine,
        &result.forest,
        &trace_printer,
        &tree_printer,
        source,
        root_rule,
        result.root_index,
        true,
    );
    try writer.writeAll("\n");
}
pub const StructuralFrame = struct {
    /// Which helper node we opened.
    kind: NodeKind,
    /// Index into the main node array for that helper.
    node_index: u32,
    /// Length of `child_stack` when the frame was opened; children pushed
    /// from here on are linked under the helper node when it is shut.
    node_child_start: usize,
    /// Structs only: byte offset where the current field began.
    field_start: u32,
    /// Structs only: `child_stack` position where the current field's
    /// children start.
    field_child_start: usize,
};

// === Core Parsing State ===
/// Current read offset into `text`.
sp: u32 = 0,
/// Input being parsed (sentinel-terminated, so `text[text.len]` reads 0).
text: [:0]const u8,

// === Backtracking Stacks ===
/// Save points pushed by `frob.push`; popped on failure to resume elsewhere.
saves: std.ArrayList(SaveFrame),
/// In-flight rule invocations, pushed by `call` and popped by `done`.
calls: std.ArrayList(CallFrame),

// === AST Construction ===
/// Flat node storage; nodes reference each other by index.
nodes: std.ArrayList(Node),
/// Indices of finished nodes still waiting to be linked under a parent.
child_stack: std.ArrayList(u32),
/// Node produced by the outermost completed call, if any.
root_node: ?u32 = null,

// === Structural tracking ===
/// Frames for helper nodes that were opened but not yet shut.
struct_stack: std.ArrayList(StructuralFrame),

// === Memoization (optional) ===
/// When non-null, `call` consults and updates this table.
memo: ?*MemoTable = null,

/// Snapshot for the backtracking stack. Captures both the VM position and
/// the lengths of the node/struct stacks so we can rewind them precisely.
pub const SaveFrame = struct {
    /// Instruction to resume at when this frame is popped on failure.
    ip: u32,
    /// Input offset to restore.
    sp: u32,
    /// Length of `calls` at the time of the snapshot.
    call_depth: u32,
    /// Length of `nodes` at the time of the snapshot.
    node_len: usize,
    /// Length of `struct_stack` at the time of the snapshot.
    struct_depth: usize,
    /// Length of `child_stack` at the time of the snapshot.
    child_len: usize,
};

/// Call-stack frame representing an in-flight rule invocation.
pub const CallFrame = struct {
    /// Instruction to continue with once the rule finishes.
    return_ip: u32,
    /// First instruction of the invoked rule (also used as memo key).
    target_ip: u32,
    /// Rule being invoked.
    rule: RuleEnum,
    /// Input offset at which the invocation started.
    start_sp: u32,
    /// Length of `struct_stack` when the call was made.
    struct_depth_on_entry: usize,
    /// Length of `child_stack` when the call was made.
    child_start: usize,
};

/// Memo table key: a rule entry point paired with an input offset.
pub const MemoKey = struct {
    ip: u32,
    sp: u32,
};

/// Memoized outcome of invoking a rule at a given offset.
pub const MemoEntry = struct {
    success: bool,
    end_sp: u32,
};

pub const MemoTable = std.AutoHashMap(MemoKey, MemoEntry);

/// Create a VM over `text` whose stacks live in the caller-provided buffers.
/// Every stack starts empty; memoization is off and no root node is set.
pub fn init(
    text: [:0]const u8,
    saves: []SaveFrame,
    calls: []CallFrame,
    nodes: []Node,
    struct_frames: []StructuralFrame,
    child_indices: []u32,
) Self {
    return .{
        // Parsing position.
        .text = text,
        .sp = 0,
        // Fixed-capacity stacks backed by the supplied buffers.
        .saves = .initBuffer(saves),
        .calls = .initBuffer(calls),
        .nodes = .initBuffer(nodes),
        .child_stack = .initBuffer(child_indices),
        .struct_stack = .initBuffer(struct_frames),
        // Optional state.
        .root_node = null,
        .memo = null,
    };
}

// === AST Construction Helpers ===

/// Adopt every index on `child_stack` from `start` upwards as a child of
/// `parent_idx`, chaining the children as a doubly-linked sibling list.
///
/// The stack is cut back to `start` (or left alone if it is already
/// shorter). Returns the first adopted child, or null if there were none.
fn linkChildrenToParent(self: *Self, parent_idx: u32, start: usize) ?u32 {
    const stack_len = self.child_stack.items.len;
    const base = @min(start, stack_len);
    const pending = self.child_stack.items[base..stack_len];
    const head: ?u32 = if (pending.len > 0) pending[0] else null;

    var previous: ?u32 = null;
    for (pending) |child| {
        // Backward link first, then the previous sibling's forward link.
        self.nodes.items[child].prev_sibling = previous;
        if (previous) |p| self.nodes.items[p].next_sibling = child;
        // Until a later sibling shows up this child ends the chain.
        self.nodes.items[child].next_sibling = null;
        self.nodes.items[child].parent = parent_idx;
        previous = child;
    }

    // Drop the adopted children from the stack.
    self.child_stack.items.len = base;
    return head;
}

/// Materialize the `.rule` node for a finished call frame, adopting every
/// child gathered since the call began. Returns the new node's index.
/// (This is the only place `kind == .rule` nodes are born.)
fn appendNode(self: *Self, frame: CallFrame, end_sp: u32) !u32 {
    const new_index: u32 = @intCast(self.nodes.items.len);

    const rule_node: Node = .{
        .kind = .rule,
        .rule_index = @intFromEnum(frame.rule),
        .start = frame.start_sp,
        .end = end_sp,
        .first_child = self.linkChildrenToParent(new_index, frame.child_start),
        .next_sibling = null,
        .prev_sibling = null,
        .parent = null,
    };
    try self.nodes.appendBounded(rule_node);

    return new_index;
}

/// Push a node index onto `child_stack`; fails when the buffer is full.
fn pushChildIndex(self: *Self, value: u32) !void {
    return self.child_stack.appendBounded(value);
}

/// Attach a newly created node to whichever stack frame owns it.
///
/// Structural opcodes (`open/next/shut`) push frames that sit "above" the
/// call stack, so children created while inside them should land on those
/// frames instead of on the rule call.
fn attachChild(self: *Self, child: u32) !void {
    const open_structs = self.struct_stack.items.len;

    // Inside a call the child is kept as long as the structural depth has
    // not dropped below the depth recorded at call entry; outside any call
    // it is kept only while a structural frame is open.
    const has_owner = if (self.calls.getLastOrNull()) |call|
        open_structs >= call.struct_depth_on_entry
    else
        open_structs > 0;

    if (has_owner) try self.pushChildIndex(child);
}

/// Seal the field currently being collected in `frame` into a `.field` node.
///
/// Struct compilation emits `open` / (field payload) / `next` / … / `shut`.
/// Everything pushed on `child_stack` since `frame.field_child_start` is
/// adopted by the new node, which spans `frame.field_start..end_sp`. The
/// node is then pushed as a child of the enclosing struct and `frame` is
/// re-armed so the next field starts at `end_sp`.
fn finalizeStructField(self: *Self, frame: *StructuralFrame, end_sp: u32) !void {
    const new_index: u32 = @intCast(self.nodes.items.len);

    const field_node: Node = .{
        .kind = .field,
        .rule_index = 0,
        .start = frame.field_start,
        .end = end_sp,
        .first_child = self.linkChildrenToParent(new_index, frame.field_child_start),
        .next_sibling = null,
        .prev_sibling = null,
        .parent = null,
    };
    try self.nodes.appendBounded(field_node);
    try self.pushChildIndex(new_index);

    // The next field begins where this one ended.
    frame.field_start = end_sp;
    frame.field_child_start = self.child_stack.items.len;
}

/// Roll back the node list to a previous length (used during backtracking).
fn truncateNodes(self: *Self, new_len: usize) void {
    if (new_len >= self.nodes.items.len) return;
    self.nodes.items.len = new_len;
    // A root that pointed into the discarded tail no longer exists.
    if (self.root_node) |root_idx| {
        if (root_idx >= new_len) self.root_node = null;
    }
}

/// Restore the child stack to a previous logical length, filtering out any
/// indices that no longer point at live nodes (can happen after backtracking
/// truncates the node list).
///
/// Call this after `truncateNodes`: liveness is judged against the current
/// length of `nodes`. The resulting stack may be shorter than `target_len`.
fn restoreChildStack(self: *Self, target_len: usize) void {
    if (target_len == 0) {
        self.child_stack.items.len = 0;
        return;
    }

    const current = self.child_stack.items.len;
    const limit = if (target_len < current) target_len else current;
    var write: usize = 0;
    const live_nodes = self.nodes.items.len;

    // Compact in place, keeping only indices of nodes that still exist.
    var i: usize = 0;
    while (i < limit) : (i += 1) {
        const idx = self.child_stack.items[i];
        if (idx < live_nodes) {
            self.child_stack.items[write] = idx;
            write += 1;
        }
    }

    self.child_stack.items.len = write;
}

// === VM Execution ===

/// Execute one or more VM instructions starting at `ip`.
///
/// In `.Step` mode exactly one instruction is executed and the next
/// instruction pointer is returned, or null once the program has halted.
/// In `.Loop` mode instructions are executed until the program halts.
///
/// Errors:
/// - `error.ParseFailed` when an instruction fails and no save point is left.
/// - `error.UnconsumedInput` when the outermost `done` is reached before the
///   end of `text`.
/// - whatever the bounded stack appends or the memo table insert report
///   when they run out of room.
pub fn next(
    self: *Self,
    ip: u32,
    comptime mode: Mode,
) !(if (mode == .Loop) void else ?u32) {
    const loop = switch (mode) {
        .Step => false,
        .Loop => true,
    };

    vm: switch (ip) {
        inline 0...Ops.len - 1 => |IP| {
            @setEvalBranchQuota(10_000);

            const OP = Ops[IP];
            const IP1 = IP + 1;
            // `text` is sentinel-terminated, so this is in bounds at `sp == text.len`.
            const ch = self.text[self.sp];

            // Every prong either jumps to its successor or falls out of the
            // switch, which means "this instruction failed".
            switch (OP) {
                .read => |read_op| {
                    if (read_op.repeat == .kleene) {
                        // Consume as many matching characters as possible
                        while (self.sp < self.text.len and read_op.set.isSet(self.text[self.sp])) {
                            self.sp += 1;
                        }
                        if (loop) continue :vm IP1 else return IP1;
                    } else {
                        // Single character match (repeat == .one). The bounds
                        // check keeps a set that contains NUL from consuming
                        // the sentinel and pushing `sp` past the input.
                        if (self.sp < self.text.len and read_op.set.isSet(ch)) {
                            self.sp += 1;
                            if (loop) continue :vm IP1 else return IP1;
                        }
                    }
                },

                .text => |lit| {
                    const start: usize = @intCast(self.sp);
                    const len: usize = lit.len;
                    if (start + len <= self.text.len) {
                        const slice = self.text[start .. start + len];
                        if (std.mem.eql(u8, slice, lit)) {
                            self.sp = @intCast(start + len);
                            if (loop) continue :vm IP1 else return IP1;
                        }
                    }
                },

                .call => |target| {
                    if (self.memo) |memo| {
                        const key = MemoKey{ .ip = target, .sp = self.sp };
                        if (memo.get(key)) |entry| {
                            if (entry.success) {
                                // Known success: skip straight to the recorded end.
                                self.sp = entry.end_sp;
                                if (loop) continue :vm IP1 else return IP1;
                            } else {
                                // Known failure: backtrack right away, restoring
                                // the same state as the shared failure path
                                // below (including the structural stack).
                                if (self.saves.pop()) |save| {
                                    self.sp = save.sp;
                                    self.calls.items.len = save.call_depth;
                                    self.struct_stack.items.len = save.struct_depth;
                                    self.truncateNodes(save.node_len);
                                    self.restoreChildStack(save.child_len);
                                    if (loop) continue :vm save.ip else return save.ip;
                                }
                                return error.ParseFailed;
                            }
                        }
                    }

                    const rule = Grammar.ruleContainingIp(target) orelse unreachable;

                    try self.calls.appendBounded(.{
                        .return_ip = IP1,
                        .target_ip = target,
                        .rule = rule,
                        .start_sp = self.sp,
                        .struct_depth_on_entry = self.struct_stack.items.len,
                        .child_start = self.child_stack.items.len,
                    });

                    if (loop) continue :vm target else return target;
                },

                .frob => |ctrl| switch (ctrl.fx) {
                    // Record a save point that resumes at `ctrl.ip` on failure.
                    .push => {
                        try self.saves.appendBounded(.{
                            .ip = ctrl.ip,
                            .sp = self.sp,
                            .call_depth = @intCast(self.calls.items.len),
                            .node_len = self.nodes.items.len,
                            .struct_depth = self.struct_stack.items.len,
                            .child_len = self.child_stack.items.len,
                        });
                        if (loop) continue :vm IP1 else return IP1;
                    },

                    // Discard the newest save point and jump.
                    .drop => {
                        _ = self.saves.pop();
                        if (loop) continue :vm ctrl.ip else return ctrl.ip;
                    },

                    // Refresh the newest save point with the current state
                    // (its resume ip and call depth are kept) and jump.
                    .move => {
                        const top = &self.saves.items[self.saves.items.len - 1];
                        top.sp = self.sp;
                        top.node_len = self.nodes.items.len;
                        top.struct_depth = self.struct_stack.items.len;
                        top.child_len = self.child_stack.items.len;
                        if (loop) continue :vm ctrl.ip else return ctrl.ip;
                    },

                    // Pop the newest save point, rewind to it and jump.
                    // The call stack is left untouched here.
                    .wipe => {
                        const save = self.saves.pop().?;
                        self.sp = save.sp;
                        self.struct_stack.items.len = save.struct_depth;
                        self.truncateNodes(save.node_len);
                        self.restoreChildStack(save.child_len);
                        if (loop) continue :vm ctrl.ip else return ctrl.ip;
                    },
                },

                .done => {
                    if (self.calls.pop()) |frame| {
                        const node_index = try self.appendNode(frame, self.sp);

                        if (self.memo) |memo| {
                            const key = MemoKey{ .ip = frame.target_ip, .sp = frame.start_sp };
                            try memo.put(key, .{ .success = true, .end_sp = self.sp });
                        }

                        try self.attachChild(node_index);
                        // The node of the outermost call becomes the root.
                        if (self.calls.items.len == 0) {
                            self.root_node = node_index;
                        }

                        if (loop) continue :vm frame.return_ip else return frame.return_ip;
                    } else {
                        // No caller left: the program ends here and must have
                        // consumed the whole input.
                        if (self.sp == self.text.len) {
                            return (if (mode == .Loop) {} else null);
                        } else {
                            return error.UnconsumedInput;
                        }
                    }
                },

                .over => return (if (mode == .Loop) {} else null),

                .open => |node_kind| {
                    // Create the helper node now; `shut` fills in its end
                    // and its children.
                    const node_index: u32 = @intCast(self.nodes.items.len);
                    try self.nodes.appendBounded(.{
                        .kind = node_kind,
                        .rule_index = 0,
                        .start = self.sp,
                        .end = self.sp,
                        .first_child = null,
                        .next_sibling = null,
                        .prev_sibling = null,
                        .parent = null,
                    });

                    try self.struct_stack.appendBounded(.{
                        .kind = node_kind,
                        .node_index = node_index,
                        .node_child_start = self.child_stack.items.len,
                        .field_start = self.sp,
                        .field_child_start = self.child_stack.items.len,
                    });
                    if (loop) continue :vm IP1 else return IP1;
                },

                .next => {
                    if (self.struct_stack.items.len > 0) {
                        const frame = &self.struct_stack.items[self.struct_stack.items.len - 1];
                        if (frame.kind == .@"struct") {
                            try self.finalizeStructField(frame, self.sp);
                        }
                    }
                    if (loop) continue :vm IP1 else return IP1;
                },

                .shut => {
                    if (self.struct_stack.items.len > 0) {
                        const frame = &self.struct_stack.items[self.struct_stack.items.len - 1];
                        if (frame.kind == .@"struct") {
                            // Close the last field before closing the struct.
                            try self.finalizeStructField(frame, self.sp);
                        }

                        // Copy the frame out before popping it.
                        const frame_data = frame.*;
                        self.struct_stack.items.len -= 1;

                        const node_usize: usize = @intCast(frame_data.node_index);
                        const first_child = self.linkChildrenToParent(frame_data.node_index, frame_data.node_child_start);

                        self.nodes.items[node_usize].end = self.sp;
                        self.nodes.items[node_usize].first_child = first_child;

                        try self.attachChild(frame_data.node_index);
                    }
                    if (loop) continue :vm IP1 else return IP1;
                },

                .fail => {},
            }

            // Shared failure path: rewind to the newest save point.
            if (self.saves.pop()) |save| {
                if (self.memo) |memo| {
                    // Every call unwound by this backtrack failed at its
                    // start offset. Recording that is best-effort: a failed
                    // insert only costs a cache entry.
                    var i = self.calls.items.len;
                    while (i > save.call_depth) {
                        i -= 1;
                        const frame = self.calls.items[i];
                        const key = MemoKey{ .ip = frame.target_ip, .sp = frame.start_sp };
                        memo.put(key, .{ .success = false, .end_sp = frame.start_sp }) catch {};
                    }
                }

                self.sp = save.sp;
                self.calls.items.len = save.call_depth;
                self.struct_stack.items.len = save.struct_depth;
                self.truncateNodes(save.node_len);
                self.restoreChildStack(save.child_len);

                if (loop) continue :vm save.ip else return save.ip;
            }

            return error.ParseFailed;
        },
        // An ip outside the program halts the VM.
        else => return (if (mode == .Loop) {} else null),
    }
}

/// Run VM until completion
pub fn run(self: *Self) !void {
    try self.next(0, .Loop);
}

/// Run VM starting from a specific rule
pub fn runFrom(self: *Self, comptime rule: RuleEnum) !void {
    const start_ip = Grammar.ruleStartIp(rule);
    try self.next(start_ip, .Loop);
}

// === Lifecycle & Utilities ===

/// Allocate all VM stacks from `gpa` and return a VM over `text`.
///
/// `maxsaves` also sizes the structural stack and `maxnodes` also sizes the
/// child stack. If any allocation fails, the buffers allocated so far are
/// freed before the error is returned. Release the VM with `deinit`,
/// passing the same allocator.
pub fn initAlloc(
    text: [:0]const u8,
    gpa: std.mem.Allocator,
    maxsaves: usize,
    maxcalls: usize,
    maxnodes: usize,
) !Self {
    const saves = try gpa.alloc(SaveFrame, maxsaves);
    errdefer gpa.free(saves);
    const calls = try gpa.alloc(CallFrame, maxcalls);
    errdefer gpa.free(calls);
    const nodes = try gpa.alloc(Node, maxnodes);
    errdefer gpa.free(nodes);
    const struct_frames = try gpa.alloc(StructuralFrame, maxsaves); // Use same capacity as saves
    errdefer gpa.free(struct_frames);
    const child_indices = try gpa.alloc(u32, maxnodes);

    return Self.init(text, saves, calls, nodes, struct_frames, child_indices);
}

/// Free the stack buffers through `gpa`.
/// NOTE(review): presumably only valid for VMs created by `initAlloc` with
/// the same allocator; buffers handed to `init` belong to the caller.
pub fn deinit(self: *Self, gpa: std.mem.Allocator) void {
    self.saves.deinit(gpa);
    self.calls.deinit(gpa);
    self.nodes.deinit(gpa);
    self.struct_stack.deinit(gpa);
    self.child_stack.deinit(gpa);
}

/// Parse `text` from instruction 0 with a temporary VM
/// (32 saves, 32 calls, 256 nodes) and discard the result.
pub fn parse(
    text: [:0]const u8,
    gpa: std.mem.Allocator,
) !void {
    var vm = try Self.initAlloc(text, gpa, 32, 32, 256);
    defer vm.deinit(gpa);
    _ = try vm.run();
}

/// Single-step a temporary VM over `text` and return the step count.
/// The count starts at 1 and grows by one for every step that yields a
/// further instruction.
pub fn countSteps(text: [:0]const u8, gpa: std.mem.Allocator) !u32 {
    var self = try Self.initAlloc(text, gpa, 32, 32, 256);
    defer self.deinit(gpa);

    var ip: u32 = 0;
    var count: u32 = 1;

    while (try self.next(ip, .Step)) |new_ip| {
        ip = new_ip;
        count += 1;
    }

    return count;
}

/// Like `parse`, but with a memo table attached for the duration of the run.
pub fn parseWithMemo(
    text: [:0]const u8,
    gpa: std.mem.Allocator,
) !void {
    var vm = try Self.initAlloc(text, gpa, 32, 32, 256);
    defer vm.deinit(gpa);

    var memo = MemoTable.init(gpa);
    defer memo.deinit();
    vm.memo = &memo;

    _ = try vm.run();
}
// === AST Forest Construction ===

/// Convert the parse tree recorded by the last run into the typed forest
/// rooted at `root_rule`.
///
/// Returns `error.NoAst` when no root node has been recorded; otherwise
/// the result (or error) of `Grammar.buildForestForRoot` is returned.
pub fn buildForest(
    self: *const Self,
    allocator: std.mem.Allocator,
    comptime root_rule: RuleEnum,
) (peg.BuildError || error{NoAst})!Grammar.BuildResult(root_rule) {
    if (self.root_node) |root| {
        return Grammar.buildForestForRoot(
            allocator,
            self.text[0..self.text.len],
            self.nodes.items,
            root,
            root_rule,
        );
    }
    return error.NoAst;
}

// === Memoization Utilities ===

/// Single-step a temporary memoizing VM over `text` and report statistics.
///
/// Before each step, if the pending instruction is a `call`, the memo table
/// is probed for (call target, current offset): a present key counts as a
/// hit, an absent one as a miss. `steps` is counted as in `countSteps`.
pub fn countStepsWithMemo(text: [:0]const u8, gpa: std.mem.Allocator) !struct { steps: u32, hits: u32, misses: u32 } {
    var machine = try Self.initAlloc(text, gpa, 32, 32, 256);
    defer machine.deinit(gpa);

    var table = MemoTable.init(gpa);
    defer table.deinit();
    machine.memo = &table;

    var steps: u32 = 1;
    var hits: u32 = 0;
    var misses: u32 = 0;

    var ip: u32 = 0;
    while (true) {
        if (ip < Ops.len) switch (Ops[ip]) {
            .call => |target| {
                const probe = MemoKey{ .ip = target, .sp = machine.sp };
                if (table.contains(probe)) hits += 1 else misses += 1;
            },
            else => {},
        };

        ip = (try machine.next(ip, .Step)) orelse break;
        steps += 1;
    }

    return .{ .steps = steps, .hits = hits, .misses = misses };
}
/// Fixture: a number, or an expression wrapped in parentheses (recursive).
pub const RecursiveGrammar = struct {
    const R = std.meta.DeclEnum(@This());

    pub const main = peg.Call(.expr);

    pub const expr = peg.Match(union(enum) {
        number: peg.Call(.number),
        parens: peg.Call(.parens),
    });

    pub const parens = peg.Match(struct {
        open: peg.CharSet("(", .one),
        expr: peg.Call(.expr),
        close: peg.CharSet(")", .one),
    });

    pub const number = peg.Match(struct {
        first: peg.Char(peg.CharClass.range('1', '9'), .one),
        rest: peg.Char(peg.CharClass.range('0', '9'), .kleene),
    });
};

/// Fixture: any number of 'a' followed by one 'b'.
const KleeneGrammar = struct {
    const R = std.meta.DeclEnum(@This());

    pub const main = peg.Match(struct {
        a_list: peg.CharSet("a", .kleene),
        b: peg.CharSet("b", .one),
    });
};

/// Fixture: an optional 'a' followed by one 'b'.
const OptionalGrammar = struct {
    pub const main = peg.Match(struct {
        a_opt: peg.Maybe(peg.CharSet("a", .one)),
        b: peg.CharSet("b", .one),
    });
};

/// Advance `vm` by one instruction, updating `ip` in place.
/// Returns false (leaving `ip` untouched) once the VM has halted.
fn step(vm: anytype, ip: *u32) !bool {
    ip.* = (try vm.next(ip.*, .Step)) orelse return false;
    return true;
}

/// Assert that `text` parses under grammar `G`, both in loop mode (`parse`)
/// and in single-step mode (`countSteps`).
fn expectParseSuccess(comptime G: type, text: [:0]const u8) !void {
    const gpa = std.testing.allocator;
    try VM(G).parse(text, gpa);
    _ = try VM(G).countSteps(text, gpa);
}

/// Assert that `text` is rejected with `error.ParseFailed` under grammar
/// `G`, both in loop mode and in single-step mode.
fn expectParseFailure(comptime G: type, text: [:0]const u8) !void {
    const gpa = std.testing.allocator;
    try std.testing.expectError(error.ParseFailed, VM(G).parse(text, gpa));
    try std.testing.expectError(error.ParseFailed, VM(G).countSteps(text, gpa));
}

test "basic VM iteration" {
    try expectParseSuccess(SimpleGrammar, "ab");
    try expectParseFailure(SimpleGrammar, "ac");
}

test "VM with backtracking" {
    for ([_][:0]const u8{ "ab", "ac" }) |input| {
        try expectParseSuccess(ChoiceGrammar, input);
    }
    try expectParseFailure(ChoiceGrammar, "ad");
}

test "VM event iteration" {
    const steps = try VM(SimpleGrammar).countSteps("ab", std.testing.allocator);
    try std.testing.expectEqual(6, steps);
}

// Tests using the grammar compiler
test "simple grammar compilation" {
    try expectParseSuccess(SimpleGrammar, "ab");
    for ([_][:0]const u8{ "ac", "a" }) |input| {
        try expectParseFailure(SimpleGrammar, input);
    }
}

test "choice grammar compilation" {
    for ([_][:0]const u8{ "ab", "ac" }) |input| {
        try expectParseSuccess(ChoiceGrammar, input);
    }
    try expectParseFailure(ChoiceGrammar, "ad");
}

test "kleene star grammar compilation" {
    for ([_][:0]const u8{ "b", "ab", "aaab" }) |input| {
        try expectParseSuccess(KleeneGrammar, input);
    }
    try expectParseFailure(KleeneGrammar, "aaa");
}

test "optional grammar compilation" {
    for ([_][:0]const u8{ "ab", "b" }) |input| {
        try expectParseSuccess(OptionalGrammar, input);
    }
    try expectParseFailure(OptionalGrammar, "ac");
}

test "recursive grammar compilation" {
    for ([_][:0]const u8{ "42", "(123)", "((99))" }) |input| {
        try expectParseSuccess(RecursiveGrammar, input);
    }
    try expectParseFailure(RecursiveGrammar, "(42");
}

test "demo grammar from pegvmfun" {
    for ([_][:0]const u8{ "123 ", "[123 456 789]", "[[1] [2]]", "[]" }) |input| {
        try expectParseSuccess(peg.demoGrammar, input);
    }
}

test "memoization correctness" {
    const Machine = VM(RecursiveGrammar);
    const gpa = std.testing.allocator;

    // Well-formed input is accepted with and without the memo table.
    try Machine.parse("((42))", gpa);
    try Machine.parseWithMemo("((42))", gpa);

    // Unbalanced input is rejected with and without the memo table.
    try std.testing.expectError(error.ParseFailed, Machine.parse("((42", gpa));
    try std.testing.expectError(error.ParseFailed, Machine.parseWithMemo("((42", gpa));
}
Match(struct { 45 | s: CharSet(" \t\n\r", .one), 46 | ss: CharSet(" \t\n\r", .kleene), 47 | }); 48 | 49 | pub const SkipItem = Match(union(enum) { 50 | whitespace: Call(R.Whitespace), 51 | comment: Call(R.LineComment), 52 | }); 53 | 54 | pub const Skip = Kleene(R.SkipItem); 55 | 56 | // ======================================================================== 57 | // Identifiers and literals 58 | // ======================================================================== 59 | 60 | pub const Identifier = Match(struct { 61 | // TODO: Add reserved word checking with Shun 62 | first: Char(AlphaChar, .one), 63 | rest: Char(AlnumChar, .kleene), 64 | _skip: Hide(Call(R.Skip)), 65 | }); 66 | 67 | pub const BuiltinIdentifier = Match(struct { 68 | at: Hide(CharSet("@", .one)), 69 | first: Char(AlphaChar, .one), 70 | rest: Char(AlnumChar, .kleene), 71 | }); 72 | 73 | pub const Integer = Match(struct { 74 | first: Char(Digit, .one), 75 | rest: Char(Digit, .kleene), 76 | _skip: Hide(Call(R.Skip)), 77 | }); 78 | 79 | // ======================================================================== 80 | // String and char literals 81 | // ======================================================================== 82 | 83 | // Character classes for literals 84 | const EscapeChars = CharClass.anyOf("nr't\\\""); 85 | const ChrPlain = CharClass.range(' ', '&') 86 | .unionWith(CharClass.range('(', '[')) 87 | .unionWith(CharClass.range(']', '~')); 88 | const StrPlain = CharClass.range(' ', '!') 89 | .unionWith(CharClass.range('#', '[')) 90 | .unionWith(CharClass.range(']', '~')); 91 | 92 | pub const CharEscape = Match(struct { 93 | backslash: Hide(CharSet("\\", .one)), 94 | code: Char(EscapeChars, .one), 95 | }); 96 | 97 | pub const CharPlain = Char(ChrPlain, .one); 98 | 99 | pub const CharContent = Match(union(enum) { 100 | escape: Call(R.CharEscape), 101 | plain: Call(R.CharPlain), 102 | }); 103 | 104 | pub const CharLiteral = Match(struct { 105 | open: Hide(CharSet("'", .one)), 106 | content: 
Call(R.CharContent), 107 | close: Hide(CharSet("'", .one)), 108 | _skip: Hide(Call(R.Skip)), 109 | }); 110 | 111 | pub const StringEscape = Match(struct { 112 | backslash: Hide(CharSet("\\", .one)), 113 | code: Char(CharClass.anyOf("nr\"t\\"), .one), 114 | }); 115 | 116 | pub const StringPlain = Char(StrPlain, .one); 117 | 118 | pub const StringContent = Match(union(enum) { 119 | escape: Call(R.StringEscape), 120 | plain: Call(R.StringPlain), 121 | }); 122 | 123 | pub const StringLiteral = Match(struct { 124 | open: Hide(CharSet("\"", .one)), 125 | content: Kleene(R.StringContent), 126 | close: Hide(CharSet("\"", .one)), 127 | _skip: Hide(Call(R.Skip)), 128 | }); 129 | 130 | // ======================================================================== 131 | // Keywords - literal + boundary check + skip 132 | // ======================================================================== 133 | 134 | pub const IdentBoundary = Shun(Char(AlnumChar, .one)); 135 | 136 | // Helper for defining keywords 137 | fn Kw(comptime text: []const u8) type { 138 | return Match(struct { 139 | lit: Literal(text), 140 | _boundary: Hide(Call(R.IdentBoundary)), 141 | _skip: Hide(Call(R.Skip)), 142 | }); 143 | } 144 | 145 | pub const KwFn = Kw("fn"); 146 | pub const KwPub = Kw("pub"); 147 | pub const KwReturn = Kw("return"); 148 | pub const KwConst = Kw("const"); 149 | pub const KwVar = Kw("var"); 150 | pub const KwIf = Kw("if"); 151 | pub const KwElse = Kw("else"); 152 | pub const KwWhile = Kw("while"); 153 | pub const KwFor = Kw("for"); 154 | pub const KwSwitch = Kw("switch"); 155 | pub const KwStruct = Kw("struct"); 156 | pub const KwUnion = Kw("union"); 157 | pub const KwEnum = Kw("enum"); 158 | pub const KwError = Kw("error"); 159 | pub const KwDefer = Kw("defer"); 160 | pub const KwErrdefer = Kw("errdefer"); 161 | pub const KwSuspend = Kw("suspend"); 162 | pub const KwNosuspend = Kw("nosuspend"); 163 | pub const KwInline = Kw("inline"); 164 | pub const KwComptime = Kw("comptime"); 165 | 
pub const KwTest = Kw("test"); 166 | pub const KwBreak = Kw("break"); 167 | pub const KwContinue = Kw("continue"); 168 | 169 | // ======================================================================== 170 | // Operators - literal + optional negative lookahead + skip 171 | // ======================================================================== 172 | 173 | // Helper for operators (no lookahead needed for simple ones) 174 | fn Op(comptime text: []const u8) type { 175 | return Match(struct { 176 | lit: Literal(text), 177 | _skip: Hide(Call(R.Skip)), 178 | }); 179 | } 180 | 181 | pub const OpLParen = Op("("); 182 | pub const OpRParen = Op(")"); 183 | pub const OpLBrace = Op("{"); 184 | pub const OpRBrace = Op("}"); 185 | pub const OpLBracket = Op("["); 186 | pub const OpRBracket = Op("]"); 187 | pub const OpSemicolon = Op(";"); 188 | pub const OpColon = Op(":"); 189 | pub const OpComma = Op(","); 190 | pub const OpEquals = Op("="); 191 | pub const OpDot = Op("."); 192 | pub const OpPlus = Op("+"); 193 | pub const OpMinus = Op("-"); 194 | pub const OpStar = Op("*"); 195 | pub const OpSlash = Op("/"); 196 | pub const OpPercent = Op("%"); 197 | pub const OpBang = Op("!"); 198 | pub const OpAmpersand = Op("&"); 199 | pub const OpPipe = Op("|"); 200 | pub const OpCaret = Op("^"); 201 | pub const OpLAngle = Op("<"); 202 | pub const OpRAngle = Op(">"); 203 | pub const OpLShift = Op("<<"); 204 | pub const OpRShift = Op(">>"); 205 | pub const OpEqEq = Op("=="); 206 | pub const OpBangEq = Op("!="); 207 | pub const OpLtEq = Op("<="); 208 | pub const OpGtEq = Op(">="); 209 | pub const OpArrow = Op("=>"); 210 | pub const OpDotDot = Op(".."); 211 | pub const OpDotDotDot = Op("..."); 212 | pub const OpQuestion = Op("?"); 213 | pub const OpBangEqEq = Op("!=="); 214 | pub const OpTilde = Op("~"); 215 | 216 | // ======================================================================== 217 | // Type expressions 218 | // 
======================================================================== 219 | 220 | pub const ArraySizePrefix = Match(struct { 221 | _open: Hide(Call(R.OpLBracket)), 222 | size: Call(R.Expr), 223 | _close: Hide(Call(R.OpRBracket)), 224 | }); 225 | 226 | pub const SlicePrefix = Match(struct { 227 | _open: Hide(Call(R.OpLBracket)), 228 | _close: Hide(Call(R.OpRBracket)), 229 | }); 230 | 231 | pub const TypePrefix = Match(union(enum) { 232 | optional: Call(R.OpQuestion), 233 | pointer: Call(R.OpStar), 234 | array_size: Call(R.ArraySizePrefix), 235 | slice: Call(R.SlicePrefix), 236 | }); 237 | 238 | pub const ErrorSetDecl = Match(struct { 239 | _kw: Hide(Call(R.KwError)), 240 | _open: Hide(Call(R.OpLBrace)), 241 | identifiers: Maybe(Call(R.IdentifierList)), 242 | _close: Hide(Call(R.OpRBrace)), 243 | }); 244 | 245 | pub const IdentifierRest = Match(struct { 246 | _comma: Hide(Call(R.OpComma)), 247 | ident: Call(R.Identifier), 248 | }); 249 | 250 | pub const IdentifierList = Match(struct { 251 | first: Call(R.Identifier), 252 | rest: Kleene(R.IdentifierRest), 253 | trailing_comma: Maybe(Call(R.OpComma)), 254 | }); 255 | 256 | pub const TypeAtom = Match(union(enum) { 257 | identifier: Call(R.Identifier), 258 | container: Call(R.ContainerExpr), 259 | error_set: Call(R.ErrorSetDecl), 260 | }); 261 | 262 | pub const TypeCore = Match(struct { 263 | prefixes: Kleene(R.TypePrefix), 264 | atom: Call(R.TypeAtom), 265 | }); 266 | 267 | pub const TypeErrorUnion = Match(struct { 268 | _bang: Hide(Call(R.OpBang)), 269 | rhs: Call(R.TypeExpr), 270 | }); 271 | 272 | pub const TypeExpr = Match(struct { 273 | core: Call(R.TypeCore), 274 | error_union: Maybe(Call(R.TypeErrorUnion)), 275 | }); 276 | 277 | // ======================================================================== 278 | // Container types (struct, union, enum) 279 | // ======================================================================== 280 | 281 | pub const FieldDecl = Match(struct { 282 | name: Call(R.Identifier), 
283 | _colon: Hide(Call(R.OpColon)), 284 | type_: Call(R.TypeExpr), 285 | }); 286 | 287 | pub const FieldRest = Match(struct { 288 | _comma: Hide(Call(R.OpComma)), 289 | field: Call(R.FieldDecl), 290 | }); 291 | 292 | pub const FieldList = Match(struct { 293 | first: Call(R.FieldDecl), 294 | rest: Kleene(R.FieldRest), 295 | trailing_comma: Maybe(Call(R.OpComma)), 296 | }); 297 | 298 | pub const ContainerMembers = Match(struct { 299 | decls: Kleene(R.ContainerDeclaration), 300 | fields: Maybe(Call(R.FieldList)), 301 | }); 302 | 303 | pub const ContainerVarDecl = Match(struct { 304 | decl: Call(R.VarDecl), 305 | _semi: Hide(Call(R.OpSemicolon)), 306 | }); 307 | 308 | pub const ContainerDeclaration = Match(union(enum) { 309 | fn_decl: Call(R.FnDecl), 310 | var_decl: Call(R.ContainerVarDecl), 311 | }); 312 | 313 | pub const StructBody = Match(struct { 314 | _open: Hide(Call(R.OpLBrace)), 315 | members: Call(R.ContainerMembers), 316 | _close: Hide(Call(R.OpRBrace)), 317 | }); 318 | 319 | pub const EnumFieldRest = Match(struct { 320 | _comma: Hide(Call(R.OpComma)), 321 | ident: Call(R.Identifier), 322 | }); 323 | 324 | pub const EnumFields = Match(struct { 325 | first: Call(R.Identifier), 326 | rest: Kleene(R.EnumFieldRest), 327 | trailing_comma: Maybe(Call(R.OpComma)), 328 | }); 329 | 330 | pub const EnumBody = Match(struct { 331 | _open: Hide(Call(R.OpLBrace)), 332 | fields: Maybe(Call(R.EnumFields)), 333 | _close: Hide(Call(R.OpRBrace)), 334 | }); 335 | 336 | pub const StructExpr = Match(struct { 337 | _kw: Hide(Call(R.KwStruct)), 338 | body: Call(R.StructBody), 339 | }); 340 | 341 | pub const UnionExpr = Match(struct { 342 | _kw: Hide(Call(R.KwUnion)), 343 | body: Call(R.StructBody), 344 | }); 345 | 346 | pub const EnumExpr = Match(struct { 347 | _kw: Hide(Call(R.KwEnum)), 348 | body: Call(R.EnumBody), 349 | }); 350 | 351 | pub const ContainerExpr = Match(union(enum) { 352 | struct_: Call(R.StructExpr), 353 | union_: Call(R.UnionExpr), 354 | enum_: Call(R.EnumExpr), 
355 | }); 356 | 357 | // ======================================================================== 358 | // Payloads (for if/while/for/switch) 359 | // ======================================================================== 360 | 361 | pub const Payload = Match(struct { 362 | _open: Hide(Call(R.OpPipe)), 363 | name: Call(R.Identifier), 364 | _close: Hide(Call(R.OpPipe)), 365 | }); 366 | 367 | pub const PtrPayload = Match(struct { 368 | _open: Hide(Call(R.OpPipe)), 369 | ptr: Maybe(Call(R.OpStar)), 370 | name: Call(R.Identifier), 371 | _close: Hide(Call(R.OpPipe)), 372 | }); 373 | 374 | // ======================================================================== 375 | // Expressions - Precedence from lowest to highest 376 | // ======================================================================== 377 | 378 | // Forward declaration for recursive expressions 379 | pub const Expr = Call(R.BoolOrExpr); 380 | 381 | pub const ParenExpr = Match(struct { 382 | _open: Hide(Call(R.OpLParen)), 383 | expr: Call(R.Expr), 384 | _close: Hide(Call(R.OpRParen)), 385 | }); 386 | 387 | // Control flow expressions 388 | pub const ErrorLiteral = Match(struct { 389 | _kw: Hide(Call(R.KwError)), 390 | _dot: Hide(Call(R.OpDot)), 391 | name: Call(R.Identifier), 392 | }); 393 | 394 | pub const ReturnExpr = Match(struct { 395 | _kw: Hide(Call(R.KwReturn)), 396 | value: Maybe(Call(R.Expr)), 397 | }); 398 | 399 | pub const LabelRef = Match(struct { 400 | _colon: Hide(Call(R.OpColon)), 401 | name: Call(R.Identifier), 402 | }); 403 | 404 | pub const BreakExpr = Match(struct { 405 | _kw: Hide(Call(R.KwBreak)), 406 | label: Maybe(Call(R.LabelRef)), 407 | value: Maybe(Call(R.Expr)), 408 | }); 409 | 410 | pub const ContinueExpr = Match(struct { 411 | _kw: Hide(Call(R.KwContinue)), 412 | label: Maybe(Call(R.LabelRef)), 413 | }); 414 | 415 | pub const IfExpr = Match(struct { 416 | _if: Hide(Call(R.KwIf)), 417 | _lparen: Hide(Call(R.OpLParen)), 418 | condition: Call(R.Expr), 419 | _rparen: 
Hide(Call(R.OpRParen)), 420 | payload: Maybe(Call(R.PtrPayload)), 421 | then_body: Call(R.Expr), 422 | _else: Hide(Call(R.KwElse)), 423 | else_body: Call(R.Expr), 424 | }); 425 | 426 | pub const WhileContinue = Match(struct { 427 | _colon: Hide(Call(R.OpColon)), 428 | _lparen: Hide(Call(R.OpLParen)), 429 | expr: Call(R.Expr), 430 | _rparen: Hide(Call(R.OpRParen)), 431 | }); 432 | 433 | pub const ElseClause = Match(struct { 434 | _else: Hide(Call(R.KwElse)), 435 | body: Call(R.Expr), 436 | }); 437 | 438 | pub const WhileExpr = Match(struct { 439 | _while: Hide(Call(R.KwWhile)), 440 | _lparen: Hide(Call(R.OpLParen)), 441 | condition: Call(R.Expr), 442 | _rparen: Hide(Call(R.OpRParen)), 443 | payload: Maybe(Call(R.PtrPayload)), 444 | continue_expr: Maybe(Call(R.WhileContinue)), 445 | body: Call(R.Block), 446 | else_clause: Maybe(Call(R.ElseClause)), 447 | }); 448 | 449 | pub const ForItem = Match(struct { 450 | expr: Call(R.Expr), 451 | range: Maybe(Match(struct { 452 | _dots: Hide(Call(R.OpDotDot)), 453 | end: Maybe(Call(R.Expr)), 454 | })), 455 | }); 456 | 457 | pub const ForArgsRest = Match(struct { 458 | _comma: Hide(Call(R.OpComma)), 459 | item: Call(R.ForItem), 460 | }); 461 | 462 | pub const ForArgsList = Match(struct { 463 | first: Call(R.ForItem), 464 | rest: Kleene(R.ForArgsRest), 465 | trailing_comma: Maybe(Call(R.OpComma)), 466 | }); 467 | 468 | pub const ForExpr = Match(struct { 469 | _for: Hide(Call(R.KwFor)), 470 | _lparen: Hide(Call(R.OpLParen)), 471 | args: Call(R.ForArgsList), 472 | _rparen: Hide(Call(R.OpRParen)), 473 | payload: Maybe(Call(R.PtrPayload)), 474 | body: Call(R.Block), 475 | else_clause: Maybe(Match(struct { 476 | _else: Hide(Call(R.KwElse)), 477 | body: Call(R.Expr), 478 | })), 479 | }); 480 | 481 | pub const CaseItem = Match(struct { 482 | value: Call(R.Expr), 483 | range: Maybe(Match(struct { 484 | _dots: Hide(Call(R.OpDotDotDot)), 485 | end: Call(R.Expr), 486 | })), 487 | }); 488 | 489 | pub const SwitchElse = Match(struct { 490 | 
_else: Hide(Call(R.KwElse)), 491 | _arrow: Hide(Call(R.OpArrow)), 492 | expr: Call(R.Expr), 493 | }); 494 | 495 | pub const CaseItemRest = Match(struct { 496 | _comma: Hide(Call(R.OpComma)), 497 | item: Call(R.CaseItem), 498 | }); 499 | 500 | pub const SwitchCase = Match(struct { 501 | inline_: Maybe(Call(R.KwInline)), 502 | first: Call(R.CaseItem), 503 | rest: Kleene(R.CaseItemRest), 504 | _arrow: Hide(Call(R.OpArrow)), 505 | expr: Call(R.Expr), 506 | }); 507 | 508 | pub const SwitchProng = Match(union(enum) { 509 | else_: Call(R.SwitchElse), 510 | case: Call(R.SwitchCase), 511 | }); 512 | 513 | pub const SwitchProngRest = Match(struct { 514 | _comma: Hide(Call(R.OpComma)), 515 | prong: Call(R.SwitchProng), 516 | }); 517 | 518 | pub const SwitchBody = Match(struct { 519 | _open: Hide(Call(R.OpLBrace)), 520 | prongs: Maybe(Match(struct { 521 | first: Call(R.SwitchProng), 522 | rest: Kleene(R.SwitchProngRest), 523 | })), 524 | _close: Hide(Call(R.OpRBrace)), 525 | }); 526 | 527 | pub const SwitchExpr = Match(struct { 528 | _switch: Hide(Call(R.KwSwitch)), 529 | _lparen: Hide(Call(R.OpLParen)), 530 | expr: Call(R.Expr), 531 | _rparen: Hide(Call(R.OpRParen)), 532 | body: Call(R.SwitchBody), 533 | }); 534 | 535 | pub const Primary = Match(union(enum) { 536 | paren: Call(R.ParenExpr), 537 | block: Call(R.Block), 538 | if_expr: Call(R.IfExpr), 539 | while_expr: Call(R.WhileExpr), 540 | for_expr: Call(R.ForExpr), 541 | switch_expr: Call(R.SwitchExpr), 542 | return_expr: Call(R.ReturnExpr), 543 | break_expr: Call(R.BreakExpr), 544 | continue_expr: Call(R.ContinueExpr), 545 | container: Call(R.ContainerExpr), 546 | builtin: Call(R.BuiltinIdentifier), 547 | error_lit: Call(R.ErrorLiteral), 548 | integer: Call(R.Integer), 549 | string: Call(R.StringLiteral), 550 | char: Call(R.CharLiteral), 551 | identifier: Call(R.Identifier), 552 | }); 553 | 554 | // Suffix operators: function calls, member access, indexing 555 | pub const FnCallArgs = Match(struct { 556 | _open: 
Hide(Call(R.OpLParen)), 557 | args: Maybe(Call(R.ExprList)), 558 | _close: Hide(Call(R.OpRParen)), 559 | }); 560 | 561 | pub const MemberAccess = Match(struct { 562 | _dot: Hide(Call(R.OpDot)), 563 | member: Call(R.Identifier), 564 | }); 565 | 566 | pub const ArrayIndex = Match(struct { 567 | _open: Hide(Call(R.OpLBracket)), 568 | index: Call(R.Expr), 569 | _close: Hide(Call(R.OpRBracket)), 570 | }); 571 | 572 | pub const Suffix = Match(union(enum) { 573 | call: Call(R.FnCallArgs), 574 | member: Call(R.MemberAccess), 575 | index: Call(R.ArrayIndex), 576 | }); 577 | 578 | pub const SuffixExpr = Match(struct { 579 | base: Call(R.Primary), 580 | suffixes: Kleene(R.Suffix), 581 | }); 582 | 583 | // Prefix operators: !, -, &, try 584 | pub const PrefixOp = Match(union(enum) { 585 | not: Call(R.OpBang), 586 | neg: Call(R.OpMinus), 587 | ref: Call(R.OpAmpersand), 588 | try_: Call(R.KwTry), 589 | }); 590 | 591 | pub const PrefixExpr = Match(struct { 592 | ops: Kleene(R.PrefixOp), 593 | expr: Call(R.SuffixExpr), 594 | }); 595 | 596 | // Multiply: *, /, % 597 | pub const MultiplyOp = Match(union(enum) { 598 | mul: Call(R.OpStar), 599 | div: Call(R.OpSlash), 600 | mod: Call(R.OpPercent), 601 | }); 602 | 603 | pub const MultiplyRhs = Match(struct { 604 | op: Call(R.MultiplyOp), 605 | rhs: Call(R.PrefixExpr), 606 | }); 607 | 608 | pub const MultiplyExpr = Match(struct { 609 | lhs: Call(R.PrefixExpr), 610 | rest: Kleene(R.MultiplyRhs), 611 | }); 612 | 613 | // Add: +, - 614 | pub const AddOp = Match(union(enum) { 615 | add: Call(R.OpPlus), 616 | sub: Call(R.OpMinus), 617 | }); 618 | 619 | pub const AddRhs = Match(struct { 620 | op: Call(R.AddOp), 621 | rhs: Call(R.MultiplyExpr), 622 | }); 623 | 624 | pub const AddExpr = Match(struct { 625 | lhs: Call(R.MultiplyExpr), 626 | rest: Kleene(R.AddRhs), 627 | }); 628 | 629 | // Bit shift: <<, >> 630 | pub const BitShiftOp = Match(union(enum) { 631 | left: Call(R.OpLShift), 632 | right: Call(R.OpRShift), 633 | }); 634 | 635 | pub const 
BitShiftRhs = Match(struct { 636 | op: Call(R.BitShiftOp), 637 | rhs: Call(R.AddExpr), 638 | }); 639 | 640 | pub const BitShiftExpr = Match(struct { 641 | lhs: Call(R.AddExpr), 642 | rest: Kleene(R.BitShiftRhs), 643 | }); 644 | 645 | // Bitwise: &, ^, | 646 | pub const BitwiseOp = Match(union(enum) { 647 | and_: Call(R.OpAmpersand), 648 | xor: Call(R.OpCaret), 649 | or_: Call(R.OpPipe), 650 | }); 651 | 652 | pub const BitwiseRhs = Match(struct { 653 | op: Call(R.BitwiseOp), 654 | rhs: Call(R.BitShiftExpr), 655 | }); 656 | 657 | pub const BitwiseExpr = Match(struct { 658 | lhs: Call(R.BitShiftExpr), 659 | rest: Kleene(R.BitwiseRhs), 660 | }); 661 | 662 | // Compare: ==, !=, <=, >=, <, > (two-character operators first: Op() has no lookahead, so `<` would shadow `<=` if alternatives are tried in declaration order) 663 | pub const CompareOp = Match(union(enum) { 664 | eq: Call(R.OpEqEq), 665 | neq: Call(R.OpBangEq), 666 | lte: Call(R.OpLtEq), 667 | gte: Call(R.OpGtEq), 668 | lt: Call(R.OpLAngle), 669 | gt: Call(R.OpRAngle), 670 | }); 671 | 672 | pub const CompareRhs = Match(struct { 673 | op: Call(R.CompareOp), 674 | rhs: Call(R.BitwiseExpr), 675 | }); 676 | 677 | pub const CompareExpr = Match(struct { 678 | lhs: Call(R.BitwiseExpr), 679 | rhs: Maybe(Call(R.CompareRhs)), 680 | }); 681 | 682 | // Boolean and: and 683 | pub const KwAnd = Kw("and"); 684 | pub const KwOr = Kw("or"); 685 | pub const KwTry = Kw("try"); 686 | 687 | pub const BoolAndRhs = Match(struct { 688 | _op: Hide(Call(R.KwAnd)), 689 | rhs: Call(R.CompareExpr), 690 | }); 691 | 692 | pub const BoolAndExpr = Match(struct { 693 | lhs: Call(R.CompareExpr), 694 | rest: Kleene(R.BoolAndRhs), 695 | }); 696 | 697 | // Boolean or: or 698 | pub const BoolOrRhs = Match(struct { 699 | _op: Hide(Call(R.KwOr)), 700 | rhs: Call(R.BoolAndExpr), 701 | }); 702 | 703 | pub const BoolOrExpr = Match(struct { 704 | lhs: Call(R.BoolAndExpr), 705 | rest: Kleene(R.BoolOrRhs), 706 | }); 707 | 708 | // Expression list (for function calls, etc.) 
709 | pub const ExprListRest = Match(struct { 710 | _comma: Hide(Call(R.OpComma)), 711 | expr: Call(R.Expr), 712 | }); 713 | 714 | pub const ExprList = Match(struct { 715 | first: Call(R.Expr), 716 | rest: Kleene(R.ExprListRest), 717 | }); 718 | 719 | // ======================================================================== 720 | // Statements and blocks 721 | // ======================================================================== 722 | 723 | pub const VarDeclKeyword = Match(union(enum) { 724 | const_: Call(R.KwConst), 725 | var_: Call(R.KwVar), 726 | }); 727 | 728 | pub const VarDeclType = Match(struct { 729 | _colon: Hide(Call(R.OpColon)), 730 | type_: Call(R.TypeExpr), 731 | }); 732 | 733 | pub const VarDeclInit = Match(struct { 734 | _eq: Hide(Call(R.OpEquals)), 735 | value: Call(R.Expr), 736 | }); 737 | 738 | pub const VarDecl = Match(struct { 739 | keyword: Call(R.VarDeclKeyword), 740 | name: Call(R.Identifier), 741 | type_: Maybe(Call(R.VarDeclType)), 742 | init: Maybe(Call(R.VarDeclInit)), 743 | }); 744 | 745 | pub const ExprStmt = Match(struct { 746 | expr: Call(R.Expr), 747 | _semi: Hide(Call(R.OpSemicolon)), 748 | }); 749 | 750 | pub const VarDeclStmt = Match(struct { 751 | decl: Call(R.VarDecl), 752 | _semi: Hide(Call(R.OpSemicolon)), 753 | }); 754 | 755 | pub const IfStatement = Match(struct { 756 | _if: Hide(Call(R.KwIf)), 757 | _lparen: Hide(Call(R.OpLParen)), 758 | condition: Call(R.Expr), 759 | _rparen: Hide(Call(R.OpRParen)), 760 | payload: Maybe(Call(R.PtrPayload)), 761 | body: Call(R.Block), 762 | else_clause: Maybe(Match(struct { 763 | _else: Hide(Call(R.KwElse)), 764 | body: Call(R.Block), 765 | })), 766 | }); 767 | 768 | pub const DeferStatement = Match(struct { 769 | _kw: Hide(Call(R.KwDefer)), 770 | expr: Call(R.Expr), 771 | _semi: Hide(Call(R.OpSemicolon)), 772 | }); 773 | 774 | pub const Statement = Match(union(enum) { 775 | if_stmt: Call(R.IfStatement), 776 | defer_stmt: Call(R.DeferStatement), 777 | var_decl: Call(R.VarDeclStmt), 
778 | expr: Call(R.ExprStmt), 779 | }); 780 | 781 | pub const Block = Match(struct { 782 | _open: Hide(Call(R.OpLBrace)), 783 | statements: Kleene(R.Statement), 784 | _close: Hide(Call(R.OpRBrace)), 785 | }); 786 | 787 | // ======================================================================== 788 | // Function declarations 789 | // ======================================================================== 790 | 791 | pub const Param = Match(struct { 792 | name: Call(R.Identifier), 793 | _colon: Hide(Call(R.OpColon)), 794 | type_: Call(R.TypeExpr), 795 | }); 796 | 797 | pub const ParamRest = Match(struct { 798 | _comma: Hide(Call(R.OpComma)), 799 | param: Call(R.Param), 800 | }); 801 | 802 | pub const ParamList = Match(struct { 803 | first: Call(R.Param), 804 | rest: Kleene(R.ParamRest), 805 | }); 806 | 807 | pub const FnDecl = Match(struct { 808 | pub_: Maybe(Call(R.KwPub)), 809 | _fn: Hide(Call(R.KwFn)), 810 | name: Call(R.Identifier), 811 | _lparen: Hide(Call(R.OpLParen)), 812 | params: Maybe(Call(R.ParamList)), 813 | _rparen: Hide(Call(R.OpRParen)), 814 | return_type: Call(R.TypeExpr), 815 | body: Call(R.Block), 816 | }); 817 | 818 | // ======================================================================== 819 | // Top-level 820 | // ======================================================================== 821 | 822 | pub const Root = Match(struct { 823 | _skip: Hide(Call(R.Skip)), 824 | decls: Kleene(R.FnDecl), 825 | }); 826 | }; 827 | 828 | // ============================================================================ 829 | // Tests - Port tests incrementally as we implement more rules 830 | // ============================================================================ 831 | 832 | const VMFactory = @import("vm.zig").VM; 833 | const TestVM = VMFactory(ZigGrammar); 834 | const NodeType = peg.NodeType; 835 | 836 | fn parseRule(comptime rule: ZigGrammar.R, input: [:0]const u8) !void { 837 | var saves_buf: [128]TestVM.SaveFrame = undefined; 838 | var calls_buf: 
[128]TestVM.CallFrame = undefined; 839 | var nodes_buf: [512]NodeType = undefined; 840 | var structs_buf: [128]TestVM.StructuralFrame = undefined; 841 | var child_buf: [512]u32 = undefined; 842 | 843 | var vm = TestVM.init(input, &saves_buf, &calls_buf, &nodes_buf, &structs_buf, &child_buf); 844 | try vm.runFrom(rule); 845 | } 846 | 847 | test "zig grammar: identifier" { 848 | try parseRule(.Identifier, "foo"); 849 | } 850 | 851 | test "zig grammar: builtin identifier" { 852 | try parseRule(.BuiltinIdentifier, "@foo"); 853 | } 854 | 855 | test "zig grammar: integer" { 856 | try parseRule(.Integer, "42"); 857 | } 858 | 859 | test "zig grammar: char literal plain" { 860 | try parseRule(.CharLiteral, "'a'"); 861 | } 862 | 863 | test "zig grammar: char literal escape" { 864 | try parseRule(.CharLiteral, "'\\n'"); 865 | } 866 | 867 | test "zig grammar: string literal empty" { 868 | try parseRule(.StringLiteral, "\"\""); 869 | } 870 | 871 | test "zig grammar: string literal simple" { 872 | try parseRule(.StringLiteral, "\"hello\""); 873 | } 874 | 875 | test "zig grammar: string literal with escape" { 876 | try parseRule(.StringLiteral, "\"hello\\nworld\""); 877 | } 878 | 879 | test "zig grammar: return expression no value" { 880 | try parseRule(.ReturnExpr, "return"); 881 | } 882 | 883 | test "zig grammar: return expression with value" { 884 | try parseRule(.ReturnExpr, "return 42"); 885 | } 886 | 887 | test "zig grammar: var decl no init" { 888 | try parseRule(.VarDecl, "var x"); 889 | } 890 | 891 | test "zig grammar: const decl with init" { 892 | try parseRule(.VarDecl, "const x = 42"); 893 | } 894 | 895 | test "zig grammar: var decl with type" { 896 | try parseRule(.VarDecl, "var x: i32 = 42"); 897 | } 898 | 899 | test "zig grammar: expr statement" { 900 | try parseRule(.ExprStmt, "foo;"); 901 | } 902 | 903 | test "zig grammar: empty block" { 904 | try parseRule(.Block, "{}"); 905 | } 906 | 907 | test "zig grammar: block with statement" { 908 | try parseRule(.Block, 
"{ return; }"); 909 | } 910 | 911 | test "zig grammar: block with multiple statements" { 912 | try parseRule(.Block, "{ var x = 1; return x; }"); 913 | } 914 | 915 | test "zig grammar: simple function" { 916 | try parseRule(.FnDecl, "fn main() void {}"); 917 | } 918 | 919 | test "zig grammar: function with return" { 920 | try parseRule(.FnDecl, "fn main() void { return; }"); 921 | } 922 | 923 | test "zig grammar: function with params" { 924 | try parseRule(.FnDecl, "fn add(a: i32, b: i32) i32 { return a; }"); 925 | } 926 | 927 | test "zig grammar: pub function" { 928 | try parseRule(.FnDecl, "pub fn main() void {}"); 929 | } 930 | 931 | test "zig grammar: root empty" { 932 | try parseRule(.Root, ""); 933 | } 934 | 935 | test "zig grammar: root with function" { 936 | try parseRule(.Root, "fn main() void {}"); 937 | } 938 | 939 | test "zig grammar: struct type" { 940 | try parseRule(.ContainerExpr, "struct { x: i32, y: i32 }"); 941 | } 942 | 943 | test "zig grammar: enum type" { 944 | try parseRule(.ContainerExpr, "enum { A, B, C }"); 945 | } 946 | 947 | test "zig grammar: error set" { 948 | try parseRule(.ErrorSetDecl, "error{OutOfMemory, InvalidInput}"); 949 | } 950 | 951 | test "zig grammar: pointer type" { 952 | try parseRule(.TypeExpr, "*i32"); 953 | } 954 | 955 | test "zig grammar: optional type" { 956 | try parseRule(.TypeExpr, "?i32"); 957 | } 958 | 959 | test "zig grammar: array type" { 960 | try parseRule(.TypeExpr, "[10]u8"); 961 | } 962 | 963 | test "zig grammar: error union type" { 964 | try parseRule(.TypeExpr, "anyerror!void"); 965 | } 966 | 967 | test "zig grammar: addition" { 968 | try parseRule(.Expr, "1 + 2"); 969 | } 970 | 971 | test "zig grammar: multiplication precedence" { 972 | try parseRule(.Expr, "1 + 2 * 3"); 973 | } 974 | 975 | test "zig grammar: comparison" { 976 | try parseRule(.Expr, "x < 10"); 977 | } 978 | 979 | test "zig grammar: boolean and" { 980 | try parseRule(.Expr, "a and b"); 981 | } 982 | 983 | test "zig grammar: boolean or" 
{ 984 | try parseRule(.Expr, "a or b"); 985 | } 986 | 987 | test "zig grammar: function call" { 988 | try parseRule(.Expr, "foo()"); 989 | } 990 | 991 | test "zig grammar: function call with args" { 992 | try parseRule(.Expr, "add(1, 2)"); 993 | } 994 | 995 | test "zig grammar: member access" { 996 | try parseRule(.Expr, "foo.bar"); 997 | } 998 | 999 | test "zig grammar: array index simple" { 1000 | try parseRule(.ArrayIndex, "[0]"); 1001 | } 1002 | 1003 | test "zig grammar: array index" { 1004 | try parseRule(.Expr, "arr[0]"); 1005 | } 1006 | 1007 | test "zig grammar: chained suffix" { 1008 | try parseRule(.Expr, "foo.bar()[0]"); 1009 | } 1010 | 1011 | test "zig grammar: prefix operators" { 1012 | try parseRule(.Expr, "!x"); 1013 | } 1014 | 1015 | test "zig grammar: negative" { 1016 | try parseRule(.Expr, "-42"); 1017 | } 1018 | 1019 | test "zig grammar: try prefix" { 1020 | try parseRule(.Expr, "try foo()"); 1021 | } 1022 | 1023 | test "zig grammar: complex expression" { 1024 | try parseRule(.Expr, "a + b * c == d and e or f"); 1025 | } 1026 | 1027 | var stdoutbuf: [4096]u8 = undefined; 1028 | const stdout_file = std.fs.File.stdout(); 1029 | var stdout_writer = stdout_file.writer(&stdoutbuf); 1030 | const stdout = &stdout_writer.interface; 1031 | 1032 | pub fn main() !void { 1033 | const trace = @import("trace.zig"); 1034 | const tty = std.Io.tty.detectConfig(stdout_file); 1035 | 1036 | var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); 1037 | defer arena.deinit(); 1038 | const allocator = arena.allocator(); 1039 | 1040 | // Trace parsing "[0]" starting from the ArrayIndex rule (the same input/rule pair as the "array index simple" test) 1041 | var vm = try TestVM.initAlloc("[0]", allocator, 64, 64, 512); 1042 | defer vm.deinit(allocator); 1043 | 1044 | try trace.traceFrom(&vm, stdout, tty, .ArrayIndex); 1045 | try stdout.flush(); 1046 | } 1047 | -------------------------------------------------------------------------------- /src/zigparse.zig: -------------------------------------------------------------------------------- 1 | 
const std = @import("std"); 2 | 3 | comptime { 4 | @setEvalBranchQuota(200000); 5 | } 6 | const pegvm = @import("pegvm.zig"); 7 | 8 | const VM = pegvm.VM; 9 | const Combinators = pegvm.Combinators; 10 | const ascii = pegvm.ascii; 11 | 12 | pub const ZigMiniGrammar = struct { 13 | // It's very important that some rules have explicit [N]Op return types. 14 | // In fact, the compiler usually crashes without them. 15 | // 16 | // The byte offset to each rule must be clear to the compiler, 17 | // since we take their addresses. 18 | // 19 | // By putting the main recursion targets above the rules that call them, 20 | // sized explicitly, we can use type inference for the others. 21 | 22 | const C = Combinators(@This()); 23 | const Op = C.Op; 24 | const one = C.call; 25 | const opt = C.opt; 26 | const star = C.star; 27 | const alt = C.anyOf; 28 | 29 | fn rule(x: anytype) [x.len + 1]Op { 30 | return x ++ C.ret; 31 | } 32 | 33 | // Whitespace and comment handling 34 | const not_nl_ascii = C.charclass(.{ ascii[' ' .. 
'~' + 1], '\t', '\r' }); 35 | const line_comment = C.text("//") ++ star(not_nl_ascii); 36 | 37 | pub const Skip: C.Annotated(10) = C.silent(rule( 38 | star(alt(.{ C.charclass(" \t\n\r"), line_comment })) 39 | )); 40 | 41 | pub const Statement: C.Annotated(29) = C.node(rule( 42 | alt(.{ 43 | one(&IfStatement), 44 | one(&LabeledWhileStatement), 45 | one(&LabeledForStatement), 46 | one(&SwitchExpr), 47 | one(&DeferStatement), 48 | one(&ErrDeferStatement), 49 | one(&NoSuspendStatement), 50 | one(&SuspendStatement), 51 | one(&VarDeclExprStatement), 52 | one(&BlockExprStatement), 53 | }), 54 | )); 55 | 56 | pub const Block: C.Annotated(8) = C.node(rule( 57 | @"{" ++ star(one(&Statement)) ++ @"}", 58 | )); 59 | 60 | pub const Expr: C.Annotated(6) = C.silent(rule( 61 | one(&BoolAndExpr) ++ 62 | star(one(&BoolOrOp) ++ one(&BoolAndExpr)), 63 | )); 64 | 65 | pub const TypeAtom: C.Annotated(8) = C.node(rule( 66 | alt(.{ 67 | one(&Identifier), 68 | one(&ContainerExpr), 69 | one(&ErrorSetDecl), 70 | }), 71 | )); 72 | 73 | pub const TypeExpr: C.Annotated(11) = 74 | C.node(rule(one(&TypeCore) ++ opt(@"!" 
++ one(&TypeExpr)))); 75 | 76 | pub const Identifier = C.node(rule(C.shun(reserved_exact) ++ alpha ++ star(alnum_us) ++ one(&Skip))); 77 | 78 | pub const BuiltinIdentifier = C.node(rule(C.char('@') ++ alpha ++ star(alnum_us))); 79 | 80 | pub const Integer = C.node(rule(C.several(digit) ++ one(&Skip))); 81 | 82 | pub const IdentifierList = C.node(rule( 83 | one(&Identifier) ++ 84 | star(@"," ++ one(&Identifier)) ++ 85 | opt(@","), 86 | )); 87 | 88 | pub const ErrorSetDecl = C.node(rule(@"error" ++ @"{" ++ opt(one(&IdentifierList)) ++ @"}")); 89 | 90 | pub const AssignExpr = C.node(rule( 91 | alt(.{ 92 | one(&Identifier) ++ @"=" ++ one(&Expr), 93 | one(&Expr), 94 | }), 95 | )); 96 | 97 | pub const ExprList = C.node(rule( 98 | one(&Expr) ++ 99 | star(@"," ++ one(&Expr)), 100 | )); 101 | 102 | pub const ArrayStart = C.node(rule(@"[" ++ one(&Expr) ++ @"]")); 103 | 104 | pub const TypePrefix = C.node(rule( 105 | alt(.{ 106 | @"?", 107 | @"*", 108 | one(&ArrayStart), 109 | @"[" ++ @"]", 110 | }), 111 | )); 112 | 113 | pub const TypeCore = C.node(rule(star(one(&TypePrefix)) ++ one(&TypeAtom))); 114 | 115 | pub const FieldDecl = C.node(rule( 116 | one(&Identifier) ++ @":" ++ one(&TypeExpr), 117 | )); 118 | 119 | pub const FieldList = C.node(rule( 120 | one(&FieldDecl) ++ 121 | star(C.seq(.{ @",", one(&FieldDecl) })) ++ 122 | opt(@","), 123 | )); 124 | 125 | 126 | pub const ContainerMembers: C.Annotated(7) = C.node(rule( 127 | star(one(&ContainerDeclaration)) ++ 128 | opt(one(&FieldList)) 129 | )); 130 | 131 | pub const EnumFields = C.node(rule( 132 | one(&Identifier) ++ 133 | star(C.seq(.{ @",", one(&Identifier) })) ++ 134 | opt(@","), 135 | )); 136 | 137 | pub const EnumBody = C.node(rule( 138 | @"{" ++ opt(one(&EnumFields)) ++ @"}", 139 | )); 140 | 141 | pub const ContainerDeclaration: C.Annotated(7) = C.node(rule( 142 | alt(.{ 143 | one(&FnDecl), 144 | one(&VarDecl) ++ @";", 145 | }), 146 | )); 147 | 148 | 149 | pub const StructBody: C.Annotated(6) = C.node(rule( 150 | 
@"{" ++ one(&ContainerMembers) ++ @"}" 151 | )); 152 | 153 | pub const ContainerExpr = C.node(rule( 154 | alt(.{ 155 | C.seq(.{ @"struct", one(&StructBody) }), 156 | C.seq(.{ @"union", one(&StructBody) }), 157 | C.seq(.{ @"enum", one(&EnumBody) }), 158 | }), 159 | )); 160 | 161 | pub const ParenExpr = C.node(rule(@"(" ++ one(&Expr) ++ @")")); 162 | 163 | pub const LinkSection = C.node(rule(@"linksection" ++ one(&ParenExpr))); 164 | pub const AddrSpace = C.node(rule(@"addrspace" ++ one(&ParenExpr))); 165 | pub const CallConv = C.node(rule(@"callconv" ++ one(&ParenExpr))); 166 | pub const ByteAlign = C.node(rule(@"align" ++ one(&ParenExpr))); 167 | 168 | pub const ErrorLiteral = C.node(rule(@"error" ++ @"." ++ one(&Identifier))); 169 | 170 | pub const DotIdentifier = C.node(rule(@"." ++ one(&Identifier))); 171 | 172 | pub const CharLiteral = C.node(rule( 173 | C.char('\'') ++ 174 | alt(.{ chr_escape, chr_plain }) ++ 175 | C.char('\'') ++ 176 | one(&Skip), 177 | )); 178 | 179 | pub const StringLiteral = C.node(rule( 180 | C.char('"') ++ 181 | star(alt(.{ str_escape, str_plain })) ++ 182 | C.char('"') ++ 183 | one(&Skip), 184 | )); 185 | 186 | pub const Primary: C.Annotated(50) = C.node(rule( 187 | alt(.{ 188 | one(&ParenExpr), 189 | one(&Block), 190 | one(&IfExpr), 191 | one(&WhileExprE), 192 | one(&ForExprE), 193 | one(&SwitchExpr), 194 | one(&ReturnExpr), 195 | one(&BreakExpr), 196 | one(&ContinueExpr), 197 | one(&ContainerExpr), 198 | one(&BuiltinIdentifier), 199 | one(&ErrorLiteral), 200 | one(&DotIdentifier), 201 | one(&Integer), 202 | one(&StringLiteral), 203 | one(&CharLiteral), 204 | one(&Identifier), 205 | }), 206 | )); 207 | 208 | pub const FnCallArguments = 209 | rule(@"(" ++ opt(one(&ExprList)) ++ @")"); 210 | 211 | pub const MemberAccess = C.node(rule(@"." ++ one(&Identifier))); 212 | 213 | const sliceSuffix = 214 | @".." 
++ opt(one(&Expr)) ++ opt(@":" ++ one(&Expr)); 215 | 216 | pub const IndexOrSlice = C.node(rule( 217 | @"[" ++ one(&Expr) ++ opt(sliceSuffix) ++ @"]", 218 | )); 219 | 220 | pub const OneSuffix = C.node(rule( 221 | alt(.{ 222 | one(&FnCallArguments), 223 | one(&IndexOrSlice), 224 | @".*", 225 | @".?", 226 | one(&MemberAccess), 227 | }), 228 | )); 229 | 230 | pub const SuffixExpr = C.silent(rule( 231 | one(&Primary) ++ star(one(&OneSuffix)), 232 | )); 233 | 234 | pub const ReturnExpr = C.node(rule(@"return" ++ opt(one(&Expr)))); 235 | 236 | pub const LabelRef = C.node(rule(@":" ++ one(&Identifier))); 237 | pub const LabelDef = C.node(rule(one(&Identifier) ++ @":")); 238 | 239 | pub const BreakExpr = C.node(rule( 240 | @"break" ++ opt(one(&LabelRef)) ++ opt(one(&Expr)), 241 | )); 242 | 243 | pub const ContinueExpr = C.node(rule(@"continue" ++ opt(one(&LabelRef)))); 244 | 245 | pub const PtrPayload = C.node(rule( 246 | @"|" ++ opt(@"*") ++ one(&Identifier) ++ @"|", 247 | )); 248 | 249 | pub const PtrIndexPayload = C.node(rule( 250 | @"|" ++ 251 | opt(@"*") ++ one(&Identifier) ++ 252 | opt(@"," ++ one(&Identifier)) ++ 253 | @"|", 254 | )); 255 | 256 | pub const PtrListPayload = C.node(rule( 257 | @"|" ++ 258 | opt(@"*") ++ one(&Identifier) ++ 259 | star(@"," ++ opt(@"*") ++ one(&Identifier)) ++ 260 | opt(@",") ++ 261 | @"|", 262 | )); 263 | 264 | pub const Payload = C.node(rule(@"|" ++ one(&Identifier) ++ @"|")); 265 | 266 | pub const PayloadExpr = C.node(rule( 267 | opt(one(&PtrPayload)) ++ one(&Expr), 268 | )); 269 | 270 | pub const IfExpr = C.node(rule( 271 | @"if" ++ @"(" ++ one(&Expr) ++ @")" ++ 272 | one(&PayloadExpr) ++ 273 | @"else" ++ one(&Expr), 274 | )); 275 | 276 | pub const WhileContinueExpr = C.node(rule(@":" ++ @"(" ++ one(&AssignExpr) ++ @")")); 277 | 278 | pub const WhileExprE = C.node(rule( 279 | @"while" ++ one(&ParenExpr) ++ 280 | opt(one(&PtrPayload)) ++ 281 | opt(one(&WhileContinueExpr)) ++ 282 | one(&Block) ++ 283 | opt(@"else" ++ one(&Expr)), 284 
| )); 285 | 286 | pub const ForItem = C.node(rule( 287 | one(&Expr) ++ opt(@".." ++ opt(one(&Expr))), 288 | )); 289 | 290 | pub const ForArgumentsList = C.node(rule( 291 | one(&ForItem) ++ 292 | star(@"," ++ one(&ForItem)) ++ 293 | opt(@","), 294 | )); 295 | 296 | pub const ForExprE = C.node(rule( 297 | @"for" ++ @"(" ++ one(&ForArgumentsList) ++ @")" ++ 298 | opt(one(&PtrListPayload)) ++ 299 | one(&Block) ++ 300 | opt(@"else" ++ one(&Expr)), 301 | )); 302 | 303 | pub const CaseItem = C.node(rule( 304 | one(&Expr) ++ opt(@"..." ++ one(&Expr)), 305 | )); 306 | 307 | pub const ElseProng = C.node(rule( 308 | @"else" ++ @"=>" ++ opt(one(&PtrIndexPayload)) ++ one(&Expr), 309 | )); 310 | 311 | pub const CaseProng = C.node(rule( 312 | opt(@"inline") ++ 313 | one(&CaseItem) ++ 314 | star(@"," ++ one(&CaseItem)) ++ 315 | @"=>" ++ 316 | opt(one(&PtrIndexPayload)) ++ 317 | one(&Expr), 318 | )); 319 | 320 | pub const Prong = C.node(rule( 321 | alt(.{ 322 | one(&ElseProng), 323 | one(&CaseProng), 324 | }), 325 | )); 326 | 327 | pub const SwitchBody = C.node(rule( 328 | @"{" ++ 329 | opt(one(&Prong) ++ 330 | star(@"," ++ one(&Prong)) ++ opt(@",")) ++ // optional trailing comma after the last prong, as in the other comma lists 331 | @"}", 332 | )); 333 | 334 | pub const SwitchExpr = C.node(rule( 335 | @"switch" ++ one(&ParenExpr) ++ one(&SwitchBody), 336 | )); 337 | 338 | pub const PrefixOp = C.node(rule(alt(.{ @"!", @"-%", @"-", @"~", @"&", @"try" }))); // `-%` listed before `-` so the shorter token cannot shadow it 339 | 340 | pub const PrefixExpr = C.silent(rule( 341 | star(one(&PrefixOp)) ++ 342 | one(&SuffixExpr), 343 | )); 344 | 345 | pub const MultiplyOp = C.node(rule(alt(.{ @"*", @"/", @"%" }))); 346 | 347 | pub const MultiplyExpr = C.silent(rule( 348 | one(&PrefixExpr) ++ 349 | star(one(&MultiplyOp) ++ one(&PrefixExpr)), 350 | )); 351 | 352 | pub const AddOp = C.node(rule(alt(.{ @"+", @"-" }))); 353 | 354 | pub const AddExpr = C.silent(rule( 355 | one(&MultiplyExpr) ++ star( 356 | one(&AddOp) ++ one(&MultiplyExpr), 357 | ), 358 | )); 359 | 360 | pub const BitShiftOp = C.node(rule(alt(.{ @"<<", @">>" }))); 361 | 362 | pub 
const BitShiftExpr = C.silent(rule( 363 | one(&AddExpr) ++ star( 364 | one(&BitShiftOp) ++ one(&AddExpr), 365 | ), 366 | )); 367 | 368 | pub const BitwiseOp = C.node(rule(alt(.{ 369 | @"&", 370 | @"^", 371 | @"|", 372 | @"orelse", 373 | @"catch" ++ opt(one(&Payload)), 374 | }))); 375 | 376 | pub const BitwiseExpr = C.silent(rule( 377 | one(&BitShiftExpr) ++ 378 | star( 379 | one(&BitwiseOp) ++ 380 | one(&BitShiftExpr), 381 | ), 382 | )); 383 | 384 | pub const CompareOp = C.node(rule(alt(.{ @"==", @"!=", @"<=", @">=", @"<", @">" }))); 385 | 386 | pub const CompareExpr = C.silent(rule( 387 | one(&BitwiseExpr) ++ 388 | opt( 389 | one(&CompareOp) ++ 390 | one(&BitwiseExpr), 391 | ), 392 | )); 393 | 394 | pub const BoolAndOp = C.node(rule(@"and")); 395 | pub const BoolOrOp = C.node(rule(@"or")); 396 | 397 | pub const BoolAndExpr = C.silent(rule( 398 | one(&CompareExpr) ++ 399 | star(one(&BoolAndOp) ++ one(&CompareExpr)), 400 | )); 401 | 402 | pub const CallExpr = C.node(rule( 403 | alt(.{ one(&Identifier), one(&BuiltinIdentifier) }) ++ 404 | @"(" ++ 405 | opt(one(&ExprList)) ++ 406 | @")", 407 | )); 408 | 409 | pub const Param = C.node(rule( 410 | opt(@"comptime") ++ one(&Identifier) ++ @":" ++ one(&TypeExpr), 411 | )); 412 | 413 | pub const ParamList = C.node(rule( 414 | one(&Param) ++ 415 | star(@"," ++ one(&Param)), 416 | )); 417 | 418 | pub const ReturnStmt = C.node(rule( 419 | @"return" ++ opt(one(&Expr)), 420 | )); 421 | 422 | pub const VarDecl = C.node(rule( 423 | alt(.{ @"const", @"var" }) ++ 424 | one(&Identifier) ++ 425 | opt(@"=" ++ one(&Expr)), 426 | )); 427 | 428 | pub const BlockExpr = C.node(rule( 429 | opt(one(&LabelDef)) ++ one(&Block), 430 | )); 431 | 432 | pub const BlockExprStatement = C.node(rule( 433 | alt(.{ 434 | one(&BlockExpr), 435 | one(&AssignExpr) ++ @";", 436 | }), 437 | )); 438 | 439 | pub const VarDeclExprStatement = C.node(rule( 440 | alt(.{ 441 | one(&VarDecl) ++ @";", 442 | one(&Expr) ++ @";", 443 | }), 444 | )); 445 | 446 | pub const 
Else = C.node(rule( 447 | @"else" ++ opt(one(&PtrPayload)) ++ one(&Statement), 448 | )); 449 | 450 | pub const IfStatement = C.node(rule( 451 | @"if" ++ @"(" ++ one(&Expr) ++ @")" ++ opt(one(&PtrPayload)) ++ alt(.{ 452 | one(&BlockExpr) ++ opt(one(&Else)), 453 | one(&AssignExpr) ++ alt(.{ @";", one(&Else) }), 454 | }), 455 | )); 456 | 457 | pub const WhileStatement = C.node(rule( 458 | @"while" ++ @"(" ++ one(&Expr) ++ @")" ++ 459 | opt(one(&PtrPayload)) ++ 460 | opt(one(&WhileContinueExpr)) ++ 461 | alt(.{ 462 | one(&BlockExpr) ++ opt(one(&Else)), 463 | one(&AssignExpr) ++ alt(.{ @";", one(&Else) }), 464 | }), 465 | )); 466 | 467 | pub const LabeledWhileStatement = C.node(rule( 468 | opt(one(&LabelDef)) ++ one(&WhileStatement), 469 | )); 470 | 471 | pub const ForStatement = C.node(rule( 472 | @"for" ++ @"(" ++ one(&ForArgumentsList) ++ @")" ++ 473 | opt(one(&PtrListPayload)) ++ 474 | alt(.{ 475 | one(&BlockExpr), 476 | one(&AssignExpr) ++ @";", 477 | }), 478 | )); 479 | 480 | pub const LabeledForStatement = C.node(rule( 481 | opt(one(&LabelDef)) ++ one(&ForStatement), 482 | )); 483 | 484 | pub const DeferStatement = C.node(rule( 485 | @"defer" ++ one(&BlockExprStatement), 486 | )); 487 | pub const ErrDeferStatement = C.node(rule( 488 | @"errdefer" ++ opt(one(&Payload)) ++ one(&BlockExprStatement), 489 | )); 490 | pub const NoSuspendStatement = C.node(rule( 491 | @"nosuspend" ++ one(&BlockExprStatement), 492 | )); 493 | pub const SuspendStatement = C.node(rule( 494 | @"suspend" ++ one(&BlockExprStatement), 495 | )); 496 | 497 | pub const FnDecl = C.node(rule( 498 | opt(@"pub") ++ 499 | opt(alt(.{ 500 | @"export", 501 | @"extern" ++ opt(one(&StringLiteral)), 502 | @"inline", 503 | @"noinline", 504 | })) ++ 505 | @"fn" ++ 506 | one(&Identifier) ++ 507 | @"(" ++ 508 | opt(one(&ParamList)) ++ 509 | @")" ++ 510 | opt(one(&ByteAlign)) ++ 511 | opt(one(&AddrSpace)) ++ 512 | opt(one(&LinkSection)) ++ 513 | opt(one(&CallConv)) ++ 514 | opt(@"!") ++ 515 | one(&TypeExpr) ++ 
516 | one(&Block), 517 | )); 518 | 519 | pub const VarDeclProto = C.node(rule( 520 | alt(.{ @"const", @"var" }) ++ 521 | one(&Identifier) ++ 522 | opt(@":" ++ one(&TypeExpr)) ++ 523 | opt(one(&ByteAlign)) ++ 524 | opt(one(&AddrSpace)) ++ 525 | opt(one(&LinkSection)), 526 | )); 527 | 528 | pub const GlobalVarDecl = rule( 529 | one(&VarDeclProto) ++ 530 | opt(@"=" ++ one(&Expr)) ++ 531 | @";", 532 | ); 533 | 534 | pub const TopVarDecl = C.node(rule( 535 | opt(@"pub") ++ 536 | opt(alt(.{ 537 | @"export", 538 | @"extern" ++ opt(one(&StringLiteral)), 539 | })) ++ 540 | opt(@"threadlocal") ++ 541 | one(&GlobalVarDecl), 542 | )); 543 | 544 | pub const TestDecl = C.node(rule( 545 | @"test" ++ 546 | opt(alt(.{ 547 | one(&StringLiteral), 548 | one(&Identifier), 549 | })) ++ 550 | one(&Block), 551 | )); 552 | 553 | pub const ComptimeDecl = C.node(rule( 554 | @"comptime" ++ one(&Block), 555 | )); 556 | 557 | pub const Root: C.Annotated(15) = C.node(rule( 558 | one(&Skip) ++ 559 | star(alt(.{ 560 | one(&FnDecl), 561 | one(&TopVarDecl), 562 | one(&TestDecl), 563 | one(&ComptimeDecl), 564 | })) ++ 565 | C.eof 566 | )); 567 | 568 | pub const start = rule(one(&Root) ++ C.ok); 569 | 570 | const chr_escape = 571 | @"\\" ++ C.charclass("nr't\\\""); 572 | const chr_plain = 573 | C.charclass(.{ 574 | ascii[' ' .. '&' + 1], 575 | ascii['(' .. '[' + 1], 576 | ascii[']' .. '~' + 1], 577 | }); 578 | 579 | const alpha = 580 | C.charclass(.{ ascii['a' .. 'z' + 1], ascii['A' .. 'Z' + 1], "_" }); 581 | const digit = 582 | C.charclass(ascii['0' .. '9' + 1]); 583 | const alnum_us = 584 | C.charclass(.{ 585 | ascii['a' .. 'z' + 1], ascii['A' .. 'Z' + 1], ascii['0' .. '9' + 1], "_", 586 | }); 587 | const str_escape = 588 | @"\\" ++ C.charclass("nr\"t\\"); 589 | const str_plain = 590 | C.charclass(.{ 591 | ascii[' ' .. '!' + 1], 592 | ascii['#' .. '[' + 1], 593 | ascii[']' .. 
'~' + 1], 594 | }); 595 | 596 | 597 | fn kw(name: []const u8) [6]Op { 598 | return C.text(name) ++ ident_boundary ++ one(&Skip); 599 | } 600 | 601 | const @"fn" = kw("fn"); 602 | const @"pub" = kw("pub"); 603 | const @"return" = kw("return"); 604 | const @"const" = kw("const"); 605 | const @"var" = kw("var"); 606 | const @"export" = kw("export"); 607 | const @"extern" = kw("extern"); 608 | const @"threadlocal" = kw("threadlocal"); 609 | const @"addrspace" = kw("addrspace"); 610 | const @"linksection" = kw("linksection"); 611 | const @"callconv" = kw("callconv"); 612 | const @"align" = kw("align"); 613 | const @"if" = kw("if"); 614 | const @"comptime" = kw("comptime"); 615 | const @"test" = kw("test"); 616 | const @"else" = kw("else"); 617 | const @"while" = kw("while"); 618 | const @"for" = kw("for"); 619 | const @"switch" = kw("switch"); 620 | const @"break" = kw("break"); 621 | const @"continue" = kw("continue"); 622 | const @"defer" = kw("defer"); 623 | const @"errdefer" = kw("errdefer"); 624 | const @"suspend" = kw("suspend"); 625 | const @"nosuspend" = kw("nosuspend"); 626 | const @"struct" = kw("struct"); 627 | const @"union" = kw("union"); 628 | const @"enum" = kw("enum"); 629 | const @"noinline" = kw("noinline"); 630 | const @"inline" = kw("inline"); 631 | const @"and" = kw("and"); 632 | const @"or" = kw("or"); 633 | const @"orelse" = kw("orelse"); 634 | const @"try" = kw("try"); 635 | const @"catch" = kw("catch"); 636 | const @"error" = kw("error"); 637 | 638 | fn op(s: []const u8, neg: []const u8) [if (neg.len == 0) 2 else 6]Op { 639 | if (neg.len == 0) { 640 | return C.text(s) ++ one(&Skip); 641 | } else { 642 | return C.text(s) ++ C.shun(C.charclass(neg)) ++ one(&Skip); 643 | } 644 | } 645 | 646 | const @"(" = op("(", ""); 647 | const @")" = op(")", ""); 648 | const @"{" = op("{", ""); 649 | const @"}" = op("}", ""); 650 | const @"[" = op("[", ""); 651 | const @"]" = op("]", ""); 652 | const @":" = op(":", ""); 653 | const @"," = op(",", ""); 654 | 
const @";" = op(";", "");
    const @"=" = op("=", "");
    const @"*" = op("*", "*%=|");
    const @"?" = op("?", "");
    const @"." = op(".", ".");
    const @".*" = op(".*", "");
    const @"..." = op("...", "");
    const @".?" = op(".?", "");
    const @"\\" = op("\\", "");
    const @"=>" = op("=>", "");
    const @".." = op("..", ".");
    const @"+" = op("+", "%+=|");
    const @"-" = op("-", "%=>|");
    const @"/" = op("/", "=");
    const @"%" = op("%", "=");
    const @"<<" = op("<<", "=|");
    const @">>" = op(">>", "=");
    const @"^" = op("^", "=");
    const @"|" = op("|", "|=");
    const @"==" = op("==", "");
    const @"!=" = op("!=", "");
    const @"<=" = op("<=", "");
    const @">=" = op(">=", "");
    const @"<" = op("<", "<=");
    const @">" = op(">", ">=");
    const @"!" = op("!", "=");
    const @"~" = op("~", "");
    const @"-%" = op("-%", "=");
    const @"&" = op("&", "=");

    const ident_boundary = C.shun(alnum_us); // next is not [A-Za-z0-9_]

    /// The exact text `name` followed by an identifier boundary. Unlike `kw`
    /// it does not consume trailing `Skip`.
    fn ident(name: []const u8) [5]Op {
        return C.text(name) ++ ident_boundary;
    }

    // Words that may not be used as identifiers. Every word that has a `kw`
    // token must also be listed here; `align` was previously missing, which
    // let `align` through as an ordinary identifier.
    // NOTE(review): the rule that consumes `reserved_exact` is outside this
    // chunk; if it carries an explicit length annotation, that annotation must
    // grow with this list -- confirm.
    const reserved_exact = alt(.{
        ident("fn"),
        ident("pub"),
        ident("return"),
        ident("const"),
        ident("var"),
        ident("break"),
        ident("continue"),
        ident("defer"),
        ident("errdefer"),
        ident("suspend"),
        ident("nosuspend"),
        ident("comptime"),
        ident("test"),
        ident("export"),
        ident("extern"),
        ident("threadlocal"),
        ident("addrspace"),
        ident("linksection"),
        ident("callconv"),
        ident("align"),
        ident("if"),
        ident("else"),
        ident("while"),
        ident("for"),
        ident("switch"),
        ident("struct"),
        ident("union"),
        ident("enum"),
        ident("and"),
        ident("or"),
        ident("orelse"),
        ident("try"),
        ident("catch"),
        ident("noinline"),
        ident("error"),
        ident("inline"),
    });
};

/// Parser type obtained by instantiating `VM` with the grammar above.
pub const ZigMiniParser = VM(ZigMiniGrammar);

/// Parses `src` with `ZigMiniParser.parseFully` and returns its boolean
/// result; the tests below treat `true` as "accepted" and `false` as
/// "rejected". Errors from `parseFully` are propagated.
///
/// NOTE(review): this uses `std.testing.allocator`, so it is presumably only
/// usable from test builds -- confirm before calling it from non-test code.
pub fn parseZigMini(src: [:0]const u8) !bool {
    return ZigMiniParser.parseFully(std.testing.allocator, src, .auto_continue);
}

test "zig mini: empty program" {
    try std.testing.expect(try parseZigMini("\n"));
}

test "zig mini: fn without params returning void" {
    try std.testing.expect(try parseZigMini(
        "fn main() void { return; }\n",
    ));
}

test "zig mini: pub fn with params and return type identifier" {
    try std.testing.expect(try parseZigMini(
        "pub fn add(a: i32, b: i32) i32 { return a; }\n",
    ));
}

test "zig mini: var/const decls and simple expr statement" {
    const src =
        "fn f() void {\n" ++
        " const x = 42;\n" ++
        " var y = 7;\n" ++
        " f();\n" ++
        "}\n";
    try std.testing.expect(try parseZigMini(src));
}

test "zig mini: const decl only" {
    try std.testing.expect(try parseZigMini("fn f() void { const x = 42; }\n"));
}

test "zig mini: var decl only" {
    try std.testing.expect(try parseZigMini("fn f() void { var y = 7; }\n"));
}

test "zig mini: call expr stmt only" {
    try std.testing.expect(try parseZigMini("fn f() void { f(); }\n"));
}

/// Embeds the file at `path` at compile time and reports whether the mini
/// grammar accepts it.
fn parseFile(comptime path: []const u8) !bool {
    const src: [:0]const u8 = @embedFile(path);
    return parseZigMini(src);
}

// ---- corpus files under test/ ----------------------------------------------
// 007_param_without_type has no test here.

test "file 001_fn_empty_block" {
    try std.testing.expect(try parseFile("test/001_fn_empty_block.zig"));
}

test "file 002_return_semicolon" {
    try std.testing.expect(try parseFile("test/002_return_semicolon.zig"));
}

test "file 003_var_const_and_call" {
    try std.testing.expect(try parseFile("test/003_var_const_and_call.zig"));
}

test "file 004_pub_fn_params_ret" {
    try std.testing.expect(try parseFile("test/004_pub_fn_params_ret.zig"));
}

test "file 005_call_with_args" {
    try std.testing.expect(try parseFile("test/005_call_with_args.zig"));
}

test "file 006_assignment" {
    try std.testing.expect(try parseFile("test/006_assignment.zig"));
}

test "file 008_toplevel_var" {
    try std.testing.expect(try parseFile("test/008_toplevel_var.zig"));
}

test "file 009_nested_blocks" {
    try std.testing.expect(try parseFile("test/009_nested_blocks.zig"));
}

test "file 010_two_functions" {
    try std.testing.expect(try parseFile("test/010_two_functions.zig"));
}

test "file 011_line_comments" {
    try std.testing.expect(try parseFile("test/011_line_comments.zig"));
}

test "file 012_comments_between_decls" {
    try std.testing.expect(try parseFile("test/012_comments_between_decls.zig"));
}

test "file 013_nested_call" {
    try std.testing.expect(try parseFile("test/013_nested_call.zig"));
}

test "file 014_keyword_as_identifier should fail" {
    try std.testing.expect(!try parseFile("test/014_keyword_as_identifier.zig"));
}

test "file 015_keyword_prefix_allowed" {
    try std.testing.expect(try parseFile("test/015_keyword_prefix_allowed.zig"));
}

test "file 016_addition" {
    try std.testing.expect(try parseFile("test/016_addition.zig"));
}

test "file 017_mul_precedence" {
    try std.testing.expect(try parseFile("test/017_mul_precedence.zig"));
}

test "file 018_nested_ops_calls" {
    try std.testing.expect(try parseFile("test/018_nested_ops_calls.zig"));
}

test "file 019_param_pointer" {
    try std.testing.expect(try parseFile("test/019_param_pointer.zig"));
}

test "file 020_param_slice_array" {
    try std.testing.expect(try parseFile("test/020_param_slice_array.zig"));
}

test "file 021_return_optional_ptr" {
    try std.testing.expect(try parseFile("test/021_return_optional_ptr.zig"));
}

test "file 022_complex_type_prefixes" {
    try std.testing.expect(try parseFile("test/022_complex_type_prefixes.zig"));
}

test "file 023_toplevel_pub_var" {
    try std.testing.expect(try parseFile("test/023_toplevel_pub_var.zig"));
}

test "file 024_mixed_toplevel" {
    try std.testing.expect(try parseFile("test/024_mixed_toplevel.zig"));
}

test "file 025_struct_empty" {
    try std.testing.expect(try parseFile("test/025_struct_empty.zig"));
}

test "file 026_union_empty" {
    try std.testing.expect(try parseFile("test/026_union_empty.zig"));
}

test "file 027_enum_empty" {
    try std.testing.expect(try parseFile("test/027_enum_empty.zig"));
}

test "file 028_struct_fields_simple" {
    try std.testing.expect(try parseFile("test/028_struct_fields_simple.zig"));
}

test "file 029_enum_fields" {
    try std.testing.expect(try parseFile("test/029_enum_fields.zig"));
}

test "file 030_nested_containers" {
    try std.testing.expect(try parseFile("test/030_nested_containers.zig"));
}

test "file 031_string_basic" {
    try std.testing.expect(try parseFile("test/031_string_basic.zig"));
}

test "file 032_string_escapes" {
    try std.testing.expect(try parseFile("test/032_string_escapes.zig"));
}

test "file 033_char_basic" {
    try std.testing.expect(try parseFile("test/033_char_basic.zig"));
}

test "file 034_char_escape_quote" {
    try std.testing.expect(try parseFile("test/034_char_escape_quote.zig"));
}

test "file 035_var_string_init" {
    try std.testing.expect(try parseFile("test/035_var_string_init.zig"));
}

test "file 036_call_with_string_arg" {
    try std.testing.expect(try parseFile("test/036_call_with_string_arg.zig"));
}

test "file 037_char_in_expr" {
    try std.testing.expect(try parseFile("test/037_char_in_expr.zig"));
}

test "file 038_return_error_union" {
    try std.testing.expect(try parseFile("test/038_return_error_union.zig"));
}

test "file 039_param_error_union" {
    try std.testing.expect(try parseFile("test/039_param_error_union.zig"));
}

test "file 040_nested_error_union" {
    try std.testing.expect(try parseFile("test/040_nested_error_union.zig"));
}

test "file 041_if_simple" {
    try std.testing.expect(try parseFile("test/041_if_simple.zig"));
}

test "file 042_if_else" {
    try std.testing.expect(try parseFile("test/042_if_else.zig"));
}

test "file 043_while_simple" {
    try std.testing.expect(try parseFile("test/043_while_simple.zig"));
}

test "file 044_while_else" {
    try std.testing.expect(try parseFile("test/044_while_else.zig"));
}

test "file 045_for_simple" {
    try std.testing.expect(try parseFile("test/045_for_simple.zig"));
}

test "file 046_if_expr_value" {
    try std.testing.expect(try parseFile("test/046_if_expr_value.zig"));
}

test "file 047_while_expr_else" {
    try std.testing.expect(try parseFile("test/047_while_expr_else.zig"));
}

test "file 048_for_expr_else" {
    try std.testing.expect(try parseFile("test/048_for_expr_else.zig"));
}

test "file 049_switch_expr_minimal" {
    try std.testing.expect(try parseFile("test/049_switch_expr_minimal.zig"));
}

test "file 050_break_continue" {
    try std.testing.expect(try parseFile("test/050_break_continue.zig"));
}

test "file 051_switch_stmt_no_semicolon" {
    try std.testing.expect(try parseFile("test/051_switch_stmt_no_semicolon.zig"));
}

test "file 052_if_assign_semicolon" {
    try std.testing.expect(try parseFile("test/052_if_assign_semicolon.zig"));
}

test "file 053_while_assign_semicolon" {
    try std.testing.expect(try parseFile("test/053_while_assign_semicolon.zig"));
}

test "file 054_for_assign_semicolon" {
    try std.testing.expect(try parseFile("test/054_for_assign_semicolon.zig"));
}

test "file 055_if_payload_expr" {
    try std.testing.expect(try parseFile("test/055_if_payload_expr.zig"));
}

test "file 056_while_payload_else_expr" {
    try std.testing.expect(try parseFile("test/056_while_payload_else_expr.zig"));
}

test "file 057_for_payload_expr" {
    try std.testing.expect(try parseFile("test/057_for_payload_expr.zig"));
}

test "file 058_switch_index_payload" {
    try std.testing.expect(try parseFile("test/058_switch_index_payload.zig"));
}

// ---- inline snippets ---------------------------------------------------------

test "expr: catch operator basic" {
    try std.testing.expect(try parseZigMini("fn f() void { x = y catch z; }\n"));
}

test "expr: catch with payload" {
    try std.testing.expect(try parseZigMini("fn f() void { x = y catch |e| z; }\n"));
}

test "spacing: operators without spaces" {
    try std.testing.expect(try parseZigMini("fn f() void {x=1+2*3;}\n"));
}

test "spacing: catch without space before payload" {
    try std.testing.expect(try parseZigMini("fn f() void {x=y catch|e|z;}\n"));
}

test "keyword boundary: orelse not inside identifier" {
    // This should parse as a single identifier, not as an operator.
    try std.testing.expect(try parseZigMini("fn f() void { xorelsey; }\n"));
}

test "keyword boundary: align is reserved" {
    // `align` has a keyword token, so it must be rejected as a name, exactly
    // like `fn` in 014_keyword_as_identifier.
    try std.testing.expect(!try parseZigMini("fn align() void {}\n"));
}

test "keyword boundary: align prefix allowed" {
    // Only the exact word is reserved; a longer identifier is fine.
    try std.testing.expect(try parseZigMini("fn aligned() void {}\n"));
}

test "suffix: member access" {
    try std.testing.expect(try parseZigMini("fn f() void { x = a.b; }\n"));
}

test "suffix: index access" {
    try std.testing.expect(try parseZigMini("fn f() void { x = a[0]; }\n"));
}

test "suffix: slice simple" {
    try std.testing.expect(try parseZigMini("fn f() void { x = a[0..n]; }\n"));
}

test "suffix: slice with sentinel" {
    // In Zig the `:2` part of `a[0..10:2]` is a sentinel, not a stride.
    try std.testing.expect(try parseZigMini("fn f() void { x = a[0..10:2]; }\n"));
}

test "suffix: slice open end" {
    try std.testing.expect(try parseZigMini("fn f() void { x = a[0..]; }\n"));
}

test "suffix: optional unwrap" {
    try std.testing.expect(try parseZigMini("fn f() void { x = y.?; }\n"));
}

test "suffix: deref" {
    try std.testing.expect(try parseZigMini("fn f() void { x = y.*; }\n"));
}

test "suffix: call chain" {
    try std.testing.expect(try parseZigMini("fn f() void { x = a.b(1,2)[1..n].* .? .c(); }\n"));
}

test "suffix: builtin call then chain" {
    try std.testing.expect(try parseZigMini("fn f() void { x = @foo()(1)[0].bar; }\n"));
}

test "switch: range prong" {
    const src =
        "fn f() void {\n" ++
        " const x = 0;\n" ++
        " switch (x) { 0...9 => 1, else => 0 }\n" ++
        "}\n";
    try std.testing.expect(try parseZigMini(src));
}

test "for: range single item and payload" {
    const src =
        "fn f() void {\n" ++
        " for (0..10) |i| { }\n" ++
        "}\n";
    try std.testing.expect(try parseZigMini(src));
}

test "for: multiple items with payload list" {
    const src =
        "fn f() void {\n" ++
        " for (0..10, 0..n,) |i, j| { }\n" ++
        "}\n";
    try std.testing.expect(try parseZigMini(src));
}

test "for: open-ended range item" {
    const src =
        "fn f() void {\n" ++
        " for (0..) |i| { }\n" ++
        "}\n";
    try std.testing.expect(try parseZigMini(src));
}

test "while: continue expression with block" {
    const src =
        "fn f() void {\n" ++
        " while (x) : (i = i + 1) { break; }\n" ++
        "}\n";
    try std.testing.expect(try parseZigMini(src));
}

test "while: continue expression with assign branch" {
    const src =
        "fn f() void {\n" ++
        " while (x) : (i = i + 1) y = z;\n" ++
        "}\n";
    try std.testing.expect(try parseZigMini(src));
}

test "error: literal as expr" {
    try std.testing.expect(try parseZigMini("fn f() void { const x = error.Foo; }\n"));
}

test "error: literal with catch" {
    try std.testing.expect(try parseZigMini("fn f() void { x = y catch error.Fail; }\n"));
}

test "error: set decl in return type" {
    const src = "fn f() error{A,B}!T { return; }\n";
    try std.testing.expect(try parseZigMini(src));
}

test "error: set decl in param type" {
    const src = "fn f(e: error{ A, B, }) void {}\n";
    try std.testing.expect(try parseZigMini(src));
}

test "labels: labeled block with break value" {
    const src =
        "fn f() void {\n" ++
        " blk: { break :blk 1; }\n" ++
        "}\n";
    try std.testing.expect(try parseZigMini(src));
}

test "labels: while labeled and continue/break labels" {
    const src =
        "fn f() void {\n" ++
        " outer: while (0) { continue :outer; break :outer; }\n" ++
        "}\n";
    try std.testing.expect(try parseZigMini(src));
}

test "labels: for labeled and continue label" {
    const src =
        "fn f() void {\n" ++
        " outer: for (0..10) |i| { continue :outer; }\n" ++
        "}\n";
    try std.testing.expect(try parseZigMini(src));
}

test "abi: extern fn with attrs" {
    const src =
        "extern \"c\" fn f(a: i32) align(4) addrspace(0) linksection(\".text\") callconv(.C) void { }\n";
    try std.testing.expect(try parseZigMini(src));
}

test "fn: inferred error set return" {
    try std.testing.expect(try parseZigMini("fn f() !u32 { return; }\n"));
}

test "abi: export threadlocal global with type and attrs" {
    const src =
        "export threadlocal const x: i32 align(16) addrspace(1) linksection(\".data\") = 0;\n";
    try std.testing.expect(try parseZigMini(src));
}

test "toplevel: test and comptime blocks" {
    const src =
        "test \"name\" { defer {} }\n" ++
        "comptime { var a = 1; }\n";
    try std.testing.expect(try parseZigMini(src));
}

test "stmt: defer, errdefer, suspend, nosuspend" {
    const src =
        "fn f() void {\n" ++
        " defer { }\n" ++
        " errdefer |e| { }\n" ++
        " suspend { }\n" ++
        " nosuspend { }\n" ++
        "}\n";
    try std.testing.expect(try parseZigMini(src));
}

test "switch: inline prong single item" {
    const src =
        "fn f() void {\n" ++
        " const x = 0;\n" ++
        " switch (x) { inline 1 => 2, else => 3 }\n" ++
        "}\n";
    try std.testing.expect(try parseZigMini(src));
}

test "switch: multiple items with range" {
    const src =
        "fn f() void {\n" ++
        " const x = 0;\n" ++
        " switch (x) { 0, 2...4, 9 => 1, else => 0 }\n" ++
        "}\n";
    try std.testing.expect(try parseZigMini(src));
}