├── .gitignore ├── benchmarks └── dishwasher │ ├── build.zig.zon │ ├── src │ ├── gl2.xml │ └── main.zig │ └── build.zig ├── src ├── dishwasher.zig ├── stringify.zig ├── Scanner.zig ├── populate.zig └── parse.zig ├── LICENSE ├── README.md └── docs └── index.html /.gitignore: -------------------------------------------------------------------------------- 1 | .zig-cache 2 | zig-out 3 | 4 | benchmarks/dishwasher/src/mega.xml 5 | -------------------------------------------------------------------------------- /benchmarks/dishwasher/build.zig.zon: -------------------------------------------------------------------------------- 1 | .{ 2 | .name = .benchmarks_dishwasher, 3 | .version = "0.0.0", 4 | .fingerprint = 0xadb94a6b532fc55c, 5 | .dependencies = .{ .dishwasher = .{ .path = "../.." } }, 6 | .paths = .{ "build.zig", "build.zig.zon", "src" }, 7 | } 8 | -------------------------------------------------------------------------------- /src/dishwasher.zig: -------------------------------------------------------------------------------- 1 | pub const Scanner = @import("Scanner.zig"); 2 | pub const Populate = @import("populate.zig").Populate; 3 | 4 | pub const parse = @import("parse.zig"); 5 | pub const populate = @import("populate.zig"); 6 | // Reference each module so that the tests declared in it are included when this file is tested. Note that stringify.zig is not referenced here. 7 | test { 8 | _ = @import("Scanner.zig"); 9 | _ = @import("parse.zig"); 10 | _ = @import("populate.zig"); 11 | } 12 | -------------------------------------------------------------------------------- /benchmarks/dishwasher/src/gl2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Copyright 2013-2020 The Khronos Group Inc. 5 | SPDX-License-Identifier: Apache-2.0 6 | 7 | This file, gl.xml, is the OpenGL and OpenGL API Registry. 
The canonical 8 | version of the registry, together with documentation, schema, and Python 9 | generator scripts used to generate C header files for OpenGL and OpenGL ES, 10 | can always be found in the Khronos Registry at 11 | https://github.com/KhronosGroup/OpenGL-Registry 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /benchmarks/dishwasher/build.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | 3 | /// Declares the `dishwasher-benchmarks` executable, makes the `dishwasher` 4 | /// dependency module importable from it, installs it and registers a `run` 5 | /// step that forwards any extra command-line arguments to the executable. 6 | pub fn build(b: *std.Build) void { 7 | const target = b.standardTargetOptions(.{}); 8 | const optimize = b.standardOptimizeOption(.{}); 9 | const dishwasher_dep = b.dependency("dishwasher", .{}); 10 | 11 | // Root module of the benchmark, with the library importable as "dishwasher". 12 | const root_module = b.createModule(.{ 13 | .root_source_file = b.path("src/main.zig"), 14 | .target = target, 15 | .optimize = optimize, 16 | }); 17 | root_module.addImport("dishwasher", dishwasher_dep.module("dishwasher")); 18 | 19 | const benchmark_exe = b.addExecutable(.{ 20 | .name = "dishwasher-benchmarks", 21 | .root_module = root_module, 22 | }); 23 | b.installArtifact(benchmark_exe); 24 | 25 | // The run command depends on the install step and receives the user's arguments. 26 | const run_artifact = b.addRunArtifact(benchmark_exe); 27 | run_artifact.step.dependOn(b.getInstallStep()); 28 | if (b.args) |extra_args| run_artifact.addArgs(extra_args); 29 | 30 | b.step("run", "Run the app").dependOn(&run_artifact.step); 31 | } 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | 3 | Copyright 2024 edqx 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do 
so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | 11 | -------------------------------------------------------------------------------- /benchmarks/dishwasher/src/main.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const dishwasher = @import("dishwasher"); 3 | 4 | /// Benchmarks `dishwasher.parse.fromSlice`: reads `./src/gl.xml` (relative to the 5 | /// current working directory, at most 200 MiB), parses it 100 times and prints the 6 | /// minimum, maximum and mean of the per-run times (truncated to whole milliseconds) 7 | /// to stdout. Returns any allocation, file, parse or write error. 8 | pub fn main() !void { 9 | var gpa = std.heap.GeneralPurposeAllocator(.{}){}; 10 | // Declared first so it runs last: the allocator is torn down after every other 11 | // cleanup, which is also when it reports leaked allocations. 12 | defer _ = gpa.deinit(); 13 | const allocator = gpa.allocator(); 14 | 15 | var times = std.ArrayList(usize).init(allocator); 16 | defer times.deinit(); 17 | 18 | const file_data = try std.fs.cwd().readFileAlloc(allocator, "./src/gl.xml", 1024 * 1024 * 200); 19 | defer allocator.free(file_data); 20 | 21 | for (0..100) |_| { 22 | const start_us = std.time.microTimestamp(); 23 | var owned_document = try dishwasher.parse.fromSlice(allocator, file_data); 24 | // Runs at the end of each iteration, after the end timestamp is taken, so 25 | // freeing the tree is not part of the measurement. 26 | defer owned_document.deinit(); 27 | const end_us = std.time.microTimestamp(); 28 | try times.append(@intCast(@divFloor(end_us - start_us, 1000))); 29 | } 30 | 31 | const min = std.mem.min(usize, times.items); 32 | const max = std.mem.max(usize, times.items); 33 | var sum: usize = 0; 34 | for (times.items) |elapsed_ms| sum += elapsed_ms; 35 | const avg = @as(f32, @floatFromInt(sum)) / @as(f32, @floatFromInt(times.items.len)); 36 | 37 | try std.io.getStdOut().writer().print("min: {}ms, max: {}ms, avg: {d:2}ms\n", .{ min, max, avg }); 38 | } 39 | 
-------------------------------------------------------------------------------- /src/stringify.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const Tree = @import("./parse.zig").Tree; 3 | 4 | /// Formatting options accepted by `writeTree`. 5 | pub const Options = struct { 6 | /// Indentation unit per nesting level. Currently only used to compute the 7 | /// indentation of wrapped text. 8 | pub const IndentStyle = union(enum) { 9 | none: void, 10 | tabs: void, 11 | spaces: usize, 12 | }; 13 | 14 | /// How the contents of text nodes are written. 15 | pub const TextFormattingStyle = union(enum) { 16 | /// Write the text exactly as stored in the tree. 17 | verbatim: void, 18 | /// Collapse whitespace runs to one space and wrap lines at this many columns. 19 | max_width: usize, 20 | }; 21 | 22 | indent_style: IndentStyle = .{ 23 | .spaces = 4, 24 | }, 25 | 26 | // Whether or not text content should be trimmed and aligned as if whitespace 27 | // was limited to 1 (one) space. 28 | format_text_content: TextFormattingStyle = .{ .max_width = 80 }, 29 | }; 30 | 31 | /// Writes `text` to `writer` with every run of whitespace collapsed to a single 32 | /// space and leading/trailing whitespace dropped. A line break is inserted before 33 | /// a word that would pass `max_width` columns; continuation lines are padded with 34 | /// `indentation` spaces, and the first line is assumed to start at that column. 35 | /// A single word longer than the available width is written unbroken. 36 | fn writeTextFormatted(writer: anytype, indentation: usize, text: []const u8, max_width: usize) !void { 37 | var column: usize = indentation; 38 | var is_first_word = true; 39 | var words = std.mem.tokenizeAny(u8, text, " \t\r\n"); 40 | while (words.next()) |word| { 41 | if (is_first_word) { 42 | is_first_word = false; 43 | } else if (column + 1 + word.len > max_width) { 44 | try writer.writeByte('\n'); 45 | for (0..indentation) |_| try writer.writeByte(' '); 46 | column = indentation; 47 | } else { 48 | try writer.writeByte(' '); 49 | column += 1; 50 | } 51 | try writer.writeAll(word); 52 | column += word.len; 53 | } 54 | } 55 | 56 | /// Writes the children of `tree` to `writer`; `depth` is the nesting level used to 57 | /// derive the indentation of wrapped text. Only text nodes are written so far. 58 | fn writeTreeImpl(tree: Tree, options: Options, writer: anytype, depth: usize) !void { 59 | // Columns of indentation per nesting level; a tab is counted as one column. 60 | const indent_unit: usize = switch (options.indent_style) { 61 | .none => 0, 62 | .tabs => 1, 63 | .spaces => |count| count, 64 | }; 65 | const indentation = depth * indent_unit; 66 | 67 | for (tree.children) |child| { 68 | switch (child) { 69 | // TODO: element and comment serialisation are not implemented yet. 70 | .elem, .comment => {}, 71 | .text => |text_child| switch (options.format_text_content) { 72 | .verbatim => try writer.writeAll(text_child.contents), 73 | .max_width => |max_width| try writeTextFormatted(writer, indentation, text_child.contents, max_width), 74 | }, 75 | } 76 | } 77 | } 78 | 79 | /// Writes `tree` to `writer` using `options`. Errors from `writer` are returned. 80 | pub fn writeTree(tree: Tree, options: Options, writer: anytype) !void { 81 | try writeTreeImpl(tree, options, writer, 0); 82 | } 83 | 84 | test writeTree { 85 | const tree: Tree = .{ .children = &.{} }; 86 | var array = std.array_list.Managed(u8).init(std.testing.allocator); 87 | defer array.deinit(); 88 | 89 | try writeTree(tree, .{}, array.writer()); 90 | // An empty tree produces no output. 91 | try std.testing.expectEqual(@as(usize, 0), array.items.len); 92 | } 93 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Dishwasher 2 | 3 | A fairly fast XML parser for [Zig](https://ziglang.org). 
4 | 5 | **For those on Zig 0.15.2, check out the [0.15.2](https://github.com/edqx/dishwasher/tree/0.15.2) branch** 6 | 7 | Note that this parser isn't strictly spec-compliant; however, it will probably 8 | work with most well-formed XML documents. 9 | 10 | ## Features 11 | - [x] Pretty speedy 12 | - [x] Reader API-friendly 13 | - [x] Can populate structs 14 | - [x] Can populate dynamic values 15 | - [x] Compile-time parsing 16 | - [x] Diagnostics for malformed documents 17 | - [ ] Stringification (coming soon) 18 | 19 | ### Benchmarks 20 | 21 | Here are the results from the given benchmarks on my PC (i9-14900kf) in different 22 | optimisation modes, when parsing the [OpenGL XML Spec document](https://github.com/KhronosGroup/OpenGL-Registry/blob/main/xml/gl.xml) 23 | fully. 24 | 25 | | Mode | Min | Max | Avg | 26 | |------|-----|-----|-----| 27 | | `Debug` | `100ms` | `280ms` | `131ms` | 28 | | `ReleaseSafe` | `13ms` | `25ms` | `15ms` | 29 | | `ReleaseSmall` | `18ms` | `50ms` | `30ms` | 30 | | `ReleaseFast` | `7ms` | `27ms` | `13ms` | 31 | 32 | _All times are taken over 100 runs and rounded to 2 significant figures._ 33 | 34 | ## Documentation 35 | 36 | Generate documentation for dishwasher with `zig build docs`, which outputs browser-ready files to `zig-out/docs`. You can serve 37 | these however you like, for example with Python: `python -m http.server` or with Node.js: `npx serve`. 38 | 39 | ## Usage 40 | 41 | Dishwasher has 4 APIs, 3 of which will be most useful. 42 | 43 | - [Parsing API](#parsing-api) - for parsing an entire XML document at runtime. 44 | - [Populate API](#populate-api) - for mapping an XML document to a given struct. 45 | - [Comptime Parsing and Populate API](#comptime-parsing-and-populate-api) - for parsing an entire XML document at compile time. 46 | - [Scanner API](#scanner-api) - for iterating through XML symbols from a slice or reader. 
47 | 48 | ### Parsing API 49 | Dishwasher lets you parse an XML document from either an entire slice or a 50 | reader into a tree-like structure that represents all nodes. 51 | 52 | All of the parse methods create an arena which is returned to you so that 53 | you can deinitialise it when you no longer need the data. 54 | 55 | #### Parse from a slice 56 | ```zig 57 | const owned_tree = try dishwasher.parse.fromSlice(allocator, xml_text); 58 | defer owned_tree.deinit(); // all strings and lists will be freed 59 | 60 | std.debug.assert(owned_tree.tree.children[0] == .elem); 61 | ``` 62 | 63 | #### Parse from a reader 64 | ```zig 65 | const owned_tree = try dishwasher.parse.fromReader(allocator, file.reader()); 66 | defer owned_tree.deinit(); 67 | 68 | std.debug.assert(owned_tree.tree.children[0] == .elem); 69 | ``` 70 | 71 | #### Diagnostics 72 | You can also get basic information about invalid documents by creating a parse 73 | diagnostics struct and passing it into either `parse.fromSliceDiagnostics` 74 | or `parse.fromReaderDiagnostics`. 75 | ```zig 76 | var diagnostics = dishwasher.parse.Diagnostics.init(allocator); 77 | defer diagnostics.deinit(); 78 | 79 | const parsed = try dishwasher.parse.fromReaderDiagnostics(allocator, file.reader(), &diagnostics); 80 | defer parsed.deinit(); 81 | 82 | for (diagnostics.defects.items) |defect| { 83 | std.debug.print("{} from {}..{}", .{ defect.kind, defect.range.start, defect.range.end }); 84 | } 85 | ``` 86 | 87 | #### Tree API 88 | The returned tree has the following signature: 89 | ```zig 90 | const Tree = struct { 91 | pub const Node = union(enum) { 92 | pub const Elem = struct { 93 | pub const Attr = struct { 94 | name: []const u8, 95 | value: ?[]const u8, 96 | }; 97 | 98 | tag_name: []const u8, 99 | attributes: []const Attr, 100 | tree: ?Tree, 101 | 102 | // Get an attribute given its name. 
103 | pub fn attributeByName(self: Elem, needle: []const u8) ?Attr; 104 | pub fn attr(self: Elem, needle: []const u8) ?Attr; 105 | 106 | // Get the value of an attribute given its name. Note that if the 107 | // attribute has no value, e.g.,