├── examples └── only_enum.proto ├── default.nix ├── .github └── workflows │ └── ci.yml ├── README.md ├── LICENSE └── src ├── main.zig ├── lib.zig ├── generator.zig ├── parser.zig └── tokenizer.zig /examples/only_enum.proto: -------------------------------------------------------------------------------- 1 | enum SomeKind { 2 | NONE = 0; 3 | A = 1; 4 | B = 2; 5 | C = 3; 6 | } 7 | -------------------------------------------------------------------------------- /default.nix: -------------------------------------------------------------------------------- 1 | with (import (fetchTarball https://github.com/nixos/nixpkgs/archive/nixpkgs-unstable.tar.gz) {}); 2 | let 3 | inherit (pkgs) stdenv fetchFromGitHub; 4 | in stdenv.mkDerivation (rec { 5 | name = "protozig"; 6 | buildInputs = with pkgs; [ 7 | protobuf 8 | nanopb 9 | gdb 10 | zig 11 | zls 12 | ]; 13 | }) 14 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | name: Build and test 8 | runs-on: ${{ matrix.os }}-latest 9 | strategy: 10 | fail-fast: false 11 | matrix: 12 | os: [macos, ubuntu, windows] 13 | 14 | steps: 15 | - name: Configure git 16 | if: matrix.os == 'windows' 17 | run: git config --global core.autocrlf false 18 | 19 | - name: Checkout 20 | uses: actions/checkout@v2 21 | 22 | - name: Setup Zig 23 | uses: goto-bus-stop/setup-zig@v1 24 | with: 25 | version: 0.9.0 26 | 27 | - name: Test 28 | run: zig build test 29 | 30 | - name: Build binary 31 | run: zig build install 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # protozig 2 | 3 | Ideally, this will be a complete implementation of [protobuf](https://developers.google.com/protocol-buffers/docs/overview) 4 | in 
[Zig](https://ziglang.org). 5 | 6 | Initially, my focus will be entirely on a standalone protobuf to Zig translator. The syntax version I'll be focusing 7 | on will be `proto3` (in the future, happy to add the previous `proto2` version too). 8 | 9 | ## How to... 10 | 11 | You will need at least [Zig v0.9.0](https://ziglang.org/download/) in your path. Alternatively, if you can use 12 | Nix, simply enter a new shell in the repo's root: 13 | 14 | ``` 15 | $ nix-shell 16 | ``` 17 | 18 | Building the `protozig` proto-to-zig translator: 19 | 20 | ``` 21 | $ zig build install 22 | ``` 23 | 24 | Running tests: 25 | 26 | ``` 27 | $ zig build test 28 | ``` 29 | 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Jakub Konka 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
// ===== /src/main.zig =====
// Command-line entry point: reads a .proto file and prints the generated Zig
// source to stdout, or a diagnostic to stderr.

const std = @import("std");
const fmt = std.fmt;
const fs = std.fs;
const io = std.io;
const mem = std.mem;
const process = std.process;

const protozig = @import("lib.zig");

var gpa_alloc = std.heap.GeneralPurposeAllocator(.{}){};
const gpa = gpa_alloc.allocator();

const usage =
    \\Usage: protozig
    \\
    \\General options:
    \\-h, --help Print this help and exit
;

/// Prints `"fatal: " ++ format` to stderr and terminates the process with
/// exit status 1. Reporting is best effort: if formatting or the write fails
/// there is nothing more useful to do than exit anyway.
fn fatal(comptime format: []const u8, args: anytype) noreturn {
    exit: {
        const msg = fmt.allocPrint(gpa, "fatal: " ++ format, args) catch break :exit;
        defer gpa.free(msg);
        io.getStdErr().writeAll(msg) catch {};
    }
    process.exit(1);
}

/// Translates the .proto file named by the first argument.
///
/// On success the generated code is written to stdout. On a translation error
/// the diagnostic is written to stderr and the process exits with status 1 so
/// that scripts and CI can detect the failure.
pub fn main() !void {
    var arena_allocator = std.heap.ArenaAllocator.init(gpa);
    defer arena_allocator.deinit();
    const arena = arena_allocator.allocator();

    const args = try std.process.argsAlloc(arena);
    // `< 2` rather than `== 1`: also guards an empty argv before indexing args[1].
    if (args.len < 2) {
        fatal("no input file specified\n", .{});
    }

    const stdout = io.getStdOut().writer();
    const stderr = io.getStdErr().writer();
    if (mem.eql(u8, "-h", args[1]) or mem.eql(u8, "--help", args[1])) {
        try stdout.writeAll(usage);
        return;
    }

    const proto_file_path = args[1];
    const proto_file = try fs.cwd().openFile(proto_file_path, .{});
    defer proto_file.close();
    const raw_contents = try proto_file.readToEndAlloc(arena, std.math.maxInt(u32));

    const res = try protozig.generate(gpa, raw_contents);
    switch (res) {
        .ok => |code| {
            defer gpa.free(code);
            try stdout.writeAll(code);
            try stdout.writeByte('\n');
        },
        .err => |err_msg| {
            try stderr.writeAll(err_msg);
            try stderr.writeByte('\n');
            // The message is deliberately not freed here: the process is
            // about to terminate, and a failed translation must not look
            // like success to the caller.
            process.exit(1);
        },
    }
}
// ===== /src/lib.zig =====
// Library entry point: tokenizes, parses and translates proto source to Zig.

const std = @import("std");
const mem = std.mem;
const testing = std.testing;

pub const generator = @import("generator.zig");
pub const parser = @import("parser.zig");
pub const tokenizer = @import("tokenizer.zig");

test {
    testing.refAllDecls(@This());
}

const Allocator = mem.Allocator;
const Generator = generator.Generator;
const Parser = parser.Parser;
const Scope = parser.Scope;
const Tokenizer = tokenizer.Tokenizer;
const Token = tokenizer.Token;
const TokenIndex = tokenizer.TokenIndex;
const TokenIterator = tokenizer.TokenIterator;

/// Outcome of `generate`. Both payloads are allocated with the allocator
/// passed to `generate`; the caller owns the slice.
pub const Result = union(enum) {
    /// Generated Zig source.
    ok: []const u8,
    /// Human-readable diagnostic.
    err: []const u8,
};

/// Translates proto `source` into Zig source code.
///
/// Intermediate data (tokens, parse nodes, error records) lives in an arena
/// that is released before returning. Anything handed back to the caller is
/// therefore allocated with `gpa` and must not point into the arena.
///
/// Returns an error only for allocation or integer-parsing failures; syntax
/// and semantic problems are reported through `Result.err`.
pub fn generate(gpa: Allocator, source: []const u8) !Result {
    var arena_alloc = std.heap.ArenaAllocator.init(gpa);
    defer arena_alloc.deinit();
    const arena = arena_alloc.allocator();

    // The token list always ends with exactly one `.eof` token.
    var ttokenizer = Tokenizer{ .buffer = source };
    var tokens = std.ArrayList(Token).init(arena);
    while (true) {
        const token = ttokenizer.next();
        try tokens.append(token);
        if (token.id == .eof) break;
    }

    var token_it = TokenIterator{ .buffer = tokens.items };
    var scope = Scope{};
    var pparser = Parser{
        .arena = arena,
        .token_it = &token_it,
        .scope = &scope,
    };
    switch (try pparser.parse()) {
        .ok => {},
        .err => |err_msg| {
            var msg = std.ArrayList(u8).init(gpa);
            defer msg.deinit();

            // The parser may record a position one past the last token (for
            // example when it runs into end of file); clamp it so the lookup
            // below can never index out of bounds.
            const last = tokens.items.len - 1;
            const index = if (err_msg.loc > last) last else err_msg.loc;
            const token = tokens.items[index];

            // TODO restore the immediate parent scope for error handling
            try msg.writer().print("{s}\n{s}", .{
                source[token.loc.start..token.loc.end],
                err_msg.msg,
            });

            return Result{ .err = msg.toOwnedSlice() };
        },
    }

    var code = std.ArrayList(u8).init(gpa);
    defer code.deinit();

    var ggenerator = Generator{
        .arena = arena,
        .buffer = source,
        .tokens = tokens.items,
        .parse_scope = &scope,
    };
    switch (try ggenerator.generate(&code)) {
        .ok => {},
        .err => |err_msg| {
            // `err_msg.msg` is arena memory that is freed when this function
            // returns; copy it so the caller receives a valid slice.
            return Result{ .err = try gpa.dupe(u8, err_msg.msg) };
        },
    }

    return Result{ .ok = code.toOwnedSlice() };
}

// ===== /src/generator.zig =====
// Emits Zig source for the nodes collected by the parser.

const std = @import("std");
const assert = std.debug.assert;
const mem = std.mem;
const parser = @import("parser.zig");
const tokenizer = @import("tokenizer.zig");

const Allocator = mem.Allocator;
const Token = tokenizer.Token;
const ParseNode = parser.Node;
const ParseScope = parser.Scope;

/// Diagnostic produced by the generator.
const ErrorMsg = struct {
    msg: []const u8,
    // loc: usize,
};

/// Outcome of `Generator.generate`.
const GenResult = union(enum) {
    ok: void,
    err: *ErrorMsg,
};

const GenError = error{
    OutOfMemory,
    GenFail,
} || std.fmt.ParseIntError;

pub const Generator = struct {
    /// Allocator for scratch data and error messages.
    arena: Allocator,
    /// The proto source text that `tokens` index into.
    buffer: []const u8,
    tokens: []const Token,
    parse_scope: *ParseScope,
    /// Set by `fail`; at most one error is recorded per generator.
    err_msg: ?*ErrorMsg = null,

    /// Records a formatted diagnostic in `err_msg` and returns
    /// `error.GenFail` for the caller to propagate.
    fn fail(gen: *Generator, comptime format: []const u8, args: anytype) GenError {
        assert(gen.err_msg == null);
        const err_msg = try gen.arena.create(ErrorMsg);
        err_msg.* = .{ .msg = try std.fmt.allocPrint(gen.arena, format, args) };
        gen.err_msg = err_msg;
        return error.GenFail;
    }

    /// Appends the generated Zig source to `code`.
    ///
    /// `error.GenFail` is converted into `GenResult.err`, whose message is
    /// allocated with `arena`; every other error is returned unchanged.
    pub fn generate(gen: *Generator, code: *std.ArrayList(u8)) !GenResult {
        gen.generateInternal(code) catch |err| switch (err) {
            error.GenFail => return GenResult{ .err = gen.err_msg.? },
            else => |e| return e,
        };
        return GenResult{ .ok = {} };
    }

    fn generateInternal(gen: *Generator, code: *std.ArrayList(u8)) GenError!void {
        for (gen.parse_scope.nodes.items) |node, i| {
            // Separate consecutive declarations with a blank line; without it
            // they are emitted as `};pub const ...` on one line.
            if (i != 0) try code.appendSlice("\n\n");

            // Exhaustive on purpose: adding a node kind to the parser must
            // produce a compile error here until it is handled.
            switch (node) {
                .@"enum" => {
                    // TODO verify at global scope that it wasn't redefined by any chance
                    try gen.generateEnum(node, code);
                },
            }
        }
    }

    /// Emits `pub const <Name> = enum { ... };` for an enum node.
    ///
    /// Fails with a diagnostic when a variant name or a numeric value occurs
    /// more than once within the enum.
    fn generateEnum(gen: *Generator, node: ParseNode, code: *std.ArrayList(u8)) GenError!void {
        // TODO these should be methods on respective wrapper structs like `Ast` in zig, etc.
        const enum_name_tok = gen.tokens[node.@"enum".name];
        const enum_name = gen.buffer[enum_name_tok.loc.start..enum_name_tok.loc.end];
        const writer = code.writer();
        try writer.print("pub const {s} = enum {{\n", .{enum_name});

        // Keys are slices of `gen.buffer`, which outlives both maps.
        var field_names = std.StringHashMap(void).init(gen.arena);
        var field_values = std.AutoHashMap(usize, void).init(gen.arena);

        for (node.@"enum".fields.items) |field| {
            const field_name_tok = gen.tokens[field[0]];
            const field_name = gen.buffer[field_name_tok.loc.start..field_name_tok.loc.end];

            if ((try field_names.getOrPut(field_name)).found_existing) {
                return gen.fail("variant '{s}' already defined", .{field_name});
            }

            const field_value_tok = gen.tokens[field[1]];
            const field_value = try std.fmt.parseInt(
                usize,
                gen.buffer[field_value_tok.loc.start..field_value_tok.loc.end],
                10,
            );

            if ((try field_values.getOrPut(field_value)).found_existing) {
                return gen.fail("value '{d}' already assigned to a variant", .{field_value});
            }

            try writer.print(" {s} = {d},\n", .{ field_name, field_value });
        }

        try writer.writeAll("};");
    }
};
// ===== /src/parser.zig =====
// Turns the token stream into a flat list of top-level nodes.

const std = @import("std");
const assert = std.debug.assert;
const log = std.log;
const mem = std.mem;
const tokenizer = @import("tokenizer.zig");

const Allocator = mem.Allocator;
const TokenIndex = tokenizer.TokenIndex;
const TokenIterator = tokenizer.TokenIterator;
const Tokenizer = tokenizer.Tokenizer;
const Token = tokenizer.Token;

/// Collection of nodes parsed at one nesting level.
pub const Scope = struct {
    nodes: std.ArrayListUnmanaged(Node) = .{},
};

/// Token range of a node; both ends are token indices.
const Loc = struct {
    start: TokenIndex,
    end: TokenIndex,
};

pub const Node = union(enum) {
    @"enum": EnumNode,
};

const EnumNode = struct {
    /// From the `enum` keyword to the closing brace.
    loc: Loc,
    /// Index of the identifier token naming the enum.
    name: TokenIndex,
    /// One `(name token, value token)` pair per variant.
    fields: std.ArrayListUnmanaged(FieldTuple) = .{},

    const FieldTuple = std.meta.Tuple(&[_]type{ TokenIndex, TokenIndex });
};

const ParseError = error{
    OutOfMemory,
    ParseFail,
};

const ParseResult = union(enum) {
    ok: void,
    err: *ErrorMsg,
};

const ErrorMsg = struct {
    msg: []const u8,
    /// Index of the token the error refers to.
    loc: TokenIndex,
};

pub const Parser = struct {
    /// Allocator for nodes and error messages.
    arena: Allocator,
    token_it: *TokenIterator,
    scope: *Scope,
    /// Set by `fail`; at most one error is recorded per parser.
    err_msg: ?*ErrorMsg = null,

    /// Records a formatted diagnostic located at the iterator's current
    /// position and returns `error.ParseFail`.
    ///
    /// Callers that have already consumed the offending token must seek back
    /// to it first so that the recorded location points at that token.
    fn fail(parser: *Parser, comptime format: []const u8, args: anytype) ParseError {
        assert(parser.err_msg == null);
        const err_msg = try parser.arena.create(ErrorMsg);
        err_msg.* = .{
            .msg = try std.fmt.allocPrint(parser.arena, format, args),
            .loc = parser.token_it.pos,
        };
        parser.err_msg = err_msg;
        return error.ParseFail;
    }

    /// Parses the whole token stream into `scope`.
    ///
    /// `error.ParseFail` is converted into `ParseResult.err`; every other
    /// error is returned unchanged.
    pub fn parse(parser: *Parser) !ParseResult {
        parser.parseInternal() catch |err| switch (err) {
            error.ParseFail => return ParseResult{ .err = parser.err_msg.? },
            else => |e| return e,
        };
        return ParseResult{ .ok = {} };
    }

    fn parseInternal(parser: *Parser) ParseError!void {
        while (true) {
            const pos = parser.token_it.pos;
            const token = parser.token_it.next();
            if (token.id == .eof) break;

            switch (token.id) {
                .keyword_enum => try parser.parseEnum(pos),
                else => {
                    // Point the diagnostic at the token just consumed rather
                    // than at the one following it.
                    parser.token_it.seekTo(pos);
                    return parser.fail("TODO unhandled token", .{});
                },
            }
        }
    }

    /// Parses `<name> { <ident> = <int>; ... }`; the `enum` keyword at token
    /// index `start` has already been consumed.
    fn parseEnum(parser: *Parser, start: TokenIndex) ParseError!void {
        const name = try parser.expectToken(.identifier);
        _ = try parser.expectToken(.l_brace);

        var enum_node = EnumNode{
            .loc = .{
                .start = start,
                .end = undefined, // set once the closing brace is found
            },
            .name = name,
        };

        while (true) {
            const pos = parser.token_it.pos;
            const token = parser.token_it.next();

            switch (token.id) {
                .identifier => {
                    const field_name = pos;
                    _ = try parser.expectToken(.equal);
                    const field_value = try parser.expectToken(.int_literal);
                    _ = try parser.expectToken(.semicolon);
                    try enum_node.fields.append(parser.arena, .{ field_name, field_value });
                },
                .r_brace => {
                    enum_node.loc.end = pos;
                    break;
                },
                else => {
                    // Seek back so the recorded location is the offending
                    // token. Without this an unterminated enum records a
                    // position past the final `.eof` token.
                    parser.token_it.seekTo(pos);
                    return parser.fail("unexpected token: {}", .{token.id});
                },
            }
        }

        try parser.scope.nodes.append(parser.arena, Node{
            .@"enum" = enum_node,
        });
    }

    /// Consumes the next token if its id is `id` and returns its index.
    /// On a mismatch the iterator is left on the offending token and a
    /// diagnostic is recorded.
    fn expectToken(parser: *Parser, id: Token.Id) ParseError!TokenIndex {
        const pos = parser.token_it.pos;
        _ = parser.token_it.peek() orelse return parser.fail("unexpected end of file", .{});
        const token = parser.token_it.next();
        if (token.id == id) {
            return pos;
        } else {
            parser.token_it.seekTo(pos);
            return parser.fail("wrong token: expected {}, found {}", .{
                id,
                token.id,
            });
        }
    }
};

// ===== /src/tokenizer.zig =====
// Splits proto source text into tokens.

const std = @import("std");
const log = std.log;
const testing = std.testing;

pub const Token = struct {
    id: Id,
    loc: Loc,

    /// Half-open byte range `[start, end)` into the source buffer.
    pub const Loc = struct {
        start: usize,
        end: usize,
    };

    pub const keywords = std.ComptimeStringMap(Id, .{
        .{ "enum", .keyword_enum },
        .{ "message", .keyword_message },
        .{ "repeated", .keyword_repeated },
        .{ "oneof", .keyword_oneof },
        .{ "syntax", .keyword_syntax },
        .{ "package", .keyword_package },
        .{ "import", .keyword_import },
    });

    /// Returns the keyword id for `bytes`, or null if it is not a keyword.
    pub fn getKeyword(bytes: []const u8) ?Id {
        return keywords.get(bytes);
    }

    pub const Id = enum {
        // zig fmt: off
        eof,

        invalid,
        l_brace,          // {
        r_brace,          // }
        l_sbrace,         // [
        r_sbrace,         // ]
        l_paren,          // (
        r_paren,          // )
        dot,              // .
        comma,            // ,
        semicolon,        // ;
        equal,            // =

        string_literal,   // "something"
        int_literal,      // 1
        identifier,       // ident

        keyword_enum,     // enum { ... }
        keyword_message,  // message { ... }
        keyword_repeated, // repeated Type field = 5;
        keyword_oneof,    // oneof { ... }
        keyword_syntax,   // syntax = "proto3";
        keyword_package,  // package my_pkg;
        keyword_import,   // import "other.proto";
        // zig fmt: on
    };
};

pub const TokenIndex = usize;

/// Cursor over a token slice.
pub const TokenIterator = struct {
    buffer: []const Token,
    pos: TokenIndex = 0,

    /// Returns the token at the cursor and advances. The cursor must be
    /// inside `buffer`; use `peek` to check first.
    pub fn next(self: *TokenIterator) Token {
        const token = self.buffer[self.pos];
        self.pos += 1;
        return token;
    }

    /// Returns the token at the cursor without advancing, or null when the
    /// cursor is past the end.
    pub fn peek(self: TokenIterator) ?Token {
        if (self.pos >= self.buffer.len) return null;
        return self.buffer[self.pos];
    }

    pub fn reset(self: *TokenIterator) void {
        self.pos = 0;
    }

    pub fn seekTo(self: *TokenIterator, pos: TokenIndex) void {
        self.pos = pos;
    }

    /// Moves the cursor by `offset`, clamping at the start of the buffer.
    pub fn seekBy(self: *TokenIterator, offset: isize) void {
        const new_pos = @bitCast(isize, self.pos) + offset;
        if (new_pos < 0) {
            self.pos = 0;
        } else {
            self.pos = @intCast(usize, new_pos);
        }
    }
};

pub const Tokenizer = struct {
    buffer: []const u8,
    index: usize = 0,

    /// Scans and returns the next token, skipping whitespace and comments.
    /// Returns an `.eof` token, repeatedly, once the input is exhausted.
    pub fn next(self: *Tokenizer) Token {
        var result = Token{
            .id = .eof,
            .loc = .{
                .start = self.index,
                .end = undefined,
            },
        };

        var state: enum {
            start,
            identifier,
            string_literal,
            int_literal,
            slash,
            line_comment,
            multiline_comment,
            multiline_comment_end,
        } = .start;

        // `break` leaves the loop without running the `self.index += 1`
        // continue expression, so arms that consume the current byte advance
        // the index themselves before breaking.
        while (self.index < self.buffer.len) : (self.index += 1) {
            const c = self.buffer[self.index];
            switch (state) {
                .start => switch (c) {
                    ' ', '\t', '\n', '\r' => {
                        result.loc.start = self.index + 1;
                    },
                    'a'...'z', 'A'...'Z', '_' => {
                        state = .identifier;
                        result.id = .identifier;
                    },
                    '0'...'9' => {
                        state = .int_literal;
                        result.id = .int_literal;
                    },
                    '/' => {
                        state = .slash;
                    },
                    '"' => {
                        result.id = .string_literal;
                        state = .string_literal;
                    },
                    '{', '}', '[', ']', '(', ')', ';', '.', ',', '=' => {
                        result.id = switch (c) {
                            '{' => Token.Id.l_brace,
                            '}' => Token.Id.r_brace,
                            '[' => Token.Id.l_sbrace,
                            ']' => Token.Id.r_sbrace,
                            '(' => Token.Id.l_paren,
                            ')' => Token.Id.r_paren,
                            ';' => Token.Id.semicolon,
                            '.' => Token.Id.dot,
                            ',' => Token.Id.comma,
                            '=' => Token.Id.equal,
                            else => unreachable,
                        };
                        self.index += 1;
                        break;
                    },
                    else => {
                        // Consume the byte so the token's range covers it.
                        result.id = .invalid;
                        self.index += 1;
                        break;
                    },
                },
                .slash => switch (c) {
                    '/' => {
                        state = .line_comment;
                    },
                    '*' => {
                        state = .multiline_comment;
                    },
                    else => {
                        result.id = .invalid;
                        self.index += 1;
                        break;
                    },
                },
                .line_comment => switch (c) {
                    '\n' => {
                        state = .start;
                        result.loc.start = self.index + 1;
                    },
                    else => {},
                },
                .multiline_comment => switch (c) {
                    '*' => {
                        state = .multiline_comment_end;
                    },
                    else => {},
                },
                .multiline_comment_end => switch (c) {
                    '/' => {
                        state = .start;
                        result.loc.start = self.index + 1;
                    },
                    else => {
                        state = .multiline_comment;
                    },
                },
                .identifier => switch (c) {
                    'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
                    else => break,
                },
                .int_literal => switch (c) {
                    '0'...'9' => {},
                    else => break,
                },
                .string_literal => switch (c) {
                    '"' => {
                        self.index += 1;
                        break;
                    },
                    else => {}, // TODO validate characters/encoding
                },
            }
        }

        switch (state) {
            // Done after the loop so an identifier that ends the buffer
            // (for example the whole input is `enum`) is still recognised as
            // a keyword.
            .identifier => {
                if (Token.getKeyword(self.buffer[result.loc.start..self.index])) |id| {
                    result.id = id;
                }
            },
            // A `/` that ends the buffer starts no comment; report it rather
            // than silently turning it into `.eof`.
            .slash => result.id = .invalid,
            else => {},
        }

        if (result.id == .eof) {
            result.loc.start = self.index;
        }

        result.loc.end = self.index;
        return result;
    }
};

// Tokenizes `source` and checks that the first `expected.len` token ids match.
fn testExpected(source: []const u8, expected: []const Token.Id) !void {
    var tokenizer = Tokenizer{
        .buffer = source,
    };
    for (expected) |exp, i| {
        const token = tokenizer.next();
        if (exp != token.id) {
            const stderr = std.io.getStdErr().writer();
            try stderr.print("Tokens don't match: (exp) {} != (giv) {} at pos {d}\n", .{ exp, token.id, i + 1 });
            return error.TestExpectedEqual;
        }
    }
}

test "simple enum" {
    try testExpected(
        \\/*
        \\ * Some cool kind
        \\ */
        \\enum SomeKind
        \\{
        \\ // This generally means none
        \\ NONE = 0;
        \\ // This means A
        \\ // and only A
        \\ A = 1;
        \\ /* B * * * * */
        \\ B = 2;
        \\ // And this one is just a C
        \\ C = 3;
        \\}
    , &[_]Token.Id{
        // zig fmt: off
        .keyword_enum, .identifier,
        .l_brace,
        .identifier, .equal, .int_literal, .semicolon,
        .identifier, .equal, .int_literal, .semicolon,
        .identifier, .equal, .int_literal, .semicolon,
        .identifier, .equal, .int_literal, .semicolon,
        .r_brace,
        // zig fmt: on
    });
}

test "simple enum - weird formatting" {
    try testExpected(
        \\enum SomeKind { NONE = 0;
        \\A = 1;
        \\ B = 2; C = 3;
        \\}
    , &[_]Token.Id{
        // zig fmt: off
        .keyword_enum, .identifier,
        .l_brace,
        .identifier, .equal, .int_literal, .semicolon,
        .identifier, .equal, .int_literal, .semicolon,
        .identifier, .equal, .int_literal, .semicolon,
        .identifier, .equal, .int_literal, .semicolon,
        .r_brace,
        // zig fmt: on
    });
}

test "simple message" {
    try testExpected(
        \\message MyMessage
        \\{
        \\ Ptr ptr_field = 1;
        \\ int32 ptr_len = 2;
        \\}
    , &[_]Token.Id{
        // zig fmt: off
        .keyword_message, .identifier,
        .l_brace,
        .identifier, .identifier, .equal, .int_literal, .semicolon,
        .identifier, .identifier, .equal, .int_literal, .semicolon,
        .r_brace,
        // zig fmt: on
    });
}

test "full proto spec file" {
    try testExpected(
        \\// autogen by super_proto_gen.py
        \\
        \\syntax = "proto3";
        \\
        \\package my_pkg;
        \\
        \\import "another.proto";
        \\
        \\message MsgA {
        \\ int32 field_1 = 1;
        \\ repeated Msg msgs = 2 [(nanopb).type=FT_POINTER];
        \\}
        \\
        \\// Tagged union y'all!
        \\message Msg {
        \\ oneof msg {
        \\ MsgA msg_a = 1 [json_name="msg_a"];
        \\ MsgB msg_b = 2 [ json_name = "msg_b" ];
        \\ }
        \\}
        \\
        \\/*
        \\ * Message B
        \\ */
        \\message MsgB {
        \\ // Some kind
        \\ Kind kind = 1;
        \\ // If the message is valid
        \\ bool valid = 2;
        \\}
        \\
        \\enum Kind {
        \\ KIND_NONE = 0;
        \\ KIND_A = 1;
        \\ KIND_B = 2;
        \\}
    , &[_]Token.Id{
        // zig fmt: off

        .keyword_syntax, .equal, .string_literal, .semicolon,

        .keyword_package, .identifier, .semicolon,

        .keyword_import, .string_literal, .semicolon,

        .keyword_message, .identifier,
        .l_brace,
        .identifier, .identifier, .equal, .int_literal, .semicolon,
        .keyword_repeated, .identifier, .identifier, .equal, .int_literal, .l_sbrace, .l_paren, .identifier, .r_paren, .dot, .identifier, .equal, .identifier, .r_sbrace, .semicolon,
        .r_brace,

        .keyword_message, .identifier,
        .l_brace,
        .keyword_oneof, .identifier,
        .l_brace,
        .identifier, .identifier, .equal, .int_literal, .l_sbrace, .identifier, .equal, .string_literal, .r_sbrace, .semicolon,
        .identifier, .identifier, .equal, .int_literal, .l_sbrace, .identifier, .equal, .string_literal, .r_sbrace, .semicolon,
        .r_brace,
        .r_brace,

        .keyword_message, .identifier,
        .l_brace,
        .identifier, .identifier, .equal, .int_literal, .semicolon,
        .identifier, .identifier, .equal, .int_literal, .semicolon,
        .r_brace,

        .keyword_enum, .identifier,
        .l_brace,
        .identifier, .equal, .int_literal, .semicolon,
        .identifier, .equal, .int_literal, .semicolon,
        .identifier, .equal, .int_literal, .semicolon,
        .r_brace,

        // zig fmt: on
    });
}

test "keyword at end of input" {
    try testExpected("enum", &[_]Token.Id{ .keyword_enum, .eof });
    try testExpected("message Foo", &[_]Token.Id{ .keyword_message, .identifier, .eof });
}

test "lone slash at end of input is invalid" {
    try testExpected("enum /", &[_]Token.Id{ .keyword_enum, .invalid, .eof });
}

test "invalid byte has a non-empty span" {
    var tokenizer = Tokenizer{ .buffer = "#" };
    const token = tokenizer.next();
    try testing.expectEqual(Token.Id.invalid, token.id);
    try testing.expectEqual(@as(usize, 0), token.loc.start);
    try testing.expectEqual(@as(usize, 1), token.loc.end);
}