├── assembler
├── .gitignore
├── README.md
├── src
│ ├── instruction-types.zig
│ ├── instruction-getters.zig
│ └── main.zig
├── build.zig
└── build.zig.zon
├── README.md
├── compiler
├── .gitignore
├── README.md
├── build.zig
├── src
│ ├── frontend
│ │ ├── tokens.zig
│ │ ├── semantic-analysis.zig
│ │ ├── semantic
│ │ │ ├── loop-labeling.zig
│ │ │ ├── type-checking.zig
│ │ │ └── identifier-resolution.zig
│ │ ├── lexer.zig
│ │ └── parser.zig
│ ├── diagnostics.zig
│ ├── ast
│ │ ├── c.zig
│ │ └── asm.zig
│ ├── backend
│ │ └── emission.zig
│ ├── middleend
│ │ ├── register-allocator.zig
│ │ └── gen.zig
│ ├── prettyprinter.zig
│ ├── testing.zig
│ └── main.zig
└── build.zig.zon
├── run.sh
├── sh
├── assembler.sh
├── compiler.sh
└── sim.sh
├── .gitignore
├── sim.sv
├── src
├── memory.sv
├── program_counter.sv
├── register_file.sv
├── instruction_memory.sv
├── top.sv
├── alu.sv
└── decoder.sv
├── flake.nix
├── tests.sh
└── flake.lock
/assembler/.gitignore:
--------------------------------------------------------------------------------
1 | /.zig-cache
2 | zig-out
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
Dragonfruit
3 |
--------------------------------------------------------------------------------
/compiler/.gitignore:
--------------------------------------------------------------------------------
1 | .zig-cache
2 | zig-out
3 | sb.lox
4 | sb.c
5 | sb.asm
--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 |
5 | DEBUG=${DEBUG:-0}
6 |
7 | sh sh/compiler.sh
8 | sh sh/assembler.sh
9 | sh sh/sim.sh
--------------------------------------------------------------------------------
/sh/assembler.sh:
--------------------------------------------------------------------------------
1 | rm -f program
2 |
3 | cd assembler
4 | zig build
5 | cd ..
6 |
7 | ./assembler/zig-out/bin/assembler program.asm program
--------------------------------------------------------------------------------
/sh/compiler.sh:
--------------------------------------------------------------------------------
1 | DEBUG=${DEBUG:-0}
2 |
3 | cd compiler
4 | zig build
5 | cd ..
6 |
7 | DEBUG=$DEBUG ./compiler/zig-out/bin/compiler program.c
--------------------------------------------------------------------------------
/sh/sim.sh:
--------------------------------------------------------------------------------
1 | rm -rf build
2 | rm -f *.vcd
3 | mkdir -p build
4 |
5 | iverilog -g2012 -o "build/sim_test" src/*.sv "sim.sv"
6 | vvp "build/sim_test"
7 | gtkwave sim.vcd
8 |
9 | rm -rf build
10 | rm sim.vcd
--------------------------------------------------------------------------------
/assembler/README.md:
--------------------------------------------------------------------------------
1 | # RV32I Assembler in zig
2 |
3 | Part of [RV32I](https://github.com/oxrinz/rv32i)
4 |
5 | Fully working and fleshed out RV32I assembler
6 |
7 | Usage:
8 | ```bash
9 | zig build run -- out.asm program
10 | ```
11 |
12 | First argument is input, second is output
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 |
3 | program
4 | program.asm
5 | program.c
6 |
7 | OpenTimer
8 |
9 | **/.zig-cache/
10 | **/zig-out/
11 | **/release/
12 | **/debug/
13 | **/build/
14 | **/build-*/
15 |
16 | # some temp files that might accidentally get leaved in, don't remove even if they don't exist
17 | *.vcd
18 | *.vvp
19 | out
20 | ROADMAP.md
21 | temp
22 |
--------------------------------------------------------------------------------
/sim.sv:
--------------------------------------------------------------------------------
1 | module sim;
2 |
3 | reg clk;
4 | wire [31:0] pc_out;
5 | wire [31:0] instr;
6 |
7 | top dut (.clk(clk));
8 |
9 | initial begin
10 | clk = 0;
11 | forever #1 clk = ~clk;
12 | end
13 |
14 | initial begin
15 | $dumpfile("sim.vcd");
16 | $dumpvars(0, sim);
17 |
18 | #1200;
19 |
20 | $display("Simulation completed successfully");
21 | $finish;
22 | end
23 |
24 | endmodule
25 |
--------------------------------------------------------------------------------
/src/memory.sv:
--------------------------------------------------------------------------------
1 | module memory(
2 | input wire clk,
3 | input wire [31:0] addr,
4 | input wire [31:0] data,
5 | input wire read,
6 | input wire write,
7 | output reg [31:0] data_out
8 | );
9 | reg [31:0] memory[0:1023];
10 |
11 | always @(*) begin
12 | if (read == 1) begin
13 | data_out = memory[addr];
14 | end
15 |
16 | if (write == 1) begin
17 | memory[addr] = data;
18 | end
19 | end
20 |
21 | endmodule
--------------------------------------------------------------------------------
/src/program_counter.sv:
--------------------------------------------------------------------------------
1 | module program_counter (
2 | input wire clk,
3 | input wire rst,
4 | input wire enable,
5 | input wire load,
6 | input wire [31:0] addr,
7 | output reg [31:0] pc
8 | );
9 |
10 | always @(posedge clk or posedge rst) begin
11 | if (rst) begin
12 | pc <= 32'b0;
13 | end else if (enable) begin
14 | if (load) begin
15 | pc <= addr;
16 | end else begin
17 | pc <= pc + 32'd1;
18 | end
19 | end
20 | end
21 |
22 | endmodule
23 |
--------------------------------------------------------------------------------
/flake.nix:
--------------------------------------------------------------------------------
1 | {
2 | inputs = {
3 | nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
4 | flake-utils.url = "github:numtide/flake-utils";
5 | };
6 |
7 | outputs = { self, nixpkgs, flake-utils }:
8 | flake-utils.lib.eachDefaultSystem (system:
9 | let pkgs = nixpkgs.legacyPackages.${system};
10 | in {
11 | devShells.default = pkgs.mkShell {
12 | buildInputs = with pkgs; [
13 | zig
14 | iverilog
15 | gtkwave
16 | yosys
17 | tcl
18 |
19 | gnumake
20 | gcc
21 | ];
22 |
23 | shellHook = ''
24 | export TCL_TCLSH=${pkgs.tcl}/bin/tclsh
25 | '';
26 | };
27 | });
28 | }
29 |
--------------------------------------------------------------------------------
/compiler/README.md:
--------------------------------------------------------------------------------
1 | # RV32I Compiler
2 |
3 | Compiles a silly made up language (c) to RV32I assembly. To be used with [rv32i-asm](https://github.com/oxrinz/rv32i-asm). Part of [rv32i](https://github.com/oxrinz/rv32i)
4 |
5 | ---
6 |
7 | ## Currently supports
8 | - [x] Precedence climbing
9 | - [x] Add, sub, mul, div
10 | - [x] Bitwise ops
11 | - [x] Logical ops
12 | - [x] Short-circuiting
13 | - [x] Variables
14 | - [x] In-place operators
15 | - [x] If statements
16 | - [ ] ? operator (probably won't be implemented)
17 | - [x] Correct scoping
18 | - [x] Loops
19 | - [ ] Switch cases (also most likely won't be implemented)
20 | - [ ] Functions
21 | - [ ] Multiple files
22 | - [ ] Long int
23 | - [ ] Unsigned int
24 | - [ ] Pointers
25 | - [ ] Arrays
26 | - [ ] Strings & characters
27 | - [ ] Memory allocation
28 | - [ ] Structs
--------------------------------------------------------------------------------
/src/register_file.sv:
--------------------------------------------------------------------------------
1 | module register_file (
2 | input clk,
3 | input rst_n,
4 |
5 | input [ 4:0] rs1_addr,
6 | input [ 4:0] rs2_addr,
7 | output [31:0] rs1_data,
8 | output [31:0] rs2_data,
9 |
10 | input we,
11 | input [ 4:0] rd_addr,
12 | input [31:0] rd_data
13 | );
14 |
15 | reg [31:0] registers[31:0];
16 | reg [ 2:0] flags [ 2:0];
17 |
18 | assign rs1_data = (rs1_addr == 0) ? 32'b0 : registers[rs1_addr];
19 | assign rs2_data = (rs2_addr == 0) ? 32'b0 : registers[rs2_addr];
20 |
21 | always @(posedge clk or negedge rst_n) begin
22 | if (!rst_n) begin
23 | integer i;
24 | for (i = 0; i < 32; i = i + 1) begin
25 | registers[i] <= 32'b0;
26 | end
27 |
28 | registers[2] = 32'h3FF;
29 |
30 | end else if (we && rd_addr != 0) begin
31 | registers[rd_addr] <= rd_data;
32 | end
33 | end
34 |
35 | endmodule
36 |
--------------------------------------------------------------------------------
/assembler/src/instruction-types.zig:
--------------------------------------------------------------------------------
1 | pub const RTypeInstruction = enum {
2 | ADD,
3 | SUB,
4 | SLL,
5 | SLT,
6 | SLTU,
7 | XOR,
8 | SRL,
9 | SRA,
10 | OR,
11 | AND,
12 |
13 | // m extension
14 | MUL,
15 | MULH,
16 | MULSU,
17 | MULU,
18 | DIV,
19 | DIVU,
20 | REM,
21 | REMU,
22 | };
23 |
24 | pub const ITypeInstruction = enum {
25 | ADDI,
26 | SLTI,
27 | SLTIU,
28 | XORI,
29 | ORI,
30 | ANDI,
31 | SLLI,
32 | SRLI,
33 | SRAI,
34 | LB,
35 | LH,
36 | LW,
37 | LBU,
38 | LHU,
39 | JALR,
40 | };
41 |
42 | pub const STypeInstruction = enum {
43 | SB,
44 | SH,
45 | SW,
46 | };
47 |
48 | pub const BTypeInstruction = enum {
49 | BEQ,
50 | BNE,
51 | BLT,
52 | BGE,
53 | BLTU,
54 | BGEU,
55 | };
56 |
57 | pub const UTypeInstruction = enum {
58 | LUI,
59 | AUIPC,
60 | };
61 |
62 | pub const JTypeInstruction = enum {
63 | JAL,
64 | };
65 |
--------------------------------------------------------------------------------
/assembler/build.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 |
3 | pub fn build(b: *std.Build) void {
4 | const target = b.standardTargetOptions(.{});
5 | const optimize = b.standardOptimizeOption(.{});
6 |
7 | const exe = b.addExecutable(.{
8 | .name = "assembler",
9 | .root_source_file = b.path("src/main.zig"),
10 | .target = target,
11 | .optimize = optimize,
12 | });
13 |
14 | b.installArtifact(exe);
15 |
16 | const run_cmd = b.addRunArtifact(exe);
17 |
18 | run_cmd.step.dependOn(b.getInstallStep());
19 |
20 | if (b.args) |args| {
21 | run_cmd.addArgs(args);
22 | }
23 |
24 | const run_step = b.step("run", "Run the app");
25 | run_step.dependOn(&run_cmd.step);
26 |
27 | const exe_unit_tests = b.addTest(.{
28 | .root_source_file = b.path("src/main.zig"),
29 | .target = target,
30 | .optimize = optimize,
31 | });
32 |
33 | const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests);
34 |
35 | const test_step = b.step("test", "Run unit tests");
36 | test_step.dependOn(&run_exe_unit_tests.step);
37 | }
38 |
--------------------------------------------------------------------------------
/tests.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [ "${DEBUG}" = "1" ]; then
4 | IVERILOG_FLAGS="-DDEBUG"
5 | else
6 | IVERILOG_FLAGS=""
7 | fi
8 |
9 | run_tests() {
10 | local component=$1
11 | echo -e "\n================================\nRunning ${component} tests..."
12 | cd $component
13 | zig build test
14 | local test_result=$?
15 | cd ..
16 | if [ $test_result -eq 0 ]; then
17 | echo "✓ ${component} tests passed"
18 | else
19 | echo "✗ ${component} tests failed with exit code ${test_result}"
20 | fi
21 | echo -e "================================"
22 | return $test_result
23 | }
24 |
25 |
26 | overall_status=0
27 | run_tests "compiler"
28 | compiler_status=$?
29 | overall_status=$((overall_status + compiler_status))
30 | run_tests "assembler"
31 | assembler_status=$?
32 | overall_status=$((overall_status + assembler_status))
33 |
34 | rm -rf test_results
35 | rm -rf build
36 | echo -e "\nTest Summary:"
37 | echo "Compiler tests: $([ $compiler_status -eq 0 ] && echo "PASSED" || echo "FAILED")"
38 | echo "Assembler tests: $([ $assembler_status -eq 0 ] && echo "PASSED" || echo "FAILED")"
39 | exit $overall_status
--------------------------------------------------------------------------------
/compiler/build.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 |
3 | pub fn build(b: *std.Build) void {
4 | const target = b.standardTargetOptions(.{});
5 | const optimize = b.standardOptimizeOption(.{});
6 |
7 | const exe = b.addExecutable(.{
8 | .name = "compiler",
9 | .root_source_file = b.path("src/main.zig"),
10 | .target = target,
11 | .optimize = optimize,
12 | });
13 |
14 | b.installArtifact(exe);
15 |
16 | const run_cmd = b.addRunArtifact(exe);
17 |
18 | run_cmd.step.dependOn(b.getInstallStep());
19 |
20 | if (b.args) |args| {
21 | run_cmd.addArgs(args);
22 | }
23 |
24 | const run_step = b.step("run", "Run the app");
25 | run_step.dependOn(&run_cmd.step);
26 |
27 | const exe_unit_tests = b.addTest(.{
28 | .root_source_file = b.path("src/main.zig"),
29 | .target = target,
30 | .optimize = optimize,
31 | });
32 |
33 | const testing_module = b.addModule("testing", .{
34 | .root_source_file = .{ .cwd_relative = "src/testing.zig" },
35 | });
36 |
37 | exe_unit_tests.root_module.addImport("testing", testing_module);
38 |
39 | const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests);
40 |
41 | const test_step = b.step("test", "Run unit tests");
42 | test_step.dependOn(&run_exe_unit_tests.step);
43 | }
44 |
--------------------------------------------------------------------------------
/flake.lock:
--------------------------------------------------------------------------------
1 | {
2 | "nodes": {
3 | "flake-utils": {
4 | "inputs": {
5 | "systems": "systems"
6 | },
7 | "locked": {
8 | "lastModified": 1731533236,
9 | "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
10 | "owner": "numtide",
11 | "repo": "flake-utils",
12 | "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
13 | "type": "github"
14 | },
15 | "original": {
16 | "owner": "numtide",
17 | "repo": "flake-utils",
18 | "type": "github"
19 | }
20 | },
21 | "nixpkgs": {
22 | "locked": {
23 | "lastModified": 1738961098,
24 | "narHash": "sha256-yWNBf6VDW38tl179FEuJ0qukthVfB02kv+mRsfUsWC0=",
25 | "owner": "NixOS",
26 | "repo": "nixpkgs",
27 | "rev": "a3eaf5e8eca7cab680b964138fb79073704aca75",
28 | "type": "github"
29 | },
30 | "original": {
31 | "owner": "NixOS",
32 | "ref": "nixos-unstable",
33 | "repo": "nixpkgs",
34 | "type": "github"
35 | }
36 | },
37 | "root": {
38 | "inputs": {
39 | "flake-utils": "flake-utils",
40 | "nixpkgs": "nixpkgs"
41 | }
42 | },
43 | "systems": {
44 | "locked": {
45 | "lastModified": 1681028828,
46 | "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
47 | "owner": "nix-systems",
48 | "repo": "default",
49 | "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
50 | "type": "github"
51 | },
52 | "original": {
53 | "owner": "nix-systems",
54 | "repo": "default",
55 | "type": "github"
56 | }
57 | }
58 | },
59 | "root": "root",
60 | "version": 7
61 | }
62 |
--------------------------------------------------------------------------------
/compiler/src/frontend/tokens.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 | const Allocator = std.mem.Allocator;
3 |
4 | pub fn is_binary_operator(token: TokenType) bool {
5 | switch (token) {
6 | .PLUS, .MINUS, .STAR, .SLASH, .PERCENTAGE, .AMPERSAND, .PIPE, .CARET, .LEFT_SHIFT, .RIGHT_SHIFT, .AMPERSAND_AMPERSAND, .PIPE_PIPE, .BANG, .BANG_EQUAL, .EQUAL, .EQUAL_EQUAL, .GREATER, .GREATER_EQUAL, .LESS, .LESS_EQUAL => return true,
7 | else => return false,
8 | }
9 | }
10 |
11 | pub fn is_in_place_starter(token: TokenType) bool {
12 | switch (token) {
13 | .PLUS, .MINUS, .STAR, .SLASH, .PERCENTAGE, .AMPERSAND, .PIPE, .CARET, .LEFT_SHIFT, .RIGHT_SHIFT => return true,
14 | else => return false,
15 | }
16 | }
17 |
18 | pub const TokenType = enum {
19 | LEFT_PAREN,
20 | RIGHT_PAREN,
21 | LEFT_BRACE,
22 | RIGHT_BRACE,
23 |
24 | COMMA,
25 | DOT,
26 |
27 | MINUS,
28 | PLUS,
29 |
30 | SEMICOLON,
31 | SLASH,
32 | STAR,
33 | PERCENTAGE,
34 |
35 | BANG,
36 | BANG_EQUAL,
37 | EQUAL,
38 | EQUAL_EQUAL,
39 | GREATER,
40 | GREATER_EQUAL,
41 | LESS,
42 | LESS_EQUAL,
43 |
44 | IDENTIFIER,
45 | STRING,
46 | NUMBER,
47 |
48 | INT,
49 | IF,
50 | ELSE,
51 | VOID,
52 | RETURN,
53 |
54 | QUESTION_MARK,
55 | COLON,
56 |
57 | AMPERSAND,
58 | AMPERSAND_AMPERSAND,
59 | PIPE,
60 | PIPE_PIPE,
61 | CARET,
62 | LEFT_SHIFT,
63 | RIGHT_SHIFT,
64 |
65 | WHILE,
66 | DO,
67 | FOR,
68 | BREAK,
69 | CONTINUE,
70 | };
71 |
72 | pub const Literal = union(enum) { string: []const u8, number: i32 };
73 |
74 | pub const Token = struct {
75 | type: TokenType,
76 | literal: ?Literal,
77 | line: usize,
78 |
79 | pub fn init(token_type: TokenType, literal: ?Literal, line: usize) Token {
80 | return .{
81 | .type = token_type,
82 | .literal = literal,
83 | .line = line,
84 | };
85 | }
86 | };
87 |
--------------------------------------------------------------------------------
/compiler/src/diagnostics.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 |
3 | pub var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
4 | const allocator = arena.allocator();
5 |
6 | var messages = std.ArrayList(Diagnostic).init(allocator);
7 |
8 | const Diagnostic = struct {
9 | message: []const u8,
10 | line: ?usize,
11 | type: enum {
12 | PANIC,
13 | WARNING,
14 | ERROR,
15 | },
16 | };
17 |
18 | const ColorCode = struct {
19 | const reset = "\x1b[0m";
20 | const red = "\x1b[31m";
21 | const yellow = "\x1b[33m";
22 | const magenta = "\x1b[35m";
23 | };
24 |
25 | fn printMsg(msg: Diagnostic) !void {
26 | const stderr = std.io.getStdErr().writer();
27 | switch (msg.type) {
28 | .PANIC => {
29 | try stderr.print("{s}PANIC:{s}", .{ ColorCode.magenta, ColorCode.reset });
30 | },
31 | .WARNING => try stderr.print("{s}WARNING{s}", .{ ColorCode.yellow, ColorCode.reset }),
32 | .ERROR => {
33 | try stderr.print("{s}ERROR{s}", .{ ColorCode.red, ColorCode.reset });
34 | },
35 | }
36 |
37 | const lineInfo = if (msg.line) |line|
38 | try std.fmt.allocPrint(messages.allocator, "line {d}: ", .{line})
39 | else
40 | "";
41 | defer if (msg.line != null) messages.allocator.free(lineInfo);
42 |
43 | try stderr.print(" {s}{s}\n", .{ lineInfo, msg.message });
44 | }
45 |
46 | pub fn printAll() void {
47 | for (messages.items) |msg| {
48 | printMsg(msg) catch @panic("Failed to print error messages");
49 | }
50 | }
51 |
52 | pub fn addPanic(message: []const u8, line: ?usize) void {
53 | messages.append(.{
54 | .message = message,
55 | .line = line,
56 | .type = .PANIC,
57 | }) catch @panic("Failed to append panic message");
58 | }
59 |
60 | pub fn addError(message: []const u8, line: ?usize) void {
61 | messages.append(.{
62 | .message = message,
63 | .line = line,
64 | .type = .ERROR,
65 | }) catch @panic("Failed to append panic message");
66 | }
67 |
68 | pub fn addWarning(message: []const u8, line: ?usize) void {
69 | messages.append(.{
70 | .message = message,
71 | .line = line,
72 | .type = .WARNING,
73 | }) catch @panic("Failed to append panic message");
74 | }
75 |
--------------------------------------------------------------------------------
/compiler/src/frontend/semantic-analysis.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 | const c_ast = @import("../ast/c.zig");
3 | const IdentifierResolution = @import("semantic/identifier-resolution.zig").IdentifierResolution;
4 | const LoopLabeling = @import("semantic/loop-labeling.zig").LoopLabeling;
5 | const TypeChecking = @import("semantic/type-checking.zig").TypeChecking;
6 | const testing = @import("../testing.zig");
7 |
8 | pub const SemanticAnalysis = struct {
9 | allocator: std.mem.Allocator,
10 | pub fn init(allocator: std.mem.Allocator) SemanticAnalysis {
11 | return .{ .allocator = allocator };
12 | }
13 |
14 | pub fn analyze(self: *SemanticAnalysis, program: c_ast.Program) !c_ast.Program {
15 | var variable_resolution = IdentifierResolution.init(self.allocator);
16 | var loop_labeling = LoopLabeling.init(self.allocator);
17 | var type_checking = TypeChecking.init(self.allocator);
18 |
19 | return try type_checking.check(try loop_labeling.label(try variable_resolution.resolve(program)));
20 | }
21 | };
22 |
23 | test "function parameter duplicate definition" {
24 | const input =
25 | \\int main()
26 | \\{
27 | \\ int foo(int a, int a);
28 | \\}
29 | \\
30 | ;
31 |
32 | const result = testing.cToSemanticAnalysis(input);
33 | try testing.expectError(error.DuplicateDefinition, result);
34 | }
35 |
36 | test "incompatible declarations" {
37 | const input =
38 | \\int main() {
39 | \\ int foo(int a);
40 | \\ return foo(1);
41 | \\}
42 | \\
43 | \\int foo(int a, int b);
44 | ;
45 |
46 | const result = testing.cToSemanticAnalysis(input);
47 | try testing.expectError(error.IncompatibleFunctionDeclarations, result);
48 | }
49 |
50 | test "variable used as function" {
51 | const input =
52 | \\int main()
53 | \\{
54 | \\ int x = 3;
55 | \\ return x();
56 | \\}
57 | ;
58 |
59 | const result = testing.cToSemanticAnalysis(input);
60 | try testing.expectError(error.VariableUsedAsFunction, result);
61 | }
62 |
63 | test "function already defined" {
64 | const input =
65 | \\int x(int a) {
66 | \\ return a;
67 | \\}
68 | \\
69 | \\int x(int a) {
70 | \\ return a;
71 | \\}
72 | \\
73 | \\int main()
74 | \\{
75 | \\ return x();
76 | \\}
77 | ;
78 |
79 | const result = testing.cToSemanticAnalysis(input);
80 | try testing.expectError(error.FunctionAlreadyDefined, result);
81 | }
82 |
--------------------------------------------------------------------------------
/src/instruction_memory.sv:
--------------------------------------------------------------------------------
1 | module instruction_memory (
2 | input wire [31:0] addr,
3 | output reg [31:0] instr_out
4 | );
5 | reg [31:0] memory[0:1023];
6 |
7 | `ifndef SYNTHESIS
8 | // SIMULATION CODE - will be used with iverilog
9 | integer i;
10 | integer file;
11 | integer status;
12 | reg continue_reading;
13 |
14 | initial begin
15 | // Initialize memory with zeros
16 | for (i = 0; i < 1024; i = i + 1) begin
17 | memory[i] = 32'h0;
18 | end
19 |
20 | // Load program from binary file
21 | file = $fopen("program", "rb");
22 | if (file == 0) begin
23 | $display("Error: Failed to open file");
24 | $finish;
25 | end
26 |
27 | i = 0;
28 | continue_reading = 1;
29 | while (continue_reading && i < 1024) begin
30 | status = $fgetc(file);
31 | if (status != -1) begin
32 | memory[i][31:24] = status;
33 | status = $fgetc(file);
34 | if (status != -1) begin
35 | memory[i][23:16] = status;
36 | status = $fgetc(file);
37 | if (status != -1) begin
38 | memory[i][15:8] = status;
39 | status = $fgetc(file);
40 | if (status != -1) begin
41 | memory[i][7:0] = status;
42 | end else begin
43 | memory[i][7:0] = 8'h00;
44 | continue_reading = 0;
45 | end
46 | end else begin
47 | memory[i][15:0] = 16'h0000;
48 | continue_reading = 0;
49 | end
50 | end else begin
51 | memory[i][23:0] = 24'h000000;
52 | continue_reading = 0;
53 | end
54 | i = i + 1;
55 | end else begin
56 | continue_reading = 0;
57 | end
58 | end
59 | $fclose(file);
60 |
61 | `ifdef DEBUG
62 | $display("Loaded %0d 32-bit instructions", i);
63 | $display("Memory initialization complete");
64 | `endif
65 | end
66 | `else
67 | // SYNTHESIS CODE - will be used with Yosys
68 | initial begin
69 | // For synthesis, either leave memory uninitialized (RAM will be inferred)
70 | // or provide a few default instructions if needed
71 | memory[0] = 32'h00000013; // NOP (addi x0, x0, 0)
72 | // Add more initialization if needed
73 | end
74 | `endif
75 |
76 | always @(*) begin
77 | instr_out = memory[addr];
78 | end
79 | endmodule
--------------------------------------------------------------------------------
/src/top.sv:
--------------------------------------------------------------------------------
1 | module top (
2 | input wire clk
3 | );
4 |
5 | // instr fetch signals
6 | wire [31:0] instr;
7 | wire pc_enable;
8 | wire pc_load;
9 | wire [31:0] load_addr;
10 | wire [31:0] pc_out;
11 |
12 | // control signals from decoder
13 | wire [3:0] alu_ops;
14 | wire reg_write;
15 | wire mem_read;
16 | wire mem_write;
17 | wire [31:0] mem_addr;
18 | wire [31:0] mem_out;
19 | wire [1:0] mem_width;
20 | wire is_branch;
21 | wire is_jump;
22 | wire is_jalr;
23 | wire is_i_type;
24 | wire is_i_load_type;
25 | wire is_store;
26 | wire [4:0] rs1;
27 | wire [4:0] rs2;
28 | wire [31:0] rs1_data;
29 | wire [31:0] rs2_data;
30 | wire [4:0] rd;
31 | wire [31:0] rd_data;
32 | wire [31:0] imm;
33 | wire is_lui;
34 |
35 | // pc control
36 | assign pc_enable = 1;
37 |
38 | // reset logic
39 | reg rst;
40 | initial begin
41 | rst = 1'b1;
42 | #1 rst = 1'b0;
43 | end
44 |
45 | // fetch
46 |
47 | program_counter pc_inst (
48 | .clk(clk),
49 | .rst(rst),
50 | .enable(pc_enable),
51 | .load(pc_load),
52 | .addr(load_addr),
53 | .pc(pc_out)
54 | );
55 |
56 | instruction_memory instr_mem (
57 | .addr(pc_out),
58 | .instr_out(instr)
59 | );
60 |
61 | memory memory_inst (
62 | .clk(clk),
63 | .addr(mem_addr),
64 | .data(rs2_data),
65 | .read(mem_read),
66 | .write(mem_write),
67 | .data_out(mem_out)
68 | );
69 |
70 | alu alu_inst (
71 | .clk(clk),
72 | .is_lui(is_lui),
73 | .imm(imm),
74 | .rd_data(rd_data),
75 | .is_branch(is_branch),
76 | .is_i_type(is_i_type),
77 | .is_i_load_type(is_i_load_type),
78 | .rs1_data(rs1_data),
79 | .rs2_data(rs2_data),
80 | .alu_ops(alu_ops),
81 | .pc_data(pc_out),
82 | .pc_load(pc_load),
83 | .new_pc_data(load_addr),
84 | .is_store(is_store),
85 | .mem_addr(mem_addr),
86 | .mem_data(mem_out)
87 | );
88 |
89 | decoder decoder_inst (
90 | .instr(instr),
91 | .alu_ops(alu_ops),
92 | .reg_write(reg_write),
93 | .mem_read(mem_read),
94 | .mem_write(mem_write),
95 | .mem_width(mem_width),
96 | .is_branch(is_branch),
97 | .rs1(rs1),
98 | .rs2(rs2),
99 | .rd(rd),
100 | .imm(imm),
101 | .is_lui(is_lui),
102 | .is_i_type(is_i_type),
103 | .is_i_load_type(is_i_load_type),
104 | .is_store(is_store)
105 | );
106 |
107 | register_file regfile_inst (
108 | .clk(clk),
109 | // .rst_n(rst),
110 | .rs1_addr(rs1),
111 | .rs2_addr(rs2),
112 | .rs1_data(rs1_data),
113 | .rs2_data(rs2_data),
114 |
115 | .we(reg_write),
116 | .rd_addr(rd),
117 | .rd_data(rd_data)
118 | );
119 |
120 | endmodule
121 |
--------------------------------------------------------------------------------
/compiler/src/frontend/semantic/loop-labeling.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 | const c_ast = @import("../../ast/c.zig");
3 |
4 | pub const LoopLabeling = struct {
5 | allocator: std.mem.Allocator,
6 | counter: usize,
7 |
8 | pub fn init(allocator: std.mem.Allocator) LoopLabeling {
9 | return .{
10 | .allocator = allocator,
11 | .counter = 0,
12 | };
13 | }
14 |
15 | pub fn label(self: *LoopLabeling, program: c_ast.Program) !c_ast.Program {
16 | var result = program;
17 | self.labelFunction(&result.function[0], null);
18 | return result;
19 | }
20 |
21 | fn getNextId(self: *LoopLabeling) []const u8 {
22 | const id = std.fmt.allocPrint(self.allocator, "loop_{d}", .{self.counter}) catch unreachable;
23 | self.counter += 1;
24 | return id;
25 | }
26 |
27 | fn labelFunction(self: *LoopLabeling, function: *c_ast.FunctionDeclaration, loop_ctx: ?[]const u8) void {
28 | for (function.body.?.block_items) |*item| {
29 | self.labelBlockItem(item, loop_ctx);
30 | }
31 | }
32 |
33 | fn labelBlockItem(self: *LoopLabeling, item: *c_ast.BlockItem, loop_ctx: ?[]const u8) void {
34 | switch (item.*) {
35 | .statement => |*stmt| self.labelStatement(stmt, loop_ctx),
36 | .declaration => {},
37 | }
38 | }
39 |
40 | fn labelStatement(self: *LoopLabeling, stmt: *c_ast.Statement, loop_ctx: ?[]const u8) void {
41 | switch (stmt.*) {
42 | .compound => |*block| {
43 | for (block.block_items) |*item| {
44 | self.labelBlockItem(item, loop_ctx);
45 | }
46 | },
47 | .if_ => |*if_stmt| {
48 | self.labelStatement(if_stmt.then, loop_ctx);
49 | if (if_stmt.else_) |else_stmt| {
50 | self.labelStatement(else_stmt, loop_ctx);
51 | }
52 | },
53 | .while_ => |*while_stmt| {
54 | const label_id = self.getNextId();
55 | while_stmt.identifier = label_id;
56 | self.labelStatement(while_stmt.body, label_id);
57 | },
58 | .do_while => |*do_while_stmt| {
59 | const label_id = self.getNextId();
60 | do_while_stmt.identifier = label_id;
61 | self.labelStatement(do_while_stmt.body, label_id);
62 | },
63 | .for_ => |*for_stmt| {
64 | const label_id = self.getNextId();
65 | for_stmt.identifier = label_id;
66 | self.labelStatement(for_stmt.body, label_id);
67 | },
68 | .break_ => |*break_stmt| {
69 | if (loop_ctx) |ctx| {
70 | break_stmt.identifier = ctx;
71 | }
72 | },
73 | .continue_ => |*continue_stmt| {
74 | if (loop_ctx) |ctx| {
75 | continue_stmt.identifier = ctx;
76 | }
77 | },
78 | else => {},
79 | }
80 | }
81 | };
82 |
--------------------------------------------------------------------------------
/src/alu.sv:
--------------------------------------------------------------------------------
1 | module alu (
2 | input clk,
3 | input is_lui,
4 | input is_i_type,
5 | input is_i_load_type,
6 | input is_branch,
7 | input is_store,
8 | input [3:0] alu_ops,
9 | input [31:0] rs1_data,
10 | input [31:0] rs2_data,
11 | input [31:0] imm,
12 | input [31:0] pc_data,
13 | input [31:0] mem_data,
14 | output reg pc_load,
15 | output reg [31:0] rd_data,
16 | output reg [31:0] new_pc_data,
17 | output reg [31:0] mem_addr
18 | );
19 |
20 | wire signed [31:0] rs1_signed = $signed(rs1_data);
21 | wire signed [31:0] rs2_signed = $signed(rs2_data);
22 | wire signed [31:0] imm_signed = $signed(imm);
23 |
24 | always @* begin
25 | rd_data = 32'b0;
26 | pc_load = 0;
27 |
28 | if (is_lui) begin
29 | rd_data = imm << 12;
30 |
31 | end else if (is_i_type) begin
32 | case (alu_ops)
33 | 4'b0000: rd_data = rs1_signed + imm_signed;
34 | 4'b0010: rd_data = rs1_signed ^ imm_signed;
35 | 4'b1000: rd_data = rs1_signed < imm;
36 | 4'b1011: rd_data = rs1_data < imm;
37 | endcase
38 |
39 | end else if (is_i_load_type) begin
40 | mem_addr = rs1_data + imm;
41 | rd_data = mem_data;
42 |
43 |
44 | end else if (is_branch) begin
45 | case (alu_ops)
46 | 4'b0000:
47 | if (rs1_signed == rs2_signed) begin
48 | pc_load = 1;
49 | new_pc_data = pc_data + imm_signed;
50 | end
51 |
52 | 4'b0001:
53 | if (rs1_signed != rs2_signed) begin
54 | pc_load = 1;
55 | new_pc_data = pc_data + imm_signed;
56 | end
57 |
58 | 4'b0010:
59 | if (rs1_signed < rs2_signed) begin
60 | pc_load = 1;
61 | new_pc_data = pc_data + imm_signed;
62 | end
63 |
64 | 4'b0011:
65 | if (rs1_signed >= rs2_signed) begin
66 | pc_load = 1;
67 | new_pc_data = pc_data + imm_signed;
68 | end
69 |
70 | 4'b0110: begin
71 | rd_data = pc_data + 1;
72 | pc_load = 1;
73 | new_pc_data = pc_data + imm_signed;
74 | end
75 |
76 | 4'b0111: begin
77 | rd_data = pc_data + 1;
78 | pc_load = 1;
79 | new_pc_data = rs1_data + imm_signed;
80 | end
81 | endcase
82 |
83 | end else if (is_store) begin
84 | mem_addr = rs1_data + imm;
85 |
86 | end else begin
87 | case (alu_ops)
88 | 4'b0000: rd_data = rs1_signed + rs2_signed;
89 | 4'b0001: rd_data = rs1_signed - rs2_signed;
90 | 4'b0010: rd_data = rs1_signed ^ rs2_signed;
91 | 4'b0011: rd_data = rs1_signed | rs2_signed;
92 | 4'b0100: rd_data = rs1_signed & rs2_signed;
93 | 4'b0101: rd_data = rs1_signed << rs2_signed;
94 | 4'b0110: rd_data = rs1_signed >> rs2_signed;
95 | 4'b1100: rd_data = rs1_signed * rs2_signed;
96 | 4'b1101: rd_data = rs1_signed / rs2_signed;
97 | 4'b1110: rd_data = rs1_signed % rs2_signed;
98 | 4'b1001: rd_data = rs1_signed < rs2_signed;
99 | default: rd_data = 32'b0;
100 | endcase
101 | end
102 | end
103 |
104 | endmodule
105 |
--------------------------------------------------------------------------------
/compiler/src/ast/c.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 |
3 | pub const BinaryOperator = enum {
4 | Add,
5 | Subtract,
6 | Multiply,
7 | Divide,
8 | Remainder,
9 |
10 | Bitwise_AND,
11 | Bitwise_OR,
12 | Bitwise_XOR,
13 | Left_Shift,
14 | Right_Shift,
15 |
16 | Less,
17 | Less_Or_Equal,
18 | Greater,
19 | Greater_Or_Equal,
20 | Equal,
21 | Not_Equal,
22 | And,
23 | Or,
24 |
25 | pub fn getType(op: *const BinaryOperator) enum { ARITHMETIC, BITWISE, COMPARISON, SHORT_CIRCUIT } {
26 | switch (op.*) {
27 | .Add, .Subtract, .Multiply, .Divide, .Remainder => return .ARITHMETIC,
28 | .Bitwise_AND, .Bitwise_OR, .Bitwise_XOR, .Left_Shift, .Right_Shift => return .BITWISE,
29 | .Less, .Less_Or_Equal, .Greater, .Greater_Or_Equal, .Equal, .Not_Equal => return .COMPARISON,
30 | .And, .Or => return .SHORT_CIRCUIT,
31 | }
32 | }
33 | };
34 |
35 | pub const Binary = struct {
36 | operator: BinaryOperator,
37 | left: *Expression,
38 | right: *Expression,
39 | };
40 |
41 | pub const Variable = struct {
42 | identifier: []const u8,
43 | };
44 |
45 | pub const Assignment = struct {
46 | left: *Expression,
47 | right: *Expression,
48 | };
49 |
50 | pub const FunctionCall = struct {
51 | identifier: []const u8,
52 | args: []*Expression,
53 | };
54 |
55 | pub const Expression = union(enum) {
56 | constant: i32,
57 | binary: Binary,
58 | variable: Variable,
59 | assignment: Assignment,
60 | function_call: FunctionCall,
61 | };
62 |
63 | pub const Return = struct {
64 | exp: Expression,
65 | };
66 |
67 | pub const If = struct {
68 | condition: Expression,
69 | then: *Statement,
70 | else_: ?*Statement,
71 | };
72 |
73 | pub const While = struct {
74 | condition: Expression,
75 | body: *Statement,
76 | identifier: ?[]const u8,
77 | };
78 |
79 | pub const DoWhile = struct {
80 | condition: Expression,
81 | body: *Statement,
82 | identifier: ?[]const u8,
83 | };
84 |
85 | pub const ForInit = union(enum) {
86 | init_decl: VariableDeclaration,
87 | init_exp: ?Expression,
88 | };
89 |
90 | pub const For = struct {
91 | init: ForInit,
92 | condition: ?Expression,
93 | post: ?Expression,
94 | body: *Statement,
95 | identifier: ?[]const u8,
96 | };
97 |
98 | pub const Break = struct {
99 | identifier: ?[]const u8,
100 | };
101 |
102 | pub const Continue = struct {
103 | identifier: ?[]const u8,
104 | };
105 |
106 | pub const Statement = union(enum) {
107 | ret: Return,
108 | exp: Expression,
109 | compound: Block,
110 | if_: If,
111 | break_: Break,
112 | continue_: Continue,
113 | while_: While,
114 | do_while: DoWhile,
115 | for_: For,
116 | };
117 |
118 | pub const VariableDeclaration = struct {
119 | identifier: []const u8,
120 | initial: ?Expression,
121 | };
122 |
123 | pub const FunctionDeclaration = struct {
124 | identifier: []const u8,
125 | params: [][]const u8,
126 | body: ?Block,
127 | };
128 |
129 | pub const Declaration = union(enum) {
130 | variable_declaration: VariableDeclaration,
131 | function_declaration: FunctionDeclaration,
132 | };
133 |
134 | pub const BlockItem = union(enum) {
135 | statement: Statement,
136 | declaration: Declaration,
137 | };
138 |
139 | pub const Block = struct {
140 | block_items: []BlockItem,
141 | };
142 |
143 | pub const Program = struct {
144 | function: []FunctionDeclaration,
145 | };
146 |
--------------------------------------------------------------------------------
/assembler/build.zig.zon:
--------------------------------------------------------------------------------
1 | .{
2 | // This is the default name used by packages depending on this one. For
3 | // example, when a user runs `zig fetch --save `, this field is used
4 | // as the key in the `dependencies` table. Although the user can choose a
5 | // different name, most users will stick with this provided value.
6 | //
7 | // It is redundant to include "zig" in this name because it is already
8 | // within the Zig package namespace.
9 | .name = "assembler",
10 |
11 | // This is a [Semantic Version](https://semver.org/).
12 | // In a future version of Zig it will be used for package deduplication.
13 | .version = "0.0.0",
14 |
15 | // This field is optional.
16 | // This is currently advisory only; Zig does not yet do anything
17 | // with this value.
18 | //.minimum_zig_version = "0.11.0",
19 |
20 | // This field is optional.
21 | // Each dependency must either provide a `url` and `hash`, or a `path`.
22 | // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
23 | // Once all dependencies are fetched, `zig build` no longer requires
24 | // internet connectivity.
25 | .dependencies = .{
26 | // See `zig fetch --save ` for a command-line interface for adding dependencies.
27 | //.example = .{
28 | // // When updating this field to a new URL, be sure to delete the corresponding
29 | // // `hash`, otherwise you are communicating that you expect to find the old hash at
30 | // // the new URL.
31 | // .url = "https://example.com/foo.tar.gz",
32 | //
33 | // // This is computed from the file contents of the directory of files that is
34 | // // obtained after fetching `url` and applying the inclusion rules given by
35 | // // `paths`.
36 | // //
37 | // // This field is the source of truth; packages do not come from a `url`; they
38 | // // come from a `hash`. `url` is just one of many possible mirrors for how to
39 | // // obtain a package matching this `hash`.
40 | // //
41 | // // Uses the [multihash](https://multiformats.io/multihash/) format.
42 | // .hash = "...",
43 | //
44 | // // When this is provided, the package is found in a directory relative to the
45 | // // build root. In this case the package's hash is irrelevant and therefore not
46 | // // computed. This field and `url` are mutually exclusive.
47 | // .path = "foo",
48 |
49 | // // When this is set to `true`, a package is declared to be lazily
50 | // // fetched. This makes the dependency only get fetched if it is
51 | // // actually used.
52 | // .lazy = false,
53 | //},
54 | },
55 |
56 | // Specifies the set of files and directories that are included in this package.
57 | // Only files and directories listed here are included in the `hash` that
58 | // is computed for this package. Only files listed here will remain on disk
59 | // when using the zig package manager. As a rule of thumb, one should list
60 | // files required for compilation plus any license(s).
61 | // Paths are relative to the build root. Use the empty string (`""`) to refer to
62 | // the build root itself.
63 | // A directory listed here means that all files within, recursively, are included.
64 | .paths = .{
65 | "build.zig",
66 | "build.zig.zon",
67 | "src",
68 | // For example...
69 | //"LICENSE",
70 | //"README.md",
71 | },
72 | }
73 |
--------------------------------------------------------------------------------
/compiler/build.zig.zon:
--------------------------------------------------------------------------------
1 | .{
2 | // This is the default name used by packages depending on this one. For
3 | // example, when a user runs `zig fetch --save `, this field is used
4 | // as the key in the `dependencies` table. Although the user can choose a
5 | // different name, most users will stick with this provided value.
6 | //
7 | // It is redundant to include "zig" in this name because it is already
8 | // within the Zig package namespace.
9 | .name = .compiler,
10 | .fingerprint = 0xaa62bd49671a6d1c,
11 |
12 | // This is a [Semantic Version](https://semver.org/).
13 | // In a future version of Zig it will be used for package deduplication.
14 | .version = "0.0.0",
15 |
16 | // This field is optional.
17 | // This is currently advisory only; Zig does not yet do anything
18 | // with this value.
19 | //.minimum_zig_version = "0.11.0",
20 |
21 | // This field is optional.
22 | // Each dependency must either provide a `url` and `hash`, or a `path`.
23 | // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
24 | // Once all dependencies are fetched, `zig build` no longer requires
25 | // internet connectivity.
26 | .dependencies = .{
27 | // See `zig fetch --save ` for a command-line interface for adding dependencies.
28 | //.example = .{
29 | // // When updating this field to a new URL, be sure to delete the corresponding
30 | // // `hash`, otherwise you are communicating that you expect to find the old hash at
31 | // // the new URL.
32 | // .url = "https://example.com/foo.tar.gz",
33 | //
34 | // // This is computed from the file contents of the directory of files that is
35 | // // obtained after fetching `url` and applying the inclusion rules given by
36 | // // `paths`.
37 | // //
38 | // // This field is the source of truth; packages do not come from a `url`; they
39 | // // come from a `hash`. `url` is just one of many possible mirrors for how to
40 | // // obtain a package matching this `hash`.
41 | // //
42 | // // Uses the [multihash](https://multiformats.io/multihash/) format.
43 | // .hash = "...",
44 | //
45 | // // When this is provided, the package is found in a directory relative to the
46 | // // build root. In this case the package's hash is irrelevant and therefore not
47 | // // computed. This field and `url` are mutually exclusive.
48 | // .path = "foo",
49 |
50 | // // When this is set to `true`, a package is declared to be lazily
51 | // // fetched. This makes the dependency only get fetched if it is
52 | // // actually used.
53 | // .lazy = false,
54 | //},
55 | },
56 |
57 | // Specifies the set of files and directories that are included in this package.
58 | // Only files and directories listed here are included in the `hash` that
59 | // is computed for this package. Only files listed here will remain on disk
60 | // when using the zig package manager. As a rule of thumb, one should list
61 | // files required for compilation plus any license(s).
62 | // Paths are relative to the build root. Use the empty string (`""`) to refer to
63 | // the build root itself.
64 | // A directory listed here means that all files within, recursively, are included.
65 | .paths = .{
66 | "build.zig",
67 | "build.zig.zon",
68 | "src",
69 | // For example...
70 | //"LICENSE",
71 | //"README.md",
72 | },
73 | }
74 |
--------------------------------------------------------------------------------
/assembler/src/instruction-getters.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 | const instr_types = @import("instruction-types.zig");
3 | const RTypeInstruction = instr_types.RTypeInstruction;
4 | const ITypeInstruction = instr_types.ITypeInstruction;
5 | const STypeInstruction = instr_types.STypeInstruction;
6 | const BTypeInstruction = instr_types.BTypeInstruction;
7 | const UTypeInstruction = instr_types.UTypeInstruction;
8 | const JTypeInstruction = instr_types.JTypeInstruction;
9 |
10 | pub fn getRTypeInstruction(instruction: []const u8) !RTypeInstruction {
11 | if (std.mem.eql(u8, instruction, "add")) return .ADD;
12 | if (std.mem.eql(u8, instruction, "sub")) return .SUB;
13 | if (std.mem.eql(u8, instruction, "sll")) return .SLL;
14 | if (std.mem.eql(u8, instruction, "slt")) return .SLT;
15 | if (std.mem.eql(u8, instruction, "sltu")) return .SLTU;
16 | if (std.mem.eql(u8, instruction, "xor")) return .XOR;
17 | if (std.mem.eql(u8, instruction, "srl")) return .SRL;
18 | if (std.mem.eql(u8, instruction, "sra")) return .SRA;
19 | if (std.mem.eql(u8, instruction, "or")) return .OR;
20 | if (std.mem.eql(u8, instruction, "and")) return .AND;
21 |
22 | // m extension
23 | if (std.mem.eql(u8, instruction, "mul")) return .MUL;
24 | if (std.mem.eql(u8, instruction, "mulh")) return .MULH;
25 | if (std.mem.eql(u8, instruction, "mulsu")) return .MULSU;
26 | if (std.mem.eql(u8, instruction, "mulu")) return .MULU;
27 | if (std.mem.eql(u8, instruction, "div")) return .DIV;
28 | if (std.mem.eql(u8, instruction, "divu")) return .DIVU;
29 | if (std.mem.eql(u8, instruction, "rem")) return .REM;
30 | if (std.mem.eql(u8, instruction, "remu")) return .REMU;
31 | unreachable;
32 | }
33 |
34 | pub fn getITypeInstruction(instruction: []const u8) !ITypeInstruction {
35 | if (std.mem.eql(u8, instruction, "addi")) return .ADDI;
36 | if (std.mem.eql(u8, instruction, "slti")) return .SLTI;
37 | if (std.mem.eql(u8, instruction, "sltiu")) return .SLTIU;
38 | if (std.mem.eql(u8, instruction, "xori")) return .XORI;
39 | if (std.mem.eql(u8, instruction, "ori")) return .ORI;
40 | if (std.mem.eql(u8, instruction, "andi")) return .ANDI;
41 | if (std.mem.eql(u8, instruction, "slli")) return .SLLI;
42 | if (std.mem.eql(u8, instruction, "srli")) return .SRLI;
43 | if (std.mem.eql(u8, instruction, "srai")) return .SRAI;
44 | if (std.mem.eql(u8, instruction, "lb")) return .LB;
45 | if (std.mem.eql(u8, instruction, "lh")) return .LH;
46 | if (std.mem.eql(u8, instruction, "lw")) return .LW;
47 | if (std.mem.eql(u8, instruction, "lbu")) return .LBU;
48 | if (std.mem.eql(u8, instruction, "lhu")) return .LHU;
49 | if (std.mem.eql(u8, instruction, "jalr")) return .JALR;
50 | unreachable;
51 | }
52 |
53 | pub fn getSTypeInstruction(instruction: []const u8) !STypeInstruction {
54 | if (std.mem.eql(u8, instruction, "sb")) return .SB;
55 | if (std.mem.eql(u8, instruction, "sh")) return .SH;
56 | if (std.mem.eql(u8, instruction, "sw")) return .SW;
57 | unreachable;
58 | }
59 |
60 | pub fn getBTypeInstruction(instruction: []const u8) !BTypeInstruction {
61 | if (std.mem.eql(u8, instruction, "beq")) return .BEQ;
62 | if (std.mem.eql(u8, instruction, "bne")) return .BNE;
63 | if (std.mem.eql(u8, instruction, "blt")) return .BLT;
64 | if (std.mem.eql(u8, instruction, "bge")) return .BGE;
65 | if (std.mem.eql(u8, instruction, "bltu")) return .BLTU;
66 | if (std.mem.eql(u8, instruction, "bgeu")) return .BGEU;
67 | unreachable;
68 | }
69 |
70 | pub fn getUTypeInstruction(instruction: []const u8) !UTypeInstruction {
71 | if (std.mem.eql(u8, instruction, "lui")) return .LUI;
72 | if (std.mem.eql(u8, instruction, "auipc")) return .AUIPC;
73 | unreachable;
74 | }
75 |
76 | pub fn getJTypeInstruction(instruction: []const u8) !JTypeInstruction {
77 | if (std.mem.eql(u8, instruction, "jal")) return .JAL;
78 | unreachable;
79 | }
80 |
--------------------------------------------------------------------------------
/compiler/src/backend/emission.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 | const asm_ast = @import("../ast/asm.zig");
3 |
4 | pub const Emitter = struct {
5 | program: asm_ast.Program,
6 |
7 | pub fn init(program: asm_ast.Program) Emitter {
8 | return .{ .program = program };
9 | }
10 |
11 | fn reg_to_string() []const u8 {}
12 |
13 | pub fn getAssemblyString(self: Emitter, allocator: std.mem.Allocator) ![]const u8 {
14 | var buffer = std.ArrayList(u8).init(allocator);
15 | errdefer buffer.deinit();
16 | const writer = buffer.writer();
17 |
18 | for (self.program.function.instructions) |instruction| {
19 | switch (instruction) {
20 | .rtype => |r| {
21 | try std.fmt.format(writer, "{s} {s} {s} {s}\n", .{
22 | r.instr.toString(),
23 | r.destination.toString(),
24 | r.source1.toString(),
25 | r.source2.toString(),
26 | });
27 | },
28 | .itype => |i| {
29 | switch (i.instr) {
30 | .LW, .LH, .LB, .LHU, .LBU => {
31 | try std.fmt.format(writer, "{s} {s} {}({s})\n", .{
32 | i.instr.toString(),
33 | i.destination.toString(),
34 | i.immediate,
35 | i.source.toString(),
36 | });
37 | },
38 | else => {
39 | try std.fmt.format(writer, "{s} {s} {s} {}\n", .{
40 | i.instr.toString(),
41 | i.destination.toString(),
42 | i.source.toString(),
43 | i.immediate,
44 | });
45 | },
46 | }
47 | },
48 | .btype => |b| {
49 | try std.fmt.format(writer, "{s} {s} {s} {s}\n", .{
50 | b.instr.toString(),
51 | b.source1.toString(),
52 | b.source2.toString(),
53 | b.label,
54 | });
55 | },
56 | .stype => |s| {
57 | try std.fmt.format(writer, "{s} {s} {}({s})\n", .{
58 | s.instr.toString(),
59 | s.source1.toString(),
60 | s.immediate,
61 | s.source2.toString(),
62 | });
63 | },
64 | .jtype => |j| {
65 | try std.fmt.format(writer, "{s} {s} {s}\n", .{
66 | j.instr.toString(),
67 | j.destination.toString(),
68 | j.label,
69 | });
70 | },
71 | .utype => |u| {
72 | try std.fmt.format(writer, "{s} {s} {}\n", .{
73 | u.instr.toString(),
74 | u.destination.toString(),
75 | u.immediate,
76 | });
77 | },
78 | .label => |label| {
79 | try std.fmt.format(writer, "{s}:\n", .{
80 | label.name,
81 | });
82 | },
83 | }
84 | }
85 |
86 | return buffer.toOwnedSlice(); // Caller owns the memory
87 | }
88 |
89 | pub fn write(self: Emitter, out_name: []const u8, allocator: std.mem.Allocator) !void {
90 | const dirname = std.fs.path.dirname(out_name) orelse ".";
91 | const stem = std.fs.path.stem(out_name);
92 |
93 | var path_buf: [std.fs.MAX_PATH_BYTES]u8 = undefined;
94 | const output_path = try std.fmt.bufPrint(&path_buf, "{s}/{s}.asm", .{
95 | dirname,
96 | stem,
97 | });
98 |
99 | const assembly = try self.getAssemblyString(allocator);
100 | defer allocator.free(assembly);
101 |
102 | const file = try std.fs.cwd().createFile(
103 | output_path,
104 | .{},
105 | );
106 | defer file.close();
107 |
108 | try file.writeAll(assembly);
109 | }
110 | };
111 |
--------------------------------------------------------------------------------
/src/decoder.sv:
--------------------------------------------------------------------------------
1 | module decoder (
2 | input [31:0] instr,
3 |
4 | output reg [3:0] alu_ops, // add, sub, xor, or, and, sll, srl, sra, slt, sltu
5 |
6 | output reg reg_write, // 1 for R, I. 0 for S, B
7 |
8 | output reg mem_read, // 1 for LB/LH/LW
9 | output reg mem_write, // 1 for SB/SH/SW
10 | output reg [1:0] mem_width, // 00 byte, 01 half, 10 word
11 |
12 | output reg is_lui,
13 | output reg is_i_type,
14 | output reg is_i_load_type,
15 | output reg is_branch,
16 | output reg is_store,
17 |
18 | output [4:0] rs1,
19 | output [4:0] rs2,
20 | output reg rs1_used,
21 | output reg rs2_used,
22 |
23 | output [4:0] rd,
24 |
25 | output reg [31:0] imm
26 | );
27 |
28 | wire [4:0] opcode = instr[6:2];
29 | wire [2:0] funct3 = instr[14:12];
30 | wire [6:0] funct7 = instr[31:25];
31 |
32 | localparam R_TYPE = 5'b01100;
33 | localparam I_TYPE = 5'b00100;
34 | localparam I_LOAD_TYPE = 5'b00000;
35 | localparam I_JALR_TYPE = 5'b11001;
36 | localparam B_TYPE = 5'b11000;
37 | localparam S_TYPE = 5'b01000;
38 | localparam J_TYPE = 5'b11011;
39 | localparam LUI = 5'b01101;
40 | localparam LOAD = 5'b00000;
41 | localparam STORE = 5'b01000;
42 | localparam BRANCH = 5'b11000;
43 |
44 | assign rs1 = instr[19:15];
45 | assign rs2 = instr[24:20];
46 |
47 | assign rd = instr[11:7];
48 |
49 | always @(*) begin
50 | is_lui = 0;
51 | is_i_type = 0;
52 | is_i_load_type = 0;
53 | is_branch = 0;
54 | is_store = 0;
55 |
56 | mem_write = 0;
57 | mem_read = 0;
58 | reg_write = 0;
59 |
60 | case (opcode)
61 | R_TYPE: begin
62 | if (funct7 != 7'b0000001) begin
63 | case (funct3)
64 | 3'b000: alu_ops = (funct7 !== 7'b0100000) ? 4'b0000 : 4'b0001; // ADD / SUB
65 | 3'b100: alu_ops = 4'b0010; // XOR
66 | 3'b110: alu_ops = 4'b0011; // OR
67 | 3'b111: alu_ops = 4'b0100; // AND
68 | 3'b001: alu_ops = 4'b0101; // SLL
69 | 3'b101: alu_ops = (funct7 !== 7'b0100000) ? 4'b0110 : 4'b1000; // SRL : SRA
70 | 3'b010: alu_ops = 4'b1001; // SLT
71 | 3'b011: alu_ops = 4'b1011; // SLTU
72 | endcase
73 | end else begin
74 | case (funct3)
75 | 3'b000: alu_ops = 4'b1100; // MUL
76 | 3'b100: alu_ops = 4'b1101; // DIV
77 | 3'b110: alu_ops = 4'b1110; // REM
78 | default: alu_ops = 4'b0000; // REST NOT DEFINED !! TODO
79 | endcase
80 | end
81 |
82 | rs1_used = 1;
83 | rs2_used = 1;
84 | reg_write = 1;
85 | end
86 |
87 | I_TYPE: begin
88 | case (funct3)
89 | 3'b000: alu_ops = 4'b0000;
90 | 3'b100: alu_ops = 4'b0010;
91 | 3'b110: alu_ops = 4'b0011;
92 | 3'b111: alu_ops = 4'b0100;
93 | 3'b101: begin
94 | if (imm[11:5] == 7'b0000000) alu_ops = 4'b0101;
95 | else if (imm[11:5] == 7'b0100000) alu_ops = 4'b0111;
96 | end
97 | 3'b010: alu_ops = 4'b1001;
98 | 3'b011: alu_ops = 4'b1011; // SLTIU
99 | endcase
100 |
101 | imm = instr[31:20];
102 | is_i_type = 1;
103 | reg_write = 1;
104 | end
105 |
106 | I_LOAD_TYPE: begin
107 | case (funct3)
108 | 3'b000: mem_width = 2'b00; // LB
109 | 3'b001: mem_width = 2'b01; // LH
110 | 3'b010: mem_width = 2'b10; // LW
111 | 3'b011: mem_width = 2'b00; // LBU
112 | 3'b100: mem_width = 2'b01; // LHU
113 | endcase
114 |
115 | imm = instr[31:20];
116 | mem_read = 1;
117 | is_i_load_type = 1;
118 | reg_write = 1;
119 | end
120 |
121 | B_TYPE: begin
122 | case (funct3)
123 | 3'b000: alu_ops = 4'b0000; // BEQ
124 | 3'b001: alu_ops = 4'b0001; // BNE
125 | 3'b100: alu_ops = 4'b0010; // BLT
126 | 3'b101: alu_ops = 4'b0011; // BGE
127 | 3'b110: alu_ops = 4'b0100; // BLTU
128 | 3'b111: alu_ops = 4'b0101; // BGEU
129 | endcase
130 |
131 | imm[31:12] = {20{instr[31]}};
132 | imm[11:5] = instr[31:25];
133 | imm[4:0] = instr[11:7];
134 |
135 | is_branch = 1;
136 | rs1_used = 1;
137 | rs2_used = 1;
138 | end
139 |
140 | J_TYPE: begin
141 | alu_ops = 4'b0110; // JAL
142 |
143 | imm = {{13{instr[31]}}, instr[30:12]};
144 |
145 | is_branch = 1;
146 | rs1_used = 1;
147 | rs2_used = 1;
148 | reg_write = 1;
149 | end
150 |
151 | I_JALR_TYPE: begin
152 | alu_ops = 4'b0111; //JALR
153 |
154 | is_branch = 1;
155 | imm = instr[31:20];
156 | reg_write = 1;
157 | end
158 |
159 | S_TYPE: begin
160 | case (funct3)
161 | 3'b000: mem_width = 2'b00; // SB
162 | 3'b001: mem_width = 2'b01; // SH
163 | 3'b010: mem_width = 2'b10; // SW
164 | endcase
165 |
166 | imm[11:5] = instr[31:25];
167 | imm[4:0] = instr[11:7];
168 |
169 | mem_write = 1;
170 | is_store = 1;
171 | rs1_used = 1;
172 | rs2_used = 1;
173 | end
174 |
175 | LUI: begin
176 | is_lui = 1;
177 | imm = instr[31:20];
178 | reg_write = 1;
179 | end
180 | default: is_i_type = 0;
181 | endcase
182 | end
183 |
184 | endmodule
185 |
--------------------------------------------------------------------------------
/compiler/src/ast/asm.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 |
3 | // utils for enums, preferably have this in a separate file
4 | pub fn EnumMethods(comptime T: type) type {
5 | return struct {
6 | pub fn toString(self: T) []const u8 {
7 | const str = @tagName(self);
8 | comptime var max_len = 0;
9 | inline for (@typeInfo(T).Enum.fields) |field| {
10 | max_len = @max(max_len, field.name.len);
11 | }
12 | var buf: [max_len]u8 = undefined;
13 | return std.ascii.lowerString(&buf, str);
14 | }
15 | };
16 | }
17 |
18 | // TODO: there has to be a cleaner way to do this
19 | pub fn convert(instr: InstructionType) union(enum) { rtype: RType_Inst, itype: IType_Inst, btype: BType_Inst, stype: SType_Inst, utype: UType_Inst, jtype: JType_Inst } {
20 | const instr_name = @tagName(instr);
21 |
22 | inline for (@typeInfo(RType_Inst).Enum.fields) |field| {
23 | if (std.mem.eql(u8, instr_name, field.name)) {
24 | return .{ .rtype = @field(RType_Inst, field.name) };
25 | }
26 | }
27 |
28 | inline for (@typeInfo(IType_Inst).Enum.fields) |field| {
29 | if (std.mem.eql(u8, instr_name, field.name)) {
30 | return .{ .itype = @field(IType_Inst, field.name) };
31 | }
32 | }
33 |
34 | inline for (@typeInfo(BType_Inst).Enum.fields) |field| {
35 | if (std.mem.eql(u8, instr_name, field.name)) {
36 | return .{ .btype = @field(BType_Inst, field.name) };
37 | }
38 | }
39 |
40 | inline for (@typeInfo(SType_Inst).Enum.fields) |field| {
41 | if (std.mem.eql(u8, instr_name, field.name)) {
42 | return .{ .stype = @field(SType_Inst, field.name) };
43 | }
44 | }
45 |
46 | inline for (@typeInfo(UType_Inst).Enum.fields) |field| {
47 | if (std.mem.eql(u8, instr_name, field.name)) {
48 | return .{ .utype = @field(UType_Inst, field.name) };
49 | }
50 | }
51 |
52 | inline for (@typeInfo(JType_Inst).Enum.fields) |field| {
53 | if (std.mem.eql(u8, instr_name, field.name)) {
54 | return .{ .jtype = @field(JType_Inst, field.name) };
55 | }
56 | }
57 |
58 | std.debug.print("Can't convert {} instruction\n", .{instr});
59 |
60 | unreachable;
61 | }
62 |
63 | pub const Reg = enum {
64 | zero,
65 | ra,
66 | sp,
67 | t0,
68 | t1,
69 | t2,
70 | fp,
71 | a0,
72 | a1,
73 | a2,
74 | a3,
75 | a4,
76 | a5,
77 | a6,
78 | a7,
79 | t3,
80 |
81 | pub fn toString(self: Reg) []const u8 {
82 | return @tagName(self);
83 | }
84 | };
85 |
86 | const Addi = struct {
87 | source: Reg,
88 | destination: Reg,
89 | imm: u12,
90 | };
91 |
92 | pub const InstructionType = enum {
93 | ADD,
94 | SUB,
95 | XOR,
96 | OR,
97 | AND,
98 | SLL,
99 | SRL,
100 | SRA,
101 | SLT,
102 | SLTU,
103 | MUL,
104 | MULH,
105 | MULSU,
106 | MULU,
107 | DIV,
108 | DIVU,
109 | REM,
110 | REMU,
111 |
112 | ADDI,
113 | // SUB,
114 | XORI,
115 | // OR,
116 | // AND,
117 | // SLL,
118 | // SRL,
119 | // SRA,
120 | SLTI,
121 | SLTIU,
122 |
123 | LB,
124 | LH,
125 | LW,
126 | LBU,
127 | LHU,
128 |
129 | SB,
130 | SH,
131 | SW,
132 |
133 | BEQ,
134 | BNE,
135 | BLT,
136 | BGE,
137 | BLTU,
138 | BGEU,
139 |
140 | LUI,
141 | AUIPC,
142 |
143 | JAL,
144 | JALR,
145 | };
146 |
147 | pub const RType_Inst = enum {
148 | ADD,
149 | SUB,
150 | XOR,
151 | OR,
152 | AND,
153 | SLL,
154 | SRL,
155 | SRA,
156 | SLT,
157 | SLTU,
158 |
159 | MUL,
160 | MULH,
161 | MULSU,
162 | MULU,
163 | DIV,
164 | DIVU,
165 | REM,
166 | REMU,
167 |
168 | pub usingnamespace EnumMethods(RType_Inst);
169 | };
170 |
171 | pub const IType_Inst = enum {
172 | ADDI,
173 | // SUB,
174 | XORI,
175 | // OR,
176 | // AND,
177 | // SLL,
178 | // SRL,
179 | // SRA,
180 | SLTI,
181 | SLTIU,
182 | // MUL,
183 | // MULH,
184 | // MULSU,
185 | // MULU,
186 | // DIV,
187 | // DIVU,
188 | // REM,
189 | // REMU,
190 |
191 | LB,
192 | LH,
193 | LW,
194 | LBU,
195 | LHU,
196 |
197 | JALR,
198 |
199 | pub usingnamespace EnumMethods(IType_Inst);
200 | };
201 |
202 | pub const BType_Inst = enum {
203 | BEQ,
204 | BNE,
205 | BLT,
206 | BGE,
207 | BLTU,
208 | BGEU,
209 |
210 | pub usingnamespace EnumMethods(BType_Inst);
211 | };
212 |
213 | pub const SType_Inst = enum {
214 | SB,
215 | SH,
216 | SW,
217 |
218 | pub usingnamespace EnumMethods(SType_Inst);
219 | };
220 |
221 | pub const UType_Inst = enum {
222 | LUI,
223 | AUIPC,
224 |
225 | pub usingnamespace EnumMethods(UType_Inst);
226 | };
227 |
228 | pub const JType_Inst = enum {
229 | JAL,
230 |
231 | pub usingnamespace EnumMethods(JType_Inst);
232 | };
233 |
234 | pub const RType = struct {
235 | instr: RType_Inst,
236 | source1: Reg,
237 | source2: Reg,
238 | destination: Reg,
239 | };
240 |
241 | pub const IType = struct {
242 | instr: IType_Inst,
243 | source: Reg,
244 | destination: Reg,
245 | immediate: i32,
246 | };
247 |
248 | pub const BType = struct {
249 | instr: BType_Inst,
250 | source1: Reg,
251 | source2: Reg,
252 | label: []const u8,
253 | };
254 |
255 | pub const SType = struct {
256 | instr: SType_Inst,
257 | source1: Reg,
258 | source2: Reg,
259 | immediate: i32,
260 | };
261 |
262 | pub const UType = struct {
263 | instr: UType_Inst,
264 | destination: Reg,
265 | immediate: i32,
266 | };
267 |
268 | pub const JType = struct {
269 | instr: JType_Inst,
270 | destination: Reg,
271 | label: []const u8,
272 | };
273 |
274 | const Label = struct {
275 | name: []const u8,
276 | };
277 |
278 | pub const Instruction = union(enum) {
279 | rtype: RType,
280 | itype: IType,
281 | stype: SType,
282 | btype: BType,
283 | utype: UType,
284 | jtype: JType,
285 |
286 | label: Label,
287 | };
288 |
289 | pub const FunctionDefinition = struct {
290 | identifier: []const u8,
291 | instructions: []Instruction,
292 | };
293 |
294 | pub const Program = struct {
295 | function: FunctionDefinition,
296 | };
297 |
--------------------------------------------------------------------------------
/compiler/src/frontend/lexer.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 | const tokens = @import("tokens.zig");
3 | const Token = tokens.Token;
4 | const TokenType = tokens.TokenType;
5 |
6 | var keywords: std.StringHashMap(TokenType) = undefined;
7 |
8 | pub fn initKeywords(allocator: std.mem.Allocator) !void {
9 | keywords = std.StringHashMap(TokenType).init(allocator);
10 |
11 | try keywords.put("int", .INT);
12 |
13 | try keywords.put("void", .VOID);
14 | try keywords.put("return", .RETURN);
15 |
16 | try keywords.put("if", .IF);
17 | try keywords.put("else", .ELSE);
18 |
19 | try keywords.put("break", .BREAK);
20 | try keywords.put("continue", .CONTINUE);
21 | try keywords.put("while", .WHILE);
22 | try keywords.put("do", .DO);
23 | try keywords.put("for", .FOR);
24 | }
25 |
26 | pub const Lexer = struct {
27 | allocator: std.mem.Allocator,
28 | source: []const u8,
29 | tokens: std.ArrayList(Token),
30 | currentIndex: usize = 0,
31 | currentLine: usize = 1,
32 | start: usize = 0,
33 |
34 | pub fn init(allocator: std.mem.Allocator, source: []const u8) Lexer {
35 | initKeywords(allocator) catch @panic("gg");
36 | return .{
37 | .allocator = allocator,
38 | .source = source,
39 | .tokens = std.ArrayList(Token).init(allocator),
40 | };
41 | }
42 |
43 | pub fn deinit(self: *Lexer) void {
44 | keywords.deinit();
45 | self.tokens.deinit();
46 | }
47 |
48 | pub fn scan(self: *Lexer) void {
49 | while (self.currentIndex < self.source.len) {
50 | self.start = self.currentIndex;
51 | const token = self.scanToken();
52 | if (token) |t| self.tokens.append(t) catch @panic("out of memory");
53 | }
54 | }
55 |
56 | fn isAtEnd(self: *Lexer) bool {
57 | return self.currentIndex >= self.source.len;
58 | }
59 |
60 | fn advance(self: *Lexer) u8 {
61 | self.currentIndex += 1;
62 | return self.source[self.currentIndex - 1];
63 | }
64 |
65 | fn isDigit(self: *Lexer, c: u8) bool {
66 | _ = self;
67 | return c >= '0' and c <= '9';
68 | }
69 |
70 | fn isAlpha(self: *Lexer, c: u8) bool {
71 | _ = self;
72 | return (c >= 'a' and c <= 'z') or
73 | (c >= 'A' and c <= 'Z') or
74 | c == '_';
75 | }
76 |
77 | fn isAlphaNumeric(self: *Lexer, c: u8) bool {
78 | return self.isAlpha(c) or self.isDigit(c);
79 | }
80 |
81 | fn peek(self: *Lexer) u8 {
82 | if (self.isAtEnd()) return 0;
83 | return self.source[self.currentIndex];
84 | }
85 |
86 | fn match(self: *Lexer, expected: u8) bool {
87 | if (self.isAtEnd()) return false;
88 |
89 | if (self.source[self.currentIndex] != expected) return false;
90 |
91 | self.currentIndex += 1;
92 | return true;
93 | }
94 |
95 | fn scanComment(self: *Lexer) void {
96 | while (self.peek() != '\n' and !self.isAtEnd()) {
97 | _ = self.advance();
98 | }
99 | }
100 |
101 | fn string(self: *Lexer) ?Token {
102 | while (self.peek() != '"' and !self.isAtEnd()) {
103 | if (self.peek() == '\n') {
104 | self.currentLine += 1;
105 | }
106 | _ = self.advance();
107 | }
108 |
109 | if (self.isAtEnd()) {
110 | return null;
111 | }
112 |
113 | _ = self.advance();
114 |
115 | const value = self.source[self.start + 1 .. self.currentIndex - 1];
116 |
117 | const token = Token.init(.STRING, .{ .string = value }, self.currentLine);
118 |
119 | return token;
120 | }
121 |
122 | fn number(self: *Lexer) ?Token {
123 | while (self.isDigit(self.peek())) _ = self.advance();
124 |
125 | const number_str = self.source[self.start..self.currentIndex];
126 |
127 | const token = Token.init(.NUMBER, .{ .number = std.fmt.parseInt(i32, number_str, 10) catch @panic("failed to parse int") }, self.currentLine);
128 | return token;
129 | }
130 |
131 | fn identifier(self: *Lexer) ?Token {
132 | while (self.isAlphaNumeric(self.peek())) _ = self.advance();
133 |
134 | const value = self.source[self.start..self.currentIndex];
135 | const ttype = keywords.get(value) orelse TokenType.IDENTIFIER;
136 |
137 | const token = Token.init(ttype, .{ .string = value }, self.currentLine);
138 | return token;
139 | }
140 |
141 | pub fn scanToken(self: *Lexer) ?Token {
142 | const char = self.advance();
143 |
144 | const token_type: TokenType = switch (char) {
145 | '(' => .LEFT_PAREN,
146 | ')' => .RIGHT_PAREN,
147 | '{' => .LEFT_BRACE,
148 | '}' => .RIGHT_BRACE,
149 | ',' => .COMMA,
150 | '.' => .DOT,
151 | '-' => .MINUS,
152 | '+' => .PLUS,
153 | ';' => .SEMICOLON,
154 | '*' => .STAR,
155 | '%' => .PERCENTAGE,
156 | '?' => .QUESTION_MARK,
157 | ':' => .COLON,
158 | '&' => if (self.match('&')) .AMPERSAND_AMPERSAND else .AMPERSAND,
159 | '|' => if (self.match('|')) .PIPE_PIPE else .PIPE,
160 | '^' => .CARET,
161 |
162 | '!' => if (self.match('=')) .BANG_EQUAL else .BANG,
163 | '=' => if (self.match('=')) .EQUAL_EQUAL else .EQUAL,
164 | '<' => if (self.match('=')) .LESS_EQUAL else if (self.match('<')) .LEFT_SHIFT else .LESS,
165 |
166 | '>' => if (self.match('=')) .GREATER_EQUAL else if (self.match('>')) .RIGHT_SHIFT else .GREATER,
167 |
168 | '/' => blk: {
169 | const result: TokenType = if (self.match('/')) {
170 | while (self.peek() != '\n' and !self.isAtEnd()) {
171 | _ = self.advance();
172 | }
173 | if (!self.isAtEnd()) return self.scanToken() else return null;
174 | } else .SLASH;
175 | break :blk result;
176 | },
177 | '\n' => {
178 | self.currentLine += 1;
179 | return null;
180 | },
181 | ' ' => return null,
182 | '\r' => return null,
183 | '\t' => return null,
184 | '"',
185 | => {
186 | return self.string();
187 | },
188 | else => {
189 | if (self.isDigit(char)) return self.number();
190 |
191 | if (self.isAlpha(char)) return self.identifier();
192 |
193 | const msg = std.fmt.allocPrint(self.allocator, "unexpected character at line {}", .{self.currentLine}) catch @panic("try again");
194 | defer self.allocator.free(msg);
195 | @panic(msg);
196 | },
197 | };
198 |
199 | const token = Token.init(token_type, null, self.currentLine);
200 | return token;
201 | }
202 | };
203 |
--------------------------------------------------------------------------------
/compiler/src/middleend/register-allocator.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 | const c_ast = @import("../ast/c.zig");
3 | const asm_ast = @import("../ast/asm.zig");
4 |
5 | const Interval = struct {
6 | start: usize,
7 | end: usize,
8 | };
9 |
10 | const Register = struct {
11 | reg: asm_ast.Reg,
12 | assigned: bool = false,
13 | variable: ?[]const u8 = null,
14 | expiration: ?usize = null,
15 | };
16 |
17 | pub const RegisterAllocator = struct {
18 | allocator: std.mem.Allocator,
19 | intervals: std.StringHashMap(Interval),
20 | line: usize = 0,
21 | registers: [5]Register,
22 |
23 | pub fn init(allocator: std.mem.Allocator) RegisterAllocator {
24 | return RegisterAllocator{
25 | .allocator = allocator,
26 | .intervals = std.StringHashMap(Interval).init(allocator),
27 | .registers = [_]Register{
28 | .{ .reg = .a0 },
29 | .{ .reg = .a1 },
30 | .{ .reg = .t0 },
31 | .{ .reg = .t1 },
32 | .{ .reg = .t2 },
33 | },
34 | };
35 | }
36 |
37 | pub fn getVariableRegister(self: *RegisterAllocator, variable: []const u8, line: usize) asm_ast.Reg {
38 | self.updateRegisters(line);
39 |
40 | for (&self.registers) |*reg| {
41 | if (reg.assigned and reg.variable != null) {
42 | if (std.mem.eql(u8, reg.variable.?, variable)) {
43 | return reg.reg;
44 | }
45 | }
46 | }
47 |
48 | if (self.intervals.get(variable)) |interval| {
49 | for (self.registers) |*reg| {
50 | if (reg.assigned == false) {
51 | reg.assigned = true;
52 | reg.variable = variable;
53 | reg.expiration = interval.end;
54 | return reg.reg;
55 | }
56 | }
57 | }
58 |
59 | @panic("no registers available");
60 | }
61 |
62 | pub fn getTempRegister(self: *RegisterAllocator, line: usize) asm_ast.Reg {
63 | self.updateRegisters(line);
64 |
65 | for (&self.registers) |*reg| {
66 | if (!reg.assigned) {
67 | reg.assigned = true;
68 | reg.variable = null;
69 | reg.expiration = line + 1;
70 | return reg.reg;
71 | }
72 | }
73 |
74 | @panic("no registers available");
75 | }
76 |
77 | pub fn expireRegister(self: *RegisterAllocator, reg: asm_ast.Reg) void {
78 | for (&self.registers) |*self_reg| {
79 | if (self_reg.*.reg == reg) {
80 | self_reg.assigned = false;
81 | self_reg.variable = null;
82 | self_reg.expiration = null;
83 | }
84 | }
85 | }
86 |
87 | fn updateRegisters(self: *RegisterAllocator, line: usize) void {
88 | for (&self.registers) |*reg| {
89 | if (reg.assigned and reg.expiration != null and reg.expiration.? <= line) {
90 | reg.assigned = false;
91 | reg.variable = null;
92 | reg.expiration = null;
93 | }
94 | }
95 | }
96 |
97 | pub fn scanFunction(self: *RegisterAllocator, function: c_ast.FunctionDeclaration) !void {
98 | if (function.body == null) @panic("performing linear scan on a function without a body");
99 |
100 | self.intervals = std.StringHashMap(Interval).init(self.allocator);
101 |
102 | for (function.params) |param| {
103 | try self.intervals.put(param, .{
104 | .start = 0,
105 | .end = 0,
106 | });
107 | }
108 |
109 | for (function.body.?.block_items) |block_item| {
110 | switch (block_item) {
111 | .statement => |statement| {
112 | try self.scanStatement(statement);
113 | },
114 | .declaration => {
115 | try self.scanDeclaration(block_item.declaration);
116 | },
117 | }
118 |
119 | self.line += 1;
120 | }
121 | }
122 |
123 | fn scanVariable(self: *RegisterAllocator, variable: []const u8) !void {
124 | if (self.intervals.getPtr(variable)) |interval| {
125 | interval.end = self.line;
126 | } else {
127 | try self.intervals.put(variable, Interval{
128 | .start = self.line,
129 | .end = self.line,
130 | });
131 | }
132 | }
133 |
134 | fn scanStatement(self: *RegisterAllocator, statement: c_ast.Statement) !void {
135 | switch (statement) {
136 | .ret => |ret| {
137 | _ = ret;
138 | },
139 | .exp => |exp| {
140 | try self.scanExpression(exp);
141 | },
142 | .if_ => |if_| {
143 | if (if_.else_) |else_| try self.scanStatement(else_.*);
144 | try self.scanExpression(if_.condition);
145 | try self.scanStatement(if_.then.*);
146 | },
147 | .compound => |compound| {
148 | for (compound.block_items) |block_item| {
149 | switch (block_item) {
150 | .statement => {
151 | try self.scanStatement(block_item.statement);
152 | },
153 | .declaration => {
154 | try self.scanDeclaration(block_item.declaration);
155 | },
156 | }
157 | }
158 | },
159 | .do_while => |do_while| {
160 | try self.scanStatement(do_while.body.*);
161 | try self.scanExpression(do_while.condition);
162 | },
163 | .for_ => |for_| {
164 | if (for_.condition) |condition| try self.scanExpression(condition);
165 | if (for_.post) |post| try self.scanExpression(post);
166 | try self.scanStatement(for_.body.*);
167 | if (for_.init.init_exp) |init_exp| try self.scanExpression(init_exp);
168 | try self.scanVariable(for_.init.init_decl.identifier);
169 | },
170 | .while_ => |while_| {
171 | _ = while_;
172 | },
173 | else => {},
174 | }
175 | }
176 |
177 | fn scanDeclaration(self: *RegisterAllocator, declaration: c_ast.Declaration) !void {
178 | switch (declaration) {
179 | .variable_declaration => |variable_declaration| {
180 | try self.scanVariable(variable_declaration.identifier);
181 | if (variable_declaration.initial) |initial| try self.scanExpression(initial);
182 | },
183 | else => {},
184 | }
185 | }
186 |
187 | fn scanExpression(self: *RegisterAllocator, exp: c_ast.Expression) anyerror!void {
188 | switch (exp) {
189 | .assignment => |assignment| {
190 | try self.scanExpression(assignment.left.*);
191 | try self.scanExpression(assignment.right.*);
192 | },
193 | .variable => |variable| {
194 | try self.scanVariable(variable.identifier);
195 | },
196 | .binary => |binary| {
197 | try self.scanExpression(binary.left.*);
198 | try self.scanExpression(binary.right.*);
199 | },
200 | .function_call => |function_call| {
201 | for (function_call.args) |arg| {
202 | try self.scanExpression(arg.*);
203 | }
204 | },
205 | else => {},
206 | }
207 | }
208 | };
209 |
--------------------------------------------------------------------------------
/compiler/src/prettyprinter.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 | const c_ast = @import("ast/c.zig");
3 |
4 | pub fn printExpression(exp: c_ast.Expression, indent: usize) void {
5 | const spaces = " " ** 64;
6 | std.debug.print("{s}", .{spaces[0..indent]});
7 | switch (exp) {
8 | .constant => |c| {
9 | std.debug.print("Constant: {}\n", .{c});
10 | },
11 | .binary => |b| {
12 | const op = switch (b.operator) {
13 | .Add => "+",
14 | .Subtract => "-",
15 | .Multiply => "*",
16 | .Divide => "/",
17 | .Remainder => "%",
18 | .Bitwise_AND => "&",
19 | .Bitwise_OR => "|",
20 | .Bitwise_XOR => "^",
21 | .Left_Shift => "<<",
22 | .Right_Shift => ">>",
23 | .Less => "<",
24 | .Less_Or_Equal => "<=",
25 | .Greater => ">",
26 | .Greater_Or_Equal => ">=",
27 | .Equal => "==",
28 | .Not_Equal => "!=",
29 | .And => "&&",
30 | .Or => "||",
31 | };
32 | std.debug.print("Binary Op: {s}\n", .{op});
33 | printExpression(b.left.*, indent + 4);
34 | printExpression(b.right.*, indent + 4);
35 | },
36 | .variable => |v| {
37 | std.debug.print("Variable: {s}\n", .{v.identifier});
38 | },
39 | .assignment => |a| {
40 | std.debug.print("Assignment:\n", .{});
41 | printExpression(a.left.*, indent + 4);
42 | printExpression(a.right.*, indent + 4);
43 | },
44 | .function_call => |fc| {
45 | std.debug.print("Function Call: {s}()\n", .{fc.identifier});
46 | for (fc.args) |arg| {
47 | printExpression(arg.*, indent + 4);
48 | }
49 | },
50 | }
51 | }
52 |
53 | pub fn printStatement(stmt: c_ast.Statement, indent: usize) void {
54 | const spaces = " " ** 64;
55 | std.debug.print("{s}", .{spaces[0..indent]});
56 | switch (stmt) {
57 | .ret => {
58 | std.debug.print("RETURN\n", .{});
59 | printExpression(stmt.ret.exp, indent + 2);
60 | },
61 | .exp => |expression| {
62 | printExpression(expression, indent);
63 | },
64 | .if_ => {
65 | std.debug.print("If\n", .{});
66 | printExpression(stmt.if_.condition, indent + 2);
67 | std.debug.print("{s}Then\n", .{spaces[0..indent]});
68 | printStatement(stmt.if_.then.*, indent + 2);
69 | if (stmt.if_.else_ != null) {
70 | std.debug.print("{s}Else\n", .{spaces[0..indent]});
71 | printStatement(stmt.if_.else_.?.*, indent + 2);
72 | }
73 | },
74 | .compound => {
75 | std.debug.print("Compound body:\n", .{});
76 | for (stmt.compound.block_items) |item| {
77 | printBlockItem(item, indent + 2);
78 | }
79 | },
80 | .while_ => {
81 | std.debug.print("While", .{});
82 | if (stmt.while_.identifier) |id| {
83 | std.debug.print(" ({s})", .{id});
84 | }
85 | std.debug.print("\n", .{});
86 | printExpression(stmt.while_.condition, indent + 2);
87 | std.debug.print("{s}Body\n", .{spaces[0..indent]});
88 | printStatement(stmt.while_.body.*, indent + 2);
89 | },
90 | .do_while => {
91 | std.debug.print("DoWhile", .{});
92 | if (stmt.do_while.identifier) |id| {
93 | std.debug.print(" ({s})", .{id});
94 | }
95 | std.debug.print("\n", .{});
96 | printStatement(stmt.do_while.body.*, indent + 2);
97 | std.debug.print("{s}While\n", .{spaces[0..indent]});
98 | printExpression(stmt.do_while.condition, indent + 2);
99 | },
100 | .for_ => {
101 | std.debug.print("For", .{});
102 | if (stmt.for_.identifier) |id| {
103 | std.debug.print(" ({s})", .{id});
104 | }
105 | std.debug.print("\n", .{});
106 | std.debug.print("{s}Init:\n", .{spaces[0 .. indent + 2]});
107 | switch (stmt.for_.init) {
108 | .init_exp => |init_exp| {
109 | printExpression(init_exp.?, indent + 4);
110 | },
111 | .init_decl => |init_decl| {
112 | printDeclaration(.{ .variable_declaration = init_decl }, indent + 4);
113 | },
114 | }
115 | std.debug.print("{s}Condition:\n", .{spaces[0 .. indent + 2]});
116 | if (stmt.for_.condition) |condition| {
117 | printExpression(condition, indent + 4);
118 | } else {
119 | std.debug.print("{s}(none)\n", .{spaces[0 .. indent + 4]});
120 | }
121 | std.debug.print("{s}Post:\n", .{spaces[0 .. indent + 2]});
122 | if (stmt.for_.post) |post| {
123 | printExpression(post, indent + 4);
124 | } else {
125 | std.debug.print("{s}(none)\n", .{spaces[0 .. indent + 4]});
126 | }
127 | std.debug.print("{s}Body:\n", .{spaces[0 .. indent + 2]});
128 | printStatement(stmt.for_.body.*, indent + 4);
129 | },
130 | .break_ => {
131 | std.debug.print("Break", .{});
132 | if (stmt.break_.identifier) |id| {
133 | std.debug.print(" ({s})", .{id});
134 | }
135 | std.debug.print("\n", .{});
136 | },
137 | .continue_ => {
138 | std.debug.print("Continue", .{});
139 | if (stmt.continue_.identifier) |id| {
140 | std.debug.print(" ({s})", .{id});
141 | }
142 | std.debug.print("\n", .{});
143 | },
144 | }
145 | }
146 |
147 | pub fn printDeclaration(decl: c_ast.Declaration, indent: usize) void {
148 | const spaces = " " ** 64;
149 | switch (decl) {
150 | .variable_declaration => |var_decl| {
151 | std.debug.print("{s}Declaration: {s}\n", .{ spaces[0..indent], var_decl.identifier });
152 | if (var_decl.initial) |initial| {
153 | printExpression(initial, indent + 2);
154 | }
155 | },
156 | .function_declaration => |func_decl| {
157 | printFunction(func_decl, indent);
158 | },
159 | }
160 | }
161 |
162 | pub fn printBlockItem(item: c_ast.BlockItem, indent: usize) void {
163 | switch (item) {
164 | .statement => |stmt| printStatement(stmt, indent),
165 | .declaration => |decl| printDeclaration(decl, indent),
166 | }
167 | }
168 |
169 | pub fn printFunction(func: c_ast.FunctionDeclaration, indent: usize) void {
170 | const spaces = " " ** 64;
171 | std.debug.print("{s}Function: {s}\n", .{ spaces[0..indent], func.identifier });
172 |
173 | // Print parameters
174 | if (func.params.len > 0) {
175 | std.debug.print("{s}Parameters:\n", .{spaces[0..indent]});
176 | for (func.params) |param| {
177 | std.debug.print("{s}{s}\n", .{ spaces[0 .. indent + 2], param });
178 | }
179 | }
180 |
181 | // Print body if it exists
182 | if (func.body) |body| {
183 | std.debug.print("{s}Body:\n", .{spaces[0..indent]});
184 | for (body.block_items) |item| {
185 | printBlockItem(item, indent + 2);
186 | }
187 | }
188 | }
189 |
190 | pub fn printProgram(program: c_ast.Program) void {
191 | std.debug.print("Program:\n", .{});
192 | for (program.function) |function| {
193 | printFunction(function, 2);
194 | }
195 | }
196 |
--------------------------------------------------------------------------------
/compiler/src/frontend/semantic/type-checking.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 | const c_ast = @import("../../ast/c.zig");
3 | const diagnostics = @import("../../diagnostics.zig");
4 |
5 | const Symbol = struct { defined: ?bool, type_: Type };
6 |
7 | const Type = union(enum) {
8 | int,
9 | function: Function,
10 | };
11 |
12 | const Function = struct {
13 | length: usize,
14 | };
15 |
16 | pub const TypeChecking = struct {
17 | allocator: std.mem.Allocator,
18 | symbols: std.StringHashMap(Symbol),
19 |
20 | pub fn init(allocator: std.mem.Allocator) TypeChecking {
21 | return .{
22 | .allocator = allocator,
23 | .symbols = std.StringHashMap(Symbol).init(allocator),
24 | };
25 | }
26 |
27 | pub fn check(self: *TypeChecking, program: c_ast.Program) !c_ast.Program {
28 | for (program.function) |function| {
29 | _ = try self.checkFunctionDeclaration(function);
30 | }
31 | return program;
32 | }
33 |
34 | fn checkFunctionDeclaration(self: *TypeChecking, function: c_ast.FunctionDeclaration) !c_ast.FunctionDeclaration {
35 | const type_ = Function{ .length = function.params.len };
36 | const has_body = function.body != null;
37 |
38 | var already_defined = false;
39 |
40 | if (self.symbols.get(function.identifier)) |symbol| {
41 | const old_function = symbol;
42 |
43 | switch (old_function.type_) {
44 | .function => {
45 | if (!std.mem.eql(u8, std.mem.asBytes(&type_), std.mem.asBytes(&old_function.type_.function))) {
46 | const msg = try std.fmt.allocPrint(diagnostics.arena.allocator(), "Incompatible declarations for function {s}", .{function.identifier});
47 | diagnostics.addError(msg, null);
48 | return error.IncompatibleFunctionDeclarations;
49 | }
50 | },
51 | else => {
52 | const msg = try std.fmt.allocPrint(diagnostics.arena.allocator(), "Incompatible declarations for function {s}", .{function.identifier});
53 | diagnostics.addError(msg, null);
54 | return error.IncompatibleFunctionDeclarations;
55 | },
56 | }
57 |
58 | already_defined = old_function.defined.?;
59 |
60 | if (already_defined and has_body) {
61 | const msg = try std.fmt.allocPrint(diagnostics.arena.allocator(), "Function already defined {s}", .{function.identifier});
62 | diagnostics.addError(msg, null);
63 | return error.FunctionAlreadyDefined;
64 | }
65 | }
66 |
67 | try self.symbols.put(function.identifier, Symbol{
68 | .type_ = .{
69 | .function = type_,
70 | },
71 | .defined = already_defined or has_body,
72 | });
73 |
74 | if (has_body) {
75 | for (function.params) |param| {
76 | try self.symbols.put(param, Symbol{
77 | .type_ = .int,
78 | .defined = null,
79 | });
80 | }
81 |
82 | _ = try self.checkBlock(function.body.?);
83 | }
84 |
85 | return function;
86 | }
87 |
88 | fn checkVariableDeclaration(self: *TypeChecking, variable: c_ast.VariableDeclaration) !c_ast.VariableDeclaration {
89 | try self.symbols.put(
90 | variable.identifier,
91 | .{
92 | .type_ = .int,
93 | .defined = null,
94 | },
95 | );
96 |
97 | if (variable.initial != null) {
98 | _ = try self.checkExpression(variable.initial.?);
99 | }
100 |
101 | return variable;
102 | }
103 |
104 | fn checkBlock(self: *TypeChecking, block: c_ast.Block) anyerror!c_ast.Block {
105 | const result = block;
106 |
107 | for (block.block_items) |block_item| {
108 | switch (block_item) {
109 | .declaration => |decl| {
110 | _ = try self.checkDeclaration(decl);
111 | },
112 | .statement => |stmt| {
113 | _ = try self.checkStatement(stmt);
114 | },
115 | }
116 | }
117 |
118 | return result;
119 | }
120 |
121 | fn checkDeclaration(self: *TypeChecking, declaration: c_ast.Declaration) anyerror!c_ast.Declaration {
122 | var result = declaration;
123 | switch (declaration) {
124 | .function_declaration => |func_decl| result.function_declaration = try self.checkFunctionDeclaration(func_decl),
125 | .variable_declaration => |var_decl| result.variable_declaration = try self.checkVariableDeclaration(var_decl),
126 | }
127 | return result;
128 | }
129 |
130 | fn checkStatement(self: *TypeChecking, statement: c_ast.Statement) !c_ast.Statement {
131 | switch (statement) {
132 | .compound => |compound| {
133 | _ = try self.checkBlock(compound);
134 | },
135 | .do_while => |do_while| {
136 | _ = try self.checkExpression(do_while.condition);
137 | _ = try self.checkStatement(do_while.body.*);
138 | },
139 | .exp => |exp| {
140 | _ = try self.checkExpression(exp);
141 | },
142 | .for_ => |for_| {
143 | _ = try self.checkStatement(for_.body.*);
144 | if (for_.condition != null) _ = try self.checkExpression(for_.condition.?);
145 | if (for_.post != null) _ = try self.checkExpression(for_.post.?);
146 | switch (for_.init) {
147 | .init_decl => _ = try self.checkVariableDeclaration(for_.init.init_decl),
148 | .init_exp => {
149 | if (for_.init.init_exp != null) _ = try self.checkExpression(for_.init.init_exp.?);
150 | },
151 | }
152 | },
153 | .if_ => |if_| {
154 | _ = try self.checkExpression(if_.condition);
155 | _ = try self.checkStatement(if_.then.*);
156 | if (if_.else_ != null) _ = try self.checkStatement(if_.else_.?.*);
157 | },
158 | .ret => |ret| {
159 | _ = try self.checkExpression(ret.exp);
160 | },
161 | .while_ => |while_| {
162 | _ = try self.checkExpression(while_.condition);
163 | _ = try self.checkStatement(while_.body.*);
164 | },
165 | else => {}, //nothing to check
166 | }
167 |
168 | return statement;
169 | }
170 |
171 | fn checkExpression(self: *TypeChecking, expression: c_ast.Expression) !c_ast.Expression {
172 | switch (expression) {
173 | .variable => {
174 | if (self.symbols.get(expression.variable.identifier).?.type_ != .int) {
175 | const msg = try std.fmt.allocPrint(diagnostics.arena.allocator(), "Function name {s} used as a variable", .{expression.variable.identifier});
176 | diagnostics.addError(msg, null);
177 | return error.FunctionUsedAsVariable;
178 | }
179 | },
180 | .function_call => {
181 | const type_ = self.symbols.get(expression.function_call.identifier).?.type_;
182 |
183 | if (type_ == .int) {
184 | const msg = try std.fmt.allocPrint(diagnostics.arena.allocator(), "Variable {s} used as function name", .{expression.function_call.identifier});
185 | diagnostics.addError(msg, null);
186 | return error.VariableUsedAsFunction;
187 | }
188 | if (type_.function.length != expression.function_call.args.len) {
189 | const msg = try std.fmt.allocPrint(diagnostics.arena.allocator(), "Function {s} called with wrong number of arguments", .{expression.function_call.identifier});
190 | diagnostics.addError(msg, null);
191 | return error.FunctionCallWrongArguments;
192 | }
193 | for (expression.function_call.args) |arg| {
194 | _ = try self.checkExpression(arg.*);
195 | }
196 | },
197 | else => {},
198 | }
199 |
200 | return expression;
201 | }
202 | };
203 |
--------------------------------------------------------------------------------
/compiler/src/testing.zig:
--------------------------------------------------------------------------------
1 | pub usingnamespace @import("std").testing;
2 | const std = @import("std");
3 | const c_ast = @import("ast/c.zig");
4 | const Lexer = @import("frontend/lexer.zig").Lexer;
5 | const Parser = @import("frontend/parser.zig").Parser;
6 | const SemanticAnalysis = @import("frontend/semantic-analysis.zig").SemanticAnalysis;
7 | const Generator = @import("middleend/gen.zig").Generator;
8 | const Emitter = @import("backend/emission.zig").Emitter;
9 |
10 | pub fn cToSemanticAnalysis(input: []const u8) !c_ast.Program {
11 | var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
12 | defer arena.deinit(); // This single call will free all allocations at once
13 | const allocator = arena.allocator();
14 |
15 | var lexer = Lexer.init(allocator, input);
16 | lexer.scan();
17 |
18 | var parser = Parser.init(lexer.tokens.items, allocator);
19 | const program_definition = try parser.parse();
20 |
21 | var semantic = SemanticAnalysis.init(allocator);
22 | return try semantic.analyze(program_definition);
23 | }
24 |
25 | pub fn cToAST(input: []const u8) !c_ast.Program {
26 | var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
27 | defer arena.deinit();
28 | const allocator = arena.allocator();
29 |
30 | var lexer = Lexer.init(allocator, input);
31 | lexer.scan();
32 |
33 | var parser = Parser.init(lexer.tokens.items, allocator);
34 | return try parser.parse();
35 | }
36 |
37 | pub fn generate(input: []const u8, allocator: std.mem.Allocator) ![]const u8 {
38 | var lexer = Lexer.init(allocator, input);
39 | lexer.scan();
40 |
41 | var parser = Parser.init(lexer.tokens.items, allocator);
42 | const program_definition = try parser.parse();
43 |
44 | var semantic = SemanticAnalysis.init(allocator);
45 | const analyzed_program = try semantic.analyze(program_definition);
46 |
47 | var generator = Generator.init(analyzed_program, allocator);
48 | const generated_program = try generator.generate();
49 |
50 | var emitter = Emitter.init(generated_program);
51 | return try emitter.getAssemblyString(allocator);
52 | }
53 |
54 | fn runShellCommand(allocator: std.mem.Allocator, command: []const u8) ![]u8 {
55 | const max_output_size = 1024 * 1024;
56 |
57 | var child = std.process.Child.init(&[_][]const u8{ "sh", "-c", command }, allocator);
58 |
59 | child.stdout_behavior = .Pipe;
60 | child.stderr_behavior = .Pipe;
61 |
62 | try child.spawn();
63 |
64 | var stdout_buffer = try allocator.alloc(u8, max_output_size);
65 | errdefer allocator.free(stdout_buffer);
66 | var stdout_len: usize = 0;
67 |
68 | var stderr_buffer = try allocator.alloc(u8, max_output_size);
69 | defer allocator.free(stderr_buffer);
70 | var stderr_len: usize = 0;
71 |
72 | while (true) {
73 | const bytes_read = try child.stdout.?.read(stdout_buffer[stdout_len..]);
74 | if (bytes_read == 0) break;
75 | stdout_len += bytes_read;
76 | if (stdout_len >= max_output_size) break;
77 | }
78 |
79 | while (true) {
80 | const bytes_read = try child.stderr.?.read(stderr_buffer[stderr_len..]);
81 | if (bytes_read == 0) break;
82 | stderr_len += bytes_read;
83 | if (stderr_len >= max_output_size) break;
84 | }
85 |
86 | const term = try child.wait();
87 |
88 | if (term.Exited != 0) {
89 | std.debug.print("Test failed\n", .{});
90 | return error.TestFailed;
91 | }
92 |
93 | return allocator.realloc(stdout_buffer, stdout_len);
94 | }
95 |
96 | pub const WireCheck = struct {
97 | tick: u32,
98 | wire: []const u8,
99 | value: u32,
100 | };
101 |
102 | pub fn testWithSystemVerilog(
103 | test_name: []const u8,
104 | c_code: []const u8,
105 | wire_checks: []const WireCheck,
106 | ) !void {
107 | var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
108 | defer arena.deinit();
109 | const allocator = arena.allocator();
110 |
111 | try std.fs.cwd().makePath("../temp");
112 |
113 | const assembly = try generate(c_code, allocator);
114 |
115 | const asm_path = try std.fmt.allocPrint(allocator, "../temp/{s}.asm", .{test_name});
116 | defer allocator.free(asm_path);
117 | const asm_file = try std.fs.cwd().createFile(asm_path, .{});
118 | defer asm_file.close();
119 | try asm_file.writeAll(assembly);
120 |
121 | const tb_content = try generateTestbench(test_name, wire_checks, allocator);
122 |
123 | const tb_path = try std.fmt.allocPrint(allocator, "../temp/{s}.sv", .{test_name});
124 | defer allocator.free(tb_path);
125 | const tb_file = try std.fs.cwd().createFile(tb_path, .{});
126 | defer tb_file.close();
127 | try tb_file.writeAll(tb_content);
128 |
129 | {
130 | const assemble_cmd = try std.fmt.allocPrint(allocator, "cd .. && cd assembler && zig build run -- \"../temp/{s}.asm\" ../program", .{test_name});
131 | defer allocator.free(assemble_cmd);
132 | _ = try runShellCommand(allocator, assemble_cmd);
133 | }
134 |
135 | {
136 | const iverilog_cmd = try std.fmt.allocPrint(allocator, "cd .. && iverilog -g2012 \"temp/{s}\" src/**.sv -o temp_output.vvp", .{tb_path});
137 | defer allocator.free(iverilog_cmd);
138 | _ = try runShellCommand(allocator, iverilog_cmd);
139 | }
140 |
141 | {
142 | const vvp_output = runShellCommand(allocator, "cd .. && vvp temp_output.vvp") catch |err| {
143 | const temp_out_path = try std.fmt.allocPrint(allocator, "../temp_output.vvp", .{});
144 | defer allocator.free(temp_out_path);
145 | std.fs.cwd().deleteFile(temp_out_path) catch {};
146 | return err;
147 | };
148 |
149 | const temp_out_path = try std.fmt.allocPrint(allocator, "../temp_output.vvp", .{});
150 | defer allocator.free(temp_out_path);
151 | std.fs.cwd().deleteFile(temp_out_path) catch {};
152 |
153 | defer allocator.free(vvp_output);
154 | }
155 |
156 | const vcd_path = try std.fmt.allocPrint(allocator, "../{s}.vcd", .{test_name});
157 | defer allocator.free(vcd_path);
158 | std.fs.cwd().deleteFile(vcd_path) catch {};
159 |
160 | const cwd = try std.process.getCwdAlloc(allocator);
161 |
162 | const rel_path = "../temp";
163 | const abs_path = try std.fs.path.resolve(allocator, &[_][]const u8{ cwd, rel_path });
164 |
165 | try std.fs.deleteTreeAbsolute(abs_path);
166 | }
167 |
168 | fn generateTestbench(
169 | test_name: []const u8,
170 | wire_checks: []const WireCheck,
171 | allocator: std.mem.Allocator,
172 | ) ![]const u8 {
173 | var tb = std.ArrayList(u8).init(allocator);
174 | defer tb.deinit();
175 |
176 | try tb.writer().print(
177 | \\module {s}_tb;
178 | \\ reg clk;
179 | \\top dut (.clk(clk));
180 | \\initial begin
181 | \\ clk = 0;
182 | \\forever #1 clk = ~clk;
183 | \\end
184 | \\initial begin
185 | \\$dumpfile("{s}.vcd");
186 | \\$dumpvars(0, dut);
187 | \\end
188 | \\initial begin
189 | \\
190 | , .{
191 | test_name,
192 | test_name,
193 | });
194 |
195 | var sorted_checks = std.ArrayList(WireCheck).init(allocator);
196 | defer sorted_checks.deinit();
197 |
198 | try sorted_checks.appendSlice(wire_checks);
199 |
200 | const Context = struct {
201 | pub fn lessThan(_: @This(), lhs: WireCheck, rhs: WireCheck) bool {
202 | return lhs.tick < rhs.tick;
203 | }
204 | };
205 | std.sort.insertion(WireCheck, sorted_checks.items, Context{}, Context.lessThan);
206 |
207 | var current_tick: u32 = 0;
208 | var first_check = true;
209 |
210 | for (sorted_checks.items) |check| {
211 | if (first_check or check.tick != current_tick) {
212 | if (!first_check) {
213 | try tb.appendSlice("\n");
214 | }
215 |
216 | try tb.writer().print("#{d};\n", .{check.tick});
217 | current_tick = check.tick;
218 | first_check = false;
219 | }
220 |
221 | try tb.writer().print(
222 | \\if (dut.{s} !== 32'd{d}) begin
223 | \\ $error("Alu {s} got wrong value: got %d, expected {d}",
224 | \\ dut.{s});
225 | \\ $fatal(1, "Test failed");
226 | \\end
227 | \\
228 | , .{
229 | check.wire,
230 | check.value,
231 | check.wire,
232 | check.value,
233 | check.wire,
234 | });
235 | }
236 |
237 | try tb.writer().print(
238 | \\$finish;
239 | \\end
240 | \\endmodule
241 | \\
242 | , .{});
243 |
244 | return tb.toOwnedSlice();
245 | }
246 |
--------------------------------------------------------------------------------
/compiler/src/frontend/semantic/identifier-resolution.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 | const c_ast = @import("../../ast/c.zig");
3 | const diagnostics = @import("../../diagnostics.zig");
4 |
5 | const MapEntry = struct {
6 | new_name: []const u8,
7 | from_current_scope: bool,
8 | has_linkage: bool,
9 | };
10 |
11 | pub const IdentifierResolution = struct {
12 | allocator: std.mem.Allocator,
13 | counter: usize,
14 |
15 | pub fn init(allocator: std.mem.Allocator) IdentifierResolution {
16 | return .{
17 | .allocator = allocator,
18 | .counter = 0,
19 | };
20 | }
21 |
22 | pub fn resolve(self: *IdentifierResolution, program: c_ast.Program) !c_ast.Program {
23 | var identifier_map = std.StringHashMap(MapEntry).init(self.allocator);
24 |
25 | var new_program = program;
26 | new_program = new_program;
27 |
28 | for (new_program.function) |*function| {
29 | function.* = try self.resolveFunctionDeclaration(function.*, &identifier_map);
30 | }
31 |
32 | return new_program;
33 | }
34 |
35 | fn resolveBlock(self: *IdentifierResolution, block: c_ast.Block, identifier_map: *std.StringHashMap(MapEntry)) anyerror!c_ast.Block {
36 | var new_block = block;
37 | new_block = new_block; // dig doesn't recognize that editing arrays with a pointer modifies an object, in this case new_block
38 | for (new_block.block_items) |*block_item| {
39 | switch (block_item.*) {
40 | .declaration => {
41 | block_item.declaration = try self.resolveDeclaration(block_item.declaration, identifier_map);
42 | },
43 | .statement => {
44 | block_item.statement = try self.resolveStatement(block_item.statement, identifier_map);
45 | },
46 | }
47 | }
48 | return new_block;
49 | }
50 |
51 | // resolves local variables and parameters. didn't name it properly to avoid long name
52 | fn resolveLocal(self: *IdentifierResolution, identifier: []const u8, identifier_map: *std.StringHashMap(MapEntry)) !MapEntry {
53 | if (identifier_map.get(identifier)) |entry| {
54 | if (entry.from_current_scope) {
55 | const err_msg = try std.fmt.allocPrint(self.allocator, "Duplicate identifier definition: {s}", .{identifier});
56 | diagnostics.addError(err_msg, null);
57 | return error.DuplicateDefinition;
58 | }
59 | }
60 |
61 | const unique_name = try std.fmt.allocPrint(self.allocator, "var_{d}", .{self.counter});
62 | self.counter += 1;
63 |
64 | return MapEntry{
65 | .from_current_scope = true,
66 | .new_name = unique_name,
67 | .has_linkage = true,
68 | };
69 | }
70 |
71 | fn resolveVariableDeclaration(self: *IdentifierResolution, declaration: c_ast.VariableDeclaration, identifier_map: *std.StringHashMap(MapEntry)) !c_ast.VariableDeclaration {
72 | var result = declaration;
73 |
74 | const entry = try self.resolveLocal(result.identifier, identifier_map);
75 |
76 | try identifier_map.put(
77 | result.identifier,
78 | entry,
79 | );
80 |
81 | result.identifier = entry.new_name;
82 |
83 | if (result.initial != null) {
84 | result.initial = try self.resolveExp(result.initial.?, identifier_map);
85 | }
86 |
87 | return result;
88 | }
89 |
90 | fn resolveFunctionDeclaration(self: *IdentifierResolution, declaration: c_ast.FunctionDeclaration, identifier_map: *std.StringHashMap(MapEntry)) !c_ast.FunctionDeclaration {
91 | var result = declaration;
92 |
93 | if (identifier_map.get(result.identifier)) |entry| {
94 | if (entry.from_current_scope == true and entry.has_linkage == false) @panic("Duplicate declaration");
95 | }
96 |
97 | try identifier_map.put(
98 | result.identifier,
99 | MapEntry{
100 | .from_current_scope = true,
101 | .new_name = result.identifier,
102 | .has_linkage = true,
103 | },
104 | );
105 |
106 | var inner_map = try self.cloneVariableMap(identifier_map);
107 | var new_params = std.ArrayList([]const u8).init(self.allocator);
108 | for (result.params) |param| {
109 | const entry = try self.resolveLocal(param, &inner_map);
110 | try new_params.append(entry.new_name);
111 | try inner_map.put(param, entry);
112 | }
113 | result.params = try new_params.toOwnedSlice();
114 |
115 | if (result.body != null) {
116 | result.body = try self.resolveBlock(result.body.?, &inner_map);
117 | }
118 |
119 | return result;
120 | }
121 |
122 | fn resolveDeclaration(self: *IdentifierResolution, declaration: c_ast.Declaration, identifier_map: *std.StringHashMap(MapEntry)) !c_ast.Declaration {
123 | switch (declaration) {
124 | .variable_declaration => {
125 | return .{ .variable_declaration = try self.resolveVariableDeclaration(declaration.variable_declaration, identifier_map) };
126 | },
127 | else => {
128 | return .{ .function_declaration = try self.resolveFunctionDeclaration(declaration.function_declaration, identifier_map) };
129 | },
130 | }
131 | }
132 |
133 | fn cloneVariableMap(self: *IdentifierResolution, identifier_map: *std.StringHashMap(MapEntry)) !std.StringHashMap(MapEntry) {
134 | _ = self;
135 | var new_map = std.StringHashMap(MapEntry).init(identifier_map.allocator);
136 |
137 | var iterator = identifier_map.iterator();
138 | while (iterator.next()) |entry| {
139 | var new_entry = entry.value_ptr.*;
140 | new_entry.from_current_scope = false;
141 | try new_map.put(entry.key_ptr.*, new_entry);
142 | }
143 |
144 | return new_map;
145 | }
146 |
147 | fn resolveStatement(self: *IdentifierResolution, statement: c_ast.Statement, identifier_map: *std.StringHashMap(MapEntry)) !c_ast.Statement {
148 | var result = statement;
149 | switch (result) {
150 | .exp => {
151 | result.exp = try self.resolveExp(result.exp, identifier_map);
152 | },
153 | .ret => {
154 | result.ret.exp = try self.resolveExp(result.ret.exp, identifier_map);
155 | },
156 | .if_ => {
157 | var else_: ?*c_ast.Statement = null;
158 | if (result.if_.else_ != null) {
159 | const resolved_else = try self.resolveStatement(result.if_.else_.?.*, identifier_map);
160 | else_ = try self.allocator.create(c_ast.Statement);
161 | else_.?.* = resolved_else;
162 | }
163 |
164 | const resolved_then = try self.resolveStatement(result.if_.then.*, identifier_map);
165 | const then_statement = try self.allocator.create(c_ast.Statement);
166 | then_statement.* = resolved_then;
167 |
168 | result.if_ = .{
169 | .condition = try self.resolveExp(result.if_.condition, identifier_map),
170 | .then = then_statement,
171 | .else_ = else_,
172 | };
173 | },
174 | .compound => {
175 | var new_map = try self.cloneVariableMap(identifier_map);
176 | result.compound = try self.resolveBlock(result.compound, &new_map);
177 | },
178 | .break_ => {},
179 | .continue_ => {},
180 | .do_while => |*do_while| {
181 | const body = try self.resolveStatement(do_while.*.body.*, identifier_map);
182 | const body_ptr = try self.allocator.create(c_ast.Statement);
183 | body_ptr.* = body;
184 |
185 | do_while.body = body_ptr;
186 | do_while.condition = try self.resolveExp(do_while.*.condition, identifier_map);
187 | },
188 | .for_ => |*for_| {
189 | switch (for_.init) {
190 | .init_decl => {
191 | const decl = try self.resolveDeclaration(.{ .variable_declaration = for_.init.init_decl }, identifier_map);
192 | for_.init.init_decl = decl.variable_declaration;
193 | },
194 | .init_exp => {
195 | if (for_.init.init_exp != null) {
196 | for_.init.init_exp = try self.resolveExp(for_.init.init_exp.?, identifier_map);
197 | }
198 | },
199 | }
200 |
201 | if (for_.condition != null) {
202 | for_.condition = try self.resolveExp(for_.*.condition.?, identifier_map);
203 | }
204 |
205 | if (for_.post != null) {
206 | for_.post = try self.resolveExp(for_.*.post.?, identifier_map);
207 | }
208 |
209 | const body = try self.resolveStatement(for_.*.body.*, identifier_map);
210 | const body_ptr = try self.allocator.create(c_ast.Statement);
211 | body_ptr.* = body;
212 |
213 | for_.body = body_ptr;
214 | },
215 | .while_ => |*while_| {
216 | const body = try self.resolveStatement(while_.*.body.*, identifier_map);
217 | const body_ptr = try self.allocator.create(c_ast.Statement);
218 | body_ptr.* = body;
219 |
220 | while_.body = body_ptr;
221 | while_.condition = try self.resolveExp(while_.*.condition, identifier_map);
222 | },
223 | }
224 | return result;
225 | }
226 |
227 | fn resolveExp(self: *IdentifierResolution, expression: c_ast.Expression, identifier_map: *std.StringHashMap(MapEntry)) !c_ast.Expression {
228 | var result = expression;
229 | switch (result) {
230 | .variable => {
231 | result.variable.identifier = identifier_map.get(result.variable.identifier).?.new_name;
232 | },
233 | .binary => {
234 | result.binary.left.* = try self.resolveExp(result.binary.left.*, identifier_map);
235 | result.binary.right.* = try self.resolveExp(result.binary.right.*, identifier_map);
236 | },
237 | .assignment => {
238 | if (result.assignment.left.* != .variable) @panic("Invalid lvalue");
239 | result.assignment.left.* = try self.resolveExp(result.assignment.left.*, identifier_map);
240 | result.assignment.right.* = try self.resolveExp(result.assignment.right.*, identifier_map);
241 | },
242 | .constant => {},
243 | .function_call => {
244 | if (identifier_map.get(result.function_call.identifier)) |entry| {
245 | const new_name = entry.new_name;
246 | var new_args = std.ArrayList(*c_ast.Expression).init(self.allocator);
247 | for (result.function_call.args) |arg| {
248 | const expr = try self.allocator.create(c_ast.Expression);
249 | expr.* = try self.resolveExp(arg.*, identifier_map);
250 | try new_args.append(expr);
251 | }
252 | return .{
253 | .function_call = .{
254 | .identifier = new_name,
255 | .args = try new_args.toOwnedSlice(),
256 | },
257 | };
258 | } else {
259 | const err_msg = try std.fmt.allocPrint(self.allocator, "Undeclared function: {s}", .{result.function_call.identifier});
260 | diagnostics.addError(err_msg, null);
261 | return error.UndeclaredFunction;
262 | }
263 | },
264 | }
265 | return result;
266 | }
267 | };
268 |
--------------------------------------------------------------------------------
/compiler/src/main.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 | const builtin = @import("builtin");
3 | const testing = @import("testing.zig");
4 | const Lexer = @import("frontend/lexer.zig").Lexer;
5 | const Parser = @import("frontend/parser.zig").Parser;
6 | const SemanticAnalysis = @import("frontend/semantic-analysis.zig").SemanticAnalysis;
7 | const Generator = @import("middleend/gen.zig").Generator;
8 | const Emitter = @import("backend/emission.zig").Emitter;
9 | const diagnostics = @import("diagnostics.zig");
10 | const prettyprinter = @import("prettyprinter.zig");
11 |
12 | pub fn main() !void {
13 | var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
14 | defer arena.deinit();
15 | defer diagnostics.arena.deinit();
16 | const allocator = arena.allocator();
17 |
18 | const debug_str = std.process.getEnvVarOwned(allocator, "DEBUG") catch "";
19 | defer if (debug_str.len > 0) allocator.free(debug_str);
20 | const debug_value = if (debug_str.len > 0)
21 | std.fmt.parseInt(u8, debug_str, 10) catch 0
22 | else
23 | 0;
24 |
25 | const args = try std.process.argsAlloc(allocator);
26 | if (args.len != 2) {
27 | std.debug.print("Usage: {s} \n", .{args[0]});
28 | std.process.exit(1);
29 | }
30 |
31 | const file_path = args[1];
32 | if (!std.mem.endsWith(u8, file_path, ".c")) {
33 | std.debug.print("Error: File must have .c extension\n", .{});
34 | std.process.exit(1);
35 | }
36 |
37 | const file = try std.fs.cwd().openFile(file_path, .{});
38 | defer file.close();
39 |
40 | const file_size = try file.getEndPos();
41 | const source = try allocator.alloc(u8, file_size);
42 | const bytes_read = try file.readAll(source);
43 |
44 | if (bytes_read != file_size) {
45 | std.debug.print("Error: Could not read entire file\n", .{});
46 | std.process.exit(1);
47 | }
48 |
49 | const assembly: []const u8 = generate(source, allocator, debug_value) catch {
50 | diagnostics.printAll();
51 | std.process.exit(1);
52 | };
53 |
54 | const dirname = std.fs.path.dirname(file_path) orelse ".";
55 | const stem = std.fs.path.stem(file_path);
56 |
57 | var path_buf: [std.fs.MAX_PATH_BYTES]u8 = undefined;
58 | const output_path = try std.fmt.bufPrint(&path_buf, "{s}/{s}.asm", .{
59 | dirname,
60 | stem,
61 | });
62 |
63 | const out_file = try std.fs.cwd().createFile(
64 | output_path,
65 | .{},
66 | );
67 | defer out_file.close();
68 |
69 | try out_file.writeAll(assembly);
70 | }
71 |
72 | fn generate(input: []const u8, allocator: std.mem.Allocator, debug_value: i32) ![]const u8 {
73 | var lexer = Lexer.init(allocator, input);
74 | lexer.scan();
75 |
76 | if (debug_value == 1 and builtin.is_test == false) {
77 | std.debug.print("\n======== Tokens ========\n", .{});
78 | for (lexer.tokens.items) |token| {
79 | std.debug.print("{s} '{?}' at line {d}\n", .{
80 | @tagName(token.type),
81 | token.literal,
82 | token.line,
83 | });
84 | }
85 | std.debug.print("========================\n", .{});
86 | }
87 |
88 | var parser = Parser.init(lexer.tokens.items, allocator);
89 | const program_definition = try parser.parse();
90 |
91 | if (debug_value == 1 and builtin.is_test == false) {
92 | std.debug.print("\n======== Program ========\n", .{});
93 | prettyprinter.printProgram(program_definition);
94 | std.debug.print("===========================\n", .{});
95 | }
96 |
97 | var semantic = SemanticAnalysis.init(allocator);
98 | const analyzed_program_definition = semantic.analyze(program_definition) catch |err| {
99 | return err;
100 | };
101 |
102 | if (debug_value == 1 and builtin.is_test == false) {
103 | std.debug.print("\n=== Semantic analysis ===\n", .{});
104 | prettyprinter.printProgram(analyzed_program_definition);
105 | std.debug.print("===========================\n", .{});
106 | }
107 |
108 | var generator = Generator.init(analyzed_program_definition, allocator);
109 | const program = try generator.generate();
110 |
111 | var emitter = Emitter.init(program);
112 |
113 | return try emitter.getAssemblyString(allocator);
114 | }
115 |
116 | test "basic addition" {
117 | const input =
118 | \\int main()
119 | \\{
120 | \\ return 2 + 6;
121 | \\}
122 | ;
123 |
124 | const check = testing.WireCheck{
125 | .tick = 10,
126 | .wire = "alu_inst.rd_data",
127 | .value = 8,
128 | };
129 |
130 | try testing.testWithSystemVerilog("basic_addition", input, &[_]testing.WireCheck{check});
131 | }
132 |
133 | test "basic precedence" {
134 | const input =
135 | \\int main()
136 | \\{
137 | \\ return 10 - 6 + 2 * 4;
138 | \\}
139 | ;
140 |
141 | const check = testing.WireCheck{
142 | .tick = 18,
143 | .wire = "alu_inst.rd_data",
144 | .value = 12,
145 | };
146 |
147 | try testing.testWithSystemVerilog("basic_precedence", input, &[_]testing.WireCheck{check});
148 | }
149 |
150 | test "basic precedence 2" {
151 | const input =
152 | \\int main()
153 | \\{
154 | \\ return 20 - 6 * (4 - 2);
155 | \\}
156 | ;
157 |
158 | const check = testing.WireCheck{
159 | .tick = 18,
160 | .wire = "alu_inst.rd_data",
161 | .value = 8,
162 | };
163 |
164 | try testing.testWithSystemVerilog("basic_precedence_2", input, &[_]testing.WireCheck{check});
165 | }
166 |
167 | test "and short circuit 1" {
168 | const input =
169 | \\int main()
170 | \\{
171 | \\ return 20 == 20 && 10 != 5;
172 | \\}
173 | ;
174 |
175 | const check = testing.WireCheck{
176 | .tick = 24,
177 | .wire = "instr_mem.addr",
178 | .value = 12,
179 | };
180 |
181 | try testing.testWithSystemVerilog("and_short_circuit_1", input, &[_]testing.WireCheck{check});
182 | }
183 |
184 | test "or short circuit 1" {
185 | const input =
186 | \\int main()
187 | \\{
188 | \\ return 3 > 3 || 19 != 19 || 4 <= 3 || 2 >= 2 || 2 != 2;
189 | \\}
190 | ;
191 |
192 | const check = testing.WireCheck{
193 | .tick = 50,
194 | .wire = "instr_mem.addr",
195 | .value = 30,
196 | };
197 |
198 | try testing.testWithSystemVerilog("or_short_circuit_1", input, &[_]testing.WireCheck{check});
199 | }
200 |
201 | test "and short circuit 2" {
202 | const input =
203 | \\int main()
204 | \\{
205 | \\ return 2 == 2 && 2 <= 1 && 5 > 3;
206 | \\}
207 | ;
208 |
209 | const check = testing.WireCheck{
210 | .tick = 26,
211 | .wire = "instr_mem.addr",
212 | .value = 16,
213 | };
214 |
215 | try testing.testWithSystemVerilog("and_short_circuit_2", input, &[_]testing.WireCheck{check});
216 | }
217 |
218 | test "variables 1" {
219 | const input =
220 | \\int main()
221 | \\{
222 | \\ int beh = 5 + 1;
223 | \\ int bah = beh - 1 * 2;
224 | \\ return bah + 8;
225 | \\}
226 | \\
227 | ;
228 |
229 | const check = testing.WireCheck{
230 | .tick = 38,
231 | .wire = "alu_inst.rd_data",
232 | .value = 12,
233 | };
234 |
235 | try testing.testWithSystemVerilog("variables_1", input, &[_]testing.WireCheck{check});
236 | }
237 |
238 | test "variables 2" {
239 | const input =
240 | \\int main()
241 | \\{
242 | \\ int beh = 5 + 1;
243 | \\ int bah = beh - 1 * 2;
244 | \\ int bumbam = beh + bah;
245 | \\ return bumbam + beh;
246 | \\}
247 | \\
248 | ;
249 |
250 | const check = testing.WireCheck{
251 | .tick = 54,
252 | .wire = "alu_inst.rd_data",
253 | .value = 16,
254 | };
255 |
256 | try testing.testWithSystemVerilog("variables_2", input, &[_]testing.WireCheck{check});
257 | }
258 |
259 | test "if 1" {
260 | const input =
261 | \\int main()
262 | \\{
263 | \\ int ab = 0;
264 | \\ if (ab != 0)
265 | \\ ab = 2;
266 | \\ else
267 | \\ ab = 19;
268 | \\}
269 | \\
270 | ;
271 |
272 | const check = testing.WireCheck{
273 | .tick = 34,
274 | .wire = "instr_mem.addr",
275 | .value = 21,
276 | };
277 |
278 | try testing.testWithSystemVerilog("if_1", input, &[_]testing.WireCheck{check});
279 | }
280 |
281 | test "if 2" {
282 | const input =
283 | \\int main()
284 | \\{
285 | \\ int ab = 0;
286 | \\ if (ab == 0)
287 | \\ if (ab != 0)
288 | \\ ab = 2;
289 | \\ else
290 | \\ ab = 7;
291 | \\ else
292 | \\ ab = 19;
293 | \\}
294 | \\
295 | ;
296 |
297 | const check = testing.WireCheck{
298 | .tick = 38,
299 | .wire = "instr_mem.addr",
300 | .value = 23,
301 | };
302 |
303 | try testing.testWithSystemVerilog("if_2", input, &[_]testing.WireCheck{check});
304 | }
305 |
306 | test "if 3" {
307 | const input =
308 | \\int main()
309 | \\{
310 | \\ int ab = 0;
311 | \\ if (ab != 0)
312 | \\ ab = 2;
313 | \\
314 | \\ ab = 62;
315 | \\}
316 | \\
317 | ;
318 |
319 | const check = testing.WireCheck{
320 | .tick = 26,
321 | .wire = "alu_inst.rd_data",
322 | .value = 62,
323 | };
324 |
325 | try testing.testWithSystemVerilog("if_3", input, &[_]testing.WireCheck{check});
326 | }
327 |
328 | test "compound if 1" {
329 | const input =
330 | \\int main()
331 | \\{
332 | \\ int ab = 0;
333 | \\ if (ab == 1)
334 | \\ {
335 | \\ ab = 5;
336 | \\ }
337 | \\ else if (ab == 2)
338 | \\ {
339 | \\ ab = 4;
340 | \\ }
341 | \\ else
342 | \\ {
343 | \\ ab = 3;
344 | \\ ab += 11;
345 | \\ }
346 | \\}
347 | \\
348 | ;
349 |
350 | const check = testing.WireCheck{
351 | .tick = 48,
352 | .wire = "alu_inst.rd_data",
353 | .value = 14,
354 | };
355 |
356 | try testing.testWithSystemVerilog("compund_if_1", input, &[_]testing.WireCheck{check});
357 | }
358 |
359 | test "multiple scopes variable resolution" {
360 | var arena = std.heap.ArenaAllocator.init(testing.allocator);
361 | defer arena.deinit();
362 | const allocator = arena.allocator();
363 | const input =
364 | \\int main()
365 | \\{
366 | \\ int x = 1;
367 | \\ {
368 | \\ int x = 2;
369 | \\ if (x > 1) {
370 | \\ x = 3;
371 | \\ int x = 4;
372 | \\ }
373 | \\ return x;
374 | \\ }
375 | \\ return x;
376 | \\}
377 | \\
378 | ;
379 |
380 | var lexer = Lexer.init(allocator, input);
381 | lexer.scan();
382 | var parser = Parser.init(lexer.tokens.items, allocator);
383 | const program_definition = try parser.parse();
384 |
385 | var semantic = SemanticAnalysis.init(allocator);
386 |
387 | const analyzed_program_definition = try semantic.analyze(program_definition);
388 |
389 | try std.testing.expectEqualStrings("var_0", analyzed_program_definition.function[0].body.?.block_items[0].declaration.variable_declaration.identifier);
390 | try std.testing.expectEqualStrings("var_1", analyzed_program_definition.function[0].body.?.block_items[1].statement.compound.block_items[0].declaration.variable_declaration.identifier);
391 | try std.testing.expectEqualStrings("var_1", analyzed_program_definition.function[0].body.?.block_items[1].statement.compound.block_items[1].statement.if_.then.compound.block_items[0].statement.exp.assignment.left.variable.identifier);
392 | try std.testing.expectEqualStrings("var_2", analyzed_program_definition.function[0].body.?.block_items[1].statement.compound.block_items[1].statement.if_.then.compound.block_items[1].declaration.variable_declaration.identifier);
393 | try std.testing.expectEqualStrings("var_1", analyzed_program_definition.function[0].body.?.block_items[1].statement.compound.block_items[2].statement.ret.exp.variable.identifier);
394 | try std.testing.expectEqualStrings("var_0", analyzed_program_definition.function[0].body.?.block_items[2].statement.ret.exp.variable.identifier);
395 | }
396 |
397 | test "loop labeling" {
398 | var arena = std.heap.ArenaAllocator.init(testing.allocator);
399 | defer arena.deinit();
400 | const allocator = arena.allocator();
401 | const input =
402 | \\int main()
403 | \\{
404 | \\ int a = 16;
405 | \\ int b = 2;
406 | \\ while (a > 0)
407 | \\ {
408 | \\ for (int i = 0; i < 10; i += 1)
409 | \\ {
410 | \\ if (i % 2 == 0)
411 | \\ continue;
412 | \\ a = a / 2;
413 | \\ }
414 | \\ if (a == b)
415 | \\ break;
416 | \\ }
417 | \\}
418 | \\
419 | ;
420 |
421 | var lexer = Lexer.init(allocator, input);
422 | lexer.scan();
423 | var parser = Parser.init(lexer.tokens.items, allocator);
424 | const program_definition = try parser.parse();
425 |
426 | var semantic = SemanticAnalysis.init(allocator);
427 | const analyzed_program_definition = try semantic.analyze(program_definition);
428 |
429 | try std.testing.expectEqualStrings("loop_0", analyzed_program_definition.function[0].body.?.block_items[2].statement.while_.identifier.?);
430 | try std.testing.expectEqualStrings("loop_1", analyzed_program_definition.function[0].body.?.block_items[2].statement.while_.body.compound.block_items[0].statement.for_.identifier.?);
431 | try std.testing.expectEqualStrings("loop_1", analyzed_program_definition.function[0].body.?.block_items[2].statement.while_.body.compound.block_items[0].statement.for_.body.compound.block_items[0].statement.if_.then.continue_.identifier.?);
432 | try std.testing.expectEqualStrings("loop_0", analyzed_program_definition.function[0].body.?.block_items[2].statement.while_.body.compound.block_items[1].statement.if_.then.*.break_.identifier.?);
433 | }
434 |
435 | test "while loop" {
436 | const input =
437 | \\int main()
438 | \\{
439 | \\ int a = 16;
440 | \\ while (a > 12)
441 | \\ {
442 | \\ a -= 2;
443 | \\ }
444 | \\}
445 | \\
446 | ;
447 |
448 | const check = testing.WireCheck{
449 | .tick = 70,
450 | .wire = "instr_mem.addr",
451 | .value = 18,
452 | };
453 |
454 | try testing.testWithSystemVerilog("while_loop", input, &[_]testing.WireCheck{check});
455 | }
456 |
457 | test "do while loop" {
458 | const input =
459 | \\int main()
460 | \\{
461 | \\ int a = 16;
462 | \\ do {
463 | \\ a -= 2;
464 | \\ } while (a > 12);
465 | \\}
466 | \\
467 | ;
468 |
469 | const check = testing.WireCheck{
470 | .tick = 56,
471 | .wire = "instr_mem.addr",
472 | .value = 17,
473 | };
474 |
475 | try testing.testWithSystemVerilog("do_while_loop", input, &[_]testing.WireCheck{check});
476 | }
477 |
478 | test "do while break loop" {
479 | const input =
480 | \\int main()
481 | \\{
482 | \\ int a = 16;
483 | \\ do
484 | \\ {
485 | \\ a -= 2;
486 | \\ if (a <= 12)
487 | \\ break;
488 | \\ } while (a > 10);
489 | \\}
490 | \\
491 | ;
492 |
493 | const check = testing.WireCheck{
494 | .tick = 72,
495 | .wire = "instr_mem.addr",
496 | .value = 24,
497 | };
498 |
499 | try testing.testWithSystemVerilog("do_while_break_loop", input, &[_]testing.WireCheck{check});
500 | }
501 |
502 | test "for continue loop" {
503 | const input =
504 | \\int main()
505 | \\{
506 | \\ int a = 16;
507 | \\ for (int i = 0; i < 4; i += 1)
508 | \\ {
509 | \\ if (i % 2 == 0)
510 | \\ {
511 | \\ a += 2;
512 | \\ continue;
513 | \\ }
514 | \\ a += 1;
515 | \\ }
516 | \\ a -= 10;
517 | \\}
518 | \\
519 | ;
520 |
521 | const check = testing.WireCheck{
522 | .tick = 246,
523 | .wire = "alu_inst.rd_data",
524 | .value = 12,
525 | };
526 |
527 | try testing.testWithSystemVerilog("for_continue_loop", input, &[_]testing.WireCheck{check});
528 | }
529 |
--------------------------------------------------------------------------------
/compiler/src/frontend/parser.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 | const tokens_script = @import("tokens.zig");
3 | const Token = tokens_script.Token;
4 | const TokenType = tokens_script.TokenType;
5 | const c_ast = @import("../ast/c.zig");
6 | const diagnostics = @import("../diagnostics.zig");
7 | const testing = @import("../testing.zig");
8 |
9 | pub const Parser = struct {
10 | tokens: []const Token,
11 | cursor: usize,
12 | allocator: std.mem.Allocator,
13 |
14 | pub fn init(tokens: []const Token, allocator: std.mem.Allocator) Parser {
15 | return Parser{
16 | .tokens = tokens,
17 | .cursor = 0,
18 | .allocator = allocator,
19 | };
20 | }
21 |
22 | pub fn parse(self: *Parser) !c_ast.Program {
23 | var function_array = std.ArrayList(c_ast.FunctionDeclaration).init(self.allocator);
24 | while (self.cursor < self.tokens.len - 1) {
25 | const func_decl = try self.parseDeclaration();
26 | try function_array.append(func_decl.function_declaration);
27 | self.cursor += 1;
28 | }
29 | return .{ .function = try function_array.toOwnedSlice() };
30 | }
31 |
32 | // TODO: can't start function with a left paren
33 | fn parseFunction(self: *Parser) !c_ast.FunctionDeclaration {
34 | try self.expect(.INT);
35 | self.cursor += 1;
36 | try self.expect(.IDENTIFIER);
37 |
38 | const identifier = self.curr().literal.?.string;
39 |
40 | self.cursor += 1;
41 | try self.expect(.LEFT_PAREN);
42 | self.cursor += 1;
43 |
44 | const params = try self.parseFunctionParams();
45 |
46 | try self.expect(.LEFT_BRACE);
47 | self.cursor += 1;
48 |
49 | const body = try self.parseBlock();
50 | self.cursor += 1;
51 |
52 | return .{
53 | .identifier = identifier,
54 | .params = params,
55 | .body = body,
56 | };
57 | }
58 |
59 | fn parseBlock(self: *Parser) !c_ast.Block {
60 | var function_body = std.ArrayList(c_ast.BlockItem).init(self.allocator);
61 |
62 | while (self.cursor < self.tokens.len - 1 and self.curr().type != .RIGHT_BRACE) {
63 | const block_item = try self.parseBlockItem();
64 | try function_body.append(block_item);
65 | self.cursor += 1;
66 | }
67 |
68 | return .{
69 | .block_items = try function_body.toOwnedSlice(),
70 | };
71 | }
72 |
73 | fn parseBlockItem(self: *Parser) !c_ast.BlockItem {
74 | switch (self.curr().type) {
75 | .INT => {
76 | return .{ .declaration = try self.parseDeclaration() };
77 | },
78 | else => return .{ .statement = try self.parseStatement() },
79 | }
80 | }
81 |
82 | fn parseDeclaration(self: *Parser) anyerror!c_ast.Declaration {
83 | try self.expect(.INT);
84 | self.cursor += 1;
85 | try self.expect(.IDENTIFIER);
86 |
87 | const identifier = self.curr().literal.?.string;
88 | self.cursor += 1;
89 |
90 | switch (self.curr().type) {
91 | .SEMICOLON => {
92 | return .{
93 | .variable_declaration = .{
94 | .identifier = identifier,
95 | .initial = null,
96 | },
97 | };
98 | },
99 | .LEFT_PAREN => {
100 | self.cursor += 1;
101 |
102 | const params = try self.parseFunctionParams();
103 |
104 | var body: ?c_ast.Block = null;
105 | if (self.curr().type != .SEMICOLON) {
106 | self.cursor += 1;
107 | body = try self.parseBlock();
108 | }
109 |
110 | return .{
111 | .function_declaration = c_ast.FunctionDeclaration{
112 | .identifier = identifier,
113 | .params = params,
114 | .body = body,
115 | },
116 | };
117 | },
118 | else => {
119 | try self.expect(.EQUAL);
120 | self.cursor += 1;
121 | const expression = try self.parseExpression(0);
122 |
123 | return .{ .variable_declaration = .{ .identifier = identifier, .initial = expression.* } };
124 | },
125 | }
126 | }
127 |
128 | // fn parseIf(self: *Parser) c_ast.If {}
129 |
130 | fn parseStatement(self: *Parser) anyerror!c_ast.Statement {
131 | switch (self.curr().type) {
132 | .RETURN => {
133 | self.cursor += 1;
134 |
135 | // ??????????????
136 | // ??????????????
137 | const expr_ptr = try self.parseExpression(0);
138 | const expr = expr_ptr.*;
139 | self.allocator.destroy(expr_ptr);
140 | return .{
141 | .ret = .{ .exp = expr },
142 | };
143 | },
144 | .IF => {
145 | self.cursor += 1;
146 | try self.expect(.LEFT_PAREN);
147 | self.cursor += 1;
148 | const condition = try self.parseExpression(0);
149 | try self.expect(.RIGHT_PAREN);
150 | self.cursor += 1;
151 |
152 | const then = try self.parseStatement();
153 | const then_ptr = try self.allocator.create(c_ast.Statement);
154 | then_ptr.* = then;
155 |
156 | var else_ptr: ?*c_ast.Statement = null;
157 |
158 | if (self.peek(1).type == .ELSE) {
159 | self.cursor += 2;
160 | const else_ = try self.parseStatement();
161 | else_ptr = try self.allocator.create(c_ast.Statement);
162 | else_ptr.?.* = else_;
163 | }
164 | return .{
165 | .if_ = .{
166 | .condition = condition.*,
167 | .then = then_ptr,
168 | .else_ = else_ptr,
169 | },
170 | };
171 | },
172 | .LEFT_BRACE => {
173 | self.cursor += 1;
174 | const block = try self.parseBlock();
175 | return .{
176 | .compound = block,
177 | };
178 | },
179 | .BREAK => {
180 | self.cursor += 1;
181 | return .{
182 | .break_ = .{ .identifier = null },
183 | };
184 | },
185 | .CONTINUE => {
186 | self.cursor += 1;
187 | return .{
188 | .continue_ = .{ .identifier = null },
189 | };
190 | },
191 | .WHILE => {
192 | self.cursor += 1;
193 | try self.expect(.LEFT_PAREN);
194 | const condition = try self.parseExpression(0);
195 | const body = try self.parseStatement();
196 | const body_ptr = try self.allocator.create(c_ast.Statement);
197 | body_ptr.* = body;
198 | return c_ast.Statement{
199 | .while_ = .{
200 | .body = body_ptr,
201 | .identifier = null,
202 | .condition = condition.*,
203 | },
204 | };
205 | },
206 | .DO => {
207 | self.cursor += 1;
208 | const body = try self.parseStatement();
209 | const body_ptr = try self.allocator.create(c_ast.Statement);
210 | body_ptr.* = body;
211 |
212 | self.cursor += 1;
213 | try self.expect(.WHILE);
214 | self.cursor += 1;
215 | try self.expect(.LEFT_PAREN);
216 | const condition = try self.parseExpression(0);
217 |
218 | return c_ast.Statement{
219 | .do_while = .{
220 | .body = body_ptr,
221 | .condition = condition.*,
222 | .identifier = null,
223 | },
224 | };
225 | },
226 | .FOR => {
227 | self.cursor += 1;
228 | try self.expect(.LEFT_PAREN);
229 | self.cursor += 1;
230 |
231 | var for_init: c_ast.ForInit = undefined;
232 | if (self.curr().type == .INT) {
233 | const declaration = try self.parseDeclaration();
234 | for_init = .{ .init_decl = declaration.variable_declaration };
235 | } else {
236 | const expression = try self.parseExpression(0);
237 | for_init = .{ .init_exp = expression.* };
238 | }
239 |
240 | var condition: ?*c_ast.Expression = null;
241 | if (self.curr().type != .RIGHT_PAREN) {
242 | self.cursor += 1;
243 | condition = try self.parseExpression(0);
244 | }
245 |
246 | var post: ?*c_ast.Expression = null;
247 | if (self.curr().type != .RIGHT_PAREN) {
248 | self.cursor += 1;
249 | post = try self.parseExpression(0);
250 | }
251 |
252 | self.cursor += 1;
253 | const body = try self.parseStatement();
254 | const body_ptr = try self.allocator.create(c_ast.Statement);
255 | body_ptr.* = body;
256 |
257 | return c_ast.Statement{
258 | .for_ = .{
259 | .init = for_init,
260 | .condition = if (condition != null) condition.?.* else null,
261 | .post = if (post != null) post.?.* else null,
262 | .body = body_ptr,
263 | .identifier = null,
264 | },
265 | };
266 | },
267 | else => {
268 | const expr_ptr = try self.parseExpression(0);
269 | const expr = expr_ptr.*;
270 | self.allocator.destroy(expr_ptr);
271 | return .{
272 | .exp = expr,
273 | };
274 | },
275 | }
276 | }
277 |
278 | fn parseExpression(self: *Parser, min_prec: i16) anyerror!*c_ast.Expression {
279 | var left = try self.parseFactor();
280 |
281 | while (self.cursor < self.tokens.len and
282 | tokens_script.is_binary_operator(self.curr().type) and
283 | self.precedence(self.curr()) >= min_prec)
284 | {
285 | const curr_prec = self.precedence(self.curr());
286 |
287 | if (self.curr().type == .EQUAL) {
288 | self.cursor += 1;
289 | const right = try self.parseExpression(curr_prec);
290 |
291 | const new_expr = try self.allocator.create(c_ast.Expression);
292 | new_expr.* = .{
293 | .assignment = .{
294 | .left = left,
295 | .right = right,
296 | },
297 | };
298 |
299 | left = new_expr;
300 | }
301 | // check inplace operators
302 | //
303 | else if (tokens_script.is_in_place_starter(self.curr().type) == true and self.peek(-1).type == .IDENTIFIER and self.peek(1).type == .EQUAL) {
304 | const operator = self.parseBinop();
305 | self.cursor += 1;
306 | try self.expect(.EQUAL);
307 | self.cursor += 1;
308 |
309 | const variable_expr = try self.allocator.create(c_ast.Expression);
310 | variable_expr.* = .{
311 | .variable = .{ .identifier = self.peek(-3).literal.?.string },
312 | };
313 |
314 | const binary_expr = try self.allocator.create(c_ast.Expression);
315 | binary_expr.* = .{
316 | .binary = .{
317 | .operator = operator,
318 | .left = variable_expr,
319 | .right = try self.parseExpression(curr_prec),
320 | },
321 | };
322 |
323 | const new_expr = try self.allocator.create(c_ast.Expression);
324 | new_expr.* = .{
325 | .assignment = .{
326 | .left = left,
327 | .right = binary_expr,
328 | },
329 | };
330 |
331 | left = new_expr;
332 | } else {
333 | const operator = self.parseBinop();
334 | self.cursor += 1;
335 |
336 | const right = try self.parseExpression(curr_prec + 1);
337 |
338 | const new_expr = try self.allocator.create(c_ast.Expression);
339 | new_expr.* = .{
340 | .binary = .{
341 | .operator = operator,
342 | .left = left,
343 | .right = right,
344 | },
345 | };
346 |
347 | left = new_expr;
348 | }
349 | }
350 |
351 | return left;
352 | }
353 |
354 | fn parseFactor(self: *Parser) !*c_ast.Expression {
355 | var expr = try self.allocator.create(c_ast.Expression);
356 |
357 | switch (self.curr().type) {
358 | .NUMBER => {
359 | expr.* = .{
360 | .constant = self.curr().literal.?.number,
361 | };
362 | self.cursor += 1;
363 | },
364 | .LEFT_PAREN => {
365 | self.cursor += 1;
366 | const inner_expr = try self.parseExpression(0);
367 | try self.expect(.RIGHT_PAREN);
368 | self.cursor += 1;
369 |
370 | expr = inner_expr;
371 | },
372 | .IDENTIFIER => {
373 | switch (self.peek(1).type) {
374 | .LEFT_PAREN => {
375 | self.cursor += 2;
376 | expr.* = .{
377 | .function_call = c_ast.FunctionCall{
378 | .identifier = self.peek(-2).literal.?.string,
379 | .args = try self.parseFunctionArgs(),
380 | },
381 | };
382 | },
383 | else => {
384 | expr.* = .{
385 | .variable = .{
386 | .identifier = self.curr().literal.?.string,
387 | },
388 | };
389 | self.cursor += 1;
390 | },
391 | }
392 | },
393 | else => {
394 | const msg = try std.fmt.allocPrint(diagnostics.arena.allocator(), "Syntax error at line {}. Expected one of the following: NUMBER, LEFT_PAREN, IDENTIFIER. Got token type {}", .{
395 | self.curr().line,
396 | self.curr().type,
397 | });
398 | diagnostics.addError(msg, self.curr().line);
399 | return error.SyntaxError;
400 | },
401 | }
402 |
403 | return expr;
404 | }
405 |
406 | fn parseFunctionParams(self: *Parser) ![][]const u8 {
407 | if (self.curr().type == .RIGHT_PAREN) {
408 | self.cursor += 1;
409 | return &[_][]const u8{};
410 | }
411 |
412 | var param_list = std.ArrayList([]const u8).init(self.allocator);
413 |
414 | try self.expect(.INT);
415 | self.cursor += 1;
416 | try param_list.append(self.curr().literal.?.string);
417 | self.cursor += 1;
418 |
419 | while (self.curr().type != .RIGHT_PAREN) {
420 | try self.expect(.COMMA);
421 | self.cursor += 1;
422 | try self.expect(.INT);
423 | self.cursor += 1;
424 | try param_list.append(self.curr().literal.?.string);
425 | self.cursor += 1;
426 | }
427 |
428 | try self.expect(.RIGHT_PAREN);
429 | self.cursor += 1;
430 |
431 | return try param_list.toOwnedSlice();
432 | }
433 |
434 | fn parseFunctionArgs(self: *Parser) ![]*c_ast.Expression {
435 | if (self.curr().type == .RIGHT_PAREN) {
436 | self.cursor += 1;
437 | return &[_]*c_ast.Expression{};
438 | }
439 | var param_list = std.ArrayList(*c_ast.Expression).init(self.allocator);
440 |
441 | try param_list.append(try self.parseExpression(0));
442 |
443 | while (self.curr().type != .RIGHT_PAREN) {
444 | try self.expect(.COMMA);
445 | self.cursor += 1;
446 | try param_list.append(try self.parseExpression(0));
447 | }
448 |
449 | try self.expect(.RIGHT_PAREN);
450 | self.cursor += 1;
451 |
452 | return try param_list.toOwnedSlice();
453 | }
454 |
455 | fn parseBinop(self: *Parser) c_ast.BinaryOperator {
456 | switch (self.curr().type) {
457 | .PLUS => return .Add,
458 | .MINUS => return .Subtract,
459 | .STAR => return .Multiply,
460 | .SLASH => return .Divide,
461 | .PERCENTAGE => return .Remainder,
462 |
463 | .AMPERSAND => return .Bitwise_AND,
464 | .PIPE => return .Bitwise_OR,
465 | .CARET => return .Bitwise_XOR,
466 | .LEFT_SHIFT => return .Left_Shift,
467 | .RIGHT_SHIFT => return .Right_Shift,
468 |
469 | .LESS => return .Less,
470 | .LESS_EQUAL => return .Less_Or_Equal,
471 | .GREATER => return .Greater,
472 | .GREATER_EQUAL => return .Greater_Or_Equal,
473 | .EQUAL_EQUAL => return .Equal,
474 | .BANG_EQUAL => return .Not_Equal,
475 | .AMPERSAND_AMPERSAND => return .And,
476 | .PIPE_PIPE => return .Or,
477 | else => unreachable,
478 | }
479 | }
480 |
481 | fn precedence(self: *Parser, token: Token) i16 {
482 | _ = self;
483 | switch (token.type) {
484 | .EQUAL => return 1,
485 | .PIPE_PIPE => return 5,
486 | .AMPERSAND_AMPERSAND => return 10,
487 | .EQUAL_EQUAL, .BANG_EQUAL => return 30,
488 | .LESS, .LESS_EQUAL, .GREATER, .GREATER_EQUAL => return 35,
489 | .LEFT_SHIFT, .RIGHT_SHIFT => return 48,
490 | .AMPERSAND => return 47,
491 | .CARET => return 46,
492 | .PIPE => return 45,
493 | .PLUS, .MINUS => return 45,
494 | .STAR, .SLASH, .PERCENTAGE => return 50,
495 | else => unreachable,
496 | }
497 | }
498 |
499 | fn expect(self: *Parser, token_type: TokenType) !void {
500 | if (self.curr().type != token_type) {
501 | const msg = try std.fmt.allocPrint(diagnostics.arena.allocator(), "Syntax error. Expected token type {}. Got token type {}", .{
502 | token_type,
503 | self.curr().type,
504 | });
505 | diagnostics.addError(msg, self.curr().line);
506 | return error.SyntaxError;
507 | }
508 | }
509 |
510 | fn curr(self: *Parser) Token {
511 | return self.tokens[self.cursor];
512 | }
513 |
514 | fn peek(self: *Parser, offset: i32) Token {
515 | return self.tokens[@intCast(@as(i32, @intCast(self.cursor)) + offset)];
516 | }
517 |
518 | fn printCurr(self: *Parser) void {
519 | std.debug.print("Current token: {}\n", .{self.curr()});
520 | }
521 | };
522 |
523 | test "multi function + parameter syntax error" {
524 | const input =
525 | \\ int foo(int a, int b)
526 | \\{
527 | \\ return a + b;
528 | \\}
529 | \\
530 | \\int main()
531 | \\{
532 | \\ foo(1, int 1);
533 | \\}
534 | \\}
535 | \\
536 | ;
537 |
538 | const result = testing.cToSemanticAnalysis(input);
539 | try testing.expectError(error.SyntaxError, result);
540 | }
541 |
--------------------------------------------------------------------------------
/compiler/src/middleend/gen.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 | const c_ast = @import("../ast/c.zig");
3 | const asm_ast = @import("../ast/asm.zig");
4 | const diagnostics = @import("../diagnostics.zig");
5 | const RegisterAllocator = @import("register-allocator.zig").RegisterAllocator;
6 |
7 | pub const Generator = struct {
8 | allocator: std.mem.Allocator,
9 | program: c_ast.Program,
10 | instruction_buffer: std.ArrayList(asm_ast.Instruction),
11 | rd: asm_ast.Reg,
12 | rs1: asm_ast.Reg,
13 | rs2: asm_ast.Reg,
14 | immediate: i32 = 0,
15 | label: []const u8,
16 | variable_store: std.ArrayList([]const u8),
17 | if_counter: u32 = 0,
18 | short_circuit_counter: u32 = 0,
19 | reg_bitmap: std.bit_set.IntegerBitSet(6),
20 | function_line: usize = 0,
21 |
22 | ra: RegisterAllocator,
23 |
24 | pub fn init(program: c_ast.Program, allocator: std.mem.Allocator) Generator {
25 | return .{
26 | .allocator = allocator,
27 | .program = program,
28 | .instruction_buffer = std.ArrayList(asm_ast.Instruction).init(allocator),
29 | .rd = asm_ast.Reg.t1,
30 | .rs1 = asm_ast.Reg.t1,
31 | .rs2 = asm_ast.Reg.t1,
32 | .label = "",
33 | .variable_store = std.ArrayList([]const u8).init(allocator),
34 | .reg_bitmap = std.bit_set.IntegerBitSet(6).initEmpty(),
35 | .ra = RegisterAllocator.init(allocator),
36 | };
37 | }
38 |
39 | fn allocReg(self: *Generator) asm_ast.Reg {
40 | const allocation_order = [_]asm_ast.Reg{ .a0, .a1, .t0, .t1, .t2, .t3 };
41 |
42 | for (allocation_order, 0..) |reg, i| {
43 | if (!self.reg_bitmap.isSet(i)) {
44 | self.reg_bitmap.set(i);
45 | return reg;
46 | }
47 | }
48 |
49 | @panic("can't allocate more than 6 registers yet");
50 | }
51 |
52 | fn getSecondLastReg(self: *Generator) asm_ast.Reg {
53 | const allocation_order = [_]asm_ast.Reg{ .a0, .a1, .t0, .t1, .t2, .t3 };
54 |
55 | for (allocation_order, 0..) |reg, i| {
56 | _ = reg;
57 | if (!self.reg_bitmap.isSet(i) or i == allocation_order.len - 1) {
58 | return allocation_order[i - 2];
59 | }
60 | }
61 |
62 | @panic("no or 1 registers allocated");
63 | }
64 |
65 | fn getLastReg(self: *Generator) asm_ast.Reg {
66 | const allocation_order = [_]asm_ast.Reg{ .a0, .a1, .t0, .t1, .t2, .t3 };
67 |
68 | for (allocation_order, 0..) |reg, i| {
69 | _ = reg;
70 | if (!self.reg_bitmap.isSet(i) or i == allocation_order.len - 1) {
71 | if (i == 0) {
72 | @panic("no registers allocated");
73 | } else {
74 | return allocation_order[i - 1];
75 | }
76 | }
77 | }
78 |
79 | @panic("no registers allocated");
80 | }
81 |
82 | fn freeLastReg(self: *Generator) void {
83 | const allocation_order = [_]asm_ast.Reg{ .a0, .a1, .t0, .t1, .t2, .t3 };
84 |
85 | for (allocation_order, 0..) |reg, i| {
86 | _ = reg;
87 | if (!self.reg_bitmap.isSet(i)) {
88 | if (i == 0) {
89 | @panic("trying to free when all registers are already free");
90 | } else {
91 | self.reg_bitmap.unset(i - 1);
92 | return;
93 | }
94 | }
95 | }
96 | }
97 |
98 | fn appendLabel(self: *Generator, label: []const u8) !void {
99 | try self.instruction_buffer.append(.{ .label = .{ .name = label } });
100 | }
101 |
102 | fn appendInstr(self: *Generator, instr: asm_ast.InstructionType) void {
103 | const instr_converted = asm_ast.convert(instr);
104 | const instruction = switch (instr_converted) {
105 | .rtype => asm_ast.Instruction{
106 | .rtype = .{
107 | .instr = instr_converted.rtype,
108 | .destination = self.rd,
109 | .source1 = self.rs1,
110 | .source2 = self.rs2,
111 | },
112 | },
113 | .itype => asm_ast.Instruction{
114 | .itype = .{
115 | .instr = instr_converted.itype,
116 | .destination = self.rd,
117 | .source = self.rs1,
118 | .immediate = self.immediate,
119 | },
120 | },
121 | .btype => asm_ast.Instruction{
122 | .btype = .{
123 | .instr = instr_converted.btype,
124 | .source1 = self.rs1,
125 | .source2 = self.rs2,
126 | .label = self.label,
127 | },
128 | },
129 | .stype => asm_ast.Instruction{
130 | .stype = .{
131 | .immediate = self.immediate,
132 | .instr = instr_converted.stype,
133 | .source1 = self.rs1,
134 | .source2 = self.rs2,
135 | },
136 | },
137 | .jtype => asm_ast.Instruction{
138 | .jtype = .{
139 | .label = self.label,
140 | .instr = instr_converted.jtype,
141 | .destination = self.rd,
142 | },
143 | },
144 | else => @panic("fuck you"),
145 | };
146 | self.instruction_buffer.append(instruction) catch @panic("Failed to append instruction");
147 | }
148 |
149 | fn appendRType(self: *Generator, rtype: asm_ast.RType) !void {
150 | try self.instruction_buffer.append(.{ .rtype = rtype });
151 | }
152 |
153 | fn appendIType(self: *Generator, itype: asm_ast.IType) !void {
154 | try self.instruction_buffer.append(.{ .itype = itype });
155 | }
156 |
157 | fn appendSType(self: *Generator, stype: asm_ast.SType) !void {
158 | try self.instruction_buffer.append(.{ .stype = stype });
159 | }
160 |
161 | fn appendBType(self: *Generator, btype: asm_ast.BType) !void {
162 | try self.instruction_buffer.append(.{ .btype = btype });
163 | }
164 |
165 | fn appendJType(self: *Generator, jtype: asm_ast.JType) !void {
166 | try self.instruction_buffer.append(.{ .jtype = jtype });
167 | }
168 |
169 | fn loadImmediate(self: *Generator, value: i32) !asm_ast.Reg {
170 | const dest_reg = self.ra.getTempRegister(self.function_line);
171 | const unsigned_val: u32 = @bitCast(value);
172 | const upper_bits: u20 = @truncate(unsigned_val >> 12);
173 | const lower_bits: u12 = @truncate(unsigned_val);
174 |
175 | var add_template = asm_ast.IType{
176 | .instr = .ADDI,
177 | .destination = dest_reg,
178 | .source = .zero,
179 | .immediate = lower_bits,
180 | };
181 |
182 | if (upper_bits != 0) {
183 | try self.instruction_buffer.append(.{
184 | .utype = .{
185 | .instr = .LUI,
186 | .destination = dest_reg,
187 | .immediate = upper_bits,
188 | },
189 | });
190 |
191 | add_template.source = dest_reg;
192 | }
193 |
194 | try self.instruction_buffer.append(.{ .itype = add_template });
195 |
196 | return dest_reg;
197 | }
198 |
199 | fn appendOperator(self: *Generator, operator: c_ast.BinaryOperator, left: asm_ast.Reg, right: asm_ast.Reg) !void {
200 | var template = asm_ast.RType{
201 | .instr = .ADD,
202 | .destination = left,
203 | .source1 = left,
204 | .source2 = right,
205 | };
206 |
207 | switch (operator) {
208 | .Add => {
209 | try self.appendRType(template);
210 | },
211 | .Subtract => {
212 | template.instr = .SUB;
213 | try self.appendRType(template);
214 | },
215 | .Multiply => {
216 | template.instr = .MUL;
217 | try self.appendRType(template);
218 | },
219 | .Divide => {
220 | template.instr = .DIV;
221 | try self.appendRType(template);
222 | },
223 | .Remainder => {
224 | template.instr = .REM;
225 | try self.appendRType(template);
226 | },
227 | .Bitwise_AND => {
228 | template.instr = .AND;
229 | try self.appendRType(template);
230 | },
231 | .Bitwise_OR => {
232 | template.instr = .OR;
233 | try self.appendRType(template);
234 | },
235 | .Bitwise_XOR => {
236 | template.instr = .XOR;
237 | try self.appendRType(template);
238 | },
239 | .Left_Shift => {
240 | template.instr = .SLL;
241 | try self.appendRType(template);
242 | },
243 | .Right_Shift => {
244 | template.instr = .SRL;
245 | try self.appendRType(template);
246 | },
247 | .Less => {
248 | template.instr = .SLT;
249 | try self.appendRType(template);
250 | },
251 | .Less_Or_Equal => {
252 | template.instr = .SLT;
253 | template.source1 = right;
254 | template.source2 = left;
255 | try self.appendRType(template);
256 | try self.appendIType(asm_ast.IType{ .immediate = 1, .source = left, .instr = .XORI, .destination = left });
257 | },
258 | .Greater => {
259 | template.source1 = right;
260 | template.source2 = left;
261 | template.instr = .SLT;
262 | try self.appendRType(template);
263 | },
264 | .Greater_Or_Equal => {
265 | template.instr = .SLT;
266 | try self.appendRType(template);
267 | try self.appendIType(asm_ast.IType{ .immediate = 1, .source = left, .instr = .XORI, .destination = left });
268 | },
269 | .Equal => {
270 | template.instr = .SUB;
271 | try self.appendRType(template);
272 | try self.appendIType(asm_ast.IType{ .immediate = 1, .source = left, .instr = .SLTIU, .destination = left });
273 | },
274 | .Not_Equal => {
275 | var itype_template = asm_ast.IType{ .immediate = 1, .source = left, .instr = .SLTIU, .destination = left };
276 | template.instr = .SUB;
277 | try self.appendRType(template);
278 | try self.appendIType(itype_template);
279 | itype_template.instr = .XORI;
280 | try self.appendIType(itype_template);
281 | },
282 | .And, .Or => @panic("And and Or operators ran in appendOperator even though they have a separate function for generation. This shouldn't happen this is a bug"),
283 | }
284 | }
285 |
286 | fn getVariableId(self: *Generator, identifier: []const u8) !i32 {
287 | var variable: ?i32 = null;
288 | for (self.variable_store.items, 0..) |item, index| {
289 | if (std.mem.eql(u8, item, identifier)) {
290 | variable = @intCast(index);
291 | }
292 | }
293 |
294 | if (variable == null) {
295 | try self.variable_store.append(identifier);
296 | variable = @as(i32, @intCast(self.variable_store.items.len)) - 1;
297 | }
298 |
299 | return variable.?;
300 | }
301 |
302 | fn generateShortCircuitingBinary(self: *Generator, binary: c_ast.Binary, label_name: []const u8, is_and: bool) !asm_ast.Reg {
303 | const is_short_circuit = binary.operator.getType() == .SHORT_CIRCUIT;
304 | if (is_short_circuit == true) {
305 | const source = try self.generateShortCircuitingBinary(binary.left.*.binary, label_name, binary.operator == .And);
306 | try self.appendBType(asm_ast.BType{ .instr = if (is_and) .BEQ else .BNE, .label = label_name, .source1 = source, .source2 = .zero });
307 | self.ra.expireRegister(source);
308 | } else {
309 | _ = try self.generateExpression(binary.left.*);
310 | }
311 |
312 | return try self.generateExpression(binary.right.*);
313 | }
314 |
315 | fn generateBinary(self: *Generator, binary: c_ast.Binary) !asm_ast.Reg {
316 | const optype = binary.operator.getType();
317 |
318 | if (optype == .SHORT_CIRCUIT) {
319 | if (binary.left.binary.operator.getType() == .SHORT_CIRCUIT or
320 | (binary.left.* == .binary and binary.right.* == .binary and
321 | binary.left.binary.operator.getType() == .COMPARISON and
322 | binary.right.binary.operator.getType() == .COMPARISON))
323 | {
324 | var label_name_list = std.ArrayList(u8).init(self.allocator);
325 | try label_name_list.appendSlice("short_circuit_end_");
326 | try std.fmt.format(label_name_list.writer(), "{d}", .{self.short_circuit_counter});
327 | self.short_circuit_counter += 1;
328 | const label_name = try label_name_list.toOwnedSlice();
329 |
330 | const dest = try self.generateShortCircuitingBinary(binary, label_name, binary.operator == .And);
331 |
332 | try self.appendLabel(label_name);
333 |
334 | return dest;
335 | } else {
336 | @panic("Can't short-circuit non comparison operators");
337 | }
338 | } else {
339 | // check if right side expression is a constant. if it is, evaluate left side first (non constant)
340 | // all expressions return a0. a1 is used for internal calculations. in other words, all right side expressions return a0, and left side return a1
341 | const right_is_const = binary.right.* == .constant;
342 | var left: ?asm_ast.Reg = null;
343 | if (right_is_const) {
344 | left = try self.generateExpression(binary.left.*);
345 | }
346 |
347 | const right = try self.generateExpression(binary.right.*);
348 |
349 | if (!right_is_const) {
350 | left = try self.generateExpression(binary.left.*);
351 | }
352 |
353 | try self.appendOperator(binary.operator, left.?, right);
354 |
355 | self.ra.expireRegister(right);
356 | return left.?;
357 | }
358 | }
359 |
360 | fn generateExpression(self: *Generator, exp: c_ast.Expression) anyerror!asm_ast.Reg {
361 | switch (exp) {
362 | .assignment => |assignment| {
363 | _ = try self.generateExpression(assignment.right.*);
364 | _ = try self.loadImmediate(try self.getVariableId(assignment.left.*.variable.identifier));
365 |
366 | try self.appendSType(asm_ast.SType{ .immediate = 0, .source1 = self.getSecondLastReg(), .source2 = self.getLastReg(), .instr = .SW });
367 | self.freeLastReg();
368 | },
369 | .variable => |variable| {
370 | _ = try self.loadImmediate(try self.getVariableId(variable.identifier));
371 |
372 | try self.appendIType(asm_ast.IType{ .destination = self.getLastReg(), .immediate = 0, .instr = .LW, .source = self.getLastReg() });
373 | },
374 | .constant => |constant| {
375 | return try self.loadImmediate(constant);
376 | },
377 | .binary => |binary| {
378 | return try self.generateBinary(binary);
379 | },
380 | .function_call => |function_call| {
381 | var arg_counter: usize = 0;
382 | for (function_call.args) |arg| {
383 | _ = try self.generateExpression(arg.*);
384 | self.rs1 = .t1;
385 | self.rs2 = .zero;
386 | // scuffed as shit. please remove this asap i beg you
387 | switch (arg_counter) {
388 | 0 => self.rd = .a0,
389 | 1 => self.rd = .a1,
390 | 2 => self.rd = .a2,
391 | 3 => self.rd = .a3,
392 | 4 => self.rd = .a4,
393 | 5 => self.rd = .a5,
394 | 6 => self.rd = .a6,
395 | 7 => self.rd = .a7,
396 | else => diagnostics.addError("Having more than 8 function arguments not supported yet", 0),
397 | }
398 | self.appendInstr(.ADD);
399 | arg_counter += 1;
400 | }
401 |
402 | self.label = function_call.identifier;
403 | self.rd = .ra;
404 | self.appendInstr(.JAL);
405 |
406 | self.rs1 = .a0;
407 | self.rs2 = .zero;
408 | self.rd = .t1;
409 | self.appendInstr(.ADD);
410 | },
411 | }
412 |
413 | @panic("not implemented yet");
414 | }
415 |
416 | fn generateIf(self: *Generator, if_: c_ast.If) !void {
417 | var if_name_array = std.ArrayList(u8).init(self.allocator);
418 | defer if_name_array.deinit();
419 | try std.fmt.format(if_name_array.writer(), "if_end_{d}", .{self.if_counter});
420 | const if_name = try if_name_array.toOwnedSlice();
421 | var else_name_array = std.ArrayList(u8).init(self.allocator);
422 | defer else_name_array.deinit();
423 | try std.fmt.format(else_name_array.writer(), "else_end_{d}", .{self.if_counter});
424 | const else_name = try else_name_array.toOwnedSlice();
425 | self.if_counter += 1;
426 |
427 | _ = try self.generateExpression(if_.condition);
428 |
429 | try self.appendBType(asm_ast.BType{ .instr = .BEQ, .label = if_name, .source1 = .zero, .source2 = self.getLastReg() });
430 | self.freeLastReg();
431 |
432 | try self.generateStatement(if_.then.*);
433 | if (if_.else_ != null) {
434 | try self.appendJType(asm_ast.JType{ .destination = self.getLastReg(), .instr = .JAL, .label = else_name });
435 | }
436 | self.freeLastReg();
437 |
438 | try self.appendLabel(if_name);
439 |
440 | if (if_.else_ != null) {
441 | try self.generateStatement(if_.else_.?.*);
442 |
443 | try self.appendLabel(else_name);
444 | }
445 | }
446 |
447 | fn generateStatement(self: *Generator, statement: c_ast.Statement) anyerror!void {
448 | switch (statement) {
449 | .ret => |ret| {
450 | _ = try self.generateExpression(ret.exp);
451 | },
452 | .exp => |exp| {
453 | _ = try self.generateExpression(exp);
454 | },
455 | .if_ => |if_| {
456 | try self.generateIf(if_);
457 | },
458 | .compound => |compound| {
459 | for (compound.block_items) |block_item| {
460 | switch (block_item) {
461 | .statement => {
462 | try self.generateStatement(block_item.statement);
463 | },
464 | .declaration => {
465 | try self.generateDeclaration(block_item.declaration);
466 | },
467 | }
468 | }
469 | },
470 | .break_ => |break_| {
471 | const identifier = try std.fmt.allocPrint(self.allocator, "break_{s}", .{break_.identifier.?});
472 |
473 | try self.appendJType(asm_ast.JType{ .label = identifier, .destination = .zero, .instr = .JAL });
474 | },
475 | .continue_ => |continue_| {
476 | const identifier = try std.fmt.allocPrint(self.allocator, "continue_{s}", .{continue_.identifier.?});
477 |
478 | try self.appendJType(asm_ast.JType{ .label = identifier, .destination = .zero, .instr = .JAL });
479 | },
480 | .do_while => |do_while| {
481 | const identifier_start = try std.fmt.allocPrint(self.allocator, "{s}_start", .{do_while.identifier.?});
482 | const identifier_continue = try std.fmt.allocPrint(self.allocator, "continue_{s}", .{do_while.identifier.?});
483 | const identifier_break = try std.fmt.allocPrint(self.allocator, "break_{s}", .{do_while.identifier.?});
484 | try self.appendLabel(identifier_start);
485 |
486 | try self.generateStatement(do_while.body.*);
487 | try self.appendLabel(identifier_continue);
488 |
489 | _ = try self.generateExpression(do_while.condition);
490 |
491 | try self.appendBType(asm_ast.BType{ .instr = .BNE, .label = identifier_start, .source1 = self.getLastReg(), .source2 = .zero });
492 |
493 | try self.appendLabel(identifier_break);
494 | },
495 | .for_ => |for_| {
496 | const identifier_start = try std.fmt.allocPrint(self.allocator, "{s}_start", .{for_.identifier.?});
497 | const identifier_continue = try std.fmt.allocPrint(self.allocator, "continue_{s}", .{for_.identifier.?});
498 | const identifier_break = try std.fmt.allocPrint(self.allocator, "break_{s}", .{for_.identifier.?});
499 |
500 | switch (for_.init) {
501 | .init_decl => try self.generateDeclaration(.{ .variable_declaration = for_.init.init_decl }),
502 | .init_exp => if (for_.init.init_exp != null) {
503 | _ = try self.generateExpression(for_.init.init_exp.?);
504 | },
505 | }
506 | try self.appendLabel(identifier_start);
507 |
508 | if (for_.condition != null) _ = try self.generateExpression(for_.condition.?);
509 |
510 | try self.appendBType(asm_ast.BType{ .instr = .BEQ, .label = identifier_break, .source1 = self.getLastReg(), .source2 = .zero });
511 | self.freeLastReg();
512 |
513 | try self.generateStatement(for_.body.*);
514 | self.freeLastReg();
515 |
516 | try self.appendLabel(identifier_continue);
517 |
518 | if (for_.post != null) _ = try self.generateExpression(for_.post.?);
519 |
520 | try self.appendJType(asm_ast.JType{ .label = identifier_start, .destination = .zero, .instr = .JAL });
521 | try self.appendLabel(identifier_break);
522 | self.freeLastReg();
523 | },
524 | .while_ => |while_| {
525 | const identifier_continue = try std.fmt.allocPrint(self.allocator, "continue_{s}", .{while_.identifier.?});
526 | const identifier_break = try std.fmt.allocPrint(self.allocator, "break_{s}", .{while_.identifier.?});
527 | try self.appendLabel(identifier_continue);
528 |
529 | _ = try self.generateExpression(while_.condition);
530 |
531 | try self.appendBType(asm_ast.BType{ .instr = .BEQ, .label = identifier_break, .source1 = self.getLastReg(), .source2 = .zero });
532 |
533 | try self.generateStatement(while_.body.*);
534 |
535 | try self.appendJType(asm_ast.JType{ .destination = .zero, .instr = .JAL, .label = identifier_continue });
536 |
537 | try self.appendLabel(identifier_break);
538 | },
539 | }
540 | }
541 |
542 | fn generateDeclaration(self: *Generator, declaration: c_ast.Declaration) !void {
543 | switch (declaration) {
544 | .variable_declaration => {
545 | if (declaration.variable_declaration.initial == null) return else {
546 | _ = try self.generateExpression(declaration.variable_declaration.initial.?);
547 | _ = try self.loadImmediate(try self.getVariableId(declaration.variable_declaration.identifier));
548 |
549 | try self.appendSType(asm_ast.SType{ .immediate = 0, .source1 = self.getSecondLastReg(), .source2 = self.getLastReg(), .instr = .SW });
550 | self.freeLastReg();
551 | self.freeLastReg();
552 | }
553 | },
554 | else => {},
555 | }
556 | }
557 |
558 | fn generateBlock(self: *Generator, block: c_ast.Block) !void {
559 | for (block.block_items) |block_item| {
560 | switch (block_item) {
561 | .statement => {
562 | try self.generateStatement(block_item.statement);
563 | },
564 | .declaration => {
565 | try self.generateDeclaration(block_item.declaration);
566 | },
567 | }
568 | self.function_line += 1;
569 | }
570 | }
571 |
572 | fn generateFunction(self: *Generator, function: c_ast.FunctionDeclaration) !void {
573 | try self.ra.scanFunction(function);
574 | self.function_line = 0;
575 |
576 | if (std.mem.eql(u8, function.identifier, "main")) {
577 | try self.appendLabel("_start");
578 | } else {
579 | try self.appendLabel(function.identifier);
580 | }
581 |
582 | try self.appendIType(asm_ast.IType{ .destination = .sp, .source = .sp, .immediate = -1, .instr = .ADDI });
583 | try self.appendSType(asm_ast.SType{ .instr = .SW, .source1 = .ra, .immediate = 0, .source2 = .sp });
584 |
585 | if (function.body != null) try self.generateBlock(function.body.?);
586 |
587 | try self.appendIType(asm_ast.IType{ .instr = .LW, .destination = .ra, .source = .sp, .immediate = 0 });
588 | try self.appendIType(asm_ast.IType{ .destination = .sp, .source = .sp, .immediate = 1, .instr = .ADDI });
589 |
590 | if (!std.mem.eql(u8, function.identifier, "main")) {
591 | self.rs1 = .ra;
592 | self.rd = .zero;
593 | self.immediate = 0;
594 | self.appendInstr(.JALR);
595 | }
596 | }
597 |
598 | pub fn generate(self: *Generator) !asm_ast.Program {
599 | for (self.program.function) |function| {
600 | try self.generateFunction(function);
601 | }
602 |
603 | return .{
604 | .function = .{
605 | .identifier = self.program.function[0].identifier,
606 | .instructions = try self.instruction_buffer.toOwnedSlice(),
607 | },
608 | };
609 | }
610 | };
611 |
--------------------------------------------------------------------------------
/assembler/src/main.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 | const instr_types = @import("instruction-types.zig");
3 | const instr_getters = @import("instruction-getters.zig");
4 |
5 | const Instruction = union(enum) {
6 | RType: struct {
7 | instruction: instr_types.RTypeInstruction,
8 | rd: u8,
9 | rs1: u8,
10 | rs2: u8,
11 | },
12 | IType: struct {
13 | instruction: instr_types.ITypeInstruction,
14 | rd: u8,
15 | rs1: u8,
16 | imm: i12,
17 | },
18 | SType: struct {
19 | instruction: instr_types.STypeInstruction,
20 | rs1: u8,
21 | rs2: u8,
22 | imm: i12,
23 | },
24 | BType: struct {
25 | instruction: instr_types.BTypeInstruction,
26 | rs1: u8,
27 | rs2: u8,
28 | imm: i12,
29 | },
30 | UType: struct {
31 | instruction: instr_types.UTypeInstruction,
32 | rd: u8,
33 | imm: i20,
34 | },
35 | JType: struct {
36 | instruction: instr_types.JTypeInstruction,
37 | rd: u8,
38 | imm: i20,
39 | },
40 | fn encode(self: *const Instruction) !u32 {
41 | return switch (self.*) {
42 | .RType => |rtype| {
43 | const opcode = 0b0110011;
44 |
45 | const rd = @as(u32, rtype.rd);
46 | const rs1 = @as(u32, rtype.rs1);
47 | const rs2 = @as(u32, rtype.rs2);
48 |
49 | const funct3: u32 = switch (rtype.instruction) {
50 | .ADD, .SUB, .MUL => 0b000,
51 | .XOR, .DIV => 0b100,
52 | .OR, .REM => 0b110,
53 | .AND, .REMU => 0b111,
54 | .SLL, .MULH => 0b001,
55 | .SRL, .SRA, .DIVU => 0b101,
56 | .SLT, .MULSU => 0b010,
57 | .SLTU, .MULU => 0b011,
58 | };
59 |
60 | const funct7: u32 = switch (rtype.instruction) {
61 | .SUB, .SRA => 0b0100000,
62 | .MUL, .MULH, .MULSU, .MULU, .DIV, .DIVU, .REM, .REMU => 0b0000001,
63 | else => 0b0000000,
64 | };
65 |
66 | return opcode |
67 | (rd << 7) |
68 | (funct3 << 12) |
69 | (rs1 << 15) |
70 | (rs2 << 20) |
71 | (funct7 << 25);
72 | },
73 | .IType => |itype| {
74 | const opcode: u32 = switch (itype.instruction) {
75 | .LB, .LH, .LW, .LBU, .LHU => 0b0000011,
76 | .JALR => 0b1100111,
77 | else => 0b0010011,
78 | };
79 |
80 | const rd = @as(u32, itype.rd);
81 | const rs1 = @as(u32, itype.rs1);
82 |
83 | const imm_bits: u12 = @bitCast(itype.imm);
84 | const imm = switch (itype.instruction) {
85 | .SLLI, .SRLI => @as(u32, imm_bits & 0x1F),
86 | .SRAI => @as(u32, (imm_bits & 0x1F) | 0x400),
87 | else => @as(u32, imm_bits),
88 | };
89 |
90 | const funct3: u32 = switch (itype.instruction) {
91 | .ADDI => 0b000,
92 | .XORI => 0b100,
93 | .ORI => 0b110,
94 | .ANDI => 0b111,
95 | .SLLI => 0b001,
96 | .SRLI, .SRAI => 0b101,
97 | .SLTI => 0b010,
98 | .SLTIU => 0b011,
99 |
100 | .LB => 0b000,
101 | .LH => 0b001,
102 | .LW => 0b010,
103 | .LBU => 0b100,
104 | .LHU => 0b101,
105 | .JALR => 0b000,
106 | };
107 |
108 | return opcode |
109 | (rd << 7) |
110 | (funct3 << 12) |
111 | (rs1 << 15) |
112 | (imm << 20);
113 | },
114 | .SType => |stype| {
115 | const opcode = 0b0100011;
116 |
117 | const rs1 = @as(u32, stype.rs1);
118 | const rs2 = @as(u32, stype.rs2);
119 | const imm_bits: u12 = @bitCast(stype.imm);
120 |
121 | const funct3: u32 = switch (stype.instruction) {
122 | .SB => 0b000,
123 | .SH => 0b001,
124 | .SW => 0b010,
125 | };
126 |
127 | const imm_lo = imm_bits & 0x1F;
128 | const imm_hi = @as(u32, (imm_bits >> 5) & 0x7F);
129 |
130 | return opcode |
131 | (imm_lo << 7) |
132 | (funct3 << 12) |
133 | (rs1 << 15) |
134 | (rs2 << 20) |
135 | (imm_hi << 25);
136 | },
137 | .BType => |btype| {
138 | const opcode = 0b1100011;
139 |
140 | const rs1 = @as(u32, btype.rs1);
141 | const rs2 = @as(u32, btype.rs2);
142 | const imm_bits: u12 = @bitCast(btype.imm);
143 |
144 | const imm_lo = imm_bits & 0x1F;
145 | const imm_hi = @as(u32, (imm_bits >> 5) & 0x7F);
146 |
147 | const funct3: u32 = switch (btype.instruction) {
148 | .BEQ => 0b000,
149 | .BNE => 0b001,
150 | .BLT => 0b100,
151 | .BGE => 0b101,
152 | .BLTU => 0b110,
153 | .BGEU => 0b111,
154 | };
155 |
156 | return opcode |
157 | (imm_lo << 7) |
158 | (funct3 << 12) |
159 | (rs1 << 15) |
160 | (rs2 << 20) |
161 | (imm_hi << 25);
162 | },
163 | .UType => |utype| {
164 | const opcode: u32 = switch (utype.instruction) {
165 | .LUI => 0b0110111,
166 | .AUIPC => 0b0010111,
167 | };
168 |
169 | const rd = @as(u32, utype.rd);
170 | const imm_bits: u20 = @bitCast(utype.imm);
171 | const imm = @as(u32, imm_bits);
172 |
173 | return opcode |
174 | (rd << 7) |
175 | (imm << 12);
176 | },
177 | .JType => |jtype| {
178 | const opcode = 0b1101111;
179 | const rd = @as(u32, jtype.rd);
180 |
181 | const imm_bits: u20 = @bitCast(jtype.imm);
182 | const imm = @as(u32, imm_bits);
183 |
184 | return opcode |
185 | (rd << 7) |
186 | (imm << 12);
187 | },
188 | };
189 | }
190 | };
191 |
192 | fn splitStringIntoLines(allocator: *const std.mem.Allocator, input: []const u8) ![][]const u8 {
193 | var lines = std.ArrayList([]const u8).init(allocator.*);
194 | defer lines.deinit();
195 | var tokenizer = std.mem.tokenize(u8, input, "\n");
196 | while (tokenizer.next()) |line| {
197 | try lines.append(line);
198 | }
199 | return lines.toOwnedSlice();
200 | }
201 |
202 | fn splitStringByWhitespace(allocator: *const std.mem.Allocator, input: []const u8) ![][]const u8 {
203 | var tokens = std.ArrayList([]const u8).init(allocator.*);
204 | defer tokens.deinit();
205 | var tokenizer = std.mem.tokenize(u8, input, " \t\n\r");
206 | while (tokenizer.next()) |token| {
207 | try tokens.append(token);
208 | }
209 | return tokens.toOwnedSlice();
210 | }
211 |
212 | fn assemble(allocator: *const std.mem.Allocator, source: []const u8) !std.ArrayList(u32) {
213 | const lines = try splitStringIntoLines(allocator, source);
214 | defer allocator.free(lines);
215 |
216 | var encoded = std.ArrayList(u32).init(allocator.*);
217 |
218 | if (!@import("builtin").is_test) {
219 | const jal_str = "jal zero _start";
220 |
221 | const jal_tokens = try splitStringByWhitespace(allocator, jal_str);
222 | defer allocator.free(jal_tokens);
223 |
224 | var jal_instruction = try parseInstruction(allocator, jal_tokens, lines, 0);
225 | jal_instruction.JType.imm += 1;
226 | try encoded.append(try jal_instruction.encode());
227 | }
228 |
229 | for (lines, 0..) |line, index| {
230 | if (line.len > 0 and line[0] == ';' or line[line.len - 1] == ':') continue;
231 |
232 | const tokens = try splitStringByWhitespace(allocator, line);
233 | defer allocator.free(tokens);
234 | const pseudo_parsed_tokens = try parsePseudoInstruction(allocator, tokens);
235 | const instruction = try parseInstruction(allocator, pseudo_parsed_tokens, lines, index);
236 | try encoded.append(try instruction.encode());
237 | }
238 |
239 | return encoded;
240 | }
241 |
242 | const InstructionType = enum {
243 | RType,
244 | IType,
245 | SType,
246 | BType,
247 | UType,
248 | JType,
249 | None,
250 | };
251 |
252 | fn getInstructionType(instruction: []const u8, instruction_sets: struct {
253 | rtype: []const []const u8,
254 | itype: []const []const u8,
255 | stype: []const []const u8,
256 | btype: []const []const u8,
257 | utype: []const []const u8,
258 | jtype: []const []const u8,
259 | }) InstructionType {
260 | for (instruction_sets.rtype) |candidate| {
261 | if (std.mem.eql(u8, instruction, candidate)) {
262 | return .RType;
263 | }
264 | }
265 | for (instruction_sets.itype) |candidate| {
266 | if (std.mem.eql(u8, instruction, candidate)) {
267 | return .IType;
268 | }
269 | }
270 | for (instruction_sets.stype) |candidate| {
271 | if (std.mem.eql(u8, instruction, candidate)) {
272 | return .SType;
273 | }
274 | }
275 | for (instruction_sets.btype) |candidate| {
276 | if (std.mem.eql(u8, instruction, candidate)) {
277 | return .BType;
278 | }
279 | }
280 | for (instruction_sets.utype) |candidate| {
281 | if (std.mem.eql(u8, instruction, candidate)) {
282 | return .UType;
283 | }
284 | }
285 | for (instruction_sets.jtype) |candidate| {
286 | if (std.mem.eql(u8, instruction, candidate)) {
287 | return .JType;
288 | }
289 | }
290 | return .None;
291 | }
292 |
293 | fn createRegMap(allocator: *const std.mem.Allocator) !std.StringHashMap(u8) {
294 | const reg_names = [_][]const u8{
295 | "zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1", "a0", "a1", "a2", "a3",
296 | "a4", "a5", "a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11",
297 | "t3", "t4", "t5", "t6",
298 | };
299 | var map = std.StringHashMap(u8).init(allocator.*);
300 | for (reg_names, 0..) |name, index| {
301 | try map.put(name, @as(u8, @intCast(index)));
302 | }
303 | try map.put("fp", 8);
304 | return map;
305 | }
306 |
307 | fn parseRegister(reg: []const u8, reg_map: *const std.StringHashMap(u8)) !u8 {
308 | if (reg[0] == 'x') {
309 | return try std.fmt.parseInt(u8, reg[1..], 10);
310 | } else {
311 | return reg_map.get(reg).?;
312 | }
313 | }
314 |
315 | fn findLabelRelativeIndex(lines: [][]const u8, name: []const u8, index: usize) i32 {
316 | var found: ?usize = null;
317 | var buffer: [33]u8 = undefined;
318 | const label_name = std.fmt.bufPrint(&buffer, "{s}:", .{name}) catch unreachable;
319 |
320 | for (lines, 0..) |line, found_index| {
321 | if (std.mem.eql(u8, line[0..line.len], label_name)) {
322 | found = found_index;
323 | break;
324 | }
325 | }
326 |
327 | if (found == null) std.debug.panic("Label of name {s} not found", .{label_name});
328 | const found_index = found.?;
329 |
330 | var labels_encountered: usize = 0;
331 |
332 | if (found_index < index) {
333 | for (lines[found_index..index]) |line| {
334 | if (line.len > 0 and line[line.len - 1] == ':') {
335 | labels_encountered += 1;
336 | }
337 | }
338 |
339 | return -@as(i32, @intCast(index - found_index - labels_encountered));
340 | } else {
341 | for (lines[index..found_index]) |line| {
342 | if (line.len > 0 and line[line.len - 1] == ':') {
343 | labels_encountered += 1;
344 | }
345 | }
346 |
347 | return @as(i32, @intCast(found_index - index - labels_encountered));
348 | }
349 | }
350 |
351 | fn parsePseudoInstruction(allocator: *const std.mem.Allocator, tokens: [][]const u8) ![][]const u8 {
352 | if (tokens.len == 0) return tokens;
353 |
354 | const pseudo = tokens[0];
355 |
356 | if (std.mem.eql(u8, pseudo, "ret")) {
357 | var result = try allocator.alloc([]const u8, 4);
358 | result[0] = "jalr";
359 | result[1] = "x0";
360 | result[2] = "x1";
361 | result[3] = "0";
362 | return result;
363 | }
364 |
365 | return tokens;
366 | }
367 |
368 | fn parseInstruction(allocator: *const std.mem.Allocator, tokens: [][]const u8, lines: [][]const u8, index: usize) !Instruction {
369 | var reg_map = try createRegMap(allocator);
370 | defer reg_map.deinit();
371 |
372 | const instruction_token = tokens[0];
373 | const instruction_sets = .{
374 | .rtype = &[_][]const u8{ "add", "sub", "mul", "div", "rem", "sll", "slt", "sltu", "xor", "srl", "sra", "or", "and" },
375 | .itype = &[_][]const u8{ "addi", "muli", "divi", "slti", "sltiu", "xori", "andi", "ori", "slli", "srli", "srai", "lb", "lh", "lw", "lbu", "lhu", "jalr" },
376 | .stype = &[_][]const u8{ "sb", "sh", "sw" },
377 | .btype = &[_][]const u8{ "beq", "bne", "blt", "bge", "bltu", "bgeu" },
378 | .utype = &[_][]const u8{ "lui", "auipc" },
379 | .jtype = &[_][]const u8{"jal"},
380 | };
381 |
382 | var instruction: Instruction = undefined;
383 |
384 | switch (getInstructionType(instruction_token, instruction_sets)) {
385 | .RType => {
386 | instruction = .{ .RType = .{
387 | .instruction = try instr_getters.getRTypeInstruction(instruction_token),
388 | .rd = try parseRegister(tokens[1], ®_map),
389 | .rs1 = try parseRegister(tokens[2], ®_map),
390 | .rs2 = try parseRegister(tokens[3], ®_map),
391 | } };
392 | },
393 | .IType => {
394 | const base_instruction = try instr_getters.getITypeInstruction(instruction_token);
395 |
396 | const load_instructions = [_][]const u8{ "lb", "lh", "lw", "lbu", "lhu" };
397 | const is_load = for (load_instructions) |load_instr| {
398 | if (std.mem.eql(u8, instruction_token, load_instr)) break true;
399 | } else false;
400 |
401 | if (is_load) {
402 | const rd = try parseRegister(tokens[1], ®_map);
403 |
404 | const offset_reg = tokens[2];
405 | const paren_idx = std.mem.indexOf(u8, offset_reg, "(").?;
406 | const imm = try std.fmt.parseInt(i12, offset_reg[0..paren_idx], 10);
407 |
408 | const rs1_str = offset_reg[paren_idx + 1 .. offset_reg.len - 1];
409 | const rs1 = try parseRegister(rs1_str, ®_map);
410 |
411 | instruction = .{ .IType = .{
412 | .instruction = base_instruction,
413 | .rd = rd,
414 | .rs1 = rs1,
415 | .imm = imm,
416 | } };
417 | } else if (std.mem.eql(u8, instruction_token, "jalr")) {
418 | const rd = try parseRegister(tokens[1], ®_map);
419 | const rs1 = try parseRegister(tokens[2], ®_map);
420 | const imm = try std.fmt.parseInt(i12, tokens[3], 10);
421 |
422 | instruction = .{ .IType = .{
423 | .instruction = base_instruction,
424 | .rd = rd,
425 | .rs1 = rs1,
426 | .imm = imm,
427 | } };
428 | } else {
429 | instruction = .{ .IType = .{
430 | .instruction = base_instruction,
431 | .rd = try parseRegister(tokens[1], ®_map),
432 | .rs1 = try parseRegister(tokens[2], ®_map),
433 | .imm = try std.fmt.parseInt(i12, tokens[3], 10),
434 | } };
435 | }
436 | },
437 | .SType => {
438 | const rs2 = try parseRegister(tokens[1], ®_map);
439 | const offset_rs1 = tokens[2];
440 | const paren_idx = std.mem.indexOf(u8, offset_rs1, "(").?;
441 | const imm = try std.fmt.parseInt(i12, offset_rs1[0..paren_idx], 10);
442 | const rs1_str = offset_rs1[paren_idx + 1 .. offset_rs1.len - 1];
443 | const rs1 = try parseRegister(rs1_str, ®_map);
444 |
445 | instruction = .{
446 | .SType = .{
447 | .instruction = try instr_getters.getSTypeInstruction(instruction_token),
448 | .rs1 = rs1,
449 | .rs2 = rs2,
450 | .imm = imm,
451 | },
452 | };
453 | },
454 | .BType => {
455 | const imm: i12 = @intCast(findLabelRelativeIndex(lines, tokens[3], index));
456 |
457 | instruction = .{
458 | .BType = .{
459 | .instruction = try instr_getters.getBTypeInstruction(instruction_token),
460 | .rs1 = try parseRegister(tokens[1], ®_map),
461 | .rs2 = try parseRegister(tokens[2], ®_map),
462 | .imm = imm,
463 | },
464 | };
465 | },
466 | .UType => {
467 | instruction = .{
468 | .UType = .{
469 | .instruction = try instr_getters.getUTypeInstruction(instruction_token),
470 | .rd = try parseRegister(tokens[1], ®_map),
471 | .imm = try std.fmt.parseInt(i20, tokens[2], 10),
472 | },
473 | };
474 | },
475 | .JType => {
476 | const imm: i20 = @intCast(findLabelRelativeIndex(lines, tokens[2], index));
477 |
478 | instruction = .{
479 | .JType = .{
480 | .instruction = try instr_getters.getJTypeInstruction(instruction_token),
481 | .rd = try parseRegister(tokens[1], ®_map),
482 | .imm = imm,
483 | },
484 | };
485 | },
486 |
487 | else => return error.UnknownInstruction,
488 | }
489 |
490 | return instruction;
491 | }
492 |
493 | pub fn main() !void {
494 | var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
495 | defer arena.deinit();
496 | const allocator = arena.allocator();
497 |
498 | const args = try std.process.argsAlloc(std.heap.page_allocator);
499 | defer std.process.argsFree(std.heap.page_allocator, args);
500 |
501 | const input_path = args[1];
502 | const output_path = args[2];
503 |
504 | const file = try std.fs.cwd().openFile(input_path, .{});
505 | defer file.close();
506 |
507 | const source_code = try file.readToEndAlloc(std.heap.page_allocator, 1024 * 1024);
508 | defer std.heap.page_allocator.free(source_code);
509 |
510 | const machine_code = try assemble(&allocator, source_code);
511 |
512 | const output_file = try std.fs.cwd().createFile(output_path, .{
513 | .read = true,
514 | .truncate = true,
515 | });
516 | defer output_file.close();
517 |
518 | var buf: [4]u8 = undefined;
519 | for (machine_code.items) |code| {
520 | buf[0] = @truncate(code >> 24);
521 | buf[1] = @truncate(code >> 16);
522 | buf[2] = @truncate(code >> 8);
523 | buf[3] = @truncate(code);
524 | try output_file.writeAll(&buf);
525 | }
526 | }
527 |
528 | test "add" {
529 | const machine_code = try assemble(&std.testing.allocator, "add ra sp gp");
530 | defer machine_code.deinit();
531 | try std.testing.expectEqual(@as(u32, 0x3100B3), machine_code.items[0]);
532 | }
533 |
534 | test "sub" {
535 | const machine_code = try assemble(&std.testing.allocator, "sub tp t0 t1");
536 | defer machine_code.deinit();
537 | try std.testing.expectEqual(@as(u32, 0x40628233), machine_code.items[0]);
538 | }
539 |
540 | test "sll" {
541 | const machine_code = try assemble(&std.testing.allocator, "sll t2 s0 fp");
542 | defer machine_code.deinit();
543 | try std.testing.expectEqual(@as(u32, 0x8413B3), machine_code.items[0]);
544 | }
545 |
546 | test "slt" {
547 | const machine_code = try assemble(&std.testing.allocator, "slt s1 a0 a1");
548 | defer machine_code.deinit();
549 | try std.testing.expectEqual(@as(u32, 0xB524B3), machine_code.items[0]);
550 | }
551 |
552 | test "sltu" {
553 | const machine_code = try assemble(&std.testing.allocator, "sltu a2 a3 a4");
554 | defer machine_code.deinit();
555 | try std.testing.expectEqual(@as(u32, 0xE6B633), machine_code.items[0]);
556 | }
557 |
558 | test "xor" {
559 | const machine_code = try assemble(&std.testing.allocator, "xor a5 a6 a7");
560 | defer machine_code.deinit();
561 | try std.testing.expectEqual(@as(u32, 0x11847B3), machine_code.items[0]);
562 | }
563 |
564 | test "srl" {
565 | const machine_code = try assemble(&std.testing.allocator, "srl s2 s3 s4");
566 | defer machine_code.deinit();
567 | try std.testing.expectEqual(@as(u32, 0x149D933), machine_code.items[0]);
568 | }
569 |
570 | test "sra" {
571 | const machine_code = try assemble(&std.testing.allocator, "sra s5 s6 s7");
572 | defer machine_code.deinit();
573 | try std.testing.expectEqual(@as(u32, 0x417B5AB3), machine_code.items[0]);
574 | }
575 |
576 | test "or" {
577 | const machine_code = try assemble(&std.testing.allocator, "or s8 s9 s10");
578 | defer machine_code.deinit();
579 | try std.testing.expectEqual(@as(u32, 0x1ACEC33), machine_code.items[0]);
580 | }
581 |
582 | test "and" {
583 | const machine_code = try assemble(&std.testing.allocator, "and t3 t4 t5");
584 | defer machine_code.deinit();
585 | try std.testing.expectEqual(@as(u32, 0x1EEFE33), machine_code.items[0]);
586 | }
587 |
588 | test "addi" {
589 | const machine_code = try assemble(&std.testing.allocator, "addi t6 ra 3");
590 | defer machine_code.deinit();
591 | try std.testing.expectEqual(@as(u32, 0x308F93), machine_code.items[0]);
592 | }
593 |
594 | test "slti" {
595 | const machine_code = try assemble(&std.testing.allocator, "slti sp sp 3");
596 | defer machine_code.deinit();
597 | try std.testing.expectEqual(@as(u32, 0x312113), machine_code.items[0]);
598 | }
599 |
600 | test "sltiu" {
601 | const machine_code = try assemble(&std.testing.allocator, "sltiu a0 a0 3");
602 | defer machine_code.deinit();
603 | try std.testing.expectEqual(@as(u32, 0x353513), machine_code.items[0]);
604 | }
605 |
606 | test "xori" {
607 | const machine_code = try assemble(&std.testing.allocator, "xori a1 a1 3");
608 | defer machine_code.deinit();
609 | try std.testing.expectEqual(@as(u32, 0x35C593), machine_code.items[0]);
610 | }
611 |
612 | test "ori" {
613 | const machine_code = try assemble(&std.testing.allocator, "ori a2 a2 3");
614 | defer machine_code.deinit();
615 | try std.testing.expectEqual(@as(u32, 0x366613), machine_code.items[0]);
616 | }
617 |
618 | test "andi" {
619 | const machine_code = try assemble(&std.testing.allocator, "andi a3 a3 3");
620 | defer machine_code.deinit();
621 | try std.testing.expectEqual(@as(u32, 0x36F693), machine_code.items[0]);
622 | }
623 |
624 | test "slli" {
625 | const machine_code = try assemble(&std.testing.allocator, "slli a4 a4 3");
626 | defer machine_code.deinit();
627 | try std.testing.expectEqual(@as(u32, 0x371713), machine_code.items[0]);
628 | }
629 |
630 | test "srai" {
631 | const machine_code = try assemble(&std.testing.allocator, "srai a6 a6 3");
632 | defer machine_code.deinit();
633 | try std.testing.expectEqual(@as(u32, 0x40385813), machine_code.items[0]);
634 | }
635 |
636 | test "lb" {
637 | const machine_code = try assemble(&std.testing.allocator, "lb x19 24(x20)");
638 | defer machine_code.deinit();
639 | try std.testing.expectEqual(@as(u32, 0x018A0983), machine_code.items[0]);
640 | }
641 |
642 | test "lh" {
643 | const machine_code = try assemble(&std.testing.allocator, "lh x21 -32(x22)");
644 | defer machine_code.deinit();
645 | try std.testing.expectEqual(@as(u32, 0xFE0B1A83), machine_code.items[0]);
646 | }
647 |
648 | test "lw" {
649 | const machine_code = try assemble(&std.testing.allocator, "lw x23 64(x24)");
650 | defer machine_code.deinit();
651 | try std.testing.expectEqual(@as(u32, 0x040C2B83), machine_code.items[0]);
652 | }
653 |
654 | test "lbu" {
655 | const machine_code = try assemble(&std.testing.allocator, "lbu x25 16(x26)");
656 | defer machine_code.deinit();
657 | try std.testing.expectEqual(@as(u32, 0x010D4C83), machine_code.items[0]);
658 | }
659 |
660 | test "lhu" {
661 | const machine_code = try assemble(&std.testing.allocator, "lhu x27 -128(x28)");
662 | defer machine_code.deinit();
663 | try std.testing.expectEqual(@as(u32, 0xF80E5D83), machine_code.items[0]);
664 | }
665 |
666 | test "sb" {
667 | const machine_code = try assemble(&std.testing.allocator, "sb s4 0(s4)");
668 | defer machine_code.deinit();
669 | try std.testing.expectEqual(@as(u32, 0x14a0023), machine_code.items[0]);
670 | }
671 |
672 | test "sh" {
673 | const machine_code = try assemble(&std.testing.allocator, "sh s5 2(s5)");
674 | defer machine_code.deinit();
675 | try std.testing.expectEqual(@as(u32, 0x15a9123), machine_code.items[0]);
676 | }
677 |
678 | test "sw" {
679 | const machine_code = try assemble(&std.testing.allocator, "sw s6 3(s6)");
680 | defer machine_code.deinit();
681 | try std.testing.expectEqual(@as(u32, 0x16b21a3), machine_code.items[0]);
682 | }
683 |
684 | // TODO: these are more than likely broken, ai generated. fix them
685 |
686 | // test "beq" {
687 | // const source =
688 | // \\beq s7 s7 label
689 | // \\ addi x0 x0 0
690 | // \\ addi x0 x0 0
691 | // \\label:
692 | // \\ addi x0 x0 0
693 | // \\
694 | // ;
695 | // const machine_code = try assemble(&std.testing.allocator, source);
696 | // defer machine_code.deinit();
697 |
698 | // try std.testing.expectEqual(@as(u32, 0x17b8163), machine_code.items[0]);
699 |
700 | // try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[1]);
701 | // try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[2]);
702 | // try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[3]);
703 | // }
704 |
705 | // test "bne" {
706 | // const source =
707 | // \\bne t0 t0 label
708 | // \\ addi x0 x0 0
709 | // \\ addi x0 x0 0
710 | // \\label:
711 | // \\ addi x0 x0 0
712 | // \\
713 | // ;
714 | // const machine_code = try assemble(&std.testing.allocator, source);
715 | // defer machine_code.deinit();
716 |
717 | // try std.testing.expectEqual(@as(u32, 0x529163), machine_code.items[0]);
718 |
719 | // try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[1]);
720 | // try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[2]);
721 | // try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[3]);
722 | // }
723 |
724 | // test "blt" {
725 | // const source =
726 | // \\blt t1 t1 label
727 | // \\ addi x0 x0 0
728 | // \\ addi x0 x0 0
729 | // \\label:
730 | // \\ addi x0 x0 0
731 | // \\
732 | // ;
733 | // const machine_code = try assemble(&std.testing.allocator, source);
734 | // defer machine_code.deinit();
735 |
736 | // try std.testing.expectEqual(@as(u32, 0x634263), machine_code.items[0]);
737 |
738 | // try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[1]);
739 | // try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[2]);
740 | // try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[3]);
741 | // }
742 |
743 | // test "bge" {
744 | // const source =
745 | // \\bge t2 t2 label
746 | // \\ addi x0 x0 0
747 | // \\ addi x0 x0 0
748 | // \\label:
749 | // \\ addi x0 x0 0
750 | // \\
751 | // ;
752 | // const machine_code = try assemble(&std.testing.allocator, source);
753 | // defer machine_code.deinit();
754 |
755 | // try std.testing.expectEqual(@as(u32, 0x73d163), machine_code.items[0]);
756 |
757 | // try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[1]);
758 | // try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[2]);
759 | // try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[3]);
760 | // }
761 |
762 | // test "bltu" {
763 | // const source =
764 | // \\bltu t3 t3 label
765 | // \\ addi x0 x0 0
766 | // \\ addi x0 x0 0
767 | // \\label:
768 | // \\ addi x0 x0 0
769 | // \\
770 | // ;
771 | // const machine_code = try assemble(&std.testing.allocator, source);
772 | // defer machine_code.deinit();
773 |
774 | // try std.testing.expectEqual(@as(u32, 0x1ce6163), machine_code.items[0]);
775 |
776 | // try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[1]);
777 | // try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[2]);
778 | // try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[3]);
779 | // }
780 |
781 | // test "bgeu" {
782 | // const source =
783 | // \\bgeu t4 t4 label
784 | // \\ addi x0 x0 0
785 | // \\ addi x0 x0 0
786 | // \\label:
787 | // \\ addi x0 x0 0
788 | // \\
789 | // ;
790 | // const machine_code = try assemble(&std.testing.allocator, source);
791 | // defer machine_code.deinit();
792 |
793 | // try std.testing.expectEqual(@as(u32, 0x1def163), machine_code.items[0]);
794 |
795 | // try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[1]);
796 | // try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[2]);
797 | // try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[3]);
798 | // }
799 |
800 | test "lui" {
801 | const machine_code = try assemble(&std.testing.allocator, "lui t5 3");
802 | defer machine_code.deinit();
803 | try std.testing.expectEqual(@as(u32, 0x3f37), machine_code.items[0]);
804 | }
805 |
806 | test "auipc" {
807 | const machine_code = try assemble(&std.testing.allocator, "auipc t6 3");
808 | defer machine_code.deinit();
809 | try std.testing.expectEqual(@as(u32, 0x3f97), machine_code.items[0]);
810 | }
811 |
812 | // test "jal" {
813 | // const machine_code = try assemble(&std.testing.allocator, "jal ra 0");
814 | // defer machine_code.deinit();
815 | // try std.testing.expectEqual(@as(u32, 0xef), machine_code.items[0]);
816 | // }
817 |
818 | // test "jalr" {
819 | // const machine_code = try assemble(&std.testing.allocator, "jalr sp sp 3");
820 | // defer machine_code.deinit();
821 | // try std.testing.expectEqual(@as(u32, 0x310167), machine_code.items[0]);
822 | // }
823 |
--------------------------------------------------------------------------------