├── assembler ├── .gitignore ├── README.md ├── src │ ├── instruction-types.zig │ ├── instruction-getters.zig │ └── main.zig ├── build.zig └── build.zig.zon ├── README.md ├── compiler ├── .gitignore ├── README.md ├── build.zig ├── src │ ├── frontend │ │ ├── tokens.zig │ │ ├── semantic-analysis.zig │ │ ├── semantic │ │ │ ├── loop-labeling.zig │ │ │ ├── type-checking.zig │ │ │ └── identifier-resolution.zig │ │ ├── lexer.zig │ │ └── parser.zig │ ├── diagnostics.zig │ ├── ast │ │ ├── c.zig │ │ └── asm.zig │ ├── backend │ │ └── emission.zig │ ├── middleend │ │ ├── register-allocator.zig │ │ └── gen.zig │ ├── prettyprinter.zig │ ├── testing.zig │ └── main.zig └── build.zig.zon ├── run.sh ├── sh ├── assembler.sh ├── compiler.sh └── sim.sh ├── .gitignore ├── sim.sv ├── src ├── memory.sv ├── program_counter.sv ├── register_file.sv ├── instruction_memory.sv ├── top.sv ├── alu.sv └── decoder.sv ├── flake.nix ├── tests.sh └── flake.lock /assembler/.gitignore: -------------------------------------------------------------------------------- 1 | /.zig-cache 2 | zig-out -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 |

Dragonfruit

3 | -------------------------------------------------------------------------------- /compiler/.gitignore: -------------------------------------------------------------------------------- 1 | .zig-cache 2 | zig-out 3 | sb.lox 4 | sb.c 5 | sb.asm -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | DEBUG=${DEBUG:-0} 6 | 7 | sh sh/compiler.sh 8 | sh sh/assembler.sh 9 | sh sh/sim.sh -------------------------------------------------------------------------------- /sh/assembler.sh: -------------------------------------------------------------------------------- 1 | rm -f program 2 | 3 | cd assembler 4 | zig build 5 | cd .. 6 | 7 | ./assembler/zig-out/bin/assembler program.asm program -------------------------------------------------------------------------------- /sh/compiler.sh: -------------------------------------------------------------------------------- 1 | DEBUG=${DEBUG:-0} 2 | 3 | cd compiler 4 | zig build 5 | cd .. 
6 | 7 | DEBUG=$DEBUG ./compiler/zig-out/bin/compiler program.c -------------------------------------------------------------------------------- /sh/sim.sh: -------------------------------------------------------------------------------- 1 | rm -rf build 2 | rm -f *.vcd 3 | mkdir -p build 4 | 5 | iverilog -g2012 -o "build/sim_test" src/*.sv "sim.sv" 6 | vvp "build/sim_test" 7 | gtkwave sim.vcd 8 | 9 | rm -rf build 10 | rm sim.vcd -------------------------------------------------------------------------------- /assembler/README.md: -------------------------------------------------------------------------------- 1 | # RV32I Assembler in zig 2 | 3 | Part of [RV32I](https://github.com/oxrinz/rv32i) 4 | 5 | Fully working and fleshed out RV32I assembler 6 | 7 | Usage: 8 | ```bash 9 | zig build run -- out.asm program 10 | ``` 11 | 12 | First argument is input, second is output -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | 3 | program 4 | program.asm 5 | program.c 6 | 7 | OpenTimer 8 | 9 | **/.zig-cache/ 10 | **/zig-out/ 11 | **/release/ 12 | **/debug/ 13 | **/build/ 14 | **/build-*/ 15 | 16 | # some temp files that might accidentally get leaved in, don't remove even if they don't exist 17 | *.vcd 18 | *.vvp 19 | out 20 | ROADMAP.md 21 | temp 22 | -------------------------------------------------------------------------------- /sim.sv: -------------------------------------------------------------------------------- 1 | module sim; 2 | 3 | reg clk; 4 | wire [31:0] pc_out; 5 | wire [31:0] instr; 6 | 7 | top dut (.clk(clk)); 8 | 9 | initial begin 10 | clk = 0; 11 | forever #1 clk = ~clk; 12 | end 13 | 14 | initial begin 15 | $dumpfile("sim.vcd"); 16 | $dumpvars(0, sim); 17 | 18 | #1200; 19 | 20 | $display("Simulation completed successfully"); 21 | $finish; 22 | end 23 | 24 | endmodule 25 | 
// Data memory: 1024 words of 32 bits, indexed directly by `addr`.
//
// Ports:
//   clk      - clock; stores are committed on its rising edge.
//   addr     - word index into the array (used for both reads and writes).
//   data     - word to store when `write` is asserted.
//   read     - when 1, `data_out` reflects memory[addr] combinationally.
//   write    - when 1, memory[addr] is updated at the next rising clock edge.
//   data_out - read data; driven to 0 while `read` is not asserted.
module memory (
    input  wire        clk,
    input  wire [31:0] addr,
    input  wire [31:0] data,
    input  wire        read,
    input  wire        write,
    output reg  [31:0] data_out
);
  reg [31:0] memory[0:1023];

  // Asynchronous read: the value is available in the same cycle the address
  // is presented. The explicit else-branch gives data_out a value on every
  // path, so no latch is inferred.
  always @(*) begin
    if (read == 1) begin
      data_out = memory[addr];
    end else begin
      data_out = 32'b0;
    end
  end

  // Synchronous write: storage must only change on a clock edge. Writing the
  // array from a combinational block would update memory whenever addr/data
  // glitch while `write` is high.
  always @(posedge clk) begin
    if (write == 1) begin
      memory[addr] <= data;
    end
  end

endmodule
// 32 x 32-bit register file with two combinational read ports and one
// clocked write port.
//
// Ports:
//   clk               - clock; writes happen on the rising edge.
//   rst_n             - active-low asynchronous reset.
//   rs1_addr/rs2_addr - read indices; index 0 always reads as zero.
//   rs1_data/rs2_data - read data.
//   we                - write enable.
//   rd_addr/rd_data   - write index and value; writes to index 0 are ignored.
module register_file (
    input clk,
    input rst_n,

    input  [ 4:0] rs1_addr,
    input  [ 4:0] rs2_addr,
    output [31:0] rs1_data,
    output [31:0] rs2_data,

    input        we,
    input [ 4:0] rd_addr,
    input [31:0] rd_data
);

  reg [31:0] registers[31:0];
  integer i;

  // Register 0 is hard-wired to zero on the read side.
  assign rs1_data = (rs1_addr == 0) ? 32'b0 : registers[rs1_addr];
  assign rs2_data = (rs2_addr == 0) ? 32'b0 : registers[rs2_addr];

  always @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
      for (i = 0; i < 32; i = i + 1) begin
        registers[i] <= 32'b0;
      end

      // Must be non-blocking and come after the loop: the last non-blocking
      // assignment to the same element wins. A blocking assignment here would
      // be overwritten by the loop's pending `registers[2] <= 0`.
      // NOTE(review): presumably x2 is the stack pointer and 0x3FF is the top
      // of the 1024-word data memory - confirm against the compiler's ABI.
      registers[2] <= 32'h3FF;

    end else if (we && rd_addr != 0) begin
      registers[rd_addr] <= rd_data;
    end
  end

endmodule
run_cmd.step.dependOn(b.getInstallStep()); 19 | 20 | if (b.args) |args| { 21 | run_cmd.addArgs(args); 22 | } 23 | 24 | const run_step = b.step("run", "Run the app"); 25 | run_step.dependOn(&run_cmd.step); 26 | 27 | const exe_unit_tests = b.addTest(.{ 28 | .root_source_file = b.path("src/main.zig"), 29 | .target = target, 30 | .optimize = optimize, 31 | }); 32 | 33 | const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests); 34 | 35 | const test_step = b.step("test", "Run unit tests"); 36 | test_step.dependOn(&run_exe_unit_tests.step); 37 | } 38 | -------------------------------------------------------------------------------- /tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "${DEBUG}" = "1" ]; then 4 | IVERILOG_FLAGS="-DDEBUG" 5 | else 6 | IVERILOG_FLAGS="" 7 | fi 8 | 9 | run_tests() { 10 | local component=$1 11 | echo -e "\n================================\nRunning ${component} tests..." 12 | cd $component 13 | zig build test 14 | local test_result=$? 15 | cd .. 16 | if [ $test_result -eq 0 ]; then 17 | echo "✓ ${component} tests passed" 18 | else 19 | echo "✗ ${component} tests failed with exit code ${test_result}" 20 | fi 21 | echo -e "================================" 22 | return $test_result 23 | } 24 | 25 | 26 | overall_status=0 27 | run_tests "compiler" 28 | compiler_status=$? 29 | overall_status=$((overall_status + compiler_status)) 30 | run_tests "assembler" 31 | assembler_status=$? 
32 | overall_status=$((overall_status + assembler_status)) 33 | 34 | rm -rf test_results 35 | rm -rf build 36 | echo -e "\nTest Summary:" 37 | echo "Compiler tests: $([ $compiler_status -eq 0 ] && echo "PASSED" || echo "FAILED")" 38 | echo "Assembler tests: $([ $assembler_status -eq 0 ] && echo "PASSED" || echo "FAILED")" 39 | exit $overall_status -------------------------------------------------------------------------------- /compiler/build.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | 3 | pub fn build(b: *std.Build) void { 4 | const target = b.standardTargetOptions(.{}); 5 | const optimize = b.standardOptimizeOption(.{}); 6 | 7 | const exe = b.addExecutable(.{ 8 | .name = "compiler", 9 | .root_source_file = b.path("src/main.zig"), 10 | .target = target, 11 | .optimize = optimize, 12 | }); 13 | 14 | b.installArtifact(exe); 15 | 16 | const run_cmd = b.addRunArtifact(exe); 17 | 18 | run_cmd.step.dependOn(b.getInstallStep()); 19 | 20 | if (b.args) |args| { 21 | run_cmd.addArgs(args); 22 | } 23 | 24 | const run_step = b.step("run", "Run the app"); 25 | run_step.dependOn(&run_cmd.step); 26 | 27 | const exe_unit_tests = b.addTest(.{ 28 | .root_source_file = b.path("src/main.zig"), 29 | .target = target, 30 | .optimize = optimize, 31 | }); 32 | 33 | const testing_module = b.addModule("testing", .{ 34 | .root_source_file = .{ .cwd_relative = "src/testing.zig" }, 35 | }); 36 | 37 | exe_unit_tests.root_module.addImport("testing", testing_module); 38 | 39 | const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests); 40 | 41 | const test_step = b.step("test", "Run unit tests"); 42 | test_step.dependOn(&run_exe_unit_tests.step); 43 | } 44 | -------------------------------------------------------------------------------- /flake.lock: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": { 3 | "flake-utils": { 4 | "inputs": { 5 | "systems": "systems" 6 | }, 
7 | "locked": { 8 | "lastModified": 1731533236, 9 | "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", 10 | "owner": "numtide", 11 | "repo": "flake-utils", 12 | "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", 13 | "type": "github" 14 | }, 15 | "original": { 16 | "owner": "numtide", 17 | "repo": "flake-utils", 18 | "type": "github" 19 | } 20 | }, 21 | "nixpkgs": { 22 | "locked": { 23 | "lastModified": 1738961098, 24 | "narHash": "sha256-yWNBf6VDW38tl179FEuJ0qukthVfB02kv+mRsfUsWC0=", 25 | "owner": "NixOS", 26 | "repo": "nixpkgs", 27 | "rev": "a3eaf5e8eca7cab680b964138fb79073704aca75", 28 | "type": "github" 29 | }, 30 | "original": { 31 | "owner": "NixOS", 32 | "ref": "nixos-unstable", 33 | "repo": "nixpkgs", 34 | "type": "github" 35 | } 36 | }, 37 | "root": { 38 | "inputs": { 39 | "flake-utils": "flake-utils", 40 | "nixpkgs": "nixpkgs" 41 | } 42 | }, 43 | "systems": { 44 | "locked": { 45 | "lastModified": 1681028828, 46 | "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", 47 | "owner": "nix-systems", 48 | "repo": "default", 49 | "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", 50 | "type": "github" 51 | }, 52 | "original": { 53 | "owner": "nix-systems", 54 | "repo": "default", 55 | "type": "github" 56 | } 57 | } 58 | }, 59 | "root": "root", 60 | "version": 7 61 | } 62 | -------------------------------------------------------------------------------- /compiler/src/frontend/tokens.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const Allocator = std.mem.Allocator; 3 | 4 | pub fn is_binary_operator(token: TokenType) bool { 5 | switch (token) { 6 | .PLUS, .MINUS, .STAR, .SLASH, .PERCENTAGE, .AMPERSAND, .PIPE, .CARET, .LEFT_SHIFT, .RIGHT_SHIFT, .AMPERSAND_AMPERSAND, .PIPE_PIPE, .BANG, .BANG_EQUAL, .EQUAL, .EQUAL_EQUAL, .GREATER, .GREATER_EQUAL, .LESS, .LESS_EQUAL => return true, 7 | else => return false, 8 | } 9 | } 10 | 11 | pub fn is_in_place_starter(token: 
const std = @import("std");

/// Arena backing every diagnostic collected by this module.
pub var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
const allocator = arena.allocator();

/// Diagnostics recorded so far, in insertion order.
var messages = std.ArrayList(Diagnostic).init(allocator);

/// One recorded message: its text, an optional source line, and a severity.
const Diagnostic = struct {
    message: []const u8,
    line: ?usize,
    type: enum {
        PANIC,
        WARNING,
        ERROR,
    },
};

/// ANSI escape sequences used to colour the severity label.
const ColorCode = struct {
    const reset = "\x1b[0m";
    const red = "\x1b[31m";
    const yellow = "\x1b[33m";
    const magenta = "\x1b[35m";
};

/// Writes a single diagnostic to stderr as
/// `<coloured label> [line N: ]<message>\n`.
/// Returns any error reported by the stderr writer.
fn printMsg(msg: Diagnostic) !void {
    const stderr = std.io.getStdErr().writer();
    switch (msg.type) {
        .PANIC => try stderr.print("{s}PANIC:{s}", .{ ColorCode.magenta, ColorCode.reset }),
        .WARNING => try stderr.print("{s}WARNING{s}", .{ ColorCode.yellow, ColorCode.reset }),
        .ERROR => try stderr.print("{s}ERROR{s}", .{ ColorCode.red, ColorCode.reset }),
    }

    // Format the optional line prefix straight into the writer; no temporary
    // heap string is needed just to concatenate two pieces of text.
    if (msg.line) |line| {
        try stderr.print(" line {d}: {s}\n", .{ line, msg.message });
    } else {
        try stderr.print(" {s}\n", .{msg.message});
    }
}

/// Prints every recorded diagnostic to stderr, in the order it was added.
/// Panics if writing to stderr fails.
pub fn printAll() void {
    for (messages.items) |msg| {
        printMsg(msg) catch @panic("Failed to print error messages");
    }
}

/// Records a PANIC-level diagnostic. `line` may be null when no source
/// location applies. Panics if the list cannot grow.
pub fn addPanic(message: []const u8, line: ?usize) void {
    messages.append(.{
        .message = message,
        .line = line,
        .type = .PANIC,
    }) catch @panic("Failed to append panic message");
}

/// Records an ERROR-level diagnostic. `line` may be null when no source
/// location applies. Panics if the list cannot grow.
pub fn addError(message: []const u8, line: ?usize) void {
    messages.append(.{
        .message = message,
        .line = line,
        .type = .ERROR,
    }) catch @panic("Failed to append error message");
}

/// Records a WARNING-level diagnostic. `line` may be null when no source
/// location applies. Panics if the list cannot grow.
pub fn addWarning(message: []const u8, line: ?usize) void {
    messages.append(.{
        .message = message,
        .line = line,
        .type = .WARNING,
    }) catch @panic("Failed to append warning message");
}
TypeChecking = @import("semantic/type-checking.zig").TypeChecking; 6 | const testing = @import("../testing.zig"); 7 | 8 | pub const SemanticAnalysis = struct { 9 | allocator: std.mem.Allocator, 10 | pub fn init(allocator: std.mem.Allocator) SemanticAnalysis { 11 | return .{ .allocator = allocator }; 12 | } 13 | 14 | pub fn analyze(self: *SemanticAnalysis, program: c_ast.Program) !c_ast.Program { 15 | var variable_resolution = IdentifierResolution.init(self.allocator); 16 | var loop_labeling = LoopLabeling.init(self.allocator); 17 | var type_checking = TypeChecking.init(self.allocator); 18 | 19 | return try type_checking.check(try loop_labeling.label(try variable_resolution.resolve(program))); 20 | } 21 | }; 22 | 23 | test "function parameter duplicate definition" { 24 | const input = 25 | \\int main() 26 | \\{ 27 | \\ int foo(int a, int a); 28 | \\} 29 | \\ 30 | ; 31 | 32 | const result = testing.cToSemanticAnalysis(input); 33 | try testing.expectError(error.DuplicateDefinition, result); 34 | } 35 | 36 | test "incompatible declarations" { 37 | const input = 38 | \\int main() { 39 | \\ int foo(int a); 40 | \\ return foo(1); 41 | \\} 42 | \\ 43 | \\int foo(int a, int b); 44 | ; 45 | 46 | const result = testing.cToSemanticAnalysis(input); 47 | try testing.expectError(error.IncompatibleFunctionDeclarations, result); 48 | } 49 | 50 | test "variable used as function" { 51 | const input = 52 | \\int main() 53 | \\{ 54 | \\ int x = 3; 55 | \\ return x(); 56 | \\} 57 | ; 58 | 59 | const result = testing.cToSemanticAnalysis(input); 60 | try testing.expectError(error.VariableUsedAsFunction, result); 61 | } 62 | 63 | test "function already defined" { 64 | const input = 65 | \\int x(int a) { 66 | \\ return a; 67 | \\} 68 | \\ 69 | \\int x(int a) { 70 | \\ return a; 71 | \\} 72 | \\ 73 | \\int main() 74 | \\{ 75 | \\ return x(); 76 | \\} 77 | ; 78 | 79 | const result = testing.cToSemanticAnalysis(input); 80 | try testing.expectError(error.FunctionAlreadyDefined, result); 81 | } 
// Instruction memory: 1024 words of 32 bits, read combinationally.
//
// Ports:
//   addr      - index of the word to fetch (used directly as the array index).
//   instr_out - the word stored at memory[addr].
//
// When SYNTHESIS is not defined, the array is filled at time 0 from a binary
// file named "program". When SYNTHESIS is defined, only word 0 is initialised.
module instruction_memory (
    input wire [31:0] addr,
    output reg [31:0] instr_out
);
  reg [31:0] memory[0:1023];

`ifndef SYNTHESIS
  // SIMULATION CODE - will be used with iverilog
  integer i;                // word index while clearing / loading
  integer file;             // descriptor returned by $fopen
  integer status;           // last $fgetc result: a byte value, or -1 at end of file
  reg continue_reading;     // cleared once end of file is reached

  initial begin
    // Initialize memory with zeros
    for (i = 0; i < 1024; i = i + 1) begin
      memory[i] = 32'h0;
    end

    // Load program from binary file
    file = $fopen("program", "rb");
    if (file == 0) begin
      $display("Error: Failed to open file");
      $finish;
    end

    // Read the file one byte at a time, packing four consecutive bytes into
    // each word with the FIRST byte in bits [31:24] (most significant first).
    // If the file ends part-way through a word, the remaining low bytes are
    // zero-filled, that partial word is still counted, and reading stops.
    // If the file ends exactly on a word boundary, no extra word is counted.
    i = 0;
    continue_reading = 1;
    while (continue_reading && i < 1024) begin
      status = $fgetc(file);
      if (status != -1) begin
        memory[i][31:24] = status;
        status = $fgetc(file);
        if (status != -1) begin
          memory[i][23:16] = status;
          status = $fgetc(file);
          if (status != -1) begin
            memory[i][15:8] = status;
            status = $fgetc(file);
            if (status != -1) begin
              memory[i][7:0] = status;
            end else begin
              // End of file after 3 bytes: pad the last byte.
              memory[i][7:0] = 8'h00;
              continue_reading = 0;
            end
          end else begin
            // End of file after 2 bytes: pad the low half-word.
            memory[i][15:0] = 16'h0000;
            continue_reading = 0;
          end
        end else begin
          // End of file after 1 byte: pad the low three bytes.
          memory[i][23:0] = 24'h000000;
          continue_reading = 0;
        end
        i = i + 1;
      end else begin
        // End of file on a word boundary: nothing more to load.
        continue_reading = 0;
      end
    end
    $fclose(file);

`ifdef DEBUG
    // At this point i is the number of words written (including a padded one).
    $display("Loaded %0d 32-bit instructions", i);
    $display("Memory initialization complete");
`endif
  end
`else
  // SYNTHESIS CODE - will be used with Yosys
  initial begin
    // For synthesis, either leave memory uninitialized (RAM will be inferred)
    // or provide a few default instructions if needed
    memory[0] = 32'h00000013;  // NOP (addi x0, x0, 0)
    // Add more initialization if needed
  end
`endif

  // Combinational fetch: instr_out follows memory[addr] with no clock.
  always @(*) begin
    instr_out = memory[addr];
  end
endmodule
| .is_store(is_store), 85 | .mem_addr(mem_addr), 86 | .mem_data(mem_out) 87 | ); 88 | 89 | decoder decoder_inst ( 90 | .instr(instr), 91 | .alu_ops(alu_ops), 92 | .reg_write(reg_write), 93 | .mem_read(mem_read), 94 | .mem_write(mem_write), 95 | .mem_width(mem_width), 96 | .is_branch(is_branch), 97 | .rs1(rs1), 98 | .rs2(rs2), 99 | .rd(rd), 100 | .imm(imm), 101 | .is_lui(is_lui), 102 | .is_i_type(is_i_type), 103 | .is_i_load_type(is_i_load_type), 104 | .is_store(is_store) 105 | ); 106 | 107 | register_file regfile_inst ( 108 | .clk(clk), 109 | // .rst_n(rst), 110 | .rs1_addr(rs1), 111 | .rs2_addr(rs2), 112 | .rs1_data(rs1_data), 113 | .rs2_data(rs2_data), 114 | 115 | .we(reg_write), 116 | .rd_addr(rd), 117 | .rd_data(rd_data) 118 | ); 119 | 120 | endmodule 121 | -------------------------------------------------------------------------------- /compiler/src/frontend/semantic/loop-labeling.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const c_ast = @import("../../ast/c.zig"); 3 | 4 | pub const LoopLabeling = struct { 5 | allocator: std.mem.Allocator, 6 | counter: usize, 7 | 8 | pub fn init(allocator: std.mem.Allocator) LoopLabeling { 9 | return .{ 10 | .allocator = allocator, 11 | .counter = 0, 12 | }; 13 | } 14 | 15 | pub fn label(self: *LoopLabeling, program: c_ast.Program) !c_ast.Program { 16 | var result = program; 17 | self.labelFunction(&result.function[0], null); 18 | return result; 19 | } 20 | 21 | fn getNextId(self: *LoopLabeling) []const u8 { 22 | const id = std.fmt.allocPrint(self.allocator, "loop_{d}", .{self.counter}) catch unreachable; 23 | self.counter += 1; 24 | return id; 25 | } 26 | 27 | fn labelFunction(self: *LoopLabeling, function: *c_ast.FunctionDeclaration, loop_ctx: ?[]const u8) void { 28 | for (function.body.?.block_items) |*item| { 29 | self.labelBlockItem(item, loop_ctx); 30 | } 31 | } 32 | 33 | fn labelBlockItem(self: *LoopLabeling, item: *c_ast.BlockItem, loop_ctx: 
// Combinational execute unit: computes the register write-back value, the
// data-memory address, and the branch/jump target for one instruction.
//
// Inputs:
//   clk                 - not used by the logic below; kept for interface compatibility.
//   is_lui, is_i_type, is_i_load_type, is_branch, is_store
//                       - instruction-class selects, checked in that priority
//                         order; when none is set the register-register path runs.
//   alu_ops             - operation selector within the chosen class.
//   rs1_data, rs2_data  - source register values.
//   imm                 - immediate operand.
//   pc_data             - current program counter.
//   mem_data            - word read from data memory (used by loads).
// Outputs:
//   pc_load     - 1 when the program counter should be loaded with new_pc_data.
//   rd_data     - value for the destination register.
//   new_pc_data - branch/jump target; only meaningful while pc_load is 1.
//   mem_addr    - data-memory address; only driven non-zero by loads and stores.
module alu (
    input clk,
    input is_lui,
    input is_i_type,
    input is_i_load_type,
    input is_branch,
    input is_store,
    input [3:0] alu_ops,
    input [31:0] rs1_data,
    input [31:0] rs2_data,
    input [31:0] imm,
    input [31:0] pc_data,
    input [31:0] mem_data,
    output reg pc_load,
    output reg [31:0] rd_data,
    output reg [31:0] new_pc_data,
    output reg [31:0] mem_addr
);

  wire signed [31:0] rs1_signed = $signed(rs1_data);
  wire signed [31:0] rs2_signed = $signed(rs2_data);
  wire signed [31:0] imm_signed = $signed(imm);

  always @* begin
    // Give every output a value on every path so that no latch is inferred.
    rd_data     = 32'b0;
    pc_load     = 0;
    new_pc_data = 32'b0;
    mem_addr    = 32'b0;

    if (is_lui) begin
      rd_data = imm << 12;

    end else if (is_i_type) begin
      case (alu_ops)
        4'b0000: rd_data = rs1_signed + imm_signed;
        4'b0010: rd_data = rs1_signed ^ imm_signed;
        // Signed compare: BOTH operands must be signed. If either operand is
        // unsigned, Verilog evaluates the whole comparison as unsigned, which
        // would make this case identical to 4'b1011 below.
        4'b1000: rd_data = rs1_signed < imm_signed;
        // Unsigned compare.
        4'b1011: rd_data = rs1_data < imm;
        default: rd_data = 32'b0;
      endcase

    end else if (is_i_load_type) begin
      mem_addr = rs1_data + imm;
      rd_data  = mem_data;

    end else if (is_branch) begin
      case (alu_ops)
        // Conditional branches: redirect the PC only when the condition holds.
        4'b0000:
        if (rs1_signed == rs2_signed) begin
          pc_load     = 1;
          new_pc_data = pc_data + imm_signed;
        end

        4'b0001:
        if (rs1_signed != rs2_signed) begin
          pc_load     = 1;
          new_pc_data = pc_data + imm_signed;
        end

        4'b0010:
        if (rs1_signed < rs2_signed) begin
          pc_load     = 1;
          new_pc_data = pc_data + imm_signed;
        end

        4'b0011:
        if (rs1_signed >= rs2_signed) begin
          pc_load     = 1;
          new_pc_data = pc_data + imm_signed;
        end

        // Unconditional PC-relative jump; rd receives the following PC.
        // NOTE(review): presumably JAL - confirm against the decoder.
        4'b0110: begin
          rd_data     = pc_data + 1;
          pc_load     = 1;
          new_pc_data = pc_data + imm_signed;
        end

        // Unconditional register-relative jump; rd receives the following PC.
        // NOTE(review): presumably JALR - confirm against the decoder.
        4'b0111: begin
          rd_data     = pc_data + 1;
          pc_load     = 1;
          new_pc_data = rs1_data + imm_signed;
        end

        default: ;  // unknown selector: keep the defaults (no redirect)
      endcase

    end else if (is_store) begin
      mem_addr = rs1_data + imm;

    end else begin
      // Register-register operations.
      case (alu_ops)
        4'b0000: rd_data = rs1_signed + rs2_signed;
        4'b0001: rd_data = rs1_signed - rs2_signed;
        4'b0010: rd_data = rs1_signed ^ rs2_signed;
        4'b0011: rd_data = rs1_signed | rs2_signed;
        4'b0100: rd_data = rs1_signed & rs2_signed;
        4'b0101: rd_data = rs1_signed << rs2_signed;
        // `>>` is a logical shift even on a signed operand.
        4'b0110: rd_data = rs1_signed >> rs2_signed;
        4'b1100: rd_data = rs1_signed * rs2_signed;
        4'b1101: rd_data = rs1_signed / rs2_signed;
        4'b1110: rd_data = rs1_signed % rs2_signed;
        4'b1001: rd_data = rs1_signed < rs2_signed;
        default: rd_data = 32'b0;
      endcase
    end
  end

endmodule
struct { 74 | condition: Expression, 75 | body: *Statement, 76 | identifier: ?[]const u8, 77 | }; 78 | 79 | pub const DoWhile = struct { 80 | condition: Expression, 81 | body: *Statement, 82 | identifier: ?[]const u8, 83 | }; 84 | 85 | pub const ForInit = union(enum) { 86 | init_decl: VariableDeclaration, 87 | init_exp: ?Expression, 88 | }; 89 | 90 | pub const For = struct { 91 | init: ForInit, 92 | condition: ?Expression, 93 | post: ?Expression, 94 | body: *Statement, 95 | identifier: ?[]const u8, 96 | }; 97 | 98 | pub const Break = struct { 99 | identifier: ?[]const u8, 100 | }; 101 | 102 | pub const Continue = struct { 103 | identifier: ?[]const u8, 104 | }; 105 | 106 | pub const Statement = union(enum) { 107 | ret: Return, 108 | exp: Expression, 109 | compound: Block, 110 | if_: If, 111 | break_: Break, 112 | continue_: Continue, 113 | while_: While, 114 | do_while: DoWhile, 115 | for_: For, 116 | }; 117 | 118 | pub const VariableDeclaration = struct { 119 | identifier: []const u8, 120 | initial: ?Expression, 121 | }; 122 | 123 | pub const FunctionDeclaration = struct { 124 | identifier: []const u8, 125 | params: [][]const u8, 126 | body: ?Block, 127 | }; 128 | 129 | pub const Declaration = union(enum) { 130 | variable_declaration: VariableDeclaration, 131 | function_declaration: FunctionDeclaration, 132 | }; 133 | 134 | pub const BlockItem = union(enum) { 135 | statement: Statement, 136 | declaration: Declaration, 137 | }; 138 | 139 | pub const Block = struct { 140 | block_items: []BlockItem, 141 | }; 142 | 143 | pub const Program = struct { 144 | function: []FunctionDeclaration, 145 | }; 146 | -------------------------------------------------------------------------------- /assembler/build.zig.zon: -------------------------------------------------------------------------------- 1 | .{ 2 | // This is the default name used by packages depending on this one. 
For 3 | // example, when a user runs `zig fetch --save `, this field is used 4 | // as the key in the `dependencies` table. Although the user can choose a 5 | // different name, most users will stick with this provided value. 6 | // 7 | // It is redundant to include "zig" in this name because it is already 8 | // within the Zig package namespace. 9 | .name = "assembler", 10 | 11 | // This is a [Semantic Version](https://semver.org/). 12 | // In a future version of Zig it will be used for package deduplication. 13 | .version = "0.0.0", 14 | 15 | // This field is optional. 16 | // This is currently advisory only; Zig does not yet do anything 17 | // with this value. 18 | //.minimum_zig_version = "0.11.0", 19 | 20 | // This field is optional. 21 | // Each dependency must either provide a `url` and `hash`, or a `path`. 22 | // `zig build --fetch` can be used to fetch all dependencies of a package, recursively. 23 | // Once all dependencies are fetched, `zig build` no longer requires 24 | // internet connectivity. 25 | .dependencies = .{ 26 | // See `zig fetch --save ` for a command-line interface for adding dependencies. 27 | //.example = .{ 28 | // // When updating this field to a new URL, be sure to delete the corresponding 29 | // // `hash`, otherwise you are communicating that you expect to find the old hash at 30 | // // the new URL. 31 | // .url = "https://example.com/foo.tar.gz", 32 | // 33 | // // This is computed from the file contents of the directory of files that is 34 | // // obtained after fetching `url` and applying the inclusion rules given by 35 | // // `paths`. 36 | // // 37 | // // This field is the source of truth; packages do not come from a `url`; they 38 | // // come from a `hash`. `url` is just one of many possible mirrors for how to 39 | // // obtain a package matching this `hash`. 40 | // // 41 | // // Uses the [multihash](https://multiformats.io/multihash/) format. 
42 | // .hash = "...", 43 | // 44 | // // When this is provided, the package is found in a directory relative to the 45 | // // build root. In this case the package's hash is irrelevant and therefore not 46 | // // computed. This field and `url` are mutually exclusive. 47 | // .path = "foo", 48 | 49 | // // When this is set to `true`, a package is declared to be lazily 50 | // // fetched. This makes the dependency only get fetched if it is 51 | // // actually used. 52 | // .lazy = false, 53 | //}, 54 | }, 55 | 56 | // Specifies the set of files and directories that are included in this package. 57 | // Only files and directories listed here are included in the `hash` that 58 | // is computed for this package. Only files listed here will remain on disk 59 | // when using the zig package manager. As a rule of thumb, one should list 60 | // files required for compilation plus any license(s). 61 | // Paths are relative to the build root. Use the empty string (`""`) to refer to 62 | // the build root itself. 63 | // A directory listed here means that all files within, recursively, are included. 64 | .paths = .{ 65 | "build.zig", 66 | "build.zig.zon", 67 | "src", 68 | // For example... 69 | //"LICENSE", 70 | //"README.md", 71 | }, 72 | } 73 | -------------------------------------------------------------------------------- /compiler/build.zig.zon: -------------------------------------------------------------------------------- 1 | .{ 2 | // This is the default name used by packages depending on this one. For 3 | // example, when a user runs `zig fetch --save `, this field is used 4 | // as the key in the `dependencies` table. Although the user can choose a 5 | // different name, most users will stick with this provided value. 6 | // 7 | // It is redundant to include "zig" in this name because it is already 8 | // within the Zig package namespace. 9 | .name = .compiler, 10 | .fingerprint = 0xaa62bd49671a6d1c, 11 | 12 | // This is a [Semantic Version](https://semver.org/). 
13 | // In a future version of Zig it will be used for package deduplication. 14 | .version = "0.0.0", 15 | 16 | // This field is optional. 17 | // This is currently advisory only; Zig does not yet do anything 18 | // with this value. 19 | //.minimum_zig_version = "0.11.0", 20 | 21 | // This field is optional. 22 | // Each dependency must either provide a `url` and `hash`, or a `path`. 23 | // `zig build --fetch` can be used to fetch all dependencies of a package, recursively. 24 | // Once all dependencies are fetched, `zig build` no longer requires 25 | // internet connectivity. 26 | .dependencies = .{ 27 | // See `zig fetch --save ` for a command-line interface for adding dependencies. 28 | //.example = .{ 29 | // // When updating this field to a new URL, be sure to delete the corresponding 30 | // // `hash`, otherwise you are communicating that you expect to find the old hash at 31 | // // the new URL. 32 | // .url = "https://example.com/foo.tar.gz", 33 | // 34 | // // This is computed from the file contents of the directory of files that is 35 | // // obtained after fetching `url` and applying the inclusion rules given by 36 | // // `paths`. 37 | // // 38 | // // This field is the source of truth; packages do not come from a `url`; they 39 | // // come from a `hash`. `url` is just one of many possible mirrors for how to 40 | // // obtain a package matching this `hash`. 41 | // // 42 | // // Uses the [multihash](https://multiformats.io/multihash/) format. 43 | // .hash = "...", 44 | // 45 | // // When this is provided, the package is found in a directory relative to the 46 | // // build root. In this case the package's hash is irrelevant and therefore not 47 | // // computed. This field and `url` are mutually exclusive. 48 | // .path = "foo", 49 | 50 | // // When this is set to `true`, a package is declared to be lazily 51 | // // fetched. This makes the dependency only get fetched if it is 52 | // // actually used. 
/// Looks `mnemonic` up in a comptime table of `.{ "text", .TAG }` pairs and
/// returns the matching tag of `T`.
///
/// Returns `error.UnknownInstruction` when no entry matches. Mnemonics come
/// from assembly source text, so an unknown one is an input error rather than
/// a broken invariant and must not hit `unreachable`.
fn lookupMnemonic(
    comptime T: type,
    comptime table: anytype,
    mnemonic: []const u8,
) error{UnknownInstruction}!T {
    inline for (table) |entry| {
        if (std.mem.eql(u8, mnemonic, entry[0])) return entry[1];
    }
    return error.UnknownInstruction;
}

/// Maps an R-type mnemonic (base integer ops plus the M-extension names used
/// by this assembler) to its enum tag.
/// Returns `error.UnknownInstruction` for any other text.
pub fn getRTypeInstruction(instruction: []const u8) !RTypeInstruction {
    return lookupMnemonic(RTypeInstruction, .{
        .{ "add", .ADD },
        .{ "sub", .SUB },
        .{ "sll", .SLL },
        .{ "slt", .SLT },
        .{ "sltu", .SLTU },
        .{ "xor", .XOR },
        .{ "srl", .SRL },
        .{ "sra", .SRA },
        .{ "or", .OR },
        .{ "and", .AND },

        // m extension
        .{ "mul", .MUL },
        .{ "mulh", .MULH },
        .{ "mulsu", .MULSU },
        .{ "mulu", .MULU },
        .{ "div", .DIV },
        .{ "divu", .DIVU },
        .{ "rem", .REM },
        .{ "remu", .REMU },
    }, instruction);
}

/// Maps an I-type mnemonic (register-immediate ops, loads, `jalr`) to its tag.
/// Returns `error.UnknownInstruction` for any other text.
pub fn getITypeInstruction(instruction: []const u8) !ITypeInstruction {
    return lookupMnemonic(ITypeInstruction, .{
        .{ "addi", .ADDI },
        .{ "slti", .SLTI },
        .{ "sltiu", .SLTIU },
        .{ "xori", .XORI },
        .{ "ori", .ORI },
        .{ "andi", .ANDI },
        .{ "slli", .SLLI },
        .{ "srli", .SRLI },
        .{ "srai", .SRAI },
        .{ "lb", .LB },
        .{ "lh", .LH },
        .{ "lw", .LW },
        .{ "lbu", .LBU },
        .{ "lhu", .LHU },
        .{ "jalr", .JALR },
    }, instruction);
}

/// Maps a store mnemonic to its tag.
/// Returns `error.UnknownInstruction` for any other text.
pub fn getSTypeInstruction(instruction: []const u8) !STypeInstruction {
    return lookupMnemonic(STypeInstruction, .{
        .{ "sb", .SB },
        .{ "sh", .SH },
        .{ "sw", .SW },
    }, instruction);
}

/// Maps a conditional-branch mnemonic to its tag.
/// Returns `error.UnknownInstruction` for any other text.
pub fn getBTypeInstruction(instruction: []const u8) !BTypeInstruction {
    return lookupMnemonic(BTypeInstruction, .{
        .{ "beq", .BEQ },
        .{ "bne", .BNE },
        .{ "blt", .BLT },
        .{ "bge", .BGE },
        .{ "bltu", .BLTU },
        .{ "bgeu", .BGEU },
    }, instruction);
}

/// Maps an upper-immediate mnemonic to its tag.
/// Returns `error.UnknownInstruction` for any other text.
pub fn getUTypeInstruction(instruction: []const u8) !UTypeInstruction {
    return lookupMnemonic(UTypeInstruction, .{
        .{ "lui", .LUI },
        .{ "auipc", .AUIPC },
    }, instruction);
}

/// Maps the jump mnemonic `jal` to its tag.
/// Returns `error.UnknownInstruction` for any other text.
pub fn getJTypeInstruction(instruction: []const u8) !JTypeInstruction {
    return lookupMnemonic(JTypeInstruction, .{
        .{ "jal", .JAL },
    }, instruction);
}
/// Turns an assembly AST (`asm_ast.Program`) into textual assembly.
pub const Emitter = struct {
    program: asm_ast.Program,

    /// Wraps `program`; nothing is allocated or copied here.
    pub fn init(program: asm_ast.Program) Emitter {
        return .{ .program = program };
    }

    /// Renders every instruction of `program.function` as one line of text.
    ///
    /// Line shapes:
    ///   r-type        `op rd rs1 rs2`
    ///   i-type load   `op rd imm(rs)`
    ///   i-type other  `op rd rs imm`
    ///   b-type        `op rs1 rs2 label`
    ///   s-type        `op rs1 imm(rs2)`
    ///   j-type        `op rd label`
    ///   u-type        `op rd imm`
    ///   label         `name:`
    ///
    /// The returned slice is allocated with `allocator` and owned by the caller.
    pub fn getAssemblyString(self: Emitter, allocator: std.mem.Allocator) ![]const u8 {
        var buffer = std.ArrayList(u8).init(allocator);
        errdefer buffer.deinit();
        const writer = buffer.writer();

        for (self.program.function.instructions) |instruction| {
            switch (instruction) {
                .rtype => |r| try writer.print("{s} {s} {s} {s}\n", .{
                    r.instr.toString(),
                    r.destination.toString(),
                    r.source1.toString(),
                    r.source2.toString(),
                }),
                .itype => |i| switch (i.instr) {
                    // Loads use the `imm(base)` addressing syntax.
                    .LW, .LH, .LB, .LHU, .LBU => try writer.print("{s} {s} {}({s})\n", .{
                        i.instr.toString(),
                        i.destination.toString(),
                        i.immediate,
                        i.source.toString(),
                    }),
                    else => try writer.print("{s} {s} {s} {}\n", .{
                        i.instr.toString(),
                        i.destination.toString(),
                        i.source.toString(),
                        i.immediate,
                    }),
                },
                .btype => |b| try writer.print("{s} {s} {s} {s}\n", .{
                    b.instr.toString(),
                    b.source1.toString(),
                    b.source2.toString(),
                    b.label,
                }),
                .stype => |s| try writer.print("{s} {s} {}({s})\n", .{
                    s.instr.toString(),
                    s.source1.toString(),
                    s.immediate,
                    s.source2.toString(),
                }),
                .jtype => |j| try writer.print("{s} {s} {s}\n", .{
                    j.instr.toString(),
                    j.destination.toString(),
                    j.label,
                }),
                .utype => |u| try writer.print("{s} {s} {}\n", .{
                    u.instr.toString(),
                    u.destination.toString(),
                    u.immediate,
                }),
                .label => |label| try writer.print("{s}:\n", .{label.name}),
            }
        }

        return buffer.toOwnedSlice(); // Caller owns the memory
    }

    /// Writes the rendered assembly to `<dirname(out_name)>/<stem(out_name)>.asm`,
    /// relative to the current working directory. `dirname` falls back to "."
    /// when `out_name` has no directory component. `allocator` is only used for
    /// the temporary assembly string.
    pub fn write(self: Emitter, out_name: []const u8, allocator: std.mem.Allocator) !void {
        const dirname = std.fs.path.dirname(out_name) orelse ".";
        const stem = std.fs.path.stem(out_name);

        var path_buf: [std.fs.MAX_PATH_BYTES]u8 = undefined;
        const output_path = try std.fmt.bufPrint(&path_buf, "{s}/{s}.asm", .{ dirname, stem });

        const assembly = try self.getAssemblyString(allocator);
        defer allocator.free(assembly);

        const file = try std.fs.cwd().createFile(output_path, .{});
        defer file.close();

        try file.writeAll(assembly);
    }
};
// Combinational instruction decoder.
//
// Splits a 32-bit instruction word into register indices, an ALU operation
// code, memory-access controls, instruction-class flags and an immediate.
// Every output driven from the always block gets a default at the top of the
// block, so no path through the case statement infers a latch.
module decoder (
    input [31:0] instr,

    output reg [3:0] alu_ops,  // add, sub, xor, or, and, sll, srl, sra, slt, sltu

    output reg reg_write,  // 1 for R, I. 0 for S, B

    output reg       mem_read,   // 1 for LB/LH/LW
    output reg       mem_write,  // 1 for SB/SH/SW
    output reg [1:0] mem_width,  // 00 byte, 01 half, 10 word

    output reg is_lui,
    output reg is_i_type,
    output reg is_i_load_type,
    output reg is_branch,
    output reg is_store,

    output [4:0] rs1,
    output [4:0] rs2,
    output reg   rs1_used,
    output reg   rs2_used,

    output [4:0] rd,

    output reg [31:0] imm
);

  // The two low opcode bits are not examined.
  wire [4:0] opcode = instr[6:2];
  wire [2:0] funct3 = instr[14:12];
  wire [6:0] funct7 = instr[31:25];

  localparam R_TYPE = 5'b01100;
  localparam I_TYPE = 5'b00100;
  localparam I_LOAD_TYPE = 5'b00000;
  localparam I_JALR_TYPE = 5'b11001;
  localparam B_TYPE = 5'b11000;
  localparam S_TYPE = 5'b01000;
  localparam J_TYPE = 5'b11011;
  localparam LUI = 5'b01101;

  // Sign-extended 12-bit immediates. The ALU consumes `imm` through
  // $signed(imm), so a negative immediate must arrive with its upper bits set.
  wire [31:0] imm_i = {{20{instr[31]}}, instr[31:20]};
  wire [31:0] imm_s = {{20{instr[31]}}, instr[31:25], instr[11:7]};

  assign rs1 = instr[19:15];
  assign rs2 = instr[24:20];

  assign rd  = instr[11:7];

  always @(*) begin
    // Defaults for every output assigned in this block.
    is_lui         = 0;
    is_i_type      = 0;
    is_i_load_type = 0;
    is_branch      = 0;
    is_store       = 0;

    mem_write      = 0;
    mem_read       = 0;
    reg_write      = 0;

    alu_ops        = 4'b0000;
    mem_width      = 2'b00;
    rs1_used       = 0;
    rs2_used       = 0;
    imm            = 32'b0;

    case (opcode)
      R_TYPE: begin
        if (funct7 != 7'b0000001) begin
          case (funct3)
            3'b000:  alu_ops = (funct7 !== 7'b0100000) ? 4'b0000 : 4'b0001;  // ADD / SUB
            3'b100:  alu_ops = 4'b0010;  // XOR
            3'b110:  alu_ops = 4'b0011;  // OR
            3'b111:  alu_ops = 4'b0100;  // AND
            3'b001:  alu_ops = 4'b0101;  // SLL
            3'b101:  alu_ops = (funct7 !== 7'b0100000) ? 4'b0110 : 4'b1000;  // SRL : SRA
            3'b010:  alu_ops = 4'b1001;  // SLT
            3'b011:  alu_ops = 4'b1011;  // SLTU
            default: alu_ops = 4'b0000;
          endcase
        end else begin
          // M extension (funct7 == 0000001)
          case (funct3)
            3'b000:  alu_ops = 4'b1100;  // MUL
            3'b100:  alu_ops = 4'b1101;  // DIV
            3'b110:  alu_ops = 4'b1110;  // REM
            default: alu_ops = 4'b0000;  // REST NOT DEFINED !! TODO
          endcase
        end

        rs1_used  = 1;
        rs2_used  = 1;
        reg_write = 1;
      end

      I_TYPE: begin
        // Operation codes mirror the R-type table above.
        case (funct3)
          3'b000: alu_ops = 4'b0000;  // ADDI
          3'b100: alu_ops = 4'b0010;  // XORI
          3'b110: alu_ops = 4'b0011;  // ORI
          3'b111: alu_ops = 4'b0100;  // ANDI
          3'b001: alu_ops = 4'b0101;  // SLLI
          3'b101: begin
            // Shift kind lives in instr[31:25]; read it from the instruction
            // word, not from `imm`, which is only assigned further down.
            if (funct7 == 7'b0000000) alu_ops = 4'b0110;  // SRLI
            // NOTE(review): SRAI keeps its original code 4'b0111, which differs
            // from the R-type SRA code (4'b1000) — confirm against the ALU.
            else if (funct7 == 7'b0100000) alu_ops = 4'b0111;  // SRAI
          end
          3'b010: alu_ops = 4'b1001;  // SLTI
          3'b011: alu_ops = 4'b1011;  // SLTIU
          default: alu_ops = 4'b0000;
        endcase

        imm       = imm_i;
        is_i_type = 1;
        rs1_used  = 1;
        reg_write = 1;
      end

      I_LOAD_TYPE: begin
        // NOTE(review): LBU/LHU use funct3 011/100 here; presumably this
        // matches the project's assembler — verify.
        case (funct3)
          3'b000: mem_width = 2'b00;  // LB
          3'b001: mem_width = 2'b01;  // LH
          3'b010: mem_width = 2'b10;  // LW
          3'b011: mem_width = 2'b00;  // LBU
          3'b100: mem_width = 2'b01;  // LHU
          default: mem_width = 2'b00;
        endcase

        imm            = imm_i;
        mem_read       = 1;
        is_i_load_type = 1;
        rs1_used       = 1;
        reg_write      = 1;
      end

      B_TYPE: begin
        case (funct3)
          3'b000: alu_ops = 4'b0000;  // BEQ
          3'b001: alu_ops = 4'b0001;  // BNE
          3'b100: alu_ops = 4'b0010;  // BLT
          3'b101: alu_ops = 4'b0011;  // BGE
          3'b110: alu_ops = 4'b0100;  // BLTU
          3'b111: alu_ops = 4'b0101;  // BGEU
          default: alu_ops = 4'b0000;
        endcase

        // Same bit layout as the store immediate, sign-extended.
        imm       = imm_s;

        is_branch = 1;
        rs1_used  = 1;
        rs2_used  = 1;
      end

      J_TYPE: begin
        alu_ops   = 4'b0110;  // JAL

        imm       = {{13{instr[31]}}, instr[30:12]};

        is_branch = 1;
        rs1_used  = 1;
        rs2_used  = 1;
        reg_write = 1;
      end

      I_JALR_TYPE: begin
        alu_ops   = 4'b0111;  //JALR

        is_branch = 1;
        imm       = imm_i;
        rs1_used  = 1;  // jump target is rs1 + imm
        reg_write = 1;
      end

      S_TYPE: begin
        case (funct3)
          3'b000: mem_width = 2'b00;  // SB
          3'b001: mem_width = 2'b01;  // SH
          3'b010: mem_width = 2'b10;  // SW
          default: mem_width = 2'b00;
        endcase

        imm       = imm_s;

        mem_write = 1;
        is_store  = 1;
        rs1_used  = 1;
        rs2_used  = 1;
      end

      LUI: begin
        is_lui    = 1;
        // Zero-extended, unchanged from the original; the ALU shifts it by 12.
        imm       = {20'b0, instr[31:20]};
        reg_write = 1;
      end

      default: is_i_type = 0;
    endcase
  end

endmodule
// utils for enums, preferably have this in a separate file

/// Mixin that gives an instruction enum a `toString` method returning the
/// lower-cased tag name (e.g. `.ADDI` -> "addi").
pub fn EnumMethods(comptime T: type) type {
    return struct {
        /// Returns the lower-case mnemonic for `self`.
        ///
        /// The string is built at compile time and has static lifetime, so the
        /// result stays valid after this function returns. (Lower-casing into a
        /// local buffer and returning a slice of it would hand the caller a
        /// pointer into a dead stack frame.)
        pub fn toString(self: T) []const u8 {
            return switch (self) {
                inline else => |tag| comptime lowerTagName(@tagName(tag)),
            };
        }

        /// Comptime-only helper: lower-cases `name` into constant data.
        fn lowerTagName(comptime name: []const u8) []const u8 {
            comptime {
                var buf: [name.len]u8 = undefined;
                _ = std.ascii.lowerString(&buf, name);
                // Copy into a const so the pointer refers to immutable data
                // rather than to a comptime var.
                const final = buf;
                return &final;
            }
        }
    };
}

/// Maps a flat `InstructionType` tag onto the per-format enum that has a tag
/// of the same name, wrapped in a union naming the format.
/// Formats are tried in the order R, I, B, S, U, J.
pub fn convert(instr: InstructionType) union(enum) { rtype: RType_Inst, itype: IType_Inst, btype: BType_Inst, stype: SType_Inst, utype: UType_Inst, jtype: JType_Inst } {
    const instr_name = @tagName(instr);

    if (std.meta.stringToEnum(RType_Inst, instr_name)) |op| return .{ .rtype = op };
    if (std.meta.stringToEnum(IType_Inst, instr_name)) |op| return .{ .itype = op };
    if (std.meta.stringToEnum(BType_Inst, instr_name)) |op| return .{ .btype = op };
    if (std.meta.stringToEnum(SType_Inst, instr_name)) |op| return .{ .stype = op };
    if (std.meta.stringToEnum(UType_Inst, instr_name)) |op| return .{ .utype = op };
    if (std.meta.stringToEnum(JType_Inst, instr_name)) |op| return .{ .jtype = op };

    std.debug.print("Can't convert {} instruction\n", .{instr});

    // Every InstructionType tag also appears in one of the format enums below.
    unreachable;
}

/// Registers available to the code generator; emitted by tag name.
pub const Reg = enum {
    zero,
    ra,
    sp,
    t0,
    t1,
    t2,
    fp,
    a0,
    a1,
    a2,
    a3,
    a4,
    a5,
    a6,
    a7,
    t3,

    /// Returns the register name exactly as declared.
    pub fn toString(self: Reg) []const u8 {
        return @tagName(self);
    }
};

const Addi = struct {
    source: Reg,
    destination: Reg,
    imm: u12,
};

/// Flat list of every instruction the compiler can emit, regardless of format.
pub const InstructionType = enum {
    ADD,
    SUB,
    XOR,
    OR,
    AND,
    SLL,
    SRL,
    SRA,
    SLT,
    SLTU,
    MUL,
    MULH,
    MULSU,
    MULU,
    DIV,
    DIVU,
    REM,
    REMU,

    // Register-immediate forms. Only these four are enabled; the remaining
    // immediate variants were commented out in the original list.
    ADDI,
    XORI,
    SLTI,
    SLTIU,

    LB,
    LH,
    LW,
    LBU,
    LHU,

    SB,
    SH,
    SW,

    BEQ,
    BNE,
    BLT,
    BGE,
    BLTU,
    BGEU,

    LUI,
    AUIPC,

    JAL,
    JALR,
};

/// Register-register instructions.
pub const RType_Inst = enum {
    ADD,
    SUB,
    XOR,
    OR,
    AND,
    SLL,
    SRL,
    SRA,
    SLT,
    SLTU,

    MUL,
    MULH,
    MULSU,
    MULU,
    DIV,
    DIVU,
    REM,
    REMU,

    pub usingnamespace EnumMethods(RType_Inst);
};

/// Register-immediate instructions, loads, and `JALR`.
pub const IType_Inst = enum {
    ADDI,
    XORI,
    SLTI,
    SLTIU,

    LB,
    LH,
    LW,
    LBU,
    LHU,

    JALR,

    pub usingnamespace EnumMethods(IType_Inst);
};

/// Conditional branches.
pub const BType_Inst = enum {
    BEQ,
    BNE,
    BLT,
    BGE,
    BLTU,
    BGEU,

    pub usingnamespace EnumMethods(BType_Inst);
};

/// Stores.
pub const SType_Inst = enum {
    SB,
    SH,
    SW,

    pub usingnamespace EnumMethods(SType_Inst);
};

/// Upper-immediate instructions.
pub const UType_Inst = enum {
    LUI,
    AUIPC,

    pub usingnamespace EnumMethods(UType_Inst);
};

/// Jumps.
pub const JType_Inst = enum {
    JAL,

    pub usingnamespace EnumMethods(JType_Inst);
};

pub const RType = struct {
    instr: RType_Inst,
    source1: Reg,
    source2: Reg,
    destination: Reg,
};

pub const IType = struct {
    instr: IType_Inst,
    source: Reg,
    destination: Reg,
    immediate: i32,
};

pub const BType = struct {
    instr: BType_Inst,
    source1: Reg,
    source2: Reg,
    label: []const u8,
};

pub const SType = struct {
    instr: SType_Inst,
    source1: Reg,
    source2: Reg,
    immediate: i32,
};

pub const UType = struct {
    instr: UType_Inst,
    destination: Reg,
    immediate: i32,
};

pub const JType = struct {
    instr: JType_Inst,
    destination: Reg,
    label: []const u8,
};

const Label = struct {
    name: []const u8,
};

/// One line of assembly: an instruction of some format, or a label.
pub const Instruction = union(enum) {
    rtype: RType,
    itype: IType,
    stype: SType,
    btype: BType,
    utype: UType,
    jtype: JType,

    label: Label,
};

pub const FunctionDefinition = struct {
    identifier: []const u8,
    instructions: []Instruction,
};

pub const Program = struct {
    function: FunctionDefinition,
};
void { 49 | while (self.currentIndex < self.source.len) { 50 | self.start = self.currentIndex; 51 | const token = self.scanToken(); 52 | if (token) |t| self.tokens.append(t) catch @panic("out of memory"); 53 | } 54 | } 55 | 56 | fn isAtEnd(self: *Lexer) bool { 57 | return self.currentIndex >= self.source.len; 58 | } 59 | 60 | fn advance(self: *Lexer) u8 { 61 | self.currentIndex += 1; 62 | return self.source[self.currentIndex - 1]; 63 | } 64 | 65 | fn isDigit(self: *Lexer, c: u8) bool { 66 | _ = self; 67 | return c >= '0' and c <= '9'; 68 | } 69 | 70 | fn isAlpha(self: *Lexer, c: u8) bool { 71 | _ = self; 72 | return (c >= 'a' and c <= 'z') or 73 | (c >= 'A' and c <= 'Z') or 74 | c == '_'; 75 | } 76 | 77 | fn isAlphaNumeric(self: *Lexer, c: u8) bool { 78 | return self.isAlpha(c) or self.isDigit(c); 79 | } 80 | 81 | fn peek(self: *Lexer) u8 { 82 | if (self.isAtEnd()) return 0; 83 | return self.source[self.currentIndex]; 84 | } 85 | 86 | fn match(self: *Lexer, expected: u8) bool { 87 | if (self.isAtEnd()) return false; 88 | 89 | if (self.source[self.currentIndex] != expected) return false; 90 | 91 | self.currentIndex += 1; 92 | return true; 93 | } 94 | 95 | fn scanComment(self: *Lexer) void { 96 | while (self.peek() != '\n' and !self.isAtEnd()) { 97 | _ = self.advance(); 98 | } 99 | } 100 | 101 | fn string(self: *Lexer) ?Token { 102 | while (self.peek() != '"' and !self.isAtEnd()) { 103 | if (self.peek() == '\n') { 104 | self.currentLine += 1; 105 | } 106 | _ = self.advance(); 107 | } 108 | 109 | if (self.isAtEnd()) { 110 | return null; 111 | } 112 | 113 | _ = self.advance(); 114 | 115 | const value = self.source[self.start + 1 .. 
self.currentIndex - 1]; 116 | 117 | const token = Token.init(.STRING, .{ .string = value }, self.currentLine); 118 | 119 | return token; 120 | } 121 | 122 | fn number(self: *Lexer) ?Token { 123 | while (self.isDigit(self.peek())) _ = self.advance(); 124 | 125 | const number_str = self.source[self.start..self.currentIndex]; 126 | 127 | const token = Token.init(.NUMBER, .{ .number = std.fmt.parseInt(i32, number_str, 10) catch @panic("failed to parse int") }, self.currentLine); 128 | return token; 129 | } 130 | 131 | fn identifier(self: *Lexer) ?Token { 132 | while (self.isAlphaNumeric(self.peek())) _ = self.advance(); 133 | 134 | const value = self.source[self.start..self.currentIndex]; 135 | const ttype = keywords.get(value) orelse TokenType.IDENTIFIER; 136 | 137 | const token = Token.init(ttype, .{ .string = value }, self.currentLine); 138 | return token; 139 | } 140 | 141 | pub fn scanToken(self: *Lexer) ?Token { 142 | const char = self.advance(); 143 | 144 | const token_type: TokenType = switch (char) { 145 | '(' => .LEFT_PAREN, 146 | ')' => .RIGHT_PAREN, 147 | '{' => .LEFT_BRACE, 148 | '}' => .RIGHT_BRACE, 149 | ',' => .COMMA, 150 | '.' => .DOT, 151 | '-' => .MINUS, 152 | '+' => .PLUS, 153 | ';' => .SEMICOLON, 154 | '*' => .STAR, 155 | '%' => .PERCENTAGE, 156 | '?' => .QUESTION_MARK, 157 | ':' => .COLON, 158 | '&' => if (self.match('&')) .AMPERSAND_AMPERSAND else .AMPERSAND, 159 | '|' => if (self.match('|')) .PIPE_PIPE else .PIPE, 160 | '^' => .CARET, 161 | 162 | '!' 
=> if (self.match('=')) .BANG_EQUAL else .BANG, 163 | '=' => if (self.match('=')) .EQUAL_EQUAL else .EQUAL, 164 | '<' => if (self.match('=')) .LESS_EQUAL else if (self.match('<')) .LEFT_SHIFT else .LESS, 165 | 166 | '>' => if (self.match('=')) .GREATER_EQUAL else if (self.match('>')) .RIGHT_SHIFT else .GREATER, 167 | 168 | '/' => blk: { 169 | const result: TokenType = if (self.match('/')) { 170 | while (self.peek() != '\n' and !self.isAtEnd()) { 171 | _ = self.advance(); 172 | } 173 | if (!self.isAtEnd()) return self.scanToken() else return null; 174 | } else .SLASH; 175 | break :blk result; 176 | }, 177 | '\n' => { 178 | self.currentLine += 1; 179 | return null; 180 | }, 181 | ' ' => return null, 182 | '\r' => return null, 183 | '\t' => return null, 184 | '"', 185 | => { 186 | return self.string(); 187 | }, 188 | else => { 189 | if (self.isDigit(char)) return self.number(); 190 | 191 | if (self.isAlpha(char)) return self.identifier(); 192 | 193 | const msg = std.fmt.allocPrint(self.allocator, "unexpected character at line {}", .{self.currentLine}) catch @panic("try again"); 194 | defer self.allocator.free(msg); 195 | @panic(msg); 196 | }, 197 | }; 198 | 199 | const token = Token.init(token_type, null, self.currentLine); 200 | return token; 201 | } 202 | }; 203 | -------------------------------------------------------------------------------- /compiler/src/middleend/register-allocator.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const c_ast = @import("../ast/c.zig"); 3 | const asm_ast = @import("../ast/asm.zig"); 4 | 5 | const Interval = struct { 6 | start: usize, 7 | end: usize, 8 | }; 9 | 10 | const Register = struct { 11 | reg: asm_ast.Reg, 12 | assigned: bool = false, 13 | variable: ?[]const u8 = null, 14 | expiration: ?usize = null, 15 | }; 16 | 17 | pub const RegisterAllocator = struct { 18 | allocator: std.mem.Allocator, 19 | intervals: std.StringHashMap(Interval), 20 | line: usize 
= 0, 21 | registers: [5]Register, 22 | 23 | pub fn init(allocator: std.mem.Allocator) RegisterAllocator { 24 | return RegisterAllocator{ 25 | .allocator = allocator, 26 | .intervals = std.StringHashMap(Interval).init(allocator), 27 | .registers = [_]Register{ 28 | .{ .reg = .a0 }, 29 | .{ .reg = .a1 }, 30 | .{ .reg = .t0 }, 31 | .{ .reg = .t1 }, 32 | .{ .reg = .t2 }, 33 | }, 34 | }; 35 | } 36 | 37 | pub fn getVariableRegister(self: *RegisterAllocator, variable: []const u8, line: usize) asm_ast.Reg { 38 | self.updateRegisters(line); 39 | 40 | for (&self.registers) |*reg| { 41 | if (reg.assigned and reg.variable != null) { 42 | if (std.mem.eql(u8, reg.variable.?, variable)) { 43 | return reg.reg; 44 | } 45 | } 46 | } 47 | 48 | if (self.intervals.get(variable)) |interval| { 49 | for (self.registers) |*reg| { 50 | if (reg.assigned == false) { 51 | reg.assigned = true; 52 | reg.variable = variable; 53 | reg.expiration = interval.end; 54 | return reg.reg; 55 | } 56 | } 57 | } 58 | 59 | @panic("no registers available"); 60 | } 61 | 62 | pub fn getTempRegister(self: *RegisterAllocator, line: usize) asm_ast.Reg { 63 | self.updateRegisters(line); 64 | 65 | for (&self.registers) |*reg| { 66 | if (!reg.assigned) { 67 | reg.assigned = true; 68 | reg.variable = null; 69 | reg.expiration = line + 1; 70 | return reg.reg; 71 | } 72 | } 73 | 74 | @panic("no registers available"); 75 | } 76 | 77 | pub fn expireRegister(self: *RegisterAllocator, reg: asm_ast.Reg) void { 78 | for (&self.registers) |*self_reg| { 79 | if (self_reg.*.reg == reg) { 80 | self_reg.assigned = false; 81 | self_reg.variable = null; 82 | self_reg.expiration = null; 83 | } 84 | } 85 | } 86 | 87 | fn updateRegisters(self: *RegisterAllocator, line: usize) void { 88 | for (&self.registers) |*reg| { 89 | if (reg.assigned and reg.expiration != null and reg.expiration.? 
<= line) { 90 | reg.assigned = false; 91 | reg.variable = null; 92 | reg.expiration = null; 93 | } 94 | } 95 | } 96 | 97 | pub fn scanFunction(self: *RegisterAllocator, function: c_ast.FunctionDeclaration) !void { 98 | if (function.body == null) @panic("performing linear scan on a function without a body"); 99 | 100 | self.intervals = std.StringHashMap(Interval).init(self.allocator); 101 | 102 | for (function.params) |param| { 103 | try self.intervals.put(param, .{ 104 | .start = 0, 105 | .end = 0, 106 | }); 107 | } 108 | 109 | for (function.body.?.block_items) |block_item| { 110 | switch (block_item) { 111 | .statement => |statement| { 112 | try self.scanStatement(statement); 113 | }, 114 | .declaration => { 115 | try self.scanDeclaration(block_item.declaration); 116 | }, 117 | } 118 | 119 | self.line += 1; 120 | } 121 | } 122 | 123 | fn scanVariable(self: *RegisterAllocator, variable: []const u8) !void { 124 | if (self.intervals.getPtr(variable)) |interval| { 125 | interval.end = self.line; 126 | } else { 127 | try self.intervals.put(variable, Interval{ 128 | .start = self.line, 129 | .end = self.line, 130 | }); 131 | } 132 | } 133 | 134 | fn scanStatement(self: *RegisterAllocator, statement: c_ast.Statement) !void { 135 | switch (statement) { 136 | .ret => |ret| { 137 | _ = ret; 138 | }, 139 | .exp => |exp| { 140 | try self.scanExpression(exp); 141 | }, 142 | .if_ => |if_| { 143 | if (if_.else_) |else_| try self.scanStatement(else_.*); 144 | try self.scanExpression(if_.condition); 145 | try self.scanStatement(if_.then.*); 146 | }, 147 | .compound => |compound| { 148 | for (compound.block_items) |block_item| { 149 | switch (block_item) { 150 | .statement => { 151 | try self.scanStatement(block_item.statement); 152 | }, 153 | .declaration => { 154 | try self.scanDeclaration(block_item.declaration); 155 | }, 156 | } 157 | } 158 | }, 159 | .do_while => |do_while| { 160 | try self.scanStatement(do_while.body.*); 161 | try self.scanExpression(do_while.condition); 162 
| }, 163 | .for_ => |for_| { 164 | if (for_.condition) |condition| try self.scanExpression(condition); 165 | if (for_.post) |post| try self.scanExpression(post); 166 | try self.scanStatement(for_.body.*); 167 | if (for_.init.init_exp) |init_exp| try self.scanExpression(init_exp); 168 | try self.scanVariable(for_.init.init_decl.identifier); 169 | }, 170 | .while_ => |while_| { 171 | _ = while_; 172 | }, 173 | else => {}, 174 | } 175 | } 176 | 177 | fn scanDeclaration(self: *RegisterAllocator, declaration: c_ast.Declaration) !void { 178 | switch (declaration) { 179 | .variable_declaration => |variable_declaration| { 180 | try self.scanVariable(variable_declaration.identifier); 181 | if (variable_declaration.initial) |initial| try self.scanExpression(initial); 182 | }, 183 | else => {}, 184 | } 185 | } 186 | 187 | fn scanExpression(self: *RegisterAllocator, exp: c_ast.Expression) anyerror!void { 188 | switch (exp) { 189 | .assignment => |assignment| { 190 | try self.scanExpression(assignment.left.*); 191 | try self.scanExpression(assignment.right.*); 192 | }, 193 | .variable => |variable| { 194 | try self.scanVariable(variable.identifier); 195 | }, 196 | .binary => |binary| { 197 | try self.scanExpression(binary.left.*); 198 | try self.scanExpression(binary.right.*); 199 | }, 200 | .function_call => |function_call| { 201 | for (function_call.args) |arg| { 202 | try self.scanExpression(arg.*); 203 | } 204 | }, 205 | else => {}, 206 | } 207 | } 208 | }; 209 | -------------------------------------------------------------------------------- /compiler/src/prettyprinter.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const c_ast = @import("ast/c.zig"); 3 | 4 | pub fn printExpression(exp: c_ast.Expression, indent: usize) void { 5 | const spaces = " " ** 64; 6 | std.debug.print("{s}", .{spaces[0..indent]}); 7 | switch (exp) { 8 | .constant => |c| { 9 | std.debug.print("Constant: {}\n", .{c}); 10 | }, 11 
| .binary => |b| { 12 | const op = switch (b.operator) { 13 | .Add => "+", 14 | .Subtract => "-", 15 | .Multiply => "*", 16 | .Divide => "/", 17 | .Remainder => "%", 18 | .Bitwise_AND => "&", 19 | .Bitwise_OR => "|", 20 | .Bitwise_XOR => "^", 21 | .Left_Shift => "<<", 22 | .Right_Shift => ">>", 23 | .Less => "<", 24 | .Less_Or_Equal => "<=", 25 | .Greater => ">", 26 | .Greater_Or_Equal => ">=", 27 | .Equal => "==", 28 | .Not_Equal => "!=", 29 | .And => "&&", 30 | .Or => "||", 31 | }; 32 | std.debug.print("Binary Op: {s}\n", .{op}); 33 | printExpression(b.left.*, indent + 4); 34 | printExpression(b.right.*, indent + 4); 35 | }, 36 | .variable => |v| { 37 | std.debug.print("Variable: {s}\n", .{v.identifier}); 38 | }, 39 | .assignment => |a| { 40 | std.debug.print("Assignment:\n", .{}); 41 | printExpression(a.left.*, indent + 4); 42 | printExpression(a.right.*, indent + 4); 43 | }, 44 | .function_call => |fc| { 45 | std.debug.print("Function Call: {s}()\n", .{fc.identifier}); 46 | for (fc.args) |arg| { 47 | printExpression(arg.*, indent + 4); 48 | } 49 | }, 50 | } 51 | } 52 | 53 | pub fn printStatement(stmt: c_ast.Statement, indent: usize) void { 54 | const spaces = " " ** 64; 55 | std.debug.print("{s}", .{spaces[0..indent]}); 56 | switch (stmt) { 57 | .ret => { 58 | std.debug.print("RETURN\n", .{}); 59 | printExpression(stmt.ret.exp, indent + 2); 60 | }, 61 | .exp => |expression| { 62 | printExpression(expression, indent); 63 | }, 64 | .if_ => { 65 | std.debug.print("If\n", .{}); 66 | printExpression(stmt.if_.condition, indent + 2); 67 | std.debug.print("{s}Then\n", .{spaces[0..indent]}); 68 | printStatement(stmt.if_.then.*, indent + 2); 69 | if (stmt.if_.else_ != null) { 70 | std.debug.print("{s}Else\n", .{spaces[0..indent]}); 71 | printStatement(stmt.if_.else_.?.*, indent + 2); 72 | } 73 | }, 74 | .compound => { 75 | std.debug.print("Compound body:\n", .{}); 76 | for (stmt.compound.block_items) |item| { 77 | printBlockItem(item, indent + 2); 78 | } 79 | }, 80 | 
.while_ => { 81 | std.debug.print("While", .{}); 82 | if (stmt.while_.identifier) |id| { 83 | std.debug.print(" ({s})", .{id}); 84 | } 85 | std.debug.print("\n", .{}); 86 | printExpression(stmt.while_.condition, indent + 2); 87 | std.debug.print("{s}Body\n", .{spaces[0..indent]}); 88 | printStatement(stmt.while_.body.*, indent + 2); 89 | }, 90 | .do_while => { 91 | std.debug.print("DoWhile", .{}); 92 | if (stmt.do_while.identifier) |id| { 93 | std.debug.print(" ({s})", .{id}); 94 | } 95 | std.debug.print("\n", .{}); 96 | printStatement(stmt.do_while.body.*, indent + 2); 97 | std.debug.print("{s}While\n", .{spaces[0..indent]}); 98 | printExpression(stmt.do_while.condition, indent + 2); 99 | }, 100 | .for_ => { 101 | std.debug.print("For", .{}); 102 | if (stmt.for_.identifier) |id| { 103 | std.debug.print(" ({s})", .{id}); 104 | } 105 | std.debug.print("\n", .{}); 106 | std.debug.print("{s}Init:\n", .{spaces[0 .. indent + 2]}); 107 | switch (stmt.for_.init) { 108 | .init_exp => |init_exp| { 109 | printExpression(init_exp.?, indent + 4); 110 | }, 111 | .init_decl => |init_decl| { 112 | printDeclaration(.{ .variable_declaration = init_decl }, indent + 4); 113 | }, 114 | } 115 | std.debug.print("{s}Condition:\n", .{spaces[0 .. indent + 2]}); 116 | if (stmt.for_.condition) |condition| { 117 | printExpression(condition, indent + 4); 118 | } else { 119 | std.debug.print("{s}(none)\n", .{spaces[0 .. indent + 4]}); 120 | } 121 | std.debug.print("{s}Post:\n", .{spaces[0 .. indent + 2]}); 122 | if (stmt.for_.post) |post| { 123 | printExpression(post, indent + 4); 124 | } else { 125 | std.debug.print("{s}(none)\n", .{spaces[0 .. indent + 4]}); 126 | } 127 | std.debug.print("{s}Body:\n", .{spaces[0 .. 
indent + 2]}); 128 | printStatement(stmt.for_.body.*, indent + 4); 129 | }, 130 | .break_ => { 131 | std.debug.print("Break", .{}); 132 | if (stmt.break_.identifier) |id| { 133 | std.debug.print(" ({s})", .{id}); 134 | } 135 | std.debug.print("\n", .{}); 136 | }, 137 | .continue_ => { 138 | std.debug.print("Continue", .{}); 139 | if (stmt.continue_.identifier) |id| { 140 | std.debug.print(" ({s})", .{id}); 141 | } 142 | std.debug.print("\n", .{}); 143 | }, 144 | } 145 | } 146 | 147 | pub fn printDeclaration(decl: c_ast.Declaration, indent: usize) void { 148 | const spaces = " " ** 64; 149 | switch (decl) { 150 | .variable_declaration => |var_decl| { 151 | std.debug.print("{s}Declaration: {s}\n", .{ spaces[0..indent], var_decl.identifier }); 152 | if (var_decl.initial) |initial| { 153 | printExpression(initial, indent + 2); 154 | } 155 | }, 156 | .function_declaration => |func_decl| { 157 | printFunction(func_decl, indent); 158 | }, 159 | } 160 | } 161 | 162 | pub fn printBlockItem(item: c_ast.BlockItem, indent: usize) void { 163 | switch (item) { 164 | .statement => |stmt| printStatement(stmt, indent), 165 | .declaration => |decl| printDeclaration(decl, indent), 166 | } 167 | } 168 | 169 | pub fn printFunction(func: c_ast.FunctionDeclaration, indent: usize) void { 170 | const spaces = " " ** 64; 171 | std.debug.print("{s}Function: {s}\n", .{ spaces[0..indent], func.identifier }); 172 | 173 | // Print parameters 174 | if (func.params.len > 0) { 175 | std.debug.print("{s}Parameters:\n", .{spaces[0..indent]}); 176 | for (func.params) |param| { 177 | std.debug.print("{s}{s}\n", .{ spaces[0 .. 
indent + 2], param }); 178 | } 179 | } 180 | 181 | // Print body if it exists 182 | if (func.body) |body| { 183 | std.debug.print("{s}Body:\n", .{spaces[0..indent]}); 184 | for (body.block_items) |item| { 185 | printBlockItem(item, indent + 2); 186 | } 187 | } 188 | } 189 | 190 | pub fn printProgram(program: c_ast.Program) void { 191 | std.debug.print("Program:\n", .{}); 192 | for (program.function) |function| { 193 | printFunction(function, 2); 194 | } 195 | } 196 | -------------------------------------------------------------------------------- /compiler/src/frontend/semantic/type-checking.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const c_ast = @import("../../ast/c.zig"); 3 | const diagnostics = @import("../../diagnostics.zig"); 4 | 5 | const Symbol = struct { defined: ?bool, type_: Type }; 6 | 7 | const Type = union(enum) { 8 | int, 9 | function: Function, 10 | }; 11 | 12 | const Function = struct { 13 | length: usize, 14 | }; 15 | 16 | pub const TypeChecking = struct { 17 | allocator: std.mem.Allocator, 18 | symbols: std.StringHashMap(Symbol), 19 | 20 | pub fn init(allocator: std.mem.Allocator) TypeChecking { 21 | return .{ 22 | .allocator = allocator, 23 | .symbols = std.StringHashMap(Symbol).init(allocator), 24 | }; 25 | } 26 | 27 | pub fn check(self: *TypeChecking, program: c_ast.Program) !c_ast.Program { 28 | for (program.function) |function| { 29 | _ = try self.checkFunctionDeclaration(function); 30 | } 31 | return program; 32 | } 33 | 34 | fn checkFunctionDeclaration(self: *TypeChecking, function: c_ast.FunctionDeclaration) !c_ast.FunctionDeclaration { 35 | const type_ = Function{ .length = function.params.len }; 36 | const has_body = function.body != null; 37 | 38 | var already_defined = false; 39 | 40 | if (self.symbols.get(function.identifier)) |symbol| { 41 | const old_function = symbol; 42 | 43 | switch (old_function.type_) { 44 | .function => { 45 | if (!std.mem.eql(u8, 
std.mem.asBytes(&type_), std.mem.asBytes(&old_function.type_.function))) { 46 | const msg = try std.fmt.allocPrint(diagnostics.arena.allocator(), "Incompatible declarations for function {s}", .{function.identifier}); 47 | diagnostics.addError(msg, null); 48 | return error.IncompatibleFunctionDeclarations; 49 | } 50 | }, 51 | else => { 52 | const msg = try std.fmt.allocPrint(diagnostics.arena.allocator(), "Incompatible declarations for function {s}", .{function.identifier}); 53 | diagnostics.addError(msg, null); 54 | return error.IncompatibleFunctionDeclarations; 55 | }, 56 | } 57 | 58 | already_defined = old_function.defined.?; 59 | 60 | if (already_defined and has_body) { 61 | const msg = try std.fmt.allocPrint(diagnostics.arena.allocator(), "Function already defined {s}", .{function.identifier}); 62 | diagnostics.addError(msg, null); 63 | return error.FunctionAlreadyDefined; 64 | } 65 | } 66 | 67 | try self.symbols.put(function.identifier, Symbol{ 68 | .type_ = .{ 69 | .function = type_, 70 | }, 71 | .defined = already_defined or has_body, 72 | }); 73 | 74 | if (has_body) { 75 | for (function.params) |param| { 76 | try self.symbols.put(param, Symbol{ 77 | .type_ = .int, 78 | .defined = null, 79 | }); 80 | } 81 | 82 | _ = try self.checkBlock(function.body.?); 83 | } 84 | 85 | return function; 86 | } 87 | 88 | fn checkVariableDeclaration(self: *TypeChecking, variable: c_ast.VariableDeclaration) !c_ast.VariableDeclaration { 89 | try self.symbols.put( 90 | variable.identifier, 91 | .{ 92 | .type_ = .int, 93 | .defined = null, 94 | }, 95 | ); 96 | 97 | if (variable.initial != null) { 98 | _ = try self.checkExpression(variable.initial.?); 99 | } 100 | 101 | return variable; 102 | } 103 | 104 | fn checkBlock(self: *TypeChecking, block: c_ast.Block) anyerror!c_ast.Block { 105 | const result = block; 106 | 107 | for (block.block_items) |block_item| { 108 | switch (block_item) { 109 | .declaration => |decl| { 110 | _ = try self.checkDeclaration(decl); 111 | }, 112 | 
.statement => |stmt| { 113 | _ = try self.checkStatement(stmt); 114 | }, 115 | } 116 | } 117 | 118 | return result; 119 | } 120 | 121 | fn checkDeclaration(self: *TypeChecking, declaration: c_ast.Declaration) anyerror!c_ast.Declaration { 122 | var result = declaration; 123 | switch (declaration) { 124 | .function_declaration => |func_decl| result.function_declaration = try self.checkFunctionDeclaration(func_decl), 125 | .variable_declaration => |var_decl| result.variable_declaration = try self.checkVariableDeclaration(var_decl), 126 | } 127 | return result; 128 | } 129 | 130 | fn checkStatement(self: *TypeChecking, statement: c_ast.Statement) !c_ast.Statement { 131 | switch (statement) { 132 | .compound => |compound| { 133 | _ = try self.checkBlock(compound); 134 | }, 135 | .do_while => |do_while| { 136 | _ = try self.checkExpression(do_while.condition); 137 | _ = try self.checkStatement(do_while.body.*); 138 | }, 139 | .exp => |exp| { 140 | _ = try self.checkExpression(exp); 141 | }, 142 | .for_ => |for_| { 143 | _ = try self.checkStatement(for_.body.*); 144 | if (for_.condition != null) _ = try self.checkExpression(for_.condition.?); 145 | if (for_.post != null) _ = try self.checkExpression(for_.post.?); 146 | switch (for_.init) { 147 | .init_decl => _ = try self.checkVariableDeclaration(for_.init.init_decl), 148 | .init_exp => { 149 | if (for_.init.init_exp != null) _ = try self.checkExpression(for_.init.init_exp.?); 150 | }, 151 | } 152 | }, 153 | .if_ => |if_| { 154 | _ = try self.checkExpression(if_.condition); 155 | _ = try self.checkStatement(if_.then.*); 156 | if (if_.else_ != null) _ = try self.checkStatement(if_.else_.?.*); 157 | }, 158 | .ret => |ret| { 159 | _ = try self.checkExpression(ret.exp); 160 | }, 161 | .while_ => |while_| { 162 | _ = try self.checkExpression(while_.condition); 163 | _ = try self.checkStatement(while_.body.*); 164 | }, 165 | else => {}, //nothing to check 166 | } 167 | 168 | return statement; 169 | } 170 | 171 | fn 
checkExpression(self: *TypeChecking, expression: c_ast.Expression) !c_ast.Expression { 172 | switch (expression) { 173 | .variable => { 174 | if (self.symbols.get(expression.variable.identifier).?.type_ != .int) { 175 | const msg = try std.fmt.allocPrint(diagnostics.arena.allocator(), "Function name {s} used as a variable", .{expression.variable.identifier}); 176 | diagnostics.addError(msg, null); 177 | return error.FunctionUsedAsVariable; 178 | } 179 | }, 180 | .function_call => { 181 | const type_ = self.symbols.get(expression.function_call.identifier).?.type_; 182 | 183 | if (type_ == .int) { 184 | const msg = try std.fmt.allocPrint(diagnostics.arena.allocator(), "Variable {s} used as function name", .{expression.function_call.identifier}); 185 | diagnostics.addError(msg, null); 186 | return error.VariableUsedAsFunction; 187 | } 188 | if (type_.function.length != expression.function_call.args.len) { 189 | const msg = try std.fmt.allocPrint(diagnostics.arena.allocator(), "Function {s} called with wrong number of arguments", .{expression.function_call.identifier}); 190 | diagnostics.addError(msg, null); 191 | return error.FunctionCallWrongArguments; 192 | } 193 | for (expression.function_call.args) |arg| { 194 | _ = try self.checkExpression(arg.*); 195 | } 196 | }, 197 | else => {}, 198 | } 199 | 200 | return expression; 201 | } 202 | }; 203 | -------------------------------------------------------------------------------- /compiler/src/testing.zig: -------------------------------------------------------------------------------- 1 | pub usingnamespace @import("std").testing; 2 | const std = @import("std"); 3 | const c_ast = @import("ast/c.zig"); 4 | const Lexer = @import("frontend/lexer.zig").Lexer; 5 | const Parser = @import("frontend/parser.zig").Parser; 6 | const SemanticAnalysis = @import("frontend/semantic-analysis.zig").SemanticAnalysis; 7 | const Generator = @import("middleend/gen.zig").Generator; 8 | const Emitter = @import("backend/emission.zig").Emitter; 
9 | 10 | pub fn cToSemanticAnalysis(input: []const u8) !c_ast.Program { 11 | var arena = std.heap.ArenaAllocator.init(std.testing.allocator); 12 | defer arena.deinit(); // This single call will free all allocations at once 13 | const allocator = arena.allocator(); 14 | 15 | var lexer = Lexer.init(allocator, input); 16 | lexer.scan(); 17 | 18 | var parser = Parser.init(lexer.tokens.items, allocator); 19 | const program_definition = try parser.parse(); 20 | 21 | var semantic = SemanticAnalysis.init(allocator); 22 | return try semantic.analyze(program_definition); 23 | } 24 | 25 | pub fn cToAST(input: []const u8) !c_ast.Program { 26 | var arena = std.heap.ArenaAllocator.init(std.testing.allocator); 27 | defer arena.deinit(); 28 | const allocator = arena.allocator(); 29 | 30 | var lexer = Lexer.init(allocator, input); 31 | lexer.scan(); 32 | 33 | var parser = Parser.init(lexer.tokens.items, allocator); 34 | return try parser.parse(); 35 | } 36 | 37 | pub fn generate(input: []const u8, allocator: std.mem.Allocator) ![]const u8 { 38 | var lexer = Lexer.init(allocator, input); 39 | lexer.scan(); 40 | 41 | var parser = Parser.init(lexer.tokens.items, allocator); 42 | const program_definition = try parser.parse(); 43 | 44 | var semantic = SemanticAnalysis.init(allocator); 45 | const analyzed_program = try semantic.analyze(program_definition); 46 | 47 | var generator = Generator.init(analyzed_program, allocator); 48 | const generated_program = try generator.generate(); 49 | 50 | var emitter = Emitter.init(generated_program); 51 | return try emitter.getAssemblyString(allocator); 52 | } 53 | 54 | fn runShellCommand(allocator: std.mem.Allocator, command: []const u8) ![]u8 { 55 | const max_output_size = 1024 * 1024; 56 | 57 | var child = std.process.Child.init(&[_][]const u8{ "sh", "-c", command }, allocator); 58 | 59 | child.stdout_behavior = .Pipe; 60 | child.stderr_behavior = .Pipe; 61 | 62 | try child.spawn(); 63 | 64 | var stdout_buffer = try allocator.alloc(u8, 
max_output_size); 65 | errdefer allocator.free(stdout_buffer); 66 | var stdout_len: usize = 0; 67 | 68 | var stderr_buffer = try allocator.alloc(u8, max_output_size); 69 | defer allocator.free(stderr_buffer); 70 | var stderr_len: usize = 0; 71 | 72 | while (true) { 73 | const bytes_read = try child.stdout.?.read(stdout_buffer[stdout_len..]); 74 | if (bytes_read == 0) break; 75 | stdout_len += bytes_read; 76 | if (stdout_len >= max_output_size) break; 77 | } 78 | 79 | while (true) { 80 | const bytes_read = try child.stderr.?.read(stderr_buffer[stderr_len..]); 81 | if (bytes_read == 0) break; 82 | stderr_len += bytes_read; 83 | if (stderr_len >= max_output_size) break; 84 | } 85 | 86 | const term = try child.wait(); 87 | 88 | if (term.Exited != 0) { 89 | std.debug.print("Test failed\n", .{}); 90 | return error.TestFailed; 91 | } 92 | 93 | return allocator.realloc(stdout_buffer, stdout_len); 94 | } 95 | 96 | pub const WireCheck = struct { 97 | tick: u32, 98 | wire: []const u8, 99 | value: u32, 100 | }; 101 | 102 | pub fn testWithSystemVerilog( 103 | test_name: []const u8, 104 | c_code: []const u8, 105 | wire_checks: []const WireCheck, 106 | ) !void { 107 | var arena = std.heap.ArenaAllocator.init(std.testing.allocator); 108 | defer arena.deinit(); 109 | const allocator = arena.allocator(); 110 | 111 | try std.fs.cwd().makePath("../temp"); 112 | 113 | const assembly = try generate(c_code, allocator); 114 | 115 | const asm_path = try std.fmt.allocPrint(allocator, "../temp/{s}.asm", .{test_name}); 116 | defer allocator.free(asm_path); 117 | const asm_file = try std.fs.cwd().createFile(asm_path, .{}); 118 | defer asm_file.close(); 119 | try asm_file.writeAll(assembly); 120 | 121 | const tb_content = try generateTestbench(test_name, wire_checks, allocator); 122 | 123 | const tb_path = try std.fmt.allocPrint(allocator, "../temp/{s}.sv", .{test_name}); 124 | defer allocator.free(tb_path); 125 | const tb_file = try std.fs.cwd().createFile(tb_path, .{}); 126 | defer 
tb_file.close(); 127 | try tb_file.writeAll(tb_content); 128 | 129 | { 130 | const assemble_cmd = try std.fmt.allocPrint(allocator, "cd .. && cd assembler && zig build run -- \"../temp/{s}.asm\" ../program", .{test_name}); 131 | defer allocator.free(assemble_cmd); 132 | _ = try runShellCommand(allocator, assemble_cmd); 133 | } 134 | 135 | { 136 | const iverilog_cmd = try std.fmt.allocPrint(allocator, "cd .. && iverilog -g2012 \"temp/{s}\" src/**.sv -o temp_output.vvp", .{tb_path}); 137 | defer allocator.free(iverilog_cmd); 138 | _ = try runShellCommand(allocator, iverilog_cmd); 139 | } 140 | 141 | { 142 | const vvp_output = runShellCommand(allocator, "cd .. && vvp temp_output.vvp") catch |err| { 143 | const temp_out_path = try std.fmt.allocPrint(allocator, "../temp_output.vvp", .{}); 144 | defer allocator.free(temp_out_path); 145 | std.fs.cwd().deleteFile(temp_out_path) catch {}; 146 | return err; 147 | }; 148 | 149 | const temp_out_path = try std.fmt.allocPrint(allocator, "../temp_output.vvp", .{}); 150 | defer allocator.free(temp_out_path); 151 | std.fs.cwd().deleteFile(temp_out_path) catch {}; 152 | 153 | defer allocator.free(vvp_output); 154 | } 155 | 156 | const vcd_path = try std.fmt.allocPrint(allocator, "../{s}.vcd", .{test_name}); 157 | defer allocator.free(vcd_path); 158 | std.fs.cwd().deleteFile(vcd_path) catch {}; 159 | 160 | const cwd = try std.process.getCwdAlloc(allocator); 161 | 162 | const rel_path = "../temp"; 163 | const abs_path = try std.fs.path.resolve(allocator, &[_][]const u8{ cwd, rel_path }); 164 | 165 | try std.fs.deleteTreeAbsolute(abs_path); 166 | } 167 | 168 | fn generateTestbench( 169 | test_name: []const u8, 170 | wire_checks: []const WireCheck, 171 | allocator: std.mem.Allocator, 172 | ) ![]const u8 { 173 | var tb = std.ArrayList(u8).init(allocator); 174 | defer tb.deinit(); 175 | 176 | try tb.writer().print( 177 | \\module {s}_tb; 178 | \\ reg clk; 179 | \\top dut (.clk(clk)); 180 | \\initial begin 181 | \\ clk = 0; 182 | \\forever 
#1 clk = ~clk; 183 | \\end 184 | \\initial begin 185 | \\$dumpfile("{s}.vcd"); 186 | \\$dumpvars(0, dut); 187 | \\end 188 | \\initial begin 189 | \\ 190 | , .{ 191 | test_name, 192 | test_name, 193 | }); 194 | 195 | var sorted_checks = std.ArrayList(WireCheck).init(allocator); 196 | defer sorted_checks.deinit(); 197 | 198 | try sorted_checks.appendSlice(wire_checks); 199 | 200 | const Context = struct { 201 | pub fn lessThan(_: @This(), lhs: WireCheck, rhs: WireCheck) bool { 202 | return lhs.tick < rhs.tick; 203 | } 204 | }; 205 | std.sort.insertion(WireCheck, sorted_checks.items, Context{}, Context.lessThan); 206 | 207 | var current_tick: u32 = 0; 208 | var first_check = true; 209 | 210 | for (sorted_checks.items) |check| { 211 | if (first_check or check.tick != current_tick) { 212 | if (!first_check) { 213 | try tb.appendSlice("\n"); 214 | } 215 | 216 | try tb.writer().print("#{d};\n", .{check.tick}); 217 | current_tick = check.tick; 218 | first_check = false; 219 | } 220 | 221 | try tb.writer().print( 222 | \\if (dut.{s} !== 32'd{d}) begin 223 | \\ $error("Alu {s} got wrong value: got %d, expected {d}", 224 | \\ dut.{s}); 225 | \\ $fatal(1, "Test failed"); 226 | \\end 227 | \\ 228 | , .{ 229 | check.wire, 230 | check.value, 231 | check.wire, 232 | check.value, 233 | check.wire, 234 | }); 235 | } 236 | 237 | try tb.writer().print( 238 | \\$finish; 239 | \\end 240 | \\endmodule 241 | \\ 242 | , .{}); 243 | 244 | return tb.toOwnedSlice(); 245 | } 246 | -------------------------------------------------------------------------------- /compiler/src/frontend/semantic/identifier-resolution.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const c_ast = @import("../../ast/c.zig"); 3 | const diagnostics = @import("../../diagnostics.zig"); 4 | 5 | const MapEntry = struct { 6 | new_name: []const u8, 7 | from_current_scope: bool, 8 | has_linkage: bool, 9 | }; 10 | 11 | pub const IdentifierResolution = 
/// Identifier-resolution pass of semantic analysis.
///
/// Walks a `c_ast.Program`, gives every local variable and parameter a unique
/// name of the form `var_<n>`, rewrites every use to that name, and reports
/// duplicate or undeclared identifiers through `diagnostics`.
/// Function names are kept as written.
pub const IdentifierResolution = struct {
    allocator: std.mem.Allocator,
    /// Next suffix to use when minting a `var_<n>` name.
    counter: usize,

    /// Maps a source-level identifier to what it currently resolves to.
    const IdentifierMap = std.StringHashMap(MapEntry);

    pub fn init(allocator: std.mem.Allocator) IdentifierResolution {
        return .{
            .allocator = allocator,
            .counter = 0,
        };
    }

    /// Resolves every function of `program`.
    ///
    /// The function slice of `program` is rewritten in place and the same
    /// program value is returned. Errors from the individual resolvers are
    /// propagated after being recorded in `diagnostics`.
    pub fn resolve(self: *IdentifierResolution, program: c_ast.Program) !c_ast.Program {
        var identifier_map = IdentifierMap.init(self.allocator);
        // Only the table is released; keys and `new_name` values are
        // referenced by the resolved AST and are not owned by the map.
        defer identifier_map.deinit();

        for (program.function) |*function| {
            function.* = try self.resolveFunctionDeclaration(function.*, &identifier_map);
        }

        return program;
    }

    /// Resolves each item of `block` in order, rewriting the item slice in
    /// place. Declarations are added to `identifier_map`, which therefore
    /// represents the scope of this block.
    fn resolveBlock(self: *IdentifierResolution, block: c_ast.Block, identifier_map: *IdentifierMap) anyerror!c_ast.Block {
        for (block.block_items) |*block_item| {
            switch (block_item.*) {
                .declaration => |*declaration| {
                    declaration.* = try self.resolveDeclaration(declaration.*, identifier_map);
                },
                .statement => |*statement| {
                    statement.* = try self.resolveStatement(statement.*, identifier_map);
                },
            }
        }
        return block;
    }

    /// Formats a message with the pass allocator and records it in
    /// `diagnostics` without a line number.
    fn reportError(self: *IdentifierResolution, comptime fmt: []const u8, args: anytype) !void {
        const err_msg = try std.fmt.allocPrint(self.allocator, fmt, args);
        diagnostics.addError(err_msg, null);
    }

    /// Heap-allocates a copy of `statement` with the pass allocator.
    fn boxStatement(self: *IdentifierResolution, statement: c_ast.Statement) !*c_ast.Statement {
        const ptr = try self.allocator.create(c_ast.Statement);
        ptr.* = statement;
        return ptr;
    }

    /// Creates the map entry for a new local variable or parameter.
    ///
    /// Fails with `error.DuplicateDefinition` when `identifier` was already
    /// declared in the current scope. The caller is responsible for storing
    /// the returned entry in the map.
    fn resolveLocal(self: *IdentifierResolution, identifier: []const u8, identifier_map: *IdentifierMap) !MapEntry {
        if (identifier_map.get(identifier)) |entry| {
            if (entry.from_current_scope) {
                try self.reportError("Duplicate identifier definition: {s}", .{identifier});
                return error.DuplicateDefinition;
            }
        }

        const unique_name = try std.fmt.allocPrint(self.allocator, "var_{d}", .{self.counter});
        self.counter += 1;

        return MapEntry{
            .from_current_scope = true,
            .new_name = unique_name,
            // Locals and parameters have no linkage. This is what lets
            // `resolveFunctionDeclaration` reject a function declared with the
            // name of a variable from the same scope.
            .has_linkage = false,
        };
    }

    /// Renames the declared variable and then resolves its initializer.
    ///
    /// The variable is registered before the initializer is resolved, so the
    /// initializer sees the new declaration.
    fn resolveVariableDeclaration(self: *IdentifierResolution, declaration: c_ast.VariableDeclaration, identifier_map: *IdentifierMap) !c_ast.VariableDeclaration {
        var result = declaration;

        const entry = try self.resolveLocal(result.identifier, identifier_map);
        try identifier_map.put(result.identifier, entry);
        result.identifier = entry.new_name;

        if (result.initial) |initial| {
            result.initial = try self.resolveExp(initial, identifier_map);
        }

        return result;
    }

    /// Registers the function name in `identifier_map` (unchanged, with
    /// linkage) and resolves its parameters and body in a fresh inner scope.
    ///
    /// Fails with `error.DuplicateDeclaration` when the name is already taken
    /// in the current scope by an identifier without linkage.
    fn resolveFunctionDeclaration(self: *IdentifierResolution, declaration: c_ast.FunctionDeclaration, identifier_map: *IdentifierMap) !c_ast.FunctionDeclaration {
        var result = declaration;

        if (identifier_map.get(result.identifier)) |entry| {
            if (entry.from_current_scope and !entry.has_linkage) {
                try self.reportError("Duplicate declaration: {s}", .{result.identifier});
                return error.DuplicateDeclaration;
            }
        }

        try identifier_map.put(
            result.identifier,
            MapEntry{
                .from_current_scope = true,
                .new_name = result.identifier,
                .has_linkage = true,
            },
        );

        // Parameters and the body share one scope nested in the caller's.
        var inner_map = try self.cloneVariableMap(identifier_map);
        defer inner_map.deinit();

        var new_params = std.ArrayList([]const u8).init(self.allocator);
        errdefer new_params.deinit();
        for (result.params) |param| {
            const entry = try self.resolveLocal(param, &inner_map);
            try new_params.append(entry.new_name);
            try inner_map.put(param, entry);
        }
        result.params = try new_params.toOwnedSlice();

        if (result.body) |body| {
            result.body = try self.resolveBlock(body, &inner_map);
        }

        return result;
    }

    /// Dispatches to the variable or function declaration resolver.
    fn resolveDeclaration(self: *IdentifierResolution, declaration: c_ast.Declaration, identifier_map: *IdentifierMap) !c_ast.Declaration {
        switch (declaration) {
            .variable_declaration => |variable| {
                return .{ .variable_declaration = try self.resolveVariableDeclaration(variable, identifier_map) };
            },
            else => {
                return .{ .function_declaration = try self.resolveFunctionDeclaration(declaration.function_declaration, identifier_map) };
            },
        }
    }

    /// Returns a copy of `identifier_map` for a nested scope: every entry is
    /// kept but marked as not belonging to the current scope, so it may be
    /// shadowed. The caller owns the returned map.
    fn cloneVariableMap(self: *IdentifierResolution, identifier_map: *IdentifierMap) !IdentifierMap {
        _ = self;
        var new_map = IdentifierMap.init(identifier_map.allocator);
        errdefer new_map.deinit();

        var iterator = identifier_map.iterator();
        while (iterator.next()) |entry| {
            var new_entry = entry.value_ptr.*;
            new_entry.from_current_scope = false;
            try new_map.put(entry.key_ptr.*, new_entry);
        }

        return new_map;
    }

    /// Resolves all identifiers used inside `statement` and returns the
    /// rewritten statement. Compound statements and `for` loops get their own
    /// nested scope; every other statement is resolved in `identifier_map`.
    fn resolveStatement(self: *IdentifierResolution, statement: c_ast.Statement, identifier_map: *IdentifierMap) !c_ast.Statement {
        var result = statement;
        switch (result) {
            .exp => |*exp| {
                exp.* = try self.resolveExp(exp.*, identifier_map);
            },
            .ret => |*ret| {
                ret.exp = try self.resolveExp(ret.exp, identifier_map);
            },
            .if_ => |*if_| {
                if_.condition = try self.resolveExp(if_.condition, identifier_map);
                if_.then = try self.boxStatement(try self.resolveStatement(if_.then.*, identifier_map));
                if (if_.else_) |else_statement| {
                    if_.else_ = try self.boxStatement(try self.resolveStatement(else_statement.*, identifier_map));
                }
            },
            .compound => |*compound| {
                var new_map = try self.cloneVariableMap(identifier_map);
                defer new_map.deinit();
                compound.* = try self.resolveBlock(compound.*, &new_map);
            },
            .break_ => {},
            .continue_ => {},
            .do_while => |*do_while| {
                do_while.body = try self.boxStatement(try self.resolveStatement(do_while.body.*, identifier_map));
                do_while.condition = try self.resolveExp(do_while.condition, identifier_map);
            },
            .for_ => |*for_| {
                // The loop header opens its own scope: a variable declared in
                // the init clause may shadow an outer one and must not stay
                // visible after the loop.
                var loop_map = try self.cloneVariableMap(identifier_map);
                defer loop_map.deinit();

                switch (for_.init) {
                    .init_decl => |*init_decl| {
                        init_decl.* = try self.resolveVariableDeclaration(init_decl.*, &loop_map);
                    },
                    .init_exp => |*init_exp| {
                        if (init_exp.*) |exp| {
                            init_exp.* = try self.resolveExp(exp, &loop_map);
                        }
                    },
                }

                if (for_.condition) |condition| {
                    for_.condition = try self.resolveExp(condition, &loop_map);
                }

                if (for_.post) |post| {
                    for_.post = try self.resolveExp(post, &loop_map);
                }

                for_.body = try self.boxStatement(try self.resolveStatement(for_.body.*, &loop_map));
            },
            .while_ => |*while_| {
                while_.body = try self.boxStatement(try self.resolveStatement(while_.body.*, identifier_map));
                while_.condition = try self.resolveExp(while_.condition, identifier_map);
            },
        }
        return result;
    }

    /// Rewrites every identifier inside `expression` to its resolved name.
    ///
    /// Operands of binary and assignment expressions are updated through
    /// their existing pointers; call arguments are copied into newly
    /// allocated nodes.
    ///
    /// Errors (each also recorded in `diagnostics`):
    /// - `error.UndeclaredVariable`: a variable is not in `identifier_map`.
    /// - `error.UndeclaredFunction`: a called name is not in `identifier_map`.
    /// - `error.InvalidLvalue`: the left side of an assignment is not a variable.
    fn resolveExp(self: *IdentifierResolution, expression: c_ast.Expression, identifier_map: *IdentifierMap) !c_ast.Expression {
        var result = expression;
        switch (result) {
            .variable => |*variable| {
                const entry = identifier_map.get(variable.identifier) orelse {
                    try self.reportError("Undeclared variable: {s}", .{variable.identifier});
                    return error.UndeclaredVariable;
                };
                variable.identifier = entry.new_name;
            },
            .binary => |binary| {
                binary.left.* = try self.resolveExp(binary.left.*, identifier_map);
                binary.right.* = try self.resolveExp(binary.right.*, identifier_map);
            },
            .assignment => |assignment| {
                if (assignment.left.* != .variable) {
                    try self.reportError("Invalid lvalue", .{});
                    return error.InvalidLvalue;
                }
                assignment.left.* = try self.resolveExp(assignment.left.*, identifier_map);
                assignment.right.* = try self.resolveExp(assignment.right.*, identifier_map);
            },
            .constant => {},
            .function_call => |call| {
                const entry = identifier_map.get(call.identifier) orelse {
                    try self.reportError("Undeclared function: {s}", .{call.identifier});
                    return error.UndeclaredFunction;
                };

                var new_args = std.ArrayList(*c_ast.Expression).init(self.allocator);
                errdefer new_args.deinit();
                for (call.args) |arg| {
                    const expr = try self.allocator.create(c_ast.Expression);
                    expr.* = try self.resolveExp(arg.*, identifier_map);
                    try new_args.append(expr);
                }

                return .{
                    .function_call = .{
                        .identifier = entry.new_name,
                        .args = try new_args.toOwnedSlice(),
                    },
                };
            },
        }
        return result;
    }
};
/// Runs the full pipeline on C source text and returns the assembly listing:
/// lexing, parsing, semantic analysis, code generation, emission.
///
/// When `debug_value` is 1 (and this is not a test build) the token list, the
/// parsed program and the analyzed program are printed to stderr between the
/// corresponding stages. Any stage error is returned to the caller.
fn generate(input: []const u8, allocator: std.mem.Allocator, debug_value: i32) ![]const u8 {
    const dump_stages = debug_value == 1 and builtin.is_test == false;

    // Stage 1: tokens.
    var lexer = Lexer.init(allocator, input);
    lexer.scan();
    const tokens = lexer.tokens.items;

    if (dump_stages) {
        std.debug.print("\n======== Tokens ========\n", .{});
        for (tokens) |token| {
            std.debug.print("{s} '{?}' at line {d}\n", .{
                @tagName(token.type),
                token.literal,
                token.line,
            });
        }
        std.debug.print("========================\n", .{});
    }

    // Stage 2: syntax tree.
    var parser = Parser.init(tokens, allocator);
    const parsed = try parser.parse();

    if (dump_stages) {
        std.debug.print("\n======== Program ========\n", .{});
        prettyprinter.printProgram(parsed);
        std.debug.print("===========================\n", .{});
    }

    // Stage 3: semantic analysis.
    var semantic = SemanticAnalysis.init(allocator);
    const analyzed = try semantic.analyze(parsed);

    if (dump_stages) {
        std.debug.print("\n=== Semantic analysis ===\n", .{});
        prettyprinter.printProgram(analyzed);
        std.debug.print("===========================\n", .{});
    }

    // Stages 4 and 5: lower to the assembly AST, then render it as text.
    var generator = Generator.init(analyzed, allocator);
    var emitter = Emitter.init(try generator.generate());

    return try emitter.getAssemblyString(allocator);
}
"instr_mem.addr", 195 | .value = 30, 196 | }; 197 | 198 | try testing.testWithSystemVerilog("or_short_circuit_1", input, &[_]testing.WireCheck{check}); 199 | } 200 | 201 | test "and short circuit 2" { 202 | const input = 203 | \\int main() 204 | \\{ 205 | \\ return 2 == 2 && 2 <= 1 && 5 > 3; 206 | \\} 207 | ; 208 | 209 | const check = testing.WireCheck{ 210 | .tick = 26, 211 | .wire = "instr_mem.addr", 212 | .value = 16, 213 | }; 214 | 215 | try testing.testWithSystemVerilog("and_short_circuit_2", input, &[_]testing.WireCheck{check}); 216 | } 217 | 218 | test "variables 1" { 219 | const input = 220 | \\int main() 221 | \\{ 222 | \\ int beh = 5 + 1; 223 | \\ int bah = beh - 1 * 2; 224 | \\ return bah + 8; 225 | \\} 226 | \\ 227 | ; 228 | 229 | const check = testing.WireCheck{ 230 | .tick = 38, 231 | .wire = "alu_inst.rd_data", 232 | .value = 12, 233 | }; 234 | 235 | try testing.testWithSystemVerilog("variables_1", input, &[_]testing.WireCheck{check}); 236 | } 237 | 238 | test "variables 2" { 239 | const input = 240 | \\int main() 241 | \\{ 242 | \\ int beh = 5 + 1; 243 | \\ int bah = beh - 1 * 2; 244 | \\ int bumbam = beh + bah; 245 | \\ return bumbam + beh; 246 | \\} 247 | \\ 248 | ; 249 | 250 | const check = testing.WireCheck{ 251 | .tick = 54, 252 | .wire = "alu_inst.rd_data", 253 | .value = 16, 254 | }; 255 | 256 | try testing.testWithSystemVerilog("variables_2", input, &[_]testing.WireCheck{check}); 257 | } 258 | 259 | test "if 1" { 260 | const input = 261 | \\int main() 262 | \\{ 263 | \\ int ab = 0; 264 | \\ if (ab != 0) 265 | \\ ab = 2; 266 | \\ else 267 | \\ ab = 19; 268 | \\} 269 | \\ 270 | ; 271 | 272 | const check = testing.WireCheck{ 273 | .tick = 34, 274 | .wire = "instr_mem.addr", 275 | .value = 21, 276 | }; 277 | 278 | try testing.testWithSystemVerilog("if_1", input, &[_]testing.WireCheck{check}); 279 | } 280 | 281 | test "if 2" { 282 | const input = 283 | \\int main() 284 | \\{ 285 | \\ int ab = 0; 286 | \\ if (ab == 0) 287 | \\ if (ab != 0) 288 | 
// Checks that shadowed declarations of `x` in nested blocks each get their own
// unique name, and that every use refers to the declaration visible at that
// point.
test "multiple scopes variable resolution" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    const input =
        \\int main()
        \\{
        \\ int x = 1;
        \\ {
        \\ int x = 2;
        \\ if (x > 1) {
        \\ x = 3;
        \\ int x = 4;
        \\ }
        \\ return x;
        \\ }
        \\ return x;
        \\}
        \\
    ;

    var lexer = Lexer.init(allocator, input);
    lexer.scan();
    var parser = Parser.init(lexer.tokens.items, allocator);
    var semantic = SemanticAnalysis.init(allocator);
    const analyzed = try semantic.analyze(try parser.parse());

    // Items of main's body, of the nested block, and of the `if` body.
    const outer = analyzed.function[0].body.?.block_items;
    const nested = outer[1].statement.compound.block_items;
    const if_body = nested[1].statement.if_.then.compound.block_items;

    // Outer `int x = 1;`
    try std.testing.expectEqualStrings("var_0", outer[0].declaration.variable_declaration.identifier);
    // Nested `int x = 2;`
    try std.testing.expectEqualStrings("var_1", nested[0].declaration.variable_declaration.identifier);
    // `x = 3;` comes before the innermost declaration, so it still targets the nested x.
    try std.testing.expectEqualStrings("var_1", if_body[0].statement.exp.assignment.left.variable.identifier);
    // Innermost `int x = 4;`
    try std.testing.expectEqualStrings("var_2", if_body[1].declaration.variable_declaration.identifier);
    // `return x;` inside the nested block.
    try std.testing.expectEqualStrings("var_1", nested[2].statement.ret.exp.variable.identifier);
    // `return x;` at the end of main.
    try std.testing.expectEqualStrings("var_0", outer[2].statement.ret.exp.variable.identifier);
}
// Compiles a do-while loop, simulates it, and checks `instr_mem.addr` at tick 56.
test "do while loop" {
    const source =
        \\int main()
        \\{
        \\ int a = 16;
        \\ do {
        \\ a -= 2;
        \\ } while (a > 12);
        \\}
        \\
    ;

    const checks = [_]testing.WireCheck{
        .{ .tick = 56, .wire = "instr_mem.addr", .value = 17 },
    };

    try testing.testWithSystemVerilog("do_while_loop", source, &checks);
}
; 492 | 493 | const check = testing.WireCheck{ 494 | .tick = 72, 495 | .wire = "instr_mem.addr", 496 | .value = 24, 497 | }; 498 | 499 | try testing.testWithSystemVerilog("do_while_break_loop", input, &[_]testing.WireCheck{check}); 500 | } 501 | 502 | test "for continue loop" { 503 | const input = 504 | \\int main() 505 | \\{ 506 | \\ int a = 16; 507 | \\ for (int i = 0; i < 4; i += 1) 508 | \\ { 509 | \\ if (i % 2 == 0) 510 | \\ { 511 | \\ a += 2; 512 | \\ continue; 513 | \\ } 514 | \\ a += 1; 515 | \\ } 516 | \\ a -= 10; 517 | \\} 518 | \\ 519 | ; 520 | 521 | const check = testing.WireCheck{ 522 | .tick = 246, 523 | .wire = "alu_inst.rd_data", 524 | .value = 12, 525 | }; 526 | 527 | try testing.testWithSystemVerilog("for_continue_loop", input, &[_]testing.WireCheck{check}); 528 | } 529 | -------------------------------------------------------------------------------- /compiler/src/frontend/parser.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const tokens_script = @import("tokens.zig"); 3 | const Token = tokens_script.Token; 4 | const TokenType = tokens_script.TokenType; 5 | const c_ast = @import("../ast/c.zig"); 6 | const diagnostics = @import("../diagnostics.zig"); 7 | const testing = @import("../testing.zig"); 8 | 9 | pub const Parser = struct { 10 | tokens: []const Token, 11 | cursor: usize, 12 | allocator: std.mem.Allocator, 13 | 14 | pub fn init(tokens: []const Token, allocator: std.mem.Allocator) Parser { 15 | return Parser{ 16 | .tokens = tokens, 17 | .cursor = 0, 18 | .allocator = allocator, 19 | }; 20 | } 21 | 22 | pub fn parse(self: *Parser) !c_ast.Program { 23 | var function_array = std.ArrayList(c_ast.FunctionDeclaration).init(self.allocator); 24 | while (self.cursor < self.tokens.len - 1) { 25 | const func_decl = try self.parseDeclaration(); 26 | try function_array.append(func_decl.function_declaration); 27 | self.cursor += 1; 28 | } 29 | return .{ .function = try 
function_array.toOwnedSlice() }; 30 | } 31 | 32 | // TODO: can't start function with a left paren 33 | fn parseFunction(self: *Parser) !c_ast.FunctionDeclaration { 34 | try self.expect(.INT); 35 | self.cursor += 1; 36 | try self.expect(.IDENTIFIER); 37 | 38 | const identifier = self.curr().literal.?.string; 39 | 40 | self.cursor += 1; 41 | try self.expect(.LEFT_PAREN); 42 | self.cursor += 1; 43 | 44 | const params = try self.parseFunctionParams(); 45 | 46 | try self.expect(.LEFT_BRACE); 47 | self.cursor += 1; 48 | 49 | const body = try self.parseBlock(); 50 | self.cursor += 1; 51 | 52 | return .{ 53 | .identifier = identifier, 54 | .params = params, 55 | .body = body, 56 | }; 57 | } 58 | 59 | fn parseBlock(self: *Parser) !c_ast.Block { 60 | var function_body = std.ArrayList(c_ast.BlockItem).init(self.allocator); 61 | 62 | while (self.cursor < self.tokens.len - 1 and self.curr().type != .RIGHT_BRACE) { 63 | const block_item = try self.parseBlockItem(); 64 | try function_body.append(block_item); 65 | self.cursor += 1; 66 | } 67 | 68 | return .{ 69 | .block_items = try function_body.toOwnedSlice(), 70 | }; 71 | } 72 | 73 | fn parseBlockItem(self: *Parser) !c_ast.BlockItem { 74 | switch (self.curr().type) { 75 | .INT => { 76 | return .{ .declaration = try self.parseDeclaration() }; 77 | }, 78 | else => return .{ .statement = try self.parseStatement() }, 79 | } 80 | } 81 | 82 | fn parseDeclaration(self: *Parser) anyerror!c_ast.Declaration { 83 | try self.expect(.INT); 84 | self.cursor += 1; 85 | try self.expect(.IDENTIFIER); 86 | 87 | const identifier = self.curr().literal.?.string; 88 | self.cursor += 1; 89 | 90 | switch (self.curr().type) { 91 | .SEMICOLON => { 92 | return .{ 93 | .variable_declaration = .{ 94 | .identifier = identifier, 95 | .initial = null, 96 | }, 97 | }; 98 | }, 99 | .LEFT_PAREN => { 100 | self.cursor += 1; 101 | 102 | const params = try self.parseFunctionParams(); 103 | 104 | var body: ?c_ast.Block = null; 105 | if (self.curr().type != 
    /// Parses one statement starting at the current token and returns it.
    ///
    /// The kind of statement is chosen from the current token type: `return`,
    /// `if`, a compound block, `break`, `continue`, `while`, `do`, `for`;
    /// anything else is parsed as an expression statement. Loop and jump
    /// statements are created with a null `identifier`.
    ///
    /// Nested statements (branches and loop bodies) are heap-allocated with
    /// the parser allocator. Errors from `expect`, `parseExpression`,
    /// `parseBlock`, `parseDeclaration` and the allocator are propagated.
    fn parseStatement(self: *Parser) anyerror!c_ast.Statement {
        switch (self.curr().type) {
            .RETURN => {
                // Step over `return`.
                self.cursor += 1;

                // `parseExpression` returns a heap-allocated node, but `ret`
                // stores the expression by value: copy it out and release the
                // now-unneeded allocation.
                const expr_ptr = try self.parseExpression(0);
                const expr = expr_ptr.*;
                self.allocator.destroy(expr_ptr);
                return .{
                    .ret = .{ .exp = expr },
                };
            },
            .IF => {
                // Step over `if`, check for `(` and step over it.
                self.cursor += 1;
                try self.expect(.LEFT_PAREN);
                self.cursor += 1;
                const condition = try self.parseExpression(0);
                try self.expect(.RIGHT_PAREN);
                self.cursor += 1;

                const then = try self.parseStatement();
                const then_ptr = try self.allocator.create(c_ast.Statement);
                then_ptr.* = then;

                var else_ptr: ?*c_ast.Statement = null;

                // The `else` keyword is looked for one token after the current
                // one; when present, skip both tokens and parse the else branch.
                if (self.peek(1).type == .ELSE) {
                    self.cursor += 2;
                    const else_ = try self.parseStatement();
                    else_ptr = try self.allocator.create(c_ast.Statement);
                    else_ptr.?.* = else_;
                }
                return .{
                    .if_ = .{
                        .condition = condition.*,
                        .then = then_ptr,
                        .else_ = else_ptr,
                    },
                };
            },
            .LEFT_BRACE => {
                // Step over `{`; `parseBlock` reads items until it sees `}`.
                self.cursor += 1;
                const block = try self.parseBlock();
                return .{
                    .compound = block,
                };
            },
            .BREAK => {
                self.cursor += 1;
                return .{
                    .break_ = .{ .identifier = null },
                };
            },
            .CONTINUE => {
                self.cursor += 1;
                return .{
                    .continue_ = .{ .identifier = null },
                };
            },
            .WHILE => {
                self.cursor += 1;
                // The cursor is deliberately left on `(`: the condition is
                // parsed as a parenthesised expression, so `parseFactor`
                // consumes both parentheses.
                try self.expect(.LEFT_PAREN);
                const condition = try self.parseExpression(0);
                const body = try self.parseStatement();
                const body_ptr = try self.allocator.create(c_ast.Statement);
                body_ptr.* = body;
                return c_ast.Statement{
                    .while_ = .{
                        .body = body_ptr,
                        .identifier = null,
                        .condition = condition.*,
                    },
                };
            },
            .DO => {
                // Step over `do` and parse the body first.
                self.cursor += 1;
                const body = try self.parseStatement();
                const body_ptr = try self.allocator.create(c_ast.Statement);
                body_ptr.* = body;

                // Advance from the token the body ended on to `while`.
                self.cursor += 1;
                try self.expect(.WHILE);
                self.cursor += 1;
                // As for `while`: the cursor stays on `(` and the condition is
                // parsed as a parenthesised expression.
                try self.expect(.LEFT_PAREN);
                const condition = try self.parseExpression(0);

                return c_ast.Statement{
                    .do_while = .{
                        .body = body_ptr,
                        .condition = condition.*,
                        .identifier = null,
                    },
                };
            },
            .FOR => {
                // Step over `for`, check for `(` and step over it.
                self.cursor += 1;
                try self.expect(.LEFT_PAREN);
                self.cursor += 1;

                // Init clause: a declaration when it starts with `int`,
                // otherwise an expression.
                // NOTE(review): an empty init clause (`for (; ...)`) goes to
                // `parseExpression` and looks like it would be rejected by
                // `parseFactor` - confirm whether that is intended.
                var for_init: c_ast.ForInit = undefined;
                if (self.curr().type == .INT) {
                    const declaration = try self.parseDeclaration();
                    for_init = .{ .init_decl = declaration.variable_declaration };
                } else {
                    const expression = try self.parseExpression(0);
                    for_init = .{ .init_exp = expression.* };
                }

                // Condition clause: skipped when the header is already at `)`;
                // otherwise step over the current token and parse it.
                var condition: ?*c_ast.Expression = null;
                if (self.curr().type != .RIGHT_PAREN) {
                    self.cursor += 1;
                    condition = try self.parseExpression(0);
                }

                // Post clause: handled the same way as the condition.
                var post: ?*c_ast.Expression = null;
                if (self.curr().type != .RIGHT_PAREN) {
                    self.cursor += 1;
                    post = try self.parseExpression(0);
                }

                // Step over the current token (presumably the closing `)`)
                // and parse the body.
                self.cursor += 1;
                const body = try self.parseStatement();
                const body_ptr = try self.allocator.create(c_ast.Statement);
                body_ptr.* = body;

                return c_ast.Statement{
                    .for_ = .{
                        .init = for_init,
                        .condition = if (condition != null) condition.?.* else null,
                        .post = if (post != null) post.?.* else null,
                        .body = body_ptr,
                        .identifier = null,
                    },
                };
            },
            else => {
                // Expression statement: stored by value, so the heap node
                // returned by `parseExpression` is copied and released.
                const expr_ptr = try self.parseExpression(0);
                const expr = expr_ptr.*;
                self.allocator.destroy(expr_ptr);
                return .{
                    .exp = expr,
                };
            },
        }
    }
self.allocator.create(c_ast.Expression); 324 | new_expr.* = .{ 325 | .assignment = .{ 326 | .left = left, 327 | .right = binary_expr, 328 | }, 329 | }; 330 | 331 | left = new_expr; 332 | } else { 333 | const operator = self.parseBinop(); 334 | self.cursor += 1; 335 | 336 | const right = try self.parseExpression(curr_prec + 1); 337 | 338 | const new_expr = try self.allocator.create(c_ast.Expression); 339 | new_expr.* = .{ 340 | .binary = .{ 341 | .operator = operator, 342 | .left = left, 343 | .right = right, 344 | }, 345 | }; 346 | 347 | left = new_expr; 348 | } 349 | } 350 | 351 | return left; 352 | } 353 | 354 | fn parseFactor(self: *Parser) !*c_ast.Expression { 355 | var expr = try self.allocator.create(c_ast.Expression); 356 | 357 | switch (self.curr().type) { 358 | .NUMBER => { 359 | expr.* = .{ 360 | .constant = self.curr().literal.?.number, 361 | }; 362 | self.cursor += 1; 363 | }, 364 | .LEFT_PAREN => { 365 | self.cursor += 1; 366 | const inner_expr = try self.parseExpression(0); 367 | try self.expect(.RIGHT_PAREN); 368 | self.cursor += 1; 369 | 370 | expr = inner_expr; 371 | }, 372 | .IDENTIFIER => { 373 | switch (self.peek(1).type) { 374 | .LEFT_PAREN => { 375 | self.cursor += 2; 376 | expr.* = .{ 377 | .function_call = c_ast.FunctionCall{ 378 | .identifier = self.peek(-2).literal.?.string, 379 | .args = try self.parseFunctionArgs(), 380 | }, 381 | }; 382 | }, 383 | else => { 384 | expr.* = .{ 385 | .variable = .{ 386 | .identifier = self.curr().literal.?.string, 387 | }, 388 | }; 389 | self.cursor += 1; 390 | }, 391 | } 392 | }, 393 | else => { 394 | const msg = try std.fmt.allocPrint(diagnostics.arena.allocator(), "Syntax error at line {}. Expected one of the following: NUMBER, LEFT_PAREN, IDENTIFIER. 
/// Parses a parenthesised parameter list; the cursor must be on the first
/// token after `(`.
///
/// Grammar accepted: `)` | `int IDENT { , int IDENT } )`.
/// On success the cursor is left just past the closing `)` and the returned
/// slice holds the parameter names in declaration order.
/// Returns `error.SyntaxError` (via `expect`) when a token is out of place.
fn parseFunctionParams(self: *Parser) ![][]const u8 {
    if (self.curr().type == .RIGHT_PAREN) {
        self.cursor += 1;
        return &[_][]const u8{};
    }

    var param_list = std.ArrayList([]const u8).init(self.allocator);
    // Do not leak the partially built list when a syntax error aborts parsing.
    errdefer param_list.deinit();

    while (true) {
        try self.expect(.INT);
        self.cursor += 1;

        // The name must be an identifier; without this check a token with no
        // string literal (e.g. `)` or a number) would be unwrapped blindly.
        try self.expect(.IDENTIFIER);
        try param_list.append(self.curr().literal.?.string);
        self.cursor += 1;

        if (self.curr().type == .RIGHT_PAREN) break;

        try self.expect(.COMMA);
        self.cursor += 1;
    }

    // Consume the closing `)`.
    self.cursor += 1;

    return try param_list.toOwnedSlice();
}

/// Parses a parenthesised call-argument list; the cursor must be on the
/// first token after `(`.
///
/// Grammar accepted: `)` | `expr { , expr } )`.
/// On success the cursor is left just past the closing `)`.
/// Returns `error.SyntaxError` (via `expect`) when a separator is missing.
fn parseFunctionArgs(self: *Parser) ![]*c_ast.Expression {
    if (self.curr().type == .RIGHT_PAREN) {
        self.cursor += 1;
        return &[_]*c_ast.Expression{};
    }

    var arg_list = std.ArrayList(*c_ast.Expression).init(self.allocator);
    // Do not leak the partially built list when an argument fails to parse.
    errdefer arg_list.deinit();

    try arg_list.append(try self.parseExpression(0));

    while (self.curr().type != .RIGHT_PAREN) {
        try self.expect(.COMMA);
        self.cursor += 1;
        try arg_list.append(try self.parseExpression(0));
    }

    try self.expect(.RIGHT_PAREN);
    self.cursor += 1;

    return try arg_list.toOwnedSlice();
}
/// Returns the binding strength of a binary-operator token; a larger value
/// binds tighter.
///
/// The ordering follows the C operator precedence table:
///   `* / %`  >  `+ -`  >  `<< >>`  >  `< <= > >=`  >  `== !=`
///   >  `&`  >  `^`  >  `|`  >  `&&`  >  `||`  >  `=`
///
/// Every level has a distinct value so that, for example, `a | b + c` groups
/// as `a | (b + c)` and `a == b & c` groups as `(a == b) & c`.
///
/// Any token type not listed is `unreachable`; callers must first check that
/// the token is a binary operator.
fn precedence(self: *Parser, token: Token) i16 {
    _ = self;
    return switch (token.type) {
        .STAR, .SLASH, .PERCENTAGE => 50,
        .PLUS, .MINUS => 45,
        .LEFT_SHIFT, .RIGHT_SHIFT => 40,
        .LESS, .LESS_EQUAL, .GREATER, .GREATER_EQUAL => 35,
        .EQUAL_EQUAL, .BANG_EQUAL => 30,
        .AMPERSAND => 25,
        .CARET => 20,
        .PIPE => 15,
        .AMPERSAND_AMPERSAND => 10,
        .PIPE_PIPE => 5,
        .EQUAL => 1,
        else => unreachable,
    };
}
/// Returns the token under the cursor.
fn curr(self: *Parser) Token {
    const position = self.cursor;
    return self.tokens[position];
}

/// Returns the token `offset` positions away from the cursor; a negative
/// offset looks backwards.
fn peek(self: *Parser, offset: i32) Token {
    const base: i32 = @intCast(self.cursor);
    const target: usize = @intCast(base + offset);
    return self.tokens[target];
}

/// Debug helper: prints the token under the cursor to stderr.
fn printCurr(self: *Parser) void {
    const token = self.curr();
    std.debug.print("Current token: {}\n", .{token});
}
/// Order in which scratch registers are handed out. Index `i` of this array
/// corresponds to bit `i` of `reg_bitmap`.
const reg_allocation_order = [_]asm_ast.Reg{ .a0, .a1, .t0, .t1, .t2, .t3 };

/// Returns the index of the first free slot in `reg_bitmap`, or
/// `reg_allocation_order.len` when every slot is in use.
///
/// `allocReg` always takes the lowest free slot and `freeLastReg` always
/// releases the slot just below it, so this index is also the number of
/// registers currently allocated through these helpers.
fn firstFreeRegSlot(self: *const Generator) usize {
    for (0..reg_allocation_order.len) |slot| {
        if (!self.reg_bitmap.isSet(slot)) return slot;
    }
    return reg_allocation_order.len;
}

/// Marks the lowest free slot as used and returns its register.
/// Panics when all six registers are already allocated.
fn allocReg(self: *Generator) asm_ast.Reg {
    const slot = self.firstFreeRegSlot();
    if (slot == reg_allocation_order.len) {
        @panic("can't allocate more than 6 registers yet");
    }

    self.reg_bitmap.set(slot);
    return reg_allocation_order[slot];
}

/// Returns the register allocated just before the most recent one.
/// Panics when fewer than two registers are allocated (previously this
/// underflowed `i - 2`, and returned the wrong register when all six slots
/// were in use).
fn getSecondLastReg(self: *Generator) asm_ast.Reg {
    const in_use = self.firstFreeRegSlot();
    if (in_use < 2) {
        @panic("no or 1 registers allocated");
    }

    return reg_allocation_order[in_use - 2];
}

/// Returns the most recently allocated register.
/// Panics when no register is allocated.
fn getLastReg(self: *Generator) asm_ast.Reg {
    const in_use = self.firstFreeRegSlot();
    if (in_use == 0) {
        @panic("no registers allocated");
    }

    return reg_allocation_order[in_use - 1];
}

/// Releases the most recently allocated register.
/// Panics when nothing is allocated. Also works when all six slots are in
/// use, a case the earlier loop silently ignored.
fn freeLastReg(self: *Generator) void {
    const in_use = self.firstFreeRegSlot();
    if (in_use == 0) {
        @panic("trying to free when all registers are already free");
    }

    self.reg_bitmap.unset(in_use - 1);
}
/// Emits the instructions that materialise the 32-bit constant `value` in a
/// fresh temporary register and returns that register.
///
/// Small values need a single `ADDI dest, zero, imm`. Larger values use
/// `LUI dest, upper` followed by `ADDI dest, dest, lower`.
///
/// The low part is emitted as a *signed* 12-bit immediate, which is how the
/// assembler in this repository stores I-type immediates (`imm: i12`).
/// Because a negative low part subtracts from the LUI result, the upper part
/// is rounded by adding 0x800 before shifting. Without this, any constant
/// with bit 11 set (for example 2048) produced an immediate that does not
/// fit in 12 signed bits.
/// NOTE(review): assumes the hardware sign-extends the ADDI immediate, as
/// standard RISC-V does - confirm against the decoder.
fn loadImmediate(self: *Generator, value: i32) !asm_ast.Reg {
    const dest_reg = self.ra.getTempRegister(self.function_line);

    const bits: u32 = @bitCast(value);
    // Wrapping add: values near the top of the range wrap exactly like the
    // 32-bit register arithmetic will.
    const upper_bits: u20 = @truncate((bits +% 0x800) >> 12);
    const lower_bits: i12 = @bitCast(@as(u12, @truncate(bits)));

    var add_template = asm_ast.IType{
        .instr = .ADDI,
        .destination = dest_reg,
        .source = .zero,
        .immediate = lower_bits,
    };

    if (upper_bits != 0) {
        try self.instruction_buffer.append(.{
            .utype = .{
                .instr = .LUI,
                .destination = dest_reg,
                .immediate = upper_bits,
            },
        });

        // Add the low part on top of what LUI loaded.
        add_template.source = dest_reg;
    }

    try self.instruction_buffer.append(.{ .itype = add_template });

    return dest_reg;
}
| }, 227 | .Bitwise_AND => { 228 | template.instr = .AND; 229 | try self.appendRType(template); 230 | }, 231 | .Bitwise_OR => { 232 | template.instr = .OR; 233 | try self.appendRType(template); 234 | }, 235 | .Bitwise_XOR => { 236 | template.instr = .XOR; 237 | try self.appendRType(template); 238 | }, 239 | .Left_Shift => { 240 | template.instr = .SLL; 241 | try self.appendRType(template); 242 | }, 243 | .Right_Shift => { 244 | template.instr = .SRL; 245 | try self.appendRType(template); 246 | }, 247 | .Less => { 248 | template.instr = .SLT; 249 | try self.appendRType(template); 250 | }, 251 | .Less_Or_Equal => { 252 | template.instr = .SLT; 253 | template.source1 = right; 254 | template.source2 = left; 255 | try self.appendRType(template); 256 | try self.appendIType(asm_ast.IType{ .immediate = 1, .source = left, .instr = .XORI, .destination = left }); 257 | }, 258 | .Greater => { 259 | template.source1 = right; 260 | template.source2 = left; 261 | template.instr = .SLT; 262 | try self.appendRType(template); 263 | }, 264 | .Greater_Or_Equal => { 265 | template.instr = .SLT; 266 | try self.appendRType(template); 267 | try self.appendIType(asm_ast.IType{ .immediate = 1, .source = left, .instr = .XORI, .destination = left }); 268 | }, 269 | .Equal => { 270 | template.instr = .SUB; 271 | try self.appendRType(template); 272 | try self.appendIType(asm_ast.IType{ .immediate = 1, .source = left, .instr = .SLTIU, .destination = left }); 273 | }, 274 | .Not_Equal => { 275 | var itype_template = asm_ast.IType{ .immediate = 1, .source = left, .instr = .SLTIU, .destination = left }; 276 | template.instr = .SUB; 277 | try self.appendRType(template); 278 | try self.appendIType(itype_template); 279 | itype_template.instr = .XORI; 280 | try self.appendIType(itype_template); 281 | }, 282 | .And, .Or => @panic("And and Or operators ran in appendOperator even though they have a separate function for generation. 
/// Maps a variable name to its numeric id: the index of the name inside
/// `variable_store`. A name that has not been seen before is appended and
/// receives the next free index.
fn getVariableId(self: *Generator, identifier: []const u8) !i32 {
    // Scan the whole store and keep the last matching position.
    var match: ?usize = null;
    for (self.variable_store.items, 0..) |known_name, position| {
        if (!std.mem.eql(u8, known_name, identifier)) continue;
        match = position;
    }

    if (match) |position| {
        return @intCast(position);
    }

    try self.variable_store.append(identifier);
    const count: i32 = @intCast(self.variable_store.items.len);
    return count - 1;
}
/// Emits code that evaluates `exp` and returns the register holding the
/// result.
///
/// Every prong now returns the register it produced. Previously the
/// assignment, variable and function-call prongs fell through into an
/// unconditional `@panic("not implemented yet")`, and they looked registers
/// up in `reg_bitmap` although `loadImmediate` allocates through `self.ra`.
fn generateExpression(self: *Generator, exp: c_ast.Expression) anyerror!asm_ast.Reg {
    switch (exp) {
        .assignment => |assignment| {
            const value = try self.generateExpression(assignment.right.*);
            const address = try self.loadImmediate(try self.getVariableId(assignment.left.*.variable.identifier));

            // SW: source1 is the value to store, source2 the base address
            // (same operand roles as the `ra`/`sp` store in the function
            // prologue).
            try self.appendSType(asm_ast.SType{ .immediate = 0, .source1 = value, .source2 = address, .instr = .SW });
            self.ra.expireRegister(address);

            // An assignment evaluates to the value that was stored.
            return value;
        },
        .variable => |variable| {
            const reg = try self.loadImmediate(try self.getVariableId(variable.identifier));

            // Replace the address in `reg` with the word stored there.
            try self.appendIType(asm_ast.IType{ .destination = reg, .immediate = 0, .instr = .LW, .source = reg });
            return reg;
        },
        .constant => |constant| {
            return try self.loadImmediate(constant);
        },
        .binary => |binary| {
            return try self.generateBinary(binary);
        },
        .function_call => |function_call| {
            const argument_regs = [_]asm_ast.Reg{ .a0, .a1, .a2, .a3, .a4, .a5, .a6, .a7 };

            for (function_call.args, 0..) |arg, position| {
                const value = try self.generateExpression(arg.*);

                if (position < argument_regs.len) {
                    // Move the evaluated argument into its argument register.
                    try self.appendRType(asm_ast.RType{
                        .instr = .ADD,
                        .destination = argument_regs[position],
                        .source1 = value,
                        .source2 = .zero,
                    });
                } else {
                    diagnostics.addError("Having more than 8 function arguments not supported yet", 0);
                }

                self.ra.expireRegister(value);
            }

            try self.appendJType(asm_ast.JType{ .label = function_call.identifier, .destination = .ra, .instr = .JAL });

            // Copy the return value out of a0 so later calls cannot clobber it.
            const result = self.ra.getTempRegister(self.function_line);
            try self.appendRType(asm_ast.RType{
                .instr = .ADD,
                .destination = result,
                .source1 = .a0,
                .source2 = .zero,
            });
            return result;
        },
    }
}
self.appendLabel(if_name); 439 | 440 | if (if_.else_ != null) { 441 | try self.generateStatement(if_.else_.?.*); 442 | 443 | try self.appendLabel(else_name); 444 | } 445 | } 446 | 447 | fn generateStatement(self: *Generator, statement: c_ast.Statement) anyerror!void { 448 | switch (statement) { 449 | .ret => |ret| { 450 | _ = try self.generateExpression(ret.exp); 451 | }, 452 | .exp => |exp| { 453 | _ = try self.generateExpression(exp); 454 | }, 455 | .if_ => |if_| { 456 | try self.generateIf(if_); 457 | }, 458 | .compound => |compound| { 459 | for (compound.block_items) |block_item| { 460 | switch (block_item) { 461 | .statement => { 462 | try self.generateStatement(block_item.statement); 463 | }, 464 | .declaration => { 465 | try self.generateDeclaration(block_item.declaration); 466 | }, 467 | } 468 | } 469 | }, 470 | .break_ => |break_| { 471 | const identifier = try std.fmt.allocPrint(self.allocator, "break_{s}", .{break_.identifier.?}); 472 | 473 | try self.appendJType(asm_ast.JType{ .label = identifier, .destination = .zero, .instr = .JAL }); 474 | }, 475 | .continue_ => |continue_| { 476 | const identifier = try std.fmt.allocPrint(self.allocator, "continue_{s}", .{continue_.identifier.?}); 477 | 478 | try self.appendJType(asm_ast.JType{ .label = identifier, .destination = .zero, .instr = .JAL }); 479 | }, 480 | .do_while => |do_while| { 481 | const identifier_start = try std.fmt.allocPrint(self.allocator, "{s}_start", .{do_while.identifier.?}); 482 | const identifier_continue = try std.fmt.allocPrint(self.allocator, "continue_{s}", .{do_while.identifier.?}); 483 | const identifier_break = try std.fmt.allocPrint(self.allocator, "break_{s}", .{do_while.identifier.?}); 484 | try self.appendLabel(identifier_start); 485 | 486 | try self.generateStatement(do_while.body.*); 487 | try self.appendLabel(identifier_continue); 488 | 489 | _ = try self.generateExpression(do_while.condition); 490 | 491 | try self.appendBType(asm_ast.BType{ .instr = .BNE, .label = 
identifier_start, .source1 = self.getLastReg(), .source2 = .zero }); 492 | 493 | try self.appendLabel(identifier_break); 494 | }, 495 | .for_ => |for_| { 496 | const identifier_start = try std.fmt.allocPrint(self.allocator, "{s}_start", .{for_.identifier.?}); 497 | const identifier_continue = try std.fmt.allocPrint(self.allocator, "continue_{s}", .{for_.identifier.?}); 498 | const identifier_break = try std.fmt.allocPrint(self.allocator, "break_{s}", .{for_.identifier.?}); 499 | 500 | switch (for_.init) { 501 | .init_decl => try self.generateDeclaration(.{ .variable_declaration = for_.init.init_decl }), 502 | .init_exp => if (for_.init.init_exp != null) { 503 | _ = try self.generateExpression(for_.init.init_exp.?); 504 | }, 505 | } 506 | try self.appendLabel(identifier_start); 507 | 508 | if (for_.condition != null) _ = try self.generateExpression(for_.condition.?); 509 | 510 | try self.appendBType(asm_ast.BType{ .instr = .BEQ, .label = identifier_break, .source1 = self.getLastReg(), .source2 = .zero }); 511 | self.freeLastReg(); 512 | 513 | try self.generateStatement(for_.body.*); 514 | self.freeLastReg(); 515 | 516 | try self.appendLabel(identifier_continue); 517 | 518 | if (for_.post != null) _ = try self.generateExpression(for_.post.?); 519 | 520 | try self.appendJType(asm_ast.JType{ .label = identifier_start, .destination = .zero, .instr = .JAL }); 521 | try self.appendLabel(identifier_break); 522 | self.freeLastReg(); 523 | }, 524 | .while_ => |while_| { 525 | const identifier_continue = try std.fmt.allocPrint(self.allocator, "continue_{s}", .{while_.identifier.?}); 526 | const identifier_break = try std.fmt.allocPrint(self.allocator, "break_{s}", .{while_.identifier.?}); 527 | try self.appendLabel(identifier_continue); 528 | 529 | _ = try self.generateExpression(while_.condition); 530 | 531 | try self.appendBType(asm_ast.BType{ .instr = .BEQ, .label = identifier_break, .source1 = self.getLastReg(), .source2 = .zero }); 532 | 533 | try 
/// Emits code for a declaration.
///
/// Only variable declarations with an initializer produce instructions: the
/// initializer is evaluated and stored to the variable's slot. Other
/// declaration kinds emit nothing here.
///
/// The store uses the registers returned by `generateExpression` and
/// `loadImmediate` directly. The earlier version looked them up through
/// `getSecondLastReg`/`getLastReg`, which read `reg_bitmap`, a bitmap that
/// `loadImmediate` never updates.
fn generateDeclaration(self: *Generator, declaration: c_ast.Declaration) !void {
    switch (declaration) {
        .variable_declaration => |variable_declaration| {
            const initial = variable_declaration.initial orelse return;

            const value = try self.generateExpression(initial);
            const address = try self.loadImmediate(try self.getVariableId(variable_declaration.identifier));

            // SW: source1 is the value to store, source2 the base address.
            try self.appendSType(asm_ast.SType{ .immediate = 0, .source1 = value, .source2 = address, .instr = .SW });

            // Neither temporary is needed once the store has been emitted.
            self.ra.expireRegister(address);
            self.ra.expireRegister(value);
        },
        else => {},
    }
}

/// Emits code for every item of `block` in order, advancing
/// `function_line` by one after each top-level item.
fn generateBlock(self: *Generator, block: c_ast.Block) !void {
    for (block.block_items) |block_item| {
        switch (block_item) {
            .statement => |statement| try self.generateStatement(statement),
            .declaration => |declaration| try self.generateDeclaration(declaration),
        }
        self.function_line += 1;
    }
}
/// Generates code for every function of the program, in order, and returns
/// the result as a single assembly program.
///
/// All instructions end up in one list; the returned identifier is the name
/// of the first function in the input program.
pub fn generate(self: *Generator) !asm_ast.Program {
    var position: usize = 0;
    while (position < self.program.function.len) : (position += 1) {
        try self.generateFunction(self.program.function[position]);
    }

    const entry_name = self.program.function[0].identifier;
    const instructions = try self.instruction_buffer.toOwnedSlice();

    return .{
        .function = .{
            .identifier = entry_name,
            .instructions = instructions,
        },
    };
}
| const rs2 = @as(u32, rtype.rs2); 48 | 49 | const funct3: u32 = switch (rtype.instruction) { 50 | .ADD, .SUB, .MUL => 0b000, 51 | .XOR, .DIV => 0b100, 52 | .OR, .REM => 0b110, 53 | .AND, .REMU => 0b111, 54 | .SLL, .MULH => 0b001, 55 | .SRL, .SRA, .DIVU => 0b101, 56 | .SLT, .MULSU => 0b010, 57 | .SLTU, .MULU => 0b011, 58 | }; 59 | 60 | const funct7: u32 = switch (rtype.instruction) { 61 | .SUB, .SRA => 0b0100000, 62 | .MUL, .MULH, .MULSU, .MULU, .DIV, .DIVU, .REM, .REMU => 0b0000001, 63 | else => 0b0000000, 64 | }; 65 | 66 | return opcode | 67 | (rd << 7) | 68 | (funct3 << 12) | 69 | (rs1 << 15) | 70 | (rs2 << 20) | 71 | (funct7 << 25); 72 | }, 73 | .IType => |itype| { 74 | const opcode: u32 = switch (itype.instruction) { 75 | .LB, .LH, .LW, .LBU, .LHU => 0b0000011, 76 | .JALR => 0b1100111, 77 | else => 0b0010011, 78 | }; 79 | 80 | const rd = @as(u32, itype.rd); 81 | const rs1 = @as(u32, itype.rs1); 82 | 83 | const imm_bits: u12 = @bitCast(itype.imm); 84 | const imm = switch (itype.instruction) { 85 | .SLLI, .SRLI => @as(u32, imm_bits & 0x1F), 86 | .SRAI => @as(u32, (imm_bits & 0x1F) | 0x400), 87 | else => @as(u32, imm_bits), 88 | }; 89 | 90 | const funct3: u32 = switch (itype.instruction) { 91 | .ADDI => 0b000, 92 | .XORI => 0b100, 93 | .ORI => 0b110, 94 | .ANDI => 0b111, 95 | .SLLI => 0b001, 96 | .SRLI, .SRAI => 0b101, 97 | .SLTI => 0b010, 98 | .SLTIU => 0b011, 99 | 100 | .LB => 0b000, 101 | .LH => 0b001, 102 | .LW => 0b010, 103 | .LBU => 0b100, 104 | .LHU => 0b101, 105 | .JALR => 0b000, 106 | }; 107 | 108 | return opcode | 109 | (rd << 7) | 110 | (funct3 << 12) | 111 | (rs1 << 15) | 112 | (imm << 20); 113 | }, 114 | .SType => |stype| { 115 | const opcode = 0b0100011; 116 | 117 | const rs1 = @as(u32, stype.rs1); 118 | const rs2 = @as(u32, stype.rs2); 119 | const imm_bits: u12 = @bitCast(stype.imm); 120 | 121 | const funct3: u32 = switch (stype.instruction) { 122 | .SB => 0b000, 123 | .SH => 0b001, 124 | .SW => 0b010, 125 | }; 126 | 127 | const imm_lo = imm_bits & 
/// Splits `input` on newline characters and returns the non-empty pieces.
///
/// The returned outer slice is allocated with `allocator` and must be freed
/// by the caller; the inner slices point into `input`.
fn splitStringIntoLines(allocator: *const std.mem.Allocator, input: []const u8) ![][]const u8 {
    var collected = std.ArrayList([]const u8).init(allocator.*);
    errdefer collected.deinit();

    var line_iter = std.mem.tokenize(u8, input, "\n");
    while (true) {
        const piece = line_iter.next() orelse break;
        try collected.append(piece);
    }

    return collected.toOwnedSlice();
}
u8).init(allocator.*); 204 | defer tokens.deinit(); 205 | var tokenizer = std.mem.tokenize(u8, input, " \t\n\r"); 206 | while (tokenizer.next()) |token| { 207 | try tokens.append(token); 208 | } 209 | return tokens.toOwnedSlice(); 210 | } 211 | 212 | fn assemble(allocator: *const std.mem.Allocator, source: []const u8) !std.ArrayList(u32) { 213 | const lines = try splitStringIntoLines(allocator, source); 214 | defer allocator.free(lines); 215 | 216 | var encoded = std.ArrayList(u32).init(allocator.*); 217 | 218 | if (!@import("builtin").is_test) { 219 | const jal_str = "jal zero _start"; 220 | 221 | const jal_tokens = try splitStringByWhitespace(allocator, jal_str); 222 | defer allocator.free(jal_tokens); 223 | 224 | var jal_instruction = try parseInstruction(allocator, jal_tokens, lines, 0); 225 | jal_instruction.JType.imm += 1; 226 | try encoded.append(try jal_instruction.encode()); 227 | } 228 | 229 | for (lines, 0..) |line, index| { 230 | if (line.len > 0 and line[0] == ';' or line[line.len - 1] == ':') continue; 231 | 232 | const tokens = try splitStringByWhitespace(allocator, line); 233 | defer allocator.free(tokens); 234 | const pseudo_parsed_tokens = try parsePseudoInstruction(allocator, tokens); 235 | const instruction = try parseInstruction(allocator, pseudo_parsed_tokens, lines, index); 236 | try encoded.append(try instruction.encode()); 237 | } 238 | 239 | return encoded; 240 | } 241 | 242 | const InstructionType = enum { 243 | RType, 244 | IType, 245 | SType, 246 | BType, 247 | UType, 248 | JType, 249 | None, 250 | }; 251 | 252 | fn getInstructionType(instruction: []const u8, instruction_sets: struct { 253 | rtype: []const []const u8, 254 | itype: []const []const u8, 255 | stype: []const []const u8, 256 | btype: []const []const u8, 257 | utype: []const []const u8, 258 | jtype: []const []const u8, 259 | }) InstructionType { 260 | for (instruction_sets.rtype) |candidate| { 261 | if (std.mem.eql(u8, instruction, candidate)) { 262 | return .RType; 263 | } 
}
    for (instruction_sets.itype) |candidate| {
        if (std.mem.eql(u8, instruction, candidate)) {
            return .IType;
        }
    }
    for (instruction_sets.stype) |candidate| {
        if (std.mem.eql(u8, instruction, candidate)) {
            return .SType;
        }
    }
    for (instruction_sets.btype) |candidate| {
        if (std.mem.eql(u8, instruction, candidate)) {
            return .BType;
        }
    }
    for (instruction_sets.utype) |candidate| {
        if (std.mem.eql(u8, instruction, candidate)) {
            return .UType;
        }
    }
    for (instruction_sets.jtype) |candidate| {
        if (std.mem.eql(u8, instruction, candidate)) {
            return .JType;
        }
    }
    return .None;
}

/// Builds the register-name -> register-number table.
///
/// The 32 names are numbered by their position in the list below, and "fp" is
/// added as a second name for register 8. The caller owns the map and must
/// call `deinit` on it.
fn createRegMap(allocator: *const std.mem.Allocator) !std.StringHashMap(u8) {
    const reg_names = [_][]const u8{
        "zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1",  "a0",  "a1", "a2", "a3",
        "a4",   "a5", "a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7",  "s8",  "s9", "s10", "s11",
        "t3",   "t4", "t5", "t6",
    };

    var map = std.StringHashMap(u8).init(allocator.*);
    // Release the partially filled map if one of the insertions fails.
    errdefer map.deinit();

    for (reg_names, 0..) |name, index| {
        try map.put(name, @as(u8, @intCast(index)));
    }
    try map.put("fp", 8);
    return map;
}

/// Resolves a register operand to its number (0..31).
///
/// Operands starting with 'x' are parsed as a decimal register number; every
/// other operand is looked up in `reg_map`.
///
/// Returns `error.InvalidRegister` for an empty operand, an unknown name, or a
/// number above 31, and the `std.fmt.parseInt` error for a malformed number.
fn parseRegister(reg: []const u8, reg_map: *const std.StringHashMap(u8)) !u8 {
    if (reg.len == 0) return error.InvalidRegister;

    if (reg[0] == 'x') {
        const number = try std.fmt.parseInt(u8, reg[1..], 10);
        // Only x0..x31 exist; a larger value would spill into neighbouring
        // bit fields when the instruction is encoded.
        if (number > 31) return error.InvalidRegister;
        return number;
    }

    return reg_map.get(reg) orelse error.InvalidRegister;
}

/// Returns the distance, in emitted instructions, from source line `index` to
/// the label `name`; negative when the label is defined before `index`.
///
/// A label definition is a line that is exactly `name` followed by ':'. Lines
/// between the two positions that emit nothing (labels, i.e. last character
/// ':', and comments, i.e. first character ';') are not counted, matching the
/// lines `assemble` skips.
///
/// Panics when no line defines the label.
fn findLabelRelativeIndex(lines: [][]const u8, name: []const u8, index: usize) i32 {
    // Compare in place instead of formatting "name:" into a fixed-size buffer,
    // which broke for label names longer than 32 bytes.
    const found_index = for (lines, 0..) |line, line_index| {
        const is_definition = line.len == name.len + 1 and
            line[name.len] == ':' and
            std.mem.eql(u8, line[0..name.len], name);
        if (is_definition) break line_index;
    } else std.debug.panic("Label of name {s}: not found", .{name});

    const first = @min(index, found_index);
    const last = @max(index, found_index);

    var lines_without_code: usize = 0;
    for (lines[first..last]) |line| {
        if (line.len == 0) continue;
        if (line[0] == ';' or line[line.len - 1] == ':') {
            lines_without_code += 1;
        }
    }

    const distance = @as(i32, @intCast(last - first - lines_without_code));
    return if (found_index < index) -distance else distance;
}

/// Expands pseudo-instructions into real ones.
///
/// `ret` becomes `jalr x0 x1 0`, returned as a newly allocated 4-element slice
/// that the caller must free with `allocator`. Any other input (including an
/// empty token list) is returned unchanged, i.e. the very same slice.
fn parsePseudoInstruction(allocator: *const std.mem.Allocator, tokens: [][]const u8) ![][]const u8 {
    if (tokens.len == 0) return tokens;

    if (std.mem.eql(u8, tokens[0], "ret")) {
        return allocator.dupe([]const u8, &[_][]const u8{ "jalr", "x0", "x1", "0" });
    }

    return tokens;
}

/// Decoded form of an `offset(register)` operand such as `24(x20)`.
const MemoryOperand = struct {
    imm: i12,
    rs1: u8,
};

/// Parses an `offset(register)` operand as used by loads and stores.
///
/// Returns `error.InvalidMemoryOperand` when the parentheses are missing or
/// misplaced, and forwards offset / register parse errors.
fn parseMemoryOperand(operand: []const u8, reg_map: *const std.StringHashMap(u8)) !MemoryOperand {
    const open_paren = std.mem.indexOfScalar(u8, operand, '(') orelse return error.InvalidMemoryOperand;
    // There must be at least the closing ')' after the '('.
    if (operand.len < open_paren + 2 or operand[operand.len - 1] != ')') {
        return error.InvalidMemoryOperand;
    }

    return MemoryOperand{
        .imm = try std.fmt.parseInt(i12, operand[0..open_paren], 10),
        .rs1 = try parseRegister(operand[open_paren + 1 .. operand.len - 1], reg_map),
    };
}

/// Parses one tokenized instruction into an `Instruction`.
///
/// `tokens[0]` is the mnemonic; the operand layout depends on its type:
///   R: rd rs1 rs2          I (load): rd imm(rs1)     I (other): rd rs1 imm
///   S: rs2 imm(rs1)        B: rs1 rs2 label          U: rd imm
///   J: rd label
/// `lines` and `index` (the source line of this instruction) are only used to
/// resolve labels through `findLabelRelativeIndex`.
///
/// Errors: `error.EmptyInstruction` for an empty token list,
/// `error.UnknownInstruction` for an unrecognised mnemonic,
/// `error.MissingOperand` when operands are missing,
/// `error.BranchTargetOutOfRange` / `error.JumpTargetOutOfRange` when a label
/// offset does not fit the immediate, plus register / number parse errors.
fn parseInstruction(allocator: *const std.mem.Allocator, tokens: [][]const u8, lines: [][]const u8, index: usize) !Instruction {
    if (tokens.len == 0) return error.EmptyInstruction;

    var reg_map = try createRegMap(allocator);
    defer reg_map.deinit();

    const instruction_token = tokens[0];
    const instruction_sets = .{
        .rtype = &[_][]const u8{ "add", "sub", "mul", "div", "rem", "sll", "slt", "sltu", "xor", "srl", "sra", "or", "and" },
        .itype = &[_][]const u8{ "addi", "muli", "divi", "slti", "sltiu", "xori", "andi", "ori", "slli", "srli", "srai", "lb", "lh", "lw", "lbu", "lhu", "jalr" },
        .stype = &[_][]const u8{ "sb", "sh", "sw" },
        .btype = &[_][]const u8{ "beq", "bne", "blt", "bge", "bltu", "bgeu" },
        .utype = &[_][]const u8{ "lui", "auipc" },
        .jtype = &[_][]const u8{"jal"},
    };

    switch (getInstructionType(instruction_token, instruction_sets)) {
        .RType => {
            if (tokens.len < 4) return error.MissingOperand;

            return Instruction{ .RType = .{
                .instruction = try instr_getters.getRTypeInstruction(instruction_token),
                .rd = try parseRegister(tokens[1], &reg_map),
                .rs1 = try parseRegister(tokens[2], &reg_map),
                .rs2 = try parseRegister(tokens[3], &reg_map),
            } };
        },
        .IType => {
            const base_instruction = try instr_getters.getITypeInstruction(instruction_token);

            const load_instructions = [_][]const u8{ "lb", "lh", "lw", "lbu", "lhu" };
            const is_load = for (load_instructions) |load_instr| {
                if (std.mem.eql(u8, instruction_token, load_instr)) break true;
            } else false;

            if (is_load) {
                // Loads take their base register and offset as `imm(rs1)`.
                if (tokens.len < 3) return error.MissingOperand;

                const rd = try parseRegister(tokens[1], &reg_map);
                const address = try parseMemoryOperand(tokens[2], &reg_map);

                return Instruction{ .IType = .{
                    .instruction = base_instruction,
                    .rd = rd,
                    .rs1 = address.rs1,
                    .imm = address.imm,
                } };
            }

            // `jalr` and the arithmetic immediates share the `rd rs1 imm` layout.
            if (tokens.len < 4) return error.MissingOperand;

            return Instruction{ .IType = .{
                .instruction = base_instruction,
                .rd = try parseRegister(tokens[1], &reg_map),
                .rs1 = try parseRegister(tokens[2], &reg_map),
                .imm = try std.fmt.parseInt(i12, tokens[3], 10),
            } };
        },
        .SType => {
            if (tokens.len < 3) return error.MissingOperand;

            const rs2 = try parseRegister(tokens[1], &reg_map);
            const address = try parseMemoryOperand(tokens[2], &reg_map);

            return Instruction{ .SType = .{
                .instruction = try instr_getters.getSTypeInstruction(instruction_token),
                .rs1 = address.rs1,
                .rs2 = rs2,
                .imm = address.imm,
            } };
        },
        .BType => {
            if (tokens.len < 4) return error.MissingOperand;

            const offset = findLabelRelativeIndex(lines, tokens[3], index);
            const imm = std.math.cast(i12, offset) orelse return error.BranchTargetOutOfRange;

            return Instruction{ .BType = .{
                .instruction = try instr_getters.getBTypeInstruction(instruction_token),
                .rs1 = try parseRegister(tokens[1], &reg_map),
                .rs2 = try parseRegister(tokens[2], &reg_map),
                .imm = imm,
            } };
        },
        .UType => {
            if (tokens.len < 3) return error.MissingOperand;

            return Instruction{ .UType = .{
                .instruction = try instr_getters.getUTypeInstruction(instruction_token),
                .rd = try parseRegister(tokens[1], &reg_map),
                .imm = try std.fmt.parseInt(i20, tokens[2], 10),
            } };
        },
        .JType => {
            if (tokens.len < 3) return error.MissingOperand;

            const offset = findLabelRelativeIndex(lines, tokens[2], index);
            const imm = std.math.cast(i20, offset) orelse return error.JumpTargetOutOfRange;

            return Instruction{ .JType = .{
                .instruction = try instr_getters.getJTypeInstruction(instruction_token),
                .rd = try parseRegister(tokens[1], &reg_map),
                .imm = imm,
            } };
        },
        .None => return error.UnknownInstruction,
    }
}

pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    const args = try std.process.argsAlloc(std.heap.page_allocator);
    defer std.process.argsFree(std.heap.page_allocator, args);

    // Both paths are required; fail with a message instead of indexing past
    // the end of `args`.
    if (args.len < 3) {
        std.debug.print("usage: assembler <input.asm> <output>\n", .{});
        return error.InvalidArguments;
    }
    const input_path = args[1];
    const output_path = args[2];

    const file = try std.fs.cwd().openFile(input_path, .{});
    defer file.close();

    const source_code = try file.readToEndAlloc(std.heap.page_allocator, 1024 * 1024);
    defer std.heap.page_allocator.free(source_code);

    const
machine_code = try assemble(&allocator, source_code);

    const output_file = try std.fs.cwd().createFile(output_path, .{
        .read = true,
        .truncate = true,
    });
    defer output_file.close();

    var buf: [4]u8 = undefined;
    for (machine_code.items) |code| {
        buf[0] = @truncate(code >> 24);
        buf[1] = @truncate(code >> 16);
        buf[2] = @truncate(code >> 8);
        buf[3] = @truncate(code);
        try output_file.writeAll(&buf);
    }
}

/// Test helper: assembles `source` with the testing allocator and checks that
/// the first emitted word equals `expected`.
fn expectFirstWord(source: []const u8, expected: u32) !void {
    const machine_code = try assemble(&std.testing.allocator, source);
    defer machine_code.deinit();
    try std.testing.expectEqual(expected, machine_code.items[0]);
}

// R-type instructions.

test "add" {
    try expectFirstWord("add ra sp gp", 0x3100B3);
}

test "sub" {
    try expectFirstWord("sub tp t0 t1", 0x40628233);
}

test "sll" {
    try expectFirstWord("sll t2 s0 fp", 0x8413B3);
}

test "slt" {
    try expectFirstWord("slt s1 a0 a1", 0xB524B3);
}

test "sltu" {
    try expectFirstWord("sltu a2 a3 a4", 0xE6B633);
}

test "xor" {
    try expectFirstWord("xor a5 a6 a7", 0x11847B3);
}

test "srl" {
    try expectFirstWord("srl s2 s3 s4", 0x149D933);
}

test "sra" {
    try expectFirstWord("sra s5 s6 s7", 0x417B5AB3);
}

test "or" {
    try expectFirstWord("or s8 s9 s10", 0x1ACEC33);
}

test "and" {
    try expectFirstWord("and t3 t4 t5", 0x1EEFE33);
}

// I-type arithmetic instructions.

test "addi" {
    try expectFirstWord("addi t6 ra 3", 0x308F93);
}

test "slti" {
    try expectFirstWord("slti sp sp 3", 0x312113);
}

test "sltiu" {
    try expectFirstWord("sltiu a0 a0 3", 0x353513);
}

test "xori" {
    try expectFirstWord("xori a1 a1 3", 0x35C593);
}

test "ori" {
    try expectFirstWord("ori a2 a2 3", 0x366613);
}

test "andi" {
    try expectFirstWord("andi a3 a3 3", 0x36F693);
}

test "slli" {
    try expectFirstWord("slli a4 a4 3", 0x371713);
}

test "srai" {
    try expectFirstWord("srai a6 a6 3", 0x40385813);
}

// Loads.

test "lb" {
    try expectFirstWord("lb x19 24(x20)", 0x018A0983);
}

test "lh" {
    try expectFirstWord("lh x21 -32(x22)", 0xFE0B1A83);
}

test "lw" {
    try expectFirstWord("lw x23 64(x24)", 0x040C2B83);
}

test "lbu" {
    try expectFirstWord("lbu x25 16(x26)", 0x010D4C83);
}

test "lhu" {
    try expectFirstWord("lhu x27 -128(x28)", 0xF80E5D83);
}

// Stores.

test "sb" {
    try expectFirstWord("sb s4 0(s4)", 0x14a0023);
}

test "sh" {
    try expectFirstWord("sh s5 2(s5)", 0x15a9123);
}

test "sw" {
    try expectFirstWord("sw s6 3(s6)", 0x16b21a3);
}

// TODO: the commented-out branch tests below were AI-generated and are most
// likely wrong. Verify the expected encodings against the assembler before
// re-enabling them.

// test "beq" {
//     const source =
//         \\beq s7 s7 label
//         \\ addi x0 x0 0
//         \\ addi x0 x0 0
//         \\label:
//         \\ addi x0 x0 0
//         \\
//     ;
//     const machine_code = try assemble(&std.testing.allocator, source);
//     defer machine_code.deinit();

//     try std.testing.expectEqual(@as(u32, 0x17b8163), machine_code.items[0]);

//     try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[1]);
//     try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[2]);
//     try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[3]);
// }

// test "bne" {
//     const source =
//         \\bne t0 t0 label
//         \\ addi x0 x0 0
//         \\ addi x0 x0 0
//         \\label:
//         \\ addi x0 x0 0
//         \\
//     ;
//     const machine_code = try assemble(&std.testing.allocator, source);
//     defer machine_code.deinit();

//     try std.testing.expectEqual(@as(u32, 0x529163), machine_code.items[0]);

//     try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[1]);
//     try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[2]);
//     try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[3]);
// }

// test "blt" {
//     const source =
//         \\blt t1 t1 label
//         \\ addi x0 x0 0
//         \\ addi x0 x0 0
//         \\label:
//         \\ addi x0 x0 0
//         \\
//     ;
//     const machine_code = try assemble(&std.testing.allocator, source);
//     defer machine_code.deinit();

//     try std.testing.expectEqual(@as(u32, 0x634263), machine_code.items[0]);

//     try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[1]);
//     try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[2]);
//     try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[3]);
// }

// test "bge" {
//     const source =
//         \\bge t2 t2 label
//         \\ addi x0 x0 0
//         \\ addi x0 x0 0
//         \\label:
//         \\ addi x0 x0 0
//         \\
//     ;
//     const machine_code = try assemble(&std.testing.allocator, source);
//     defer machine_code.deinit();

//     try std.testing.expectEqual(@as(u32, 0x73d163), machine_code.items[0]);

//     try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[1]);
//     try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[2]);
//     try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[3]);
// }

// test "bltu" {
//     const source =
//         \\bltu t3 t3 label
//         \\ addi x0 x0 0
//         \\ addi x0 x0 0
//         \\label:
//         \\ addi x0 x0 0
//         \\
//     ;
//     const machine_code = try assemble(&std.testing.allocator, source);
//     defer machine_code.deinit();

//     try std.testing.expectEqual(@as(u32, 0x1ce6163), machine_code.items[0]);

//     try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[1]);
//     try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[2]);
//     try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[3]);
// }

// test "bgeu" {
//     const source =
//         \\bgeu t4 t4 label
//         \\ addi x0 x0 0
//         \\ addi x0 x0 0
//         \\label:
//         \\ addi x0 x0 0
//         \\
//     ;
//     const machine_code = try assemble(&std.testing.allocator, source);
//     defer machine_code.deinit();

//     try std.testing.expectEqual(@as(u32, 0x1def163), machine_code.items[0]);

//     try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[1]);
//     try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[2]);
//     try std.testing.expectEqual(@as(u32, 0x00000013), machine_code.items[3]);
// }

// U-type instructions.

test "lui" {
    try expectFirstWord("lui t5 3", 0x3f37);
}

test "auipc" {
    try expectFirstWord("auipc t6 3", 0x3f97);
}

// test "jal" {
//     const machine_code = try assemble(&std.testing.allocator, "jal ra 0");
//     defer machine_code.deinit();
//     try std.testing.expectEqual(@as(u32, 0xef), machine_code.items[0]);
// }

// test "jalr" {
//     const machine_code = try assemble(&std.testing.allocator, "jalr sp sp 3");
//     defer machine_code.deinit();
//     try std.testing.expectEqual(@as(u32, 0x310167), machine_code.items[0]);
// }
--------------------------------------------------------------------------------