├── .cargo └── config.toml ├── .gitignore ├── COPYING ├── Cargo.toml ├── Makefile ├── README.md ├── as ├── Cargo.toml └── src │ └── main.rs ├── bww-multiplier-generator ├── Cargo.toml └── src │ └── main.rs ├── freestanding ├── Cargo.toml ├── riscv64-arnavion-none-elf.json ├── riscv64-arnavion-none-elf.ld └── src │ └── main.rs ├── simulator ├── Cargo.toml └── src │ ├── csrs.rs │ ├── in_order.rs │ ├── in_order_ucode.rs │ ├── instruction.rs │ ├── main.rs │ ├── memory.rs │ ├── multiplier.rs │ ├── out_of_order.rs │ ├── tag.rs │ ├── ucode.rs │ └── x_regs.rs ├── src ├── instruction.rs ├── lib.rs ├── pseudo_instruction.rs ├── register.rs └── supported_extensions.rs └── tc ├── add5.S ├── add5.c ├── ai-showdown.S ├── ai-showdown.c ├── calibrating-laser-cannons-2.S ├── calibrating-laser-cannons-2.c ├── calibrating-laser-cannons-2.data ├── calibrating-laser-cannons.S ├── calibrating-laser-cannons.c ├── conditional-jumps.S ├── conditional-jumps.c ├── dancing-machine.S ├── dancing-machine.c ├── delicious-order.S ├── delicious-order.c ├── divide.S ├── divide.c ├── masking-time.S ├── masking-time.c ├── maze.S ├── maze.c ├── maze_save-breaker.S ├── planet-names.S ├── planet-names.c ├── random-number-generator.S ├── random-number-generator.c ├── spacial-invasion.S ├── spacial-invasion.c ├── storage-cracker.S ├── storage-cracker.c ├── sv ├── booth_multiplier.sv ├── booth_multiplier_multi_cycle.sv ├── bww_multiplier.sv ├── load32.sv ├── load64.sv ├── mop_fusion.sv ├── ram_cache.sv ├── ram_cache_tree_plru.sv ├── rv_alu.sv ├── rv_decoder.sv ├── rv_decompressing_decoder.sv ├── rv_decompressing_decoder_priority.sv ├── rv_decompressor.sv ├── rv_decompressor_priority.sv └── rv_register_file.sv ├── tower-of-alloy.S ├── tower-of-alloy.c ├── tower-of-alloy.ctz.S ├── tower-of-alloy.ctz.c ├── unseen-fruit.S ├── unseen-fruit.c ├── xor.S └── xor.c /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target."riscv64-arnavion-none-elf"] 2 | 
rustflags = ["-C", "link-arg=-Triscv64-arnavion-none-elf.ld"] 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /Cargo.lock 2 | /target 3 | /freestanding/Cargo.lock 4 | /freestanding/target 5 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "riscv" 3 | version = "0.1.0" 4 | license = "AGPL-3.0-only" 5 | authors = ["Arnav Singh "] 6 | edition = "2024" 7 | publish = false 8 | 9 | 10 | [dependencies] 11 | 12 | 13 | [workspace] 14 | members = [ 15 | "as", 16 | "bww-multiplier-generator", 17 | "simulator", 18 | ] 19 | 20 | 21 | [workspace.dependencies] 22 | awint = { version = "0.18", default-features = false } 23 | 24 | 25 | [workspace.lints.rust] 26 | rust_2018_idioms = "deny" 27 | warnings = "deny" 28 | 29 | 30 | [workspace.lints.clippy] 31 | all = { level = "deny", priority = -1 } 32 | pedantic = { level = "deny", priority = -1 } 33 | default_trait_access = "allow" 34 | missing_errors_doc = "allow" 35 | missing_panics_doc = "allow" 36 | must_use_candidate = "allow" 37 | similar_names = "allow" 38 | too_many_arguments = "allow" 39 | too_many_lines = "allow" 40 | type_complexity = "allow" 41 | 42 | 43 | [lints] 44 | workspace = true 45 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: default 2 | default: 3 | cargo build 4 | 5 | 6 | .PHONY: clean 7 | clean: 8 | rm -rf Cargo.lock target/ freestanding/Cargo.lock freestanding/target/ 9 | 10 | 11 | .PHONY: outdated 12 | outdated: 13 | cargo-outdated 14 | 15 | 16 | .PHONY: print 17 | print: 18 | git status --porcelain 19 | 20 | 21 | .PHONY: test 22 | test: 23 | cargo test --workspace 24 | for bitness in 
'--32' '--64'; do \ 25 | for compressed in 'false' 'true' 'Zcb'; do \ 26 | for zba in '' '--zba'; do \ 27 | for zbb in '' '--zbb'; do \ 28 | for f in tc/*.S; do cargo run -p as -- $$bitness "--compressed=$$compressed" $$zba $$zbb "$$f" >/dev/null || exit 1; done; \ 29 | done; \ 30 | done; \ 31 | done; \ 32 | done 33 | cargo clippy --workspace --tests --examples 34 | cd freestanding && cargo clippy --release --target riscv64-arnavion-none-elf.json -Z build-std=core 35 | cargo machete 36 | 37 | 38 | .PHONY: test-booth_multiplier 39 | test: test-booth_multiplier 40 | test-booth_multiplier: 41 | src="$$PWD" && \ 42 | d="$$(mktemp -d)" && \ 43 | trap "rm -rf '$$d'" EXIT && \ 44 | (cd "$$d" && iverilog -g2012 -DTESTING -o test "$$src/tc/sv/booth_multiplier.sv" && ./test) 45 | 46 | 47 | .PHONY: test-booth_multiplier_multi_cycle 48 | test: test-booth_multiplier_multi_cycle 49 | test-booth_multiplier_multi_cycle: 50 | src="$$PWD" && \ 51 | d="$$(mktemp -d)" && \ 52 | trap "rm -rf '$$d'" EXIT && \ 53 | (cd "$$d" && iverilog -g2012 -DTESTING -o test "$$src/tc/sv/booth_multiplier_multi_cycle.sv" && ./test) 54 | 55 | 56 | .PHONY: test-bww-multiplier-generator 57 | test: test-bww-multiplier-generator 58 | test-bww-multiplier-generator: 59 | cargo run -p bww-multiplier-generator -- --mulh 8 >tc/sv/bww_multiplier.sv 60 | src="$$PWD" && \ 61 | d="$$(mktemp -d)" && \ 62 | trap "rm -rf '$$d'" EXIT && \ 63 | (cd "$$d" && iverilog -g2012 -DTESTING -o test "$$src/tc/sv/bww_multiplier.sv" && ./test); \ 64 | 65 | d="$$(mktemp -d)" && \ 66 | trap "rm -rf '$$d'" EXIT && \ 67 | for fma in '' '--fma'; do \ 68 | for mulh in '' '--mulh'; do \ 69 | cargo run -p bww-multiplier-generator -- $$fma $$mulh 8 >"$$d/bww_multiplier.sv" && \ 70 | (cd "$$d" && iverilog -g2012 -DTESTING -o test bww_multiplier.sv && ./test) || exit 1; \ 71 | done; \ 72 | done 73 | 74 | 75 | .PHONY: test-decompressor 76 | test: test-decompressor 77 | test-decompressor: 78 | src="$$PWD" && \ 79 | d="$$(mktemp -d)" && \ 80 | 
trap "rm -rf '$$d'" EXIT && \ 81 | (cd "$$d" && iverilog -g2012 -DTESTING -o test "$$src/tc/sv/rv_decompressor.sv" && ./test) 82 | 83 | 84 | .PHONY: test-decompressor_priority 85 | test: test-decompressor_priority 86 | test-decompressor_priority: 87 | src="$$PWD" && \ 88 | d="$$(mktemp -d)" && \ 89 | trap "rm -rf '$$d'" EXIT && \ 90 | (cd "$$d" && iverilog -g2012 -DTESTING -o test "$$src/tc/sv/rv_decompressor_priority.sv" && ./test) 91 | 92 | 93 | .PHONY: test-load 94 | test: test-load 95 | test-load: 96 | src="$$PWD" && \ 97 | d="$$(mktemp -d)" && \ 98 | trap "rm -rf '$$d'" EXIT && \ 99 | (cd "$$d" && iverilog -g2012 -DTESTING -o test "$$src/tc/sv/load32.sv" "$$src/tc/sv/load64.sv" && ./test) 100 | 101 | 102 | .PHONY: test-ram_cache 103 | test: test-ram_cache 104 | test-ram_cache: 105 | src="$$PWD" && \ 106 | d="$$(mktemp -d)" && \ 107 | trap "rm -rf '$$d'" EXIT && \ 108 | (cd "$$d" && iverilog -g2012 -DTESTING -o test "$$src/tc/sv/ram_cache.sv" && ./test) 109 | 110 | 111 | .PHONY: test-ram_cache_tree_plru 112 | test: test-ram_cache_tree_plru 113 | test-ram_cache_tree_plru: 114 | src="$$PWD" && \ 115 | d="$$(mktemp -d)" && \ 116 | trap "rm -rf '$$d'" EXIT && \ 117 | (cd "$$d" && iverilog -g2012 -DTESTING -o test "$$src/tc/sv/ram_cache_tree_plru.sv" && ./test) 118 | 119 | 120 | .PHONY: freestanding 121 | freestanding: 122 | cd freestanding && cargo build --release --target riscv64-arnavion-none-elf.json -Z build-std=core 123 | 124 | 125 | .PHONY: freestanding-inspect 126 | freestanding-inspect: freestanding 127 | ~/.rustup/toolchains/nightly-x86_64-unknown-linux-gnu/lib/rustlib/x86_64-unknown-linux-gnu/bin/llvm-objdump -D ./freestanding/target/riscv64-arnavion-none-elf/release/freestanding 128 | 129 | 130 | EMULATOR_SAVE_DIR = ~/non-oss-root/steam/.local/share/godot/app_userdata/Turing\ Complete/schematics/architecture/RISC-V 131 | EMULATOR_IN_FILE = ./tc/calibrating-laser-cannons-2.S 132 | 133 | .PHONY: freestanding-install 134 | freestanding-install: freestanding 
135 | rm -f $(EMULATOR_SAVE_DIR)/sandbox/new_program.asm 136 | src="$$PWD" && \ 137 | d="$$(mktemp -d)" && \ 138 | trap "rm -rf '$$d'" EXIT && \ 139 | objcopy ./freestanding/target/riscv64-arnavion-none-elf/release/freestanding -O binary "$$d/flat" && \ 140 | od --address-radix=none --format=x8 --output-duplicates --width=8 "$$d/flat" | \ 141 | sed -Ee 's/^\s*0*(.*)/0x\1/;s/0x$$/0/' >>$(EMULATOR_SAVE_DIR)/sandbox/new_program.asm 142 | cp $(EMULATOR_IN_FILE) ~/non-oss-root/steam/in_file 143 | cp $(EMULATOR_IN_FILE) $(EMULATOR_SAVE_DIR)/in_file 144 | 145 | 146 | .PHONY: simulator 147 | test: simulator 148 | simulator: freestanding 149 | d="$$(mktemp -d)" && \ 150 | trap "rm -rf '$$d'" EXIT && \ 151 | objcopy ./freestanding/target/riscv64-arnavion-none-elf/release/freestanding -O binary "$$d/flat" && \ 152 | cargo run --release -p simulator -- --mode in-order -- "$$d/flat" $(EMULATOR_IN_FILE) 153 | 154 | 155 | .PHONY: simulator-ucode 156 | test: simulator-ucode 157 | simulator-ucode: freestanding 158 | d="$$(mktemp -d)" && \ 159 | trap "rm -rf '$$d'" EXIT && \ 160 | objcopy ./freestanding/target/riscv64-arnavion-none-elf/release/freestanding -O binary "$$d/flat" && \ 161 | cargo run --release -p simulator -- --mode in-order-ucode -- "$$d/flat" $(EMULATOR_IN_FILE) 162 | 163 | 164 | .PHONY: simulator-ooo 165 | test: simulator-ooo 166 | simulator-ooo: freestanding 167 | d="$$(mktemp -d)" && \ 168 | trap "rm -rf '$$d'" EXIT && \ 169 | objcopy ./freestanding/target/riscv64-arnavion-none-elf/release/freestanding -O binary "$$d/flat" && \ 170 | cargo run --release -p simulator -- --mode out-of-order --ooo-max-retire-per-cycle 4 -- "$$d/flat" $(EMULATOR_IN_FILE) 171 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | RISC-V assembler for an emulator I made in the game [Turing Complete](https://store.steampowered.com/app/1444480/Turing_Complete/) to 
learn about the ISA and have fun. 2 | 3 | --- 4 | 5 | ![Screenshot of the RISC-V emulator](https://www.arnavion.dev/img/tc-riscv.png) 6 | 7 | [Video of the RISC-V emulator solving Towers of Alloy](https://www.arnavion.dev/img/tc-riscv-tower-of-alloy.mp4) 8 | 9 | --- 10 | 11 | Per the [unprivileged ISA spec version 20240411,](https://github.com/riscv/riscv-isa-manual/releases/tag/20240411) the assembler supports: 12 | 13 | - RV32I 2.1 (32-bit integer register instructions) 14 | 15 | - RV64I 2.1 (64-bit integer register instructions) 16 | 17 | - Zba 1.0.0 (address generation instructions) 18 | 19 | - Zbb 1.0.0 (basic bit-manipulation instructions) 20 | 21 | - Zbs 1.0.0 (single-bit instructions) 22 | 23 | - Zca 1.0.0 (compressed instructions for integer registers) 24 | 25 | - Zcb 1.0.0 (additional compressed instructions for integer registers) 26 | 27 | - Zicntr 2.0 (cycle, time and instret CSRs) 28 | 29 | - Zicond 1.0.0 (conditional instructions for integer registers) 30 | 31 | - Zicsr 2.0 (CSR instructions) 32 | 33 | - Zmmul 1.0 (integer multiplication instructions) 34 | 35 | Further extensions are not supported, notably instructions for hardware division (M) and hardware floats (F, D). 36 | 37 | Compressed instructions are supported in the sense that the assembler will encode regular instructions like `add` and `lbu` into the compressed form when compression is enabled. The mnemonics for the compressed instructions like `c.add` and `c.lbu` are not supported. Instructions that only exist in compressed instruction extensions like `c.lwsp` can be written as `lwsp` or `c.lwsp`. 38 | 39 | The assembler also only partially implements the full syntax supported by GNU / LLVM, and notably does not support labels, symbolic constants or data sections. 
It *does* support the register mnemonics like `ra` and pseudo-instructions like `j` listed in [the ASM manual](https://github.com/riscv-non-isa/riscv-asm-manual/blob/ad0de8c004e29c9a7ac33cfd054f4d4f9392f2fb/src/asm-manual.adoc) (and older versions of the ISA spec before they were [removed](https://github.com/riscv/riscv-isa-manual/issues/1470)). 40 | 41 | --- 42 | 43 | The assembler can be compiled as a freestanding binary that runs on the emulator. In this case the input file is read from a RAM with "Initial data" set to "File", and the output is written to a memory address range that is expected to be present in a RAM linked to a Console. 44 | 45 | `make freestanding` will compile the binary, `make freestanding-install` will install it along with the input file, and `make freestanding-inspect` will run `llvm-objdump` on the binary. 46 | 47 | --- 48 | 49 | The `tc/` directory contains solutions for some of the game's architecture puzzles using the emulator. 50 | 51 | The `*.S` files contain the assembler programs. Running `cargo run -p as -- tc/foo.S` will print the compiled program to stdout which can then be copy-pasted into the game's Program component. The component must have "Data width" set to "16 Bit". Running `cargo run -p as -- --compressed tc/foo.S` will do the same but enable compressed instructions. Running `cargo run -p as -- --compressed=Zcb tc/foo.S` will also enable compressed instructions from the Zcb extension. 52 | 53 | The `--save-breaker` option will emit the output in the format used by the "save_breaker" branch's assembler. In this case, the program RAM's instruction Load port's width must be set to "32" ("64" when using MOP fusion). 54 | 55 | The assembler does not consider whether the target architecture is 32-bit or 64-bit and will simply encode whatever instructions are given to it. This works fine because RV64I does not modify the behavior of RV32I instructions, except for a few situations: 56 | 57 | 1. 
The shift instructions take a 5-bit shift amount in RV32I and a 6-bit shift amount in RV64I. 58 | 59 | 2. The pseudo-instructions `sext.b`, `sext.h` and `zext.h` shift the source register by different amounts in RV32I vs RV64I. 60 | 61 | 3. The non-pseudo `zext.h` instruction and the `rev8` instruction in the Zbb extension have different opcodes in RV32I vs RV64I. 62 | 63 | 4. `c.jal` is only valid in RV32C; an RV64C implementation would interpret it as `c.addiw` instead. Thus `jal` cannot be compressed into `c.jal` on RV64C. 64 | 65 | Therefore the assembler also has a `--64` flag to explicitly set the target architecture to RV64I. When combined with the `--compressed` flag it will instruct the assembler to not compress `jal`. 66 | 67 | The `*.c` files contain equivalent C solutions that can be put in [Compiler Explorer](https://gcc.godbolt.org/) with compiler set to `RISC-V (32-bits) gcc` or `RISC-V rv32gc clang` or the corresponding 64-bit version, and flags set to `--std=c23 -Os -march=rv32id_zba_zbb_zbs_zicond_zmmul` or `--std=c23 -Os -march=rv64id_zba_zbb_zbs_zicond_zmmul`. Note that the assembler programs are hand-written and will not exactly match the compiler's output. 68 | 69 | The emulator has the Level Input and Level Output wired up to memory address `2^xlen - 8`, which is why the assembler programs refer to `li fp, -8; l{b,h}u rd, 0(fp)` and the C programs refer to `IO = (volatile uint{8,16}_t*)(intptr_t)-8; x = *IO;`. 70 | 71 | --- 72 | 73 | The `simulator/` directory contains a simulator with the same behavior and cycle timings as the in-game in-order emulator. It also contains a microcoded in-order implementation, and an out-of-order implementation with multiple functional units and multiple branch speculation. 
74 | 75 | --- 76 | 77 | # License 78 | 79 | AGPL-3.0-only 80 | 81 | ``` 82 | riscv 83 | 84 | https://github.com/Arnavion/riscv 85 | 86 | Copyright 2024 Arnav Singh 87 | 88 | This program is free software: you can redistribute it and/or modify 89 | it under the terms of the GNU Affero General Public License as 90 | published by the Free Software Foundation, version 3 of the 91 | License. 92 | 93 | This program is distributed in the hope that it will be useful, 94 | but WITHOUT ANY WARRANTY; without even the implied warranty of 95 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 96 | GNU Affero General Public License for more details. 97 | 98 | You should have received a copy of the GNU Affero General Public License 99 | along with this program. If not, see <https://www.gnu.org/licenses/>. 100 | ``` 101 | -------------------------------------------------------------------------------- /as/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "as" 3 | version = "0.1.0" 4 | license = "AGPL-3.0-only" 5 | authors = ["Arnav Singh "] 6 | edition = "2024" 7 | publish = false 8 | 9 | 10 | [dependencies] 11 | riscv = { path = ".." 
} 12 | 13 | 14 | [lints] 15 | workspace = true 16 | -------------------------------------------------------------------------------- /as/src/main.rs: -------------------------------------------------------------------------------- 1 | fn main() -> Result<(), Box> { 2 | let mut args = std::env::args_os(); 3 | let argv0 = args.next().unwrap_or_else(|| env!("CARGO_BIN_NAME").into()); 4 | let (path, supported_extensions, save_breaker) = parse_args(args, &argv0); 5 | 6 | let program = std::fs::read_to_string(path)?; 7 | 8 | let mut pc = 0_u64; 9 | 10 | for instruction in riscv::parse_program(program.lines().map(str::as_bytes), supported_extensions) { 11 | let instruction = instruction.map_err(|err| err.to_string())?; 12 | let (lo, hi) = 13 | instruction.encode(supported_extensions) 14 | .map_err(|err| format!("instruction could not be encoded {instruction:?}: {err}"))?; 15 | #[allow(clippy::collapsible_else_if)] 16 | if let Some(hi) = hi { 17 | if save_breaker { 18 | println!("0x{hi:04x}{lo:04x} ; {pc:3}: {instruction}"); 19 | } 20 | else { 21 | println!("0x{lo:04x} 0x{hi:04x} # {pc:3}: {instruction}"); 22 | } 23 | 24 | pc += 4; 25 | } 26 | else { 27 | if save_breaker { 28 | println!("0x{lo:04x} ; {pc:3}: {instruction}"); 29 | } 30 | else { 31 | println!("0x{lo:04x} # {pc:3}: {instruction}"); 32 | } 33 | 34 | pc += 2; 35 | } 36 | } 37 | 38 | Ok(()) 39 | } 40 | 41 | fn parse_args(mut args: impl Iterator, argv0: &std::ffi::OsStr) -> (std::path::PathBuf, riscv::SupportedExtensions, bool) { 42 | let mut path = None; 43 | let mut supported_extensions = riscv::SupportedExtensions::RV32I; 44 | let mut save_breaker = false; 45 | 46 | for opt in &mut args { 47 | match opt.to_str() { 48 | Some("--help") => { 49 | write_usage(std::io::stdout(), argv0); 50 | std::process::exit(0); 51 | }, 52 | 53 | Some("--") => { 54 | path = args.next(); 55 | break; 56 | }, 57 | 58 | Some("-c" | "--compressed" | "--compressed=true") => supported_extensions |= riscv::SupportedExtensions::RVC, 59 | 
60 | Some("--compressed=false") => supported_extensions &= !riscv::SupportedExtensions::ZCB, 61 | 62 | Some("--compressed=Zcb") => supported_extensions |= riscv::SupportedExtensions::ZCB, 63 | 64 | Some("--save-breaker" | "--sb") => save_breaker = true, 65 | 66 | Some("--zba") => supported_extensions |= riscv::SupportedExtensions::ZBA, 67 | 68 | Some("--zbb") => supported_extensions |= riscv::SupportedExtensions::ZBB, 69 | 70 | Some("--32") => supported_extensions &= !riscv::SupportedExtensions::RV64I, 71 | 72 | Some("--64") => supported_extensions |= riscv::SupportedExtensions::RV64I, 73 | 74 | _ if path.is_none() => path = Some(opt), 75 | 76 | _ => write_usage_and_crash(argv0), 77 | } 78 | } 79 | 80 | let None = args.next() else { write_usage_and_crash(argv0); }; 81 | 82 | let Some(path) = path else { write_usage_and_crash(argv0); }; 83 | (path.into(), supported_extensions, save_breaker) 84 | } 85 | 86 | fn write_usage_and_crash(argv0: &std::ffi::OsStr) -> ! { 87 | write_usage(std::io::stderr(), argv0); 88 | std::process::exit(1); 89 | } 90 | 91 | fn write_usage(mut w: impl std::io::Write, argv0: &std::ffi::OsStr) { 92 | _ = writeln!(w, "Usage: {} [ --32 | --64 ] [ -c | --compressed | --compressed=[true|false|Zcb] ] [ --sb | --save-breaker ] [ --zba ] [ --zbb ] [ -- ] ", argv0.to_string_lossy()); 93 | } 94 | -------------------------------------------------------------------------------- /bww-multiplier-generator/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "bww-multiplier-generator" 3 | version = "0.1.0" 4 | license = "AGPL-3.0-only" 5 | authors = ["Arnav Singh "] 6 | edition = "2024" 7 | publish = false 8 | 9 | 10 | [dependencies] 11 | num-bigint = { version = "0.4", default-features = false } 12 | num-traits = { version = "0.2", default-features = false } 13 | 14 | 15 | [lints] 16 | workspace = true 17 | -------------------------------------------------------------------------------- 
/bww-multiplier-generator/src/main.rs: -------------------------------------------------------------------------------- 1 | //! Verilog generator for a Baugh-Wooley multiplier, modified to support both signed and unsigned inputs, 2 | //! and the partial products are summed using a Wallace tree. 3 | //! 4 | //! Example: 5 | //! 6 | //! cargo run -p bww-multiplier-generator -- --mulh 8 >./tc/sv/bww_multiplier.sv 7 | 8 | use num_traits::{One as _, Zero as _}; 9 | 10 | fn main() { 11 | let mut args = std::env::args_os(); 12 | let argv0 = args.next().unwrap_or_else(|| env!("CARGO_BIN_NAME").into()); 13 | let (fma, mulh, width) = parse_args(args, &argv0); 14 | 15 | let width_minus_one = width - 1; 16 | 17 | println!("module bww_multiplier ("); 18 | 19 | println!("\tinput bit[{width_minus_one}:0] a,"); 20 | if mulh { 21 | println!("\tinput bit a_is_signed,"); 22 | } 23 | 24 | println!("\tinput bit[{width_minus_one}:0] b,"); 25 | if mulh { 26 | println!("\tinput bit b_is_signed,"); 27 | } 28 | 29 | if fma { 30 | println!("\tinput bit[{width_minus_one}:0] c,"); 31 | } 32 | 33 | println!(); 34 | print!("\toutput bit[{width_minus_one}:0] mul"); 35 | 36 | if mulh { 37 | print!(",\n\toutput bit[{width_minus_one}:0] mulh"); 38 | } 39 | 40 | println!("\n);"); 41 | 42 | let mut products = std::collections::BTreeMap::<_, num_bigint::BigUint>::new(); 43 | 44 | for b in 0_u8..(if mulh { width * 2 } else { width }) { 45 | for a in 0_u8..(if mulh { width * 2 } else { width }) { 46 | let pos = u64::from(a) + u64::from(b); 47 | if pos >= u64::from(width) * 2 { break; } 48 | 49 | let a = if a < width { InputWire::Index(a) } else { InputWire::SignExtended(width - 1) }; 50 | let b = if b < width { InputWire::Index(b) } else { InputWire::SignExtended(width - 1) }; 51 | *products.entry((a, b)).or_default() += num_bigint::BigUint::one() << pos; 52 | } 53 | } 54 | 55 | let mut cols: std::collections::VecDeque<_> = vec![std::collections::BinaryHeap::new(); usize::from(width) * if mulh { 2 } else 
{ 1 }].into(); 56 | let mut constant = num_bigint::BigInt::ZERO; 57 | for ((a, b), mut count) in products { 58 | let mut col_i = 0_u8; 59 | while let Some(col) = cols.get_mut(usize::from(col_i)) { 60 | let Some(trailing_zeros) = count.trailing_zeros() else { break; }; 61 | if trailing_zeros > 0 { 62 | col_i += u8::try_from(trailing_zeros).unwrap(); 63 | count >>= trailing_zeros; 64 | continue; 65 | } 66 | 67 | let delay = a.delay().max(b.delay()) + 1; 68 | 69 | let trailing_ones = count.trailing_ones(); 70 | if trailing_ones == 1 { 71 | col.push(std::cmp::Reverse(Wire { delay, kind: WireKind::InputsAnd { a, b } })); 72 | count -= 1_u8; 73 | } 74 | else { 75 | col.push(std::cmp::Reverse(Wire { delay, kind: WireKind::InputsNand { a, b } })); 76 | constant -= num_bigint::BigInt::one() << col_i; 77 | count += 1_u8; 78 | } 79 | } 80 | } 81 | 82 | if fma { 83 | for (col_i, col) in cols.iter_mut().enumerate() { 84 | col.push(std::cmp::Reverse(Wire { 85 | delay: 0, 86 | kind: WireKind::Addend { i: InputWire::Index(u8::try_from(col_i).unwrap().min(width - 1)) } 87 | })); 88 | } 89 | } 90 | 91 | for col in &mut cols { 92 | if constant.is_zero() { 93 | break; 94 | } 95 | if constant.bit(0) { 96 | col.push(std::cmp::Reverse(Wire { delay: u64::MAX, kind: WireKind::One })); 97 | } 98 | constant >>= 1; 99 | } 100 | 101 | let mut adder_next_id = 0_u64; 102 | let mut outputs = vec![]; 103 | 104 | while let Some(mut col) = cols.pop_front() { 105 | loop { 106 | let std::cmp::Reverse(a) = col.pop().unwrap(); 107 | 108 | let Some(std::cmp::Reverse(b)) = col.pop() else { 109 | outputs.push(a); 110 | break; 111 | }; 112 | 113 | let Some(std::cmp::Reverse(c)) = col.pop() else { 114 | if let Some(next_col) = cols.front_mut() { 115 | let adder_id = adder_next_id; 116 | adder_next_id += 1; 117 | 118 | println!("\twire s{adder_id}, c{adder_id};"); 119 | println!("\thalf_adder adder{adder_id}({a}, {b}, s{adder_id}, c{adder_id});"); 120 | 121 | outputs.push(Wire { delay: a.delay.max(b.delay) + 
2, kind: WireKind::Sum { adder_id } }); 122 | 123 | next_col.push(std::cmp::Reverse(Wire { delay: a.delay.max(b.delay) + 1, kind: WireKind::Carry { adder_id } })); 124 | } 125 | else { 126 | outputs.push(Wire { delay: a.delay.max(b.delay) + 2, kind: WireKind::Xor { a: Box::new(a.kind), b: Box::new(b.kind) } }); 127 | } 128 | 129 | break; 130 | }; 131 | 132 | match (&a.kind, &b.kind, &c.kind) { 133 | (_, WireKind::One, WireKind::One) => { 134 | col.push(std::cmp::Reverse(a)); 135 | if let Some(next_col) = cols.front_mut() { 136 | next_col.push(std::cmp::Reverse(Wire { delay: u64::MAX, kind: WireKind::One })); 137 | } 138 | }, 139 | 140 | (_, _, WireKind::One) => { 141 | let d = col.pop(); 142 | if let Some(std::cmp::Reverse(Wire { kind: WireKind::One, .. })) = &d { 143 | col.push(std::cmp::Reverse(a)); 144 | col.push(std::cmp::Reverse(b)); 145 | if let Some(next_col) = cols.front_mut() { 146 | next_col.push(std::cmp::Reverse(Wire { delay: u64::MAX, kind: WireKind::One })); 147 | } 148 | } 149 | else { 150 | assert_eq!(d, None, "cannot have anything greater than WireKind::One"); 151 | 152 | if let Some(next_col) = cols.front_mut() { 153 | let adder_id = adder_next_id; 154 | adder_next_id += 1; 155 | 156 | println!("\twire s{adder_id}, c{adder_id};"); 157 | println!("\thalf_adder_plus_one adder{adder_id} ({a}, {b}, s{adder_id}, c{adder_id});"); 158 | 159 | col.push(std::cmp::Reverse(Wire { delay: a.delay.max(b.delay) + 2, kind: WireKind::Sum { adder_id } })); 160 | next_col.push(std::cmp::Reverse(Wire { delay: a.delay.max(b.delay) + 1, kind: WireKind::Carry { adder_id } })); 161 | } 162 | else { 163 | col.push(std::cmp::Reverse(Wire { delay: a.delay.max(b.delay) + 2, kind: WireKind::Xnor { a: Box::new(b.kind), b: Box::new(c.kind) } })); 164 | } 165 | } 166 | }, 167 | 168 | (_, _, _) => { 169 | if let Some(next_col) = cols.front_mut() { 170 | let adder_id = adder_next_id; 171 | adder_next_id += 1; 172 | 173 | println!("\twire s{adder_id}, c{adder_id};"); 174 | 
println!("\tfull_adder adder{adder_id} ({a}, {b}, {c}, s{adder_id}, c{adder_id});"); 175 | 176 | col.push(std::cmp::Reverse(Wire { delay: (a.delay.max(b.delay) + 2).max(c.delay) + 2, kind: WireKind::Sum { adder_id } })); 177 | next_col.push(std::cmp::Reverse(Wire { delay: (a.delay.max(b.delay) + 1).max(c.delay) + 2, kind: WireKind::Carry { adder_id } })); 178 | } 179 | else { 180 | col.push(std::cmp::Reverse(c)); 181 | col.push(std::cmp::Reverse(Wire { delay: a.delay.max(b.delay) + 2, kind: WireKind::Xor { a: Box::new(a.kind), b: Box::new(b.kind) } })); 182 | } 183 | }, 184 | } 185 | } 186 | } 187 | 188 | let mut first = true; 189 | if mulh { 190 | print!("\tassign {{mulh, mul}} = {{"); 191 | } 192 | else { 193 | print!("\tassign mul = {{"); 194 | } 195 | for wire in outputs.into_iter().rev() { 196 | if first { 197 | first = false; 198 | } 199 | else { 200 | print!(","); 201 | } 202 | print!("\n\t\t{wire}"); 203 | } 204 | println!("\n\t}};"); 205 | 206 | println!("endmodule"); 207 | println!(); 208 | println!("module half_adder ("); 209 | println!("\tinput bit a,"); 210 | println!("\tinput bit b,"); 211 | println!("\toutput bit sum,"); 212 | println!("\toutput bit carry"); 213 | println!(");"); 214 | println!("\tassign {{carry, sum}} = {{1'b0, a}} + {{1'b0, b}};"); 215 | println!("endmodule"); 216 | println!(); 217 | println!("module half_adder_plus_one ("); 218 | println!("\tinput bit a,"); 219 | println!("\tinput bit b,"); 220 | println!("\toutput bit sum,"); 221 | println!("\toutput bit carry"); 222 | println!(");"); 223 | println!("\tassign {{carry, sum}} = {{1'b0, a}} + {{1'b0, b}} + 2'b01;"); 224 | println!("endmodule"); 225 | println!(); 226 | println!("module full_adder ("); 227 | println!("\tinput bit a,"); 228 | println!("\tinput bit b,"); 229 | println!("\tinput bit c,"); 230 | println!("\toutput bit sum,"); 231 | println!("\toutput bit carry"); 232 | println!(");"); 233 | println!("\tassign {{carry, sum}} = {{1'b0, a}} + {{1'b0, b}} + {{1'b0, c}};"); 
234 | println!("endmodule"); 235 | 236 | println!(); 237 | println!("`ifdef TESTING"); 238 | println!("module test_bww_multiplier;"); 239 | println!("\tbit[{width_minus_one}:0] a;"); 240 | if mulh { 241 | println!("\tbit a_is_signed;"); 242 | } 243 | println!("\tbit[{width_minus_one}:0] b;"); 244 | if mulh { 245 | println!("\tbit b_is_signed;"); 246 | } 247 | if fma { 248 | println!("\tbit[{width_minus_one}:0] c;"); 249 | } 250 | println!("\twire[{width_minus_one}:0] mul;"); 251 | if mulh { 252 | println!("\twire[{width_minus_one}:0] mulh;"); 253 | } 254 | println!("\tbww_multiplier bww_multiplier_module ("); 255 | print!("\t\ta,"); 256 | if mulh { 257 | print!(" a_is_signed,"); 258 | } 259 | print!("\n\t\tb,"); 260 | if mulh { 261 | print!(" b_is_signed,"); 262 | } 263 | println!(); 264 | if fma { 265 | println!("\t\tc,"); 266 | } 267 | print!("\t\tmul"); 268 | if mulh { 269 | print!(", mulh"); 270 | } 271 | println!("\n\t);"); 272 | println!(); 273 | println!("\tinitial begin"); 274 | 275 | println!("\t\ta = -{width}'d1;"); 276 | if mulh { 277 | println!("\t\ta_is_signed = '0;"); 278 | } 279 | println!("\t\tb = -{width}'d1;"); 280 | if mulh { 281 | println!("\t\tb_is_signed = '0;"); 282 | } 283 | if fma { 284 | println!("\t\tc = {width}'d0;"); 285 | } 286 | println!("\t\t#1"); 287 | println!("\t\tassert(mul == {width}'d1) else $fatal;"); 288 | if mulh { 289 | println!("\t\tassert(mulh == -{width}'d2) else $fatal;"); 290 | } 291 | 292 | if fma { 293 | println!(); 294 | println!("\t\ta = -{width}'d1;"); 295 | if mulh { 296 | println!("\t\ta_is_signed = '0;"); 297 | } 298 | println!("\t\tb = -{width}'d1;"); 299 | if mulh { 300 | println!("\t\tb_is_signed = '0;"); 301 | } 302 | println!("\t\tc = -{width}'d1;"); 303 | println!("\t\t#1"); 304 | println!("\t\tassert(mul == {width}'d0) else $fatal;"); 305 | if mulh { 306 | println!("\t\tassert(mulh == -{width}'d2) else $fatal;"); 307 | } 308 | } 309 | 310 | if mulh { 311 | println!(); 312 | println!("\t\ta = 
-{width}'d1;"); 313 | println!("\t\ta_is_signed = '1;"); 314 | println!("\t\tb = -{width}'d1;"); 315 | println!("\t\tb_is_signed = '0;"); 316 | if fma { 317 | println!("\t\tc = {width}'d0;"); 318 | } 319 | println!("\t\t#1"); 320 | println!("\t\tassert(mul == {width}'d1) else $fatal;"); 321 | println!("\t\tassert(mulh == -{width}'d1) else $fatal;"); 322 | 323 | if fma { 324 | println!(); 325 | println!("\t\ta = -{width}'d1;"); 326 | println!("\t\ta_is_signed = '1;"); 327 | println!("\t\tb = -{width}'d1;"); 328 | println!("\t\tb_is_signed = '0;"); 329 | println!("\t\tc = -{width}'d1;"); 330 | println!("\t\t#1"); 331 | println!("\t\tassert(mul == {width}'d0) else $fatal;"); 332 | println!("\t\tassert(mulh == -{width}'d1) else $fatal;"); 333 | } 334 | 335 | println!(); 336 | println!("\t\ta = -{width}'d1;"); 337 | println!("\t\ta_is_signed = '0;"); 338 | println!("\t\tb = -{width}'d1;"); 339 | println!("\t\tb_is_signed = '1;"); 340 | if fma { 341 | println!("\t\tc = {width}'d0;"); 342 | } 343 | println!("\t\t#1"); 344 | println!("\t\tassert(mul == {width}'d1) else $fatal;"); 345 | println!("\t\tassert(mulh == -{width}'d1) else $fatal;"); 346 | 347 | if fma { 348 | println!(); 349 | println!("\t\ta = -{width}'d1;"); 350 | println!("\t\ta_is_signed = '0;"); 351 | println!("\t\tb = -{width}'d1;"); 352 | println!("\t\tb_is_signed = '1;"); 353 | println!("\t\tc = -{width}'d1;"); 354 | println!("\t\t#1"); 355 | println!("\t\tassert(mul == {width}'d0) else $fatal;"); 356 | println!("\t\tassert(mulh == -{width}'d1) else $fatal;"); 357 | } 358 | 359 | println!(); 360 | println!("\t\ta = -{width}'d1;"); 361 | println!("\t\ta_is_signed = '1;"); 362 | println!("\t\tb = -{width}'d1;"); 363 | println!("\t\tb_is_signed = '1;"); 364 | if fma { 365 | println!("\t\tc = {width}'d0;"); 366 | } 367 | println!("\t\t#1"); 368 | println!("\t\tassert(mul == {width}'d1) else $fatal;"); 369 | println!("\t\tassert(mulh == {width}'d0) else $fatal;"); 370 | 371 | if fma { 372 | println!(); 373 | 
/// A wire in the generated circuit.
///
/// Wires are ordered by `delay` first and by `kind` second.
#[derive(Clone, Debug, Eq, PartialEq)]
struct Wire {
    delay: u64,
    kind: WireKind,
}

/// What a [`Wire`] carries. The `Display` impl renders the expression that is emitted for it.
#[derive(Clone, Debug, Eq, PartialEq)]
enum WireKind {
    /// A constant; it is never rendered (`Display` panics on it).
    One,
    /// A bit of the `c` input.
    Addend { i: InputWire },
    /// AND of one bit of `a` and one bit of `b`.
    InputsAnd { a: InputWire, b: InputWire },
    /// NAND of one bit of `a` and one bit of `b`.
    InputsNand { a: InputWire, b: InputWire },
    /// Rendered as `s{adder_id}`.
    Sum { adder_id: u64 },
    /// Rendered as `c{adder_id}`.
    Carry { adder_id: u64 },
    // NOTE(review): the type parameter of these boxes was missing in the reviewed text;
    // `Wire` is assumed here. Confirm against the places that construct these variants.
    /// XOR of two other wires.
    Xor { a: Box<Wire>, b: Box<Wire> },
    /// XNOR of two other wires.
    Xnor { a: Box<Wire>, b: Box<Wire> },
}

/// A reference to one bit of an input bus.
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
enum InputWire {
    /// The bit itself.
    Index(u8),
    /// The bit ANDed with the bus's `*_is_signed` flag.
    SignExtended(u8),
}

impl std::fmt::Display for Wire {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.kind.fmt(f)
    }
}

impl PartialOrd for Wire {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for Wire {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.delay.cmp(&other.delay).then_with(|| self.kind.cmp(&other.kind))
    }
}

impl std::fmt::Display for WireKind {
    /// Renders the expression for this wire.
    ///
    /// # Panics
    ///
    /// Panics for [`WireKind::One`], which must never be emitted.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::One => panic!("WireKind::One should never be emitted"),

            Self::Addend { i } => i.fmt_operand('c', f),

            Self::InputsAnd { a, b } => {
                a.fmt_operand('a', f)?;
                f.write_str(" & ")?;
                b.fmt_operand('b', f)
            },

            Self::InputsNand { a, b } => {
                f.write_str("~(")?;
                a.fmt_operand('a', f)?;
                f.write_str(" & ")?;
                b.fmt_operand('b', f)?;
                f.write_str(")")
            },

            Self::Sum { adder_id } => write!(f, "s{adder_id}"),

            Self::Carry { adder_id } => write!(f, "c{adder_id}"),

            Self::Xor { a, b } => write!(f, "({a}) ^ ({b})"),

            Self::Xnor { a, b } => write!(f, "~({a}) ^ ({b})"),
        }
    }
}

impl PartialOrd for WireKind {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl WireKind {
    /// Coarse position of this kind in the total order:
    /// `One` < `Addend` < `InputsAnd`/`InputsNand` < `Sum`/`Carry` < `Xor`/`Xnor`.
    fn ordering_group(&self) -> u8 {
        match self {
            Self::One => 0,
            Self::Addend { .. } => 1,
            Self::InputsAnd { .. } | Self::InputsNand { .. } => 2,
            Self::Sum { .. } | Self::Carry { .. } => 3,
            Self::Xor { .. } | Self::Xnor { .. } => 4,
        }
    }
}

impl Ord for WireKind {
    /// Kinds are ordered by group first (see `ordering_group`). Inside a group the operands
    /// are compared first, and only for equal operands does the variant break the tie:
    /// `InputsAnd` < `InputsNand`, `Sum` < `Carry`, `Xor` < `Xnor`.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.ordering_group().cmp(&other.ordering_group()).then_with(|| match (self, other) {
            (Self::One, Self::One) => std::cmp::Ordering::Equal,

            (Self::Addend { i: i1 }, Self::Addend { i: i2 }) => i1.cmp(i2),

            (
                Self::InputsAnd { a: a1, b: b1 } | Self::InputsNand { a: a1, b: b1 },
                Self::InputsAnd { a: a2, b: b2 } | Self::InputsNand { a: a2, b: b2 },
            ) =>
                a1.cmp(a2)
                    .then_with(|| b1.cmp(b2))
                    .then_with(|| {
                        let self_is_nand = matches!(self, Self::InputsNand { .. });
                        let other_is_nand = matches!(other, Self::InputsNand { .. });
                        self_is_nand.cmp(&other_is_nand)
                    }),

            (
                Self::Sum { adder_id: id1 } | Self::Carry { adder_id: id1 },
                Self::Sum { adder_id: id2 } | Self::Carry { adder_id: id2 },
            ) =>
                id1.cmp(id2).then_with(|| {
                    let self_is_carry = matches!(self, Self::Carry { .. });
                    let other_is_carry = matches!(other, Self::Carry { .. });
                    self_is_carry.cmp(&other_is_carry)
                }),

            (
                Self::Xor { a: a1, b: b1 } | Self::Xnor { a: a1, b: b1 },
                Self::Xor { a: a2, b: b2 } | Self::Xnor { a: a2, b: b2 },
            ) =>
                a1.cmp(a2)
                    .then_with(|| b1.cmp(b2))
                    .then_with(|| {
                        let self_is_xnor = matches!(self, Self::Xnor { .. });
                        let other_is_xnor = matches!(other, Self::Xnor { .. });
                        self_is_xnor.cmp(&other_is_xnor)
                    }),

            // This closure only runs when both kinds have the same group.
            _ => unreachable!("kinds with equal ordering groups are handled above"),
        })
    }
}

impl InputWire {
    fn delay(self) -> u64 {
        match self {
            Self::Index(_) => 0,
            // The sign-extended form is an AND of the bit with the `*_is_signed` flag.
            Self::SignExtended(_) => 1,
        }
    }

    /// Writes this bit of the input bus named `bus` (`a`, `b` or `c`):
    /// `bus[i]` for `Index(i)`, `bus[i] & bus_is_signed` for `SignExtended(i)`.
    fn fmt_operand(self, bus: char, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Index(i) => write!(f, "{bus}[{i}]"),
            Self::SignExtended(i) => write!(f, "{bus}[{i}] & {bus}_is_signed"),
        }
    }
}

/// Parses the command-line arguments (without `argv[0]`) into `(fma, mulh, width)`.
///
/// `--fma` and `--mulh` are flags; the single remaining argument is the width and must parse as a `u8`.
/// `--help` prints the usage to stdout and exits with status 0.
/// Anything else (unknown or non-UTF-8 argument, a second width, a missing or unparseable width)
/// prints the usage to stderr and exits with status 1.
fn parse_args(mut args: impl Iterator<Item = std::ffi::OsString>, argv0: &std::ffi::OsStr) -> (bool, bool, u8) {
    let mut fma = false;
    let mut mulh = false;
    let mut width = None;

    for opt in &mut args {
        match opt.to_str() {
            Some("--help") => {
                write_usage(std::io::stdout(), argv0);
                std::process::exit(0);
            },

            Some("--fma") => fma = true,

            Some("--mulh") => mulh = true,

            Some(value) if width.is_none() => match value.parse() {
                Ok(value) => width = Some(value),
                Err(_) => write_usage_and_crash(argv0),
            },

            _ => write_usage_and_crash(argv0),
        }
    }

    let None = args.next() else { write_usage_and_crash(argv0); };

    let Some(width) = width else { write_usage_and_crash(argv0); };
    (fma, mulh, width)
}

/// Prints the usage to stderr and exits with status 1.
fn write_usage_and_crash(argv0: &std::ffi::OsStr) -> ! {
    write_usage(std::io::stderr(), argv0);
    std::process::exit(1);
}

/// Writes the one-line usage message to `w`. Write errors are deliberately ignored.
fn write_usage(mut w: impl std::io::Write, argv0: &std::ffi::OsStr) {
    _ = writeln!(w, "Usage: {} [--fma] [--mulh] <width>", argv0.to_string_lossy());
}
"panic-strategy": "abort", 20 | "target-pointer-width": "64", 21 | "vendor": "arnavion" 22 | } 23 | -------------------------------------------------------------------------------- /freestanding/riscv64-arnavion-none-elf.ld: -------------------------------------------------------------------------------- 1 | ENTRY(_start) 2 | 3 | SECTIONS { 4 | .text 0x8000000000000000 : { 5 | *(.text.boot) 6 | } 7 | 8 | .rodata : { 9 | *(.rodata .rodata.* .got .data .data.*) 10 | } 11 | 12 | _STACK_PTR = 0x400000; 13 | _TIMER_TICK_NS = 1; 14 | _CONSOLE_PTR = 0x0000000000400000; 15 | _CONSOLE_END_PTR = _CONSOLE_PTR + 0x0000000000002000; 16 | _IN_FILE_PTR = 0xffffffffffe00000; 17 | _IN_FILE_END_PTR = _IN_FILE_PTR + 0xfffff; 18 | } 19 | -------------------------------------------------------------------------------- /freestanding/src/main.rs: -------------------------------------------------------------------------------- 1 | #![cfg(target_arch = "riscv64")] 2 | 3 | #![feature( 4 | maybe_uninit_write_slice, 5 | )] 6 | 7 | #![no_main] 8 | #![no_std] 9 | 10 | use core::fmt::Write; 11 | 12 | core::arch::global_asm!(" 13 | .global _start 14 | .extern _STACK_PTR 15 | 16 | .section .text.boot 17 | 18 | _start: 19 | lga sp, _STACK_PTR 20 | j {main} 21 | ", main = sym main); 22 | 23 | fn main() { 24 | { 25 | let timer = HardwareTimer::new(); 26 | 27 | let mut console = Console::new(); 28 | 29 | let result = main_inner(&mut console); 30 | _ = writeln!(console, "{result:?}"); 31 | 32 | let (cycles, time, instret) = timer.since(); 33 | let (time_s, time_ms) = (time.as_secs(), time.subsec_millis()); 34 | #[allow(clippy::cast_possible_truncation)] 35 | let frequency = (cycles * 1_000_000_000) / (time.as_nanos() as u64); 36 | _ = writeln!(console, "executed {instret} instructions in {cycles} cycles, {time_s}.{time_ms:03} s, {frequency} Hz"); 37 | } 38 | 39 | halt(); 40 | } 41 | 42 | fn main_inner(console: &mut Console<'_>) -> Result<(), ()> { 43 | let program = { 44 | unsafe extern "C" { 45 | safe 
/// A snapshot of the `cycle`, `time` and `instret` counters, used to measure how much of each
/// has elapsed between two points in the program.
#[derive(Clone, Copy)]
struct HardwareTimer {
    /// Nanoseconds per `time` tick, taken from the address of the `_TIMER_TICK_NS` symbol.
    tick_ns: u64,
    cycles: u64,
    time: u64,
    instret: u64,
}

impl HardwareTimer {
    /// Captures the current counter values as the starting point.
    fn new() -> Self {
        unsafe extern "C" {
            safe static _TIMER_TICK_NS: core::ffi::c_void;
        }

        // Sample the counters before doing anything else.
        let (cycles, time, instret) = Self::read();

        // The tick length is encoded as the symbol's address, not as its contents.
        let tick_ns = (&raw const _TIMER_TICK_NS).addr() as u64;

        Self { tick_ns, cycles, time, instret }
    }

    /// Returns `(cycles, wall time, instructions retired)` elapsed since this snapshot was taken.
    fn since(self) -> (u64, core::time::Duration, u64) {
        let (cycles_now, time_now, instret_now) = Self::read();

        let elapsed_ticks = time_now - self.time;
        let elapsed = core::time::Duration::from_nanos(elapsed_ticks * self.tick_ns);

        (cycles_now - self.cycles, elapsed, instret_now - self.instret)
    }

    /// Reads the three counters, in the order `cycle`, `time`, `instret`.
    fn read() -> (u64, u64, u64) {
        let cycles: u64;
        let time: u64;
        let instret: u64;

        unsafe {
            core::arch::asm!(
                "rdcycle {cycles}",
                "rdtime {time}",
                "rdinstret {instret}",
                cycles = lateout(reg) cycles,
                time = lateout(reg) time,
                instret = lateout(reg) instret,
                options(nomem, nostack),
            );
        }

        (cycles, time, instret)
    }
}
/// Returns `(line, rest)`, where `line` ends at either a `b'\0'` or a `b'\n'` (whichever comes first) or
/// reached the end of the given slice.
///
/// If `rest` is empty then `line` ended at a `b'\0'` or reached the end of the given slice,
/// and is thus the last line.
mod split_line {
    /// Ends the line at the terminator located at index `i` of `s`.
    ///
    /// A `b'\n'` leaves everything after it as `rest`; any other byte (i.e. `b'\0'`) ends the input,
    /// so `rest` is empty.
    ///
    /// # Safety
    ///
    /// `i` must be less than `s.len()`.
    unsafe fn end_line_at(s: &[u8], i: usize) -> (&[u8], &[u8]) {
        // SAFETY: The caller guarantees `i < s.len()`, so `..i`, `i` and `i + 1..` are all in bounds.
        let (line, terminator, rest) = unsafe { (s.get_unchecked(..i), *s.get_unchecked(i), s.get_unchecked(i + 1..)) };
        let rest: &[u8] = if terminator == b'\n' { rest } else { b"" };
        (line, rest)
    }

    /// This is a SWAR implementation based on the Zbb extension.
    #[cfg(target_feature = "zbb")]
    pub(super) fn split_line(s: &[u8]) -> (&[u8], &[u8]) {
        const C1: usize = usize::from_ne_bytes([0x0a; core::mem::size_of::<usize>()]);

        fn expand_slice_to_usize(s: &[u8]) -> usize {
            unsafe { core::hint::assert_unchecked(s.len() < core::mem::size_of::<usize>()); }

            // An empty slice expands to all-0xff regardless of what is in memory, so skip the load.
            // This matters for an empty tail, whose aligned chunk lies entirely past the end of the data.
            if s.is_empty() {
                return usize::MAX;
            }

            // `s` is guaranteed to be contained within an aligned usize-sized chunk.
            // We can read that chunk, then shift it so that the bytes of `s` are the first,
            // then set the excess bytes to 0xff. This result will then only contain a `b'\0'` or `b'\n'`
            // at the index that `s` contains a `b'\0'` or `b'\n'`.
            //
            // We can't dereference the chunk pointer in Rust code because there is no guarantee that
            // all `size_of::<usize>()` bytes are in the same allocation as `s`,
            // so dereferencing the pointer would be UB. miri confirms this.
            // However it *is* legal to read that usize using an inline assembly load instruction.
            // miri cannot introspect this to prove it, but this is also what the SWAR impl of `strlen`
            // in compiler_builtins does, with the same justification.

            let s_ptr = s.as_ptr().addr();
            let s_aligned_start_ptr = (s.as_ptr().addr() / core::mem::size_of::<usize>()) * core::mem::size_of::<usize>();

            let chunk: usize;
            unsafe {
                core::arch::asm!(
                    "ld {chunk}, ({s_aligned_start_ptr})",
                    s_aligned_start_ptr = in(reg) s_aligned_start_ptr,
                    chunk = lateout(reg) chunk,
                    options(nostack, pure, readonly),
                );
            }

            #[cfg(target_endian = "little")]
            {
                let num_trailing_garbage_bits = (s_ptr % core::mem::size_of::<usize>()) * 8;
                let chunk = chunk >> num_trailing_garbage_bits;

                let num_valid_bits = s.len() * 8;
                let chunk = chunk | (usize::MAX << num_valid_bits);

                chunk
            }
            #[cfg(target_endian = "big")]
            {
                let num_leading_garbage_bits = (s_ptr % core::mem::size_of::<usize>()) * 8;
                let chunk = chunk << num_leading_garbage_bits;

                let num_valid_bits = s.len() * 8;
                let chunk = chunk | (usize::MAX >> num_valid_bits);

                chunk
            }
        }

        // `chunk` must have been formed by interpreting the underlying bytes in native-endian order.
        fn index_of_zero(chunk: usize) -> Option<usize> {
            let result: usize;
            unsafe {
                core::arch::asm!(
                    "orc.b {result}, {chunk}",
                    chunk = in(reg) chunk,
                    result = lateout(reg) result,
                    options(nomem, nostack, pure),
                );
            }
            if result == usize::MAX {
                None
            }
            else {
                #[cfg(target_endian = "little")]
                let i = usize::try_from(result.trailing_ones() / 8).expect("u32 -> usize");
                #[cfg(target_endian = "big")]
                let i = usize::try_from(result.leading_ones() / 8).expect("u32 -> usize");

                Some(i)
            }
        }

        // Index of the first `b'\n'` or `b'\0'` in `chunk`, whichever comes first.
        fn index_of_terminator(chunk: usize) -> Option<usize> {
            match (index_of_zero(chunk ^ C1), index_of_zero(chunk)) {
                (Some(nl), Some(nul)) => Some(nl.min(nul)),
                (nl, nul) => nl.or(nul),
            }
        }

        // Note: `s_aligned` elements will have been interpreted from the underlying bytes in native-endian order.
        let (s_head, s_aligned, s_tail) = unsafe { s.align_to::<usize>() };

        if let Some(i) = index_of_terminator(expand_slice_to_usize(s_head)) {
            // SAFETY: Bytes beyond `s_head` were set to 0xff, which is not a terminator, so `i < s_head.len() <= s.len()`.
            return unsafe { end_line_at(s, i) };
        }

        let mut line_end = s_head.len();
        for &chunk in s_aligned {
            if let Some(i) = index_of_terminator(chunk) {
                // SAFETY: `chunk` lies entirely within `s` and starts at offset `line_end`.
                return unsafe { end_line_at(s, line_end + i) };
            }

            line_end += core::mem::size_of::<usize>();
        }

        if let Some(i) = index_of_terminator(expand_slice_to_usize(s_tail)) {
            // SAFETY: Bytes beyond `s_tail` were set to 0xff, which is not a terminator, so `i < s_tail.len()`
            // and `s_tail` starts at offset `line_end` of `s`.
            return unsafe { end_line_at(s, line_end + i) };
        }

        (s, b"")
    }

    /// This is a SWAR implementation used when the Zbb extension is not present.
    ///
    /// Ref:
    #[cfg(not(target_feature = "zbb"))]
    pub(super) fn split_line(s: &[u8]) -> (&[u8], &[u8]) {
        const C1: usize = usize::from_ne_bytes([0x0a; core::mem::size_of::<usize>()]);
        const C2: usize = usize::from_ne_bytes([0x01; core::mem::size_of::<usize>()]);
        const C3: usize = usize::from_ne_bytes([0x80; core::mem::size_of::<usize>()]);

        fn is_terminator(b: u8) -> bool {
            b == b'\n' || b == b'\0'
        }

        // Non-zero result if and only if at least one byte of `chunk` is zero.
        fn has_zero_byte(chunk: usize) -> bool {
            chunk.wrapping_sub(C2) & !chunk & C3 != 0
        }

        // Index of the first `b'\n'` or `b'\0'` in `chunk`, whichever comes first.
        // The word-wide test rejects chunks without a terminator without looking at individual bytes.
        fn index_of_terminator(chunk: usize) -> Option<usize> {
            if has_zero_byte(chunk ^ C1) || has_zero_byte(chunk) {
                chunk.to_ne_bytes().iter().position(|&b| is_terminator(b))
            }
            else {
                None
            }
        }

        let (s_head, s_aligned, s_tail) = unsafe { s.align_to::<usize>() };

        if let Some(i) = s_head.iter().position(|&b| is_terminator(b)) {
            // SAFETY: `i < s_head.len() <= s.len()`.
            return unsafe { end_line_at(s, i) };
        }

        let mut line_end = s_head.len();
        for &chunk in s_aligned {
            if let Some(i) = index_of_terminator(chunk) {
                // SAFETY: `chunk` lies entirely within `s` and starts at offset `line_end`.
                return unsafe { end_line_at(s, line_end + i) };
            }

            line_end += core::mem::size_of::<usize>();
        }

        if let Some(i) = s_tail.iter().position(|&b| is_terminator(b)) {
            // SAFETY: `i < s_tail.len()` and `s_tail` starts at offset `line_end` of `s`.
            return unsafe { end_line_at(s, line_end + i) };
        }

        (s, b"")
    }
}
use split_line::split_line;
reset_all_tags( 47 | &mut self, 48 | _tags: impl IntoIterator)>, 49 | ) {} 50 | 51 | pub(crate) fn cycle(&self) -> i64 { 52 | self.cycle 53 | } 54 | 55 | pub(crate) fn tick(&mut self, cycles: i64, instret: i64) { 56 | self.cycle += cycles; 57 | self.instret += instret; 58 | self.time = nanos_since_unix_epoch(); 59 | } 60 | } 61 | 62 | impl Default for Csrs { 63 | fn default() -> Self { 64 | Self { 65 | cycle: 0, 66 | instret: 0, 67 | time: nanos_since_unix_epoch(), 68 | } 69 | } 70 | } 71 | 72 | impl std::fmt::Display for Csrs { 73 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 74 | writeln!(f, "cycle: 0x{:016x}", self.cycle)?; 75 | writeln!(f, "time: 0x{:016x}", self.time)?; 76 | writeln!(f, "instret: 0x{:016x}", self.instret)?; 77 | Ok(()) 78 | } 79 | } 80 | 81 | impl TryFrom for Csr { 82 | type Error = (); 83 | 84 | fn try_from(raw: u32) -> Result { 85 | Ok(match raw { 86 | 0xc00 => Self::Cycle, 87 | 0xc01 => Self::Time, 88 | 0xc02 => Self::Instret, 89 | _ => return Err(()), 90 | }) 91 | } 92 | } 93 | 94 | fn nanos_since_unix_epoch() -> i64 { 95 | std::time::SystemTime::now() 96 | .duration_since(std::time::SystemTime::UNIX_EPOCH) 97 | .unwrap() 98 | .as_nanos() 99 | .try_into().unwrap() 100 | } 101 | -------------------------------------------------------------------------------- /simulator/src/in_order.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | csrs::Csrs, 3 | instruction::{ 4 | Instruction, 5 | OpOp, OpImmOp, Op32Op, OpImm32Op, 6 | MemoryBase, MemoryOffset, 7 | }, 8 | memory::Memory, 9 | tag::EMPTY_TAG, 10 | x_regs::{XReg, XRegs}, 11 | LogLevel, 12 | Statistics, 13 | load_inst, 14 | }; 15 | 16 | pub(crate) fn run( 17 | memory: &mut Memory, 18 | x_regs: &mut XRegs, 19 | csrs: &mut Csrs, 20 | statistics: &mut Statistics, 21 | mut pc: i64, 22 | log_level: LogLevel, 23 | ) { 24 | loop { 25 | { 26 | let tick = csrs.cycle(); 27 | if log_level == LogLevel::Debug { 28 | if tick % 
100 == 0 { 29 | eprintln!(); 30 | eprintln!("===== {tick} ====="); 31 | eprintln!("0x{pc:016x}"); 32 | } 33 | } 34 | else if log_level >= LogLevel::Trace { 35 | eprintln!(); 36 | eprintln!("===== {tick} ====="); 37 | eprintln!("{x_regs}"); 38 | eprintln!("{csrs}"); 39 | } 40 | } 41 | 42 | let (inst, inst_len, instret) = match load_inst(memory, pc, statistics) { 43 | Ok(inst) => inst, 44 | Err(inst) => panic!("SIGILL: 0x{inst:08x}"), 45 | }; 46 | 47 | if log_level >= LogLevel::Trace { 48 | eprintln!("+ 0x{pc:016x} : {inst:?}"); 49 | } 50 | 51 | if let Instruction::Ebreak = inst { 52 | break; 53 | } 54 | 55 | let mut next_pc = pc.wrapping_add(inst_len); 56 | 57 | execute(inst, pc, &mut next_pc, x_regs, csrs, memory); 58 | 59 | pc = next_pc; 60 | 61 | let cycles: u8 = match inst { 62 | Instruction::Op { op: OpOp::Mul | OpOp::Mulh | OpOp::Mulhsu | OpOp::Mulhu, .. } 63 | => 34, 64 | 65 | Instruction::Op32 { op: Op32Op::Mulw, .. } 66 | => 17, 67 | 68 | Instruction::OpImm { op: OpImmOp::Clz | OpImmOp::Ctz | OpImmOp::Cpop, .. } | 69 | Instruction::OpImm32 { op: OpImm32Op::Clzw | OpImm32Op::Ctzw | OpImm32Op::Cpopw, .. 
} 70 | => 3, 71 | 72 | _ 73 | => 1, 74 | }; 75 | 76 | statistics.num_ticks_where_instructions_retired += 1; 77 | statistics.num_ticks_where_instructions_not_retired += usize::from(cycles) - 1; 78 | 79 | csrs.tick(cycles.into(), instret); 80 | 81 | if log_level >= LogLevel::Trace { 82 | eprintln!("->"); 83 | eprintln!("{x_regs}"); 84 | eprintln!("{csrs}"); 85 | } 86 | } 87 | } 88 | 89 | fn execute( 90 | inst: Instruction, 91 | pc: i64, 92 | next_pc: &mut i64, 93 | x_regs: &mut XRegs, 94 | csrs: &mut Csrs, 95 | memory: &mut Memory, 96 | ) { 97 | match inst { 98 | Instruction::Abs { rd, rs } => { 99 | let arg = x_regs.load(rs).in_order(); 100 | x_regs.store(rd, EMPTY_TAG, arg.unsigned_abs().cast_signed()); 101 | }, 102 | 103 | Instruction::Auipc { rd, imm } => { 104 | x_regs.store(rd, EMPTY_TAG, pc.wrapping_add(imm)); 105 | }, 106 | 107 | Instruction::Branch { op, rs1, rs2, imm } => { 108 | let arg1 = x_regs.load(rs1).in_order(); 109 | let arg2 = x_regs.load(rs2).in_order(); 110 | if op.exec(arg1, arg2) { 111 | *next_pc = pc.wrapping_add(imm); 112 | } 113 | }, 114 | 115 | Instruction::Csrrw { rd, rs1, csr } => 116 | if rd == XReg::X0 { 117 | let new = x_regs.load(rs1).in_order(); 118 | csrs.store(csr, EMPTY_TAG, new); 119 | } 120 | else { 121 | let previous = csrs.load(csr).in_order(); 122 | let new = x_regs.load(rs1).in_order(); 123 | x_regs.store(rd, EMPTY_TAG, previous); 124 | csrs.store(csr, EMPTY_TAG, new); 125 | }, 126 | 127 | Instruction::Csrrwi { rd, imm, csr } => 128 | if rd == XReg::X0 { 129 | csrs.store(csr, EMPTY_TAG, imm); 130 | } 131 | else { 132 | let previous = csrs.load(csr).in_order(); 133 | x_regs.store(rd, EMPTY_TAG, previous); 134 | csrs.store(csr, EMPTY_TAG, imm); 135 | }, 136 | 137 | Instruction::Csrrs { rd, rs1, csr } => 138 | if rs1 == XReg::X0 { 139 | let previous = csrs.load(csr).in_order(); 140 | x_regs.store(rd, EMPTY_TAG, previous); 141 | } 142 | else { 143 | let previous = csrs.load(csr).in_order(); 144 | let new = previous | 
x_regs.load(rs1).in_order(); 145 | x_regs.store(rd, EMPTY_TAG, previous); 146 | csrs.store(csr, EMPTY_TAG, new); 147 | }, 148 | 149 | Instruction::Csrrsi { rd, imm, csr } => 150 | if imm == 0 { 151 | let previous = csrs.load(csr).in_order(); 152 | x_regs.store(rd, EMPTY_TAG, previous); 153 | } 154 | else { 155 | let previous = csrs.load(csr).in_order(); 156 | let new = previous | imm; 157 | x_regs.store(rd, EMPTY_TAG, previous); 158 | csrs.store(csr, EMPTY_TAG, new); 159 | }, 160 | 161 | Instruction::Csrrc { rd, rs1, csr } => 162 | if rs1 == XReg::X0 { 163 | let previous = csrs.load(csr).in_order(); 164 | x_regs.store(rd, EMPTY_TAG, previous); 165 | } 166 | else { 167 | let previous = csrs.load(csr).in_order(); 168 | let new = previous & !x_regs.load(rs1).in_order(); 169 | x_regs.store(rd, EMPTY_TAG, previous); 170 | csrs.store(csr, EMPTY_TAG, new); 171 | }, 172 | 173 | Instruction::Csrrci { rd, imm, csr } => 174 | if imm == 0 { 175 | let previous = csrs.load(csr).in_order(); 176 | x_regs.store(rd, EMPTY_TAG, previous); 177 | } 178 | else { 179 | let previous = csrs.load(csr).in_order(); 180 | let new = previous & !imm; 181 | x_regs.store(rd, EMPTY_TAG, previous); 182 | csrs.store(csr, EMPTY_TAG, new); 183 | }, 184 | 185 | Instruction::Ebreak => panic!("EBREAK"), 186 | 187 | Instruction::Fence => (), 188 | 189 | Instruction::Jal { rd, imm } => { 190 | x_regs.store(rd, EMPTY_TAG, *next_pc); 191 | *next_pc = pc.wrapping_add(imm); 192 | }, 193 | 194 | Instruction::Jalr { rd, rs1, imm } => { 195 | let arg1 = x_regs.load(rs1).in_order(); 196 | x_regs.store(rd, EMPTY_TAG, *next_pc); 197 | *next_pc = arg1.wrapping_add(imm) & 0xffff_ffff_ffff_fffe_u64.cast_signed(); 198 | }, 199 | 200 | Instruction::Load { op, rd, base, offset } => { 201 | let base = match base { 202 | MemoryBase::XReg(rs1) => x_regs.load(rs1).in_order(), 203 | MemoryBase::XRegSh1(rs1) => x_regs.load(rs1).in_order() << 1, 204 | MemoryBase::XRegSh2(rs1) => x_regs.load(rs1).in_order() << 2, 205 | 
MemoryBase::XRegSh3(rs1) => x_regs.load(rs1).in_order() << 3, 206 | MemoryBase::Pc => pc, 207 | }; 208 | let offset = match offset { 209 | MemoryOffset::Imm(imm) => imm, 210 | MemoryOffset::XReg(rs2) => x_regs.load(rs2).in_order(), 211 | }; 212 | let address = base.wrapping_add(offset); 213 | let value = op.exec(memory, address); 214 | x_regs.store(rd, EMPTY_TAG, value); 215 | }, 216 | 217 | Instruction::Lui { rd, imm } => { 218 | x_regs.store(rd, EMPTY_TAG, imm); 219 | }, 220 | 221 | Instruction::Op { op, rd, rs1, rs2 } => { 222 | let arg1 = x_regs.load(rs1).in_order(); 223 | let arg2 = x_regs.load(rs2).in_order(); 224 | let value = match op { 225 | OpOp::Add => arg1.wrapping_add(arg2), 226 | OpOp::And => arg1 & arg2, 227 | OpOp::Andn => arg1 & !arg2, 228 | OpOp::Bclr => arg1 & !(1 << (arg2 & 0x3f)), 229 | OpOp::Bext => (arg1 >> (arg2 & 0x3f)) & 0x1, 230 | OpOp::Binv => arg1 ^ (1 << (arg2 & 0x3f)), 231 | OpOp::Bset => arg1 | (1 << (arg2 & 0x3f)), 232 | OpOp::CzeroEqz => if arg2 == 0 { 0 } else { arg1 }, 233 | OpOp::CzeroNez => if arg2 == 0 { arg1 } else { 0 }, 234 | OpOp::Max => arg1.max(arg2), 235 | OpOp::Maxu => arg1.cast_unsigned().max(arg2.cast_unsigned()).cast_signed(), 236 | OpOp::Min => arg1.min(arg2), 237 | OpOp::Minu => arg1.cast_unsigned().min(arg2.cast_unsigned()).cast_signed(), 238 | OpOp::Mul => arg1.wrapping_mul(arg2), 239 | OpOp::Mulh => ((i128::from(arg1) * i128::from(arg2)) >> 64).try_into().unwrap(), 240 | OpOp::Mulhsu => ((i128::from(arg1) * i128::from(arg2.cast_unsigned())) >> 64).try_into().unwrap(), 241 | OpOp::Mulhu => ((u128::from(arg1.cast_unsigned()) * u128::from(arg2.cast_unsigned())).cast_signed() >> 64).try_into().unwrap(), 242 | OpOp::Or => arg1 | arg2, 243 | OpOp::Orn => arg1 | !arg2, 244 | OpOp::Rol => arg1.rotate_left((arg2 & 0x3f).try_into().unwrap()), 245 | OpOp::Ror => arg1.rotate_right((arg2 & 0x3f).try_into().unwrap()), 246 | OpOp::Sh1add => (arg1 << 1).wrapping_add(arg2), 247 | OpOp::Sh2add => (arg1 << 2).wrapping_add(arg2), 
248 | OpOp::Sh3add => (arg1 << 3).wrapping_add(arg2), 249 | OpOp::Sll => arg1 << (arg2 & 0x3f), 250 | OpOp::Slt => (arg1 < arg2).into(), 251 | OpOp::Sltu => (arg1.cast_unsigned() < arg2.cast_unsigned()).into(), 252 | OpOp::Sra => arg1 >> (arg2 & 0x3f), 253 | OpOp::Srl => (arg1.cast_unsigned() >> (arg2 & 0x3f)).cast_signed(), 254 | OpOp::Sub => arg1.wrapping_sub(arg2), 255 | OpOp::Xnor => arg1 ^ !arg2, 256 | OpOp::Xor => arg1 ^ arg2, 257 | }; 258 | x_regs.store(rd, EMPTY_TAG, value); 259 | }, 260 | 261 | Instruction::Op32 { op, rd, rs1, rs2 } => { 262 | let arg1 = x_regs.load(rs1).in_order(); 263 | let arg2 = x_regs.load(rs2).in_order(); 264 | 265 | #[allow(clippy::cast_possible_truncation)] 266 | let arg1w = arg1 as i32; 267 | let arg1uw = arg1w.cast_unsigned(); 268 | 269 | #[allow(clippy::cast_possible_truncation)] 270 | let arg2w = arg2 as i32; 271 | 272 | let value = match op { 273 | Op32Op::AddUw => i64::from(arg1uw).wrapping_add(arg2), 274 | Op32Op::Addw => arg1w.wrapping_add(arg2w).into(), 275 | Op32Op::Mulw => arg1w.wrapping_mul(arg2w).into(), 276 | Op32Op::Rolw => arg1w.rotate_left((arg2w & 0x1f).try_into().unwrap()).into(), 277 | Op32Op::Rorw => arg1w.rotate_right((arg2w & 0x1f).try_into().unwrap()).into(), 278 | Op32Op::Sh1addUw => (i64::from(arg1uw) << 1).wrapping_add(arg2), 279 | Op32Op::Sh2addUw => (i64::from(arg1uw) << 2).wrapping_add(arg2), 280 | Op32Op::Sh3addUw => (i64::from(arg1uw) << 3).wrapping_add(arg2), 281 | Op32Op::Sllw => (arg1w << (arg2w & 0x1f)).into(), 282 | Op32Op::Sraw => (arg1w >> (arg2w & 0x1f)).into(), 283 | Op32Op::Srlw => (arg1uw >> (arg2w & 0x1f)).into(), 284 | Op32Op::Subw => arg1w.wrapping_sub(arg2w).into(), 285 | #[allow(clippy::cast_possible_truncation)] 286 | Op32Op::ZextH => i64::from(arg1.cast_unsigned() as u16), 287 | }; 288 | x_regs.store(rd, EMPTY_TAG, value); 289 | }, 290 | 291 | Instruction::OpImm { op, rd, rs1, imm } => { 292 | let arg1 = x_regs.load(rs1).in_order(); 293 | let value = match op { 294 | OpImmOp::Addi 
=> arg1.wrapping_add(imm), 295 | OpImmOp::Andi => arg1 & imm, 296 | OpImmOp::Bclri => arg1 & !(1 << (imm & 0x3f)), 297 | OpImmOp::Bexti => (arg1 >> (imm & 0x3f)) & 0x1, 298 | OpImmOp::Binvi => arg1 ^ (1 << (imm & 0x3f)), 299 | OpImmOp::Bseti => arg1 | (1 << (imm & 0x3f)), 300 | OpImmOp::Clz => arg1.leading_zeros().into(), 301 | OpImmOp::Cpop => arg1.count_ones().into(), 302 | OpImmOp::Ctz => arg1.trailing_zeros().into(), 303 | OpImmOp::OrcB => i64::from_ne_bytes(arg1.to_ne_bytes().map(|b| if b == 0 { 0x00 } else { 0xff })), 304 | OpImmOp::Ori => arg1 | imm, 305 | OpImmOp::Rev8 => i64::from_be_bytes(arg1.to_le_bytes()), 306 | OpImmOp::Rori => arg1.rotate_right((imm & 0x3f).try_into().unwrap()), 307 | #[allow(clippy::cast_possible_truncation)] 308 | OpImmOp::SextB => (arg1 as i8).into(), 309 | #[allow(clippy::cast_possible_truncation)] 310 | OpImmOp::SextH => (arg1 as i16).into(), 311 | OpImmOp::Slli => arg1 << (imm & 0x3f), 312 | OpImmOp::Slti => (arg1 < imm).into(), 313 | OpImmOp::Sltiu => (arg1.cast_unsigned() < imm.cast_unsigned()).into(), 314 | OpImmOp::Srai => arg1 >> (imm & 0x3f), 315 | OpImmOp::Srli => (arg1.cast_unsigned() >> (imm & 0x3f)).cast_signed(), 316 | OpImmOp::Xori => arg1 ^ imm, 317 | }; 318 | x_regs.store(rd, EMPTY_TAG, value); 319 | }, 320 | 321 | Instruction::OpImm32 { op, rd, rs1, imm } => { 322 | #[allow(clippy::cast_possible_truncation)] 323 | let arg1 = x_regs.load(rs1).in_order() as i32; 324 | #[allow(clippy::cast_possible_truncation)] 325 | let imm = imm as i32; 326 | let value = match op { 327 | OpImm32Op::Addiw => arg1.wrapping_add(imm).into(), 328 | OpImm32Op::Clzw => arg1.leading_zeros().into(), 329 | OpImm32Op::Cpopw => arg1.count_ones().into(), 330 | OpImm32Op::Ctzw => arg1.trailing_zeros().into(), 331 | OpImm32Op::Roriw => arg1.rotate_right((imm & 0x1f).try_into().unwrap()).into(), 332 | OpImm32Op::SlliUw => i64::from(arg1.cast_unsigned()) << (imm & 0x3f), 333 | OpImm32Op::Slliw => (arg1 << (imm & 0x1f)).into(), 334 | 
OpImm32Op::Sraiw => (arg1 >> (imm & 0x1f)).into(), 335 | OpImm32Op::Srliw => (arg1.cast_unsigned() >> (imm & 0x1f)).cast_signed().into(), 336 | }; 337 | x_regs.store(rd, EMPTY_TAG, value); 338 | }, 339 | 340 | Instruction::Store { op, rs1, rs2, imm } => { 341 | let address = x_regs.load(rs1).in_order().wrapping_add(imm); 342 | let value = x_regs.load(rs2).in_order(); 343 | op.exec(memory, address, value); 344 | }, 345 | } 346 | } 347 | -------------------------------------------------------------------------------- /simulator/src/in_order_ucode.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | csrs::Csrs, 3 | instruction::Instruction, 4 | memory::Memory, 5 | multiplier::{self, State}, 6 | tag::TagAllocator, 7 | ucode::{Ucode, BinaryOp, MulOp, UnaryOp}, 8 | x_regs::XRegs, 9 | LogLevel, 10 | RegisterValue, 11 | Statistics, 12 | load_inst, 13 | }; 14 | 15 | pub(crate) fn run( 16 | memory: &mut Memory, 17 | x_regs: &mut XRegs, 18 | csrs: &mut Csrs, 19 | statistics: &mut Statistics, 20 | mut pc: i64, 21 | log_level: LogLevel, 22 | ) { 23 | loop { 24 | { 25 | let tick = csrs.cycle(); 26 | if log_level == LogLevel::Debug { 27 | if tick % 100 == 0 { 28 | eprintln!(); 29 | eprintln!("===== {tick} ====="); 30 | eprintln!("0x{pc:016x}"); 31 | } 32 | } 33 | else if log_level >= LogLevel::Trace { 34 | eprintln!(); 35 | eprintln!("===== {tick} ====="); 36 | eprintln!("{x_regs}"); 37 | eprintln!("{csrs}"); 38 | } 39 | } 40 | 41 | let (inst, inst_len, instret) = match load_inst(memory, pc, statistics) { 42 | Ok(inst) => inst, 43 | Err(inst) => panic!("SIGILL: 0x{inst:08x}"), 44 | }; 45 | 46 | if log_level >= LogLevel::Trace { 47 | eprintln!("+ 0x{pc:016x} : {inst:?}"); 48 | } 49 | 50 | let next_inst_pc = pc.wrapping_add(inst_len); 51 | let predicted_next_pc = match inst { 52 | // Constant 53 | Instruction::Jal { rd: _, imm } => pc.wrapping_add(imm), 54 | 55 | _ => pc.wrapping_add(inst_len), 56 | }; 57 | 58 | let mut cycles = 
0; 59 | let ucode_1234 = Ucode::new( 60 | inst, 61 | pc, 62 | next_inst_pc, 63 | predicted_next_pc, 64 | x_regs, 65 | csrs, 66 | TagAllocator::default().allocate(), 67 | ); 68 | let mut ucodes = std::collections::VecDeque::with_capacity(4); 69 | if let Some((ucode_1, ucode_234)) = ucode_1234 { 70 | ucodes.push_back(ucode_1); 71 | if let Some((ucode_2, ucode_34)) = ucode_234 { 72 | ucodes.push_back(ucode_2); 73 | if let Some((ucode_3, ucode_4)) = ucode_34 { 74 | ucodes.push_back(ucode_3); 75 | if let Some(ucode_4) = ucode_4 { 76 | ucodes.push_back(ucode_4); 77 | } 78 | } 79 | } 80 | } 81 | 82 | pc = next_inst_pc; 83 | 84 | while let Some(mut ucode) = ucodes.pop_front() { 85 | if let Ucode::Ebreak = ucode { 86 | return; 87 | } 88 | 89 | cycles += execute(&mut ucode, memory); 90 | 91 | if let Some((rd, tag, value)) = ucode.done_rd() { 92 | x_regs.store(rd, tag, value); 93 | for ucode in &mut ucodes { 94 | ucode.update(tag, value); 95 | } 96 | } 97 | if let Some((csr, tag, value)) = ucode.done_csr() { 98 | csrs.store(csr, tag, value); 99 | for ucode in &mut ucodes { 100 | ucode.update(tag, value); 101 | } 102 | } 103 | 104 | if let Ucode::Jump { pc: RegisterValue::Value(next_pc), predicted_next_pc: _ } = ucode { 105 | pc = next_pc; 106 | } 107 | } 108 | 109 | statistics.num_ticks_where_instructions_retired += 1; 110 | statistics.num_ticks_where_instructions_not_retired += usize::from(cycles) - 1; 111 | 112 | csrs.tick(cycles.into(), instret); 113 | 114 | if log_level >= LogLevel::Trace { 115 | eprintln!("->"); 116 | eprintln!("{x_regs}"); 117 | eprintln!("{csrs}"); 118 | } 119 | } 120 | } 121 | 122 | fn execute(inst: &mut Ucode, memory: &mut Memory) -> u8 { 123 | #[allow(clippy::match_same_arms)] 124 | match inst { 125 | Ucode::BinaryOp { 126 | op, 127 | rd, 128 | rs1: RegisterValue::Value(arg1), 129 | rs2: RegisterValue::Value(arg2), 130 | } => { 131 | let arg1 = *arg1; 132 | let arg2 = *arg2; 133 | 134 | #[allow(clippy::cast_possible_truncation)] 135 | let arg1w = 
arg1 as i32; 136 | let arg1uw = arg1w.cast_unsigned(); 137 | 138 | #[allow(clippy::cast_possible_truncation)] 139 | let arg2w = arg2 as i32; 140 | 141 | let (value, cycles) = match *op { 142 | BinaryOp::Add => (arg1.wrapping_add(arg2), 1), 143 | BinaryOp::AddUw => (i64::from(arg1uw).wrapping_add(arg2), 1), 144 | BinaryOp::Addw => (arg1w.wrapping_add(arg2w).into(), 1), 145 | BinaryOp::And => (arg1 & arg2, 1), 146 | BinaryOp::Andn => (arg1 & !arg2, 1), 147 | #[allow(clippy::unreadable_literal)] 148 | BinaryOp::Grev => { 149 | let value = arg1.cast_unsigned(); 150 | let value = (value << (arg2 & 0b100000)) | (value >> (arg2 & 0b100000)); 151 | let value = ((value & 0x0000ffff_0000ffff) << (arg2 & 0b010000)) | ((value & 0xffff0000_ffff0000) >> (arg2 & 0b010000)); 152 | let value = ((value & 0x00ff00ff_00ff00ff) << (arg2 & 0b001000)) | ((value & 0xff00ff00_ff00ff00) >> (arg2 & 0b001000)); 153 | let value = ((value & 0x0f0f0f0f_0f0f0f0f) << (arg2 & 0b000100)) | ((value & 0xf0f0f0f0_f0f0f0f0) >> (arg2 & 0b000100)); 154 | let value = ((value & 0x33333333_33333333) << (arg2 & 0b000010)) | ((value & 0xcccccccc_cccccccc) >> (arg2 & 0b000010)); 155 | let value = ((value & 0x55555555_55555555) << (arg2 & 0b000001)) | ((value & 0xaaaaaaaa_aaaaaaaa) >> (arg2 & 0b000001)); 156 | (value.cast_signed(), 1) 157 | }, 158 | BinaryOp::Or => (arg1 | arg2, 1), 159 | BinaryOp::Orn => (arg1 | !arg2, 1), 160 | BinaryOp::Rol => (arg1.rotate_left((arg2 & 0x3f).try_into().unwrap()), 1), 161 | BinaryOp::Rolw => (arg1w.rotate_left((arg2w & 0x1f).try_into().unwrap()).into(), 1), 162 | BinaryOp::Ror => (arg1.rotate_right((arg2 & 0x3f).try_into().unwrap()), 1), 163 | BinaryOp::Rorw => (arg1w.rotate_right((arg2w & 0x1f).try_into().unwrap()).into(), 1), 164 | BinaryOp::Sh1add => ((arg1 << 1).wrapping_add(arg2), 1), 165 | BinaryOp::Sh1addUw => ((i64::from(arg1uw) << 1).wrapping_add(arg2), 1), 166 | BinaryOp::Sh2add => ((arg1 << 2).wrapping_add(arg2), 1), 167 | BinaryOp::Sh2addUw => ((i64::from(arg1uw) 
<< 2).wrapping_add(arg2), 1), 168 | BinaryOp::Sh3add => ((arg1 << 3).wrapping_add(arg2), 1), 169 | BinaryOp::Sh3addUw => ((i64::from(arg1uw) << 3).wrapping_add(arg2), 1), 170 | BinaryOp::Sll => (arg1 << (arg2 & 0x3f), 1), 171 | BinaryOp::SllUw => (i64::from(arg1uw) << (arg2 & 0x3f), 1), 172 | BinaryOp::Sllw => ((arg1w << (arg2w & 0x1f)).into(), 1), 173 | BinaryOp::Slt => ((arg1 < arg2).into(), 1), 174 | BinaryOp::Sltu => ((arg1.cast_unsigned() < arg2.cast_unsigned()).into(), 1), 175 | BinaryOp::Sra => (arg1 >> (arg2 & 0x3f), 1), 176 | BinaryOp::Sraw => ((arg1w >> (arg2w & 0x1f)).into(), 1), 177 | BinaryOp::Srl => ((arg1.cast_unsigned() >> (arg2 & 0x3f)).cast_signed(), 1), 178 | BinaryOp::Srlw => ((arg1uw >> (arg2w & 0x1f)).cast_signed().into(), 1), 179 | BinaryOp::Sub => (arg1.wrapping_sub(arg2), 1), 180 | BinaryOp::Subw => (arg1w.wrapping_sub(arg2w).into(), 1), 181 | BinaryOp::Xnor => (arg1 ^ !arg2, 1), 182 | BinaryOp::Xor => (arg1 ^ arg2, 1), 183 | }; 184 | rd.2 = Some(value); 185 | cycles 186 | }, 187 | 188 | Ucode::Csel { 189 | rd, 190 | rcond: RegisterValue::Value(0), 191 | rs_eqz: RegisterValue::Value(value), 192 | rs_nez: _, 193 | } => { 194 | rd.2 = Some(*value); 195 | 1 196 | }, 197 | 198 | Ucode::Csel { 199 | rd, 200 | rcond: RegisterValue::Value(rcond), 201 | rs_eqz: _, 202 | rs_nez: RegisterValue::Value(value), 203 | } if *rcond != 0 => { 204 | rd.2 = Some(*value); 205 | 1 206 | }, 207 | 208 | Ucode::Ebreak => panic!("EBREAK"), 209 | 210 | Ucode::Fence => 1, 211 | 212 | Ucode::Jump { pc: RegisterValue::Value(_), predicted_next_pc: _ } => 1, 213 | 214 | Ucode::Mul { 215 | op, 216 | rd, 217 | rs1: RegisterValue::Value(arg1), 218 | rs2: RegisterValue::Value(arg2), 219 | state, 220 | } => { 221 | let (arg1_is_signed, arg1) = match *op { 222 | MulOp::Mul | 223 | MulOp::Mulh | 224 | MulOp::Mulhsu => (true, *arg1), 225 | 226 | MulOp::Mulhu => (false, *arg1), 227 | 228 | #[allow(clippy::cast_possible_truncation)] 229 | MulOp::Mulw => (true, (*arg1 as 
i32).into()), 230 | }; 231 | 232 | let (i, p) = state.get_or_insert_with(|| match *op { 233 | MulOp::Mul | 234 | MulOp::Mulh => State::initial(true, *arg2), 235 | 236 | MulOp::Mulhsu | 237 | MulOp::Mulhu => State::initial(false, *arg2), 238 | 239 | #[allow(clippy::cast_possible_truncation)] 240 | MulOp::Mulw => State::initial(true, (*arg2 as i32).into()), 241 | }); 242 | 243 | let mut cycles = 0; 244 | let value = loop { 245 | match multiplier::round(arg1_is_signed, arg1, *i, *p) { 246 | State::Pending { i: i_, p: p_ } => { 247 | cycles += 1; 248 | *i = i_; 249 | *p = p_; 250 | }, 251 | 252 | State::Mulw { i: i_, p: p_, mulw } => { 253 | cycles += 1; 254 | if matches!(*op, MulOp::Mulw) { 255 | break mulw.into(); 256 | } 257 | 258 | *i = i_; 259 | *p = p_; 260 | }, 261 | 262 | State::Mul { mul, mulh } => { 263 | cycles += 1; 264 | break if matches!(*op, MulOp::Mul) { 265 | mul 266 | } 267 | else { 268 | mulh 269 | }; 270 | }, 271 | } 272 | }; 273 | rd.2 = Some(value); 274 | cycles 275 | }, 276 | 277 | Ucode::Mv { rd: _, value: RegisterValue::Value(_) } => 1, 278 | 279 | Ucode::MvCsr { csr: _, value: RegisterValue::Value(_) } => 1, 280 | 281 | Ucode::Load { op, rd, addr: RegisterValue::Value(addr) } => { 282 | let result = op.exec(memory, *addr); 283 | rd.2 = Some(result); 284 | 1 285 | }, 286 | 287 | Ucode::Store { 288 | op, 289 | addr: RegisterValue::Value(addr), 290 | value: RegisterValue::Value(value), 291 | } => { 292 | op.exec(memory, *addr, *value); 293 | 1 294 | }, 295 | 296 | Ucode::UnaryOp { 297 | op, 298 | rd, 299 | rs: RegisterValue::Value(arg), 300 | } => { 301 | let arg = *arg; 302 | 303 | #[allow(clippy::cast_possible_truncation)] 304 | let argw = arg as i32; 305 | 306 | let (value, cycles) = match *op { 307 | UnaryOp::Cpop => (arg.count_ones().into(), 2), 308 | UnaryOp::Cpopw => (argw.count_ones().into(), 2), 309 | UnaryOp::OrcB => (i64::from_ne_bytes(arg.to_ne_bytes().map(|b| if b == 0 { 0x00 } else { 0xff })), 1), 310 | 
#[allow(clippy::cast_possible_truncation)] 311 | UnaryOp::SextB => ((arg as i8).into(), 1), 312 | #[allow(clippy::cast_possible_truncation)] 313 | UnaryOp::SextH => ((arg as i16).into(), 1), 314 | UnaryOp::SextW => (argw.into(), 1), 315 | #[allow(clippy::cast_possible_truncation)] 316 | UnaryOp::ZextB => ((arg.cast_unsigned() as u8).into(), 1), 317 | #[allow(clippy::cast_possible_truncation)] 318 | UnaryOp::ZextH => ((arg.cast_unsigned() as u16).into(), 1), 319 | #[allow(clippy::cast_possible_truncation)] 320 | UnaryOp::ZextW => ((arg.cast_unsigned() as u32).into(), 1), 321 | }; 322 | rd.2 = Some(value); 323 | cycles 324 | }, 325 | 326 | _ => unreachable!("{inst:?}") 327 | } 328 | } 329 | -------------------------------------------------------------------------------- /simulator/src/main.rs: -------------------------------------------------------------------------------- 1 | mod csrs; 2 | use csrs::Csrs; 3 | 4 | mod in_order; 5 | 6 | mod in_order_ucode; 7 | 8 | mod instruction; 9 | use instruction::{ 10 | Instruction, 11 | OpOp, Op32Op, 12 | OpImmOp, OpImm32Op, 13 | MemoryBase, MemoryOffset, 14 | }; 15 | 16 | mod memory; 17 | use memory::{Memory, LoadOp}; 18 | 19 | mod multiplier; 20 | 21 | mod out_of_order; 22 | 23 | mod tag; 24 | use tag::Tag; 25 | 26 | mod x_regs; 27 | use x_regs::{XReg, XRegs}; 28 | 29 | mod ucode; 30 | 31 | fn main() { 32 | let log_level = match std::env::var_os("SIMULATOR_LOG") { 33 | Some(var) if var.to_str() == Some("debug") => LogLevel::Debug, 34 | Some(var) if var.to_str() == Some("trace") => LogLevel::Trace, 35 | _ => LogLevel::Info, 36 | }; 37 | 38 | let mut args = std::env::args_os(); 39 | let argv0 = args.next().unwrap_or_else(|| env!("CARGO_BIN_NAME").into()); 40 | let (mode, program_path, in_file_path) = parse_args(args, &argv0); 41 | 42 | let mut memory = Memory::new(program_path, in_file_path); 43 | 44 | let mut x_regs: XRegs = Default::default(); 45 | 46 | let mut csrs: Csrs = Default::default(); 47 | 48 | let mut statistics: 
Statistics = Default::default(); 49 | 50 | let pc = 0x8000_0000_0000_0000_u64.cast_signed(); 51 | 52 | match mode { 53 | Mode::InOrder => in_order::run( 54 | &mut memory, 55 | &mut x_regs, 56 | &mut csrs, 57 | &mut statistics, 58 | pc, 59 | log_level, 60 | ), 61 | 62 | Mode::InOrderUcode => in_order_ucode::run( 63 | &mut memory, 64 | &mut x_regs, 65 | &mut csrs, 66 | &mut statistics, 67 | pc, 68 | log_level, 69 | ), 70 | 71 | Mode::OutOfOrder { max_retire_per_cycle } => out_of_order::run( 72 | &mut memory, 73 | &mut x_regs, 74 | &mut csrs, 75 | &mut statistics, 76 | pc, 77 | max_retire_per_cycle, 78 | log_level, 79 | ), 80 | } 81 | 82 | memory.dump_console(); 83 | 84 | println!("{statistics}"); 85 | 86 | println!("{x_regs}"); 87 | 88 | println!("{csrs}"); 89 | } 90 | 91 | fn load_inst(memory: &Memory, pc: i64, statistics: &mut Statistics) -> Result<(Instruction, i64, i64), u32> { 92 | let inst1 = LoadOp::HalfWordUnsigned.exec(memory, pc).cast_unsigned(); 93 | let inst2 = LoadOp::HalfWordUnsigned.exec(memory, pc + 2).cast_unsigned(); 94 | let inst3 = LoadOp::HalfWordUnsigned.exec(memory, pc + 4).cast_unsigned(); 95 | let inst4 = LoadOp::HalfWordUnsigned.exec(memory, pc + 6).cast_unsigned(); 96 | let inst = inst1 | (inst2 << 16) | (inst3 << 32) | (inst4 << 48); 97 | 98 | #[allow(clippy::cast_possible_truncation)] 99 | let Ok((inst_a, inst_a_len)) = Instruction::decode(inst as u32) else { 100 | return Err(inst as u32); 101 | }; 102 | 103 | let inst = inst >> (inst_a_len * 8); 104 | #[allow(clippy::cast_possible_truncation)] 105 | let result = 106 | if let Ok((inst_b, inst_b_len)) = Instruction::decode(inst as u32) { 107 | macro_op_fuse(inst_a, inst_a_len, inst_b, inst_b_len, &mut statistics.fusions) 108 | } 109 | else { 110 | (inst_a, inst_a_len, 1) 111 | }; 112 | Ok(result) 113 | } 114 | 115 | fn macro_op_fuse( 116 | inst_a: Instruction, inst_a_len: i64, 117 | inst_b: Instruction, inst_b_len: i64, 118 | fusions: &mut std::collections::BTreeMap<&'static str, usize>, 
119 | ) -> (Instruction, i64, i64) { 120 | let (inst, entry) = match (inst_a, inst_b) { 121 | ( 122 | Instruction::Auipc { rd: rd_a, imm: imm_a }, 123 | Instruction::OpImm { op: OpImmOp::Addi, rd: rd_b, rs1: rs1_b, imm: imm_b }, 124 | ) if 125 | rd_a == rd_b && 126 | rd_a == rs1_b 127 | => ( 128 | Instruction::Auipc { rd: rd_a, imm: imm_a.wrapping_add(imm_b) }, 129 | "auipc; addi -> auipc", 130 | ), 131 | 132 | ( 133 | Instruction::Lui { rd: rd_a, imm: imm_a }, 134 | Instruction::Op { op: OpOp::Add, rd: rd_b, rs1: rs1_b, rs2: rs2_b }, 135 | ) if 136 | rd_a == rd_b && 137 | rd_a == rs1_b && 138 | rs1_b != rs2_b 139 | => ( 140 | Instruction::OpImm { op: OpImmOp::Addi, rd: rd_a, rs1: rs2_b, imm: imm_a }, 141 | "lui; add -> addi", 142 | ), 143 | 144 | ( 145 | Instruction::Lui { rd: rd_a, imm: imm_a }, 146 | Instruction::Op32 { op: Op32Op::Addw, rd: rd_b, rs1: rs1_b, rs2: rs2_b }, 147 | ) if 148 | rd_a == rd_b && 149 | rd_a == rs1_b && 150 | rs1_b != rs2_b 151 | => ( 152 | Instruction::OpImm32 { op: OpImm32Op::Addiw, rd: rd_a, rs1: rs2_b, imm: imm_a }, 153 | "lui; addw -> addiw", 154 | ), 155 | 156 | ( 157 | Instruction::Lui { rd: rd_a, imm: imm_a }, 158 | Instruction::OpImm { op: OpImmOp::Addi, rd: rd_b, rs1: rs1_b, imm: imm_b }, 159 | ) if 160 | rd_a == rd_b && 161 | rd_a == rs1_b 162 | => ( 163 | Instruction::Lui { rd: rd_a, imm: imm_a.wrapping_add(imm_b) }, 164 | "lui; addi -> lui", 165 | ), 166 | 167 | ( 168 | Instruction::Lui { rd: rd_a, imm: imm_a }, 169 | Instruction::OpImm32 { op: OpImm32Op::Addiw, rd: rd_b, rs1: rs1_b, imm: imm_b }, 170 | ) if 171 | rd_a == rd_b && 172 | rd_a == rs1_b 173 | => ( 174 | Instruction::Lui { rd: rd_a, imm: ((imm_a << 31).wrapping_add(imm_b << 31)) >> 31 }, 175 | "lui; addiw -> lui", 176 | ), 177 | 178 | ( 179 | Instruction::Auipc { rd: rd_a, imm: imm_a }, 180 | Instruction::Jalr { rd: rd_b, rs1: rs1_b, imm: imm_b }, 181 | ) if 182 | rd_a == rd_b && 183 | rd_a == rs1_b 184 | => ( 185 | Instruction::Jal { rd: rd_a, imm: 
imm_a.wrapping_add(imm_b) }, 186 | "auipc; jalr -> jal", 187 | ), 188 | 189 | ( 190 | Instruction::Auipc { rd: rd_a, imm: imm_a }, 191 | Instruction::Load { op: op_b, rd: rd_b, base: MemoryBase::XReg(rs1_b), offset: MemoryOffset::Imm(offset_b) }, 192 | ) if 193 | rd_a == rd_b && 194 | rd_a == rs1_b 195 | => ( 196 | Instruction::Load { op: op_b, rd: rd_a, base: MemoryBase::Pc, offset: MemoryOffset::Imm(imm_a.wrapping_add(offset_b)) }, 197 | "auipc; load -> load.pc", 198 | ), 199 | 200 | ( 201 | Instruction::Lui { rd: rd_a, imm: imm_a }, 202 | Instruction::Load { op: op_b, rd: rd_b, base: MemoryBase::XReg(rs1_b), offset: MemoryOffset::Imm(offset_b) }, 203 | ) if 204 | rd_a == rd_b && 205 | rd_a == rs1_b 206 | => ( 207 | Instruction::Load { op: op_b, rd: rd_a, base: MemoryBase::XReg(XReg::X0), offset: MemoryOffset::Imm(imm_a.wrapping_add(offset_b)) }, 208 | "lui; load -> load", 209 | ), 210 | 211 | ( 212 | Instruction::Op { op: OpOp::Add, rd: rd_a, rs1: rs1_a, rs2: rs2_a }, 213 | Instruction::Load { op: op_b, rd: rd_b, base: MemoryBase::XReg(rs1_b), offset: MemoryOffset::Imm(0) }, 214 | ) if 215 | rd_a == rd_b && 216 | rd_a == rs1_b 217 | => ( 218 | Instruction::Load { op: op_b, rd: rd_a, base: MemoryBase::XReg(rs1_a), offset: MemoryOffset::XReg(rs2_a) }, 219 | "add; load -> load.add", 220 | ), 221 | 222 | ( 223 | Instruction::Op { op: OpOp::Sh1add, rd: rd_a, rs1: rs1_a, rs2: rs2_a }, 224 | Instruction::Load { op: op_b, rd: rd_b, base: MemoryBase::XReg(rs1_b), offset: MemoryOffset::Imm(0) }, 225 | ) if 226 | rd_a == rd_b && 227 | rd_a == rs1_b 228 | => ( 229 | Instruction::Load { op: op_b, rd: rd_a, base: MemoryBase::XRegSh1(rs1_a), offset: MemoryOffset::XReg(rs2_a) }, 230 | "sh1add; load -> load.sh1add", 231 | ), 232 | 233 | ( 234 | Instruction::Op { op: OpOp::Sh2add, rd: rd_a, rs1: rs1_a, rs2: rs2_a }, 235 | Instruction::Load { op: op_b, rd: rd_b, base: MemoryBase::XReg(rs1_b), offset: MemoryOffset::Imm(0) }, 236 | ) if 237 | rd_a == rd_b && 238 | rd_a == rs1_b 239 
/// Verbosity of the simulator's diagnostics, ordered from least to most verbose.
#[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd)]
enum LogLevel {
    Info,
    Debug,
    Trace,
}

/// Counters collected during a simulation run and printed when it ends.
#[derive(Default)]
struct Statistics {
    /// Number of times each macro-op fusion was applied, keyed by its description.
    fusions: std::collections::BTreeMap<&'static str, usize>,
    /// Per-key tick counts; printed as a percentage of all ticks.
    fu_utilization: std::collections::BTreeMap<&'static str, usize>,
    num_ticks_where_instructions_retired: usize,
    num_ticks_where_instructions_not_retired: usize,
    jump_predictions: usize,
    jump_mispredictions: usize,
}

impl std::fmt::Display for Statistics {
    /// Writes one line per counter; the two maps use their pretty `Debug` form and the
    /// utilization values are shown as percentages with nine decimals.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut fu_utilization = std::collections::BTreeMap::new();
        for (&unit, &busy_ticks) in &self.fu_utilization {
            let total_ticks =
                self.num_ticks_where_instructions_retired + self.num_ticks_where_instructions_not_retired;
            #[allow(clippy::cast_precision_loss)]
            let percentage = (busy_ticks as f64) * 100. / (total_ticks as f64);
            fu_utilization.insert(unit, format!("{percentage:.9}"));
        }

        writeln!(f, "fusions: {:#?}", self.fusions)?;
        writeln!(f, "FU utilization: {fu_utilization:#?}")?;
        writeln!(f, "num ticks where instructions retired: {}", self.num_ticks_where_instructions_retired)?;
        writeln!(f, "num ticks where instructions not retired: {}", self.num_ticks_where_instructions_not_retired)?;
        writeln!(f, "jump predictions: {}", self.jump_predictions)?;
        writeln!(f, "jump mispredictions: {}", self.jump_mispredictions)
    }
}
/ ((self.num_ticks_where_instructions_retired + self.num_ticks_where_instructions_not_retired) as f64), 300 | ), 301 | )) 302 | .collect(); 303 | writeln!(f, "fusions: {:#?}", self.fusions)?; 304 | writeln!(f, "FU utilization: {fu_utilization:#?}")?; 305 | writeln!(f, "num ticks where instructions retired: {}", self.num_ticks_where_instructions_retired)?; 306 | writeln!(f, "num ticks where instructions not retired: {}", self.num_ticks_where_instructions_not_retired)?; 307 | writeln!(f, "jump predictions: {}", self.jump_predictions)?; 308 | writeln!(f, "jump mispredictions: {}", self.jump_mispredictions)?; 309 | Ok(()) 310 | } 311 | } 312 | 313 | #[derive(Clone, Copy, Debug, Eq, PartialEq)] 314 | enum RegisterValue { 315 | Value(i64), 316 | Tag(Tag), 317 | } 318 | 319 | impl RegisterValue { 320 | fn update(&mut self, tag: Tag, value: i64) { 321 | if matches!(self, Self::Tag(tag_) if *tag_ == tag) { 322 | *self = Self::Value(value); 323 | } 324 | } 325 | 326 | fn in_order(self) -> i64 { 327 | match self { 328 | Self::Value(value) => value, 329 | Self::Tag(_) => unreachable!(), 330 | } 331 | } 332 | } 333 | 334 | enum Mode { 335 | InOrder, 336 | InOrderUcode, 337 | OutOfOrder { max_retire_per_cycle: std::num::NonZero }, 338 | } 339 | 340 | fn parse_args(mut args: impl Iterator, argv0: &std::ffi::OsStr) -> ( 341 | Mode, 342 | std::path::PathBuf, 343 | std::path::PathBuf, 344 | ) { 345 | let mut mode = None; 346 | let mut out_of_order_max_retire_per_cycle = None; 347 | let mut program_path = None; 348 | let mut in_file_path = None; 349 | 350 | while let Some(opt) = args.next() { 351 | match opt.to_str() { 352 | Some("--help") => { 353 | write_usage(std::io::stdout(), argv0); 354 | std::process::exit(0); 355 | }, 356 | 357 | Some("--mode") if mode.is_none() => { 358 | let Some(arg) = args.next() else { write_usage_and_crash(argv0); }; 359 | mode = Some(arg); 360 | }, 361 | 362 | Some("--ooo-max-retire-per-cycle") if out_of_order_max_retire_per_cycle.is_none() => { 363 | 
/// Execution model selected with `--mode`.
enum Mode {
    InOrder,
    InOrderUcode,
    // NOTE(review): the integer type was lost from this listing; `usize` is assumed -
    // confirm against the parameter type of `out_of_order::run`.
    OutOfOrder { max_retire_per_cycle: std::num::NonZero<usize> },
}

/// Parses the command line (without `argv[0]`) into the execution mode, the program path
/// and the input file path.
///
/// Recognized options are `--help`, `--mode <mode>` and `--ooo-max-retire-per-cycle <N>`
/// (default 32, only used by the out-of-order mode). The two paths are positional; a `--`
/// argument makes the next two arguments the paths regardless of what they look like.
///
/// Does not return on invalid input: the usage line is written to stderr and the process
/// exits with status 1. `--help` writes the usage line to stdout and exits with status 0.
fn parse_args(mut args: impl Iterator<Item = std::ffi::OsString>, argv0: &std::ffi::OsStr) -> (
    Mode,
    std::path::PathBuf,
    std::path::PathBuf,
) {
    let mut mode = None;
    let mut out_of_order_max_retire_per_cycle = None;
    let mut program_path = None;
    let mut in_file_path = None;

    while let Some(opt) = args.next() {
        match opt.to_str() {
            Some("--help") => {
                write_usage(std::io::stdout(), argv0);
                std::process::exit(0);
            },

            Some("--mode") if mode.is_none() => {
                let Some(arg) = args.next() else { write_usage_and_crash(argv0); };
                mode = Some(arg);
            },

            Some("--ooo-max-retire-per-cycle") if out_of_order_max_retire_per_cycle.is_none() => {
                let Some(arg) = args.next() else { write_usage_and_crash(argv0); };
                out_of_order_max_retire_per_cycle = Some(arg);
            },

            Some("--") => {
                program_path = args.next();
                in_file_path = args.next();
                break;
            },

            _ if program_path.is_none() => program_path = Some(opt),

            _ if in_file_path.is_none() => in_file_path = Some(opt),

            _ => write_usage_and_crash(argv0),
        }
    }

    // Anything left over after the two paths is an error.
    let None = args.next() else { write_usage_and_crash(argv0); };

    let max_retire_per_cycle = match out_of_order_max_retire_per_cycle {
        Some(arg) => {
            // Must be valid UTF-8, parse as an integer and be non-zero.
            let parsed =
                arg.to_str()
                .and_then(|arg| arg.parse::<usize>().ok())
                .and_then(std::num::NonZero::new);
            let Some(parsed) = parsed else { write_usage_and_crash(argv0); };
            parsed
        },

        None => std::num::NonZero::new(32).expect("hard-coded value is not 0"),
    };

    let mode = match mode.as_deref().and_then(std::ffi::OsStr::to_str) {
        Some("in-order") => Mode::InOrder,
        Some("in-order-ucode") => Mode::InOrderUcode,
        Some("out-of-order") => Mode::OutOfOrder { max_retire_per_cycle },
        _ => write_usage_and_crash(argv0),
    };

    let Some(program_path) = program_path else { write_usage_and_crash(argv0); };

    let Some(in_file_path) = in_file_path else { write_usage_and_crash(argv0); };

    (mode, program_path.into(), in_file_path.into())
}

/// Writes the usage line to stderr and exits the process with status 1.
fn write_usage_and_crash(argv0: &std::ffi::OsStr) -> ! {
    write_usage(std::io::stderr(), argv0);
    std::process::exit(1);
}

/// Writes the usage line to `w`. Write errors are deliberately ignored because every
/// caller is about to exit anyway.
fn write_usage(mut w: impl std::io::Write, argv0: &std::ffi::OsStr) {
    _ = writeln!(
        w,
        "Usage: {} --mode <in-order|in-order-ucode|out-of-order> [--ooo-max-retire-per-cycle <N>] [ -- ] <program> <in_file>",
        argv0.to_string_lossy(),
    );
}
{ 409 | write_usage(std::io::stderr(), argv0); 410 | std::process::exit(1); 411 | } 412 | 413 | fn write_usage(mut w: impl std::io::Write, argv0: &std::ffi::OsStr) { 414 | _ = writeln!(w, "Usage: {} [--ooo ] [ -- ] ", argv0.to_string_lossy()); 415 | } 416 | -------------------------------------------------------------------------------- /simulator/src/memory.rs: -------------------------------------------------------------------------------- 1 | pub(crate) struct Memory { 2 | ram: Vec, 3 | program: Vec, 4 | in_file: Vec, 5 | } 6 | 7 | #[derive(Clone, Copy, Debug)] 8 | pub(crate) enum LoadOp { 9 | Byte, 10 | ByteUnsigned, 11 | HalfWord, 12 | HalfWordUnsigned, 13 | Word, 14 | WordUnsigned, 15 | DoubleWord, 16 | } 17 | 18 | #[derive(Clone, Copy, Debug)] 19 | pub(crate) enum StoreOp { 20 | Byte, 21 | HalfWord, 22 | Word, 23 | DoubleWord, 24 | } 25 | 26 | impl Memory { 27 | pub(crate) fn new( 28 | program_path: impl AsRef, 29 | in_file_path: impl AsRef, 30 | ) -> Self { 31 | let program = std::fs::read(program_path).unwrap(); 32 | let in_file = std::fs::read(in_file_path).unwrap(); 33 | Self { 34 | ram: vec![], 35 | program, 36 | in_file, 37 | } 38 | } 39 | 40 | pub(crate) fn dump_console(&self) { 41 | println!("{}", Console(self.ram.get(0x400_000..).unwrap_or_default())); 42 | } 43 | } 44 | 45 | impl LoadOp { 46 | pub(crate) fn exec(self, memory: &Memory, address: i64) -> i64 { 47 | let address = address.cast_unsigned(); 48 | 49 | match self { 50 | Self::Byte | Self::ByteUnsigned => (), 51 | Self::HalfWord | Self::HalfWordUnsigned => assert!(address % 2 == 0, "0x{address:016x} is not aligned for reading a halfword"), 52 | Self::Word | Self::WordUnsigned => assert!(address % 4 == 0, "0x{address:016x} is not aligned for reading a word"), 53 | Self::DoubleWord => assert!(address % 8 == 0, "0x{address:016x} is not aligned for reading a doubleword"), 54 | } 55 | 56 | let data = match address { 57 | 0xffff_ffff_ffe0_0000..=0xffff_ffff_ffef_ffff => { 58 | // in_file 59 | let 
address = usize::try_from(address - 0xffff_ffff_ffe0_0000).unwrap(); 60 | [ 61 | memory.in_file.get(address).copied().unwrap_or_default(), 62 | memory.in_file.get(address + 1).copied().unwrap_or_default(), 63 | memory.in_file.get(address + 2).copied().unwrap_or_default(), 64 | memory.in_file.get(address + 3).copied().unwrap_or_default(), 65 | memory.in_file.get(address + 4).copied().unwrap_or_default(), 66 | memory.in_file.get(address + 5).copied().unwrap_or_default(), 67 | memory.in_file.get(address + 6).copied().unwrap_or_default(), 68 | memory.in_file.get(address + 7).copied().unwrap_or_default(), 69 | ] 70 | }, 71 | 72 | 0x8000_0000_0000_0000..=0xffff_ffff_ffff_ffff => { 73 | // program 74 | let address = usize::try_from(address - 0x8000_0000_0000_0000).unwrap(); 75 | [ 76 | memory.program.get(address).copied().unwrap_or_default(), 77 | memory.program.get(address + 1).copied().unwrap_or_default(), 78 | memory.program.get(address + 2).copied().unwrap_or_default(), 79 | memory.program.get(address + 3).copied().unwrap_or_default(), 80 | memory.program.get(address + 4).copied().unwrap_or_default(), 81 | memory.program.get(address + 5).copied().unwrap_or_default(), 82 | memory.program.get(address + 6).copied().unwrap_or_default(), 83 | memory.program.get(address + 7).copied().unwrap_or_default(), 84 | ] 85 | }, 86 | 87 | 0x0000_0000_0000_0000..=0x0000_0000_0400_1fff => { 88 | // ram 89 | let address = usize::try_from(address).unwrap(); 90 | [ 91 | memory.ram.get(address).copied().unwrap_or_default(), 92 | memory.ram.get(address + 1).copied().unwrap_or_default(), 93 | memory.ram.get(address + 2).copied().unwrap_or_default(), 94 | memory.ram.get(address + 3).copied().unwrap_or_default(), 95 | memory.ram.get(address + 4).copied().unwrap_or_default(), 96 | memory.ram.get(address + 5).copied().unwrap_or_default(), 97 | memory.ram.get(address + 6).copied().unwrap_or_default(), 98 | memory.ram.get(address + 7).copied().unwrap_or_default(), 99 | ] 100 | }, 101 | 102 | 
address => panic!("EFAULT: 0x{address:016x} is unmapped"), 103 | }; 104 | 105 | match self { 106 | Self::Byte => i8::from_le_bytes([data[0]]).into(), 107 | Self::ByteUnsigned => u64::from(u8::from_le_bytes([data[0]])).cast_signed(), 108 | Self::HalfWord => i16::from_le_bytes([data[0], data[1]]).into(), 109 | Self::HalfWordUnsigned => u64::from(u16::from_le_bytes([data[0], data[1]])).cast_signed(), 110 | Self::Word => i32::from_le_bytes([data[0], data[1], data[2], data[3]]).into(), 111 | Self::WordUnsigned => u64::from(u32::from_le_bytes([data[0], data[1], data[2], data[3]])).cast_signed(), 112 | Self::DoubleWord => i64::from_le_bytes(data), 113 | } 114 | } 115 | } 116 | 117 | impl TryFrom for LoadOp { 118 | type Error = (); 119 | 120 | fn try_from(funct3: u8) -> Result { 121 | Ok(match funct3 { 122 | 0b000 => Self::Byte, 123 | 0b001 => Self::HalfWord, 124 | 0b010 => Self::Word, 125 | 0b011 => Self::DoubleWord, 126 | 0b100 => Self::ByteUnsigned, 127 | 0b101 => Self::HalfWordUnsigned, 128 | 0b110 => Self::WordUnsigned, 129 | _ => return Err(()), 130 | }) 131 | } 132 | } 133 | 134 | impl StoreOp { 135 | pub(crate) fn exec(self, memory: &mut Memory, address: i64, value: i64) { 136 | let address = address.cast_unsigned(); 137 | 138 | match self { 139 | Self::Byte => (), 140 | Self::HalfWord => assert!(address % 2 == 0, "0x{address:016x} is not aligned for writing a halfword"), 141 | Self::Word => assert!(address % 4 == 0, "0x{address:016x} is not aligned for writing a word"), 142 | Self::DoubleWord => assert!(address % 8 == 0, "0x{address:016x} is not aligned for writing a doubleword"), 143 | } 144 | 145 | let data: &mut [u8; 8] = match address { 146 | 0xffff_ffff_ffe0_0000..=0xffff_ffff_ffef_ffff => { 147 | // in_file 148 | panic!("EFAULT: 0x{address:016x} is readonly"); 149 | }, 150 | 151 | 0x8000_0000_0000_0000..=0xffff_ffff_ffff_ffff => { 152 | // program 153 | panic!("EFAULT: 0x{address:016x} is readonly"); 154 | }, 155 | 156 | 
/// Borrowed byte buffer that is rendered as rows of 80 cells by its `Display` impl.
struct Console<'a>(&'a [u8]);

impl std::fmt::Display for Console<'_> {
    /// Writes the buffer as text, one row per 80 bytes.
    ///
    /// Every complete row is terminated by a newline; the trailing partial row (possibly empty)
    /// is written last without one. NUL bytes are shown as a space, a fixed set of letters, digits
    /// and punctuation is shown verbatim, and every other byte is shown as `0x` followed by two
    /// lowercase hex digits.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        const COLUMNS: usize = 80;

        /// Renders the bytes of a single row, escaping anything outside the allowed set.
        fn render_row(out: &mut std::fmt::Formatter<'_>, row: &[u8]) -> std::fmt::Result {
            use std::fmt::Write as _;

            row.iter().try_for_each(|&byte| match byte {
                b'\0' => out.write_char(' '),

                b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9'
                | b' ' | b'.' | b',' | b';' | b':' | b'-' | b'_'
                | b'\'' | b'"' | b'`' | b'='
                | b'(' | b')' | b'<' | b'>' => out.write_char(char::from(byte)),

                other => write!(out, "0x{other:02x}"),
            })
        }

        let mut rows = self.0.chunks_exact(COLUMNS);
        for (index, row) in rows.by_ref().enumerate() {
            // Separator between consecutive complete rows.
            if index > 0 {
                f.write_str("\n")?;
            }
            render_row(f, row)?;
        }

        // At least one complete row was written: end it before the leftover bytes.
        if self.0.len() >= COLUMNS {
            f.write_str("\n")?;
        }

        render_row(f, rows.remainder())
    }
}
/// Runs one round of the multi-cycle Booth multiplication (see `booth_multiplier_multi_cycle.sv`).
///
/// - `m_is_signed`, `m`: the multiplicand. It is widened to 66 bits, sign-extended when
///   `m_is_signed` is true and zero-extended otherwise.
/// - `i`: the round counter, starting at 0 as returned by [`State::initial`].
/// - `p`: the 132-bit working value, initially produced by [`State::initial`] from the multiplier.
///
/// Returns the next [`State`]:
/// - after round 16, `State::Mulw`, which carries the updated `p`, the next counter and the 32 bits
///   of `p` starting at bit 33;
/// - after round 32, `State::Mul`, which carries the 64 bits of `p` starting at bit 1 (`mul`) and
///   the 64 bits starting at bit 65 (`mulh`);
/// - otherwise `State::Pending` with the updated `p` and `i + 1`.
pub(crate) fn round(
    m_is_signed: bool,
    m: i64,
    i: u8,
    p: I132,
) -> State {
    let a: I66 = if m_is_signed { m.into() } else { m.cast_unsigned().into() };

    if i == 0 {
        // First round: only bit 1 of `p` is inspected and `p` moves right by a single bit.
        // `add_upper(true, a)` subtracts `a` from the upper 66 bits of `p`.
        let p = match p.to_u8() & 0b10 {
            0b00 => p >> 1,
            0b10 => p.add_upper(true, a) >> 1,
            _ => unreachable!(),
        };
        State::Pending { i: i + 1, p }
    }
    else {
        // Every later round inspects the low three bits of `p` and moves `p` right by two bits in total.
        // Adding or subtracting `a` after the first of the two shifts (arms 0b011 and 0b100) gives it
        // twice the weight it has in the arms that add or subtract before shifting.
        #[allow(clippy::match_same_arms)]
        let p = match p.to_u8() & 0b111 {
            // Nothing to add.
            0b000 => p >> 2,

            // + a
            0b001 |
            0b010 => p.add_upper(false, a) >> 2,

            // + 2a
            0b011 => (p >> 1).add_upper(false, a) >> 1,

            // - 2a
            0b100 => (p >> 1).add_upper(true, a) >> 1,

            // - a
            0b101 |
            0b110 => p.add_upper(true, a) >> 2,

            // Nothing to add.
            0b111 => p >> 2,

            _ => unreachable!(),
        };

        match i {
            // The 32-bit result is available here; the multiplication itself keeps going.
            16 => State::Mulw { i: i + 1, p, mulw: p.i32_at(33) },
            // Final round: extract the low and high 64-bit halves of the product.
            32 => State::Mul { mul: p.i64_at(1), mulh: p.i64_at(65) },
            i => State::Pending { i: i + 1, p },
        }
    }
}
"0x{m:016x}_{}64 * 0x{r:016x}_{}64 -> 0x{mulh:016x}_{mul:016x} / 0x{mulw:08x}", 220 | if m_is_signed { "i" } else { "u" }, 221 | if r_is_signed { "i" } else { "u" }, 222 | ); 223 | 224 | let expected_mulw = { 225 | #[allow(clippy::cast_possible_truncation)] 226 | let m = i64::from(m as i32); 227 | #[allow(clippy::cast_possible_truncation)] 228 | let r = i64::from(r as i32); 229 | #[allow(clippy::cast_possible_truncation)] 230 | let expected = m.wrapping_mul(r) as i32; 231 | expected 232 | }; 233 | 234 | let (expected_mul, expected_mulh) = { 235 | let m: i128 = if m_is_signed { m.into() } else { m.cast_unsigned().into() }; 236 | let r: i128 = if r_is_signed { r.into() } else { r.cast_unsigned().into() }; 237 | let expected = m.wrapping_mul(r); 238 | #[allow(clippy::cast_possible_truncation)] 239 | (expected as i64, (expected >> 64) as i64) 240 | }; 241 | 242 | assert_eq!(mulw, expected_mulw); 243 | assert_eq!(mul, expected_mul); 244 | assert_eq!(mulh, expected_mulh); 245 | } 246 | } 247 | } 248 | } 249 | -------------------------------------------------------------------------------- /simulator/src/tag.rs: -------------------------------------------------------------------------------- 1 | #[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)] 2 | #[repr(transparent)] 3 | pub(crate) struct Tag(u16); 4 | 5 | pub(crate) const EMPTY_TAG: Tag = Tag(0); 6 | 7 | impl std::fmt::Display for Tag { 8 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 9 | self.0.fmt(f) 10 | } 11 | } 12 | 13 | #[repr(transparent)] 14 | pub(crate) struct TagAllocator { 15 | next: Tag, 16 | } 17 | 18 | impl TagAllocator { 19 | pub(crate) fn allocate(&mut self) -> OneTickTags4 { 20 | let next = &mut self.next; 21 | let result = OneTickTags4(Tag(next.0), Tag(next.0.wrapping_add(1)), Tag(next.0.wrapping_add(2)), Tag(next.0.wrapping_add(3))); 22 | next.0 = next.0.wrapping_add(4); 23 | result 24 | } 25 | } 26 | 27 | impl Default for TagAllocator { 28 | fn default() -> Self { 
29 | Self { next: EMPTY_TAG } 30 | } 31 | } 32 | 33 | pub(crate) struct OneTickTags4(Tag, Tag, Tag, Tag); 34 | 35 | impl OneTickTags4 { 36 | pub(crate) fn allocate(self) -> (Tag, OneTickTags3) { 37 | (self.0, OneTickTags3(self.1, self.2, self.3)) 38 | } 39 | } 40 | 41 | pub(crate) struct OneTickTags3(Tag, Tag, Tag); 42 | 43 | impl OneTickTags3 { 44 | pub(crate) fn allocate(self) -> (Tag, OneTickTags2) { 45 | (self.0, OneTickTags2(self.1, self.2)) 46 | } 47 | } 48 | 49 | pub(crate) struct OneTickTags2(Tag, Tag); 50 | 51 | impl OneTickTags2 { 52 | pub(crate) fn allocate(self) -> (Tag, OneTickTags1) { 53 | (self.0, OneTickTags1(self.1)) 54 | } 55 | } 56 | 57 | pub(crate) struct OneTickTags1(Tag); 58 | 59 | impl OneTickTags1 { 60 | pub(crate) fn allocate(self) -> Tag { 61 | self.0 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /simulator/src/x_regs.rs: -------------------------------------------------------------------------------- 1 | use crate::{RegisterValue, Tag}; 2 | 3 | #[derive(Debug, Default)] 4 | pub(crate) struct XRegs { 5 | inner: [(i64, Option); 32], 6 | } 7 | 8 | #[derive(Clone, Copy, Debug, Eq, PartialEq)] 9 | pub(crate) enum XReg { 10 | X0, 11 | X1, 12 | X2, 13 | X3, 14 | X4, 15 | X5, 16 | X6, 17 | X7, 18 | X8, 19 | X9, 20 | X10, 21 | X11, 22 | X12, 23 | X13, 24 | X14, 25 | X15, 26 | X16, 27 | X17, 28 | X18, 29 | X19, 30 | X20, 31 | X21, 32 | X22, 33 | X23, 34 | X24, 35 | X25, 36 | X26, 37 | X27, 38 | X28, 39 | X29, 40 | X30, 41 | X31, 42 | } 43 | 44 | impl XRegs { 45 | pub(crate) fn load(&self, x_reg: XReg) -> RegisterValue { 46 | let i = usize::from(x_reg); 47 | let reg = self.inner[i]; 48 | if let Some(tag) = reg.1 { 49 | RegisterValue::Tag(tag) 50 | } 51 | else { 52 | RegisterValue::Value(reg.0) 53 | } 54 | } 55 | 56 | pub(crate) fn rename(&mut self, x_reg: XReg, tag: Tag) -> bool { 57 | let i = usize::from(x_reg); 58 | if i == 0 { 59 | false 60 | } 61 | else { 62 | self.inner[i].1 = Some(tag); 63 
| true 64 | } 65 | } 66 | 67 | pub(crate) fn store(&mut self, x_reg: XReg, tag: Tag, value: i64) { 68 | let i = usize::from(x_reg); 69 | if i != 0 { 70 | let reg = &mut self.inner[i]; 71 | reg.0 = value; 72 | if reg.1 == Some(tag) { 73 | reg.1 = None; 74 | } 75 | } 76 | } 77 | 78 | pub(crate) fn reset_all_tags( 79 | &mut self, 80 | tags: impl IntoIterator)>, 81 | ) { 82 | for reg in &mut self.inner { 83 | reg.1 = None; 84 | } 85 | 86 | for (x_reg, tag, _) in tags { 87 | let i = usize::from(x_reg); 88 | if i != 0 { 89 | self.inner[i].1 = Some(tag); 90 | } 91 | } 92 | } 93 | } 94 | 95 | impl std::fmt::Display for XRegs { 96 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 97 | for row in 0_usize..8 { 98 | if row != 0 { 99 | writeln!(f)?; 100 | } 101 | 102 | for col in 0_usize..4 { 103 | if col != 0 { 104 | write!(f, " | ")?; 105 | } 106 | 107 | let i = row + col * 8; 108 | let (value, tag) = self.inner[i]; 109 | write!(f, "x{i:<2}: 0x{value:016x}")?; 110 | match tag { 111 | Some(tag) => write!(f, " # {tag:3} #")?, 112 | None => write!(f, " # #")?, 113 | } 114 | } 115 | } 116 | 117 | Ok(()) 118 | } 119 | } 120 | 121 | impl TryFrom for XReg { 122 | type Error = (); 123 | 124 | fn try_from(raw: u8) -> Result { 125 | u32::from(raw).try_into() 126 | } 127 | } 128 | 129 | impl TryFrom for XReg { 130 | type Error = (); 131 | 132 | fn try_from(raw: u16) -> Result { 133 | u32::from(raw).try_into() 134 | } 135 | } 136 | 137 | impl TryFrom for XReg { 138 | type Error = (); 139 | 140 | fn try_from(raw: u32) -> Result { 141 | Ok(match raw { 142 | 0 => Self::X0, 143 | 1 => Self::X1, 144 | 2 => Self::X2, 145 | 3 => Self::X3, 146 | 4 => Self::X4, 147 | 5 => Self::X5, 148 | 6 => Self::X6, 149 | 7 => Self::X7, 150 | 8 => Self::X8, 151 | 9 => Self::X9, 152 | 10 => Self::X10, 153 | 11 => Self::X11, 154 | 12 => Self::X12, 155 | 13 => Self::X13, 156 | 14 => Self::X14, 157 | 15 => Self::X15, 158 | 16 => Self::X16, 159 | 17 => Self::X17, 160 | 18 => Self::X18, 161 | 19 
=> Self::X19, 162 | 20 => Self::X20, 163 | 21 => Self::X21, 164 | 22 => Self::X22, 165 | 23 => Self::X23, 166 | 24 => Self::X24, 167 | 25 => Self::X25, 168 | 26 => Self::X26, 169 | 27 => Self::X27, 170 | 28 => Self::X28, 171 | 29 => Self::X29, 172 | 30 => Self::X30, 173 | 31 => Self::X31, 174 | _ => return Err(()), 175 | }) 176 | } 177 | } 178 | 179 | impl From for u8 { 180 | fn from(x_reg: XReg) -> Self { 181 | match x_reg { 182 | XReg::X0 => 0, 183 | XReg::X1 => 1, 184 | XReg::X2 => 2, 185 | XReg::X3 => 3, 186 | XReg::X4 => 4, 187 | XReg::X5 => 5, 188 | XReg::X6 => 6, 189 | XReg::X7 => 7, 190 | XReg::X8 => 8, 191 | XReg::X9 => 9, 192 | XReg::X10 => 10, 193 | XReg::X11 => 11, 194 | XReg::X12 => 12, 195 | XReg::X13 => 13, 196 | XReg::X14 => 14, 197 | XReg::X15 => 15, 198 | XReg::X16 => 16, 199 | XReg::X17 => 17, 200 | XReg::X18 => 18, 201 | XReg::X19 => 19, 202 | XReg::X20 => 20, 203 | XReg::X21 => 21, 204 | XReg::X22 => 22, 205 | XReg::X23 => 23, 206 | XReg::X24 => 24, 207 | XReg::X25 => 25, 208 | XReg::X26 => 26, 209 | XReg::X27 => 27, 210 | XReg::X28 => 28, 211 | XReg::X29 => 29, 212 | XReg::X30 => 30, 213 | XReg::X31 => 31, 214 | } 215 | } 216 | } 217 | 218 | impl From for usize { 219 | fn from(x_reg: XReg) -> Self { 220 | u8::from(x_reg).into() 221 | } 222 | } 223 | -------------------------------------------------------------------------------- /src/register.rs: -------------------------------------------------------------------------------- 1 | use crate::{EncodeError, instruction::Imm, ParseError}; 2 | 3 | macro_rules! 
registers { 4 | ( 5 | $vis:vis enum $ty:ident { 6 | $($variant:ident = $asm:literal $(, $asm_alt:literal)* => $encoded:literal ,)* 7 | } 8 | ) => { 9 | #[derive(Clone, Copy, Debug, Eq, PartialEq)] 10 | $vis enum $ty { 11 | $($variant ,)* 12 | } 13 | 14 | impl $ty { 15 | $vis const fn encode_5b(self) -> u32 { 16 | match self { 17 | $(Self::$variant => $encoded ,)* 18 | } 19 | } 20 | } 21 | 22 | impl core::fmt::Display for $ty { 23 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 24 | match self { 25 | $(Self::$variant => f.write_str($asm),)* 26 | } 27 | } 28 | } 29 | 30 | impl<'a> TryFrom<&'a [u8]> for $ty { 31 | type Error = ParseError<'a>; 32 | 33 | fn try_from(token: &'a [u8]) -> Result { 34 | let token = core::str::from_utf8(token).map_err(|_| crate::ParseError::InvalidUtf8 { token })?; 35 | 36 | Ok(match token { 37 | $($asm $(| $asm_alt)* => Self::$variant,)* 38 | 39 | _ => return Err(crate::ParseError::MalformedRegister { token } ), 40 | }) 41 | } 42 | } 43 | }; 44 | } 45 | 46 | registers! 
{ 47 | pub enum Register { 48 | X0 = "x0", "zero" => 0b00000, 49 | X1 = "x1", "ra" => 0b00001, 50 | X2 = "x2", "sp" => 0b00010, 51 | X3 = "x3", "gp" => 0b00011, 52 | X4 = "x4", "tp" => 0b00100, 53 | X5 = "x5", "t0" => 0b00101, 54 | X6 = "x6", "t1" => 0b00110, 55 | X7 = "x7", "t2" => 0b00111, 56 | X8 = "x8", "s0", "fp" => 0b01000, 57 | X9 = "x9", "s1" => 0b01001, 58 | X10 = "x10", "a0" => 0b01010, 59 | X11 = "x11", "a1" => 0b01011, 60 | X12 = "x12", "a2" => 0b01100, 61 | X13 = "x13", "a3" => 0b01101, 62 | X14 = "x14", "a4" => 0b01110, 63 | X15 = "x15", "a5" => 0b01111, 64 | X16 = "x16", "a6" => 0b10000, 65 | X17 = "x17", "a7" => 0b10001, 66 | X18 = "x18", "s2" => 0b10010, 67 | X19 = "x19", "s3" => 0b10011, 68 | X20 = "x20", "s4" => 0b10100, 69 | X21 = "x21", "s5" => 0b10101, 70 | X22 = "x22", "s6" => 0b10110, 71 | X23 = "x23", "s7" => 0b10111, 72 | X24 = "x24", "s8" => 0b11000, 73 | X25 = "x25", "s9" => 0b11001, 74 | X26 = "x26", "s10" => 0b11010, 75 | X27 = "x27", "s11" => 0b11011, 76 | X28 = "x28", "t3" => 0b11100, 77 | X29 = "x29", "t4" => 0b11101, 78 | X30 = "x30", "t5" => 0b11110, 79 | X31 = "x31", "t6" => 0b11111, 80 | 81 | // RV{32,64}{F,D,Q} 82 | F0 = "f0", "ft0" => 0b00000, 83 | F1 = "f1", "ft1" => 0b00001, 84 | F2 = "f2", "ft2" => 0b00010, 85 | F3 = "f3", "ft3" => 0b00011, 86 | F4 = "f4", "ft4" => 0b00100, 87 | F5 = "f5", "ft5" => 0b00101, 88 | F6 = "f6", "ft6" => 0b00110, 89 | F7 = "f7", "ft7" => 0b00111, 90 | F8 = "f8", "fs0" => 0b01000, 91 | F9 = "f9", "fs1" => 0b01001, 92 | F10 = "f10", "fa0" => 0b01010, 93 | F11 = "f11", "fa1" => 0b01011, 94 | F12 = "f12", "fa2" => 0b01100, 95 | F13 = "f13", "fa3" => 0b01101, 96 | F14 = "f14", "fa4" => 0b01110, 97 | F15 = "f15", "fa5" => 0b01111, 98 | F16 = "f16", "fa6" => 0b10000, 99 | F17 = "f17", "fa7" => 0b10001, 100 | F18 = "f18", "fs2" => 0b10010, 101 | F19 = "f19", "fs3" => 0b10011, 102 | F20 = "f20", "fs4" => 0b10100, 103 | F21 = "f21", "fs5" => 0b10101, 104 | F22 = "f22", "fs6" => 0b10110, 105 | F23 = "f23", 
"fs7" => 0b10111, 106 | F24 = "f24", "fs8" => 0b11000, 107 | F25 = "f25", "fs9" => 0b11001, 108 | F26 = "f26", "fs10" => 0b11010, 109 | F27 = "f27", "fs11" => 0b11011, 110 | F28 = "f28", "ft8" => 0b11100, 111 | F29 = "f29", "ft9" => 0b11101, 112 | F30 = "f30", "ft10" => 0b11110, 113 | F31 = "f31", "ft11" => 0b11111, 114 | } 115 | } 116 | 117 | impl Register { 118 | pub(crate) const fn encode_rd_5b(self) -> u32 { 119 | self.encode_5b() << 7 120 | } 121 | 122 | pub(crate) const fn encode_rs1_5b(self) -> u32 { 123 | self.encode_5b() << 15 124 | } 125 | 126 | pub(crate) const fn encode_rs2_5b(self) -> u32 { 127 | self.encode_5b() << 20 128 | } 129 | 130 | pub(crate) const fn is_compressible(self) -> bool { 131 | matches!( 132 | self, 133 | Self::X8 | 134 | Self::X9 | 135 | Self::X10 | 136 | Self::X11 | 137 | Self::X12 | 138 | Self::X13 | 139 | Self::X14 | 140 | Self::X15, 141 | ) 142 | } 143 | 144 | pub(crate) fn encode_3b(self) -> Result { 145 | Ok(match self { 146 | Self::X8 => 0b000, 147 | Self::X9 => 0b001, 148 | Self::X10 => 0b010, 149 | Self::X11 => 0b011, 150 | Self::X12 => 0b100, 151 | Self::X13 => 0b101, 152 | Self::X14 => 0b110, 153 | Self::X15 => 0b111, 154 | _ => return Err(EncodeError::IncompressibleRegister), 155 | }) 156 | } 157 | } 158 | 159 | macro_rules! 
csr { 160 | ( 161 | $vis:vis enum $ty:ident { 162 | $($variant:ident = $asm:literal => $encoded:literal ,)* 163 | } 164 | ) => { 165 | #[derive(Clone, Copy, Debug, Eq, PartialEq)] 166 | $vis enum $ty { 167 | $($variant ,)* 168 | 169 | Other(u16), 170 | } 171 | 172 | impl $ty { 173 | $vis fn encode_12b(self) -> u32 { 174 | match self { 175 | $(Self::$variant => $encoded << 20,)* 176 | 177 | Self::Other(encoded) => u32::from(encoded) << 20, 178 | } 179 | } 180 | } 181 | 182 | impl core::fmt::Display for $ty { 183 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 184 | match self { 185 | $(Self::$variant => f.write_str($asm),)* 186 | 187 | Self::Other(encoded) => write!(f, "{encoded}"), 188 | } 189 | } 190 | } 191 | 192 | impl<'a> TryFrom<&'a [u8]> for $ty { 193 | type Error = ParseError<'a>; 194 | 195 | fn try_from(token: &'a [u8]) -> Result { 196 | if let Ok(Imm(encoded)) = token.try_into() { 197 | let encoded = encoded.try_into().map_err(|_| crate::ParseError::MalformedIntegerCsr { token })?; 198 | Ok(Self::Other(encoded)) 199 | } 200 | else { 201 | let token = core::str::from_utf8(token).map_err(|_| crate::ParseError::InvalidUtf8 { token })?; 202 | 203 | Ok(match token { 204 | $($asm => Self::$variant,)* 205 | 206 | _ => return Err(crate::ParseError::MalformedRegister { token }), 207 | }) 208 | } 209 | } 210 | } 211 | }; 212 | } 213 | 214 | csr! 
/// Bit set describing a base ISA together with a set of extensions.
#[derive(Clone, Copy, Debug)]
pub struct SupportedExtensions(u8);

impl SupportedExtensions {
    /// The empty set; every other set contains it.
    pub const RV32I: Self = Self(0b0_0000);
    pub const RV64I: Self = Self(0b0_0001);
    pub const RVC: Self = Self(0b0_0010);
    /// Includes the `RVC` bit in addition to its own.
    pub const ZCB: Self = Self::RVC.union(Self(0b0_0100));
    pub const ZBA: Self = Self(0b0_1000);
    pub const ZBB: Self = Self(0b1_0000);

    pub const RV32C: Self = Self::RV32I.union(Self::RVC);
    pub const RV32C_ZCB: Self = Self::RV32I.union(Self::RVC).union(Self::ZCB);
    pub const RV64C: Self = Self::RV64I.union(Self::RVC);
    pub const RV64C_ZCB: Self = Self::RV64I.union(Self::RVC).union(Self::ZCB);

    /// Set union usable in constant expressions.
    const fn union(self, other: Self) -> Self {
        Self(self.0 | other.0)
    }

    /// Returns `true` when every bit set in `other` is also set in `self`.
    pub(crate) fn contains(self, other: Self) -> bool {
        (other.0 & !self.0) == 0
    }
}

impl core::ops::BitAnd for SupportedExtensions {
    type Output = Self;

    /// Set intersection.
    fn bitand(self, other: Self) -> Self::Output {
        let Self(lhs) = self;
        let Self(rhs) = other;
        Self(lhs & rhs)
    }
}

impl core::ops::BitAndAssign for SupportedExtensions {
    fn bitand_assign(&mut self, other: Self) {
        *self = *self & other;
    }
}

impl core::ops::BitOr for SupportedExtensions {
    type Output = Self;

    /// Set union.
    fn bitor(self, other: Self) -> Self::Output {
        self.union(other)
    }
}

impl core::ops::BitOrAssign for SupportedExtensions {
    fn bitor_assign(&mut self, other: Self) {
        *self = *self | other;
    }
}

impl core::ops::Not for SupportedExtensions {
    type Output = Self;

    /// Flips all eight bits of the underlying byte, including the ones no constant uses.
    fn not(self) -> Self::Output {
        let Self(bits) = self;
        Self(!bits)
    }
}
# 1 | 0x31 16 | # 2 | 0x32 17 | # 3 | 0x33 18 | # 4 | 0x34 19 | # 5 | 0x35 20 | # 6 | 0x36 21 | # 7 | 0x37 22 | # 8 | 0x38 23 | # 9 | 0x39 24 | # 25 | # 26 | # Operators 27 | # 28 | # & | 0x26 29 | # + | 0x2b 30 | # - | 0x2d 31 | # << | 0x3c 32 | # >> | 0x3e 33 | # ^ | 0x5e 34 | # | | 0x7c 35 | 36 | 37 | li sp, 0x400000 38 | li fp, -8 39 | auipc a5, 0 40 | 41 | 42 | # loop 43 | lbu a0, 0(fp) 44 | 45 | srli a4, a0, 1 46 | add a4, a4, a5 47 | lbu a4, 160(a4) # cjump 48 | add a4, a4, a5 49 | jr a4 50 | 51 | 52 | # end 53 | sh a1, 0(fp) 54 | 55 | 56 | # parse_negative_int 57 | li a2, -1 58 | # fallthrough parse_int 59 | 60 | 61 | # parse_int 62 | addi sp, sp, -2 63 | sh a1, 0(sp) 64 | li a1, 0 65 | # fallthrough parse_int_inner 66 | 67 | 68 | # parse_int_inner 69 | addi a0, a0, -48 # a0 = a0 - '0' 70 | 71 | sh2add a1, a1, a1 # a1 = a1 * 10 + a0 72 | sh1add a1, a1, a0 # 73 | 74 | lbu a0, 0(fp) # a0 = *IO 75 | andi a3, a0, 0xdf # if (a0 != 0x00 && a0 != 0x20) 76 | bnez a3, -18 # goto parse_int_inner 77 | 78 | add a1, a1, a2 # a1 = a2 == -1 ? 
-a1 : a1 79 | xor a1, a1, a2 # 80 | li a2, 0 81 | j -54 # loop 82 | 83 | 84 | # op_and 85 | lhu a3, 0(sp) 86 | addi sp, sp, 2 87 | and a1, a3, a1 88 | lhu s1, 0(fp) 89 | j -66 # loop 90 | 91 | 92 | # op_add 93 | lhu a3, 0(sp) 94 | addi sp, sp, 2 95 | add a1, a3, a1 96 | lhu s1, 0(fp) 97 | j -78 # loop 98 | 99 | 100 | # hyphen 101 | lbu a0, 0(fp) # a0 = *IO 102 | andi a3, a0, 0xdf # if (a0 != 0x00 && a0 != 0x20) 103 | bnez a3, -68 # goto parse_negative_int 104 | 105 | lhu a3, 0(sp) 106 | addi sp, sp, 2 107 | sub a1, a3, a1 108 | j -98 # loop 109 | 110 | 111 | # op_lsl 112 | lhu a3, 0(sp) 113 | addi sp, sp, 2 114 | sll a1, a3, a1 115 | lhu s1, 0(fp) 116 | lhu s1, 0(fp) 117 | j -114 # loop 118 | 119 | 120 | # op_lsr 121 | lhu a3, 0(sp) 122 | addi sp, sp, 2 123 | srl a1, a3, a1 124 | lhu s1, 0(fp) 125 | lhu s1, 0(fp) 126 | j -130 # loop 127 | 128 | 129 | # op_xor 130 | lhu a3, 0(sp) 131 | addi sp, sp, 2 132 | xor a1, a3, a1 133 | lhu s1, 0(fp) 134 | j -142 # loop 135 | 136 | 137 | # op_or 138 | lhu a3, 0(sp) 139 | addi sp, sp, 2 140 | or a1, a3, a1 141 | lhu s1, 0(fp) 142 | j -154 # loop 143 | 144 | 145 | # include calibrating-laser-cannons-2.data 146 | -------------------------------------------------------------------------------- /tc/calibrating-laser-cannons-2.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | static volatile uint8_t* const IN = (volatile uint8_t*)(intptr_t)-8; 5 | static volatile uint16_t* const OUT = (volatile uint16_t*)(intptr_t)-8; 6 | 7 | static uint16_t* const STACK = (uint16_t*)(intptr_t)0x400000; 8 | 9 | static void done(uint16_t result) { 10 | *OUT = result; 11 | __builtin_unreachable(); 12 | } 13 | 14 | static uint16_t parse_int(uint8_t c, bool negative) { 15 | uint16_t current_token = 0; 16 | 17 | for (;;) { 18 | c = c - '0'; 19 | current_token = current_token * 10 + c; 20 | 21 | c = *IN; 22 | switch (c) { 23 | case ' ': 24 | if (negative) { 25 | current_token =
-current_token; 26 | } 27 | return current_token; 28 | 29 | case '0' ... '9': 30 | break; 31 | 32 | default: 33 | __builtin_unreachable(); 34 | break; 35 | } 36 | } 37 | } 38 | 39 | int main(void) { 40 | uint16_t* stack_top = STACK; 41 | 42 | uint16_t previous_num; 43 | { 44 | uint8_t c = *IN; 45 | bool negative = c == '-'; 46 | if (negative) { 47 | c = *IN; 48 | } 49 | previous_num = parse_int(c, negative); 50 | } 51 | 52 | for (;;) { 53 | uint8_t c = *IN; 54 | 55 | switch (c) { 56 | case '\0': 57 | done(previous_num); 58 | break; 59 | 60 | case '&': 61 | previous_num = *(stack_top++) & previous_num; 62 | *IN; 63 | break; 64 | 65 | case '+': 66 | previous_num = *(stack_top++) + previous_num; 67 | *IN; 68 | break; 69 | 70 | case '-': 71 | c = *IN; 72 | switch (c) { 73 | case '\0': 74 | previous_num = *(stack_top++) - previous_num; 75 | done(previous_num); 76 | break; 77 | 78 | case ' ': 79 | previous_num = *(stack_top++) - previous_num; 80 | break; 81 | 82 | case '0' ... '9': 83 | *(--stack_top) = previous_num; 84 | previous_num = parse_int(c, true); 85 | break; 86 | 87 | default: 88 | __builtin_unreachable(); 89 | break; 90 | } 91 | break; 92 | 93 | case '0' ... 
'9': 94 | *(--stack_top) = previous_num; 95 | previous_num = parse_int(c, false); 96 | break; 97 | 98 | case '<': 99 | previous_num = (uint16_t)(((uint64_t)*(stack_top++)) << previous_num); 100 | *IN; 101 | *IN; 102 | break; 103 | 104 | case '>': 105 | previous_num = (uint16_t)(((uint64_t)*(stack_top++)) >> previous_num); 106 | *IN; 107 | *IN; 108 | break; 109 | 110 | case '^': 111 | previous_num = *(stack_top++) ^ previous_num; 112 | *IN; 113 | break; 114 | 115 | case '|': 116 | previous_num = *(stack_top++) | previous_num; 117 | *IN; 118 | break; 119 | 120 | default: 121 | __builtin_unreachable(); 122 | break; 123 | } 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /tc/calibrating-laser-cannons-2.data: -------------------------------------------------------------------------------- 1 | ; cjump 2 | 3 | ; 0x00 4 | 20 ; 0x00: end 5 | 0 6 | 7 | ; 0x10 8 | 0 9 | 10 | ; 0x20 11 | 0 12 | 60 ; 0x26: op_and 13 | 0 14 | 72 ; 0x2b: op_add 15 | 84 ; 0x2d: hyphen 16 | 0 17 | 18 | ; 0x30 19 | 24 ; 0x30, 0x31: parse_int 20 | 24 ; 0x32, 0x33: parse_int 21 | 24 ; 0x34, 0x35: parse_int 22 | 24 ; 0x36, 0x37: parse_int 23 | 24 ; 0x38, 0x39: parse_int 24 | 0 25 | 104 ; 0x3c: op_lsl 26 | 120 ; 0x3e: op_lsr 27 | 28 | ; 0x40 29 | 0 30 | 31 | ; 0x50 32 | 0 33 | 136 ; 0x5e: op_xor 34 | 35 | ; 0x60 36 | 0 37 | 38 | ; 0x70 39 | 0 40 | 148 ; 0x7c: op_or 41 | -------------------------------------------------------------------------------- /tc/calibrating-laser-cannons.S: -------------------------------------------------------------------------------- 1 | li fp, -8 2 | lbu a0, 0(fp) 3 | sh1add a0, a0, a0 4 | slli a0, a0, 1 5 | sb a0, 0(fp) 6 | -------------------------------------------------------------------------------- /tc/calibrating-laser-cannons.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | static volatile uint8_t* const IO = (volatile uint8_t*)(intptr_t)-8; 4 | 5 | int 
main(void) { 6 | uint8_t x = *IO; 7 | *IO = x * 6; 8 | 9 | __builtin_unreachable(); 10 | } 11 | -------------------------------------------------------------------------------- /tc/conditional-jumps.S: -------------------------------------------------------------------------------- 1 | li fp, -8 2 | li a2, 37 3 | 4 | # loop 5 | lbu a0, 0(fp) 6 | addi a1, a1, 1 7 | bne a0, a2, -4 # loop 8 | 9 | sb a1, 0(fp) 10 | -------------------------------------------------------------------------------- /tc/conditional-jumps.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | static volatile uint8_t* const IO = (volatile uint8_t*)(intptr_t)-8; 4 | 5 | int main(void) { 6 | uint8_t count = 0; 7 | while (true) { 8 | uint8_t n = *IO; 9 | count++; 10 | if (n == 37) { 11 | break; 12 | } 13 | } 14 | *IO = count; 15 | } 16 | -------------------------------------------------------------------------------- /tc/dancing-machine.S: -------------------------------------------------------------------------------- 1 | li fp, -8 2 | lbu a0, 0(fp) 3 | 4 | # loop 5 | srli a1, a0, 1 6 | xor a1, a0, a1 7 | slli a2, a1, 1 8 | zext.b a2, a2 9 | xor a2, a1, a2 10 | srli a0, a2, 2 11 | xor a0, a0, a2 12 | andi a1, a0, 0b11 13 | sb a1, 0(fp) 14 | j -26 # loop 15 | -------------------------------------------------------------------------------- /tc/dancing-machine.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | static volatile uint8_t* const IO = (volatile uint8_t*)(intptr_t)-8; 4 | 5 | int main(void) { 6 | uint8_t state = *IO; 7 | 8 | while (true) { 9 | uint8_t temp1 = state ^ (state >> 1); 10 | uint8_t temp2 = temp1 ^ (temp1 << 1); 11 | state = temp2 ^ (temp2 >> 2); 12 | *IO = state % 4; 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /tc/delicious-order.S: -------------------------------------------------------------------------------- 1 | li 
fp, -8 2 | li sp, 0xff # end of array 3 | mv a0, sp # one before array start 4 | addi a1, sp, -15 5 | 6 | # next_order 7 | mv a2, a0 8 | lbu a3, 0(fp) 9 | # find_insertion_pos 10 | beq a2, sp, 16 # found_insertion_pos 11 | lbu a4, 1(a2) 12 | bgeu a4, a3, 10 # found_insertion_pos 13 | sb a4, 0(a2) 14 | addi a2, a2, 1 15 | j -14 # find_insertion_pos 16 | # found_insertion_pos 17 | sb a3, 0(a2) 18 | addi a0, a0, -1 19 | bne a0, a1, -24 # next_order 20 | 21 | # prev_order 22 | lbu a1, 1(a0) 23 | sb a1, 0(fp) 24 | addi a0, a0, 1 25 | j -6 # prev_order 26 | -------------------------------------------------------------------------------- /tc/delicious-order.c: -------------------------------------------------------------------------------- 1 | #include <stdint.h> 2 | 3 | static volatile uint8_t* const IO = (volatile uint8_t*)(intptr_t)-8; 4 | // Reads 15 bytes from IO, keeping them sorted ascending by insertion, then writes them out in order. 5 | int main(void) { 6 | uint8_t scores[15]; 7 | for (int i = 0; i < sizeof(scores) / sizeof(scores[0]); i++) { 8 | uint8_t new = *IO; 9 | 10 | int j; 11 | for (j = i; j > 0; j--) { 12 | if (scores[j - 1] <= new) { 13 | break; 14 | } 15 | 16 | scores[j] = scores[j - 1]; 17 | } 18 | 19 | scores[j] = new; 20 | } 21 | 22 | for (int i = 0; i < sizeof(scores) / sizeof(scores[0]); i++) { 23 | *IO = scores[i]; 24 | } 25 | 26 | __builtin_unreachable(); 27 | } 28 | -------------------------------------------------------------------------------- /tc/divide.S: -------------------------------------------------------------------------------- 1 | li fp, -8 2 | lbu a0, 0(fp) # numerator 3 | lbu a1, 0(fp) # denominator 4 | slli a1, a1, 7 5 | li a4, 7 6 | j 6 # loop2 7 | 8 | # loop 9 | srli a1, a1, 1 10 | addi a4, a4, -1 11 | # loop2 12 | sltu a3, a0, a1 # a3 = (numerator < denominator) ? 1 : 0 13 | czero.nez a5, a1, a3 # a5 = (numerator < denominator) ? 0 : denominator 14 | sub a0, a0, a5 15 | binvi a3, a3, 0 # a3 = (numerator >= denominator) ?
1 : 0 16 | sh1add a2, a2, a3 17 | bgtz a4, -22 # loop 18 | 19 | sb a2, 0(fp) # quotient 20 | sb a0, 0(fp) # remainder 21 | -------------------------------------------------------------------------------- /tc/divide.c: -------------------------------------------------------------------------------- 1 | #include <stdint.h> 2 | 3 | static volatile uint8_t* const IO = (volatile uint8_t*)(intptr_t)-8; 4 | // Reads numerator then denominator from IO; writes the quotient then the remainder, found by 8 rounds of shift-and-subtract. 5 | int main(void) { 6 | uint16_t numerator = *IO; 7 | uint16_t denominator = *IO << 7; 8 | uint8_t quotient = 0; 9 | 10 | for (int8_t i = 7; ; i--) { 11 | if (numerator >= denominator) { 12 | numerator -= denominator; 13 | quotient |= 1; 14 | } 15 | 16 | if (i == 0) { 17 | break; 18 | } 19 | 20 | quotient <<= 1; 21 | denominator >>= 1; 22 | } 23 | 24 | *IO = quotient; 25 | *IO = numerator; 26 | 27 | __builtin_unreachable(); 28 | } 29 | -------------------------------------------------------------------------------- /tc/masking-time.S: -------------------------------------------------------------------------------- 1 | li fp, -8 2 | lbu a0, 0(fp) 3 | andi a0, a0, 0b11 4 | sb a0, 0(fp) 5 | -------------------------------------------------------------------------------- /tc/masking-time.c: -------------------------------------------------------------------------------- 1 | #include <stdint.h> 2 | 3 | static volatile uint8_t* const IO = (volatile uint8_t*)(intptr_t)-8; 4 | // Reads one byte from IO and writes back its low two bits. 5 | int main(void) { 6 | uint8_t x = *IO; 7 | *IO = x & 3; 8 | 9 | __builtin_unreachable(); 10 | } 11 | -------------------------------------------------------------------------------- /tc/maze.S: -------------------------------------------------------------------------------- 1 | li fp, -8 2 | # s1 = LEFT = 0 3 | # a1 = FORWARD = 1 4 | li a1, 1 5 | # a2 = RIGHT = 2 6 | li a2, 2 7 | # a4 = USE = 4 8 | li a4, 4 9 | 10 | # loop 11 | sb s1, 0(fp) 12 | 13 | # check_and_turn_right 14 | sb a4, 0(fp) 15 | lbu a3, 0(fp) 16 | beqz a3, 6 # done_turning_right 17 | sb a2, 0(fp) 18 | j -8 # check_and_turn_right 19 | 20 | #
done_turning_right 21 | sb a1, 0(fp) 22 | j -14 # loop 23 | -------------------------------------------------------------------------------- /tc/maze.c: -------------------------------------------------------------------------------- 1 | #include <stdint.h> 2 | 3 | static volatile uint8_t* const IO = (volatile uint8_t*)(intptr_t)-8; 4 | 5 | const uint8_t LEFT = 0; 6 | const uint8_t FORWARD = 1; 7 | const uint8_t RIGHT = 2; 8 | const uint8_t USE = 4; 9 | 10 | const uint8_t NOTHING = 0; 11 | // Forever: write LEFT, then write USE and RIGHT repeatedly until the read after USE returns NOTHING, then write FORWARD. 12 | int main(void) { 13 | while (true) { 14 | *IO = LEFT; 15 | 16 | while (true) { 17 | *IO = USE; 18 | 19 | if (*IO == NOTHING) { 20 | break; 21 | } 22 | 23 | *IO = RIGHT; 24 | } 25 | 26 | *IO = FORWARD; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /tc/maze_save-breaker.S: -------------------------------------------------------------------------------- 1 | li fp, -8 2 | 3 | li a1, 1 # FORWARD 4 | sb a1, 0(fp) 5 | 6 | li a2, 2 # RIGHT 7 | 8 | # loop 9 | sb a2, 0(fp) 10 | 11 | # loop2 12 | lbu a0, 0(fp) 13 | sub a3, a1, a0 # EMPTY -> FORWARD, WALL -> LEFT 14 | sb a3, 0(fp) 15 | bnez a0, -8 # loop2 16 | j -12 # loop 17 | -------------------------------------------------------------------------------- /tc/planet-names.S: -------------------------------------------------------------------------------- 1 | li fp, -8 2 | li a1, 32 # CASE_DIFF, SPACE 3 | li a2, 1 # capitalize 4 | 5 | # loop 6 | lbu a0, 0(fp) 7 | czero.eqz a4, a1, a2 # a4 = capitalize ?
CASE_DIFF : 0 8 | sub a3, a0, a4 # c2 = c - a4 9 | sb a3, 0(fp) 10 | sub a3, a0, a1 # a3 = c - SPACE 11 | seqz a2, a3 # capitalize = c == SPACE 12 | j -20 # loop 13 | -------------------------------------------------------------------------------- /tc/planet-names.c: -------------------------------------------------------------------------------- 1 | #include <stdint.h> 2 | 3 | static volatile uint8_t* const IO = (volatile uint8_t*)(intptr_t)-8; 4 | 5 | static const uint8_t SPACE = ' '; 6 | static const uint8_t CASE_DIFF = 'a' - 'A'; 7 | // Echoes bytes from IO forever, subtracting CASE_DIFF from the first non-space byte at the start and after each space. 8 | int main(void) { 9 | bool capitalize = true; 10 | 11 | while (true) { 12 | uint8_t c = *IO; 13 | 14 | if (c == SPACE) { 15 | capitalize = true; 16 | } 17 | else if (capitalize) { 18 | c -= CASE_DIFF; 19 | capitalize = false; 20 | } 21 | 22 | *IO = c; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /tc/random-number-generator.S: -------------------------------------------------------------------------------- 1 | li fp, -8 2 | lhu a0, 0(fp) 3 | 4 | # loop 5 | srli a1, a0, 7 6 | xor a0, a0, a1 7 | slli a1, a0, 9 8 | xor a0, a0, a1 9 | zext.h a0, a0 10 | srli a1, a0, 8 11 | xor a0, a0, a1 12 | sh a0, 0(fp) 13 | j -22 # loop 14 | -------------------------------------------------------------------------------- /tc/random-number-generator.c: -------------------------------------------------------------------------------- 1 | #include <stdint.h> 2 | 3 | static volatile uint64_t* const IO = (volatile uint64_t*)(intptr_t)-8; 4 | // Seeds a 16-bit shift-and-xor state from IO, then forever writes each new state back to IO. 5 | int main(void) { 6 | uint16_t state = *IO; 7 | 8 | while (true) { 9 | uint16_t temp1 = state ^ (state >> 7); 10 | uint16_t temp2 = temp1 ^ (temp1 << 9); 11 | state = temp2 ^ (temp2 >> 8); 12 | *IO = state; 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /tc/spacial-invasion.S: -------------------------------------------------------------------------------- 1 | li fp, -8 2 | li a0, 5 # shoot 3 | sb a0, 0(fp) 4 | li a1, 1 #
forward 5 | li a2, 3 # enjoy 6 | 7 | # loop 8 | sb a1, 0(fp) 9 | lbu a1, 0(fp) 10 | snez a1, a1 # a1 = (a1 == 0) ? 3 : 5 11 | sh1add a1, a1, a2 # = (a1 != 0) * 2 + 3 12 | j -12 # loop 13 | -------------------------------------------------------------------------------- /tc/spacial-invasion.c: -------------------------------------------------------------------------------- 1 | #include <stdint.h> 2 | 3 | static volatile uint8_t* const IO = (volatile uint8_t*)(intptr_t)-8; 4 | 5 | static const uint8_t FORWARD = 1; 6 | static const uint8_t ENJOY = 3; 7 | static const uint8_t SHOOT = 5; 8 | // Writes SHOOT then FORWARD, then forever answers each byte read with SHOOT if it is nonzero, else ENJOY. 9 | int main(void) { 10 | *IO = SHOOT; 11 | *IO = FORWARD; 12 | while (true) { 13 | uint8_t current = *IO; 14 | if (__builtin_expect(current, 0) > 0) { 15 | *IO = SHOOT; 16 | } 17 | else { 18 | *IO = ENJOY; 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /tc/storage-cracker.S: -------------------------------------------------------------------------------- 1 | li fp, -8 2 | 3 | li a0, 0x80 # guess 4 | li a1, 0x80 # mask 5 | 6 | # loop 7 | sb a0, 0(fp) 8 | 9 | # 0 => too low, next guess should be higher 10 | # 1 => too high, next guess should be lower 11 | lbu a2, 0(fp) 12 | 13 | czero.eqz a2, a1, a2 # a0 = a2 ? (a0 ^ a1) : a0 14 | xor a0, a0, a2 # = a0 ^ (a2 ?
a1 : 0) 15 | srli a1, a1, 1 16 | or a0, a0, a1 17 | 18 | j -14 # loop 19 | -------------------------------------------------------------------------------- /tc/storage-cracker.c: -------------------------------------------------------------------------------- 1 | #include <stdint.h> 2 | 3 | static volatile uint8_t* const IO = (volatile uint8_t*)(intptr_t)-8; 4 | // Bitwise binary search: write guess, clear the bit under test if IO then reads 1, and set the next lower bit. 5 | int main(void) { 6 | uint8_t guess = 0x80; 7 | uint8_t mask = 0x80; 8 | 9 | for (;;) { 10 | *IO = guess; 11 | if (*IO == 1) { 12 | guess ^= mask; 13 | } 14 | mask >>= 1; 15 | guess |= mask; 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /tc/sv/booth_multiplier.sv: -------------------------------------------------------------------------------- 1 | /* 2 | 00 => P = (P ) >> 1 3 | 01 => P = (P + A) >> 1 4 | 10 => P = (P - A) >> 1 5 | 11 => P = (P ) >> 1 6 | 7 | 8 | 000 => P = ((P ) >> 1 ) >> 1 9 | 001 => P = ((P + A) >> 1 ) >> 1 10 | 010 => P = ((P + A) >> 1 ) >> 1 11 | 011 => P = ((P ) >> 1 + A) >> 1 12 | 100 => P = ((P ) >> 1 - A) >> 1 13 | 101 => P = ((P - A) >> 1 ) >> 1 14 | 110 => P = ((P - A) >> 1 ) >> 1 15 | 111 => P = ((P ) >> 1 ) >> 1 16 | */ 17 | 18 | module booth_multiplier #( 19 | parameter width = 64 20 | ) ( 21 | input bit[width - 1:0] m, 22 | input bit m_is_signed, 23 | input bit[width - 1:0] r, 24 | input bit r_is_signed, 25 | 26 | output bit[width - 1:0] mul, 27 | output bit[width - 1:0] mulh 28 | ); 29 | bit[width + 2 - 1:0] a; 30 | assign a = {{2{m_is_signed & m[width - 1]}}, m}; 31 | 32 | bit[width + 2 + width + 1 + 1 - 1:0] p; 33 | 34 | assign mul = p[1+:width]; 35 | assign mulh = p[width + 1+:width]; 36 | 37 | always_comb begin 38 | p = {{(width + 2){r[0]}} & -a, r_is_signed & r[width - 1], r, 1'b0}; 39 | p = unsigned'(signed'(p) >>> 1); 40 | 41 | for (int i = 0; i < width; i = i + 2) begin 42 | unique case (p[0+:3]) 43 | 3'b000: begin 44 | p = unsigned'(signed'(p) >>> 2); 45 | end 46 | 3'b001, 47 | 3'b010: begin 48 | p[width + 1 + 1+:width +
2] += a; 49 | p = unsigned'(signed'(p) >>> 2); 50 | end 51 | 3'b011: begin 52 | p = unsigned'(signed'(p) >>> 1); 53 | p[width + 1 + 1+:width + 2] += a; 54 | p = unsigned'(signed'(p) >>> 1); 55 | end 56 | 3'b100: begin 57 | p = unsigned'(signed'(p) >>> 1); 58 | p[width + 1 + 1+:width + 2] -= a; 59 | p = unsigned'(signed'(p) >>> 1); 60 | end 61 | 3'b101, 62 | 3'b110: begin 63 | p[width + 1 + 1+:width + 2] -= a; 64 | p = unsigned'(signed'(p) >>> 2); 65 | end 66 | 3'b111: begin 67 | p = unsigned'(signed'(p) >>> 2); 68 | end 69 | endcase 70 | end 71 | end 72 | endmodule 73 | 74 | `ifdef TESTING 75 | module test_booth_multiplier; 76 | bit[64 - 1:0] m; 77 | bit m_is_signed; 78 | bit[64 - 1:0] r; 79 | bit r_is_signed; 80 | wire[64 - 1:0] mul; 81 | wire[64 - 1:0] mulh; 82 | booth_multiplier #(.width(64)) booth_multiplier_module ( 83 | m, m_is_signed, 84 | r, r_is_signed, 85 | mul, mulh 86 | ); 87 | 88 | `define test_case(m_, m_is_signed_, r_, r_is_signed_, expected_mul, expected_mulh) begin \ 89 | m = m_; \ 90 | m_is_signed = m_is_signed_; \ 91 | r = r_; \ 92 | r_is_signed = r_is_signed_; \ 93 | #1 \ 94 | assert(mul == expected_mul) else $fatal; \ 95 | assert(mulh == expected_mulh) else $fatal; \ 96 | end 97 | 98 | initial begin 99 | `test_case(64'h0000000000000000, '0, 64'h0000000000000000, '0, 64'h0000000000000000, 64'h0000000000000000) 100 | 101 | `test_case(64'h0000000000000001, '0, 64'h0000000000000001, '0, 64'h0000000000000001, 64'h0000000000000000) 102 | 103 | `test_case(64'hffffffffffffffff, '0, 64'hffffffffffffffff, '0, 64'h0000000000000001, 64'hfffffffffffffffe) 104 | `test_case(64'hffffffffffffffff, '0, 64'hffffffffffffffff, '1, 64'h0000000000000001, 64'hffffffffffffffff) 105 | `test_case(64'hffffffffffffffff, '1, 64'hffffffffffffffff, '0, 64'h0000000000000001, 64'hffffffffffffffff) 106 | `test_case(64'hffffffffffffffff, '1, 64'hffffffffffffffff, '1, 64'h0000000000000001, 64'h0000000000000000) 107 | 108 | `test_case(64'ha0b6b8129b5bdfd9, '0, 64'hbcba1c1981093535, 
'0, 64'h2aff503c66fe44ed, 64'h767b059366983688) 109 | `test_case(64'ha0b6b8129b5bdfd9, '0, 64'hbcba1c1981093535, '1, 64'h2aff503c66fe44ed, 64'hd5c44d80cb3c56af) 110 | `test_case(64'ha0b6b8129b5bdfd9, '1, 64'hbcba1c1981093535, '0, 64'h2aff503c66fe44ed, 64'hb9c0e979e58f0153) 111 | `test_case(64'ha0b6b8129b5bdfd9, '1, 64'hbcba1c1981093535, '1, 64'h2aff503c66fe44ed, 64'h190a31674a33217a) 112 | 113 | `test_case(64'hbcba1c1981093535, '0, 64'ha0b6b8129b5bdfd9, '0, 64'h2aff503c66fe44ed, 64'h767b059366983688) 114 | `test_case(64'hbcba1c1981093535, '0, 64'ha0b6b8129b5bdfd9, '1, 64'h2aff503c66fe44ed, 64'hb9c0e979e58f0153) 115 | `test_case(64'hbcba1c1981093535, '1, 64'ha0b6b8129b5bdfd9, '0, 64'h2aff503c66fe44ed, 64'hd5c44d80cb3c56af) 116 | `test_case(64'hbcba1c1981093535, '1, 64'ha0b6b8129b5bdfd9, '1, 64'h2aff503c66fe44ed, 64'h190a31674a33217a) 117 | end 118 | endmodule 119 | `endif 120 | -------------------------------------------------------------------------------- /tc/sv/booth_multiplier_multi_cycle.sv: -------------------------------------------------------------------------------- 1 | /* 2 | 00 => P = (P ) >> 1 3 | 01 => P = (P + A) >> 1 4 | 10 => P = (P - A) >> 1 5 | 11 => P = (P ) >> 1 6 | 7 | 8 | 000 => P = ((P ) >> 1 ) >> 1 9 | 001 => P = ((P + A) >> 1 ) >> 1 10 | 010 => P = ((P + A) >> 1 ) >> 1 11 | 011 => P = ((P ) >> 1 + A) >> 1 12 | 100 => P = ((P ) >> 1 - A) >> 1 13 | 101 => P = ((P - A) >> 1 ) >> 1 14 | 110 => P = ((P - A) >> 1 ) >> 1 15 | 111 => P = ((P ) >> 1 ) >> 1 16 | */ 17 | 18 | module booth_multiplier_multi_cycle #( 19 | parameter width = 64, 20 | localparam i_width = $clog2(width / 2 + 1) 21 | ) ( 22 | input bit clock, 23 | input bit reset, 24 | 25 | input bit start, 26 | input bit[width - 1:0] m, 27 | input bit m_is_signed, 28 | input bit[width - 1:0] r, 29 | input bit r_is_signed, 30 | 31 | output bit mulw_busy, 32 | output bit[width - 1:0] mulw, 33 | output bit mul_busy, 34 | output bit[width - 1:0] mul, 35 | output bit[width - 1:0] mulh 36 | ); 37 
| bit[i_width - 1:0] i; 38 | 39 | bit[width + 1 + width + 1 + 1 - 1:0] p; 40 | bit[width + 1 + width + 1 + 1 - 1:0] next_p; 41 | 42 | multiplier_round #(.width(width)) multiplier_round_module ( 43 | m, m_is_signed, 44 | r, r_is_signed, 45 | i == '0, p, 46 | mulw, mul, mulh, 47 | next_p 48 | ); 49 | 50 | assign mulw_busy = start & (i < i_width'({1'b1, {(i_width - 2){1'b0}}})); 51 | assign mul_busy = start & (i < {1'b1, {(i_width - 1){1'b0}}}); 52 | 53 | always_ff @(posedge clock) begin 54 | if (reset) begin 55 | i <= '0; 56 | p <= '0; 57 | end else begin 58 | i <= {i_width{mul_busy}} & (i + 1); 59 | p <= next_p; 60 | end 61 | end 62 | endmodule 63 | 64 | module multiplier_round #( 65 | parameter width = 64 66 | ) ( 67 | input bit[width - 1:0] m, 68 | input bit m_is_signed, 69 | input bit[width - 1:0] r, 70 | input bit r_is_signed, 71 | 72 | input bit first_round, 73 | input bit[width + 1 + width + 1 + 1 - 1:0] p, 74 | 75 | output bit[width - 1:0] mulw, 76 | output bit[width - 1:0] mul, 77 | output bit[width - 1:0] mulh, 78 | output bit[width + 1 + width + 1 + 1 - 1:0] next_p 79 | ); 80 | bit[width - 1:0] next_p1; 81 | bit next_p2; 82 | bit[width - 1:0] next_p3; 83 | bit next_p4; 84 | bit next_p5; 85 | assign next_p = {next_p5, next_p4, next_p3, next_p2, next_p1}; 86 | 87 | bit[width - 1:0] p2; 88 | 89 | bit p_sub; 90 | wire[width - 1:0] m_maybe_neg = m ^ {width{p_sub}}; 91 | // ab ^ c = a(b ^ c) + a'c = a ? (b ^ c) : c 92 | // => m_sext = (m_is_signed & m[width - 1]) ^ p_sub = ... 93 | wire m_sext = m_is_signed ? 
m_maybe_neg[width - 1] : p_sub; 94 | wire[width + 2 - 1:0] p_plus; 95 | wire[width - 1:0] p_plus_inner_sum; 96 | wire p_plus_inner_cout; 97 | adder #(.width(width)) p_plus_inner_module ( 98 | p_sub, p2, m_maybe_neg, 99 | p_plus_inner_sum, p_plus_inner_cout 100 | ); 101 | assign p_plus = { 102 | // {2{p2[width - 1]}} + {2{m_sext}} + 2'(p_plus_inner_cout) 103 | (p2[width - 1] & m_sext) | 104 | (p2[width - 1] & ~p_plus_inner_cout) | 105 | (m_sext & ~p_plus_inner_cout), 106 | p2[width - 1] ^ m_sext ^ p_plus_inner_cout, 107 | p_plus_inner_sum 108 | }; 109 | 110 | always_comb begin 111 | mulw = 'x; 112 | mul = 'x; 113 | mulh = 'x; 114 | 115 | p_sub = 'x; 116 | p2 = 'x; 117 | 118 | if (first_round) begin 119 | next_p1 = r; 120 | next_p2 = r_is_signed & r[width - 1]; 121 | 122 | unique case (r[0]) 123 | 1'b0: begin 124 | next_p3 = '0; 125 | next_p4 = '0; 126 | next_p5 = '0; 127 | end 128 | 129 | 1'b1: begin 130 | p_sub = '1; 131 | p2 = '0; 132 | next_p3 = p_plus[0+:width]; 133 | next_p4 = p_plus[width]; 134 | next_p5 = p_plus[width + 1]; 135 | end 136 | endcase 137 | 138 | end else begin 139 | next_p1 = p[2+:width]; 140 | 141 | unique case (p[0+:2] ^ {2{p[2]}}) 142 | 2'b00: begin 143 | next_p2 = p[width + 2]; 144 | next_p3 = p[width + 3+:width]; 145 | next_p4 = p[width + width + 2]; 146 | next_p5 = p[width + width + 2]; 147 | end 148 | 149 | 2'b01, 150 | 2'b10: begin 151 | p_sub = p[2]; 152 | p2 = p[width + 2+:width]; 153 | next_p2 = p_plus[0]; 154 | next_p3 = p_plus[1+:width]; 155 | next_p4 = p_plus[width + 1]; 156 | next_p5 = p_plus[width + 1]; 157 | end 158 | 159 | 2'b11: begin 160 | p_sub = p[2]; 161 | p2 = p[width + 3+:width]; 162 | next_p2 = p[width + 2]; 163 | next_p3 = p_plus[0+:width]; 164 | next_p4 = p_plus[width]; 165 | next_p5 = p_plus[width + 1]; 166 | end 167 | endcase 168 | 169 | mulw = {{(width / 2 + 1){next_p2}}, next_p1[width / 2 + 1+:width / 2 - 1]}; 170 | mul = {next_p2, next_p1[1+:width - 1]}; 171 | mulh = next_p3; 172 | end 173 | end 174 | endmodule 
175 | 176 | module adder #( 177 | parameter width = 64 178 | ) ( 179 | input bit cin, 180 | input bit[width - 1:0] a, 181 | input bit[width - 1:0] b, 182 | output bit[width -1:0] sum, 183 | output bit cout 184 | ); 185 | assign {cout, sum} = {1'b0, a} + {1'b0, b} + (width + 1)'(cin); 186 | endmodule 187 | 188 | `ifdef TESTING 189 | module test_booth_multiplier_multi_cycle; 190 | bit clock; 191 | bit reset; 192 | bit start; 193 | bit[64 - 1:0] m; 194 | bit m_is_signed; 195 | bit[64 - 1:0] r; 196 | bit r_is_signed; 197 | wire mulw_busy; 198 | wire[64 - 1:0] mulw; 199 | wire mul_busy; 200 | wire[64 - 1:0] mul; 201 | wire[64 - 1:0] mulh; 202 | booth_multiplier_multi_cycle #(.width(64)) booth_multiplier_multi_cycle_module ( 203 | clock, reset, 204 | start, 205 | m, m_is_signed, 206 | r, r_is_signed, 207 | mulw_busy, mulw, 208 | mul_busy, mul, mulh 209 | ); 210 | 211 | `define test_case(m_, m_is_signed_, r_, r_is_signed_, expected_mulw, expected_mul, expected_mulh) begin \ 212 | m = m_; \ 213 | m_is_signed = m_is_signed_; \ 214 | r = r_; \ 215 | r_is_signed = r_is_signed_; \ 216 | start = '1; \ 217 | #1 \ 218 | assert(mulw_busy == '1) else $fatal; \ 219 | assert(mul_busy == '1) else $fatal; \ 220 | \ 221 | for (int i = 0; i < 16; i++) begin \ 222 | assert(mulw_busy == '1) else $fatal; \ 223 | clock = '1; \ 224 | #1 \ 225 | clock = '0; \ 226 | #1 \ 227 | ; \ 228 | end \ 229 | assert(mulw_busy == '0) else $fatal; \ 230 | assert(mulw == expected_mulw) else $fatal; \ 231 | assert(mul_busy == '1) else $fatal; \ 232 | \ 233 | for (int i = 0; i < 16; i++) begin \ 234 | assert(mul_busy == '1) else $fatal; \ 235 | clock = '1; \ 236 | #1 \ 237 | clock = '0; \ 238 | #1 \ 239 | ; \ 240 | end \ 241 | assert(mul_busy == '0) else $fatal; \ 242 | assert(mul == expected_mul) else $fatal; \ 243 | assert(mulh == expected_mulh) else $fatal; \ 244 | \ 245 | start = '0; \ 246 | #1 \ 247 | clock = '1; \ 248 | #1 \ 249 | clock = '0; \ 250 | end 251 | 252 | initial begin 253 | clock = '0; 254 | 
reset = '0; 255 | start = '0; 256 | m = 64'h0000000000000000; 257 | m_is_signed = '0; 258 | r = 64'h0000000000000000; 259 | r_is_signed = '0; 260 | 261 | reset = '1; 262 | #1 263 | clock = '1; 264 | #1 265 | reset = '0; 266 | #1 267 | clock = '0; 268 | #1 269 | 270 | assert(mulw_busy == '0) else $fatal; 271 | assert(mul_busy == '0) else $fatal; 272 | 273 | `test_case(64'h0000000000000000, '0, 64'h0000000000000000, '0, 64'h0000000000000000, 64'h0000000000000000, 64'h0000000000000000) 274 | 275 | `test_case(64'h0000000000000001, '0, 64'h0000000000000001, '0, 64'h0000000000000001, 64'h0000000000000001, 64'h0000000000000000) 276 | 277 | `test_case(64'hffffffffffffffff, '0, 64'hffffffffffffffff, '0, 64'h0000000000000001, 64'h0000000000000001, 64'hfffffffffffffffe) 278 | `test_case(64'hffffffffffffffff, '0, 64'hffffffffffffffff, '1, 64'h0000000000000001, 64'h0000000000000001, 64'hffffffffffffffff) 279 | `test_case(64'hffffffffffffffff, '1, 64'hffffffffffffffff, '0, 64'h0000000000000001, 64'h0000000000000001, 64'hffffffffffffffff) 280 | `test_case(64'hffffffffffffffff, '1, 64'hffffffffffffffff, '1, 64'h0000000000000001, 64'h0000000000000001, 64'h0000000000000000) 281 | 282 | `test_case(64'ha0b6b8129b5bdfd9, '0, 64'hbcba1c1981093535, '0, 64'h0000000066fe44ed, 64'h2aff503c66fe44ed, 64'h767b059366983688) 283 | `test_case(64'ha0b6b8129b5bdfd9, '0, 64'hbcba1c1981093535, '1, 64'h0000000066fe44ed, 64'h2aff503c66fe44ed, 64'hd5c44d80cb3c56af) 284 | `test_case(64'ha0b6b8129b5bdfd9, '1, 64'hbcba1c1981093535, '0, 64'h0000000066fe44ed, 64'h2aff503c66fe44ed, 64'hb9c0e979e58f0153) 285 | `test_case(64'ha0b6b8129b5bdfd9, '1, 64'hbcba1c1981093535, '1, 64'h0000000066fe44ed, 64'h2aff503c66fe44ed, 64'h190a31674a33217a) 286 | 287 | `test_case(64'hbcba1c1981093535, '0, 64'ha0b6b8129b5bdfd9, '0, 64'h0000000066fe44ed, 64'h2aff503c66fe44ed, 64'h767b059366983688) 288 | `test_case(64'hbcba1c1981093535, '0, 64'ha0b6b8129b5bdfd9, '1, 64'h0000000066fe44ed, 64'h2aff503c66fe44ed, 64'hb9c0e979e58f0153) 
289 | `test_case(64'hbcba1c1981093535, '1, 64'ha0b6b8129b5bdfd9, '0, 64'h0000000066fe44ed, 64'h2aff503c66fe44ed, 64'hd5c44d80cb3c56af) 290 | `test_case(64'hbcba1c1981093535, '1, 64'ha0b6b8129b5bdfd9, '1, 64'h0000000066fe44ed, 64'h2aff503c66fe44ed, 64'h190a31674a33217a) 291 | end 292 | endmodule 293 | `endif 294 | -------------------------------------------------------------------------------- /tc/sv/bww_multiplier.sv: -------------------------------------------------------------------------------- 1 | module bww_multiplier ( 2 | input bit[7:0] a, 3 | input bit a_is_signed, 4 | input bit[7:0] b, 5 | input bit b_is_signed, 6 | 7 | output bit[7:0] mul, 8 | output bit[7:0] mulh 9 | ); 10 | wire s0, c0; 11 | half_adder adder0(a[0] & b[1], a[1] & b[0], s0, c0); 12 | wire s1, c1; 13 | full_adder adder1 (a[0] & b[2], a[1] & b[1], a[2] & b[0], s1, c1); 14 | wire s2, c2; 15 | half_adder adder2(c0, s1, s2, c2); 16 | wire s3, c3; 17 | full_adder adder3 (a[0] & b[3], a[1] & b[2], a[2] & b[1], s3, c3); 18 | wire s4, c4; 19 | full_adder adder4 (a[3] & b[0], c1, s3, s4, c4); 20 | wire s5, c5; 21 | half_adder adder5(c2, s4, s5, c5); 22 | wire s6, c6; 23 | full_adder adder6 (a[0] & b[4], a[1] & b[3], a[2] & b[2], s6, c6); 24 | wire s7, c7; 25 | full_adder adder7 (a[3] & b[1], a[4] & b[0], c3, s7, c7); 26 | wire s8, c8; 27 | full_adder adder8 (s6, s7, c4, s8, c8); 28 | wire s9, c9; 29 | half_adder adder9(c5, s8, s9, c9); 30 | wire s10, c10; 31 | full_adder adder10 (a[0] & b[5], a[1] & b[4], a[2] & b[3], s10, c10); 32 | wire s11, c11; 33 | full_adder adder11 (a[3] & b[2], a[4] & b[1], a[5] & b[0], s11, c11); 34 | wire s12, c12; 35 | full_adder adder12 (c6, s10, s11, s12, c12); 36 | wire s13, c13; 37 | full_adder adder13 (c7, c8, s12, s13, c13); 38 | wire s14, c14; 39 | half_adder adder14(c9, s13, s14, c14); 40 | wire s15, c15; 41 | full_adder adder15 (a[0] & b[6], a[1] & b[5], a[2] & b[4], s15, c15); 42 | wire s16, c16; 43 | full_adder adder16 (a[3] & b[3], a[4] & b[2], a[5] & b[1], 
s16, c16); 44 | wire s17, c17; 45 | full_adder adder17 (a[6] & b[0], c10, c11, s17, c17); 46 | wire s18, c18; 47 | full_adder adder18 (s15, s16, c12, s18, c18); 48 | wire s19, c19; 49 | full_adder adder19 (s17, s18, c13, s19, c19); 50 | wire s20, c20; 51 | half_adder adder20(c14, s19, s20, c20); 52 | wire s21, c21; 53 | full_adder adder21 (a[0] & b[7], a[1] & b[6], a[2] & b[5], s21, c21); 54 | wire s22, c22; 55 | full_adder adder22 (a[3] & b[4], a[4] & b[3], a[5] & b[2], s22, c22); 56 | wire s23, c23; 57 | full_adder adder23 (a[6] & b[1], a[7] & b[0], c15, s23, c23); 58 | wire s24, c24; 59 | full_adder adder24 (c16, s21, s22, s24, c24); 60 | wire s25, c25; 61 | full_adder adder25 (s23, c17, s24, s25, c25); 62 | wire s26, c26; 63 | full_adder adder26 (c18, s25, c19, s26, c26); 64 | wire s27, c27; 65 | half_adder adder27(c20, s26, s27, c27); 66 | wire s28, c28; 67 | full_adder adder28 (a[1] & b[7], a[2] & b[6], a[3] & b[5], s28, c28); 68 | wire s29, c29; 69 | full_adder adder29 (a[4] & b[4], a[5] & b[3], a[6] & b[2], s29, c29); 70 | wire s30, c30; 71 | full_adder adder30 (a[7] & b[1], ~(a[0] & b[7] & b_is_signed), ~(a[7] & a_is_signed & b[0]), s30, c30); 72 | wire s31, c31; 73 | full_adder adder31 (c21, c22, s28, s31, c31); 74 | wire s32, c32; 75 | full_adder adder32 (s29, c23, s30, s32, c32); 76 | wire s33, c33; 77 | full_adder adder33 (c24, s31, s32, s33, c33); 78 | wire s34, c34; 79 | full_adder adder34 (c25, s33, c26, s34, c34); 80 | wire s35, c35; 81 | half_adder adder35(c27, s34, s35, c35); 82 | wire s36, c36; 83 | full_adder adder36 (a[2] & b[7], a[3] & b[6], a[4] & b[5], s36, c36); 84 | wire s37, c37; 85 | full_adder adder37 (a[5] & b[4], a[6] & b[3], a[7] & b[2], s37, c37); 86 | wire s38, c38; 87 | full_adder adder38 (~(a[1] & b[7] & b_is_signed), ~(a[7] & a_is_signed & b[1]), c28, s38, c38); 88 | wire s39, c39; 89 | full_adder adder39 (c29, c30, s36, s39, c39); 90 | wire s40, c40; 91 | full_adder adder40 (s37, s38, c31, s40, c40); 92 | wire s41, c41; 93 | 
full_adder adder41 (c32, s39, s40, s41, c41); 94 | wire s42, c42; 95 | full_adder adder42 (c33, s41, c34, s42, c42); 96 | wire s43, c43; 97 | half_adder_plus_one adder43 (c35, s42, s43, c43); 98 | wire s44, c44; 99 | full_adder adder44 (a[3] & b[7], a[4] & b[6], a[5] & b[5], s44, c44); 100 | wire s45, c45; 101 | full_adder adder45 (a[6] & b[4], a[7] & b[3], ~(a[2] & b[7] & b_is_signed), s45, c45); 102 | wire s46, c46; 103 | full_adder adder46 (~(a[7] & a_is_signed & b[2]), c36, c37, s46, c46); 104 | wire s47, c47; 105 | full_adder adder47 (s44, s45, c38, s47, c47); 106 | wire s48, c48; 107 | full_adder adder48 (c39, s46, c40, s48, c48); 108 | wire s49, c49; 109 | full_adder adder49 (s47, c41, s48, s49, c49); 110 | wire s50, c50; 111 | full_adder adder50 (s49, c42, c43, s50, c50); 112 | wire s51, c51; 113 | full_adder adder51 (a[4] & b[7], a[5] & b[6], a[6] & b[5], s51, c51); 114 | wire s52, c52; 115 | full_adder adder52 (a[7] & b[4], ~(a[3] & b[7] & b_is_signed), ~(a[7] & a_is_signed & b[3]), s52, c52); 116 | wire s53, c53; 117 | full_adder adder53 (c44, c45, s51, s53, c53); 118 | wire s54, c54; 119 | full_adder adder54 (s52, c46, c47, s54, c54); 120 | wire s55, c55; 121 | full_adder adder55 (s53, c48, s54, s55, c55); 122 | wire s56, c56; 123 | full_adder adder56 (c49, s55, c50, s56, c56); 124 | wire s57, c57; 125 | full_adder adder57 (a[5] & b[7], a[6] & b[6], a[7] & b[5], s57, c57); 126 | wire s58, c58; 127 | full_adder adder58 (~(a[4] & b[7] & b_is_signed), ~(a[7] & a_is_signed & b[4]), c51, s58, c58); 128 | wire s59, c59; 129 | full_adder adder59 (c52, s57, s58, s59, c59); 130 | wire s60, c60; 131 | full_adder adder60 (c53, s59, c54, s60, c60); 132 | wire s61, c61; 133 | full_adder adder61 (s60, c55, c56, s61, c61); 134 | wire s62, c62; 135 | full_adder adder62 (a[6] & b[7], a[7] & b[6], ~(a[5] & b[7] & b_is_signed), s62, c62); 136 | wire s63, c63; 137 | full_adder adder63 (~(a[7] & a_is_signed & b[5]), c57, s62, s63, c63); 138 | wire s64, c64; 139 | full_adder 
adder64 (c58, c59, s63, s64, c64); 140 | wire s65, c65; 141 | full_adder adder65 (c60, s64, c61, s65, c65); 142 | wire s66, c66; 143 | full_adder adder66 (a[7] & b[7], ~(a[6] & b[7] & b_is_signed), ~(a[7] & a_is_signed & b[6]), s66, c66); 144 | wire s67, c67; 145 | full_adder adder67 (c62, s66, c63, s67, c67); 146 | wire s68, c68; 147 | full_adder adder68 (s67, c64, c65, s68, c68); 148 | assign {mulh, mul} = { 149 | ((((a[7] & b[7] & b_is_signed) ^ (a[7] & a_is_signed & b[7])) ^ (c66)) ^ (c67)) ^ (c68), 150 | s68, 151 | s65, 152 | s61, 153 | s56, 154 | s50, 155 | s43, 156 | s35, 157 | s27, 158 | s20, 159 | s14, 160 | s9, 161 | s5, 162 | s2, 163 | s0, 164 | a[0] & b[0] 165 | }; 166 | endmodule 167 | 168 | module half_adder ( 169 | input bit a, 170 | input bit b, 171 | output bit sum, 172 | output bit carry 173 | ); 174 | assign {carry, sum} = {1'b0, a} + {1'b0, b}; 175 | endmodule 176 | 177 | module half_adder_plus_one ( 178 | input bit a, 179 | input bit b, 180 | output bit sum, 181 | output bit carry 182 | ); 183 | assign {carry, sum} = {1'b0, a} + {1'b0, b} + 2'b01; 184 | endmodule 185 | 186 | module full_adder ( 187 | input bit a, 188 | input bit b, 189 | input bit c, 190 | output bit sum, 191 | output bit carry 192 | ); 193 | assign {carry, sum} = {1'b0, a} + {1'b0, b} + {1'b0, c}; 194 | endmodule 195 | 196 | `ifdef TESTING 197 | module test_bww_multiplier; 198 | bit[7:0] a; 199 | bit a_is_signed; 200 | bit[7:0] b; 201 | bit b_is_signed; 202 | wire[7:0] mul; 203 | wire[7:0] mulh; 204 | bww_multiplier bww_multiplier_module ( 205 | a, a_is_signed, 206 | b, b_is_signed, 207 | mul, mulh 208 | ); 209 | 210 | initial begin 211 | a = -8'd1; 212 | a_is_signed = '0; 213 | b = -8'd1; 214 | b_is_signed = '0; 215 | #1 216 | assert(mul == 8'd1) else $fatal; 217 | assert(mulh == -8'd2) else $fatal; 218 | 219 | a = -8'd1; 220 | a_is_signed = '1; 221 | b = -8'd1; 222 | b_is_signed = '0; 223 | #1 224 | assert(mul == 8'd1) else $fatal; 225 | assert(mulh == -8'd1) else $fatal; 
226 | 227 | a = -8'd1; 228 | a_is_signed = '0; 229 | b = -8'd1; 230 | b_is_signed = '1; 231 | #1 232 | assert(mul == 8'd1) else $fatal; 233 | assert(mulh == -8'd1) else $fatal; 234 | 235 | a = -8'd1; 236 | a_is_signed = '1; 237 | b = -8'd1; 238 | b_is_signed = '1; 239 | #1 240 | assert(mul == 8'd1) else $fatal; 241 | assert(mulh == 8'd0) else $fatal; 242 | end 243 | endmodule 244 | `endif 245 | -------------------------------------------------------------------------------- /tc/sv/load32.sv: -------------------------------------------------------------------------------- 1 | module load32 ( 2 | input bit[1:0] address, 3 | input bit[2:0] funct3, 4 | input bit[31:0] ram_load_value, 5 | input bit[31:0] store_value, 6 | 7 | output bit efault, 8 | output logic[31:0] load_value, 9 | output logic[31:0] ram_store_value 10 | ); 11 | bit[31:0] store_mask; 12 | 13 | always_comb begin 14 | unique case (funct3[0+:2]) 15 | 2'b00: efault = '0; 16 | 2'b01: efault = address[0]; 17 | 2'b10: efault = (| address) | funct3[2]; 18 | 2'b11: efault = '1; 19 | endcase 20 | 21 | if (efault) begin 22 | load_value = 'x; 23 | store_mask = 'x; 24 | ram_store_value = 'x; 25 | 26 | end else begin 27 | load_value = ram_load_value >> {address, 3'b000}; 28 | unique case (funct3[0+:2]) 29 | 2'b00: load_value[8+:24] = {24{~funct3[2] & load_value[7]}}; 30 | 2'b01: load_value[16+:16] = {16{~funct3[2] & load_value[15]}}; 31 | 2'b10, 32 | 2'b11: ; 33 | endcase 34 | 35 | if (funct3[2]) begin 36 | store_mask = 'x; 37 | ram_store_value = 'x; 38 | 39 | end else begin 40 | store_mask = { 41 | {16{funct3[1]}}, // lw 42 | {8{| funct3[0+:2]}}, // lh(u), lw 43 | {8{1'b1}} // lb(u), lh(u), lw 44 | } << {address, 3'b000}; 45 | 46 | ram_store_value = 47 | (ram_load_value & ~store_mask) | 48 | ((store_value << {address, 3'b000}) & store_mask); 49 | end 50 | end 51 | end 52 | endmodule 53 | 54 | `ifdef TESTING 55 | module test_load32; 56 | bit[1:0] address; 57 | bit[2:0] funct3; 58 | bit[31:0] ram_load_value; 59 | 
bit[31:0] store_value; 60 | wire efault; 61 | wire[31:0] load_value; 62 | wire[31:0] ram_store_value; 63 | load32 load32_module ( 64 | address, 65 | funct3, 66 | ram_load_value, 67 | store_value, 68 | efault, 69 | load_value, 70 | ram_store_value 71 | ); 72 | 73 | initial begin 74 | ram_load_value = 32'h456789ab; 75 | store_value = 32'hffffffff; 76 | 77 | // lb 78 | funct3 = 3'b000; 79 | 80 | address = 2'b00; 81 | #1 82 | assert(efault == 1'b0) else $fatal; 83 | assert(load_value == 32'hffffffab) else $fatal; 84 | assert(ram_store_value == 32'h456789ff) else $fatal; 85 | 86 | address = 2'b01; 87 | #1 88 | assert(efault == 1'b0) else $fatal; 89 | assert(load_value == 32'hffffff89) else $fatal; 90 | assert(ram_store_value == 32'h4567ffab) else $fatal; 91 | 92 | address = 2'b10; 93 | #1 94 | assert(efault == 1'b0) else $fatal; 95 | assert(load_value == 32'h00000067) else $fatal; 96 | assert(ram_store_value == 32'h45ff89ab) else $fatal; 97 | 98 | address = 2'b11; 99 | #1 100 | assert(efault == 1'b0) else $fatal; 101 | assert(load_value == 32'h00000045) else $fatal; 102 | assert(ram_store_value == 32'hff6789ab) else $fatal; 103 | 104 | // lbu 105 | funct3 = 3'b100; 106 | 107 | address = 2'b00; 108 | #1 109 | assert(efault == 1'b0) else $fatal; 110 | assert(load_value == 32'h000000ab) else $fatal; 111 | 112 | address = 2'b01; 113 | #1 114 | assert(efault == 1'b0) else $fatal; 115 | assert(load_value == 32'h00000089) else $fatal; 116 | 117 | address = 2'b10; 118 | #1 119 | assert(efault == 1'b0) else $fatal; 120 | assert(load_value == 32'h00000067) else $fatal; 121 | 122 | address = 2'b11; 123 | #1 124 | assert(efault == 1'b0) else $fatal; 125 | assert(load_value == 32'h00000045) else $fatal; 126 | 127 | // lh 128 | funct3 = 3'b001; 129 | 130 | address = 2'b00; 131 | #1 132 | assert(efault == 1'b0) else $fatal; 133 | assert(load_value == 32'hffff89ab) else $fatal; 134 | assert(ram_store_value == 32'h4567ffff) else $fatal; 135 | 136 | address = 2'b01; 137 | #1 138 | 
assert(efault == 1'b1) else $fatal; 139 | 140 | address = 2'b10; 141 | #1 142 | assert(efault == 1'b0) else $fatal; 143 | assert(load_value == 32'h00004567) else $fatal; 144 | assert(ram_store_value == 32'hffff89ab) else $fatal; 145 | 146 | address = 2'b11; 147 | #1 148 | assert(efault == 1'b1) else $fatal; 149 | 150 | // lhu 151 | funct3 = 3'b101; 152 | 153 | address = 2'b00; 154 | #1 155 | assert(efault == 1'b0) else $fatal; 156 | assert(load_value == 32'h000089ab) else $fatal; 157 | 158 | address = 2'b01; 159 | #1 160 | assert(efault == 1'b1) else $fatal; 161 | 162 | address = 2'b10; 163 | #1 164 | assert(efault == 1'b0) else $fatal; 165 | assert(load_value == 32'h00004567) else $fatal; 166 | 167 | address = 2'b11; 168 | #1 169 | assert(efault == 1'b1) else $fatal; 170 | 171 | // lw 172 | funct3 = 3'b010; 173 | 174 | address = 2'b00; 175 | #1 176 | assert(efault == 1'b0) else $fatal; 177 | assert(load_value == 32'h456789ab) else $fatal; 178 | assert(ram_store_value == 32'hffffffff) else $fatal; 179 | 180 | address = 2'b01; 181 | #1 182 | assert(efault == 1'b1) else $fatal; 183 | 184 | address = 2'b10; 185 | #1 186 | assert(efault == 1'b1) else $fatal; 187 | 188 | address = 2'b11; 189 | #1 190 | assert(efault == 1'b1) else $fatal; 191 | 192 | // lwu 193 | funct3 = 3'b110; 194 | 195 | address = 2'b00; 196 | #1 197 | assert(efault == 1'b1) else $fatal; 198 | 199 | address = 2'b01; 200 | #1 201 | assert(efault == 1'b1) else $fatal; 202 | 203 | address = 2'b10; 204 | #1 205 | assert(efault == 1'b1) else $fatal; 206 | 207 | address = 2'b11; 208 | #1 209 | assert(efault == 1'b1) else $fatal; 210 | 211 | // ld 212 | funct3 = 3'b011; 213 | 214 | address = 2'b00; 215 | #1 216 | assert(efault == 1'b1) else $fatal; 217 | 218 | address = 2'b01; 219 | #1 220 | assert(efault == 1'b1) else $fatal; 221 | 222 | address = 2'b10; 223 | #1 224 | assert(efault == 1'b1) else $fatal; 225 | 226 | address = 2'b11; 227 | #1 228 | assert(efault == 1'b1) else $fatal; 229 | 230 | // ldu 231 
| funct3 = 3'b111; 232 | 233 | address = 2'b00; 234 | #1 235 | assert(efault == 1'b1) else $fatal; 236 | 237 | address = 2'b01; 238 | #1 239 | assert(efault == 1'b1) else $fatal; 240 | 241 | address = 2'b10; 242 | #1 243 | assert(efault == 1'b1) else $fatal; 244 | 245 | address = 2'b11; 246 | #1 247 | assert(efault == 1'b1) else $fatal; 248 | end 249 | endmodule 250 | `endif 251 | -------------------------------------------------------------------------------- /tc/sv/load64.sv: -------------------------------------------------------------------------------- 1 | /* load64: combinational data path for one 64-bit RAM word. address is the byte offset inside the word and funct3 selects the access (size in funct3[1:0], funct3[2] disables sign extension). load_value is the extracted and extended value; ram_store_value is ram_load_value with the addressed bytes replaced by store_value. efault is set when address is not a multiple of the selected size and for funct3 = 111; all other outputs are x in that case. */ module load64 ( 2 | input bit[2:0] address, 3 | input bit[2:0] funct3, 4 | input bit[63:0] ram_load_value, 5 | input bit[63:0] store_value, 6 | 7 | output bit efault, 8 | output logic[63:0] load_value, 9 | output logic[63:0] ram_store_value 10 | ); 11 | bit[63:0] store_mask; 12 | /* efault depends on the size field funct3[1:0] and the byte offset: size 00 never faults, size 01 faults on an odd offset, size 10 faults unless the low two offset bits are zero, size 11 faults on any non-zero offset or when funct3[2] is set. */ 13 | always_comb begin 14 | unique case (funct3[0+:2]) 15 | 2'b00: efault = '0; 16 | 2'b01: efault = address[0]; 17 | 2'b10: efault = | address[0+:2]; 18 | 2'b11: efault = (| address) | funct3[2]; 19 | endcase 20 | 21 | if (efault) begin 22 | load_value = 'x; 23 | store_mask = 'x; 24 | ram_store_value = 'x; 25 | 26 | end else begin /* address * 8 is the bit offset: shift the addressed byte down to bit 0, then overwrite the bits above the access size with the sign bit, or with zeros when funct3[2] is set. */ 27 | load_value = ram_load_value >> {address, 3'b000}; 28 | unique case (funct3[0+:2]) 29 | 2'b00: load_value[8+:56] = {56{~funct3[2] & load_value[7]}}; 30 | 2'b01: load_value[16+:48] = {48{~funct3[2] & load_value[15]}}; 31 | 2'b10: load_value[32+:32] = {32{~funct3[2] & load_value[31]}}; 32 | 2'b11: ; 33 | endcase 34 | /* With funct3[2] set the store outputs are left as x. NOTE(review): presumably because only the unsigned load encodings set funct3[2] - confirm against the caller. */ 35 | if (funct3[2]) begin 36 | store_mask = 'x; 37 | ram_store_value = 'x; 38 | 39 | end else begin /* store_mask has ones over the bytes being written: built for offset 0 from the size field, then shifted to the addressed byte. */ 40 | store_mask = { 41 | {32{& funct3[0+:2]}}, // ld 42 | {16{funct3[1]}}, // lw(u), ld 43 | {8{| funct3[0+:2]}}, // lh(u), lw(u), ld 44 | {8{1'b1}} // lb(u), lh(u), lw(u), ld 45 | } << {address, 3'b000}; 46 | /* Merge: RAM bits outside the mask are kept, bits inside the mask come from the shifted store_value. */ 47 | ram_store_value = 48 | (ram_load_value & ~store_mask) | 49 | ((store_value << {address, 3'b000}) & store_mask); 50 | end 51 | end 52 | end 53 | endmodule 54 | /* Self-checking testbench for load64, compiled only when TESTING is defined. */ 55 | `ifdef TESTING
56 | module test_load64; 57 | bit[2:0] address; 58 | bit[2:0] funct3; 59 | bit[63:0] ram_load_value; 60 | bit[63:0] store_value; 61 | wire efault; 62 | wire[63:0] load_value; 63 | wire[63:0] ram_store_value; 64 | load64 load64_module ( 65 | address, 66 | funct3, 67 | ram_load_value, 68 | store_value, 69 | efault, 70 | load_value, 71 | ram_store_value 72 | ); 73 | 74 | initial begin 75 | ram_load_value = 64'h0123456789abcdef; 76 | store_value = 64'hffffffffffffffff; 77 | 78 | // lb 79 | funct3 = 3'b000; 80 | 81 | address = 3'b000; 82 | #1 83 | assert(efault == 1'b0) else $fatal; 84 | assert(load_value == 64'hffffffffffffffef) else $fatal; 85 | assert(ram_store_value == 64'h0123456789abcdff) else $fatal; 86 | 87 | address = 3'b001; 88 | #1 89 | assert(efault == 1'b0) else $fatal; 90 | assert(load_value == 64'hffffffffffffffcd) else $fatal; 91 | assert(ram_store_value == 64'h0123456789abffef) else $fatal; 92 | 93 | address = 3'b010; 94 | #1 95 | assert(efault == 1'b0) else $fatal; 96 | assert(load_value == 64'hffffffffffffffab) else $fatal; 97 | assert(ram_store_value == 64'h0123456789ffcdef) else $fatal; 98 | 99 | address = 3'b011; 100 | #1 101 | assert(efault == 1'b0) else $fatal; 102 | assert(load_value == 64'hffffffffffffff89) else $fatal; 103 | assert(ram_store_value == 64'h01234567ffabcdef) else $fatal; 104 | 105 | address = 3'b100; 106 | #1 107 | assert(efault == 1'b0) else $fatal; 108 | assert(load_value == 64'h0000000000000067) else $fatal; 109 | assert(ram_store_value == 64'h012345ff89abcdef) else $fatal; 110 | 111 | address = 3'b101; 112 | #1 113 | assert(efault == 1'b0) else $fatal; 114 | assert(load_value == 64'h0000000000000045) else $fatal; 115 | assert(ram_store_value == 64'h0123ff6789abcdef) else $fatal; 116 | 117 | address = 3'b110; 118 | #1 119 | assert(efault == 1'b0) else $fatal; 120 | assert(load_value == 64'h0000000000000023) else $fatal; 121 | assert(ram_store_value == 64'h01ff456789abcdef) else $fatal; 122 | 123 | address = 3'b111; 124 | #1 
125 | assert(efault == 1'b0) else $fatal; 126 | assert(load_value == 64'h0000000000000001) else $fatal; 127 | assert(ram_store_value == 64'hff23456789abcdef) else $fatal; 128 | 129 | // lbu 130 | funct3 = 3'b100; 131 | 132 | address = 3'b000; 133 | #1 134 | assert(efault == 1'b0) else $fatal; 135 | assert(load_value == 64'h00000000000000ef) else $fatal; 136 | 137 | address = 3'b001; 138 | #1 139 | assert(efault == 1'b0) else $fatal; 140 | assert(load_value == 64'h00000000000000cd) else $fatal; 141 | 142 | address = 3'b010; 143 | #1 144 | assert(efault == 1'b0) else $fatal; 145 | assert(load_value == 64'h00000000000000ab) else $fatal; 146 | 147 | address = 3'b011; 148 | #1 149 | assert(efault == 1'b0) else $fatal; 150 | assert(load_value == 64'h0000000000000089) else $fatal; 151 | 152 | address = 3'b100; 153 | #1 154 | assert(efault == 1'b0) else $fatal; 155 | assert(load_value == 64'h0000000000000067) else $fatal; 156 | 157 | address = 3'b101; 158 | #1 159 | assert(efault == 1'b0) else $fatal; 160 | assert(load_value == 64'h0000000000000045) else $fatal; 161 | 162 | address = 3'b110; 163 | #1 164 | assert(efault == 1'b0) else $fatal; 165 | assert(load_value == 64'h0000000000000023) else $fatal; 166 | 167 | address = 3'b111; 168 | #1 169 | assert(efault == 1'b0) else $fatal; 170 | assert(load_value == 64'h0000000000000001) else $fatal; 171 | 172 | // lh 173 | funct3 = 3'b001; 174 | 175 | address = 3'b000; 176 | #1 177 | assert(efault == 1'b0) else $fatal; 178 | assert(load_value == 64'hffffffffffffcdef) else $fatal; 179 | assert(ram_store_value == 64'h0123456789abffff) else $fatal; 180 | 181 | address = 3'b001; 182 | #1 183 | assert(efault == 1'b1) else $fatal; 184 | 185 | address = 3'b010; 186 | #1 187 | assert(efault == 1'b0) else $fatal; 188 | assert(load_value == 64'hffffffffffff89ab) else $fatal; 189 | assert(ram_store_value == 64'h01234567ffffcdef) else $fatal; 190 | 191 | address = 3'b011; 192 | #1 193 | assert(efault == 1'b1) else $fatal; 194 | 195 | address 
= 3'b100; 196 | #1 197 | assert(efault == 1'b0) else $fatal; 198 | assert(load_value == 64'h0000000000004567) else $fatal; 199 | assert(ram_store_value == 64'h0123ffff89abcdef) else $fatal; 200 | 201 | address = 3'b101; 202 | #1 203 | assert(efault == 1'b1) else $fatal; 204 | 205 | address = 3'b110; 206 | #1 207 | assert(efault == 1'b0) else $fatal; 208 | assert(load_value == 64'h0000000000000123) else $fatal; 209 | assert(ram_store_value == 64'hffff456789abcdef) else $fatal; 210 | 211 | address = 3'b111; 212 | #1 213 | assert(efault == 1'b1) else $fatal; 214 | 215 | // lhu 216 | funct3 = 3'b101; 217 | 218 | address = 3'b000; 219 | #1 220 | assert(efault == 1'b0) else $fatal; 221 | assert(load_value == 64'h000000000000cdef) else $fatal; 222 | 223 | address = 3'b001; 224 | #1 225 | assert(efault == 1'b1) else $fatal; 226 | 227 | address = 3'b010; 228 | #1 229 | assert(efault == 1'b0) else $fatal; 230 | assert(load_value == 64'h00000000000089ab) else $fatal; 231 | 232 | address = 3'b011; 233 | #1 234 | assert(efault == 1'b1) else $fatal; 235 | 236 | address = 3'b100; 237 | #1 238 | assert(efault == 1'b0) else $fatal; 239 | assert(load_value == 64'h0000000000004567) else $fatal; 240 | 241 | address = 3'b101; 242 | #1 243 | assert(efault == 1'b1) else $fatal; 244 | 245 | address = 3'b110; 246 | #1 247 | assert(efault == 1'b0) else $fatal; 248 | assert(load_value == 64'h0000000000000123) else $fatal; 249 | 250 | address = 3'b111; 251 | #1 252 | assert(efault == 1'b1) else $fatal; 253 | 254 | // lw 255 | funct3 = 3'b010; 256 | 257 | address = 3'b000; 258 | #1 259 | assert(efault == 1'b0) else $fatal; 260 | assert(load_value == 64'hffffffff89abcdef) else $fatal; 261 | assert(ram_store_value == 64'h01234567ffffffff) else $fatal; 262 | 263 | address = 3'b001; 264 | #1 265 | assert(efault == 1'b1) else $fatal; 266 | 267 | address = 3'b010; 268 | #1 269 | assert(efault == 1'b1) else $fatal; 270 | 271 | address = 3'b011; 272 | #1 273 | assert(efault == 1'b1) else $fatal; 274 | 
275 | address = 3'b100; 276 | #1 277 | assert(efault == 1'b0) else $fatal; 278 | assert(load_value == 64'h0000000001234567) else $fatal; 279 | assert(ram_store_value == 64'hffffffff89abcdef) else $fatal; 280 | 281 | address = 3'b101; 282 | #1 283 | assert(efault == 1'b1) else $fatal; 284 | 285 | address = 3'b110; 286 | #1 287 | assert(efault == 1'b1) else $fatal; 288 | 289 | address = 3'b111; 290 | #1 291 | assert(efault == 1'b1) else $fatal; 292 | 293 | // lwu 294 | funct3 = 3'b110; 295 | 296 | address = 3'b000; 297 | #1 298 | assert(efault == 1'b0) else $fatal; 299 | assert(load_value == 64'h0000000089abcdef) else $fatal; 300 | 301 | address = 3'b001; 302 | #1 303 | assert(efault == 1'b1) else $fatal; 304 | 305 | address = 3'b010; 306 | #1 307 | assert(efault == 1'b1) else $fatal; 308 | 309 | address = 3'b011; 310 | #1 311 | assert(efault == 1'b1) else $fatal; 312 | 313 | address = 3'b100; 314 | #1 315 | assert(efault == 1'b0) else $fatal; 316 | assert(load_value == 64'h0000000001234567) else $fatal; 317 | 318 | address = 3'b101; 319 | #1 320 | assert(efault == 1'b1) else $fatal; 321 | 322 | address = 3'b110; 323 | #1 324 | assert(efault == 1'b1) else $fatal; 325 | 326 | address = 3'b111; 327 | #1 328 | assert(efault == 1'b1) else $fatal; 329 | 330 | // ld 331 | funct3 = 3'b011; 332 | 333 | address = 3'b000; 334 | #1 335 | assert(efault == 1'b0) else $fatal; 336 | assert(load_value == 64'h0123456789abcdef) else $fatal; 337 | assert(ram_store_value == 64'hffffffffffffffff) else $fatal; 338 | 339 | address = 3'b001; 340 | #1 341 | assert(efault == 1'b1) else $fatal; 342 | 343 | address = 3'b010; 344 | #1 345 | assert(efault == 1'b1) else $fatal; 346 | 347 | address = 3'b011; 348 | #1 349 | assert(efault == 1'b1) else $fatal; 350 | 351 | address = 3'b100; 352 | #1 353 | assert(efault == 1'b1) else $fatal; 354 | 355 | address = 3'b101; 356 | #1 357 | assert(efault == 1'b1) else $fatal; 358 | 359 | address = 3'b110; 360 | #1 361 | assert(efault == 1'b1) else $fatal; 
362 | 363 | address = 3'b111; 364 | #1 365 | assert(efault == 1'b1) else $fatal; 366 | 367 | // ldu 368 | funct3 = 3'b111; 369 | 370 | address = 3'b000; 371 | #1 372 | assert(efault == 1'b1) else $fatal; 373 | 374 | address = 3'b001; 375 | #1 376 | assert(efault == 1'b1) else $fatal; 377 | 378 | address = 3'b010; 379 | #1 380 | assert(efault == 1'b1) else $fatal; 381 | 382 | address = 3'b011; 383 | #1 384 | assert(efault == 1'b1) else $fatal; 385 | 386 | address = 3'b100; 387 | #1 388 | assert(efault == 1'b1) else $fatal; 389 | 390 | address = 3'b101; 391 | #1 392 | assert(efault == 1'b1) else $fatal; 393 | 394 | address = 3'b110; 395 | #1 396 | assert(efault == 1'b1) else $fatal; 397 | 398 | address = 3'b111; 399 | #1 400 | assert(efault == 1'b1) else $fatal; 401 | end 402 | endmodule 403 | `endif 404 | -------------------------------------------------------------------------------- /tc/sv/mop_fusion.sv: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Refs: 4 | 5 | - https://arxiv.org/pdf/1607.02318 6 | 7 | - https://en.wikichip.org/wiki/macro-operation_fusion#RISC-V 8 | 9 | - https://github.com/llvm/llvm-project/blob/173907b5d77115623f160978a95159e36e05ee6c/llvm/lib/Target/RISCV/RISCVMacroFusion.td 10 | 11 | --- 12 | 13 | Load immediate 14 | 15 | +---------------------------+-------------------------------------------------+-------------------------------+ 16 | | Instructions | Fusion condition | Fused instruction | 17 | +===========================+=================================================+===============================+ 18 | | auipc rd_a, imm_a | rd_a == rd_b && rd_a == rs1_b | auipc rd_a, (imm_a + imm_b) | 19 | | addi rd_b, rs1_b, imm_b | | | 20 | +---------------------------+-------------------------------------------------+-------------------------------+ 21 | | lui rd_a, imm_a | rd_a == rd_b && rd_a == rs1_b && rs1_b != rs2_b | addi rd_a, rs2_b, imm_a | 22 | | add rd_b, rs1_b, rs2_b | | | 23 | 
+---------------------------+-------------------------------------------------+-------------------------------+ 24 | | lui rd_a, imm_a | rd_a == rd_b && rd_a == rs1_b && rs1_b != rs2_b | addiw rd_a, rs2_b, imm_a | 25 | | addw rd_b, rs1_b, rs2_b | | | 26 | +---------------------------+-------------------------------------------------+-------------------------------+ 27 | | lui rd_a, imm_a | rd_a == rd_b && rd_a == rs1_b | lui rd_a, (imm_a + imm_b) | 28 | | addi rd_b, rs1_b, imm_b | | | 29 | +---------------------------+-------------------------------------------------+-------------------------------+ 30 | | lui rd_a, imm_a | rd_a == rd_b && rd_a == rs1_b | lui rd_a, 32'(imm_a + imm_b) | 31 | | addiw rd_b, rs1_b, imm_b | | | 32 | +---------------------------+-------------------------------------------------+-------------------------------+ 33 | 34 | Jump 35 | 36 | +---------------------------+-------------------------------------------------+-------------------------------+ 37 | | Instructions | Fusion condition | Fused instruction | 38 | +===========================+=================================================+===============================+ 39 | | auipc rd_a, imm_a | rd_a == rd_b && rd_a == rs1_b | jal rd_a, (imm_a + imm_b) | 40 | | jalr rd_b, imm_b(rs1_b) | | | 41 | +---------------------------+-------------------------------------------------+-------------------------------+ 42 | 43 | Load 44 | 45 | +---------------------------+-------------------------------------------------+-------------------------------+ 46 | | Instructions | Fusion condition | Fused instruction | 47 | +===========================+=================================================+===============================+ 48 | | auipc rd_a, imm_a | rd_a == rd_b && rd_a == rs1_b | ld.pc rd_a, (imm_a + imm_b)pc | 49 | | ld rd_b, imm_b(rs1_b) | | | 50 | +---------------------------+-------------------------------------------------+-------------------------------+ 51 | | lui rd_a, imm_a | rd_a == 
rd_b && rd_a == rs1_b | ld rd_a, (imm_a + imm_b)x0 | 52 | | ld rd_b, imm_b(rs1_b) | | | 53 | +---------------------------+-------------------------------------------------+-------------------------------+ 54 | | add rd_a, rs1_a, rs2_a | rd_a == rd_b && rd_a == rs1_b && imm_b == 0 | ld.add rd_a, (rs1_a)(rs2_a) | 55 | | ld rd_b, imm_b(rs1_b) | | | 56 | +---------------------------+-------------------------------------------------+-------------------------------+ 57 | | sh1add rd_a, rs1_a, rs2_a | rd_a == rd_b && rd_a == rs1_b && imm_b == 0 | ld.sh1add rd_a, rs1_a, rs2_a | 58 | | ld rd_b, imm_b(rs1_b) | | | 59 | +---------------------------+-------------------------------------------------+-------------------------------+ 60 | 61 | Op 62 | 63 | +---------------------------+-------------------------------------------------+-------------------------------+ 64 | | Instructions | Fusion condition | Fused instruction | 65 | +===========================+=================================================+===============================+ 66 | | sub rd_a, x0, rs2_a | rd_a == rd_b && rd_a == rs1_b && rs2_a == rs2_b | abs rd_a, rs2_a | 67 | | max rd_b, rs1_b, rs2_b | | | 68 | +---------------------------+-------------------------------------------------+-------------------------------+ 69 | 70 | --- 71 | 72 | Fused instruction length 73 | 74 | +------+-------+-------+------+------+------+ 75 | | fuse | RVC_2 | RVC_1 | len2 | len4 | len8 | 76 | +======+=======+=======+======+======+======+ 77 | | 0 | 0 | 0 | 0 | 1 | 0 | 78 | | 0 | 0 | 1 | 1 | 0 | 0 | 79 | | 0 | 1 | 0 | 0 | 1 | 0 | 80 | | 0 | 1 | 1 | 1 | 0 | 0 | 81 | | 1 | 0 | 0 | 0 | 0 | 1 | 82 | | 1 | 0 | 1 | 1 | 1 | 0 | 83 | | 1 | 1 | 0 | 1 | 1 | 0 | 84 | | 1 | 1 | 1 | 0 | 1 | 0 | 85 | +------+-------+-------+------+------+------+ 86 | 87 | */ 88 | 89 | module mop_fusion ( 90 | input bit a_is_compressed, 91 | input logic[4:0] a_rd, 92 | input logic[4:0] a_rs1, 93 | input logic[4:0] a_rs2, 94 | input logic[11:0] a_csr, 95 | input 
bit[4:0] a_opcode, 96 | input logic[2:0] a_funct3, 97 | input logic[6:0] a_funct7, 98 | input logic[4:0] a_funct5, 99 | input logic[31:0] a_imm, 100 | input logic[4:0] a_csrimm, 101 | 102 | input bit b_is_compressed, 103 | input logic[4:0] b_rd, 104 | input logic[4:0] b_rs1, 105 | input logic[4:0] b_rs2, 106 | input bit[4:0] b_opcode, 107 | input logic[2:0] b_funct3, 108 | input logic[6:0] b_funct7, 109 | input logic[31:0] b_imm, 110 | 111 | output bit[1:0] insts_num, 112 | output bit[3:0] insts_len, 113 | 114 | output logic[4:0] rd, 115 | output logic[4:0] rs1, 116 | output logic[4:0] rs2, 117 | output logic[11:0] csr, 118 | output bit[4:0] opcode, 119 | output logic[2:0] funct3, 120 | output logic[6:0] funct7, 121 | output logic[4:0] funct5, 122 | output logic[32:0] imm, 123 | output logic[4:0] csrimm 124 | ); 125 | typedef enum bit[4:0] { 126 | OpCode_Load = 5'b00000, 127 | OpCode_Zarnavion = 5'b00010, 128 | OpCode_OpImm = 5'b00100, 129 | OpCode_Auipc = 5'b00101, 130 | OpCode_OpImm32 = 5'b00110, 131 | OpCode_Op = 5'b01100, 132 | OpCode_Lui = 5'b01101, 133 | OpCode_Op32 = 5'b01110, 134 | OpCode_Jalr = 5'b11001, 135 | OpCode_Jal = 5'b11011 136 | } OpCode; 137 | 138 | wire[32:0] imm_a_plus_b = {a_imm[31], a_imm} + {b_imm[31], b_imm}; 139 | 140 | bit performed_fusion; 141 | 142 | always_comb begin 143 | performed_fusion = '0; 144 | 145 | rd = a_rd; 146 | rs1 = a_rs1; 147 | rs2 = a_rs2; 148 | csr = a_csr; 149 | opcode = a_opcode; 150 | funct3 = a_funct3; 151 | funct7 = a_funct7; 152 | funct5 = a_funct5; 153 | imm = {a_imm[31], a_imm}; 154 | csrimm = a_csrimm; 155 | 156 | if (a_rd == b_rd && a_rd == b_rs1) 157 | unique casez ({a_opcode, a_funct3, a_funct7, b_opcode, b_funct3, b_funct7}) 158 | // auipc, addi -> auipc 159 | {OpCode_Auipc, 10'b???_???????, OpCode_OpImm, 10'b000_???????}: begin 160 | performed_fusion = '1; 161 | 162 | rd = a_rd; 163 | rs1 = a_rs1; // = '0 164 | rs2 = a_rs2; // = '0 165 | csr = a_csr; // = 'x 166 | opcode = a_opcode; 167 | funct3 = 'x; 168 
| funct7 = 'x; 169 | funct5 = 'x; 170 | imm = imm_a_plus_b; 171 | csrimm = 'x; 172 | end 173 | 174 | // lui, add -> addi (only when rs1_b != rs2_b: the fused addi reads rs2_b before rd is written, so rs2_b must not be the register lui just loaded) 175 | {OpCode_Lui, 10'b???_???????, OpCode_Op, 10'b000_0000000}: if (b_rs1 != b_rs2) begin 176 | performed_fusion = '1; 177 | 178 | rd = a_rd; 179 | rs1 = b_rs2; 180 | rs2 = a_rs2; // = '0 181 | csr = a_csr; // = 'x 182 | opcode = OpCode_OpImm; 183 | funct3 = b_funct3; 184 | funct7 = 'x; 185 | funct5 = 'x; 186 | imm = {a_imm[31], a_imm}; 187 | csrimm = 'x; 188 | end 189 | 190 | // lui, addw -> addiw (same rs1_b != rs2_b requirement as lui, add -> addi) 191 | {OpCode_Lui, 10'b???_???????, OpCode_Op32, 10'b000_0000000}: if (b_rs1 != b_rs2) begin 192 | performed_fusion = '1; 193 | 194 | rd = a_rd; 195 | rs1 = b_rs2; 196 | rs2 = a_rs2; // = '0 197 | csr = a_csr; // = 'x 198 | opcode = OpCode_OpImm32; 199 | funct3 = b_funct3; 200 | funct7 = 'x; 201 | funct5 = 'x; 202 | imm = {a_imm[31], a_imm}; 203 | csrimm = 'x; 204 | end 205 | 206 | // lui, addi -> lui 207 | {OpCode_Lui, 10'b???_???????, OpCode_OpImm, 10'b000_???????}: begin 208 | performed_fusion = '1; 209 | 210 | rd = a_rd; 211 | rs1 = a_rs1; // = '0 212 | rs2 = a_rs2; // = '0 213 | csr = a_csr; // = 'x 214 | opcode = a_opcode; 215 | funct3 = 'x; 216 | funct7 = 'x; 217 | funct5 = 'x; 218 | imm = imm_a_plus_b; 219 | csrimm = 'x; 220 | end 221 | 222 | // lui, addiw -> lui (sum truncated to 32 bits, then sign-extended from bit 31) 223 | {OpCode_Lui, 10'b???_???????, OpCode_OpImm32, 10'b000_???????}: begin 224 | performed_fusion = '1; 225 | 226 | rd = a_rd; 227 | rs1 = a_rs1; // = '0 228 | rs2 = a_rs2; // = '0 229 | csr = a_csr; // = 'x 230 | opcode = a_opcode; 231 | funct3 = 'x; 232 | funct7 = 'x; 233 | funct5 = 'x; 234 | imm = {imm_a_plus_b[31], imm_a_plus_b[0+:32]}; 235 | csrimm = 'x; 236 | end 237 | 238 | // auipc, jalr -> jal 239 | {OpCode_Auipc, 10'b???_???????, OpCode_Jalr, 10'b???_???????}: begin 240 | performed_fusion = '1; 241 | 242 | rd = a_rd; 243 | rs1 = a_rs1; // = '0 244 | rs2 = a_rs2; // = '0 245 | csr = a_csr; // = 'x 246 | opcode = OpCode_Jal; 247 | funct3 = 'x; 248 | funct7 = 'x; 249 | funct5
= 'x; 250 | imm = imm_a_plus_b; 251 | csrimm = 'x; 252 | end 253 | 254 | // auipc, load -> load.pc 255 | {OpCode_Auipc, 10'b???_???????, OpCode_Load, 10'b???_???????}: begin 256 | performed_fusion = '1; 257 | 258 | rd = a_rd; 259 | rs1 = a_rs1; // = '0 260 | rs2 = a_rs2; // = '0 261 | csr = a_csr; // = 'x 262 | opcode = OpCode_Zarnavion; 263 | funct3 = b_funct3; 264 | funct7 = 7'b0000000; 265 | funct5 = 'x; 266 | imm = imm_a_plus_b; 267 | csrimm = 'x; 268 | end 269 | 270 | // lui, load -> load 271 | {OpCode_Lui, 10'b???_???????, OpCode_Load, 10'b???_???????}: begin 272 | performed_fusion = '1; 273 | 274 | rd = a_rd; 275 | rs1 = 5'b00000; 276 | rs2 = a_rs2; // = '0 277 | csr = a_csr; // = 'x 278 | opcode = b_opcode; 279 | funct3 = b_funct3; 280 | funct7 = 'x; 281 | funct5 = 'x; 282 | imm = imm_a_plus_b; 283 | csrimm = 'x; 284 | end 285 | 286 | // add, load -> load.add 287 | {OpCode_Op, 10'b000_0000000, OpCode_Load, 10'b???_???????}: if (b_imm == '0) begin 288 | performed_fusion = '1; 289 | 290 | rd = a_rd; 291 | rs1 = a_rs1; 292 | rs2 = a_rs2; 293 | csr = a_csr; // = 'x 294 | opcode = OpCode_Zarnavion; 295 | funct3 = b_funct3; 296 | funct7 = 7'b0000001; 297 | funct5 = 'x; 298 | imm = 'x; 299 | csrimm = 'x; 300 | end 301 | 302 | // sh1add, load -> load.sh1add 303 | {OpCode_Op, 10'b010_0010000, OpCode_Load, 10'b???_???????}: if (b_imm == '0) begin 304 | performed_fusion = '1; 305 | 306 | rd = a_rd; 307 | rs1 = a_rs1; 308 | rs2 = a_rs2; 309 | csr = a_csr; // = 'x 310 | opcode = OpCode_Zarnavion; 311 | funct3 = b_funct3; 312 | funct7 = 7'b0000010; 313 | funct5 = 'x; 314 | imm = 'x; 315 | csrimm = 'x; 316 | end 317 | 318 | // sh2add, load -> load.sh2add 319 | {OpCode_Op, 10'b100_0010000, OpCode_Load, 10'b???_???????}: if (b_imm == '0) begin 320 | performed_fusion = '1; 321 | 322 | rd = a_rd; 323 | rs1 = a_rs1; 324 | rs2 = a_rs2; 325 | csr = a_csr; // = 'x 326 | opcode = OpCode_Zarnavion; 327 | funct3 = b_funct3; 328 | funct7 = 7'b0000011; 329 | funct5 = 'x; 330 | imm = 
'x; 331 | csrimm = 'x; 332 | end 333 | 334 | // sh3add, load -> load.sh3add 335 | {OpCode_Op, 10'b110_0010000, OpCode_Load, 10'b???_???????}: if (b_imm == '0) begin 336 | performed_fusion = '1; 337 | 338 | rd = a_rd; 339 | rs1 = a_rs1; 340 | rs2 = a_rs2; 341 | csr = a_csr; // = 'x 342 | opcode = OpCode_Zarnavion; 343 | funct3 = b_funct3; 344 | funct7 = 7'b0000100; 345 | funct5 = 'x; 346 | imm = 'x; 347 | csrimm = 'x; 348 | end 349 | 350 | // sub, max -> abs 351 | {OpCode_Op, 10'b000_0100000, OpCode_Op, 10'b110_0000101}: if (a_rs1 == 5'b00000 && a_rs2 == b_rs2) begin 352 | performed_fusion = '1; 353 | 354 | rd = a_rd; 355 | rs1 = a_rs2; 356 | rs2 = 5'b00000; 357 | csr = a_csr; // = 'x 358 | opcode = 5'b00010; 359 | funct3 = 3'b000; 360 | funct7 = 7'b0000101; 361 | funct5 = 'x; 362 | imm = 'x; 363 | csrimm = 'x; 364 | end 365 | 366 | default: ; 367 | endcase 368 | 369 | // insts_num = performed_fusion ? 2'd2 : 2'd1; 370 | insts_num = {performed_fusion, ~performed_fusion}; 371 | // insts_len = (a_is_compressed ? 4'd2 : 4'd4) + performed_fusion ? (b_is_compressed ? 4'd2 : 4'd4) : 4'd0; 372 | insts_len = { 373 | performed_fusion & ~(a_is_compressed | b_is_compressed), 374 | ~(performed_fusion ^ a_is_compressed) | (performed_fusion & b_is_compressed), 375 | a_is_compressed ^ (performed_fusion & b_is_compressed), 376 | 1'b0 377 | }; 378 | end 379 | endmodule 380 | -------------------------------------------------------------------------------- /tc/sv/ram_cache_tree_plru.sv: -------------------------------------------------------------------------------- 1 | module ram_cache_tree_plru #( 2 | parameter rv64 = 1, 3 | localparam xlen = rv64 ? 
64 : 32 4 | ) ( 5 | input bit clock, 6 | input bit reset, 7 | 8 | input bit ram_input_load, 9 | input bit ram_input_store, 10 | input bit ram_input_flush, 11 | 12 | input bit cache_left_address_in_cache, 13 | input logic cache_left_busy, 14 | input logic[xlen - 1:0] cache_left_load_value, 15 | 16 | input bit cache_right_address_in_cache, 17 | input logic cache_right_busy, 18 | input logic[xlen - 1:0] cache_right_load_value, 19 | 20 | output bit address_in_cache, 21 | 22 | output bit busy, 23 | 24 | output logic[xlen - 1:0] load_value, 25 | 26 | output bit cache_left_ram_input_load, 27 | output bit cache_left_ram_input_store, 28 | output bit cache_left_ram_input_flush, 29 | output bit cache_right_ram_input_load, 30 | output bit cache_right_ram_input_store, 31 | output bit cache_right_ram_input_flush 32 | ); /* Registered PLRU bit and its combinational next value. The inner module routes a request that hits neither cache to the left cache when oldest is 0 and to the right cache when it is 1. */ 33 | bit oldest; 34 | wire next_oldest; 35 | /* All combinational logic lives in ram_cache_tree_plru_inner. Ports are connected by position, so each group must follow the inner port order: request inputs, left cache status, right cache status, oldest, then the outputs. */ 36 | ram_cache_tree_plru_inner #(.rv64(rv64)) ram_cache_tree_plru_inner_module ( 37 | ram_input_load, ram_input_store, ram_input_flush, 38 | cache_left_address_in_cache, cache_left_busy, cache_left_load_value, 39 | cache_right_address_in_cache, cache_right_busy, cache_right_load_value, 40 | oldest, 41 | address_in_cache, 42 | busy, 43 | load_value, 44 | cache_left_ram_input_load, cache_left_ram_input_store, cache_left_ram_input_flush, 45 | cache_right_ram_input_load, cache_right_ram_input_store, cache_right_ram_input_flush, 46 | next_oldest 47 | ); 48 | /* oldest is cleared while reset is high and otherwise takes next_oldest on every rising clock edge. */ 49 | always_ff @(posedge clock) begin 50 | if (reset) 51 | oldest <= '0; 52 | else 53 | oldest <= next_oldest; 54 | end 55 | endmodule 56 | 57 | module ram_cache_tree_plru_inner #( 58 | parameter rv64 = 1, 59 | localparam xlen = rv64 ?
64 : 32 60 | ) ( 61 | input bit ram_input_load, 62 | input bit ram_input_store, 63 | input bit ram_input_flush, 64 | 65 | input bit cache_left_address_in_cache, 66 | input logic cache_left_busy, 67 | input logic[xlen - 1:0] cache_left_load_value, 68 | 69 | input bit cache_right_address_in_cache, 70 | input logic cache_right_busy, 71 | input logic[xlen - 1:0] cache_right_load_value, 72 | /* Current PLRU bit: when the address is in neither cache, 0 sends the request to the left cache and 1 sends it to the right cache. */ 73 | input bit oldest, 74 | 75 | output bit address_in_cache, 76 | 77 | output bit busy, 78 | 79 | output logic[xlen - 1:0] load_value, 80 | 81 | output bit cache_left_ram_input_load, 82 | output bit cache_left_ram_input_store, 83 | output bit cache_left_ram_input_flush, 84 | 85 | output bit cache_right_ram_input_load, 86 | output bit cache_right_ram_input_store, 87 | output bit cache_right_ram_input_flush, 88 | /* Value the PLRU bit should take next. */ 89 | output bit next_oldest 90 | ); /* The pair reports a hit when either child cache holds the address. */ 91 | assign address_in_cache = cache_left_address_in_cache | cache_right_address_in_cache; 92 | /* Busy as soon as either child cache is busy. */ 93 | assign busy = cache_left_busy | cache_right_busy; 94 | /* A left hit takes priority over a right hit; with no hit the value is x. */ 95 | assign load_value = 96 | cache_left_address_in_cache ? cache_left_load_value : 97 | cache_right_address_in_cache ? cache_right_load_value : 98 | 'x; 99 | /* The left cache gets the load/store on a left hit, or when neither cache hits and oldest is 0. Flush is always forwarded to the left cache. */ 100 | assign cache_left_ram_input_load = ram_input_load & (cache_left_address_in_cache | (~cache_right_address_in_cache & ~oldest)); 101 | assign cache_left_ram_input_store = ram_input_store & (cache_left_address_in_cache | (~cache_right_address_in_cache & ~oldest)); 102 | assign cache_left_ram_input_flush = ram_input_flush; 103 | /* The right cache gets the load/store only when the left cache misses and either the right cache hits or oldest is 1. NOTE(review): the right flush is held off while the left cache reports busy - presumably so the two sides flush one after the other; confirm. */ 104 | assign cache_right_ram_input_load = ram_input_load & ~cache_left_address_in_cache & (cache_right_address_in_cache | oldest); 105 | assign cache_right_ram_input_store = ram_input_store & ~cache_left_address_in_cache & (cache_right_address_in_cache | oldest); 106 | assign cache_right_ram_input_flush = ram_input_flush & ~cache_left_busy; 107 | /* The PLRU bit keeps its value when there is neither a load nor a store, or while a flush is requested. */ 108 | assign next_oldest = 109 | ((~| {ram_input_load, ram_input_store}) | ram_input_flush) ?
oldest :
        cache_left_address_in_cache | (~cache_right_address_in_cache & oldest);
endmodule

// Self-checking testbench for ram_cache_tree_plru_inner, instantiated with
// rv64 = 0 so that the load values are 32 bits wide. Only compiled when
// TESTING is defined.
`ifdef TESTING
module test_ram_cache_tree_plru_inner;
    // Inputs driven by the test cases.
    bit ram_input_load;
    bit ram_input_store;
    bit ram_input_flush;
    bit cache_left_address_in_cache;
    logic cache_left_busy;
    logic[31:0] cache_left_load_value;
    bit cache_right_address_in_cache;
    logic cache_right_busy;
    logic[31:0] cache_right_load_value;
    bit oldest;
    // Outputs observed from the module under test.
    wire address_in_cache;
    wire busy;
    wire[31:0] load_value;
    wire cache_left_ram_input_load;
    wire cache_left_ram_input_store;
    wire cache_left_ram_input_flush;
    wire cache_right_ram_input_load;
    wire cache_right_ram_input_store;
    wire cache_right_ram_input_flush;
    wire next_oldest;
    // Ports are connected positionally, in the module's declaration order.
    ram_cache_tree_plru_inner #(.rv64(0)) ram_cache_tree_plru_inner_module (
        ram_input_load, ram_input_store, ram_input_flush,
        cache_left_address_in_cache, cache_left_busy, cache_left_load_value,
        cache_right_address_in_cache, cache_right_busy, cache_right_load_value,
        oldest,
        address_in_cache,
        busy,
        load_value,
        cache_left_ram_input_load, cache_left_ram_input_store, cache_left_ram_input_flush,
        cache_right_ram_input_load, cache_right_ram_input_store, cache_right_ram_input_flush,
        next_oldest
    );

    // test_case: drives the ten inputs, waits one time unit for the
    // combinational outputs to settle, then compares every output with its
    // expected value and aborts with $fatal on the first mismatch.
    // load_value is only compared when a load is requested and the module is
    // not busy.
    `define test_case(
        ram_input_load_, ram_input_store_, ram_input_flush_,
        cache_left_address_in_cache_, cache_left_busy_, cache_left_load_value_,
        cache_right_address_in_cache_, cache_right_busy_, cache_right_load_value_,
        oldest_,
        expected_address_in_cache,
        expected_busy,
        expected_load_value,
        expected_cache_left_ram_input_load, expected_cache_left_ram_input_store, expected_cache_left_ram_input_flush,
        expected_cache_right_ram_input_load, expected_cache_right_ram_input_store, expected_cache_right_ram_input_flush,
        expected_next_oldest
    ) begin \
        ram_input_load = ram_input_load_; \
        ram_input_store = ram_input_store_; \
        ram_input_flush = ram_input_flush_; \
        cache_left_address_in_cache = cache_left_address_in_cache_; \
        cache_left_busy = cache_left_busy_; \
        cache_left_load_value = cache_left_load_value_; \
        cache_right_address_in_cache = cache_right_address_in_cache_; \
        cache_right_busy = cache_right_busy_; \
        cache_right_load_value = cache_right_load_value_; \
        oldest = oldest_; \
        #1 \
        assert(address_in_cache == expected_address_in_cache) else $fatal(1, "address_in_cache: expected %h, got %h", expected_address_in_cache, address_in_cache); \
        assert(busy == expected_busy) else $fatal(1, "busy: expected %h, got %h", expected_busy, busy); \
        if (ram_input_load & ~busy) assert(load_value == expected_load_value) else $fatal(1, "load_value: expected %h, got %h", expected_load_value, load_value); \
        assert(cache_left_ram_input_load == expected_cache_left_ram_input_load) else $fatal(1, "cache_left_ram_input_load: expected %h, got %h", expected_cache_left_ram_input_load, cache_left_ram_input_load); \
        assert(cache_left_ram_input_store == expected_cache_left_ram_input_store) else $fatal(1, "cache_left_ram_input_store: expected %h, got %h", expected_cache_left_ram_input_store, cache_left_ram_input_store); \
        assert(cache_left_ram_input_flush == expected_cache_left_ram_input_flush) else $fatal(1, "cache_left_ram_input_flush: expected %h, got %h", expected_cache_left_ram_input_flush, cache_left_ram_input_flush); \
        assert(cache_right_ram_input_load == expected_cache_right_ram_input_load) else $fatal(1, "cache_right_ram_input_load: expected %h, got %h", expected_cache_right_ram_input_load, cache_right_ram_input_load); \
        assert(cache_right_ram_input_store == expected_cache_right_ram_input_store) else $fatal(1, "cache_right_ram_input_store: expected %h, got %h", expected_cache_right_ram_input_store, cache_right_ram_input_store); \
        assert(cache_right_ram_input_flush == expected_cache_right_ram_input_flush) else $fatal(1, "cache_right_ram_input_flush: expected %h, got %h", expected_cache_right_ram_input_flush, cache_right_ram_input_flush); \
        assert(next_oldest == expected_next_oldest) else $fatal(1, "next_oldest: expected %h, got %h", expected_next_oldest, next_oldest); \
    end

    initial begin
        // Idle: no request, nothing is forwarded and next_oldest equals oldest.
        `test_case(
            '0, '0, '0,
            '0, '0, 32'h01234567,
            '0, '0, 32'hfedcba98,
            '0,
            '0,
            '0,
            'x,
            '0, '0, '0,
            '0, '0, '0,
            '0
        )

        // Load that misses both children with oldest = 0: forwarded to the left child.
        `test_case(
            '1, '0, '0,
            '0, '1, 32'h01234567,
            '0, '0, 32'hfedcba98,
            '0,
            '0,
            '1,
            'x,
            '1, '0, '0,
            '0, '0, '0,
            '0
        )

        // Same miss with oldest = 1: forwarded to the right child instead.
        `test_case(
            '1, '0, '0,
            '0, '1, 32'h01234567,
            '0, '0, 32'hfedcba98,
            '1,
            '0,
            '1,
            'x,
            '0, '0, '0,
            '1, '0, '0,
            '1
        )

        // Load that hits the left child: its value is returned and next_oldest becomes 1.
        `test_case(
            '1, '0, '0,
            '1, '0, 32'h01234567,
            '0, '0, 32'hfedcba98,
            '0,
            '1,
            '0,
            32'h01234567,
            '1, '0, '0,
            '0, '0, '0,
            '1
        )

        // Left hit while the left child reports busy: busy is propagated.
        `test_case(
            '1, '0, '0,
            '1, '1, 32'h01234567,
            '0, '0, 32'hfedcba98,
            '0,
            '1,
            '1,
            32'h01234567,
            '1, '0, '0,
            '0, '0, '0,
            '1
        )

        // Load that hits the right child: its value is returned and next_oldest is 0.
        `test_case(
            '1, '0, '0,
            '0, '0, 32'h01234567,
            '1, '0, 32'hfedcba98,
            '0,
            '1,
            '0,
            32'hfedcba98,
            '0, '0, '0,
            '1, '0, '0,
            '0
        )

        // Right hit while the right child reports busy: busy is propagated.
        `test_case(
            '1, '0, '0,
            '0, '0, 32'h01234567,
            '1, '1, 32'hfedcba98,
            '0,
            '1,
            '1,
            32'hfedcba98,
            '0, '0, '0,
            '1, '0, '0,
            '0
        )

        // Flush with neither child busy.
        `test_case(
            '0, '0, '1,
            '1, '0, 32'h01234567,
            '0, '0, 32'hfedcba98,
            '0,
            '1,
            '0,
            32'hfedcba98,
            '0, '0,
'1,
            '0, '0, '1,
            '0
        )

        `test_case(
            '0, '0, '1,
            '1, '1, 32'h01234567,
            '0, '0, 32'hfedcba98,
            '0,
            '1,
            '1,
            32'hfedcba98,
            '0, '0, '1,
            '0, '0, '0,
            '0
        )

        `test_case(
            '0, '0, '1,
            '1, '0, 32'h01234567,
            '0, '1, 32'hfedcba98,
            '0,
            '1,
            '1,
            32'hfedcba98,
            '0, '0, '1,
            '0, '0, '1,
            '0
        )
    end
endmodule
`endif

--------------------------------------------------------------------------------
/tc/sv/rv_decoder.sv:
--------------------------------------------------------------------------------
// Combinational decoder for 32-bit RISC-V instruction words.
//
// `sigill` is raised unless the word has the 32-bit length prefix
// (in[1:0] == 2'b11) and its major opcode (in[6:2]) is one of the groups
// handled below. For a recognised instruction the relevant fields are
// extracted; fields that the instruction does not use are either forced to
// zero (register numbers, CSR strobes) or left as 'x.
//
//   rd, rs1, rs2         register numbers, '0 when the format has none
//   csr                  CSR address (Zicsr instructions only)
//   csr_load, csr_store  whether the CSR is read / written
//   opcode               in[6:2]
//   funct3/funct7/funct5 in[14:12] / in[31:25] / in[24:20]
//   imm                  immediate, sign-extended where the format requires it
//   csrimm               5-bit immediate of the csrr*i instructions
module rv_decoder (
    input bit[31:0] in,

    output bit sigill,
    output logic[4:0] rd,
    output logic[4:0] rs1,
    output logic[4:0] rs2,
    output logic[11:0] csr,
    output logic csr_load,
    output logic csr_store,
    output logic[4:0] opcode,
    output logic[2:0] funct3,
    output logic[6:0] funct7,
    output logic[4:0] funct5,
    output logic[31:0] imm,
    output logic[4:0] csrimm
);
    // The immediate is assembled from these slices so that it can be
    // assigned as a whole through the `imm macro.
    bit[11:0] imm_31_20;
    bit[7:0] imm_19_12;
    bit imm_11;
    bit[5:0] imm_10_5;
    bit[3:0] imm_4_1;
    bit imm_0;

    `define imm {imm_31_20, imm_19_12, imm_11, imm_10_5, imm_4_1, imm_0}

    assign imm = `imm;

    always_comb begin
        // Defaults: illegal instruction, every field unknown.
        sigill = '1;
        opcode = 'x;
        funct3 = 'x;
        funct7 = 'x;
        funct5 = 'x;
        rd = 'x;
        rs1 = 'x;
        rs2 = 'x;
        csr = 'x;
        csr_load = 'x;
        csr_store = 'x;
        `imm = 'x;
        csrimm = 'x;

        if (in[0+:2] == 2'b11) begin
            unique casez (in[2+:5])
                // op, op-32 (R-type)
                5'b011?0: begin
                    sigill = '0;

                    opcode = in[2+:5];
                    funct3 = in[12+:3];
                    funct7 = in[25+:7];
                    funct5 = in[20+:5];

                    rd = in[7+:5];
                    rs1 = in[15+:5];
                    rs2 = in[20+:5];
                    csr_load = '0;
                    csr_store = '0;
                end

                // load
                5'b00000,
                // op-imm, op-imm-32
                5'b001?0,
                // misc-mem: major opcode 0001111, i.e. in[6:2] == 5'b00011.
                // (5'b00111 would be the >= 48-bit instruction slot.)
                5'b00011,
                // jalr
                5'b11001: begin
                    sigill = '0;

                    opcode = in[2+:5];
                    funct3 = in[12+:3];

                    rd = in[7+:5];
                    rs1 = in[15+:5];
                    rs2 = '0;
                    csr_load = '0;
                    csr_store = '0;

                    // I-type immediate: in[31:20], sign-extended.
                    `imm = unsigned'(32'(signed'(in[20+:12])));
                end

                // system
                5'b11100: begin
                    sigill = '0;

                    opcode = in[2+:5];
                    funct3 = in[12+:3];

                    rd = '0;
                    rs1 = '0;
                    rs2 = '0;
                    csr_load = '0;
                    csr_store = '0;

                    unique case (funct3)
                        // ebreak, ecall
                        3'b000: begin
                            funct7 = in[25+:7];
                            funct5 = in[20+:5];
                        end

                        3'b001: // csrrw
                        begin
                            rd = in[7+:5];
                            rs1 = in[15+:5];
                            csr = in[20+:12];
                            // The CSR is only read when rd is not x0.
                            csr_load = in[7+:5] != '0;
                            csr_store = '1;
                        end

                        3'b010: // csrrs
                        begin
                            rd = in[7+:5];
                            rs1 = in[15+:5];
                            csr = in[20+:12];
                            csr_load = '1;
                            // The CSR is only written when rs1 is not x0.
                            csr_store = in[15+:5] != '0;
                        end

                        3'b011: // csrrc
                        begin
                            rd = in[7+:5];
                            rs1 = in[15+:5];
                            csr = in[20+:12];
                            csr_load = '1;
                            csr_store = in[15+:5] != '0;
                        end

                        3'b101: // csrrwi
                        begin
                            rd = in[7+:5];
                            csr = in[20+:12];
                            csrimm = in[15+:5];
                            csr_load = in[7+:5] != '0;
                            csr_store = '1;
                        end

                        3'b110: // csrrsi
                        begin
                            rd = in[7+:5];
                            csr = in[20+:12];
                            csrimm = in[15+:5];
                            csr_load = '1;
                            // The CSR is only written when the immediate is non-zero.
                            csr_store = in[15+:5] != '0;
                        end

                        3'b111: // csrrci
                        begin
                            rd = in[7+:5];
                            csr = in[20+:12];
                            csrimm = in[15+:5];
                            csr_load = '1;
                            csr_store = in[15+:5] != '0;
                        end

                        // funct3 == 3'b100 is not a system instruction:
                        // undo the assignments made above.
                        default: begin
                            sigill = '1;

                            opcode = 'x;
                            funct3 = 'x;

                            rd = 'x;
                            rs1 = 'x;
                            rs2 = 'x;
                            csr_load = 'x;
                            csr_store = 'x;
                        end
                    endcase
                end

                // store
                5'b01000: begin
                    sigill = '0;

                    opcode = in[2+:5];
                    funct3 = in[12+:3];
                    funct7 = in[25+:7];
                    funct5 = in[20+:5];

                    rd = '0;
                    rs1 = in[15+:5];
                    rs2 = in[20+:5];
                    csr_load = '0;
                    csr_store = '0;

                    // S-type immediate: {in[31:25], in[11:7]}, sign-extended.
                    `imm = unsigned'(32'(signed'({in[25+:7], in[7+:5]})));
                end

                // branch
                5'b11000: begin
                    sigill = '0;

                    opcode = in[2+:5];
                    funct3 = in[12+:3];
                    funct7 = in[25+:7];
                    funct5 = in[20+:5];

                    rd = '0;
                    rs1 = in[15+:5];
                    rs2 = in[20+:5];
                    csr_load = '0;
                    csr_store = '0;

                    // B-type immediate, sign-extended; bit 0 is always zero.
                    `imm = unsigned'(32'(signed'({in[31], in[7], in[25+:6], in[8+:4], 1'b0})));
                end

                // auipc, lui
                5'b0?101: begin
                    sigill = '0;

                    opcode = in[2+:5];
                    funct3 = in[12+:3];
                    funct7 = in[25+:7];
                    funct5 = in[20+:5];

                    rd = in[7+:5];
                    rs1 = '0;
                    rs2 = '0;
                    csr_load = '0;
                    csr_store = '0;

                    // U-type immediate: in[31:12] in the upper 20 bits.
                    `imm = {in[12+:20], 12'b0};
                end

                // jal
                5'b11011: begin
                    sigill = '0;

                    opcode = in[2+:5];
                    funct3 = in[12+:3];
                    funct7 = in[25+:7];
                    funct5 = in[20+:5];

                    rd = in[7+:5];
                    rs1 = '0;
                    rs2 = '0;
                    csr_load = '0;
                    csr_store = '0;

                    // J-type immediate, sign-extended; bit 0 is always zero.
                    `imm = unsigned'(32'(signed'({in[31], in[12+:8], in[20], in[21+:10], 1'b0})));
                end

                // Any other major opcode keeps the sigill default.
                default: ;
            endcase
        end
    end
endmodule

--------------------------------------------------------------------------------
/tc/sv/rv_register_file.sv:
--------------------------------------------------------------------------------
// Register file with 32 entries, two combinational read ports (rs1, rs2) and
// one write port (rd) that is sampled on the rising clock edge. Writes to
// register 0 are ignored.
module rv_register_file #(
    parameter rv64 = 1,
    localparam xlen = rv64 ?
64 : 32
) (
    input bit clock,

    input bit[4:0] rd,
    input logic[xlen - 1:0] rd_value,
    input bit[4:0] rs1,
    input bit[4:0] rs2,

    output logic [xlen - 1:0] rs1_value,
    output logic [xlen - 1:0] rs2_value
);
    bit[xlen - 1:0] registers[31:0];

    // Clear every register at start-up. The array is declared [31:0], so the
    // last valid index is 31 and the loop must stop before 32.
    initial
        for (int i = 0; i < 32; i++)
            registers[i] = '0;

    // Write port: register 0 is never written, so it keeps its initial zero.
    always @(posedge clock)
        if (rd != '0)
            registers[rd] <= rd_value;

    // Read ports are purely combinational.
    always_comb begin
        rs1_value = registers[rs1];
        rs2_value = registers[rs2];
    end
endmodule

--------------------------------------------------------------------------------
/tc/tower-of-alloy.S:
--------------------------------------------------------------------------------
# Tower of Hanoi solver; mirrors tc/tower-of-alloy.c.
# All input and output goes through the byte-wide port at address -8 (fp).
# Branch and jump targets are written as numeric offsets; the label each one
# refers to is named in the trailing comment.
li fp, -8

li a5, 5 # toggle magnet

lbu a0, 0(fp) # disk_nr
lbu a1, 0(fp) # source
lbu a2, 0(fp) # destination
lbu a3, 0(fp) # spare

li sp, 0x100

# fallthrough move


# move
#
# a0: disk_nr
# a1: source
# a2: destination
# a3: spare

# Save this call's arguments; move_inner pops them again.
addi sp, sp, -16
sw a0, 0(sp)
sw a1, 4(sp)
sw a2, 8(sp)
sw a3, 12(sp)

# Arguments of the first recursive call: (disk_nr - 1, source, spare, destination).
addi a0, a0, -1
mv a4, a2
mv a2, a3
mv a3, a4
bnez a0, -18 # move, tail call
# fallthrough move_inner


# move_inner
# Move one disk from a1 to a2.
sb a1, 0(fp)
sb a5, 0(fp)
sb a2, 0(fp)
sb a5, 0(fp)

# Pop the caller's frame with source and spare exchanged:
# a3 = saved source, a2 = saved destination, a1 = saved spare.
lw a0, 0(sp)
lw a3, 4(sp)
lw a2, 8(sp)
lw a1, 12(sp)
addi sp, sp, 16

# The caller's own move: saved source -> saved destination.
sb a3, 0(fp)
sb a5, 0(fp)
sb a2, 0(fp)
sb a5, 0(fp)

# Second recursive call: (disk_nr - 1, spare, destination, source).
addi a0, a0, -1
beqz a0, -28 # move_inner, tail call
j -50 # move, tail call

--------------------------------------------------------------------------------
/tc/tower-of-alloy.c:
--------------------------------------------------------------------------------
#include <stdint.h>

static volatile uint8_t* const IO = (volatile uint8_t*)(intptr_t)-8;

static const uint8_t
TOGGLE = 5;

// Recursively moves disks 0..disk_nr from `source` to `destination`, using
// `spare` as the intermediate peg. Each single-disk move is emitted as four
// writes to IO: source peg, TOGGLE, destination peg, TOGGLE.
static void move(uint8_t disk_nr, uint8_t source, uint8_t destination, uint8_t spare) {
    if (disk_nr > 0) {
        move(disk_nr - 1, source, spare, destination);
    }

    *IO = source;
    *IO = TOGGLE;
    *IO = destination;
    *IO = TOGGLE;

    if (disk_nr > 0) {
        move(disk_nr - 1, spare, destination, source);
    }
}

// Reads disk_nr, source, destination and spare from IO (in that order) and
// runs the solver.
int main(void) {
    uint8_t disk_nr = *IO;
    uint8_t source = *IO;
    uint8_t destination = *IO;
    uint8_t spare = *IO;
    move(disk_nr, source, destination, spare);

    __builtin_unreachable();
}

--------------------------------------------------------------------------------
/tc/tower-of-alloy.ctz.S:
--------------------------------------------------------------------------------
# Iterative Tower of Hanoi solver; mirrors tc/tower-of-alloy.ctz.c.
# All input and output goes through the byte-wide port at address -8 (fp).
li fp, -8

lbu a2, 0(fp) # disk_nr
lbu a3, 0(fp) # source
lbu a4, 0(fp) # destination
lbu a5, 0(fp) # spare

# a0: positions = 0

# a1: pegs
# = src |
# (dest << ((1 + disk_nr_is_odd) * 8)) |
# (spare << ((2 - disk_nr_is_odd) * 8))
bexti a2, a2, 0 # disk_nr_is_odd
slli a2, a2, 3 # disk_nr_is_odd * 8
li s1, 16
sub s1, s1, a2 # (2 - disk_nr_is_odd) * 8
sllw a5, a5, s1
or a1, a3, a5
addi a2, a2, 8 # (1 + disk_nr_is_odd) * 8
sllw a4, a4, a2
or a1, a1, a4

li a2, -1 # i: -1 -> -inf

li a3, 5 # toggle magnet

li s2, -3 # correction added when the next position reaches 3 or more

# loop
ctz a5, a2 # j

slli a5, a5, 3 # j * 8
ror a0, a0, a5 # positions = positions >>> (j * 8)
andi s1, a0, 0xff # position

slli a4, s1, 3 # *IO = (pegs >> (position * 8)) & 0xff
srlw a4, a1, a4 #
sb a4, 0(fp) #
sb a3, 0(fp)

addi s1, s1, 1 # next_position = (position + 1 + (j & 1)) % 3
bexti a4, a5, 3 # j & 1 (a5 holds j * 8)
add s1, s1, a4 #
sltiu a4, s1, 3 # 1 if no wrap-around is needed
czero.nez a4, s2, a4 # 0 if a4 was non-zero, otherwise s2 (-3)
add s1, s1, a4 #

slli a4, s1, 3 # *IO = (pegs >> (next_position * 8)) & 0xff
srlw a4, a1, a4 #
sb a4, 0(fp) #
sb a3, 0(fp)

andi a0, a0, -256 #
positions = (positions & 0xffffffffffffff00) | next_position 55 | or a0, a0, s1 # 56 | 57 | rol a0, a0, a5 # positions = positions <<< (j * 8) 58 | 59 | addi a2, a2, -1 60 | 61 | j -68 # loop 62 | -------------------------------------------------------------------------------- /tc/tower-of-alloy.ctz.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | static volatile uint8_t* const IO = (volatile uint8_t*)(intptr_t)-8; 4 | 5 | static const uint8_t TOGGLE = 5; 6 | 7 | int main(void) { 8 | uint8_t highest_disk_nr = *IO; 9 | uint8_t src = *IO; 10 | uint8_t dest = *IO; 11 | uint8_t spare = *IO; 12 | 13 | uint64_t positions = 0; 14 | uint32_t num_disks_is_even = highest_disk_nr % 2; 15 | uint32_t pegs = 16 | ((uint32_t) src) | 17 | (((uint32_t) dest) << ((1 + num_disks_is_even) * 8)) | 18 | (((uint32_t) spare) << ((2 - num_disks_is_even) * 8)); 19 | 20 | for (uint64_t i = 0; ; i++) { 21 | uint8_t j = __builtin_ctzl(~i); 22 | 23 | // positions = stdc_rotate_right(positions, ((uint32_t) j) * 8); 24 | positions = (positions >> ((uint32_t) j) * 8) | (positions << (64 - ((uint32_t) j) * 8)); 25 | 26 | uint8_t position = positions & 0b11; 27 | *IO = (pegs >> (position * 8)) & 0xff; 28 | *IO = TOGGLE; 29 | 30 | uint8_t next_position = (position + 1 + (j & 1)) % 3; 31 | *IO = (pegs >> (next_position * 8)) & 0xff; 32 | *IO = TOGGLE; 33 | 34 | positions = (positions & 0xffffffffffffff00ULL) | ((uint64_t) next_position); 35 | // positions = stdc_rotate_left(positions, ((uint32_t) j) * 8); 36 | positions = (positions << ((uint32_t) j) * 8) | (positions >> (64 - ((uint32_t) j) * 8)); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /tc/unseen-fruit.S: -------------------------------------------------------------------------------- 1 | li fp, -8 2 | sb a1, 0(fp) # left 3 | li a0, 1 # forward 4 | sb a0, 0(fp) 5 | sb a1, 0(fp) 6 | sb a0, 0(fp) 7 | sb a0, 0(fp) 8 | sb a0, 0(fp) 9 | sb 
a0, 0(fp) 10 | sb a1, 0(fp) 11 | sb a0, 0(fp) 12 | li a1, 2 # right 13 | sb a1, 0(fp) 14 | sb a0, 0(fp) 15 | li a2, 3 # enjoy 16 | li a3, 4 # use 17 | li a4, 92 # empty conveyor belt 18 | # loop 19 | lbu a0, 0(fp) 20 | bne a0, a4, 8 # fruit 21 | # idle 22 | sb a2, 0(fp) 23 | j -8 # loop 24 | # fruit 25 | lbu a5, 0(a0) 26 | bnez a5, 6 # found_duplicate 27 | sb a0, 0(a0) 28 | j -10 # idle 29 | # found_duplicate 30 | sb a1, 0(fp) 31 | sb a3 0(fp) 32 | -------------------------------------------------------------------------------- /tc/unseen-fruit.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | static volatile uint8_t* const IO = (volatile uint8_t*)(intptr_t)-8; 4 | 5 | static const uint8_t LEFT = 0; 6 | static const uint8_t FORWARD = 1; 7 | static const uint8_t RIGHT = 2; 8 | static const uint8_t ENJOY = 3; 9 | static const uint8_t USE = 4; 10 | 11 | int main(void) { 12 | volatile uint8_t* mem = 0; 13 | 14 | *IO = LEFT; 15 | *IO = FORWARD; 16 | *IO = LEFT; 17 | *IO = FORWARD; 18 | *IO = FORWARD; 19 | *IO = FORWARD; 20 | *IO = FORWARD; 21 | *IO = LEFT; 22 | *IO = FORWARD; 23 | *IO = RIGHT; 24 | *IO = FORWARD; 25 | 26 | while (true) { 27 | uint8_t current = *IO; 28 | if (__builtin_expect(current, 92) == 92) { 29 | *IO = ENJOY; 30 | continue; 31 | } 32 | 33 | if (mem[current] > 0) { 34 | *IO = RIGHT; 35 | *IO = USE; 36 | 37 | __builtin_unreachable(); 38 | } 39 | 40 | mem[current] = current; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /tc/xor.S: -------------------------------------------------------------------------------- 1 | li fp, -8 2 | lbu a0, 0(fp) 3 | lbu a1, 0(fp) 4 | xor a0, a0, a1 5 | sb a0, 0(fp) 6 | -------------------------------------------------------------------------------- /tc/xor.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | static volatile uint8_t* const IO = (volatile 
uint8_t*)(intptr_t)-8; 4 | 5 | int main(void) { 6 | uint8_t x = *IO; 7 | uint8_t y = *IO; 8 | *IO = x ^ y; 9 | 10 | __builtin_unreachable(); 11 | } 12 | --------------------------------------------------------------------------------